Skip to content

Commit 237a800

Browse files
igerberclaude
andcommitted
Address CI review P0 + P1 on Phase 1c
P0: bias_corrected_local_linear now routes the CI through `safe_inference()` so degenerate cases with `se_robust <= 0` or non-finite `se_robust` (e.g., exact-fit / constant-y) return `(NaN, NaN)` rather than a misleading zero-width or infinite CI. Matches the repo-wide inference contract (CLAUDE.md Key Design Pattern #6). P1: Auto-bandwidth path now calls `lpbwselect_mse_dpi` directly with `cluster`, `vce`, and `nnmatch` forwarded. Previously it went through `mse_optimal_bandwidth` which hard-codes unclustered / nn / nnmatch=3, silently mismatching the downstream `lprobust` fit's reported estimator. Tests added: TestNaNSafeCI (constant-y + near-zero-SE) and TestAutoBandwidthForwardsParameters (cluster+auto, vce='hc1'+auto, nnmatch=5+auto), all asserting the selected bandwidth changes when the corresponding parameter changes (catches silent fallback). Also: suppress spurious BLAS FPE warnings in lprobust_bw's hc/hc2/hc3 branch (numpy issue #21432 pattern), newly reachable via the wired-through vce='hc1' auto-bandwidth path. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 189f560 commit 237a800

3 files changed

Lines changed: 183 additions & 19 deletions

File tree

diff_diff/_nprobust_port.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -519,12 +519,18 @@ def lprobust_bw(
519519
dups_B = dups[ind1] if dups is not None else None
520520
dupsid_B = dupsid[ind1] if dupsid is not None else None
521521
if vce in ("hc0", "hc1", "hc2", "hc3"):
522-
predicts_B = R_B1 @ beta_B1
523-
if vce in ("hc2", "hc3"):
524-
hii_B = np.empty(n_B1, dtype=np.float64)
525-
RW1 = R_B1 * eW1[:, None]
526-
for i in range(n_B1):
527-
hii_B[i] = R_B1[i, :] @ invG_B1 @ RW1[i, :]
522+
# Suppress spurious BLAS FPE warnings (numpy issue #21432
523+
# pattern); matmul on some platforms (Accelerate / OpenBLAS)
524+
# sets divide/overflow flags on SIMD intermediates even when
525+
# input and output are finite.
526+
with np.errstate(divide="ignore", over="ignore",
527+
invalid="ignore", under="ignore"):
528+
predicts_B = R_B1 @ beta_B1
529+
if vce in ("hc2", "hc3"):
530+
hii_B = np.empty(n_B1, dtype=np.float64)
531+
RW1 = R_B1 * eW1[:, None]
532+
for i in range(n_B1):
533+
hii_B[i] = R_B1[i, :] @ invG_B1 @ RW1[i, :]
528534
res_B = lprobust_res(
529535
eX1,
530536
eY1,

diff_diff/local_linear.py

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,16 +1073,59 @@ def bias_corrected_local_linear(
10731073
# bandwidth_diagnostics.b_mse for callers that want to inspect or
10741074
# override. The paper (de Chaisemartin et al. 2026) likewise uses a
10751075
# single h*_G throughout Equation 8.
1076+
#
1077+
# In auto mode, cluster / vce / nnmatch are forwarded to
1078+
# ``lpbwselect_mse_dpi`` so bandwidth selection reflects the same
1079+
# estimator the final ``lprobust`` call will use. Calling
1080+
# ``mse_optimal_bandwidth`` (the public wrapper) instead would hard-code
1081+
# ``cluster=None, vce="nn", nnmatch=3`` and silently mismatch the
1082+
# downstream fit — a methodology bug (CI review PR #340 P1).
10761083
bw_source: str
10771084
bw_diag: Optional[BandwidthResult] = None
10781085
if h is None and b is None:
1079-
bw_diag = mse_optimal_bandwidth(
1080-
d=d,
1086+
# Defer heavy import to call time to avoid import-cycle risk.
1087+
from diff_diff._nprobust_port import lpbwselect_mse_dpi
1088+
1089+
stages = lpbwselect_mse_dpi(
10811090
y=y,
1082-
boundary=boundary,
1091+
x=d,
1092+
cluster=cluster_arr,
1093+
eval_point=float(boundary),
1094+
p=1,
1095+
q=2,
1096+
deriv=0,
1097+
kernel=nprobust_kernel,
1098+
bwcheck=21,
1099+
bwregul=1.0,
1100+
vce=vce,
1101+
nnmatch=nnmatch,
1102+
interior=False,
1103+
)
1104+
bw_diag = BandwidthResult(
1105+
h_mse=stages.h_mse_dpi,
1106+
b_mse=stages.b_mse_dpi,
1107+
c_bw=stages.c_bw,
1108+
bw_mp2=stages.bw_mp2,
1109+
bw_mp3=stages.bw_mp3,
1110+
stage_d1_V=stages.stage_d1.V,
1111+
stage_d1_B1=stages.stage_d1.B1,
1112+
stage_d1_B2=stages.stage_d1.B2,
1113+
stage_d1_R=stages.stage_d1.R,
1114+
stage_d2_V=stages.stage_d2.V,
1115+
stage_d2_B1=stages.stage_d2.B1,
1116+
stage_d2_B2=stages.stage_d2.B2,
1117+
stage_d2_R=stages.stage_d2.R,
1118+
stage_b_V=stages.stage_b.V,
1119+
stage_b_B1=stages.stage_b.B1,
1120+
stage_b_B2=stages.stage_b.B2,
1121+
stage_b_R=stages.stage_b.R,
1122+
stage_h_V=stages.stage_h.V,
1123+
stage_h_B1=stages.stage_h.B1,
1124+
stage_h_B2=stages.stage_h.B2,
1125+
stage_h_R=stages.stage_h.R,
1126+
n=int(d.shape[0]),
10831127
kernel=kernel,
1084-
weights=None, # already validated
1085-
return_diagnostics=True,
1128+
boundary=float(boundary),
10861129
)
10871130
h_val = float(bw_diag.h_mse)
10881131
b_val = h_val # rho=1 default to match nprobust
@@ -1125,14 +1168,21 @@ def bias_corrected_local_linear(
11251168
bwcheck=21,
11261169
)
11271170

1128-
# --- Bias-corrected CI (lprobust summary.lprobust:420-421) ---
1129-
# z_{1 - alpha/2}; Python uses scipy.stats.norm.ppf. For parity with R
1130-
# on the golden tests, the golden JSON stores R's qnorm value.
1131-
from scipy.stats import norm as _norm
1132-
1133-
z = float(_norm.ppf(1.0 - alpha / 2.0))
1134-
ci_low = result.tau_bc - z * result.se_rb
1135-
ci_high = result.tau_bc + z * result.se_rb
1171+
# --- Bias-corrected CI via safe_inference (NaN-safe gate) ---
1172+
# When se_robust is zero, negative, or non-finite (e.g., exact-fit
1173+
# cases where the residual vector collapses), ALL inference fields —
1174+
# including the CI — must return NaN. This enforces the repo-wide
1175+
# inference contract (CLAUDE.md Key Design Pattern #6; CI review
1176+
# PR #340 P0) rather than returning a misleading zero-width or infinite
1177+
# CI. safe_inference also handles the R z = qnorm(1 - alpha/2) critical
1178+
# value via scipy.stats.norm.ppf (the golden JSON stores R's z so
1179+
# parity tests consume R's value directly and drift is pure
1180+
# tau.bc + z * se.rb arithmetic).
1181+
from diff_diff.utils import safe_inference
1182+
1183+
_, _, (ci_low, ci_high) = safe_inference(
1184+
result.tau_bc, result.se_rb, alpha=float(alpha)
1185+
)
11361186

11371187
return BiasCorrectedFit(
11381188
estimate_classical=result.tau_cl,

tests/test_bias_corrected_lprobust.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,114 @@ def test_auto_bandwidth_covers_truth_at_95(self):
387387
assert fit.ci_low <= beta0_true <= fit.ci_high
388388

389389

390+
# =============================================================================
391+
# NaN-safe CI (CI review PR #340 P0)
392+
# =============================================================================
393+
394+
395+
class TestNaNSafeCI:
396+
"""``bias_corrected_local_linear`` must route the CI through
397+
``safe_inference`` so degenerate cases with ``se_robust <= 0`` or
398+
non-finite ``se_robust`` surface as ``ci_low = ci_high = NaN`` rather
399+
than a misleading zero-width or infinite CI."""
400+
401+
def test_constant_y_returns_nan_ci(self):
402+
"""Constant y makes residuals zero; se_robust collapses to 0. CI
403+
must be (NaN, NaN), not a finite zero-width CI."""
404+
d = np.linspace(0.0, 1.0, 200)
405+
y = np.full_like(d, 1.5) # zero residuals everywhere
406+
fit = bias_corrected_local_linear(d, y, h=0.3, b=0.3)
407+
assert fit.se_robust == 0.0 or not np.isfinite(fit.se_robust)
408+
assert np.isnan(fit.ci_low)
409+
assert np.isnan(fit.ci_high)
410+
411+
def test_near_zero_se_returns_nan_ci(self):
412+
"""Near-constant y produces a tiny se_robust; the NaN-safe gate
413+
fires when it hits zero exactly (covers the exact-fit edge case
414+
the CI review flagged without tripping a pre-existing Phase 1b
415+
ZeroDivisionError in the auto-bandwidth selector on truly
416+
constant y, which is tracked separately)."""
417+
rng = np.random.default_rng(0)
418+
d = np.linspace(0.0, 1.0, 500)
419+
# Residuals near machine epsilon; tau_bc stays finite.
420+
y = 0.1 * np.ones_like(d) + rng.normal(0, 1e-300, size=d.shape)
421+
fit = bias_corrected_local_linear(d, y, h=0.3, b=0.3)
422+
# Either the inference should be fully valid, OR the CI gate has
423+
# correctly fired. The contract is: se_rb <= 0 / non-finite =>
424+
# NaN CI.
425+
if not (np.isfinite(fit.se_robust) and fit.se_robust > 0):
426+
assert np.isnan(fit.ci_low)
427+
assert np.isnan(fit.ci_high)
428+
429+
430+
# =============================================================================
431+
# Auto-bandwidth parameter forwarding (CI review PR #340 P1)
432+
# =============================================================================
433+
434+
435+
class TestAutoBandwidthForwardsParameters:
436+
"""Auto-bandwidth must forward ``cluster``, ``vce``, and ``nnmatch`` to
437+
the bandwidth selector. Calling the public ``mse_optimal_bandwidth``
438+
wrapper would hard-code ``cluster=None, vce="nn", nnmatch=3`` and
439+
silently mismatch the downstream ``lprobust`` fit — a methodology
440+
bug. These tests pin the correct wiring."""
441+
442+
def _smoke_data(self, seed=33):
443+
rng = np.random.default_rng(seed)
444+
G = 600
445+
d = rng.uniform(0.0, 1.0, G)
446+
y = d + d ** 2 + rng.normal(0, 0.3, G)
447+
return d, y
448+
449+
def test_auto_cluster_returns_finite(self):
450+
"""Auto-bandwidth with cluster produces a finite BiasCorrectedFit.
451+
452+
No R parity anchor (nprobust's internal lpbwselect has a
453+
singleton-cluster bug on the pilot fits); this test pins that the
454+
Python path completes and uses the clustered bandwidth downstream,
455+
not the unclustered one.
456+
"""
457+
d, y = self._smoke_data()
458+
cluster = np.repeat(np.arange(30), 20)
459+
fit_cluster = bias_corrected_local_linear(d, y, cluster=cluster)
460+
fit_uncluster = bias_corrected_local_linear(d, y)
461+
assert fit_cluster.bandwidth_source == "auto"
462+
assert np.isfinite(fit_cluster.estimate_bias_corrected)
463+
assert np.isfinite(fit_cluster.se_robust)
464+
# The clustered bandwidth should differ from the unclustered one
465+
# (different residual meat feeds into Stage-2/3 AMSE minimization).
466+
# If the wrapper were silently passing cluster=None, these would
467+
# be identical to bit-parity.
468+
assert fit_cluster.h != fit_uncluster.h
469+
470+
def test_auto_vce_hc1_returns_finite(self):
471+
"""Auto-bandwidth with non-default vce must use the requested vce
472+
during bandwidth selection, not silently fall back to nn."""
473+
d, y = self._smoke_data()
474+
fit_hc1 = bias_corrected_local_linear(d, y, vce="hc1")
475+
fit_nn = bias_corrected_local_linear(d, y, vce="nn")
476+
assert fit_hc1.bandwidth_source == "auto"
477+
assert np.isfinite(fit_hc1.estimate_bias_corrected)
478+
assert np.isfinite(fit_hc1.se_robust)
479+
# Different residual definitions yield different stage-2/3 AMSE
480+
# and therefore different bandwidths. Bit-identity would indicate
481+
# the selector silently ignored vce.
482+
assert fit_hc1.h != fit_nn.h
483+
484+
def test_auto_nnmatch_non_default_returns_finite(self):
485+
"""Auto-bandwidth with non-default nnmatch must forward it to the
486+
selector, not silently use the hard-coded default of 3."""
487+
d, y = self._smoke_data()
488+
fit_nn5 = bias_corrected_local_linear(d, y, nnmatch=5)
489+
fit_nn3 = bias_corrected_local_linear(d, y, nnmatch=3)
490+
assert fit_nn5.bandwidth_source == "auto"
491+
assert np.isfinite(fit_nn5.estimate_bias_corrected)
492+
# nnmatch controls the NN residual construction; different values
493+
# give different meat matrices and therefore different stage
494+
# bandwidths.
495+
assert fit_nn5.h != fit_nn3.h
496+
497+
390498
# =============================================================================
391499
# Validator idempotence (regression gate for the Phase 1b extraction)
392500
# =============================================================================

0 commit comments

Comments
 (0)