Address CI AI review round 6: Design 1' support check + empty guard

igerber · claude · igerber · commit a313e05b17d9 · 2026-04-19T15:17:10.000-04:00
P1 #1: boundary=0 now enforces a Design 1' support plausibility heuristic: d.min() <= 5% * median(|d|). Samples with d.min() substantially positive (e.g. U(0.5, 1)) are rejected with ValueError directing the caller to boundary=float(d.min()). Threshold chosen at 5% (not REGISTRY's 1%) so the paper's thin-boundary-density DGPs (Beta(2,2), d.min/median ~ 3%) still pass. Reordered so the mass-point check (NotImplementedError, paper Section 3.2.4) fires before the support-check -- mass-point data should be redirected to 2SLS regardless of the boundary the caller picked. P1 #2: Empty-input front-door guard. d.size == 0 raises ValueError with a targeted "must be non-empty" message instead of leaking the NumPy reduction error from d.min(). P3 (docstring sync): _nprobust_port module docstring no longer says weighted data can be handled by the public wrapper -- the wrapper explicitly raises NotImplementedError. Docstring now matches the actual contract. P3 (deferred, same as last round): tri/uni/shifted-boundary golden parity extension. REGISTRY.md Phase 1b note expanded to document the full input contract (nonnegativity, boundary applicability, Design 1' support heuristic, mass-point redirection) so the public API surface is fully specified in the methodology registry. 178 tests pass (up from 177). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/_nprobust_port.py b/diff_diff/_nprobust_port.py
@@ -31,9 +31,12 @@
 
 Deviations from nprobust (documented):
 
-* ``weights=`` is not supported here (nprobust's ``lpbwselect`` has no
-  weight argument). Weighted data can be handled by the public wrapper
-  or via the user's own design matrix.
+* ``weights=`` is not supported here or in the public wrapper
+  (nprobust's ``lpbwselect`` has no weight argument, so Phase 1b has
+  no parity anchor). Weighted-data support is queued for Phase 2+
+  (survey-design adaptation). The public wrapper
+  ``mse_optimal_bandwidth`` raises ``NotImplementedError`` when a
+  ``weights`` array is passed.
 * ``vce="nn"`` is the default and is fully ported. ``vce in
   {"hc0", "hc1", "hc2", "hc3"}`` is implemented in ``lprobust_res`` /
   ``lprobust_vce`` but has not been separately golden-tested; use at
diff --git a/diff_diff/local_linear.py b/diff_diff/local_linear.py
@@ -615,6 +615,11 @@ def mse_optimal_bandwidth(
     y = np.asarray(y, dtype=np.float64).ravel()
     if d.shape != y.shape:
         raise ValueError(f"d and y must have the same shape; got {d.shape} and {y.shape}")
+    if d.size == 0:
+        raise ValueError(
+            "d and y must be non-empty; the selector cannot estimate a "
+            "bandwidth from zero observations."
+        )
     if not np.all(np.isfinite(d)):
         raise ValueError("d contains non-finite values (NaN or Inf)")
     if not np.all(np.isfinite(y)):
@@ -668,13 +673,15 @@ def mse_optimal_bandwidth(
         )
 
     # Mass-point design check (paper Section 3.2.4, REGISTRY 2% rule).
-    # When d_min > 0 and there is bunching at d_min, Design 1 requires
-    # the 2SLS sample-average path (Phase 2), not the CCF nonparametric
-    # selector. The check applies independently of the boundary the
-    # user supplied: mass-point data is never appropriate for this
-    # wrapper. The check explicitly excludes d_min ~ 0, which is the
-    # Design 1' "untreated units present" subcase that the paper's
-    # simulations and the Garrett et al. (2020) application accept.
+    # Must fire BEFORE the Design 1' support check: mass-point data is
+    # never appropriate for the CCF nonparametric selector regardless
+    # of the boundary the caller supplied. The correct remediation is
+    # the 2SLS sample-average path (Phase 2), not a boundary
+    # reclassification.
+    #
+    # The check explicitly excludes d_min ~ 0 (the Design 1'
+    # "untreated units present" subcase that the paper's simulations
+    # and the Garrett et al. (2020) application accept).
     _MASS_POINT_THRESHOLD = 0.02  # REGISTRY rule: > 2% modal-min
     if d_min > _boundary_tol:
         eps_eq = 1e-12 * max(1.0, abs(d_min))
@@ -695,6 +702,37 @@ def mse_optimal_bandwidth(
                 f"applicable."
             )
 
+    # Design 1' support check: boundary ~ 0 requires the realized
+    # sample minimum to be compatible with a population support
+    # infimum at 0. Otherwise the selector calibrates ``h_mse`` at
+    # an off-support limit.
+    #
+    # Rule: when boundary ~ 0 (not also at d.min()), require
+    # d.min() <= 5% * median(|d|). The 5% threshold is generous
+    # enough to accept Design 1' samples with vanishing boundary
+    # density (Beta(2,2): d.min/median ~ 3%) while rejecting samples
+    # substantially off-support (U(0.5, 1): d.min/median ~ 1.0).
+    # Samples between these extremes (e.g. U(0.05, 1), d.min/median
+    # ~ 10%) are also rejected; the error directs the caller to
+    # boundary=float(d.min()) for the continuous-near-d_lower path.
+    _DESIGN_1_PRIME_RATIO = 0.05
+    if _at_zero and not _at_d_min:
+        d_median_abs = float(np.median(np.abs(d)))
+        effective_threshold = _DESIGN_1_PRIME_RATIO * max(d_median_abs, 1e-12)
+        if d_min > effective_threshold:
+            raise ValueError(
+                f"boundary ~ 0 selected but d.min()={d_min!r} is not "
+                f"compatible with a Design 1' support infimum at 0 "
+                f"(rule: d.min() <= "
+                f"{_DESIGN_1_PRIME_RATIO} * median(|d|) = "
+                f"{effective_threshold!r}). This sample is not "
+                f"Design 1'. Either: (a) pass boundary=float(d.min()) "
+                f"for the Design 1 continuous-near-d_lower path, or "
+                f"(b) verify the population support actually has "
+                f"infimum at 0 (in which case the realized d.min() "
+                f"would be closer to zero relative to the data scale)."
+            )
+
     # Defer heavy import to call time to avoid import-cycle risk.
     from diff_diff._nprobust_port import lpbwselect_mse_dpi
 
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -2302,7 +2302,7 @@ Shipped as `did_had_pretest_workflow()` and surfaced via `practitioner_next_step
 - [x] Phase 1a: `vcov_type` enum threaded through `DifferenceInDifferences` (`MultiPeriodDiD`, `TwoWayFixedEffects` inherit); `robust=True` <=> `vcov_type="hc1"`, `robust=False` <=> `vcov_type="classical"`. Conflict detection at `__init__`. Results summary prints the variance-family label.
     - **Note (deviation from the fully-symmetric enum):** `MultiPeriodDiD(cluster=..., vcov_type="hc2_bm")` is intentionally **not supported** and raises `NotImplementedError`. The scalar-coefficient `DifferenceInDifferences` path handles the cluster + CR2 Bell-McCaffrey combination (`_compute_cr2_bm` returns a per-coefficient Satterthwaite DOF that is valid for the single-ATT contrast), but `MultiPeriodDiD` also reports a post-period-average ATT constructed as a *contrast* of the event-study coefficients. The cluster-aware CR2 BM DOF for that contrast (i.e., the Pustejovsky-Tipton 2018 per-cluster adjustment matrices applied to an arbitrary aggregation contrast) is not yet implemented. Pairing CR2 cluster-robust SEs with the one-way Imbens-Kolesar (2016) contrast DOF would be a broken hybrid, so the combination fails fast with a clear workaround message (drop the cluster for one-way HC2+BM, or use `vcov_type="hc1"` with cluster for CR1 Liang-Zeger). Tracked in `TODO.md` under Methodology/Correctness. Applies only to `MultiPeriodDiD`; `DifferenceInDifferences(cluster=..., vcov_type="hc2_bm")` works.
 - [x] Phase 1a: `clubSandwich::vcovCR(..., type="CR2")` parity harness committed: R script at `benchmarks/R/generate_clubsandwich_golden.R` plus a regression-anchor JSON at `benchmarks/data/clubsandwich_cr2_golden.json`. **Note:** the committed JSON currently has `"source": "python_self_reference"` and pins numerical stability only; authoritative R-produced values are generated by running the R script, which the TODO.md row under Methodology/Correctness tracks. The parity test at `tests/test_linalg_hc2_bm.py::TestCR2BMCluster::test_cr2_parity_with_golden` runs at 1e-6 tolerance (Phase 1a plan commits 6-digit parity once R regen completes).
-- [x] Phase 1b: Calonico-Cattaneo-Farrell (2018) MSE-optimal bandwidth selector. In-house port of `nprobust::lpbwselect(bwselect="mse-dpi")` (nprobust 0.5.0, SHA `36e4e53`) as `diff_diff.mse_optimal_bandwidth` and `BandwidthResult`, backed by the private `diff_diff._nprobust_port` module (`kernel_W`, `lprobust_bw`, `lpbwselect_mse_dpi`). Three-stage DPI with four `lprobust.bw` calls at orders `q+1`, `q+2`, `q`, `p`. Parity verified at `0.0000%` on all five stage bandwidths (`c_bw`, `bw_mp2`, `bw_mp3`, `b_mse`, `h_mse`) across three deterministic DGPs (uniform, Beta(2,2), half-normal) via `benchmarks/R/generate_nprobust_golden.R` → `benchmarks/data/nprobust_mse_dpi_golden.json`. **Note:** `weights=` is currently unsupported (raises `NotImplementedError`); nprobust's `lpbwselect` has no weight argument so there is no parity anchor. Weighted-data support deferred to Phase 2 (survey-design adaptation). **Note (public API scope restriction):** the exported wrapper `mse_optimal_bandwidth` hard-codes the HAD Phase 1b configuration (`p=1`, `deriv=0`, `interior=False`, `vce="nn"`, `nnmatch=3`). The underlying port supports a broader surface (`hc0`/`hc1`/`hc2`/`hc3` variance, interior evaluation, higher `p`), but those paths are not parity-tested against `nprobust` and are deferred. Callers needing the broader surface should use `diff_diff._nprobust_port.lpbwselect_mse_dpi` directly and accept that parity has not been verified on non-HAD configurations.
+- [x] Phase 1b: Calonico-Cattaneo-Farrell (2018) MSE-optimal bandwidth selector. In-house port of `nprobust::lpbwselect(bwselect="mse-dpi")` (nprobust 0.5.0, SHA `36e4e53`) as `diff_diff.mse_optimal_bandwidth` and `BandwidthResult`, backed by the private `diff_diff._nprobust_port` module (`kernel_W`, `lprobust_bw`, `lpbwselect_mse_dpi`). Three-stage DPI with four `lprobust.bw` calls at orders `q+1`, `q+2`, `q`, `p`. Parity verified at `0.0000%` on all five stage bandwidths (`c_bw`, `bw_mp2`, `bw_mp3`, `b_mse`, `h_mse`) across three deterministic DGPs (uniform, Beta(2,2), half-normal) via `benchmarks/R/generate_nprobust_golden.R` → `benchmarks/data/nprobust_mse_dpi_golden.json`. **Note:** `weights=` is currently unsupported (raises `NotImplementedError`); nprobust's `lpbwselect` has no weight argument so there is no parity anchor. Weighted-data support deferred to Phase 2 (survey-design adaptation). **Note (public API scope restriction):** the exported wrapper `mse_optimal_bandwidth` hard-codes the HAD Phase 1b configuration (`p=1`, `deriv=0`, `interior=False`, `vce="nn"`, `nnmatch=3`). The underlying port supports a broader surface (`hc0`/`hc1`/`hc2`/`hc3` variance, interior evaluation, higher `p`), but those paths are not parity-tested against `nprobust` and are deferred. Callers needing the broader surface should use `diff_diff._nprobust_port.lpbwselect_mse_dpi` directly and accept that parity has not been verified on non-HAD configurations. **Note (input contract):** the wrapper enforces HAD's support restriction `D_{g,2} >= 0` (front-door `ValueError` on negative doses and empty inputs). `boundary` must equal `0` (Design 1') or `float(d.min())` (Design 1 continuous-near-d_lower) within float tolerance; off-support values raise `ValueError`. 
When `boundary ~ 0`, the wrapper additionally requires `d.min() <= 0.05 * median(|d|)` as a Design 1' support plausibility heuristic, chosen to pass the paper's thin-boundary-density DGPs (Beta(2,2), d.min/median ~ 3%) while rejecting substantially off-support samples (U(0.5, 1.0), d.min/median ~ 1.0). Detected mass-point designs (`d.min() > 0` with the modal fraction at `d.min()` exceeding 2%) raise `NotImplementedError` pointing to the Phase 2 2SLS path per paper Section 3.2.4.
 - [ ] Phase 1c: First-order bias estimator `M̂_{ĥ*_G}` and robust variance `V̂_{ĥ*_G}`.
 - [ ] Phase 1c: Bias-corrected CI (Equation 8) with `nprobust` parity.
 - [ ] Phase 2: `HeterogeneousAdoptionDiD` class with separate code paths for Design 1', Design 1 mass-point, and Design 1 continuous-near-`d̲`.
diff --git a/tests/test_bandwidth_selector.py b/tests/test_bandwidth_selector.py
@@ -332,18 +332,25 @@ def test_boundary_zero_thin_boundary_density_accepted(self):
         assert np.isfinite(h)
         assert h > 0.0
 
-    def test_boundary_zero_with_data_far_from_zero_fails_gracefully(self):
-        """boundary=0 passes the boundary-validation and mass-point
-        checks but then hits the per-stage count guard deeper in the
-        selector because the kernel window is empty (d ~ U(0.5, 1.0)
-        has no data near 0). Must surface a clear ValueError, not an
-        opaque failure."""
+    def test_boundary_zero_with_data_far_from_zero_rejected(self):
+        """boundary=0 with d.min() substantially positive fails the
+        Design 1' support check (d.min() > 5% of median(|d|)). The
+        caller must either pass boundary=float(d.min()) for the
+        continuous-near-d_lower path or confirm Design 1' applicability."""
         rng = np.random.default_rng(2026)
-        d = rng.uniform(0.5, 1.0, size=1500)  # d.min() ~ 0.5, no mass
+        d = rng.uniform(0.5, 1.0, size=1500)  # d.min() ~ 0.5 >> 5% of median
         y = d + rng.normal(0, 0.3, size=1500)
-        with pytest.raises(ValueError, match="lprobust_bw"):
+        with pytest.raises(ValueError, match="Design 1'"):
             mse_optimal_bandwidth(d, y, boundary=0.0)
 
+    def test_empty_input_rejected(self):
+        """Empty d/y must raise a targeted ValueError up front, not
+        leak a NumPy reduction error from d.min()."""
+        d = np.array([], dtype=np.float64)
+        y = np.array([], dtype=np.float64)
+        with pytest.raises(ValueError, match="non-empty"):
+            mse_optimal_bandwidth(d, y)
+
     def test_boundary_zero_with_d_min_mass_point_rejected(self):
         """boundary=0 with d.min() > 0 AND mass at d.min() is a
         Design 1 mass-point design and must be redirected to 2SLS."""