Address CI AI review round 2: mass-point guard, rank check, R parity

igerber · claude · igerber · commit 03532fc0b22e · 2026-04-19T13:53:39.000-04:00
P1 #1 (methodology): mse_optimal_bandwidth now rejects Design 1 mass-point designs. When boundary > 0 and the modal fraction at d.min() exceeds the REGISTRY-specified 2% threshold, raise NotImplementedError pointing to the 2SLS sample-average estimator per de Chaisemartin et al. (2026) Section 3.2.4. Design 1' with untreated units at d=0 (boundary=0) is still accepted per Garrett et al. (2020) application precedent.

P1 #2 (code quality): qrXXinv now catches np.linalg.LinAlgError from Cholesky and re-raises as ValueError with a targeted message naming the failing dimension and suggesting remediation. Duplicate-support windows or other rank-deficient designs now fail with a clear error instead of leaking LinAlgError out of the port.

P3 (tests): Added TestStageDiagnosticsParity::test_R_parity covering all four stages. Previously only V/B1/B2 were pinned; R (BWreg) was only trivially checked for stage_d1 (scale=0 -> R=0). Now stage_b and stage_h R values are explicitly parity-tested at 1% against R's nprobust.

New behavioral tests:
- test_mass_point_design_rejected: 10% mass at 0.1 -> NotImplementedError
- test_continuous_near_d_lower_accepted: uniform(0.1, 1.0) passes
- test_untreated_at_zero_accepted: 15% at d=0 with boundary=0 passes
- test_rank_deficient_design_raises_valueerror: rank-1 X -> ValueError
- R parity on all four stages across 3 DGPs (12 new parametrized cases)

169 tests pass (up from 153).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/_nprobust_port.py b/diff_diff/_nprobust_port.py
@@ -122,11 +122,29 @@ def qrXXinv(x: np.ndarray) -> np.ndarray:
     -------
     np.ndarray, shape (k, k)
         Inverse of ``x.T @ x``.
+
+    Raises
+    ------
+    ValueError
+        If ``x.T @ x`` is rank-deficient (Cholesky fails). Converts
+        the raw ``np.linalg.LinAlgError`` into a targeted message so
+        callers (``lprobust_bw``) can surface a clear failure reason
+        instead of an opaque linear-algebra error.
     """
     xtx = x.T @ x
-    # Cholesky solve for the inverse. Matches R's chol2inv(chol(.)).
-    L = np.linalg.cholesky(xtx)
     k = xtx.shape[0]
+    # Cholesky solve for the inverse. Matches R's chol2inv(chol(.)).
+    try:
+        L = np.linalg.cholesky(xtx)
+    except np.linalg.LinAlgError as exc:
+        raise ValueError(
+            f"qrXXinv: Cholesky decomposition of X'X ({k}x{k}) failed. "
+            f"The weighted design matrix is rank-deficient, likely "
+            f"because the in-window support has fewer than {k} distinct "
+            f"points. Increase sample size, widen the bandwidth, or pick "
+            f"a boundary with more distinct values nearby. "
+            f"(LinAlgError: {exc})"
+        ) from exc
     Linv = np.linalg.solve(L, np.eye(k))
     return Linv.T @ Linv
 
diff --git a/diff_diff/local_linear.py b/diff_diff/local_linear.py
@@ -633,6 +633,36 @@ def mse_optimal_bandwidth(
             f"separately parity-tested against nprobust."
         )
 
+    # Mass-point design check (REGISTRY.md specifies a `>2%` modal-min rule).
+    # If d_lower > 0 and the distribution bunches at d_lower, the paper
+    # (de Chaisemartin et al. 2026 Section 3.2.4) prescribes the 2SLS
+    # sample-average path, NOT the nonparametric CCF local-polynomial
+    # path. Detect bunching and redirect the caller.
+    #
+    # We only flag when boundary > 0 (Design 1 continuous-near-d_lower
+    # vs Design 1 mass-point). For boundary = 0 (Design 1' or "untreated
+    # units present" subcase), the paper accepts nonparametric even with
+    # mass at 0 (Garrett et al. 2020 application with 12/2954 at 0).
+    if boundary > _boundary_tol:
+        eps_eq = 1e-12 * max(1.0, abs(d_min))
+        at_boundary_mask = np.abs(d - d_min) <= eps_eq
+        modal_fraction = float(np.mean(at_boundary_mask))
+        _MASS_POINT_THRESHOLD = 0.02  # REGISTRY rule: > 2% modal-min
+        if modal_fraction > _MASS_POINT_THRESHOLD:
+            raise NotImplementedError(
+                f"Detected mass-point design: the lower boundary "
+                f"d_lower={d_min!r} has modal fraction "
+                f"{modal_fraction:.4f} > {_MASS_POINT_THRESHOLD:.2f}. "
+                f"Per de Chaisemartin et al. (2026) Section 3.2.4 and "
+                f"the methodology registry, this case requires the 2SLS "
+                f"sample-average estimator with instrument 1{{D_2 > "
+                f"d_lower}}, not the nonparametric CCF local-polynomial "
+                f"bandwidth selector. That estimator is queued for "
+                f"Phase 2 (HeterogeneousAdoptionDiD). For continuous "
+                f"near-d_lower designs (modal fraction <= "
+                f"{_MASS_POINT_THRESHOLD:.2f}), this wrapper is applicable."
+            )
+
     # Defer heavy import to call time to avoid import-cycle risk.
     from diff_diff._nprobust_port import lpbwselect_mse_dpi
 
diff --git a/tests/test_bandwidth_selector.py b/tests/test_bandwidth_selector.py
@@ -116,6 +116,25 @@ def test_B2_parity(self, dgp_case, stage):
         else:
             assert actual == pytest.approx(expected, rel=_PARITY_TOL), f"{name} {stage}"
 
+    @pytest.mark.parametrize(
+        "stage",
+        ["stage_d1", "stage_d2", "stage_b", "stage_h"],
+    )
+    def test_R_parity(self, dgp_case, stage):
+        """R (BWreg) parity. stage_d1 / stage_d2 use scale=0 so R=0;
+        stage_b / stage_h use scale=bwregul=1 so R is non-trivial and
+        must match nprobust."""
+        name, d, y, g = dgp_case
+        br = mse_optimal_bandwidth(d, y, return_diagnostics=True)
+        actual = getattr(br, f"{stage}_R")
+        expected = g[stage]["R"]
+        if expected == 0:
+            assert actual == pytest.approx(0, abs=1e-10), f"{name} {stage}"
+        else:
+            assert actual == pytest.approx(expected, rel=_PARITY_TOL), (
+                f"{name} {stage}: py={actual!r} R={expected!r}"
+            )
+
 
 # =============================================================================
 # Behavioral tests
@@ -303,6 +322,53 @@ def test_boundary_below_min_d_accepted(self):
         assert np.isfinite(h)
         assert h > 0.0
 
+    def test_mass_point_design_rejected(self):
+        """Design 1 mass-point case (boundary > 0, modal fraction > 2%)
+        must be rejected with NotImplementedError pointing to 2SLS."""
+        rng = np.random.default_rng(2026)
+        n_mass = 200  # 10% mass at d_lower
+        n_cont = 1800
+        d_mass = np.full(n_mass, 0.1)
+        d_cont = rng.uniform(0.1, 1.0, size=n_cont)
+        d = np.concatenate([d_mass, d_cont])
+        y = d + rng.normal(0, 0.5, size=d.size)
+        with pytest.raises(NotImplementedError, match="mass-point"):
+            mse_optimal_bandwidth(d, y, boundary=float(d.min()))
+
+    def test_continuous_near_d_lower_accepted(self):
+        """Design 1 continuous-near-d_lower (boundary > 0, modal
+        fraction <= 2%) must pass through to nonparametric."""
+        rng = np.random.default_rng(20260419)
+        d = rng.uniform(0.1, 1.0, size=1500)  # no mass point
+        y = d + rng.normal(0, 0.3, size=1500)
+        h = mse_optimal_bandwidth(d, y, boundary=float(d.min()))
+        assert np.isfinite(h)
+        assert h > 0.0
+
+    def test_untreated_at_zero_accepted(self):
+        """Paper Section 3.1.5 / Garrett et al. application: untreated
+        units at d=0 are OK for Design 1'. boundary=0 with mass at 0
+        must NOT trigger the mass-point rejection."""
+        rng = np.random.default_rng(2026)
+        # ~15% at d=0 (genuinely untreated), rest continuous on [0.01, 1).
+        d_zero = np.zeros(300)
+        d_pos = rng.uniform(0.01, 1.0, size=1700)
+        d = np.concatenate([d_zero, d_pos])
+        y = d + rng.normal(0, 0.5, size=d.size)
+        h = mse_optimal_bandwidth(d, y, boundary=0.0)
+        assert np.isfinite(h)
+        assert h > 0.0
+
+    def test_rank_deficient_design_raises_valueerror(self):
+        """Duplicate-support windows must fail with a clear ValueError
+        from qrXXinv's Cholesky guard, not an opaque LinAlgError."""
+        from diff_diff._nprobust_port import qrXXinv
+
+        # Rank-1 X: all rows identical -> X.T @ X is rank-1.
+        X = np.tile([[1.0, 2.0, 3.0]], (10, 1))
+        with pytest.raises(ValueError, match="qrXXinv"):
+            qrXXinv(X)
+
 
 class TestKernelDispatch:
     """Different kernels produce different bandwidths."""