Surface silent np.linalg.solve fallbacks across axis-A minor solver paths

igerber · claude · igerber · commit 573de52a76e2 · 2026-04-19T11:31:07.000-04:00
Addresses findings #17, #18, #19 from the Phase 2 silent-failures audit (axis A, all Minor). Each site previously ran np.linalg.solve against a matrix that could be rank-deficient or near-singular with no user-facing signal. - StaggeredTripleDifference: `_compute_did_panel` now appends a condition-number sample to an instance tracker on LinAlgError; `fit()` emits ONE aggregate UserWarning reporting the number of affected (g, g_c, t) cells and the max condition number instead of silently falling back to np.linalg.lstsq per pair. Tracker resets on repeat fit. - EfficientDiD covariate sieve (estimate_propensity_ratio_sieve, estimate_inverse_propensity_sieve): precondition-check the normal-equations matrix via np.linalg.cond before solve and reject K values whose condition number exceeds 1/sqrt(eps); partial-K skips now surface via UserWarning listing the skipped K values, instead of being swallowed by `continue`. - compute_survey_vcov: check cond(X'WX) before the sandwich solve; emit UserWarning above the 1/sqrt(eps) threshold so ill-conditioned bread matrices don't silently produce unstable variance estimates. Sibling sites picked up via repo-wide lstsq-fallback pattern grep (per the pattern-check feedback memory): - two_stage.py:1768 (TSL variance bread) - two_stage_bootstrap.py:197 (multiplier bootstrap bread) Both now warn before the previously silent lstsq fallback. Adds 8 targeted tests across test_staggered_triple_diff.py, test_efficient_did.py, and test_survey.py, covering collinear/ill-conditioned triggers and happy-path negatives. REGISTRY.md notes added for each affected estimator section. No behavioral change on well-conditioned inputs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py
@@ -227,6 +227,10 @@ def estimate_propensity_ratio_sieve(
 
     best_ic = np.inf
     best_ratio = np.ones(n_units)  # fallback: constant ratio 1
+    singular_K: List[int] = []  # K values skipped due to rank deficiency (#18)
+    # Near-singular matrices solve without raising LinAlgError but return
+    # numerically meaningless beta. Rule-of-thumb threshold: 1/sqrt(eps).
+    cond_threshold = 1.0 / np.sqrt(np.finfo(float).eps)
 
     for K in range(1, k_max + 1):
         n_basis = comb(K + d, d)
@@ -249,13 +253,23 @@ def estimate_propensity_ratio_sieve(
             A = Psi_gp.T @ Psi_gp
             b = Psi_g.sum(axis=0)
 
+        # Precondition check (#18, axis A): reject near-singular A explicitly
+        # so np.linalg.solve can't silently return garbage coefficients.
+        with np.errstate(invalid="ignore", over="ignore"):
+            A_cond = float(np.linalg.cond(A))
+        if not np.isfinite(A_cond) or A_cond > cond_threshold:
+            singular_K.append(K)
+            continue
+
         try:
             beta = np.linalg.solve(A, b)
         except np.linalg.LinAlgError:
+            singular_K.append(K)
             continue  # singular — try next K
 
         # Check for NaN/Inf in solution
         if not np.all(np.isfinite(beta)):
+            singular_K.append(K)
             continue
 
         # Predicted ratio for all units
@@ -282,6 +296,18 @@ def estimate_propensity_ratio_sieve(
             UserWarning,
             stacklevel=2,
         )
+    elif singular_K:
+        # Finding #18 (axis A): partial K-failure was previously silent.
+        # Surface it so users see that the selected basis order was
+        # forced by rank deficiency at higher K rather than by the IC.
+        warnings.warn(
+            f"Propensity ratio sieve: skipped K={singular_K} due to "
+            f"rank-deficient or non-finite normal equations. "
+            f"Selected basis used the remaining K values; "
+            f"this may indicate limited variation in the covariates.",
+            UserWarning,
+            stacklevel=2,
+        )
 
     # Overlap diagnostics: warn if ratios require significant clipping
     n_extreme = int(np.sum((best_ratio < 1.0 / ratio_clip) | (best_ratio > ratio_clip)))
@@ -377,6 +403,8 @@ def estimate_inverse_propensity_sieve(
 
     best_ic = np.inf
     best_s = np.full(n_units, fallback_ratio)  # fallback: unconditional
+    singular_K: List[int] = []  # K values skipped due to rank deficiency (#18)
+    cond_threshold = 1.0 / np.sqrt(np.finfo(float).eps)
 
     for K in range(1, k_max + 1):
         n_basis = comb(K + d, d)
@@ -397,11 +425,20 @@ def estimate_inverse_propensity_sieve(
             # RHS: sum of basis over ALL units (not just one group)
             b = basis_all.sum(axis=0)
 
+        # Precondition check (#18, axis A): see ratio-sieve comment above.
+        with np.errstate(invalid="ignore", over="ignore"):
+            A_cond = float(np.linalg.cond(A))
+        if not np.isfinite(A_cond) or A_cond > cond_threshold:
+            singular_K.append(K)
+            continue
+
         try:
             beta = np.linalg.solve(A, b)
         except np.linalg.LinAlgError:
+            singular_K.append(K)
             continue
         if not np.all(np.isfinite(beta)):
+            singular_K.append(K)
             continue
 
         s_hat = basis_all @ beta
@@ -423,6 +460,16 @@ def estimate_inverse_propensity_sieve(
             UserWarning,
             stacklevel=2,
         )
+    elif singular_K:
+        # Finding #18 (axis A): partial K-failure was previously silent.
+        warnings.warn(
+            f"Inverse propensity sieve: skipped K={singular_K} due to "
+            f"rank-deficient or non-finite normal equations. "
+            f"Selected basis used the remaining K values; "
+            f"this may indicate limited variation in the covariates.",
+            UserWarning,
+            stacklevel=2,
+        )
 
     # Overlap diagnostics: warn if s_hat values require clipping
     n_clipped = int(np.sum((best_s < 1.0) | (best_s > float(n_units))))
diff --git a/diff_diff/staggered_triple_diff.py b/diff_diff/staggered_triple_diff.py
@@ -348,6 +348,11 @@ def fit(
             {} if (covariates and self.estimation_method in ("ipw", "dr")) else None
         )
 
+        # Tracker for rank-deficient OR-IF solves across all (g, g_c, t) cells.
+        # _compute_did_panel appends one condition-number sample per LinAlgError
+        # so we emit ONE aggregate warning below rather than fanning out.
+        self._lstsq_fallback_tracker: List[float] = []
+
         for g in treatment_groups:
             # In universal mode, skip the reference period (t == g-1-anticipation)
             # so it's omitted from GT estimation. The event-study mixin injects
@@ -507,6 +512,26 @@ def fit(
                 comparison_group_counts[(g, t)] = len(gc_labels)
                 gmm_weights_store[(g, t)] = dict(zip(gc_labels, gmm_w.tolist()))
 
+        # Consolidated OR influence-function rank-deficiency warning.
+        # Finding #17 in the Phase 2 silent-failures audit: the per-pair OR
+        # solve in _compute_did_panel() previously fell back to lstsq with no
+        # signal, so a singular X'WX (one where np.linalg.solve raised
+        # LinAlgError) reached the user as a normal result.
+        if self._lstsq_fallback_tracker:
+            n_cells = len(self._lstsq_fallback_tracker)
+            finite_conds = [c for c in self._lstsq_fallback_tracker if np.isfinite(c)]
+            max_cond = max(finite_conds) if finite_conds else float("inf")
+            warnings.warn(
+                f"Rank-deficient X'WX detected in the outcome-regression "
+                f"influence-function step for {n_cells} (g, g_c, t) pair(s); "
+                f"fell back to np.linalg.lstsq. "
+                f"Max condition number of affected X'WX: {max_cond:.2e}. "
+                f"Consider dropping collinear covariates or using "
+                f"estimation_method='ipw' to avoid the OR projection.",
+                UserWarning,
+                stacklevel=2,
+            )
+
         # Consolidated EPV summary warning
         if epv_diagnostics:
             low_epv = {k: v for k, v in epv_diagnostics.items() if v.get("is_low")}
@@ -1330,6 +1355,14 @@ def _compute_did_panel(
             try:
                 asy_linear_or = (np.linalg.solve(XpX, or_ex.T)).T
             except np.linalg.LinAlgError:
+                # Rank-deficient X'WX in the OR influence-function step. Record
+                # a condition-number sample so fit() can emit ONE aggregate
+                # warning across all (g, g_c, t) cells rather than fanning out.
+                tracker = getattr(self, "_lstsq_fallback_tracker", None)
+                if tracker is not None:
+                    with np.errstate(invalid="ignore", over="ignore"):
+                        cond = float(np.linalg.cond(XpX))
+                    tracker.append(cond)
                 asy_linear_or = (np.linalg.lstsq(XpX, or_ex.T, rcond=None)[0]).T
 
             inf_treat_or = -(asy_linear_or @ M1)
diff --git a/diff_diff/survey.py b/diff_diff/survey.py
@@ -1445,6 +1445,25 @@ def compute_survey_vcov(
             return np.zeros((k, k))
         return np.full((k, k), np.nan)
 
+    # Precondition check: near-singular X'WX lets np.linalg.solve return
+    # unstable values without raising (finding #19, axis A). Threshold of
+    # 1/sqrt(eps) ≈ 6.7e7 is the standard rule of thumb — above it, the
+    # sandwich bread becomes numerically unreliable and the caller should
+    # be told so.
+    with np.errstate(invalid="ignore", over="ignore"):
+        XtWX_cond = float(np.linalg.cond(XtWX))
+    cond_threshold = 1.0 / np.sqrt(np.finfo(float).eps)
+    if np.isfinite(XtWX_cond) and XtWX_cond > cond_threshold:
+        warnings.warn(
+            f"X'WX is ill-conditioned (cond={XtWX_cond:.2e}) in the "
+            f"survey sandwich variance; variance estimates may be "
+            f"numerically unstable. This typically indicates near "
+            f"multicollinearity or zero-weight strata dominating the "
+            f"bread matrix.",
+            UserWarning,
+            stacklevel=2,
+        )
+
     # Sandwich: (X'WX)^{-1} meat (X'WX)^{-1}
     try:
         temp = np.linalg.solve(XtWX, meat)
diff --git a/diff_diff/two_stage.py b/diff_diff/two_stage.py
@@ -1768,6 +1768,17 @@ def _compute_gmm_variance(
         try:
             bread = np.linalg.solve(XtWX_2, np.eye(k))
         except np.linalg.LinAlgError:
+            # Sibling of finding #17 (axis A) — the lstsq fallback in the
+            # TSL-variance bread was previously silent. Surface it so a
+            # rank-deficient second-stage design doesn't quietly degrade SEs.
+            warnings.warn(
+                "Rank-deficient second-stage X'WX in TwoStageDiD TSL variance; "
+                "falling back to np.linalg.lstsq for the bread matrix. "
+                "Analytical SEs may be numerically unstable; consider dropping "
+                "collinear covariates.",
+                UserWarning,
+                stacklevel=2,
+            )
             bread = np.linalg.lstsq(XtWX_2, np.eye(k), rcond=None)[0]
 
         # 7. V = bread @ meat @ bread
diff --git a/diff_diff/two_stage_bootstrap.py b/diff_diff/two_stage_bootstrap.py
@@ -197,6 +197,17 @@ def _compute_cluster_S_scores(
         try:
             bread = np.linalg.solve(XtX_2, np.eye(k))
         except np.linalg.LinAlgError:
+            # Sibling of finding #17 (axis A) — silent lstsq fallback in the
+            # TwoStage bootstrap bread matrix. Called once per (static / event-
+            # study / group) aggregation, so warning fan-out is bounded.
+            warnings.warn(
+                "Rank-deficient second-stage X'WX in TwoStageDiD multiplier "
+                "bootstrap bread; falling back to np.linalg.lstsq. Bootstrap "
+                "SEs may be numerically unstable; consider dropping collinear "
+                "covariates.",
+                UserWarning,
+                stacklevel=2,
+            )
             bread = np.linalg.lstsq(XtX_2, np.eye(k), rcond=None)[0]
 
         return S, bread, unique_clusters
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -753,6 +753,7 @@ See `docs/methodology/continuous-did.md` Section 4 for full details.
 - **Balanced panel**: Short balanced panel required ("large-n, fixed-T" regime). Does not handle unbalanced panels or repeated cross-sections
 - Warn if treatment varies within units (non-absorbing treatment)
 - Warn if propensity score estimates are near boundary values
+- **Note:** Polynomial-sieve propensity fits now reject any K whose normal-equations matrix has condition number above `1/sqrt(eps)` (≈ 6.7e7) — previously a near-singular `np.linalg.solve` could return numerically meaningless coefficients without raising. If at least one K succeeds but others were skipped via this precondition, a `UserWarning` lists the skipped K values. If every K is skipped, the existing "estimation failed for all K values" fallback warning still fires. Axis-A finding #18 in the Phase 2 silent-failures audit.
 
 *Estimator equation -- single treatment date (Equations 3.2, 3.5):*
 
@@ -1175,6 +1176,7 @@ Our implementation uses multiplier bootstrap on the GMM influence function: clus
 - **Zero-observation cohorts in group effects:** If all treated observations for a cohort have NaN `y_tilde` (excluded from estimation), that cohort's group effect is NaN with n_obs=0.
 - **Note:** Survey weights in TwoStageDiD GMM sandwich via weighted cross-products: bread uses (X'_2 W X_2)^{-1}, gamma_hat uses (X'_{10} W X_{10})^{-1}(X'_1 W X_2), per-cluster scores multiply by survey weights. PSU clustering, stratification, and FPC are fully supported in the meat matrix via `_compute_stratified_meat_from_psu_scores()`. When strata or FPC are present, the meat computation replaces `S' S` with the stratified formula `sum_h (1 - f_h) * (n_h/(n_h-1)) * centered_h' centered_h`. Strata also enters survey df (n_PSU - n_strata) for t-distribution inference. Bootstrap + survey supported (Phase 6) via PSU-level multiplier weights.
 - **Note:** Both the iterative FE solver (`_iterative_fe`, Stage 1) and the iterative alternating-projection demeaning helper (`_iterative_demean`, used in covariate residualization) emit `UserWarning` when `max_iter` exhausts without reaching `tol`, via `diff_diff.utils.warn_if_not_converged`. Silent return of the current iterate was classified as a silent failure under the Phase 2 audit and replaced with an explicit signal to match the logistic/Poisson IRLS pattern in `linalg.py`.
+- **Note:** When the Stage-2 bread `X'_2 W X_2` is singular, both the analytical TSL variance (`two_stage.py`) and the multiplier-bootstrap bread (`two_stage_bootstrap.py`) now emit a `UserWarning` before falling back to `np.linalg.lstsq`. Previously this fallback was silent. Sibling of axis-A finding #17 in the Phase 2 silent-failures audit; surfaced by the repo-wide lstsq-fallback pattern grep that accompanied the StaggeredTripleDifference fix.
 - **Note:** The GMM sandwich and bootstrap paths both use `scipy.sparse.linalg.factorized` for the Stage 1 normal-equations solve `(X'_{10} W X_{10}) gamma = X'_1 W X_2` and fall back to dense `lstsq` when the sparse factorization raises `RuntimeError` on a near-singular matrix. Both fallback sites emit a `UserWarning` (silent-failure audit axis C) so callers know SE estimates came from the degraded path rather than the fast sparse path.
 
 **Reference implementation(s):**
@@ -1695,6 +1697,7 @@ has no additional effect.
 - **Note:** `pscore_fallback` default changed from unconditional to error.
   Set `pscore_fallback="unconditional"` for legacy behavior.
 - Warns on singular GMM covariance matrix (falls back to pseudoinverse)
+- **Note:** Rank-deficient X'WX in the per-pair outcome-regression influence-function step now emits ONE aggregate `UserWarning` at `fit()` time (counting affected (g, g_c, t) cells and reporting the max condition number). The per-pair `np.linalg.lstsq` fallback is unchanged; it is simply no longer silent. Axis-A finding #17 in the Phase 2 silent-failures audit.
 
 *Data structure:*
 
@@ -2719,6 +2722,12 @@ unequal selection probabilities).
   per-observation PSUs for the TSL meat computation, consistent with the
   stratified-no-PSU path. The adjustment factor is `n/(n-1)` (not HC1's
   `n/(n-k)`).
+- **Note:** TSL now precondition-checks `X'WX` via `np.linalg.cond` before
+  solving the sandwich. If the condition number is finite and exceeds
+  `1/sqrt(eps)` (≈ 6.7e7), a `UserWarning` fires stating that the bread is
+  ill-conditioned and variance estimates may be numerically unstable.
+  Previously a near-singular `X'WX` could silently produce unstable SEs.
+  Axis-A finding #19 in the Phase 2 silent-failures audit.
 
 ### Weight Type Effects on Inference
 
diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py
@@ -2052,3 +2052,85 @@ def test_inverse_propensity_sieve_fallback_warns(self):
         assert np.all(np.isfinite(s_hat))
         # Should fall back to unconditional n/n_group = 100/2 = 50
         assert np.allclose(s_hat, 50.0)
+
+
+# ---------------------------------------------------------------------------
+# Silent-failure audit PR #9: finding #18 — estimate_*_sieve silently
+# `continue`'d past rank-deficient K values. Now we track skipped K and
+# warn when we ship a result that wasn't the IC-winner across all K.
+# ---------------------------------------------------------------------------
+
+
+class TestSievePartialKSkipWarning:
+    """Finding #18 (axis A): partial K-failure no longer silent."""
+
+    def test_ratio_sieve_partial_skip_warns(self):
+        """If some K's are rank-deficient but at least one succeeds,
+        the function warns about the partial skip instead of swallowing it."""
+        from diff_diff.efficient_did_covariates import estimate_propensity_ratio_sieve
+
+        rng = np.random.default_rng(7)
+        n = 200
+        # 1D covariate with discrete support {0, 1}. At K=1 the basis is
+        # [1, x], which should be full rank as long as both values occur
+        # in the group. At K>=2 the added polynomial powers x^2, x^3, ...
+        # all equal x on this support, so the basis columns are exactly
+        # collinear and the normal equations are rank-deficient.
+        X = rng.integers(0, 2, size=(n, 1)).astype(float)
+        mask_g = np.zeros(n, dtype=bool)
+        mask_g[:100] = True
+        mask_gp = np.zeros(n, dtype=bool)
+        mask_gp[100:] = True
+        with pytest.warns(UserWarning) as caught:
+            ratio = estimate_propensity_ratio_sieve(X, mask_g, mask_gp, k_max=3)
+        assert np.all(np.isfinite(ratio))
+        partial_skip_msgs = [
+            str(w.message) for w in caught if "skipped K=" in str(w.message)
+        ]
+        assert partial_skip_msgs, (
+            "Expected a partial-K-skip warning when some K's are rank deficient "
+            "but at least one succeeds; got none."
+        )
+        # Message should name the specific K values that were skipped.
+        assert any("K=" in m for m in partial_skip_msgs)
+
+    def test_inverse_propensity_sieve_partial_skip_warns(self):
+        """Same contract for the inverse propensity sieve."""
+        from diff_diff.efficient_did_covariates import estimate_inverse_propensity_sieve
+
+        rng = np.random.default_rng(7)
+        n = 200
+        X = rng.integers(0, 2, size=(n, 1)).astype(float)
+        mask = np.zeros(n, dtype=bool)
+        mask[:100] = True
+        with pytest.warns(UserWarning) as caught:
+            s_hat = estimate_inverse_propensity_sieve(X, mask, k_max=3)
+        assert np.all(np.isfinite(s_hat))
+        partial_skip_msgs = [
+            str(w.message) for w in caught if "skipped K=" in str(w.message)
+        ]
+        assert partial_skip_msgs
+
+    def test_ratio_sieve_no_warning_when_no_skips(self):
+        """Clean, well-conditioned covariates → no partial-skip warning."""
+        from diff_diff.efficient_did_covariates import estimate_propensity_ratio_sieve
+
+        rng = np.random.default_rng(101)
+        n = 300
+        X = rng.normal(0, 1, (n, 2))
+        mask_g = np.zeros(n, dtype=bool)
+        mask_g[:150] = True
+        mask_gp = np.zeros(n, dtype=bool)
+        mask_gp[150:] = True
+        import warnings as _w
+
+        with _w.catch_warnings(record=True) as caught:
+            _w.simplefilter("always")
+            ratio = estimate_propensity_ratio_sieve(X, mask_g, mask_gp, k_max=3)
+        assert np.all(np.isfinite(ratio))
+        partial_skip_msgs = [
+            str(w.message) for w in caught if "skipped K=" in str(w.message)
+        ]
+        assert partial_skip_msgs == [], (
+            f"Unexpected partial-skip warning on clean data: {partial_skip_msgs}"
+        )
diff --git a/tests/test_staggered_triple_diff.py b/tests/test_staggered_triple_diff.py
diff --git a/tests/test_survey.py b/tests/test_survey.py