Add solve_logit weight validation, update docstrings, add negative tests

igerber · claude · igerber · commit 133df1974f64 · 2026-03-23T10:57:02.000-04:00
P2: solve_logit() now validates weights (shape, NaN, Inf, positive) before
IRLS, giving clear errors instead of opaque numerical failures.

P3 docs: update CallawaySantAnna fit() docstring to weights-only contract;
add survey_design param docs to ImputationDiD/TwoStageDiD fit() and wrapper
docstrings; update REGISTRY treatment_effects["weight"] note for survey mode.

P3 tests: add negative tests for solve_logit bad weights (NaN, negative,
wrong shape); add aweight/fweight rejection and FPC rejection tests for
ImputationDiD and TwoStageDiD.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/imputation.py b/diff_diff/imputation.py
@@ -203,6 +203,12 @@ def fit(
         balance_e : int, optional
             When computing event study, restrict to cohorts observed at all
             relative times in [-balance_e, max_h].
+        survey_design : SurveyDesign, optional
+            Survey design specification for design-based inference. Supports
+            pweight only (aweight/fweight raise ValueError). FPC raises
+            NotImplementedError. PSU is used as cluster variable for Theorem 3
+            variance. Strata enters survey df for t-distribution inference.
+            Requires analytical inference (n_bootstrap=0).
 
         Returns
         -------
@@ -1951,6 +1957,12 @@ def imputation_did(
         Aggregation mode: None, "simple", "event_study", "group", "all".
     balance_e : int, optional
         Balance event study to cohorts observed at all relative times.
+    survey_design : SurveyDesign, optional
+        Survey design specification for design-based inference. Supports
+        pweight only (aweight/fweight raise ValueError). FPC raises
+        NotImplementedError. PSU is used as cluster variable for Theorem 3
+        variance. Strata enters survey df for t-distribution inference.
+        Requires analytical inference (n_bootstrap=0).
     **kwargs
         Additional keyword arguments passed to ImputationDiD constructor.
 
diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py
@@ -1157,6 +1157,18 @@ def solve_logit(
     X_with_intercept = np.column_stack([np.ones(n), X])
     k = p + 1  # number of parameters including intercept
 
+    # Validate weights
+    if weights is not None:
+        weights = np.asarray(weights, dtype=np.float64)
+        if weights.shape != (n,):
+            raise ValueError(f"weights must have shape ({n},), got {weights.shape}")
+        if np.any(np.isnan(weights)):
+            raise ValueError("weights contain NaN values")
+        if np.any(~np.isfinite(weights)):
+            raise ValueError("weights contain Inf values")
+        if np.any(weights <= 0):
+            raise ValueError("weights must be strictly positive")
+
     # Validate rank_deficient_action
     valid_actions = {"warn", "error", "silent"}
     if rank_deficient_action not in valid_actions:
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
@@ -1155,8 +1155,10 @@ def fit(
             For event study, balance the panel at relative time e.
             Ensures all groups contribute to each relative period.
         survey_design : SurveyDesign, optional
-            Survey design specification for design-based inference.
-            Supports weights, strata, PSU, and FPC.
+            Survey design specification. Only weights-only designs are supported
+            (strata/PSU/FPC raise NotImplementedError). Supports pweight only.
+            Covariates + IPW/DR + survey also raises NotImplementedError.
+            Use analytical inference (n_bootstrap=0) with survey_design.
 
         Returns
         -------
diff --git a/diff_diff/two_stage.py b/diff_diff/two_stage.py
@@ -199,6 +199,12 @@ def fit(
         balance_e : int, optional
             When computing event study, restrict to cohorts observed at all
             relative times in [-balance_e, max_h].
+        survey_design : SurveyDesign, optional
+            Survey design specification for design-based inference. Supports
+            pweight only (aweight/fweight raise ValueError). FPC raises
+            NotImplementedError. PSU is used as cluster variable for Theorem 3
+            variance. Strata enters survey df for t-distribution inference.
+            Requires analytical inference (n_bootstrap=0).
 
         Returns
         -------
@@ -1663,6 +1669,12 @@ def two_stage_did(
         Aggregation mode: None, "simple", "event_study", "group", "all".
     balance_e : int, optional
         Balance event study to cohorts observed at all relative times.
+    survey_design : SurveyDesign, optional
+        Survey design specification for design-based inference. Supports
+        pweight only (aweight/fweight raise ValueError). FPC raises
+        NotImplementedError. PSU is used as cluster variable for Theorem 3
+        variance. Strata enters survey df for t-distribution inference.
+        Requires analytical inference (n_bootstrap=0).
     **kwargs
         Additional keyword arguments passed to TwoStageDiD constructor.
 
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -840,7 +840,7 @@ Y_it = alpha_i + beta_t [+ X'_it * delta] + W'_it * gamma + epsilon_it
 - **`balance_e` cohort filtering:** When `balance_e` is set, cohort balance is checked against the *full panel* (pre + post treatment) via `_build_cohort_rel_times()`, requiring observations at every relative time in `[-balance_e, max_h]`. Both analytical aggregation and bootstrap inference use the same `_compute_balanced_cohort_mask` with pre-computed cohort horizons.
 - **Bootstrap clustering:** Multiplier bootstrap generates weights at `cluster_var` granularity (defaults to `unit` if `cluster` not specified). Invalid cluster column raises ValueError.
 - **Non-constant `first_treat` within a unit:** Emits `UserWarning` identifying the count and example unit. The estimator proceeds using the first observed value per unit (via `.first()` aggregation), but results may be unreliable.
-- **treatment_effects DataFrame weights:** `weight` column uses `1/n_valid` for finite tau_hat and 0 for NaN tau_hat, consistent with the ATT estimand.
+- **treatment_effects DataFrame weights:** Without `survey_design`, the `weight` column uses `1/n_valid` for finite tau_hat and 0 for NaN tau_hat, consistent with the (unweighted) ATT estimand; when `survey_design` is active, it holds normalized survey weights `sw_i/sum(sw)`.
 - **Rank-deficient covariates in variance:** Covariates with NaN coefficients (dropped for rank deficiency in Step 1) are excluded from the variance design matrices `A_0`/`A_1`. Only covariates with finite coefficients participate in the `v_it` projection.
 - **Sparse variance solver:** `_compute_v_untreated_with_covariates` uses `scipy.sparse.linalg.spsolve` to solve `(A_0'A_0) z = A_1'w` without densifying the normal equations matrix. Falls back to dense `lstsq` if the sparse solver fails.
 - **Note:** Survey weights enter ImputationDiD via weighted iterative FE (Step 1), survey-weighted ATT aggregation (Step 3), and survey-weighted conservative variance (Theorem 3). PSU is used as the cluster variable for Theorem 3 variance. Strata enters survey df (n_PSU - n_strata) for t-distribution inference. FPC is not supported (raises NotImplementedError). Strata does NOT enter the variance formula itself (no stratified sandwich) — this is conservative relative to stratified variance. Bootstrap + survey deferred.
diff --git a/tests/test_survey_phase4.py b/tests/test_survey_phase4.py
@@ -183,6 +183,37 @@ def test_weight_scale_invariance(self):
 
         np.testing.assert_allclose(beta1, beta2, atol=1e-10)
 
+    def test_nan_weights_raises(self):
+        """NaN weights should raise ValueError."""
+        rng = np.random.RandomState(42)
+        n = 50
+        X = rng.randn(n, 2)
+        y = (X @ [0.5, -0.5] + rng.randn(n) > 0).astype(float)
+        weights = np.ones(n)
+        weights[3] = np.nan
+        with pytest.raises(ValueError, match="NaN"):
+            solve_logit(X, y, weights=weights)
+
+    def test_negative_weights_raises(self):
+        """Negative weights should raise ValueError."""
+        rng = np.random.RandomState(42)
+        n = 50
+        X = rng.randn(n, 2)
+        y = (X @ [0.5, -0.5] + rng.randn(n) > 0).astype(float)
+        weights = np.ones(n)
+        weights[0] = -1.0
+        with pytest.raises(ValueError, match="strictly positive"):
+            solve_logit(X, y, weights=weights)
+
+    def test_wrong_shape_weights_raises(self):
+        """Wrong-length weights should raise ValueError."""
+        rng = np.random.RandomState(42)
+        n = 50
+        X = rng.randn(n, 2)
+        y = (X @ [0.5, -0.5] + rng.randn(n) > 0).astype(float)
+        with pytest.raises(ValueError, match="shape"):
+            solve_logit(X, y, weights=np.ones(n + 5))
+
 
 # =============================================================================
 # TestImputationDiDSurvey
@@ -444,6 +475,32 @@ def test_aggregate_all_with_survey(self, staggered_survey_data, survey_design_we
         assert result.event_study_effects is not None
         assert result.group_effects is not None
 
+    def test_aweight_raises(self, staggered_survey_data):
+        """aweight survey design should raise ValueError."""
+        sd = SurveyDesign(weights="weight", weight_type="aweight")
+        with pytest.raises(ValueError, match="pweight"):
+            ImputationDiD().fit(
+                staggered_survey_data,
+                "outcome",
+                "unit",
+                "period",
+                "first_treat",
+                survey_design=sd,
+            )
+
+    def test_fpc_raises(self, staggered_survey_data):
+        """FPC survey design should raise NotImplementedError."""
+        sd = SurveyDesign(weights="weight", fpc="fpc")
+        with pytest.raises(NotImplementedError, match="FPC"):
+            ImputationDiD().fit(
+                staggered_survey_data,
+                "outcome",
+                "unit",
+                "period",
+                "first_treat",
+                survey_design=sd,
+            )
+
 
 # =============================================================================
 # TestTwoStageDiDSurvey
@@ -647,6 +704,32 @@ def test_always_treated_with_survey(self, staggered_survey_data):
         assert np.isfinite(result.overall_se)
         assert result.survey_metadata is not None
 
+    def test_aweight_raises(self, staggered_survey_data):
+        """aweight survey design should raise ValueError."""
+        sd = SurveyDesign(weights="weight", weight_type="aweight")
+        with pytest.raises(ValueError, match="pweight"):
+            TwoStageDiD().fit(
+                staggered_survey_data,
+                "outcome",
+                "unit",
+                "period",
+                "first_treat",
+                survey_design=sd,
+            )
+
+    def test_fpc_raises(self, staggered_survey_data):
+        """FPC survey design should raise NotImplementedError."""
+        sd = SurveyDesign(weights="weight", fpc="fpc")
+        with pytest.raises(NotImplementedError, match="FPC"):
+            TwoStageDiD().fit(
+                staggered_survey_data,
+                "outcome",
+                "unit",
+                "period",
+                "first_treat",
+                survey_design=sd,
+            )
+
 
 # =============================================================================
 # TestCallawaySantAnnaSurvey