Fix zero-SE inference, full-census FPC, fweight contract, and absorbed sample counts from PR #218 review (round 10)

igerber · claude · igerber · commit a9bf9cece1f2 · 2026-03-21T07:05:27.000-04:00
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/estimators.py b/diff_diff/estimators.py
@@ -246,6 +246,10 @@ def fit(
         absorbed_vars = []
         n_absorbed_effects = 0
 
+        # Save raw treatment counts before absorb demeaning
+        n_treated_raw = int(np.sum(data[treatment].values.astype(float)))
+        n_control_raw = len(data) - n_treated_raw
+
         if absorb:
             # FWL theorem: demean ALL regressors alongside outcome.
             # Regressors collinear with absorbed FE (e.g., treatment after
@@ -358,9 +362,9 @@ def fit(
 
         r_squared = compute_r_squared(y, residuals)
 
-        # Count observations
-        n_treated = int(np.sum(d))
-        n_control = int(np.sum(1 - d))
+        # Count observations (use raw counts to avoid demeaned values from absorb)
+        n_treated = n_treated_raw
+        n_control = n_control_raw
 
         # Create coefficient dictionary
         coef_dict = {name: coef for name, coef in zip(var_names, coefficients)}
@@ -985,6 +989,10 @@ def fit(  # type: ignore[override]
         working_data = data.copy()
         n_absorbed_effects = 0
 
+        # Save raw treatment counts before absorb demeaning
+        n_treated_raw = int(np.sum(data[treatment].values.astype(float)))
+        n_control_raw = len(data) - n_treated_raw
+
         # Pre-compute non_ref_periods (needed for absorb demeaning)
         non_ref_periods = [p for p in all_periods if p != reference_period]
 
@@ -1216,9 +1224,9 @@ def fit(  # type: ignore[override]
                     avg_att, avg_se, alpha=self.alpha, df=df
                 )
 
-        # Count observations
-        n_treated = int(np.sum(d))
-        n_control = int(np.sum(1 - d))
+        # Count observations (use raw counts to avoid demeaned values from absorb)
+        n_treated = n_treated_raw
+        n_control = n_control_raw
 
         # Create coefficient dictionary
         coef_dict = {name: coef for name, coef in zip(var_names, coefficients)}
diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py
@@ -1835,25 +1835,6 @@ def get_inference(
         coef = float(self.coefficients_[index])
         se = float(np.sqrt(self.vcov_[index, index]))
 
-        # Handle zero or negative SE (indicates perfect fit or numerical issues)
-        if se <= 0:
-            import warnings
-
-            warnings.warn(
-                f"Standard error is zero or negative (se={se}) for coefficient at index {index}. "
-                "This may indicate perfect multicollinearity or numerical issues.",
-                UserWarning,
-            )
-            # NOTE: Deliberately uses ±inf (not NaN via safe_inference) for zero-SE coefficients.
-            if coef > 0:
-                t_stat = np.inf
-            elif coef < 0:
-                t_stat = -np.inf
-            else:
-                t_stat = 0.0
-        else:
-            t_stat = coef / se
-
         # Use instance alpha if not provided
         effective_alpha = alpha if alpha is not None else self.alpha
 
@@ -1877,11 +1858,12 @@ def get_inference(
             )
             effective_df = None
 
-        # Compute p-value
-        p_value = _compute_p_value(t_stat, df=effective_df)
+        # Use project-standard NaN-safe inference (returns all-NaN when SE <= 0)
+        from diff_diff.utils import safe_inference
 
-        # Compute confidence interval
-        conf_int = _compute_confidence_interval(coef, se, effective_alpha, df=effective_df)
+        t_stat, p_value, conf_int = safe_inference(
+            coef, se, alpha=effective_alpha, df=effective_df
+        )
 
         return InferenceResult(
             coefficient=coef,
diff --git a/diff_diff/survey.py b/diff_diff/survey.py
@@ -105,15 +105,13 @@ def resolve(self, data: pd.DataFrame) -> "ResolvedSurveyDesign":
             if np.any(raw_weights <= 0):
                 raise ValueError("Weights must be strictly positive")
 
-            # fweight validation: should be positive integers
+            # fweight validation: must be positive integers
             if self.weight_type == "fweight":
                 fractional = raw_weights - np.round(raw_weights)
                 if np.any(np.abs(fractional) > 1e-10):
-                    warnings.warn(
-                        "Frequency weights (fweight) should be positive integers. "
-                        "Fractional values detected; rounding will not be applied.",
-                        UserWarning,
-                        stacklevel=2,
+                    raise ValueError(
+                        "Frequency weights (fweight) must be positive integers. "
+                        "Fractional values detected. Use pweight for non-integer weights."
                     )
 
             # Normalize: pweights/aweights to sum=n (mean=1); fweights unchanged
@@ -493,7 +491,7 @@ def compute_survey_vcov(
     strata = resolved.strata
     psu = resolved.psu
 
-    certainty_strata_count = 0
+    legitimate_zero_count = 0
 
     if strata is None and psu is None:
         # No survey structure beyond weights — use implicit per-observation PSUs
@@ -521,6 +519,8 @@ def compute_survey_vcov(
             if resolved.fpc is not None:
                 N_h = resolved.fpc[0]
                 f_h = n_psu / N_h
+                if f_h >= 1.0:
+                    legitimate_zero_count += 1
             adjustment = (1.0 - f_h) * (n_psu / (n_psu - 1))
             meat = adjustment * (centered.T @ centered)
     else:
@@ -558,7 +558,7 @@ def compute_survey_vcov(
                 if resolved.lonely_psu == "remove":
                     continue  # Skip this stratum
                 elif resolved.lonely_psu == "certainty":
-                    certainty_strata_count += 1
+                    legitimate_zero_count += 1
                     continue  # f_h = 1, so (1-f_h) = 0, zero contribution
                 elif resolved.lonely_psu == "adjust":
                     # Center around overall mean instead of stratum mean
@@ -572,6 +572,8 @@ def compute_survey_vcov(
             if resolved.fpc is not None:
                 N_h = resolved.fpc[mask_h][0]
                 f_h = n_psu_h / N_h
+                if f_h >= 1.0:
+                    legitimate_zero_count += 1
 
             # Stratum mean of PSU scores
             psu_mean_h = psu_scores_h.mean(axis=0, keepdims=True)
@@ -584,8 +586,8 @@ def compute_survey_vcov(
 
     # Guard: if no stratum contributed variance, check why
     if not np.any(meat != 0):
-        if certainty_strata_count > 0:
-            # All zero variance came from certainty PSUs — legitimate zero
+        if legitimate_zero_count > 0:
+            # All zero variance came from legitimate sources (certainty PSUs or full-census FPC)
             return np.zeros((k, k))
         return np.full((k, k), np.nan)
 
diff --git a/tests/test_survey.py b/tests/test_survey.py
@@ -940,20 +940,17 @@ def test_multiperiod_with_survey_design(self, multiperiod_data):
         # Average ATT should be close to 2.5
         assert abs(result.avg_att - 2.5) < 1.5
 
-    def test_fweight_warning_for_fractional(self):
-        """Fractional fweights emit a UserWarning."""
+    def test_fweight_error_for_fractional(self):
+        """Fractional fweights raise ValueError."""
         df = pd.DataFrame(
             {
                 "y": [1, 2, 3],
                 "w": [1.5, 2.0, 3.0],  # 1.5 is fractional
             }
         )
         sd = SurveyDesign(weights="w", weight_type="fweight")
-        with warnings.catch_warnings(record=True) as w:
-            warnings.simplefilter("always")
+        with pytest.raises(ValueError, match="Frequency weights.*must be positive integers"):
             sd.resolve(df)
-            fweight_warnings = [x for x in w if "Frequency weights" in str(x.message)]
-            assert len(fweight_warnings) >= 1
 
     def test_lonely_psu_remove_warning(self):
         """Singleton stratum with lonely_psu='remove' emits warning."""
@@ -2443,3 +2440,142 @@ def test_multiperiod_fweight_df_rounding(self):
         assert np.isfinite(result.avg_att)
         assert np.isfinite(result.avg_se)
         assert result.avg_se > 0
+
+
+class TestRound10Fixes:
+    """Tests for PR #218 review round 10 fixes."""
+
+    def test_zero_se_estimator_nan_inference(self):
+        """Zero-SE path in LinearRegression.get_inference() returns NaN, not ±inf."""
+        # Build a design where all strata are certainty PSUs → zero vcov → zero SE
+        np.random.seed(42)
+        n = 40
+        strata = np.repeat([0, 1, 2, 3], 10)
+        psu = strata.copy()  # 1 PSU per stratum → all certainty
+        df = pd.DataFrame(
+            {
+                "outcome": np.random.randn(n),
+                "treated": np.array([1] * 20 + [0] * 20),
+                "post": np.tile([0, 1], 20),
+                "w": np.ones(n),
+                "strat": strata,
+                "cluster": psu,
+            }
+        )
+        sd = SurveyDesign(
+            weights="w",
+            weight_type="pweight",
+            strata="strat",
+            psu="cluster",
+            lonely_psu="certainty",
+        )
+        did = DifferenceInDifferences()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            result = did.fit(
+                df,
+                outcome="outcome",
+                treatment="treated",
+                time="post",
+                survey_design=sd,
+            )
+        # SE should be 0 (all certainty strata), inference should be NaN
+        assert result.se == 0.0
+        assert np.isnan(result.t_stat)
+        assert np.isnan(result.p_value)
+        assert np.isnan(result.conf_int[0])
+        assert np.isnan(result.conf_int[1])
+
+    def test_full_census_fpc_stratified_zero_vcov(self):
+        """Full-census FPC (f_h=1) returns zero vcov, not NaN."""
+        np.random.seed(42)
+        n = 60
+        strata = np.repeat([0, 1, 2], 20)
+        psu = np.tile(np.arange(5), 12)  # 5 PSUs per stratum
+
+        X = np.column_stack([np.ones(n), np.random.randn(n)])
+        y = np.random.randn(n)
+        residuals = np.random.randn(n)
+        weights = np.ones(n)
+
+        # FPC = n_psu per stratum (full census: f_h = 5/5 = 1)
+        fpc = np.array([5.0] * n)
+
+        resolved = ResolvedSurveyDesign(
+            weights=weights,
+            weight_type="pweight",
+            strata=strata,
+            psu=psu,
+            fpc=fpc,
+            n_strata=3,
+            n_psu=15,
+            lonely_psu="remove",
+        )
+        vcov = compute_survey_vcov(X, residuals, resolved=resolved)
+        # Full census → zero variance → zero vcov
+        np.testing.assert_array_equal(vcov, np.zeros((2, 2)))
+
+    def test_full_census_fpc_unstratified_zero_vcov(self):
+        """Unstratified full-census FPC returns zero vcov, not NaN."""
+        np.random.seed(42)
+        n = 30
+        psu = np.repeat(np.arange(6), 5)  # 6 PSUs
+
+        X = np.column_stack([np.ones(n), np.random.randn(n)])
+        y = np.random.randn(n)
+        residuals = np.random.randn(n)
+        weights = np.ones(n)
+
+        # FPC = n_psu (full census: f_h = 6/6 = 1)
+        fpc = np.array([6.0] * n)
+
+        resolved = ResolvedSurveyDesign(
+            weights=weights,
+            weight_type="pweight",
+            strata=None,
+            psu=psu,
+            fpc=fpc,
+            n_strata=0,
+            n_psu=6,
+            lonely_psu="remove",
+        )
+        vcov = compute_survey_vcov(X, residuals, resolved=resolved)
+        # Full census → (1-f_h)=0 → zero meat → zero vcov
+        np.testing.assert_array_equal(vcov, np.zeros((2, 2)))
+
+    def test_absorbed_did_sample_counts(self):
+        """n_treated/n_control reflect raw data, not demeaned values after absorb."""
+        np.random.seed(42)
+        n_units = 20
+        n_times = 4
+        rows = []
+        for u in range(n_units):
+            for t in range(n_times):
+                rows.append(
+                    {
+                        "unit": u,
+                        "time": t,
+                        "treated": 1 if u < 8 else 0,
+                        "post": 1 if t >= 2 else 0,
+                        "outcome": np.random.randn(),
+                        "region": u % 3,
+                    }
+                )
+        df = pd.DataFrame(rows)
+
+        did = DifferenceInDifferences()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            result = did.fit(
+                df,
+                outcome="outcome",
+                treatment="treated",
+                time="post",
+                absorb=["region"],
+            )
+
+        # Raw counts: 8 treated units * 4 times = 32 treated obs
+        raw_treated = int(df["treated"].sum())
+        raw_control = len(df) - raw_treated
+        assert result.n_treated == raw_treated
+        assert result.n_control == raw_control