Return n_valid from replicate variance functions, fix df properly

igerber · claude · igerber · commit 84809cdc4320 · 2026-03-27T12:11:53.000-04:00
Stop mutating resolved.n_replicates in place — instead return
(result, n_valid) tuples from compute_replicate_vcov() and
compute_replicate_if_variance(). Callers unpack the tuple and
LinearRegression.fit() uses n_valid - 1 for survey_df_ when n_valid > 1
(otherwise it keeps the design's df_survey).

This eliminates the shared-object mutation that the CI reviewer
flagged as P0 (order-dependent bugs on reused resolved designs)
while properly threading the effective df through inference.

Updated all 7 callers across 5 files, plus 8 test call sites in
tests/test_survey_phase6.py.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/continuous_did.py b/diff_diff/continuous_did.py
@@ -643,7 +643,7 @@ def fit(
 
                                 # Score-scale: psi = w * if_es (matches TSL bread)
                                 psi_es = unit_resolved_es.weights * if_es
-                                variance = compute_replicate_if_variance(psi_es, unit_resolved_es)
+                                variance, _nv = compute_replicate_if_variance(psi_es, unit_resolved_es)
                                 es_se = float(np.sqrt(max(variance, 0.0))) if np.isfinite(variance) else np.nan
                             else:
                                 X_ones_es = np.ones((n_units, 1))
@@ -1240,7 +1240,7 @@ def _compute_analytical_se(
 
                 def _rep_se(if_vals):
                     psi_scaled = _w_rep * if_vals
-                    v = compute_replicate_if_variance(psi_scaled, unit_resolved)
+                    v, _nv = compute_replicate_if_variance(psi_scaled, unit_resolved)
                     return float(np.sqrt(max(v, 0.0))) if np.isfinite(v) else np.nan
 
                 overall_att_se = _rep_se(if_att_glob)
diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
@@ -1088,7 +1088,7 @@ def _compute_survey_eif_se(self, eif_vals: np.ndarray) -> float:
             # Score-scale IFs to match TSL bread: psi = w * eif / sum(w)
             w = self._unit_resolved_survey.weights
             psi_scaled = w * eif_vals / w.sum()
-            variance = compute_replicate_if_variance(psi_scaled, self._unit_resolved_survey)
+            variance, _n_valid = compute_replicate_if_variance(psi_scaled, self._unit_resolved_survey)
             return float(np.sqrt(max(variance, 0.0))) if np.isfinite(variance) else np.nan
 
         from diff_diff.survey import compute_survey_vcov
diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py
@@ -1802,19 +1802,22 @@ def fit(
                 if np.any(nan_mask):
                     kept_cols = np.where(~nan_mask)[0]
                     if len(kept_cols) > 0:
-                        vcov_reduced = compute_replicate_vcov(
+                        vcov_reduced, _n_valid_rep = compute_replicate_vcov(
                             X[:, kept_cols], y, coefficients[kept_cols],
                             _effective_survey_design,
                             weight_type=self.weight_type,
                         )
                         vcov = _expand_vcov_with_nan(vcov_reduced, X.shape[1], kept_cols)
                     else:
                         vcov = np.full((X.shape[1], X.shape[1]), np.nan)
+                        _n_valid_rep = 0
                 else:
-                    vcov = compute_replicate_vcov(
+                    vcov, _n_valid_rep = compute_replicate_vcov(
                         X, y, coefficients, _effective_survey_design,
                         weight_type=self.weight_type,
                     )
+                # Store effective replicate df (n_valid - 1) for later use
+                self._replicate_df = _n_valid_rep - 1 if _n_valid_rep > 1 else None
             else:
                 from diff_diff.survey import compute_survey_vcov
 
@@ -1858,6 +1861,9 @@ def fit(
 
             if isinstance(_effective_survey_design, ResolvedSurveyDesign):
                 self.survey_df_ = _effective_survey_design.df_survey
+                # Override with effective replicate df if available
+                if hasattr(self, '_replicate_df') and self._replicate_df is not None:
+                    self.survey_df_ = self._replicate_df
 
         return self
 
diff --git a/diff_diff/staggered_aggregation.py b/diff_diff/staggered_aggregation.py
@@ -476,7 +476,7 @@ def _compute_aggregated_se_with_wif(
         if resolved_survey is not None and hasattr(resolved_survey, "uses_replicate_variance") and resolved_survey.uses_replicate_variance:
             from diff_diff.survey import compute_replicate_if_variance
 
-            variance = compute_replicate_if_variance(psi_total, resolved_survey)
+            variance, _n_valid = compute_replicate_if_variance(psi_total, resolved_survey)
             if np.isnan(variance):
                 return np.nan
             return np.sqrt(max(variance, 0.0))
diff --git a/diff_diff/survey.py b/diff_diff/survey.py
@@ -1308,9 +1308,6 @@ def compute_replicate_vcov(
             stacklevel=2,
         )
     n_valid = int(np.sum(valid))
-    # Update effective replicate count so df_survey reflects valid replicates
-    if n_valid < R:
-        resolved.n_replicates = n_valid
     if n_valid < 2:
         if n_valid == 0:
             warnings.warn(
@@ -1323,7 +1320,7 @@ def compute_replicate_vcov(
                 f"with fewer than 2. Returning NaN.",
                 UserWarning, stacklevel=2,
             )
-        return np.full((k, k), np.nan)
+        return np.full((k, k), np.nan), n_valid
     coef_valid = coef_reps[valid]
     c = full_sample_coef
 
@@ -1333,7 +1330,7 @@ def compute_replicate_vcov(
 
     if method in ("BRR", "Fay", "JK1"):
         factor = _replicate_variance_factor(method, int(np.sum(valid)), resolved.fay_rho)
-        return factor * outer_sum
+        return factor * outer_sum, n_valid
     elif method == "JKn":
         # JKn: V = sum_h ((n_h-1)/n_h) * sum_{r in h} (c_r - c)(c_r - c)^T
         rep_strata = resolved.replicate_strata
@@ -1348,15 +1345,15 @@ def compute_replicate_vcov(
                 continue
             diffs_h = diffs[mask_h]
             V += ((n_h - 1.0) / n_h) * (diffs_h.T @ diffs_h)
-        return V
+        return V, n_valid
     else:
         raise ValueError(f"Unknown replicate method: {method}")
 
 
 def compute_replicate_if_variance(
     psi: np.ndarray,
     resolved: "ResolvedSurveyDesign",
-) -> float:
+) -> Tuple[float, int]:
     """Compute replicate-based variance for influence-function estimators.
 
     Instead of re-running the full estimator, reweights the influence
@@ -1401,22 +1398,18 @@ def compute_replicate_if_variance(
 
     valid = np.isfinite(theta_reps)
     n_valid = int(np.sum(valid))
-    # Update effective replicate count so df_survey reflects valid replicates
-    if n_valid < R:
-        resolved.n_replicates = n_valid
     if n_valid < 2:
-        return np.nan
+        return np.nan, n_valid
     diffs = theta_reps[valid] - theta_full
     ss = float(np.sum(diffs**2))
 
     if method in ("BRR", "Fay", "JK1"):
-        factor = _replicate_variance_factor(method, int(np.sum(valid)), resolved.fay_rho)
-        return factor * ss
+        factor = _replicate_variance_factor(method, n_valid, resolved.fay_rho)
+        return factor * ss, n_valid
     elif method == "JKn":
         rep_strata = resolved.replicate_strata
         if rep_strata is None:
             raise ValueError("JKn requires replicate_strata")
-        # Filter to valid replicates
         valid_strata = rep_strata[valid]
         valid_diffs = diffs
         result = 0.0
@@ -1426,7 +1419,7 @@ def compute_replicate_if_variance(
             if n_h < 1:
                 continue
             result += ((n_h - 1.0) / n_h) * float(np.sum(valid_diffs[mask_h] ** 2))
-        return result
+        return result, n_valid
     else:
         raise ValueError(f"Unknown replicate method: {method}")
 
diff --git a/diff_diff/triple_diff.py b/diff_diff/triple_diff.py
@@ -1096,7 +1096,7 @@ def _estimate_ddd_decomposition(
                     psi_rep = inf_func / w_sum
                 else:
                     psi_rep = resolved_survey.weights * inf_func / w_sum
-                variance = compute_replicate_if_variance(psi_rep, resolved_survey)
+                variance, _nv = compute_replicate_if_variance(psi_rep, resolved_survey)
                 se = float(np.sqrt(max(variance, 0.0))) if np.isfinite(variance) else np.nan
             else:
                 from diff_diff.survey import compute_survey_vcov
diff --git a/tests/test_survey_phase6.py b/tests/test_survey_phase6.py
@@ -453,7 +453,7 @@ def test_brr_vcov(self, replicate_data):
             X, y, weights=resolved.weights, weight_type="pweight",
         )
 
-        vcov = compute_replicate_vcov(X, y, coef, resolved)
+        vcov, _nv = compute_replicate_vcov(X, y, coef, resolved)
         assert np.all(np.isfinite(np.diag(vcov)))
 
     def test_fay_inflates_over_brr(self, replicate_data):
@@ -471,14 +471,14 @@ def test_fay_inflates_over_brr(self, replicate_data):
         )
         resolved_brr = sd_brr.resolve(data)
         coef, _, _ = solve_ols(X, y, weights=resolved_brr.weights)
-        vcov_brr = compute_replicate_vcov(X, y, coef, resolved_brr)
+        vcov_brr, _nv = compute_replicate_vcov(X, y, coef, resolved_brr)
 
         sd_fay = SurveyDesign(
             weights="weight", replicate_weights=rep_cols,
             replicate_method="Fay", fay_rho=0.5,
         )
         resolved_fay = sd_fay.resolve(data)
-        vcov_fay = compute_replicate_vcov(X, y, coef, resolved_fay)
+        vcov_fay, _nv = compute_replicate_vcov(X, y, coef, resolved_fay)
 
         # Fay variance = BRR variance / (1-rho)^2 > BRR variance
         assert np.all(np.diag(vcov_fay) > np.diag(vcov_brr))
@@ -534,7 +534,7 @@ def test_replicate_if_variance(self, replicate_data):
 
         # Synthetic influence function
         psi = np.random.randn(len(data)) * 0.1
-        var = compute_replicate_if_variance(psi, resolved)
+        var, _nv = compute_replicate_if_variance(psi, resolved)
         assert np.isfinite(var)
         assert var >= 0
 
@@ -583,7 +583,7 @@ def test_jkn_variance(self, replicate_data):
         X = np.column_stack([np.ones(len(data)), data["x"].values])
 
         coef, _, _ = solve_ols(X, y, weights=resolved.weights)
-        vcov = compute_replicate_vcov(X, y, coef, resolved)
+        vcov, _nv = compute_replicate_vcov(X, y, coef, resolved)
         assert np.all(np.isfinite(np.diag(vcov)))
         assert np.all(np.diag(vcov) > 0)
 
@@ -611,7 +611,7 @@ def test_replicate_if_scale_matches_analytical(self):
         )
         resolved = sd.resolve(data)
 
-        v_rep = compute_replicate_if_variance(psi, resolved)
+        v_rep, _nv = compute_replicate_if_variance(psi, resolved)
         v_analytical = float(np.sum(psi**2))
 
         # JK1 gives (n-1)/n * sum(...) which should approximate sum(psi^2)
@@ -665,7 +665,7 @@ def test_replicate_if_matches_survey_if_variance(self):
             replicate_method="JK1",
             n_replicates=n_psu,
         )
-        v_rep = compute_replicate_if_variance(psi, resolved_rep)
+        v_rep, _nv = compute_replicate_if_variance(psi, resolved_rep)
 
         # Should be in the same ballpark (within 50% — different estimators
         # of the same quantity)
@@ -795,7 +795,7 @@ def test_replicate_if_no_divide_by_zero_warning(self):
         with warnings.catch_warnings():
             warnings.simplefilter("error", RuntimeWarning)
             # Should NOT raise RuntimeWarning for divide by zero
-            v = compute_replicate_if_variance(psi, resolved)
+            v, _nv = compute_replicate_if_variance(psi, resolved)
             assert np.isfinite(v)