Fix CS df key name, re-read df after aggregation, propagate ContinuousDiD df

igerber · claude · igerber · commit 59302869f282 · 2026-03-27T13:42:09.000-04:00
- Fix precomputed key: "df_survey" not "survey_df" in CS aggregation
- CS: re-read df_survey from precomputed after aggregation so overall
  ATT inference uses updated n_valid-1 when replicate columns are dropped
- ContinuousDiD: track _rep_n_valid across replicate IF calls, use
  min(n_valid) - 1 for df_survey in analytical SE return (None when
  no more than one replicate is valid)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/continuous_did.py b/diff_diff/continuous_did.py
@@ -1237,10 +1237,13 @@ def _compute_analytical_se(
                 from diff_diff.survey import compute_replicate_if_variance
 
                 _w_rep = unit_resolved.weights
+                _rep_n_valid = unit_resolved.n_replicates  # track effective count
 
                 def _rep_se(if_vals):
+                    nonlocal _rep_n_valid
                     psi_scaled = _w_rep * if_vals
-                    v, _nv = compute_replicate_if_variance(psi_scaled, unit_resolved)
+                    v, nv = compute_replicate_if_variance(psi_scaled, unit_resolved)
+                    _rep_n_valid = min(_rep_n_valid, nv)  # worst-case valid count
                     return float(np.sqrt(max(v, 0.0))) if np.isfinite(v) else np.nan
 
                 overall_att_se = _rep_se(if_att_glob)
@@ -1282,7 +1285,11 @@ def _rep_se(if_vals):
             acrt_d_se = np.sqrt(np.sum(if_acrt_d**2, axis=0))
 
         # Return unit-level survey df and resolved design for metadata recomputation
-        unit_df_survey = unit_resolved.df_survey if resolved_survey is not None else None
+        # Use effective replicate df if available (from _rep_se calls)
+        if resolved_survey is not None and hasattr(resolved_survey, 'uses_replicate_variance') and resolved_survey.uses_replicate_variance:
+            unit_df_survey = _rep_n_valid - 1 if _rep_n_valid > 1 else None
+        else:
+            unit_df_survey = unit_resolved.df_survey if resolved_survey is not None else None
 
         return {
             "overall_att_se": overall_att_se,
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
@@ -1493,6 +1493,8 @@ def fit(
         overall_att, overall_se = self._aggregate_simple(
             group_time_effects, influence_func_info, df, unit, precomputed
         )
+        # Re-read df_survey in case replicate aggregation updated it
+        df_survey = precomputed.get("df_survey")
         overall_t, overall_p, overall_ci = safe_inference(
             overall_att,
             overall_se,
diff --git a/diff_diff/staggered_aggregation.py b/diff_diff/staggered_aggregation.py
@@ -479,7 +479,7 @@ def _compute_aggregated_se_with_wif(
             variance, n_valid_rep = compute_replicate_if_variance(psi_total, resolved_survey)
             # Update precomputed survey df to reflect valid replicate count
             if precomputed is not None and n_valid_rep < resolved_survey.n_replicates:
-                precomputed["survey_df"] = n_valid_rep - 1 if n_valid_rep > 1 else None
+                precomputed["df_survey"] = n_valid_rep - 1 if n_valid_rep > 1 else None
             if np.isnan(variance):
                 return np.nan
             return np.sqrt(max(variance, 0.0))

Original file line number	Diff line number	Diff line change
`@@ -1493,6 +1493,8 @@ def fit(`
`1493`	`1493`	`overall_att, overall_se = self._aggregate_simple(`
`1494`	`1494`	`group_time_effects, influence_func_info, df, unit, precomputed`
`1495`	`1495`	`)`
	`1496`	`+ # Re-read df_survey in case replicate aggregation updated it`
	`1497`	`+ df_survey = precomputed.get("df_survey")`
`1496`	`1498`	`overall_t, overall_p, overall_ci = safe_inference(`
`1497`	`1499`	`overall_att,`
`1498`	`1500`	`overall_se,`