Fix round-4 review P1s: CS WIF normalization, IPW nuisance IF, TwoStage n_psu

igerber · claude · igerber · commit 98d80c60f2fc · 2026-03-22T21:26:09.000-04:00
- CallawaySantAnna WIF: remove inner /total_weight from indicator_diff — the
  final psi_wif/total_weight handles normalization once, matching R's did::wif()
- CallawaySantAnna IPW covariate: add propensity score nuisance IF correction
  (survey-weighted Hessian, score, M2 gradient) so per-cell and aggregated SEs
  account for PS estimation uncertainty
- TwoStageDiD: recompute n_psu/n_strata after always-treated filtering via
  np.unique() on subsetted arrays, then recompute survey_metadata

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
@@ -1774,8 +1774,42 @@ def _ipw_estimation(
                 )
                 inf_func = np.concatenate([inf_treated, inf_control])
 
+                # Propensity score IF correction
+                # Accounts for estimation uncertainty in logistic regression coefficients
+                X_all_int = np.column_stack([np.ones(n_t + n_c), X_all])
+                pscore_all = np.concatenate([pscore_treated, pscore_control])
+
+                # Survey-weighted PS Hessian: sum(w_i * mu_i * (1-mu_i) * x_i * x_i')
+                W_ps = pscore_all * (1 - pscore_all)
+                if sw_all is not None:
+                    W_ps = W_ps * sw_all
+                H = X_all_int.T @ (W_ps[:, None] * X_all_int)
+                try:
+                    H_inv = np.linalg.solve(H, np.eye(H.shape[0]))
+                except np.linalg.LinAlgError:
+                    H_inv = np.linalg.lstsq(H, np.eye(H.shape[0]), rcond=None)[0]
+
+                # PS score: w_i * (D_i - pi_i) * X_i
+                D_all = np.concatenate([np.ones(n_t), np.zeros(n_c)])
+                score_ps = (D_all - pscore_all)[:, None] * X_all_int
+                if sw_all is not None:
+                    score_ps = score_ps * sw_all[:, None]
+                asy_lin_rep_ps = score_ps @ H_inv  # shape (n_t + n_c, p)
+
+                # M2: gradient of ATT w.r.t. PS parameters
+                att_control_weighted = np.sum(weights_control_norm * control_change)
+                M2 = np.mean(
+                    (weights_control_norm * (control_change - att_control_weighted))[:, None]
+                    * X_all_int[n_t:],
+                    axis=0,
+                )
+
+                # PS correction to influence function
+                inf_ps_correction = asy_lin_rep_ps @ M2
+                inf_func = inf_func + inf_ps_correction
+
                 # SE from influence function variance
-                var_psi = np.sum(inf_treated**2) + np.sum(inf_control**2)
+                var_psi = np.sum(inf_func**2)
                 se = float(np.sqrt(var_psi)) if var_psi > 0 else 0.0
             else:
                 # IPW weights for control units: p(X) / (1 - p(X))
diff --git a/diff_diff/staggered_aggregation.py b/diff_diff/staggered_aggregation.py
@@ -359,7 +359,7 @@ def _compute_combined_influence_function(
             # s_i * p_g_k  (symmetric weight application)
             weighted_pg_term = pg_keepers[np.newaxis, :] * unit_sw[:, np.newaxis]
             # s_i * (1{G_i == g_k} - p_g_k) / sum(s_j)
-            indicator_diff = (weighted_indicator - weighted_pg_term) / total_weight
+            indicator_diff = weighted_indicator - weighted_pg_term
             indicator_sum_w = np.sum(indicator_diff, axis=1)
 
             with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
diff --git a/diff_diff/two_stage.py b/diff_diff/two_stage.py
@@ -312,6 +312,25 @@ def fit(
                         else None
                     ),
                 )
+                # Recompute n_psu/n_strata after subsetting
+                new_n_psu = (
+                    len(np.unique(resolved_survey.psu)) if resolved_survey.psu is not None else 0
+                )
+                new_n_strata = (
+                    len(np.unique(resolved_survey.strata))
+                    if resolved_survey.strata is not None
+                    else 0
+                )
+                resolved_survey = replace(resolved_survey, n_psu=new_n_psu, n_strata=new_n_strata)
+                # Recompute survey_metadata since it depends on these counts
+                from diff_diff.survey import compute_survey_metadata
+
+                raw_w = (
+                    df[survey_design.weights].values.astype(np.float64)
+                    if survey_design.weights
+                    else np.ones(len(df), dtype=np.float64)
+                )
+                survey_metadata = compute_survey_metadata(resolved_survey, raw_w)
 
         # Treatment indicator with anticipation
         effective_treat = df[first_treat] - self.anticipation