Fix round-3 review P1s: DDD double-weighting, CS WIF scaling, CS covariate nuisance IF

igerber · claude · igerber · commit e754b04cc727 · 2026-03-22T21:05:22.000-04:00
- TripleDifference: remove double-weighting in IPW/DR moment corrections.
  The Riesz representers already incorporate the survey weights, so the moment
  means now use np.mean() rather than np.average(..., weights=...). Removed the
  _wmean_ax0 helper.
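- CallawaySantAnna WIF: apply s_i symmetrically to both the indicator and p_g
  terms in the influence function of the weighted group-share estimator, i.e.
  IF_i(p_g) = s_i * (1{G_i=g} - p_g) / sum(s_j). Scale the WIF contribution by
  1/total_weight (sum of survey weights; equal to n_units without survey
  weights) instead of 1/n_units.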
- CallawaySantAnna outcome regression covariate IF: add weighted regression
  nuisance IF correction (asymptotic linear representation of beta from WLS,
  projected onto weighted treated covariate mean). IPW and DR IFs unchanged
  (IPW matches unweighted structure; DR is self-correcting per Theorem 3.1).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
@@ -1591,9 +1591,34 @@ def _outcome_regression(
                 sw_c_norm = sw_control / np.sum(sw_control)
                 att = float(np.sum(sw_t_norm * treated_residuals))
 
+                # --- Regression nuisance IF correction ---
+                # Account for uncertainty in beta estimation
+                X_c = np.column_stack([np.ones(n_c), X_control])
+                X_t = np.column_stack([np.ones(n_t), X_treated])
+
+                # Weighted bread: (X'WX)^{-1}
+                XWX = X_c.T @ (X_c * sw_control[:, None])
+                try:
+                    XWX_inv = np.linalg.solve(XWX, np.eye(XWX.shape[0]))
+                except np.linalg.LinAlgError:
+                    XWX_inv = np.linalg.lstsq(XWX, np.eye(XWX.shape[0]), rcond=None)[0]
+
+                # Per-control regression score: w_i * x_i * resid_i
+                resid_c = control_change - X_c @ beta
+                score_c = X_c * (sw_control * resid_c)[:, None]
+                asy_lin_rep_reg = score_c @ XWX_inv  # shape (n_c, p)
+
+                # Weighted treated covariate mean
+                X_treated_mean_w = np.average(X_t, axis=0, weights=sw_treated)
+
+                # Regression IF correction for control observations
+                inf_control_reg_corr = asy_lin_rep_reg @ X_treated_mean_w
+
                 # Influence function (survey-weighted)
                 inf_treated = sw_t_norm * (treated_residuals - att)
-                inf_control = -sw_c_norm * (control_change - np.sum(sw_c_norm * control_change))
+                inf_control = (
+                    -sw_c_norm * (control_change - np.dot(X_c, beta)) + inf_control_reg_corr / n_c
+                )
                 inf_func = np.concatenate([inf_treated, inf_control])
 
                 # SE from influence function variance
diff --git a/diff_diff/staggered_aggregation.py b/diff_diff/staggered_aggregation.py
@@ -341,7 +341,8 @@ def _compute_combined_influence_function(
         ).astype(np.float64)
 
         if survey_w is not None:
-            # Survey-weighted WIF: indicator entries are sw_i / sum(sw_all)
+            # Survey-weighted WIF for group-share estimator p_g = sum(s_i * 1{G_i=g}) / sum(s_j).
+            # IF_i(p_g) = s_i * (1{G_i=g} - p_g) / sum(s_j)
             # Build per-unit weight vector aligned to our index space
             if global_unit_to_idx is not None and precomputed is not None:
                 unit_sw = np.zeros(n_units)
@@ -353,12 +354,16 @@ def _compute_combined_influence_function(
             else:
                 unit_sw = np.ones(n_units)
 
-            # Weighted indicator: sw_i * 1{G_i == g_k} / sum(sw_all)
-            weighted_indicator = indicator_matrix * (unit_sw / total_weight)[:, np.newaxis]
-            indicator_sum_w = np.sum(weighted_indicator - pg_keepers, axis=1)
+            # s_i * 1{G_i == g_k}
+            weighted_indicator = indicator_matrix * unit_sw[:, np.newaxis]
+            # s_i * p_g_k  (symmetric weight application)
+            weighted_pg_term = pg_keepers[np.newaxis, :] * unit_sw[:, np.newaxis]
+            # s_i * (1{G_i == g_k} - p_g_k) / sum(s_j)
+            indicator_diff = (weighted_indicator - weighted_pg_term) / total_weight
+            indicator_sum_w = np.sum(indicator_diff, axis=1)
 
             with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
-                if1_matrix = (weighted_indicator - pg_keepers) / sum_pg_keepers
+                if1_matrix = indicator_diff / sum_pg_keepers
                 if2_matrix = np.outer(indicator_sum_w, pg_keepers) / (sum_pg_keepers**2)
                 wif_matrix = if1_matrix - if2_matrix
                 wif_contrib = wif_matrix @ effects
@@ -386,8 +391,9 @@ def _compute_combined_influence_function(
             nan_result = np.full(n_units, np.nan)
             return nan_result, all_units
 
-        # Scale by 1/n_units to match R's getSE formula
-        psi_wif = wif_contrib / n_units
+        # Scale by 1/total_weight to match R's getSE formula
+        # (for non-survey, total_weight == n_units; for survey, total_weight == sum(sw))
+        psi_wif = wif_contrib / total_weight
 
         # Combine standard and wif terms
         psi_total = psi_standard + psi_wif
diff --git a/diff_diff/triple_diff.py b/diff_diff/triple_diff.py
@@ -1329,26 +1329,16 @@ def _hajek(riesz, y_vals):
                 score_ps = score_ps * weights[:, None]
             asy_lin_rep_ps = score_ps @ hessian
 
-            if weights is not None:
-                M2_pre = np.average(
-                    (riesz_control_pre * (y - att_control_pre))[:, None] * covX,
-                    axis=0,
-                    weights=weights,
-                ) / np.mean(riesz_control_pre)
-                M2_post = np.average(
-                    (riesz_control_post * (y - att_control_post))[:, None] * covX,
-                    axis=0,
-                    weights=weights,
-                ) / np.mean(riesz_control_post)
-            else:
-                M2_pre = np.mean(
-                    (riesz_control_pre * (y - att_control_pre))[:, None] * covX,
-                    axis=0,
-                ) / np.mean(riesz_control_pre)
-                M2_post = np.mean(
-                    (riesz_control_post * (y - att_control_post))[:, None] * covX,
-                    axis=0,
-                ) / np.mean(riesz_control_post)
+            # Riesz representers already incorporate survey weights,
+            # so use np.mean (not np.average with weights) to avoid double-weighting.
+            M2_pre = np.mean(
+                (riesz_control_pre * (y - att_control_pre))[:, None] * covX,
+                axis=0,
+            ) / np.mean(riesz_control_pre)
+            M2_post = np.mean(
+                (riesz_control_post * (y - att_control_post))[:, None] * covX,
+                axis=0,
+            ) / np.mean(riesz_control_post)
             inf_control_ps = asy_lin_rep_ps @ (M2_post - M2_pre)
             inf_control = inf_control + inf_control_ps
 
@@ -1616,19 +1606,15 @@ def _safe_ratio(num, denom):
         )
 
         # OR correction for treated
-        def _wmean_ax0(arr):
-            """Weighted or unweighted column mean."""
-            if weights is not None:
-                return np.average(arr, axis=0, weights=weights)
-            return np.mean(arr, axis=0)
-
+        # Riesz representers already incorporate survey weights,
+        # so use np.mean (not weighted average) to avoid double-weighting.
         M1_post = (
-            (-_wmean_ax0((riesz_treat_post * post)[:, None] * covX) / m_riesz_treat_post)
+            (-np.mean((riesz_treat_post * post)[:, None] * covX, axis=0) / m_riesz_treat_post)
             if m_riesz_treat_post > 0
             else np.zeros(covX.shape[1])
         )
         M1_pre = (
-            (-_wmean_ax0((riesz_treat_pre * (1 - post))[:, None] * covX) / m_riesz_treat_pre)
+            (-np.mean((riesz_treat_pre * (1 - post))[:, None] * covX, axis=0) / m_riesz_treat_pre)
             if m_riesz_treat_pre > 0
             else np.zeros(covX.shape[1])
         )
@@ -1653,15 +1639,19 @@ def _wmean_ax0(arr):
         # PS correction for control
         M2_pre = (
             (
-                _wmean_ax0((riesz_control_pre * (y - or_ctrl - att_control_pre))[:, None] * covX)
+                np.mean(
+                    (riesz_control_pre * (y - or_ctrl - att_control_pre))[:, None] * covX, axis=0
+                )
                 / m_riesz_control_pre
             )
             if m_riesz_control_pre > 0
             else np.zeros(covX.shape[1])
         )
         M2_post = (
             (
-                _wmean_ax0((riesz_control_post * (y - or_ctrl - att_control_post))[:, None] * covX)
+                np.mean(
+                    (riesz_control_post * (y - or_ctrl - att_control_post))[:, None] * covX, axis=0
+                )
                 / m_riesz_control_post
             )
             if m_riesz_control_post > 0
@@ -1671,12 +1661,15 @@ def _wmean_ax0(arr):
 
         # OR correction for control
         M3_post = (
-            (-_wmean_ax0((riesz_control_post * post)[:, None] * covX) / m_riesz_control_post)
+            (-np.mean((riesz_control_post * post)[:, None] * covX, axis=0) / m_riesz_control_post)
             if m_riesz_control_post > 0
             else np.zeros(covX.shape[1])
         )
         M3_pre = (
-            (-_wmean_ax0((riesz_control_pre * (1 - post))[:, None] * covX) / m_riesz_control_pre)
+            (
+                -np.mean((riesz_control_pre * (1 - post))[:, None] * covX, axis=0)
+                / m_riesz_control_pre
+            )
             if m_riesz_control_pre > 0
             else np.zeros(covX.shape[1])
         )
@@ -1704,12 +1697,16 @@ def _wmean_ax0(arr):
 
         # OR combination
         mom_post = (
-            _wmean_ax0((riesz_d[:, None] / m_riesz_d - riesz_dt1[:, None] / m_riesz_dt1) * covX)
+            np.mean(
+                (riesz_d[:, None] / m_riesz_d - riesz_dt1[:, None] / m_riesz_dt1) * covX, axis=0
+            )
             if (m_riesz_d > 0 and m_riesz_dt1 > 0)
             else np.zeros(covX.shape[1])
         )
         mom_pre = (
-            _wmean_ax0((riesz_d[:, None] / m_riesz_d - riesz_dt0[:, None] / m_riesz_dt0) * covX)
+            np.mean(
+                (riesz_d[:, None] / m_riesz_d - riesz_dt0[:, None] / m_riesz_dt0) * covX, axis=0
+            )
             if (m_riesz_d > 0 and m_riesz_dt0 > 0)
             else np.zeros(covX.shape[1])
         )