Fix P0/P1 findings from AI review: TripleDiff IPW/DR survey threading, CS SE formula

igerber · claude · igerber · commit ab65708f0116 · 2026-03-22T19:15:28.000-04:00
- P0: Thread survey_weights through TripleDifference IPW and DR call chains
  (_ipw_estimation, _doubly_robust, _compute_did_rc_ipw, _compute_did_rc_dr).
  Survey weights now enter Riesz representers for weighted Hajek averages.
- P1: Fix CallawaySantAnna no-covariate survey SE to derive from the stored
  influence functions, se = sqrt(sum(IF_i^2)), instead of
  sqrt(sum(w_norm * (y-mean)^2)). All 4 locations are now consistent with
  the stored influence functions.
- P1: Update REGISTRY.md TripleDifference entry to reflect full survey support
  (was still marked as "IPW/DR deferred").
- P2: Add behavioral tests for TripleDiff IPW/DR survey: non-uniform weights
  change ATT, uniform weights match unweighted.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
@@ -715,13 +715,15 @@ def _compute_all_att_gt_vectorized(
                 mu_c = float(np.sum(sw_c_norm * control_change))
                 att = mu_t - mu_c
 
-                var_t = float(np.sum(sw_t_norm * (treated_change - mu_t) ** 2))
-                var_c = float(np.sum(sw_c_norm * (control_change - mu_c) ** 2))
-                se = float(np.sqrt(var_t + var_c)) if (n_t > 0 and n_c > 0) else 0.0
-
                 # Influence function (survey-weighted)
                 inf_treated = sw_t_norm * (treated_change - mu_t)
                 inf_control = -sw_c_norm * (control_change - mu_c)
+                # SE derived from IF: sqrt(sum(IF_i^2)) over treated and control
+                se = (
+                    float(np.sqrt(np.sum(inf_treated**2) + np.sum(inf_control**2)))
+                    if (n_t > 0 and n_c > 0)
+                    else 0.0
+                )
                 sw_sum = float(np.sum(sw_t))
             else:
                 att = float(np.mean(treated_change) - np.mean(control_change))
@@ -1624,9 +1626,11 @@ def _outcome_regression(
                 inf_func = np.concatenate([inf_treated, inf_control])
 
                 # SE from influence function variance
-                var_t = float(np.sum(sw_t_norm * (treated_change - mu_t) ** 2))
-                var_c = float(np.sum(sw_c_norm * (control_change - mu_c) ** 2))
-                se = float(np.sqrt(var_t + var_c)) if (n_t > 0 and n_c > 0) else 0.0
+                se = (
+                    float(np.sqrt(np.sum(inf_treated**2) + np.sum(inf_control**2)))
+                    if (n_t > 0 and n_c > 0)
+                    else 0.0
+                )
             else:
                 att = float(np.mean(treated_change) - np.mean(control_change))
 
@@ -1787,9 +1791,11 @@ def _ipw_estimation(
                 inf_control = -sw_c_norm * (control_change - mu_c)
                 inf_func = np.concatenate([inf_treated, inf_control])
 
-                var_t = float(np.sum(sw_t_norm * (treated_change - mu_t) ** 2))
-                var_c = float(np.sum(sw_c_norm * (control_change - mu_c) ** 2))
-                se = float(np.sqrt(var_t + var_c)) if (n_t > 0 and n_c > 0) else 0.0
+                se = (
+                    float(np.sqrt(np.sum(inf_treated**2) + np.sum(inf_control**2)))
+                    if (n_t > 0 and n_c > 0)
+                    else 0.0
+                )
             else:
                 p_treat = n_treated / n_total  # unconditional propensity score
 
@@ -1998,9 +2004,11 @@ def _doubly_robust(
                 inf_control = -sw_c_norm * (control_change - mu_c)
                 inf_func = np.concatenate([inf_treated, inf_control])
 
-                var_t = float(np.sum(sw_t_norm * (treated_change - mu_t) ** 2))
-                var_c = float(np.sum(sw_c_norm * (control_change - mu_c) ** 2))
-                se = float(np.sqrt(var_t + var_c)) if (n_t > 0 and n_c > 0) else 0.0
+                se = (
+                    float(np.sqrt(np.sum(inf_treated**2) + np.sum(inf_control**2)))
+                    if (n_t > 0 and n_c > 0)
+                    else 0.0
+                )
             else:
                 att = float(np.mean(treated_change) - np.mean(control_change))
 
diff --git a/diff_diff/triple_diff.py b/diff_diff/triple_diff.py
@@ -565,9 +565,25 @@ def fit(
                 resolved_survey=resolved_survey,
             )
         elif self.estimation_method == "ipw":
-            att, se, r_squared, pscore_stats = self._ipw_estimation(y, G, P, T, X)
+            att, se, r_squared, pscore_stats = self._ipw_estimation(
+                y,
+                G,
+                P,
+                T,
+                X,
+                survey_weights=survey_weights,
+                resolved_survey=resolved_survey,
+            )
         else:  # doubly robust
-            att, se, r_squared, pscore_stats = self._doubly_robust(y, G, P, T, X)
+            att, se, r_squared, pscore_stats = self._doubly_robust(
+                y,
+                G,
+                P,
+                T,
+                X,
+                survey_weights=survey_weights,
+                resolved_survey=resolved_survey,
+            )
 
         # Compute inference
         # When survey design is active, use survey df (n_PSU - n_strata)
@@ -758,6 +774,8 @@ def _ipw_estimation(
         P: np.ndarray,
         T: np.ndarray,
         X: Optional[np.ndarray],
+        survey_weights: Optional[np.ndarray] = None,
+        resolved_survey=None,
     ) -> Tuple[float, float, Optional[float], Optional[Dict[str, float]]]:
         """
         Estimate ATT using inverse probability weighting via three-DiD
@@ -767,7 +785,15 @@ def _ipw_estimation(
         subgroup membership P(subgroup=4|X) within {j, 4} subset.
         Matches R's triplediff::ddd() with est_method="ipw".
         """
-        return self._estimate_ddd_decomposition(y, G, P, T, X)
+        return self._estimate_ddd_decomposition(
+            y,
+            G,
+            P,
+            T,
+            X,
+            survey_weights=survey_weights,
+            resolved_survey=resolved_survey,
+        )
 
     def _doubly_robust(
         self,
@@ -776,6 +802,8 @@ def _doubly_robust(
         P: np.ndarray,
         T: np.ndarray,
         X: Optional[np.ndarray],
+        survey_weights: Optional[np.ndarray] = None,
+        resolved_survey=None,
     ) -> Tuple[float, float, Optional[float], Optional[Dict[str, float]]]:
         """
         Estimate ATT using doubly robust estimation via three-DiD
@@ -786,7 +814,15 @@ def _doubly_robust(
         correctly specified. Matches R's triplediff::ddd() with
         est_method="dr".
         """
-        return self._estimate_ddd_decomposition(y, G, P, T, X)
+        return self._estimate_ddd_decomposition(
+            y,
+            G,
+            P,
+            T,
+            X,
+            survey_weights=survey_weights,
+            resolved_survey=resolved_survey,
+        )
 
     def _estimate_ddd_decomposition(
         self,
@@ -1186,7 +1222,17 @@ def _compute_did_rc(
         Matches R's triplediff::compute_did_rc().
         """
         if est_method == "ipw":
-            return self._compute_did_rc_ipw(y, post, PA4, PAa, pscore, covX, hessian, n)
+            return self._compute_did_rc_ipw(
+                y,
+                post,
+                PA4,
+                PAa,
+                pscore,
+                covX,
+                hessian,
+                n,
+                weights=weights,
+            )
         elif est_method == "reg":
             return self._compute_did_rc_reg(
                 y,
@@ -1215,6 +1261,7 @@ def _compute_did_rc(
                 or_trt_post,
                 hessian,
                 n,
+                weights=weights,
             )
 
     def _compute_did_rc_ipw(
@@ -1227,6 +1274,7 @@ def _compute_did_rc_ipw(
         covX: np.ndarray,
         hessian: Optional[np.ndarray],
         n: int,
+        weights: Optional[np.ndarray] = None,
     ) -> Tuple[float, np.ndarray]:
         """IPW DiD for a single pairwise comparison (RC)."""
         # Riesz representers (IPW weights * indicators)
@@ -1235,6 +1283,13 @@ def _compute_did_rc_ipw(
         riesz_control_pre = pscore * PAa * (1 - post) / (1 - pscore)
         riesz_control_post = pscore * PAa * post / (1 - pscore)
 
+        # Incorporate survey weights into Riesz representers
+        if weights is not None:
+            riesz_treat_pre = riesz_treat_pre * weights
+            riesz_treat_post = riesz_treat_post * weights
+            riesz_control_pre = riesz_control_pre * weights
+            riesz_control_post = riesz_control_post * weights
+
         # Hajek-normalized cell-time means
         def _hajek(riesz, y_vals):
             denom = np.mean(riesz)
@@ -1393,6 +1448,7 @@ def _compute_did_rc_dr(
         or_trt_post: np.ndarray,
         hessian: Optional[np.ndarray],
         n: int,
+        weights: Optional[np.ndarray] = None,
     ) -> Tuple[float, np.ndarray]:
         """Doubly robust DiD for a single pairwise comparison (RC)."""
         or_ctrl = post * or_ctrl_post + (1 - post) * or_ctrl_pre
@@ -1406,6 +1462,16 @@ def _compute_did_rc_dr(
         riesz_dt1 = PA4 * post
         riesz_dt0 = PA4 * (1 - post)
 
+        # Incorporate survey weights into Riesz representers
+        if weights is not None:
+            riesz_treat_pre = riesz_treat_pre * weights
+            riesz_treat_post = riesz_treat_post * weights
+            riesz_control_pre = riesz_control_pre * weights
+            riesz_control_post = riesz_control_post * weights
+            riesz_d = riesz_d * weights
+            riesz_dt1 = riesz_dt1 * weights
+            riesz_dt0 = riesz_dt0 * weights
+
         # DR cell-time components
         def _safe_ratio(num, denom):
             return num / denom if denom > 0 else 0.0
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -1245,8 +1245,7 @@ has no additional effect.
 - [x] Influence function SE: std(w3·IF_3 + w2·IF_2 - w1·IF_1) / sqrt(n)
 - [x] Cluster-robust SE via Liang-Zeger variance on influence function
 - [x] ATT and SE match R within <0.001% for all methods and DGP types
-- [x] Survey design support (Phase 3): regression method with weighted OLS + TSL on combined influence functions; IPW/DR deferred
-- **Note:** TripleDifference IPW/DR with survey weights deferred until weighted solve_logit() (Phase 5)
+- [x] Survey design support: all methods (reg, IPW, DR) with weighted OLS/logit + TSL on combined influence functions. Weighted solve_logit() for propensity scores in IPW/DR paths.
 
 ---
 
diff --git a/tests/test_survey_phase4.py b/tests/test_survey_phase4.py
@@ -715,6 +715,94 @@ def test_ipw_survey_results_finite(self, ddd_survey_data):
         assert np.isfinite(result.se)
         assert result.survey_metadata is not None
 
+    def test_ipw_nonuniform_weights_change_att(self, ddd_survey_data):
+        """Non-uniform survey weights should change IPW ATT vs unweighted."""
+        sd = SurveyDesign(weights="weight")
+        r_no = TripleDifference(estimation_method="ipw").fit(
+            ddd_survey_data,
+            "outcome",
+            "group",
+            "partition",
+            "time",
+        )
+        r_sv = TripleDifference(estimation_method="ipw").fit(
+            ddd_survey_data,
+            "outcome",
+            "group",
+            "partition",
+            "time",
+            survey_design=sd,
+        )
+        assert not np.isclose(
+            r_no.att, r_sv.att, atol=1e-6
+        ), "Non-uniform survey weights should change IPW ATT"
+
+    def test_dr_nonuniform_weights_change_att(self, ddd_survey_data):
+        """Non-uniform survey weights should change DR ATT vs unweighted."""
+        sd = SurveyDesign(weights="weight")
+        r_no = TripleDifference(estimation_method="dr").fit(
+            ddd_survey_data,
+            "outcome",
+            "group",
+            "partition",
+            "time",
+        )
+        r_sv = TripleDifference(estimation_method="dr").fit(
+            ddd_survey_data,
+            "outcome",
+            "group",
+            "partition",
+            "time",
+            survey_design=sd,
+        )
+        assert not np.isclose(
+            r_no.att, r_sv.att, atol=1e-6
+        ), "Non-uniform survey weights should change DR ATT"
+
+    def test_ipw_uniform_weights_match_unweighted(self, ddd_survey_data):
+        """Uniform survey weights should match unweighted IPW result."""
+        data = ddd_survey_data.copy()
+        data["uw"] = 1.0
+        sd = SurveyDesign(weights="uw")
+        r_no = TripleDifference(estimation_method="ipw").fit(
+            data,
+            "outcome",
+            "group",
+            "partition",
+            "time",
+        )
+        r_sv = TripleDifference(estimation_method="ipw").fit(
+            data,
+            "outcome",
+            "group",
+            "partition",
+            "time",
+            survey_design=sd,
+        )
+        assert np.isclose(r_no.att, r_sv.att, atol=1e-6)
+
+    def test_dr_uniform_weights_match_unweighted(self, ddd_survey_data):
+        """Uniform survey weights should match unweighted DR result."""
+        data = ddd_survey_data.copy()
+        data["uw"] = 1.0
+        sd = SurveyDesign(weights="uw")
+        r_no = TripleDifference(estimation_method="dr").fit(
+            data,
+            "outcome",
+            "group",
+            "partition",
+            "time",
+        )
+        r_sv = TripleDifference(estimation_method="dr").fit(
+            data,
+            "outcome",
+            "group",
+            "partition",
+            "time",
+            survey_design=sd,
+        )
+        assert np.isclose(r_no.att, r_sv.att, atol=1e-6)
+
 
 # =============================================================================
 # TestCallawaySantAnnaSurveyInference