Address CI review: RCS IF corrections, aggregation weights, replicate VCV, panel on results

igerber · claude · igerber · commit 4bf566d1f94e · 2026-03-28T20:14:22.000-04:00
Fix 5 findings from PR #240 CI review: - Add cross-sectional nuisance IF corrections (PS + OR) to _ipw_estimation_rc and _doubly_robust_rc, matching panel path methodology - Use fixed full-sample cohort masses for unweighted RCS aggregation weights (consistency with WIF group-share denominator) - Guard replicate-weight designs from full event-study VCV (diagonal fallback) - Add panel field to CallawaySantAnnaResults, fix summary labels for RCS - Add panel to class docstring, replicate VCV test, RCS IF correction test Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
@@ -181,6 +181,13 @@ class CallawaySantAnna(
         Trimming bound for propensity scores. Scores are clipped to
         ``[pscore_trim, 1 - pscore_trim]`` before weight computation
         in IPW and DR estimation. Must be in ``(0, 0.5)``.
+    panel : bool, default=True
+        Whether the data is a balanced/unbalanced panel (units observed
+        across multiple time periods). Set to ``False`` for repeated
+        cross-sections where each observation has a unique unit ID and
+        units do not repeat across periods. Uses cross-sectional DRDID
+        (Sant'Anna & Zhao 2020, Section 4) with per-observation influence
+        functions.
 
     Attributes
     ----------
@@ -1783,6 +1790,7 @@ def fit(
             pscore_trim=self.pscore_trim,
             survey_metadata=survey_metadata,
             event_study_vcov=event_study_vcov,
+            panel=self.panel,
         )
 
         self.is_fitted_ = True
@@ -2972,6 +2980,40 @@ def _ipw_estimation_rc(
         inf_control = np.concatenate([inf_ct, inf_cs])
         inf_all = np.concatenate([inf_treated, inf_control])
 
+        # PS IF correction for cross-sectional IPW
+        X_all_int = np.column_stack([np.ones(len(D_all)), X_all])
+        pscore_all = pscore  # already computed and clipped
+
+        W_ps = pscore_all * (1 - pscore_all)
+        if sw_all is not None:
+            W_ps = W_ps * sw_all
+        H_ps = X_all_int.T @ (W_ps[:, None] * X_all_int)
+        H_ps_inv = _safe_inv(H_ps)
+
+        score_ps = (D_all - pscore_all)[:, None] * X_all_int
+        if sw_all is not None:
+            score_ps = score_ps * sw_all[:, None]
+        asy_lin_rep_ps = score_ps @ H_ps_inv  # (n_all, p+1)
+
+        # M2: gradient of IPW ATT w.r.t. PS parameters
+        # Control IPW residuals from both periods
+        ipw_resid_ct = w_ct_norm * (y_ct - mu_ct_ipw)
+        ipw_resid_cs = w_cs_norm * (y_cs - mu_cs_ipw)
+        # Zero for treated observations
+        M2_rc = np.zeros(X_all_int.shape[1])
+        # Control-t contribution
+        M2_rc += np.mean(
+            ipw_resid_ct[:, None] * X_all_int[n_gt + n_gs : n_gt + n_gs + n_ct],
+            axis=0,
+        )
+        # Control-s contribution (opposite sign -- base period)
+        M2_rc -= np.mean(
+            ipw_resid_cs[:, None] * X_all_int[n_gt + n_gs + n_ct :],
+            axis=0,
+        )
+
+        inf_all = inf_all + asy_lin_rep_ps @ M2_rc
+
         se = float(np.sqrt(np.sum(inf_all**2)))
 
         idx_all = None
@@ -3121,6 +3163,70 @@ def _doubly_robust_rc(
         inf_control = np.concatenate([inf_ct, inf_cs])
         inf_all = np.concatenate([inf_treated, inf_control])
 
+        # --- PS IF correction ---
+        X_all_int = np.column_stack([np.ones(len(D_all)), X_all])
+        pscore_all = pscore
+
+        W_ps = pscore_all * (1 - pscore_all)
+        if sw_all is not None:
+            W_ps = W_ps * sw_all
+        H_ps = X_all_int.T @ (W_ps[:, None] * X_all_int)
+        H_ps_inv = _safe_inv(H_ps)
+
+        score_ps = (D_all - pscore_all)[:, None] * X_all_int
+        if sw_all is not None:
+            score_ps = score_ps * sw_all[:, None]
+        asy_lin_rep_ps = score_ps @ H_ps_inv
+
+        # M2_dr: uses DR residuals (m-y) instead of raw y
+        dr_resid_ct = m_ct - y_ct  # control period-t DR residuals
+        dr_resid_cs = m_cs - y_cs  # control period-s DR residuals
+        normalizer = np.sum(sw_gt) if sw_gt is not None else n_gt
+        M2_dr = np.zeros(X_all_int.shape[1])
+        # Control-t: (w_ct/normalizer) * (m_ct - y_ct) * X
+        ct_slice = slice(n_gt + n_gs, n_gt + n_gs + n_ct)
+        M2_dr += np.mean(
+            ((w_ct / normalizer) * dr_resid_ct)[:, None] * X_all_int[ct_slice],
+            axis=0,
+        )
+        # Control-s: -(w_cs/normalizer) * (m_cs - y_cs) * X (opposite sign)
+        cs_slice = slice(n_gt + n_gs + n_ct, None)
+        M2_dr -= np.mean(
+            ((w_cs / normalizer) * dr_resid_cs)[:, None] * X_all_int[cs_slice],
+            axis=0,
+        )
+
+        inf_all = inf_all + asy_lin_rep_ps @ M2_dr
+
+        # --- OR IF correction -- period t model ---
+        W_t = sw_ct if sw_ct is not None else np.ones(n_ct)
+        bread_t = _safe_inv(X_ct_int.T @ (W_t[:, None] * X_ct_int))
+
+        # M1_t: dATT/dbeta_t (from treated-t prediction and control-t augmentation)
+        sw_gt_vals = sw_gt if sw_gt is not None else np.ones(n_gt)
+        M1_t = (
+            -np.sum(sw_gt_vals[:, None] * X_gt_int, axis=0)
+            + np.sum(w_ct[:, None] * X_ct_int, axis=0)
+        ) / normalizer
+
+        asy_lin_rep_or_t = (W_t * (y_ct - m_ct))[:, None] * X_ct_int @ bread_t
+        # Apply only to control-t portion of inf_all
+        inf_all[n_gt + n_gs : n_gt + n_gs + n_ct] += asy_lin_rep_or_t @ M1_t
+
+        # --- OR IF correction -- period s model ---
+        W_s = sw_cs if sw_cs is not None else np.ones(n_cs)
+        bread_s = _safe_inv(X_cs_int.T @ (W_s[:, None] * X_cs_int))
+
+        sw_gs_vals = sw_gs if sw_gs is not None else np.ones(n_gs)
+        M1_s = (
+            np.sum(sw_gs_vals[:, None] * X_gs_int, axis=0)
+            - np.sum(w_cs[:, None] * X_cs_int, axis=0)
+        ) / normalizer
+
+        asy_lin_rep_or_s = (W_s * (y_cs - m_cs))[:, None] * X_cs_int @ bread_s
+        # Apply only to control-s portion of inf_all
+        inf_all[n_gt + n_gs + n_ct :] += asy_lin_rep_or_s @ M1_s
+
         se = float(np.sqrt(np.sum(inf_all**2)))
 
         idx_all = None
diff --git a/diff_diff/staggered_aggregation.py b/diff_diff/staggered_aggregation.py
@@ -73,15 +73,31 @@ def _aggregate_simple(
                 if g > 0:  # exclude never-treated (0)
                     survey_cohort_weights[g] = float(np.sum(sw[unit_cohorts == g]))
 
+        # For unweighted RCS: use fixed full-sample cohort counts so that
+        # aggregation weights match the WIF group-share denominator.
+        rcs_cohort_counts = None
+        if (
+            precomputed is not None
+            and not precomputed.get("is_panel", True)
+            and survey_cohort_weights is None
+        ):
+            unit_cohorts = precomputed["unit_cohorts"]
+            rcs_cohort_counts = {}
+            for g in np.unique(unit_cohorts):
+                if g > 0:
+                    rcs_cohort_counts[g] = int(np.sum(unit_cohorts == g))
+
         for (g, t), data in group_time_effects.items():
             # Only include post-treatment effects (t >= g - anticipation)
             # Pre-treatment effects are for parallel trends, not overall ATT
             if t < g - self.anticipation:
                 continue
             effects.append(data["effect"])
-            # Use fixed cohort-level survey weight sum for aggregation
+            # Use fixed cohort-level weights for aggregation
             if survey_cohort_weights is not None and g in survey_cohort_weights:
                 weights_list.append(survey_cohort_weights[g])
+            elif rcs_cohort_counts is not None and g in rcs_cohort_counts:
+                weights_list.append(rcs_cohort_counts[g])
             else:
                 weights_list.append(data["n_treated"])
             gt_pairs.append((g, t))
@@ -571,15 +587,29 @@ def _aggregate_event_study(
                 if g > 0:
                     survey_cohort_weights[g] = float(np.sum(sw[unit_cohorts == g]))
 
+        # For unweighted RCS: fixed full-sample cohort counts (matching WIF)
+        rcs_cohort_counts = None
+        if (
+            precomputed is not None
+            and not precomputed.get("is_panel", True)
+            and survey_cohort_weights is None
+        ):
+            unit_cohorts_es = precomputed["unit_cohorts"]
+            rcs_cohort_counts = {}
+            for g in np.unique(unit_cohorts_es):
+                if g > 0:
+                    rcs_cohort_counts[g] = int(np.sum(unit_cohorts_es == g))
+
         for (g, t), data in group_time_effects.items():
             e = t - g  # Relative time
             if e not in effects_by_e:
                 effects_by_e[e] = []
-            w = (
-                survey_cohort_weights[g]
-                if survey_cohort_weights is not None and g in survey_cohort_weights
-                else data["n_treated"]
-            )
+            if survey_cohort_weights is not None and g in survey_cohort_weights:
+                w = survey_cohort_weights[g]
+            elif rcs_cohort_counts is not None and g in rcs_cohort_counts:
+                w = rcs_cohort_counts[g]
+            else:
+                w = data["n_treated"]
             effects_by_e[e].append(
                 (
                     (g, t),  # Keep track of the (g,t) pair
@@ -733,8 +763,16 @@ def _aggregate_event_study(
 
                     meat, _, _ = _compute_stratified_psu_meat(Psi, resolved_survey)
                     event_study_vcov = meat
+                elif (
+                    resolved_survey is not None
+                    and hasattr(resolved_survey, "uses_replicate_variance")
+                    and resolved_survey.uses_replicate_variance
+                ):
+                    # Replicate-weight: fall back to None (diagonal in HonestDiD)
+                    # until multivariate replicate VCV is implemented
+                    event_study_vcov = None
                 else:
-                    # Simple sum-of-outer-products (no survey or replicate-only)
+                    # No survey: simple sum-of-outer-products
                     event_study_vcov = Psi.T @ Psi
             except (ValueError, np.linalg.LinAlgError):
                 pass  # Fall back to diagonal (None)
diff --git a/diff_diff/staggered_results.py b/diff_diff/staggered_results.py
@@ -111,6 +111,7 @@ class CallawaySantAnnaResults:
     alpha: float = 0.05
     control_group: str = "never_treated"
     base_period: str = "varying"
+    panel: bool = True
     event_study_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None)
     group_effects: Optional[Dict[Any, Dict[str, Any]]] = field(default=None)
     influence_functions: Optional["np.ndarray"] = field(default=None, repr=False)
@@ -155,8 +156,8 @@ def summary(self, alpha: Optional[float] = None) -> str:
             "=" * 85,
             "",
             f"{'Total observations:':<30} {self.n_obs:>10}",
-            f"{'Treated units:':<30} {self.n_treated_units:>10}",
-            f"{'Never-treated units:':<30} {self.n_control_units:>10}",
+            f"{'Treated ' + ('obs:' if not self.panel else 'units:'):<30} {self.n_treated_units:>10}",
+            f"{'Control ' + ('obs:' if not self.panel else 'units:'):<30} {self.n_control_units:>10}",
             f"{'Treatment cohorts:':<30} {len(self.groups):>10}",
             f"{'Time periods:':<30} {len(self.time_periods):>10}",
             f"{'Control group:':<30} {self.control_group:>10}",
diff --git a/tests/test_honest_did.py b/tests/test_honest_did.py
@@ -1243,6 +1243,43 @@ def test_no_survey_gives_none_df(self):
         assert h_result.df_survey is None
         assert h_result.survey_metadata is None
 
+    def test_replicate_weight_uses_diagonal_fallback(self):
+        """Replicate-weight designs should NOT produce full event_study_vcov."""
+        from diff_diff import CallawaySantAnna, SurveyDesign, generate_staggered_data
+
+        data = generate_staggered_data(n_units=100, n_periods=5, seed=42)
+        unit_ids = data["unit"].unique()
+        n_units = len(unit_ids)
+        unit_map = {uid: i for i, uid in enumerate(unit_ids)}
+        idx = data["unit"].map(unit_map).values
+
+        # Create replicate weights (4 replicates)
+        rng = np.random.default_rng(42)
+        data["weight"] = (1.0 + 0.3 * (np.arange(n_units) % 3))[idx]
+        for k in range(4):
+            data[f"repwt_{k}"] = data["weight"] * rng.uniform(0.8, 1.2, len(data))
+            # Make constant within unit
+            unit_rw = data.groupby("unit")[f"repwt_{k}"].first()
+            data[f"repwt_{k}"] = data["unit"].map(unit_rw)
+
+        sd = SurveyDesign(
+            weights="weight",
+            replicate_weights=[f"repwt_{k}" for k in range(4)],
+            replicate_method="JK1",
+        )
+        cs_result = CallawaySantAnna().fit(
+            data,
+            "outcome",
+            "unit",
+            "period",
+            "first_treat",
+            survey_design=sd,
+            aggregate="event_study",
+        )
+
+        # event_study_vcov should be None (diagonal fallback for replicate designs)
+        assert cs_result.event_study_vcov is None
+
 
 # =============================================================================
 # Tests for Visualization (without matplotlib)
diff --git a/tests/test_staggered_rc.py b/tests/test_staggered_rc.py
@@ -349,3 +349,57 @@ def test_empty_cell_nan(self):
             v["effect"] for v in result.group_time_effects.values() if np.isfinite(v["effect"])
         ]
         assert len(finite_effects) > 0
+
+
+# =============================================================================
+# Methodology: IF corrections change SE
+# =============================================================================
+
+
+class TestIFCorrections:
+    """Verify RCS DR/IPW IF corrections are non-trivial."""
+
+    def test_dr_se_differs_from_reg_rc(self, rc_data_with_covariates):
+        """DR and reg should give different SEs in RCS (DR has IF corrections)."""
+        r_reg = CallawaySantAnna(estimation_method="reg", panel=False).fit(
+            rc_data_with_covariates,
+            "outcome",
+            "unit",
+            "period",
+            "first_treat",
+            covariates=["x1"],
+        )
+        r_dr = CallawaySantAnna(estimation_method="dr", panel=False).fit(
+            rc_data_with_covariates,
+            "outcome",
+            "unit",
+            "period",
+            "first_treat",
+            covariates=["x1"],
+        )
+        # SEs should differ (DR has nuisance IF corrections)
+        assert r_reg.overall_se != r_dr.overall_se
+
+    def test_panel_field_on_results(self, rc_data):
+        """panel=False should be reflected on CallawaySantAnnaResults."""
+        result = CallawaySantAnna(estimation_method="reg", panel=False).fit(
+            rc_data,
+            "outcome",
+            "unit",
+            "period",
+            "first_treat",
+        )
+        assert result.panel is False
+
+    def test_summary_labels_rcs(self, rc_data):
+        """Summary should use 'obs' labels for RCS, not 'units'."""
+        result = CallawaySantAnna(estimation_method="reg", panel=False).fit(
+            rc_data,
+            "outcome",
+            "unit",
+            "period",
+            "first_treat",
+        )
+        summary = result.summary()
+        assert "obs:" in summary
+        assert "units:" not in summary.split("\n")[3]  # Treated line