Fix CI review R10: within-group-constant strata/PSU + sanitize SE helper

igerber · claude · igerber · commit 0266d59ac66c · 2026-04-17T07:03:59.000-04:00
- P1 #1/#2: Add _validate_group_constant_strata_psu() helper and call it from fit() after the weight_type/replicate-weights checks. The dCDH IF expansion psi_i = U[g] * (w_i / W_g) treats each group as the effective sampling unit; when strata or PSU vary within group it silently spreads horizon-specific IF mass across observations in different PSUs, contaminating the stratified-PSU variance. Walk back the overstated claim at the old line 669 comment to match. Within-group-varying weights remain supported. - P1 #3: _survey_se_from_group_if now filters zero-weight rows before np.unique/np.bincount so NaN / non-comparable group IDs on excluded subpopulation rows cannot crash SE factorization. psi stays full-length with zeros in excluded positions to preserve alignment with resolved.strata / resolved.psu inside compute_survey_if_variance. - REGISTRY.md line 652 Note updated: explicitly states the within-group-constant strata/PSU requirement and the within-group-varying weights support. - Tests: new TestSurveyWithinGroupValidation class (4 tests — rejects varying PSU, rejects varying strata, accepts varying weights, and ignores zero-weight rows during the constancy check) plus TestZeroWeightSubpopulation.test_zero_weight_row_with_nan_group_id. All 268 targeted tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -666,9 +666,15 @@ def fit(
                     "Use strata/PSU/FPC for design-based inference via Taylor "
                     "Series Linearization."
                 )
-            # No group-constant survey validation: the IF expansion
-            # psi_i = U[g] * (w_i / W_g) handles observation-level
-            # variation in weights, strata, and PSU within groups.
+            # Within-group-constant PSU/strata is required: the IF
+            # expansion psi_i = U[g] * (w_i / W_g) supports within-group
+            # variation in WEIGHTS (each obs contributes proportionally),
+            # but PSU and strata must be constant within group — the
+            # group is treated as the effective sampling unit for the
+            # Binder stratified-PSU variance formula.
+            _validate_group_constant_strata_psu(
+                resolved_survey, data, group, survey_weights,
+            )
 
         # Design-2 precondition: requires drop_larger_lower=False
         if design2 and self.drop_larger_lower:
@@ -4696,6 +4702,61 @@ def _plugin_se(U_centered: np.ndarray, divisor: int) -> float:
     return float(np.sqrt(sigma_hat_sq) / np.sqrt(divisor))
 
 
+def _validate_group_constant_strata_psu(
+    resolved: Any,
+    data: pd.DataFrame,
+    group_col: str,
+    survey_weights: Optional[np.ndarray],
+) -> None:
+    """Reject survey designs where strata or PSU vary within group.
+
+    The dCDH IF expansion ``psi_i = U[g] * (w_i / W_g)`` treats the
+    group as the effective sampling unit for design-based variance.
+    When strata or PSU vary within group, the expansion silently spreads
+    horizon-specific IF mass onto observations whose survey stratum or
+    PSU differs from the rest of the group, contaminating the
+    stratified-PSU variance. Reject those designs with a clear error.
+
+    Zero-weight rows are excluded from the check (subpopulation
+    contract — an excluded row with a different stratum/PSU label does
+    not actually participate in the variance).
+    """
+    if resolved is None:
+        return
+    pos_mask = np.asarray(survey_weights) > 0
+    g_eff = np.asarray(data[group_col].values)[pos_mask]
+    if resolved.strata is not None:
+        s_eff = np.asarray(resolved.strata)[pos_mask]
+        df_s = pd.DataFrame({"g": g_eff, "s": s_eff})
+        varying = df_s.groupby("g")["s"].nunique()
+        bad = varying[varying > 1]
+        if len(bad) > 0:
+            raise ValueError(
+                f"ChaisemartinDHaultfoeuille survey support requires "
+                f"strata to be constant within group, but "
+                f"{len(bad)} group(s) have multiple strata "
+                f"(examples: {bad.index.tolist()[:5]}). The IF "
+                f"expansion psi_i = U[g] * (w_i / W_g) treats the "
+                f"group as the effective sampling unit for stratified "
+                f"design-based variance."
+            )
+    if resolved.psu is not None:
+        p_eff = np.asarray(resolved.psu)[pos_mask]
+        df_p = pd.DataFrame({"g": g_eff, "p": p_eff})
+        varying = df_p.groupby("g")["p"].nunique()
+        bad = varying[varying > 1]
+        if len(bad) > 0:
+            raise ValueError(
+                f"ChaisemartinDHaultfoeuille survey support requires "
+                f"PSU to be constant within group, but "
+                f"{len(bad)} group(s) have multiple PSUs "
+                f"(examples: {bad.index.tolist()[:5]}). The IF "
+                f"expansion psi_i = U[g] * (w_i / W_g) treats the "
+                f"group as the effective sampling unit for stratified "
+                f"design-based variance."
+            )
+
+
 def _compute_se(
     U_centered: np.ndarray,
     divisor: int,
@@ -4762,20 +4823,37 @@ def _survey_se_from_group_if(
     weights = obs_survey_info["weights"]
     resolved = obs_survey_info["resolved"]
 
+    # Zero-weight rows are out-of-sample (SurveyDesign.subpopulation()).
+    # Skip them before the group-ID factorization so NaN / non-comparable
+    # group IDs on excluded rows cannot crash np.unique. psi stays full-
+    # length with zeros in excluded positions so the alignment with
+    # resolved.strata / resolved.psu inside compute_survey_if_variance
+    # is preserved.
+    weights_arr = np.asarray(weights, dtype=np.float64)
+    pos_mask = weights_arr > 0
+    n_obs = len(weights_arr)
+    psi = np.zeros(n_obs, dtype=np.float64)
+
+    if not pos_mask.any():
+        return float("nan")
+
+    gids_eff = np.asarray(group_ids)[pos_mask]
+    w_eff = weights_arr[pos_mask]
+
     # Build group → U_centered lookup (vectorized via factorization)
     group_to_u = {gid: U_centered[idx] for idx, gid in enumerate(eligible_groups)}
 
-    # Map group IFs to observation level
-    u_obs = np.array([group_to_u.get(gid, 0.0) for gid in group_ids])
+    # Map group IFs to observation level (effective sample only)
+    u_obs_eff = np.array([group_to_u.get(gid, 0.0) for gid in gids_eff])
 
-    # Compute per-group weight totals W_g via bincount
-    unique_gids, inverse = np.unique(group_ids, return_inverse=True)
-    w_totals_per_group = np.bincount(inverse, weights=weights)
-    w_obs_total = w_totals_per_group[inverse]
+    # Compute per-group weight totals W_g via bincount on effective sample
+    unique_gids, inverse = np.unique(gids_eff, return_inverse=True)
+    w_totals_per_group = np.bincount(inverse, weights=w_eff)
+    w_obs_total_eff = w_totals_per_group[inverse]
 
     # Expand to observation level: psi_i = U[g] * (w_i / W_g)
-    safe_w = np.where(w_obs_total > 0, w_obs_total, 1.0)
-    psi = u_obs * (weights / safe_w)
+    safe_w = np.where(w_obs_total_eff > 0, w_obs_total_eff, 1.0)
+    psi[pos_mask] = u_obs_eff * (w_eff / safe_w)
 
     variance = compute_survey_if_variance(psi, resolved)
     if not np.isfinite(variance) or variance < 0:
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -649,7 +649,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param
 - [x] Design-2 switch-in/switch-out descriptive wrapper (Web Appendix Section 1.6)
 - [x] HonestDiD (Rambachan-Roth 2023) integration on placebo + event study surface
 - [x] Survey design support: pweight with strata/PSU/FPC via Taylor Series Linearization, covering the main ATT surface, covariate adjustment (DID^X), heterogeneity testing, the TWFE diagnostic (fit and standalone `twowayfeweights()` helper), and HonestDiD bounds. Replicate weights and PSU-level bootstrap deferred.
-- **Note:** Survey IF expansion (`psi_i = U[g] * (w_i / W_g)`) is a library extension not in the dCDH papers. The paper's plug-in variance assumes iid sampling; the TSL variance accounts for complex survey design by expanding group-level influence functions to observation level proportionally to survey weights, then applying the standard Binder (1983) stratified PSU variance formula.
+- **Note:** Survey IF expansion (`psi_i = U[g] * (w_i / W_g)`) is a library extension not in the dCDH papers. The paper's plug-in variance assumes iid sampling; the TSL variance accounts for complex survey design by expanding group-level influence functions to observation level proportionally to survey weights, then applying the standard Binder (1983) stratified PSU variance formula. The expansion treats each group as the effective sampling unit; **strata and PSU must therefore be constant within group** (validated in `fit()` — designs with mixed strata or PSU labels within a single group raise `ValueError`). Within-group-varying **weights** are supported (each observation contributes proportionally).
 - **Note (survey + bootstrap fallback):** When `survey_design` and `n_bootstrap > 0` are both active, the multiplier bootstrap uses group-level Rademacher/Mammen/Webb weights rather than PSU-level resampling. A `UserWarning` is emitted from `fit()`. This is conservative when groups are finer than PSUs; a PSU-level survey bootstrap is deferred to a future release. For design-based analytical variance, the TSL path (non-bootstrap) is the recommended contract.
 
 ---
diff --git a/tests/test_survey_dcdh.py b/tests/test_survey_dcdh.py
@@ -861,6 +861,26 @@ def test_zero_weight_row_with_nan_outcome(self, base_data):
         )
         assert np.isfinite(result.overall_att)
 
+    def test_zero_weight_row_with_nan_group_id(self, base_data):
+        """A zero-weight row with NaN group id must not crash the SE
+        factorization. SurveyDesign.subpopulation() contract."""
+        df_ = base_data.copy()
+        df_["pw"] = 1.0
+        # Cast group to object to allow NaN without coercion errors
+        df_["group"] = df_["group"].astype(object)
+        sample = df_.iloc[0].copy()
+        sample["group"] = np.nan
+        sample["pw"] = 0.0
+        df_ = pd.concat([df_, pd.DataFrame([sample])], ignore_index=True)
+        sd = SurveyDesign(weights="pw")
+        # Must succeed — zero-weight row's NaN group id is out-of-sample
+        result = ChaisemartinDHaultfoeuille(seed=1).fit(
+            df_, outcome="outcome", group="group",
+            time="period", treatment="treatment",
+            survey_design=sd,
+        )
+        assert np.isfinite(result.overall_att)
+
     def test_zero_weight_row_with_nan_control(self, base_data):
         """A zero-weight row with NaN in a control column must not abort
         the DID^X path, and the covariate cell aggregation must use only
@@ -1010,3 +1030,78 @@ def test_survey_design2_runs(self):
         # switch_in and switch_out mean effects should be finite
         assert np.isfinite(r.design2_effects["switch_in"]["mean_effect"])
         assert np.isfinite(r.design2_effects["switch_out"]["mean_effect"])
+
+
+# ── Test: Within-group constancy of strata and PSU ──────────────────
+
+
+class TestSurveyWithinGroupValidation:
+    """Survey designs with strata or PSU varying within a single group
+    are rejected because the dCDH IF expansion treats the group as the
+    effective sampling unit."""
+
+    def test_rejects_varying_psu_within_group(self, base_data):
+        df_ = base_data.copy()
+        df_["pw"] = 1.0
+        df_["stratum"] = 0
+        # PSU varies within each group (alternates by period)
+        df_["psu"] = df_["period"] % 2
+        sd = SurveyDesign(weights="pw", strata="stratum", psu="psu")
+        with pytest.raises(ValueError, match="PSU to be constant within group"):
+            ChaisemartinDHaultfoeuille(seed=1).fit(
+                df_, outcome="outcome", group="group",
+                time="period", treatment="treatment",
+                survey_design=sd,
+            )
+
+    def test_rejects_varying_strata_within_group(self, base_data):
+        df_ = base_data.copy()
+        df_["pw"] = 1.0
+        # Stratum varies within each group
+        df_["stratum"] = df_["period"] % 2
+        # Give each obs a unique PSU label so the SurveyDesign resolver
+        # doesn't reject on cross-stratum PSU reuse — we want our
+        # within-group strata check to fire first.
+        df_["psu"] = np.arange(len(df_))
+        sd = SurveyDesign(weights="pw", strata="stratum", psu="psu")
+        with pytest.raises(ValueError, match="strata to be constant within group"):
+            ChaisemartinDHaultfoeuille(seed=1).fit(
+                df_, outcome="outcome", group="group",
+                time="period", treatment="treatment",
+                survey_design=sd,
+            )
+
+    def test_accepts_varying_weights_within_group(self, base_data):
+        """Within-group-varying pweights remain supported — the expansion
+        psi_i = U[g] * (w_i / W_g) handles obs-level weight variation."""
+        df_ = base_data.copy()
+        rng = np.random.default_rng(7)
+        df_["pw"] = rng.uniform(0.5, 2.0, size=len(df_))
+        sd = SurveyDesign(weights="pw")
+        result = ChaisemartinDHaultfoeuille(seed=1).fit(
+            df_, outcome="outcome", group="group",
+            time="period", treatment="treatment",
+            survey_design=sd,
+        )
+        assert np.isfinite(result.overall_att)
+
+    def test_rejection_excludes_zero_weight_rows(self, base_data):
+        """A zero-weight row with a different PSU from its group must
+        not trigger rejection — it is out-of-sample by the
+        subpopulation contract and does not enter the variance."""
+        df_ = base_data.copy()
+        df_["pw"] = 1.0
+        df_["stratum"] = 0
+        df_["psu"] = 0
+        # Inject a zero-weight row with a different PSU
+        sample = df_.iloc[0].copy()
+        sample["psu"] = 99  # would violate constancy if counted
+        sample["pw"] = 0.0
+        df_ = pd.concat([df_, pd.DataFrame([sample])], ignore_index=True)
+        sd = SurveyDesign(weights="pw", strata="stratum", psu="psu")
+        result = ChaisemartinDHaultfoeuille(seed=1).fit(
+            df_, outcome="outcome", group="group",
+            time="period", treatment="treatment",
+            survey_design=sd,
+        )
+        assert np.isfinite(result.overall_att)