Narrow auto-inject guard to nest=False + regression test for nest=True

igerber · claude · igerber · commit ca7b6bfa5d0e · 2026-04-18T18:55:42.000-04:00
The previous round's guard fired on any varying-strata + omitted-psu
combination, which rejected `SurveyDesign(weights, strata, nest=True)`
unnecessarily. `SurveyDesign.resolve()` at `diff_diff/survey.py:299-302`
combines `(stratum, psu)` into globally-unique labels under nest=True,
so the auto-injected `psu=<group>` is re-labeled per stratum and the
cross-stratum uniqueness check passes. Only the `nest=False` default
path actually needs the up-front guard.

Narrows the guard to `not getattr(survey_design, "nest", False)` and
updates the error message to enumerate three actionable remediations
(constant-within-group strata, or `nest=True`, or explicit `psu`).

Adds `test_auto_inject_with_varying_strata_nest_true_succeeds` under
`TestSurveyWithinGroupValidation` covering the newly-accepted path:
match against explicit `SurveyDesign(weights, strata,
psu="group", nest=True)`: `overall_se` agrees to `rel=1e-6` and
`survey_metadata.df_survey` agrees exactly.
The regression test for the default `nest=False` path, which must
still raise (`test_auto_inject_with_varying_strata_raises`), remains
unchanged.

Updates fit() docstring and the REGISTRY.md survey IF expansion Note
to enumerate the three supported auto-inject paths: (1) strata constant
within group, (2) strata vary + nest=True, (3) strata vary + nest=False
(rejected with targeted ValueError).

All 338 tests pass (affected surface + slow MC coverage sim).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -650,7 +650,15 @@ def fit(
             **Strata and PSU may vary across cells of a group** but
             must be constant within each ``(g, t)`` cell (trivially
             true in one-obs-per-cell panels; enforced otherwise with
-            ``ValueError``). When ``n_bootstrap > 0`` and a survey
+            ``ValueError``). Three supported combinations under the
+            auto-injected ``psu=<group_col>``:
+            (1) strata constant within group (any ``nest`` flag works);
+            (2) strata vary within group **and** ``nest=True`` — the
+            resolver re-labels the synthesized ``psu`` uniquely within
+            strata; (3) strata vary within group **and** ``nest=False``
+            — rejected up front with a targeted ``ValueError``; pass
+            ``SurveyDesign(..., nest=True)`` or an explicit
+            ``psu=<col>`` with globally-unique labels instead. When ``n_bootstrap > 0`` and a survey
             design is supplied, the multiplier bootstrap operates at
             the PSU level (Hall-Mammen wild PSU bootstrap) — under the
             default auto-inject this collapses to a group-level
@@ -726,36 +734,43 @@ def fit(
                 or resolved_survey.replicate_weights.shape[1] == 0
             )
         ):
-            # Pre-auto-inject contract check: the auto-injected PSU
-            # column reuses group labels with nest=False, but the
-            # survey resolver enforces globally-unique PSU labels when
-            # nest=False and strata are present (see
-            # ``diff_diff/survey.py``). If strata varies within group,
-            # the synthesized PSU column collides across strata and
-            # resolution fails downstream with an opaque error. Flag
-            # that configuration up front with an actionable message
-            # pointing users to the explicit ``psu=<col>, nest=True``
-            # path (REGISTRY.md survey IF expansion Note).
-            if resolved_survey.strata is not None:
+            # Pre-auto-inject contract check: the auto-inject path
+            # synthesizes ``psu=<group>`` and preserves the user's
+            # ``nest`` flag. Under ``nest=False`` (the default), the
+            # survey resolver requires globally-unique PSU labels when
+            # strata are present; if strata varies within group, the
+            # synthesized PSU column reuses group labels across strata
+            # and trips the cross-stratum PSU uniqueness check at
+            # resolution time. Under ``nest=True`` the resolver
+            # re-labels ``(stratum, psu)`` uniquely within strata
+            # (``diff_diff/survey.py:299-302``), so varying strata is
+            # fine — let the auto-inject proceed. Only the
+            # ``nest=False`` + varying-strata + omitted-psu triple
+            # warrants an up-front targeted error.
+            if resolved_survey.strata is not None and not getattr(
+                survey_design, "nest", False
+            ):
                 _strata_varies_pre, _ = _strata_psu_vary_within_group(
                     resolved_survey, data, group, survey_weights,
                 )
                 if _strata_varies_pre:
                     raise ValueError(
                         "ChaisemartinDHaultfoeuille survey support: "
                         "strata that vary across cells of the same "
-                        "group require an explicit `psu=<col>` with "
-                        "`nest=True` so that `(stratum, psu)` pairs "
-                        "are globally unique. The default auto-"
-                        "injected `psu=<group>` path does NOT support "
-                        "this because the synthesized PSU column "
-                        "reuses group labels across strata and trips "
-                        "the cross-stratum PSU uniqueness check in "
-                        "survey resolution. Either (a) set strata "
-                        "constant within each group, or (b) pass "
-                        "`SurveyDesign(..., psu=<col>, nest=True)` "
-                        "with PSU labels that are unique within "
-                        "strata."
+                        "group require either an explicit "
+                        "`psu=<col>` (any column whose labels are "
+                        "globally unique within strata) or the "
+                        "original `SurveyDesign(..., nest=True)` "
+                        "flag so the auto-injected `psu=<group>` is "
+                        "re-labeled uniquely within strata by the "
+                        "resolver. The default `nest=False` auto-"
+                        "inject path reuses group labels across "
+                        "strata and trips the cross-stratum PSU "
+                        "uniqueness check in survey resolution. "
+                        "Either (a) set strata constant within each "
+                        "group, (b) pass `SurveyDesign(..., "
+                        "nest=True)`, or (c) pass an explicit "
+                        "`psu=<col>` with globally-unique labels."
                     )
 
             from diff_diff.survey import SurveyDesign as _SurveyDesign
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -649,7 +649,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param
 - [x] Design-2 switch-in/switch-out descriptive wrapper (Web Appendix Section 1.6)
 - [x] HonestDiD (Rambachan-Roth 2023) integration on placebo + event study surface
 - [x] Survey design support: pweight with strata/PSU/FPC via Taylor Series Linearization (analytical) **or replicate-weight variance (BRR/Fay/JK1/JKn/SDR)**, covering the main ATT surface, covariate adjustment (DID^X), heterogeneity testing, the TWFE diagnostic (fit and standalone `twowayfeweights()` helper), and HonestDiD bounds. Opt-in **PSU-level Hall-Mammen wild bootstrap** is also supported via `n_bootstrap > 0`.
-- **Note (Survey IF expansion — library convention):** Survey IF expansion is a library extension not in the dCDH papers (the paper's plug-in variance assumes iid sampling). The library convention builds observation-level `psi_i` by proportionally distributing per-group IF mass within weight share: either at the group level (`psi_i = U_centered[g] * w_i / W_g`, the previous convention) or at the per-`(g, t)` cell level via the cell-period allocator shipped in this release. Cell-level expansion: decompose `U[g]` into per-period attributions `U[g, t]`, cohort-center each column independently, then expand to observation level as `psi_i = U_centered_per_period[g_i, t_i] * (w_i / W_{g_i, t_i})`. Binder (1983) stratified-PSU variance aggregates the resulting `psi` at PSU level. **Post-period attribution convention:** each transition term in the IF sum (of the form `role_weight * (Y_{g, t} - Y_{g, t-1})` for DID_M or `S_g * (Y_{g, out} - Y_{g, ref})` for DID_l) is attributed as a single *difference* to the POST-period cell, not split into a `+Y_post` / `-Y_pre` pair across two cells. This is a library *convention*, not a theorem — adopted because it preserves the group-sum, PSU-sum, and cohort-sum identities of the previous group-level expansion (so Binder variance coincides with the group-level variance under the auto-injected `psu=group`) and because Monte Carlo coverage at nominal 95% is empirically close to nominal on a DGP where PSUs vary across the cells of each group (see `tests/test_dcdh_cell_period_coverage.py`). A covariance-aware two-cell allocator is a plausible alternative and may be worth exploring if future designs motivate an explicit observation-level IF derivation; the method currently in the library is **not derived from the observation-level survey linearization of the contrast** and makes no stronger claim than "coverage is approximately nominal under the tested DGPs and the group-sum identity holds exactly." 
Under within-group-constant PSU (the pre-allocator accepted input), per-cell sums telescope to `U_centered[g]` and Binder variance is byte-identical (up to single-ULP floating-point noise) to the previous group-level expansion. **Strata and PSU must be constant within each `(g, t)` cell** (trivially satisfied in one-obs-per-cell panels — the canonical dCDH structure); variation **across cells of a group** is supported by the allocator. Within-group-varying **weights** are supported as before. When `survey_design.psu` is not specified, `fit()` auto-injects `psu=<group column>` so the TSL variance, `df_survey`, and t-based inference match the per-group PSU structure. **Strata that vary across cells of a group require an explicit `psu=<col>` with `nest=True`** so that `(stratum, psu)` labels are globally unique; the auto-injected `psu=<group>` path does NOT support this (the synthesized PSU column would reuse group labels across strata and trip the cross-stratum PSU uniqueness check in `SurveyDesign.resolve()`). `fit()` detects that combination before survey resolution and raises a targeted `ValueError` pointing users at the explicit-`psu, nest=True` path. Under replicate-weight designs, the same cell-level `psi_i` is aggregated via Rao-Wu weight-ratio rescaling (`compute_replicate_if_variance` at `diff_diff/survey.py:1681`) rather than the Binder TSL formula. All five methods (BRR/Fay/JK1/JKn/SDR) are supported method-agnostically through the unified helper; the effective `df_survey` is reduced to `min(n_valid) - 1` across IF sites when some replicate solves fail (matching `efficient_did.py:1133-1135` and `triple_diff.py:676-686` precedents). Under DID^X, the first-stage residualization coefficient `theta_hat` is computed once on full-sample weights and treated as fixed (FWL plug-in IF convention) — per-replicate refits of `theta_hat` are not performed. 
**Scope limitations (follow-up PRs):** (a) `heterogeneity=` combined with within-group-varying PSU/strata raises `NotImplementedError` — the heterogeneity WLS `psi_obs` still uses the legacy group-level expansion, to be extended in PR 3; (b) `n_bootstrap > 0` combined with within-group-varying PSU raises `NotImplementedError` — the PSU-level Hall-Mammen wild bootstrap still uses the legacy group-level PSU map, to be extended in PR 4.
+- **Note (Survey IF expansion — library convention):** Survey IF expansion is a library extension not in the dCDH papers (the paper's plug-in variance assumes iid sampling). The library convention builds observation-level `psi_i` by proportionally distributing per-group IF mass within weight share: either at the group level (`psi_i = U_centered[g] * w_i / W_g`, the previous convention) or at the per-`(g, t)` cell level via the cell-period allocator shipped in this release. Cell-level expansion: decompose `U[g]` into per-period attributions `U[g, t]`, cohort-center each column independently, then expand to observation level as `psi_i = U_centered_per_period[g_i, t_i] * (w_i / W_{g_i, t_i})`. Binder (1983) stratified-PSU variance aggregates the resulting `psi` at PSU level. **Post-period attribution convention:** each transition term in the IF sum (of the form `role_weight * (Y_{g, t} - Y_{g, t-1})` for DID_M or `S_g * (Y_{g, out} - Y_{g, ref})` for DID_l) is attributed as a single *difference* to the POST-period cell, not split into a `+Y_post` / `-Y_pre` pair across two cells. This is a library *convention*, not a theorem — adopted because it preserves the group-sum, PSU-sum, and cohort-sum identities of the previous group-level expansion (so Binder variance coincides with the group-level variance under the auto-injected `psu=group`) and because Monte Carlo coverage at nominal 95% is empirically close to nominal on a DGP where PSUs vary across the cells of each group (see `tests/test_dcdh_cell_period_coverage.py`). A covariance-aware two-cell allocator is a plausible alternative and may be worth exploring if future designs motivate an explicit observation-level IF derivation; the method currently in the library is **not derived from the observation-level survey linearization of the contrast** and makes no stronger claim than "coverage is approximately nominal under the tested DGPs and the group-sum identity holds exactly." 
Under within-group-constant PSU (the pre-allocator accepted input), per-cell sums telescope to `U_centered[g]` and Binder variance is byte-identical (up to single-ULP floating-point noise) to the previous group-level expansion. **Strata and PSU must be constant within each `(g, t)` cell** (trivially satisfied in one-obs-per-cell panels — the canonical dCDH structure); variation **across cells of a group** is supported by the allocator. Within-group-varying **weights** are supported as before. When `survey_design.psu` is not specified, `fit()` auto-injects `psu=<group column>` so the TSL variance, `df_survey`, and t-based inference match the per-group PSU structure. **Strata that vary across cells of a group require either an explicit `psu=<col>` or the original `SurveyDesign(..., nest=True)` flag** — under `nest=True` the resolver combines `(stratum, psu)` into globally-unique labels, so the auto-injected `psu=<group>` is re-labeled per stratum and the cell allocator proceeds. Only the `nest=False` + varying-strata + omitted-psu combination is rejected up front with a targeted `ValueError` at `fit()` time (the synthesized PSU column would reuse group labels across strata and trip the cross-stratum PSU uniqueness check in `SurveyDesign.resolve()`). Under replicate-weight designs, the same cell-level `psi_i` is aggregated via Rao-Wu weight-ratio rescaling (`compute_replicate_if_variance` at `diff_diff/survey.py:1681`) rather than the Binder TSL formula. All five methods (BRR/Fay/JK1/JKn/SDR) are supported method-agnostically through the unified helper; the effective `df_survey` is reduced to `min(n_valid) - 1` across IF sites when some replicate solves fail (matching `efficient_did.py:1133-1135` and `triple_diff.py:676-686` precedents). Under DID^X, the first-stage residualization coefficient `theta_hat` is computed once on full-sample weights and treated as fixed (FWL plug-in IF convention) — per-replicate refits of `theta_hat` are not performed. 
**Scope limitations (follow-up PRs):** (a) `heterogeneity=` combined with within-group-varying PSU/strata raises `NotImplementedError` — the heterogeneity WLS `psi_obs` still uses the legacy group-level expansion, to be extended in PR 3; (b) `n_bootstrap > 0` combined with within-group-varying PSU raises `NotImplementedError` — the PSU-level Hall-Mammen wild bootstrap still uses the legacy group-level PSU map, to be extended in PR 4.
 - **Note (survey + bootstrap contract):** When `survey_design` and `n_bootstrap > 0` are both active, the bootstrap uses Hall-Mammen wild multiplier weights (Rademacher/Mammen/Webb) **at the PSU level**. Under the default auto-injected `psu=group`, the PSU coincides with the group so the wild bootstrap is a clean group-level clustered bootstrap (identity-map fast path, bit-identical to the non-survey multiplier bootstrap). When the user passes an explicit strictly-coarser PSU (e.g., `psu=state` with groups at county level), the IF contributions of all groups within a PSU receive the same bootstrap multiplier — the standard Hall-Mammen wild PSU bootstrap. Strata do not participate in the bootstrap randomization (they contribute only through the analytical TSL variance); this is conservative when strata differ substantially in variance. A `UserWarning` fires only when PSU is strictly coarser than group. **Scope note (cell-period allocator):** The PSU-level bootstrap uses a group-level `group_id_to_psu_code` map and therefore requires PSU to be constant within each group. Combining `n_bootstrap > 0` with a PSU that varies within group raises `NotImplementedError`; the cell-level Hall-Mammen extension is deferred to a follow-up PR. The analytical TSL variance fully supports within-group-varying PSU via the cell-period allocator — use `n_bootstrap=0` for those designs. **Replicate-weight designs and `n_bootstrap > 0` are mutually exclusive** (replicate variance is closed-form; bootstrap would double-count variance) — the combination raises `NotImplementedError`, matching `efficient_did.py:989`, `staggered.py:1869`, `two_stage.py:251-253`. For HonestDiD bounds under replicate weights, the replicate-effective `df_survey = min(resolved_survey.df_survey, min(n_valid_across_sites) - 1)` propagates to t-critical values — capped by the design's QR-rank-based df so a rank-deficient replicate matrix never produces a larger effective df than the design supports. 
When `resolved_survey.df_survey` is undefined (QR-rank ≤ 1), the effective df stays `None` and all inference fields (including HonestDiD bounds) are NaN — per-site `n_valid` cannot rescue a rank-deficient design.
 
 ---
diff --git a/tests/test_survey_dcdh.py b/tests/test_survey_dcdh.py
@@ -1172,6 +1172,46 @@ def test_bootstrap_with_varying_psu_raises(self, base_data):
                 survey_design=sd,
             )
 
+    def test_auto_inject_with_varying_strata_nest_true_succeeds(self, base_data):
+        """When strata varies across cells of a group and the user
+        passes ``nest=True`` with no explicit ``psu``, the auto-inject
+        path is valid: ``SurveyDesign.resolve()`` combines
+        ``(stratum, psu)`` into globally-unique labels via the
+        nest=True path (``diff_diff/survey.py:299-302``), so the
+        cross-stratum PSU uniqueness check is satisfied. Byte-check
+        against the explicit ``SurveyDesign(..., psu="group",
+        nest=True)`` baseline — both paths resolve to the same design.
+        """
+        df_ = base_data.copy()
+        df_["pw"] = 1.0
+        df_["stratum"] = df_["period"] % 2
+        sd_auto = SurveyDesign(weights="pw", strata="stratum", nest=True)
+        sd_explicit = SurveyDesign(
+            weights="pw", strata="stratum", psu="group", nest=True,
+        )
+        r_auto = ChaisemartinDHaultfoeuille(seed=1).fit(
+            df_, outcome="outcome", group="group",
+            time="period", treatment="treatment",
+            survey_design=sd_auto, L_max=2,
+        )
+        r_explicit = ChaisemartinDHaultfoeuille(seed=1).fit(
+            df_, outcome="outcome", group="group",
+            time="period", treatment="treatment",
+            survey_design=sd_explicit, L_max=2,
+        )
+        assert np.isfinite(r_auto.overall_att)
+        assert np.isfinite(r_auto.overall_se)
+        if np.isfinite(r_auto.overall_se) and np.isfinite(r_explicit.overall_se):
+            assert r_auto.overall_se == pytest.approx(
+                r_explicit.overall_se, rel=1e-6
+            )
+        assert r_auto.survey_metadata is not None
+        assert r_explicit.survey_metadata is not None
+        assert (
+            r_auto.survey_metadata.df_survey
+            == r_explicit.survey_metadata.df_survey
+        )
+
     def test_auto_inject_with_varying_strata_raises(self, base_data):
         """Auto-injected `psu=<group>` with nest=False cannot honor
         strata that vary across cells of a group — the synthesized PSU