fix: address CI review round 9 - last_cohort guard, scalar validation

igerber · claude · igerber · commit b91a48549f93 · 2026-04-12T16:04:49.000-04:00
- P1: reject control_group='last_cohort' (EfficientDiD) with survey_config
  (needs multi-cohort DGP, same as not_yet_treated)
- P2: add psu_re_sd and fpc_per_stratum finiteness validation to
  SurveyPowerConfig.__post_init__
- Update REGISTRY.md to list last_cohort alongside other restrictions
- Add regression tests for last_cohort rejection and scalar validation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/power.py b/diff_diff/power.py
@@ -137,6 +137,10 @@ def __post_init__(self) -> None:
                 f"weight_variation must be 'none', 'moderate', or 'high', "
                 f"got '{self.weight_variation}'"
             )
+        if not np.isfinite(self.psu_re_sd) or self.psu_re_sd < 0:
+            raise ValueError(f"psu_re_sd must be finite and >= 0, got {self.psu_re_sd}")
+        if not np.isfinite(self.fpc_per_stratum):
+            raise ValueError(f"fpc_per_stratum must be finite, got {self.fpc_per_stratum}")
         if self.icc is not None and not (0 < self.icc < 1):
             raise ValueError(f"icc must be between 0 and 1 (exclusive), got {self.icc}")
         if self.icc is not None and self.psu_re_sd != 2.0:
@@ -1997,11 +2001,11 @@ def simulate_power(
         # cohort_periods/never_treated_frac overrides.
         control_group = getattr(estimator, "control_group", "never_treated")
         clean_control = getattr(estimator, "clean_control", None)
-        if control_group == "not_yet_treated":
+        if control_group in ("not_yet_treated", "last_cohort"):
             raise ValueError(
-                f"survey_config does not support control_group='not_yet_treated' "
+                f"survey_config does not support control_group='{control_group}' "
                 f"(requires multi-cohort DGP). Use the custom data_generator "
-                f"path for survey power with not-yet-treated controls."
+                f"path for survey power with this control-group design."
             )
         if clean_control == "strict":
             raise ValueError(
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -2255,7 +2255,7 @@ n = 2(t_{α/2} + t_{1-κ})² σ² / MDE²
 - **Note:** The simulation-based power registry (`simulate_power`, `simulate_mde`, `simulate_sample_size`) uses a single-cohort staggered DGP by default. Estimators configured with `control_group="not_yet_treated"`, `clean_control="strict"`, or `anticipation>0` will receive a `UserWarning` because the default DGP does not match their identification strategy. Users must supply `data_generator_kwargs` (e.g., `cohort_periods=[2, 4]`, `never_treated_frac=0.0`) or a custom `data_generator` to match the estimator design.
 - **Note:** The `TripleDifference` registry adapter uses `generate_ddd_data`, a fixed 2×2×2 factorial DGP (group × partition × time). The `n_periods`, `treatment_period`, and `treatment_fraction` parameters are ignored — DDD always simulates 2 periods with balanced groups. `n_units` is mapped to `n_per_cell = max(2, n_units // 8)` (effective total N = `n_per_cell × 8`), so non-multiples of 8 are rounded down and values below 16 are clamped to 16. A `UserWarning` is emitted when simulation inputs differ from the effective DDD design. When rounding occurs, all result objects (`SimulationPowerResults`, `SimulationMDEResults`, `SimulationSampleSizeResults`) set `effective_n_units` to the actual sample size used; it is `None` when no rounding occurred. `simulate_sample_size()` snaps bisection candidates to multiples of 8 so that `required_n` is always a realizable DDD sample size. Passing `n_per_cell` in `data_generator_kwargs` suppresses the effective-N rounding warning but not warnings for ignored parameters (`n_periods`, `treatment_period`, `treatment_fraction`).
 - **Note:** The analytical power methods (`PowerAnalysis.power/mde/sample_size` and the `compute_power/compute_mde/compute_sample_size` convenience functions) accept a `deff` parameter (survey design effect, default 1.0). This inflates variance multiplicatively: `Var(ATT) *= deff`, and inflates required sample size: `n_total *= deff`. The `deff` parameter is **not redundant** with `rho` (intra-cluster correlation): `rho` models within-unit serial correlation in panel data via the Moulton factor `1 + (T-1)*rho`, while `deff` models the survey design effect from stratified multi-stage sampling (clustering + unequal weighting). A survey panel study may need both. Values `deff > 0` are accepted; `deff < 1.0` (net variance reduction, e.g., from stratification gain) emits a warning.
-- **Note:** The simulation-based power functions (`simulate_power/simulate_mde/simulate_sample_size`) accept a `survey_config` parameter (`SurveyPowerConfig` dataclass). When set, the simulation loop uses `generate_survey_did_data` instead of the default registry DGP, and automatically injects `SurveyDesign(weights="weight", strata="stratum", psu="psu", fpc="fpc")` into the estimator's `fit()` call. Supported estimators: DifferenceInDifferences, TwoWayFixedEffects, MultiPeriodDiD, CallawaySantAnna, SunAbraham, ImputationDiD, TwoStageDiD, StackedDiD, EfficientDiD. Unsupported (raises `ValueError`): TROP, SyntheticDiD, TripleDifference (generate_survey_did_data produces staggered cohort data incompatible with factor-model/DDD DGPs). `survey_config` and `data_generator` are mutually exclusive. `data_generator_kwargs` may not contain keys managed by `SurveyPowerConfig` (n_strata, psu_per_stratum, etc.) but may contain passthrough DGP params (unit_fe_sd, add_covariates, strata_sizes). Repeated cross-section survey power (`panel=False`) is only supported for `CallawaySantAnna(panel=False)` with a matching `data_generator_kwargs={"panel": False}`; both mismatch directions are rejected. `estimator_kwargs` may not contain `survey_design` when `survey_config` is set (use `SurveyPowerConfig(survey_design=...)` instead). Estimator settings that require a multi-cohort DGP (`control_group="not_yet_treated"`, `clean_control="strict"`) are rejected because the survey DGP uses a single cohort; use the custom `data_generator` path for these configurations. `simulate_sample_size` raises the bisection floor to `n_strata * psu_per_stratum * 2` to ensure viable survey structure and rejects `strata_sizes` in `data_generator_kwargs` (it depends on `n_units` which varies during bisection).
+- **Note:** The simulation-based power functions (`simulate_power/simulate_mde/simulate_sample_size`) accept a `survey_config` parameter (`SurveyPowerConfig` dataclass). When set, the simulation loop uses `generate_survey_did_data` instead of the default registry DGP, and automatically injects `SurveyDesign(weights="weight", strata="stratum", psu="psu", fpc="fpc")` into the estimator's `fit()` call. Supported estimators: DifferenceInDifferences, TwoWayFixedEffects, MultiPeriodDiD, CallawaySantAnna, SunAbraham, ImputationDiD, TwoStageDiD, StackedDiD, EfficientDiD. Unsupported (raises `ValueError`): TROP, SyntheticDiD, TripleDifference (generate_survey_did_data produces staggered cohort data incompatible with factor-model/DDD DGPs). `survey_config` and `data_generator` are mutually exclusive. `data_generator_kwargs` may not contain keys managed by `SurveyPowerConfig` (n_strata, psu_per_stratum, etc.) but may contain passthrough DGP params (unit_fe_sd, add_covariates, strata_sizes). Repeated cross-section survey power (`panel=False`) is only supported for `CallawaySantAnna(panel=False)` with a matching `data_generator_kwargs={"panel": False}`; both mismatch directions are rejected. `estimator_kwargs` may not contain `survey_design` when `survey_config` is set (use `SurveyPowerConfig(survey_design=...)` instead). Estimator settings that require a multi-cohort DGP (`control_group="not_yet_treated"`, `control_group="last_cohort"`, `clean_control="strict"`) are rejected because the survey DGP uses a single cohort; use the custom `data_generator` path for these configurations. `simulate_sample_size` raises the bisection floor to `n_strata * psu_per_stratum * 2` to ensure viable survey structure and rejects `strata_sizes` in `data_generator_kwargs` (it depends on `n_units` which varies during bisection).
 
 **Reference implementation(s):**
 - R: `pwr` package (general), `DeclareDesign` (simulation-based)
diff --git a/tests/test_power.py b/tests/test_power.py
@@ -2602,6 +2602,17 @@ def test_survey_rejects_not_yet_treated(self):
                 **_SIM_KW,
             )
 
+    def test_survey_rejects_last_cohort(self):
+        """control_group='last_cohort' rejected (needs multi-cohort DGP)."""
+        with pytest.raises(ValueError, match="last_cohort"):
+            simulate_power(
+                EfficientDiD(control_group="last_cohort"),
+                survey_config=_SURVEY_CFG,
+                n_simulations=1,
+                seed=42,
+                **_SIM_KW,
+            )
+
     def test_survey_rejects_clean_control_strict(self):
         """clean_control='strict' rejected (needs multi-cohort DGP)."""
         with pytest.raises(ValueError, match="clean_control.*strict"):
@@ -2629,3 +2640,15 @@ def test_survey_sample_size_rejects_strata_sizes(self):
                 sigma=1.0,
                 progress=False,
             )
+
+    def test_survey_config_validation_psu_re_sd_negative(self):
+        with pytest.raises(ValueError, match="psu_re_sd"):
+            SurveyPowerConfig(psu_re_sd=-1.0)
+
+    def test_survey_config_validation_psu_re_sd_nan(self):
+        with pytest.raises(ValueError, match="psu_re_sd"):
+            SurveyPowerConfig(psu_re_sd=np.nan)
+
+    def test_survey_config_validation_fpc_nan(self):
+        with pytest.raises(ValueError, match="fpc_per_stratum must be finite"):
+            SurveyPowerConfig(fpc_per_stratum=np.inf)