Skip to content

Commit f60ece2

Browse files
igerber and claude committed
Address PR #355 R8 P1: front-door FPC validation for implicit-PSU SDID bootstrap
When ``SurveyDesign(fpc=...)`` is declared without an explicit ``psu=``, ``bootstrap_utils.generate_rao_wu_weights`` (L654-L655) treats each unit as its own PSU. The helper rejects ``FPC < n_PSU`` mid-draw (L684-L688), so if FPC is set lower than the unit count (per stratum if stratified), every bootstrap draw raises ``ValueError``; ``_bootstrap_se`` swallows the error in its retry loop, and the user eventually sees a generic bootstrap-exhaustion message instead of a targeted FPC/design error.

Add a front-door validation on ``resolved_survey_unit`` after ``collapse_survey_to_unit_level``:

- unstratified: fpc >= total unit count;
- stratified: fpc_h >= per-stratum unit count.

Error messages point at the two actionable fixes (declare an explicit psu= column, or raise FPC).

Two regression tests added: ``test_fit_raises_on_implicit_psu_fpc_below_unit_count_unstratified`` and ``test_fit_raises_on_implicit_psu_fpc_below_stratum_unit_count``.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 5515fbe commit f60ece2

2 files changed

Lines changed: 92 additions & 0 deletions

File tree

diff_diff/synthetic_did.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,47 @@ def fit( # type: ignore[override]
439439
resolved_survey_unit = collapse_survey_to_unit_level(
440440
resolved_survey, data, unit, all_units_for_bootstrap,
441441
)
442+
# Front-door FPC validation for implicit-PSU Rao-Wu (PR #355
443+
# R8 P1). When psu is None but fpc is set,
444+
# ``generate_rao_wu_weights`` (bootstrap_utils.py L654-L655)
445+
# treats each unit as its own PSU and rejects
446+
# ``FPC < n_units`` per stratum mid-draw. ``_bootstrap_se``
447+
# catches that ``ValueError`` and keeps retrying, so the user
448+
# sees a generic bootstrap-exhaustion message instead of a
449+
# targeted FPC/design error. Validate upstream so the user
450+
# gets a clean error before the bootstrap loop even starts.
451+
if (
452+
resolved_survey_unit.psu is None
453+
and resolved_survey_unit.fpc is not None
454+
):
455+
if resolved_survey_unit.strata is None:
456+
n_units_total = len(resolved_survey_unit.weights)
457+
fpc_val = float(resolved_survey_unit.fpc[0])
458+
if fpc_val < n_units_total:
459+
raise ValueError(
460+
f"FPC ({fpc_val}) is less than the number of "
461+
f"units ({n_units_total}). With no explicit "
462+
"psu= column, SDID Rao-Wu treats each unit as "
463+
"its own PSU; FPC must be >= the number of "
464+
"units. Declare an explicit psu= column or "
465+
"increase FPC."
466+
)
467+
else:
468+
unique_strata = np.unique(resolved_survey_unit.strata)
469+
for h in unique_strata:
470+
mask_h = resolved_survey_unit.strata == h
471+
n_h_units = int(mask_h.sum())
472+
fpc_h = float(resolved_survey_unit.fpc[mask_h][0])
473+
if fpc_h < n_h_units:
474+
raise ValueError(
475+
f"FPC ({fpc_h}) in stratum {h} is less than "
476+
f"the number of units in that stratum "
477+
f"({n_h_units}). With no explicit psu= "
478+
"column, SDID Rao-Wu treats each unit as "
479+
"its own PSU within strata; FPC must be "
480+
">= the per-stratum unit count. Declare an "
481+
"explicit psu= column or increase FPC."
482+
)
442483
# Source w_control / w_treated from resolved_survey_unit.weights
443484
# rather than re-extracting raw panel columns. resolved_survey.weights
444485
# is normalized to mean=1 by SurveyDesign.resolve() (survey.py L189-

tests/test_methodology_sdid.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,57 @@ def test_fit_raises_on_zero_treated_mass_under_full_design(self):
909909
survey_design=SurveyDesign(weights="wt", strata="stratum", psu="psu"),
910910
)
911911

912+
def test_fit_raises_on_implicit_psu_fpc_below_unit_count_unstratified(self):
    """``fit()`` rejects an unstratified implicit-PSU design with FPC < n_units.

    Regression test for PR #355 R8 P1. With ``SurveyDesign(fpc=...)`` and
    no ``psu=``, the SDID Rao-Wu path
    (``bootstrap_utils.generate_rao_wu_weights`` L654-L655) treats every
    unit as its own PSU, and the helper rejects ``FPC < n_PSU`` mid-draw
    (``bootstrap_utils.py`` L684-L688). Without an upfront check each
    bootstrap draw raises, ``_bootstrap_se`` swallows the ``ValueError``
    in its retry loop, and the caller only sees a generic
    bootstrap-exhaustion error. ``fit()`` must therefore compare FPC with
    the unit count before the bootstrap is dispatched.
    """
    from diff_diff.survey import SurveyDesign

    panel = _make_panel(n_control=10, n_treated=3, seed=42)
    panel["wt"] = 1.0
    # 10 control + 3 treated = 13 implicit PSUs, yet the declared
    # population size is only 5 -- an infeasible design.
    panel["fpc_pop"] = 5.0

    expected_message = r"FPC.*less than the number of units"
    column_roles = dict(
        outcome="outcome",
        treatment="treated",
        unit="unit",
        time="period",
        post_periods=[5, 6, 7],
    )
    with pytest.raises(ValueError, match=expected_message):
        SyntheticDiD(variance_method="bootstrap", n_bootstrap=20, seed=1).fit(
            panel,
            **column_roles,
            survey_design=SurveyDesign(weights="wt", fpc="fpc_pop"),
        )
939+
940+
def test_fit_raises_on_implicit_psu_fpc_below_stratum_unit_count(self):
941+
"""Fit-time FPC validation fires per stratum under implicit PSU.
942+
943+
Mirror of the unstratified case but with strata present. Each
944+
stratum's FPC must be >= its unit count (PR #355 R8 P1).
945+
"""
946+
from diff_diff.survey import SurveyDesign
947+
948+
df = _make_panel(n_control=12, n_treated=4, seed=42)
949+
df["wt"] = 1.0
950+
df["stratum"] = df["unit"] % 2 # 2 strata, ~8 units each
951+
# FPC says 3 per stratum — infeasible for 8 implicit PSUs/stratum.
952+
df["fpc_pop"] = 3.0
953+
with pytest.raises(ValueError, match=r"FPC.*less than the number of units in that stratum"):
954+
SyntheticDiD(variance_method="bootstrap", n_bootstrap=20, seed=1).fit(
955+
df, outcome="outcome", treatment="treated",
956+
unit="unit", time="period",
957+
post_periods=[5, 6, 7],
958+
survey_design=SurveyDesign(
959+
weights="wt", strata="stratum", fpc="fpc_pop",
960+
),
961+
)
962+
912963
def test_bootstrap_scale_invariance_under_pweight_rescaling(self):
913964
"""Survey-bootstrap SE / p / CI are invariant to a global pweight rescaling.
914965

0 commit comments

Comments
 (0)