Skip to content

Commit 2c325f3

Browse files
igerber and claude committed
Address PR #363 R6 review (1 P2 + 1 P3)
R6 P2 (event-study opt-out overhead): cband=False on the weighted
event-study path no longer allocates the stacked (G, H) IF matrix or
forces per-horizon IF return on the weights= shortcut. Split two flags
internally:

    needs_per_horizon_if    = survey= path OR (weights= AND cband=True)
    needs_stacked_if_matrix = weights= AND cband=True
                              (alias for weighted_es AND cband)

- Psi allocation gated on needs_stacked_if_matrix.
- _fit_continuous force_return_influence gated on
  (needs_stacked_if_matrix AND resolved_survey_unit_full is None) —
  under survey= path, _fit_continuous returns the IF anyway via its
  resolved_survey_unit gate, so no extra cost.
- _fit_mass_point_2sls return_influence gated on needs_per_horizon_if —
  survey= path needs the per-horizon IF for the Binder-TSL override
  regardless of cband.

Net effect: cband=False + weights= shortcut + weighted_es skips the
O(GH) Psi allocation and the per-horizon IF work entirely. cband=True
paths and survey= paths unchanged.

R6 P3 (event-study survey= integration coverage): added two end-to-end
integration tests for the previously-unguarded positive-path
estimator-level survey= + aggregate='event_study' dispatch:

- test_survey_event_study_continuous_end_to_end: continuous_at_zero +
  SurveyDesign(strata='stratum') — asserts
  variance_formula='survey_binder_tsl',
  survey_metadata.df_survey=G-n_strata, cband_* populated, PSU dispatch
  through _aggregate_unit_resolved_survey.
- test_survey_event_study_mass_point_end_to_end: mass_point +
  SurveyDesign(strata=...) — asserts
  variance_formula='survey_binder_tsl_2sls' and that the 2SLS IF flows
  through per-horizon Binder-TSL + sup-t bootstrap.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 8d7cf94 commit 2c325f3

2 files changed

Lines changed: 138 additions & 19 deletions

File tree

diff_diff/had.py

Lines changed: 36 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -4127,11 +4127,18 @@ def _fit_event_study(
41274127
# carries the full-design effective_n / n_psu / etc.).
41284128
n_obs_arr = np.full(n_horizons, n_units, dtype=np.int64)
41294129

4130-
# Per-horizon IF matrix on the weighted path (shape (G, H)); drives
4131-
# both per-horizon Binder-TSL variance (already composed inside
4132-
# ``_fit_continuous`` for continuous, or explicitly below for
4133-
# mass-point) AND the shared-PSU multiplier bootstrap for sup-t.
4134-
if weighted_es:
4130+
# Two IF-consumption flags (review R6 P2): the PER-HORIZON IF is
4131+
# needed when the survey= path composes Binder-TSL variance (via
4132+
# compute_survey_if_variance inside _fit_continuous or the
4133+
# mass-point override below); the STACKED (G, H) IF matrix is
4134+
# needed only when the sup-t multiplier bootstrap runs
4135+
# (``cband=True`` on the weighted path). Splitting them avoids
4136+
# allocating / filling Psi on the common opt-out path
4137+
# ``cband=False`` + weights= shortcut, where no IF consumer
4138+
# exists.
4139+
needs_per_horizon_if = resolved_survey_unit_full is not None or (weighted_es and cband)
4140+
needs_stacked_if_matrix = weighted_es and cband
4141+
if needs_stacked_if_matrix:
41354142
Psi = np.full((G_full, n_horizons), np.nan, dtype=np.float64)
41364143
else:
41374144
Psi = np.zeros((0, 0), dtype=np.float64) # sentinel, not used
@@ -4173,23 +4180,28 @@ def _fit_event_study(
41734180
d_lower_val,
41744181
weights_arr=weights_unit_full,
41754182
resolved_survey_unit=resolved_survey_unit_full,
4176-
# Force IF return on the weighted event-study path
4177-
# (needed for the sup-t bootstrap). Does NOT change
4178-
# the per-horizon SE formula — that still follows
4179-
# the static-path convention (Binder-TSL under
4180-
# survey=, bc_fit.se_robust under weights= shortcut).
4181-
force_return_influence=weighted_es,
4183+
# Force IF return only when the sup-t bootstrap
4184+
# needs the stacked matrix AND the survey= gate
4185+
# won't already produce it. Under survey= path,
4186+
# _fit_continuous returns the IF automatically
4187+
# (resolved_survey_unit_full != None); under the
4188+
# weights= shortcut + cband=True, force it here;
4189+
# otherwise skip the O(G) IF work (review R6 P2).
4190+
force_return_influence=(
4191+
needs_stacked_if_matrix and resolved_survey_unit_full is None
4192+
),
41824193
)
41834194
if bc_fits is not None:
41844195
bc_fits.append(bc_fit_e)
41854196
if bw_diags is not None:
41864197
bw_diags.append(bw_diag_e)
4187-
# Collect per-unit IF on β̂-scale (psi_bc / den) so the
4188-
# sup-t bootstrap operates on the same θ̂-scale IF that
4189-
# the analytical variance sees. Per continuous-path
4190-
# construction in _fit_continuous, bc_fit.influence_function
4191-
# is the numerator IF; dividing by |den| yields the β̂ IF.
4192-
if weighted_es and bc_fit_e is not None and bc_fit_e.influence_function is not None:
4198+
# Collect per-unit IF on β̂-scale (psi_bc / den) into
4199+
# Psi ONLY when the sup-t bootstrap will consume it.
4200+
if (
4201+
needs_stacked_if_matrix
4202+
and bc_fit_e is not None
4203+
and bc_fit_e.influence_function is not None
4204+
):
41934205
if resolved_design == "continuous_at_zero":
41944206
den_e = float(np.average(d_arr_full, weights=weights_unit_full))
41954207
else:
@@ -4209,7 +4221,12 @@ def _fit_event_study(
42094221
cluster_arr,
42104222
vcov_requested,
42114223
weights=weights_unit_full,
4212-
return_influence=resolved_survey_unit_full is not None or weighted_es,
4224+
# Return IF only when a consumer exists: survey=
4225+
# path needs it for per-horizon Binder-TSL override;
4226+
# weights= shortcut + cband=True needs it for the
4227+
# bootstrap. weights= shortcut + cband=False skips
4228+
# IF computation entirely (review R6 P2).
4229+
return_influence=needs_per_horizon_if,
42134230
)
42144231
# Survey path: override analytical sandwich SE with
42154232
# Binder-TSL via compute_survey_if_variance (matches
@@ -4222,7 +4239,7 @@ def _fit_event_study(
42224239
se_e = float(np.sqrt(v_survey))
42234240
else:
42244241
se_e = float("nan")
4225-
if weighted_es and psi_e is not None:
4242+
if needs_stacked_if_matrix and psi_e is not None:
42264243
Psi[:, i] = psi_e
42274244
else:
42284245
raise ValueError(f"Internal error: unhandled design={resolved_design!r}.")

tests/test_had.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5351,6 +5351,108 @@ def test_mass_point_default_vcov_event_study_cband_rejected(self):
53515351
cband=True,
53525352
)
53535353

5354+
def test_survey_event_study_continuous_end_to_end(self):
5355+
"""Review R6 P3: estimator-level
5356+
``fit(aggregate='event_study', survey=SurveyDesign(...))``
5357+
integration lock for the continuous path. Verifies
5358+
variance_formula, survey_metadata.df_survey (t-inference path),
5359+
cband_* population, and stratified PSU dispatch through
5360+
_aggregate_unit_resolved_survey."""
5361+
from diff_diff.survey import SurveyDesign
5362+
5363+
rng = np.random.default_rng(70)
5364+
G, T, n_strata = 200, 4, 4
5365+
d_post = rng.uniform(0.0, 1.0, G)
5366+
strata_per_unit = np.repeat(np.arange(n_strata), G // n_strata)
5367+
rng.shuffle(strata_per_unit)
5368+
rows = []
5369+
for t in range(T):
5370+
for g in range(G):
5371+
dose = d_post[g] if t == T - 1 else 0.0
5372+
y = 0.2 * t + (2.0 * dose if t == T - 1 else 0.0) + 0.5 * rng.standard_normal()
5373+
rows.append((g, t, dose, y, strata_per_unit[g]))
5374+
panel = pd.DataFrame(
5375+
rows,
5376+
columns=["unit", "period", "dose", "outcome", "stratum"],
5377+
)
5378+
w_unit = 1.0 + 0.3 * np.abs(rng.standard_normal(G))
5379+
panel["w"] = panel["unit"].map(lambda g: w_unit[g])
5380+
sd = SurveyDesign(weights="w", strata="stratum")
5381+
with warnings.catch_warnings():
5382+
warnings.simplefilter("ignore", UserWarning)
5383+
est = HeterogeneousAdoptionDiD(design="continuous_at_zero", seed=0, n_bootstrap=200)
5384+
r = est.fit(
5385+
panel,
5386+
"outcome",
5387+
"dose",
5388+
"period",
5389+
"unit",
5390+
aggregate="event_study",
5391+
survey=sd,
5392+
)
5393+
assert r.variance_formula == "survey_binder_tsl"
5394+
assert r.survey_metadata is not None
5395+
assert r.survey_metadata.n_strata == n_strata
5396+
assert r.survey_metadata.n_psu == G
5397+
assert r.survey_metadata.df_survey == G - n_strata
5398+
assert r.cband_crit_value is not None and np.isfinite(r.cband_crit_value)
5399+
assert r.cband_method == "multiplier_bootstrap"
5400+
assert r.cband_n_bootstrap == 200
5401+
assert r.cband_low is not None and r.cband_high is not None
5402+
assert np.all(np.isfinite(r.se))
5403+
5404+
def test_survey_event_study_mass_point_end_to_end(self):
5405+
"""Review R6 P3: estimator-level
5406+
``fit(design='mass_point', aggregate='event_study',
5407+
survey=...)`` integration lock. Verifies
5408+
variance_formula='survey_binder_tsl_2sls' and that the
5409+
weighted 2SLS IF flows correctly through per-horizon
5410+
Binder-TSL + sup-t bootstrap."""
5411+
from diff_diff.survey import SurveyDesign
5412+
5413+
rng = np.random.default_rng(71)
5414+
G, T = 200, 4
5415+
d_mp = np.concatenate([np.full(40, 0.3), rng.uniform(0.3, 1.0, G - 40)])
5416+
rng.shuffle(d_mp)
5417+
strata_per_unit = np.repeat(np.arange(4), G // 4)
5418+
rng.shuffle(strata_per_unit)
5419+
rows = []
5420+
for t in range(T):
5421+
for g in range(G):
5422+
dose = d_mp[g] if t == T - 1 else 0.0
5423+
y = 0.2 * t + (2.0 * dose if t == T - 1 else 0.0) + 0.5 * rng.standard_normal()
5424+
rows.append((g, t, dose, y, strata_per_unit[g]))
5425+
panel = pd.DataFrame(
5426+
rows,
5427+
columns=["unit", "period", "dose", "outcome", "stratum"],
5428+
)
5429+
w_unit = 1.0 + 0.3 * np.abs(rng.standard_normal(G))
5430+
panel["w"] = panel["unit"].map(lambda g: w_unit[g])
5431+
sd = SurveyDesign(weights="w", strata="stratum")
5432+
with warnings.catch_warnings():
5433+
warnings.simplefilter("ignore", UserWarning)
5434+
est = HeterogeneousAdoptionDiD(
5435+
design="mass_point",
5436+
vcov_type="hc1",
5437+
seed=0,
5438+
n_bootstrap=200,
5439+
)
5440+
r = est.fit(
5441+
panel,
5442+
"outcome",
5443+
"dose",
5444+
"period",
5445+
"unit",
5446+
aggregate="event_study",
5447+
survey=sd,
5448+
)
5449+
assert r.variance_formula == "survey_binder_tsl_2sls"
5450+
assert r.survey_metadata is not None
5451+
assert r.survey_metadata.n_strata == 4
5452+
assert r.cband_crit_value is not None and np.isfinite(r.cband_crit_value)
5453+
assert r.cband_method == "multiplier_bootstrap"
5454+
assert np.all(np.isfinite(r.se))
5455+
53545456
def test_mass_point_default_vcov_robust_true_survey_allowed(self):
53555457
"""Complement: robust=True on the default path resolves to
53565458
hc1, so the survey= mass-point fit is allowed with no explicit

0 commit comments

Comments
 (0)