Skip to content

Commit 6c8a68c

Browse files
igerber and claude committed
Add survey-composition tests and to_dataframe het_* docstring
Adds 3 slow regression tests for `by_path + heterogeneity + survey_design` composition that the original PR claimed via REGISTRY/CHANGELOG but lacked automated coverage:

1. `test_per_path_heterogeneity_under_survey_finite` — analytical Binder TSL SE finite per (path, l) under PSU=group survey design.
2. `test_per_path_heterogeneity_replicate_weights_propagates_n_valid` — BRR replicate-weight fit drives `_replicate_n_valid_list` through per-(path, l) heterogeneity calls and final `survey_metadata.df_survey == n_replicates - 1`.
3. `test_survey_design_plus_n_bootstrap_with_heterogeneity_still_raises` — confirms heterogeneity composition does not accidentally re-route around the existing per-path multiplier-bootstrap-survey gate.

Also extends `to_dataframe(level="by_path")` docstring at `chaisemartin_dhaultfoeuille_results.py:1527` to list the new `het_*` columns alongside `cband_*` and `cumulated_*`, mirroring the always-present NaN-when-None convention. The empty-DataFrame schema already includes them; this is a docstring-only sync.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 7f7e3d5 commit 6c8a68c

2 files changed

Lines changed: 214 additions & 3 deletions

File tree

diff_diff/chaisemartin_dhaultfoeuille_results.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1531,8 +1531,10 @@ def to_dataframe(self, level: str = "overall") -> pd.DataFrame:
15311531
``effect``, ``se``, ``t_stat``, ``p_value``,
15321532
``conf_int_lower``, ``conf_int_upper``, ``n_obs``,
15331533
``cband_lower``, ``cband_upper``, ``cumulated_effect``,
1534-
``cumulated_se``. The ``horizon`` column takes negative
1535-
ints for placebo rows when ``placebo=True``. The
1534+
``cumulated_se``, ``het_beta``, ``het_se``,
1535+
``het_t_stat``, ``het_p_value``, ``het_conf_int_lower``,
1536+
``het_conf_int_upper``. The ``horizon`` column takes
1537+
negative ints for placebo rows when ``placebo=True``. The
15361538
``cband_*`` columns mirror the OVERALL
15371539
``level="event_study"`` schema (joint sup-t simultaneous
15381540
bands); they are populated for positive-horizon rows of
@@ -1544,7 +1546,13 @@ def to_dataframe(self, level: str = "overall") -> pd.DataFrame:
15441546
positive-horizon rows when ``trends_linear=True`` is
15451547
also set, NaN for placebo rows or non-trends_linear fits
15461548
(always-present, NaN-when-None — same convention as
1547-
``cband_*``).
1549+
``cband_*``). The ``het_*`` columns surface the per-path
1550+
heterogeneity coefficient (Web Appendix Section 1.5,
1551+
Lemma 7) when ``heterogeneity="<col>"`` is also set;
1552+
populated for positive-horizon rows and NaN for placebo
1553+
rows / non-heterogeneity fits / the requested-but-empty
1554+
fallback DataFrame (always-present, NaN-when-None — same
1555+
convention as ``cband_*`` and ``cumulated_*``).
15481556
15491557
Returns
15501558
-------

tests/test_chaisemartin_dhaultfoeuille.py

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10298,6 +10298,209 @@ def test_per_path_heterogeneity_no_multi_baseline_warning(self):
1029810298
f"{[str(w.message) for w in multi_baseline]}"
1029910299
)
1030010300

10301+
# Survey composition (slow)
10302+
10303+
@staticmethod
def _by_path_het_data_with_survey(seed=44, n_replicates=0):
    """Build a synthetic by-path heterogeneity panel with survey columns.

    The panel has 120 groups (90 switchers followed by 30 never-treated
    controls) observed over 10 periods, one row per (group, period).
    Per the original note it extends `_by_path_het_data` with survey
    columns; that sibling helper is not visible from here.

    Survey layout, as implemented below:

    * ``survey_weights`` = ``1.0 + 0.1 * (group % 5)``.
    * ``strata`` = ``group // 30``: four strata of 30 groups each. The
      first three hold the switchers, the fourth holds only controls.
    * ``psu`` = ``group // 3``: 40 PSUs of three consecutive groups
      each, so PSUs are coarser than groups and nest inside strata.
    * When ``n_replicates > 0``, columns ``rep_0..rep_{n_replicates-1}``
      equal the base weight scaled by 0.5 or 1.5 according to a random
      +/-1 sign drawn once per (group, replicate).

    Per the original note, replicate weights are mutually exclusive
    with strata/PSU/FPC at the SurveyDesign level (survey.py
    validation), so the caller picks one mode through the kwargs it
    passes to SurveyDesign; every column is always emitted here.

    Parameters
    ----------
    seed : int
        Seed for ``np.random.RandomState``; fixes both the replicate
        signs and the outcome noise.
    n_replicates : int
        Number of replicate-weight columns to attach. Values ``<= 0``
        attach none.

    Returns
    -------
    pd.DataFrame
        Columns ``group``, ``period``, ``treatment``, ``outcome``,
        ``het_x``, ``survey_weights``, ``strata``, ``psu`` and the
        optional ``rep_*`` columns, in that order.
    """
    rng = np.random.RandomState(seed)
    n_switchers, n_controls, n_periods = 90, 30, 10
    n_groups_total = n_switchers + n_controls
    # The sign matrix is drawn BEFORE any outcome noise so the random
    # stream (and therefore the generated data) is stable for a seed.
    H = (
        rng.choice([-1, 1], size=(n_groups_total, n_replicates))
        if n_replicates > 0
        else None
    )
    paths = [(0, 1, 1, 1), (0, 1, 0, 0), (0, 1, 1, 0)]

    def _design_columns(g):
        # Survey columns depend only on the group, so build them once
        # per group instead of once per (group, period) row.
        weight = 1.0 + 0.1 * (g % 5)
        cols = {"survey_weights": weight, "strata": g // 30, "psu": g // 3}
        if H is not None:
            for r in range(n_replicates):
                cols[f"rep_{r}"] = float(weight) * (1 + 0.5 * H[g, r])
        return cols

    def _treatment(path, first_switch, t):
        # Untreated before the path starts (one period ahead of
        # ``first_switch``), then follow the path, then hold its last
        # value for the remaining periods.
        offset = t - (first_switch - 1)
        if offset < 0:
            return 0
        return path[min(offset, len(path) - 1)]

    rows = []
    for g in range(n_groups_total):
        is_switcher = g < n_switchers
        if is_switcher:
            first_switch = 3 + ((g // 3) % 3)
            path = paths[g % 3]
            het_x = 1 if g < n_switchers // 2 else 0
            effect = 5.0 + 3.0 * het_x
        else:
            # Controls: the first half of the control block gets het_x=1.
            het_x = 1 if (g - n_switchers) < n_controls // 2 else 0
        design = _design_columns(g)
        for t in range(n_periods):
            if is_switcher:
                d = _treatment(path, first_switch, t)
                y = 0.5 * t + effect * d + rng.normal(0, 0.5)
            else:
                d = 0
                y = 0.5 * t + rng.normal(0, 0.5)
            rows.append(
                {
                    "group": g,
                    "period": t,
                    "treatment": d,
                    "outcome": y,
                    "het_x": het_x,
                    **design,
                }
            )
    return pd.DataFrame(rows)
10377+
10378+
@pytest.mark.slow
def test_per_path_heterogeneity_under_survey_finite(self):
    """Per-(path, l) heterogeneity estimates stay finite under a
    weights + strata + PSU ``survey_design``.

    Fits ``by_path=2`` with ``heterogeneity="het_x"`` and a
    ``SurveyDesign`` built from the ``survey_weights`` / ``strata`` /
    ``psu`` columns. Every ``path_heterogeneity_effects`` entry with
    ``n_obs >= 3`` must carry a finite ``beta`` and a finite, strictly
    positive ``se``, and at least four such entries must exist.
    Regression coverage for the survey composition that the original
    note attributes to the analytical Binder TSL path (REGISTRY:
    "Per-path heterogeneity testing" → "Survey composition").
    """
    from diff_diff.survey import SurveyDesign

    panel = self._by_path_het_data_with_survey()
    design = SurveyDesign(weights="survey_weights", strata="strata", psu="psu")
    estimator = ChaisemartinDHaultfoeuille(drop_larger_lower=False, by_path=2)
    fit_kwargs = dict(
        outcome="outcome",
        group="group",
        time="period",
        treatment="treatment",
        L_max=3,
        heterogeneity="het_x",
        survey_design=design,
    )
    with warnings.catch_warnings():
        # UserWarnings raised during the fit are not under test here.
        warnings.simplefilter("ignore", UserWarning)
        res = estimator.fit(panel, **fit_kwargs)

    assert res.path_heterogeneity_effects
    # Lazily walk the (path, horizon) entries with enough observations,
    # in the same order as a nested loop over the result dict.
    eligible = (
        (path, l_h, vals)
        for path, horizons in res.path_heterogeneity_effects.items()
        for l_h, vals in horizons.items()
        if vals["n_obs"] >= 3
    )
    finite_count = 0
    for finite_count, (path, l_h, vals) in enumerate(eligible, start=1):
        assert np.isfinite(vals["beta"]), (
            f"path={path} l={l_h}: beta is NaN under survey TSL"
        )
        assert np.isfinite(vals["se"]) and vals["se"] > 0, (
            f"path={path} l={l_h}: se non-positive under survey TSL"
        )
    assert finite_count >= 4, (
        f"Expected ≥4 finite (path, l) entries, got {finite_count}"
    )
10417+
10418+
@pytest.mark.slow
def test_per_path_heterogeneity_replicate_weights_propagates_n_valid(self):
    """Replicate-weight fits report ``df_survey == n_replicates - 1``
    when per-path heterogeneity is requested.

    Builds the panel with 8 replicate-weight columns, declares them as
    ``replicate_method="BRR"`` on the ``SurveyDesign``, and fits with
    ``by_path=2`` and ``heterogeneity="het_x"``. The assertions pin:

    * ``survey_metadata`` and its ``df_survey`` are populated;
    * ``df_survey`` equals ``n_replicates - 1`` (7), the value expected
      when every replicate yields a finite SE on this DGP;
    * every ``(path, l)`` entry with ``n_obs >= 3`` has a finite ``se``.

    Per the original note this is an anti-regression check that drives
    the ``_replicate_n_valid_list`` accumulator through the per-(path,
    l) heterogeneity calls, with ``df_survey`` reflecting
    ``min(n_valid) - 1``; that internal is not visible from this test.
    """
    from diff_diff.survey import SurveyDesign

    n_replicates = 8
    replicate_cols = [f"rep_{r}" for r in range(n_replicates)]
    panel = self._by_path_het_data_with_survey(n_replicates=n_replicates)
    design = SurveyDesign(
        weights="survey_weights",
        replicate_weights=replicate_cols,
        replicate_method="BRR",
    )
    estimator = ChaisemartinDHaultfoeuille(drop_larger_lower=False, by_path=2)
    fit_kwargs = dict(
        outcome="outcome",
        group="group",
        time="period",
        treatment="treatment",
        L_max=3,
        heterogeneity="het_x",
        survey_design=design,
    )
    with warnings.catch_warnings():
        # UserWarnings raised during the fit are not under test here.
        warnings.simplefilter("ignore", UserWarning)
        res = estimator.fit(panel, **fit_kwargs)

    assert res.path_heterogeneity_effects
    assert res.survey_metadata is not None
    # df_survey is bounded above by n_replicates - 1; with finite SEs
    # from every replicate the bound is attained (df = 7).
    expected_df = n_replicates - 1
    assert res.survey_metadata.df_survey is not None, (
        "df_survey must be populated under replicate-weight survey"
    )
    assert res.survey_metadata.df_survey == expected_df, (
        f"df_survey={res.survey_metadata.df_survey}, "
        f"expected {n_replicates - 1}"
    )
    # Inference must also be finite for every sufficiently populated
    # (path, horizon) entry under replicate weights.
    for path, horizons in res.path_heterogeneity_effects.items():
        for l_h, vals in horizons.items():
            if vals["n_obs"] < 3:
                continue
            assert np.isfinite(vals["se"]), (
                f"path={path} l={l_h}: replicate SE non-finite"
            )
10473+
@pytest.mark.slow
def test_survey_design_plus_n_bootstrap_with_heterogeneity_still_raises(
    self,
):
    """``by_path + survey_design + n_bootstrap > 0`` still raises when
    ``heterogeneity`` is also set.

    Constructs the estimator with ``n_bootstrap=10`` and fits with a
    weights + strata + PSU ``SurveyDesign`` plus
    ``heterogeneity="het_x"``; the fit must raise
    ``NotImplementedError`` whose message matches ``"multiplier"``.
    Anti-regression for the existing gate (PR #408): adding
    heterogeneity must not route around it.
    """
    from diff_diff.survey import SurveyDesign

    panel = self._by_path_het_data_with_survey()
    design = SurveyDesign(weights="survey_weights", strata="strata", psu="psu")
    estimator = ChaisemartinDHaultfoeuille(
        drop_larger_lower=False, by_path=2, n_bootstrap=10, seed=1
    )
    fit_kwargs = dict(
        outcome="outcome",
        group="group",
        time="period",
        treatment="treatment",
        L_max=3,
        heterogeneity="het_x",
        survey_design=design,
    )
    with warnings.catch_warnings():
        # Silence UserWarnings so only the expected exception matters.
        warnings.simplefilter("ignore", UserWarning)
        with pytest.raises(NotImplementedError, match="multiplier"):
            estimator.fit(panel, **fit_kwargs)
10503+
1030110504
# DataFrame integration
1030210505

1030310506
def test_to_dataframe_by_path_includes_heterogeneity_columns(self):

0 commit comments

Comments
 (0)