Skip to content

Commit cdbc6bb

Browse files
igerber and claude committed
Address PR #412 R2 review (1 P2 — multi-baseline test was vacuous)
The prior `test_per_path_heterogeneity_no_multi_baseline_warning` used `_by_path_het_data`, whose paths all start with `D=0`, so it never exercised the multi-baseline switcher panel regime claimed in REGISTRY ("cohort dummies absorb baseline by construction"). The new CI reviewer flagged this as a P2 claim-vs-test mismatch.

Replaces the test with a TRUE multi-baseline DGP via the new `_multi_baseline_het_data` static fixture: 60 joiners (D_{g,1}=0, path (0,1,1,1)) + 60 leavers (D_{g,1}=1, path (1,0,0,0)), with F_g in {3,4,5} for both baselines. The paths are selected via `paths_of_interest=[(0,1,1,1), (1,0,0,0)]`.

The test now asserts:
1. Fixture sanity: switcher baselines actually span {0, 1}.
2. Both selected paths populate `path_heterogeneity_effects`.
3. Each path has ≥1 finite (path, l) entry — confirms the regression is non-degenerate under multi-baseline switchers.
4. No multi-baseline UserWarning is emitted (cohort dummies absorb baseline).
5. No verbatim `by_path / paths_of_interest + controls/trends_linear` divergence warning is emitted (strict text-fragment check).

Empirically verified: this fixture produces finite beta/SE on every in-window horizon for both paths and emits zero baseline warnings.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent a57f583 commit cdbc6bb

1 file changed

Lines changed: 114 additions & 10 deletions

File tree

tests/test_chaisemartin_dhaultfoeuille.py

Lines changed: 114 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -10273,31 +10273,135 @@ def test_path_with_too_few_eligible_yields_nan(self):
1027310273
assert not np.isfinite(vals["conf_int"][0])
1027410274
assert not np.isfinite(vals["conf_int"][1])
1027510275

10276+
@staticmethod
def _multi_baseline_het_data(seed=44):
    """Build a multi-baseline switcher panel for heterogeneity tests.

    Two arms of 60 groups each are generated over 10 periods:

    * joiners: baseline ``D_{g,1}=0``, treatment path ``(0, 1, 1, 1)``,
      group ids ``0..59``;
    * leavers: baseline ``D_{g,1}=1``, treatment path ``(1, 0, 0, 0)``,
      group ids ``60..119``.

    Within each arm ``F_g`` cycles through ``{3, 4, 5}`` in blocks of
    three groups, so each path has multi-cohort variation. ``het_x`` is
    binary and balanced within each baseline (first half of the arm is
    1, second half is 0). The outcome is
    ``0.5 * t + (5 + 3 * het_x) * d + N(0, 0.5)`` noise.

    This is the regime where ``controls`` and ``trends_linear`` emit a
    multi-baseline UserWarning (R-divergence); per-path heterogeneity
    must NOT emit one because cohort dummies absorb baseline.

    Parameters
    ----------
    seed : int, default 44
        Seed for the ``np.random.RandomState`` that draws outcome noise.

    Returns
    -------
    pandas.DataFrame
        One row per (group, period) with columns ``group``, ``period``,
        ``treatment``, ``outcome`` and ``het_x``.
    """
    rng = np.random.RandomState(seed)
    n_per_baseline, n_periods = 60, 10
    # (group-id offset, treatment path). path[0] is the arm's baseline
    # D_{g,1}; it is also the treatment held before the path window.
    arms = (
        (0, (0, 1, 1, 1)),               # joiners
        (n_per_baseline, (1, 0, 0, 0)),  # leavers
    )
    rows = []
    # Arms, groups and periods are visited in a fixed order so the noise
    # draws are reproducible for a given seed.
    for offset, path in arms:
        last = len(path) - 1
        for idx in range(n_per_baseline):
            F_g = 3 + ((idx // 3) % 3)
            het_x = 1 if idx < n_per_baseline // 2 else 0
            effect = 5.0 + 3.0 * het_x
            window_start = F_g - 1
            for t in range(n_periods):
                # Clamp the path position: before the window the group
                # sits at its baseline (path[0]); after the window it
                # keeps the final path value (path[-1]).
                d = path[min(max(t - window_start, 0), last)]
                y = 0.5 * t + effect * d + rng.normal(0, 0.5)
                rows.append({"group": offset + idx, "period": t,
                             "treatment": d, "outcome": y,
                             "het_x": het_x})
    return pd.DataFrame(rows)
10323+
1027610324
def test_per_path_heterogeneity_no_multi_baseline_warning(self):
    """Anti-regression: heterogeneity + by_path / paths_of_interest
    does NOT emit the multi-baseline UserWarning that
    ``controls`` / ``trends_linear`` emit on switcher panels
    spanning multiple ``D_{g,1}`` values. Cohort dummies in the
    design matrix absorb baseline by construction (REGISTRY:
    "Per-path heterogeneity testing"), so cross-baseline switcher
    panels do not produce R-divergence in the heterogeneity test
    and no parallel warning is needed.

    Uses a TRUE multi-baseline DGP (joiners with D_{g,1}=0 path
    ``(0,1,1,1)`` + leavers with D_{g,1}=1 path ``(1,0,0,0)``)
    selected via ``paths_of_interest``. Verified empirically:
    both paths produce finite per-path heterogeneity at l=1,2
    with zero baseline-related warnings.
    """
    # Single source of truth for the paths requested from the estimator
    # and checked in the assertions below.
    selected_paths = [(0, 1, 1, 1), (1, 0, 0, 0)]

    df = self._multi_baseline_het_data()
    # Sanity check: panel actually has both baselines among switchers.
    # Sort by period first so "first" is each group's earliest period
    # regardless of the fixture's row order.
    baselines = (
        df.sort_values("period")
        .groupby("group")["treatment"]
        .first()
        .unique()
    )
    assert set(baselines) >= {0, 1}, (
        f"fixture must include both baselines; got {sorted(baselines)}"
    )

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including repeats, so nothing emitted
        # during fit() is filtered out before inspection.
        warnings.simplefilter("always")
        res = ChaisemartinDHaultfoeuille(
            drop_larger_lower=False,
            paths_of_interest=list(selected_paths),
        ).fit(
            df,
            outcome="outcome",
            group="group",
            time="period",
            treatment="treatment",
            L_max=3,
            heterogeneity="het_x",
        )

    # Both selected paths surface (per-baseline switchers populate both)
    assert res.path_heterogeneity_effects is not None
    for path in selected_paths:
        assert path in res.path_heterogeneity_effects

    # Each path has at least one finite (path, horizon) entry —
    # confirms the regression is non-degenerate under multi-baseline.
    for path in selected_paths:
        horizons = res.path_heterogeneity_effects[path]
        finite_count = sum(
            1 for v in horizons.values()
            if np.isfinite(v["beta"]) and np.isfinite(v["se"])
        )
        assert finite_count >= 1, (
            f"path={path}: expected ≥1 finite per-(path, l) entry, "
            f"got {finite_count}"
        )

    messages = [str(w.message) for w in caught]

    # No multi-baseline UserWarning. A message counts as a
    # multi-baseline warning only when it mentions BOTH "baseline" and
    # "multi" (case-insensitive), so unrelated warnings that contain
    # just one of the words are not flagged.
    multi_baseline = [
        msg for msg in messages
        if "baseline" in msg.lower() and "multi" in msg.lower()
    ]
    assert not multi_baseline, (
        f"Unexpected multi-baseline warning(s) under heterogeneity: "
        f"{multi_baseline}"
    )

    # Also check that the controls / trends_linear R-divergence warning
    # text does not appear verbatim in any recorded warning.
    controls_divergence = [
        msg for msg in messages
        if "by_path / paths_of_interest + controls" in msg
        or "by_path / paths_of_interest + trends_linear" in msg
    ]
    assert not controls_divergence, (
        f"Unexpected controls / trends_linear divergence warning(s): "
        f"{controls_divergence}"
    )
10404+
1030110405
# Survey composition (slow)
1030210406

1030310407
@staticmethod

0 commit comments

Comments
 (0)