Skip to content

Commit 4eca232

Browse files
igerber and claude committed
Fix CI review R3: thread _df_survey to delta + HonestDiD surfaces
- P0: The delta overall surface now uses _df_survey instead of df=None at both safe_inference sites (primary delta path + placebo NaN-SE fallback). This makes overall_* under L_max>=2 use survey-t inference and respects safe_inference's df<=0 NaN guard.
- P1: HonestDiD dCDH extraction now propagates df_survey from survey_metadata (mirrors the CS pattern). Survey-backed dCDH HonestDiD bounds now use survey-aware critical values.
- P2: Add 4 regression tests (survey delta t-matches-reported, t-vs-z differs, survey+controls, survey+honest_did df propagation). Update the stale comment in test_dcdh_extraction.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent cf17cb2 commit 4eca232

4 files changed

Lines changed: 145 additions & 4 deletions

File tree

diff_diff/chaisemartin_dhaultfoeuille.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2146,7 +2146,7 @@ def fit(
21462146
if np.isfinite(delta_se):
21472147
effective_overall_se = delta_se
21482148
effective_overall_t, effective_overall_p, effective_overall_ci = safe_inference(
2149-
delta_val, delta_se, alpha=self.alpha, df=None
2149+
delta_val, delta_se, alpha=self.alpha, df=_df_survey
21502150
)
21512151
else:
21522152
effective_overall_se = float("nan")
@@ -2180,7 +2180,7 @@ def fit(
21802180
# Fallback: NaN SE (Phase 1 path or missing IF)
21812181
pl_se = float("nan")
21822182
pl_t, pl_p, pl_ci = safe_inference(
2183-
pl_data["placebo_l"], pl_se, alpha=self.alpha, df=None
2183+
pl_data["placebo_l"], pl_se, alpha=self.alpha, df=_df_survey
21842184
)
21852185
placebo_event_study_dict[-lag_l] = {
21862186
"effect": pl_data["placebo_l"],

diff_diff/honest_did.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -967,14 +967,24 @@ def _largest_consecutive_block(times, boundary_val):
967967
beta_hat = np.array(effects)
968968
sigma = np.diag(np.array(ses) ** 2)
969969

970+
# Extract survey df. For replicate designs with undefined df
971+
# (rank <= 1), use sentinel df=0 so _get_critical_value returns
972+
# NaN, matching the safe_inference contract.
973+
df_survey = None
974+
if hasattr(results, "survey_metadata") and results.survey_metadata is not None:
975+
sm = results.survey_metadata
976+
df_survey = getattr(sm, "df_survey", None)
977+
if df_survey is None and getattr(sm, "replicate_method", None) is not None:
978+
df_survey = 0 # undefined replicate df → NaN inference
979+
970980
return (
971981
beta_hat,
972982
sigma,
973983
len(pre_times),
974984
len(post_times),
975985
pre_times,
976986
post_times,
977-
None, # df_survey: dCDH has no survey support
987+
df_survey,
978988
)
979989
except ImportError:
980990
pass

tests/test_honest_did.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1381,7 +1381,7 @@ def test_dcdh_extraction(self):
13811381
assert sigma.shape == (n_pre + n_post, n_pre + n_post)
13821382
assert all(t < 0 for t in pre_t)
13831383
assert all(t > 0 for t in post_t)
1384-
assert df_s is None # dCDH has no survey support
1384+
assert df_s is None # non-survey fixture → df_survey is None
13851385

13861386
def test_dcdh_no_placebos_raises(self):
13871387
"""dCDH results without placebos raise ValueError."""

tests/test_survey_dcdh.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,3 +442,134 @@ def test_zero_weight_cell_excluded(self, base_data):
442442
survey_design=sd,
443443
)
444444
assert np.isfinite(result.overall_att)
445+
446+
447+
# ── Test: Delta overall surface threads survey df ───────────────────
448+
449+
450+
class TestSurveyDeltaInference:
451+
"""Verify the L_max>=2 cost-benefit delta surface uses survey df."""
452+
453+
def test_survey_delta_uses_survey_df(self, data_with_survey):
454+
"""Under L_max=2 with a survey design, overall_p_value must match
455+
t-distribution inference with df=df_survey (not z-inference)."""
456+
from scipy import stats
457+
458+
sd = SurveyDesign(
459+
weights="pw", strata="stratum", psu="cluster", nest=True
460+
)
461+
r = ChaisemartinDHaultfoeuille(seed=1).fit(
462+
data_with_survey,
463+
outcome="outcome", group="group",
464+
time="period", treatment="treatment",
465+
L_max=2, survey_design=sd,
466+
)
467+
if not (np.isfinite(r.overall_se) and r.overall_se > 0):
468+
pytest.skip("delta not estimable on this fixture")
469+
470+
assert r.survey_metadata is not None
471+
df_s = r.survey_metadata.df_survey
472+
assert df_s is not None and df_s > 0, (
473+
f"expected positive df_survey, got {df_s}"
474+
)
475+
476+
t_stat = r.overall_att / r.overall_se
477+
p_t = 2.0 * (1.0 - stats.t.cdf(abs(t_stat), df=df_s))
478+
# Reported p-value must match t-based (proving _df_survey was threaded)
479+
assert r.overall_p_value == pytest.approx(p_t, abs=1e-10)
480+
481+
def test_survey_delta_t_differs_from_z(self, base_data):
482+
"""With a small-df design (df~4), survey-t p-value must differ
483+
measurably from z p-value at the delta surface."""
484+
from scipy import stats
485+
486+
df_ = base_data.copy()
487+
df_["pw"] = 1.0
488+
# 2 strata × 3 clusters/stratum = 6 nested PSUs → df_survey = 4
489+
groups = sorted(df_["group"].unique())
490+
n_g = len(groups)
491+
strata_map = {g: i // (n_g // 2) for i, g in enumerate(groups)}
492+
psu_map = {g: i // (n_g // 6) for i, g in enumerate(groups)}
493+
df_["stratum"] = df_["group"].map(strata_map)
494+
df_["cluster"] = df_["group"].map(psu_map)
495+
sd = SurveyDesign(
496+
weights="pw", strata="stratum", psu="cluster", nest=True
497+
)
498+
r = ChaisemartinDHaultfoeuille(seed=1).fit(
499+
df_,
500+
outcome="outcome", group="group",
501+
time="period", treatment="treatment",
502+
L_max=2, survey_design=sd,
503+
)
504+
if not (np.isfinite(r.overall_se) and r.overall_se > 0):
505+
pytest.skip("delta not estimable on this fixture")
506+
assert r.survey_metadata is not None
507+
df_s = r.survey_metadata.df_survey
508+
assert df_s is not None and df_s < 30, (
509+
f"expected small df_survey for t-vs-z gap, got {df_s}"
510+
)
511+
512+
t_stat = r.overall_att / r.overall_se
513+
p_t = 2.0 * (1.0 - stats.t.cdf(abs(t_stat), df=df_s))
514+
p_z = 2.0 * (1.0 - stats.norm.cdf(abs(t_stat)))
515+
# Threaded p-value must match t, not z
516+
assert r.overall_p_value == pytest.approx(p_t, abs=1e-10)
517+
assert abs(r.overall_p_value - p_z) > 1e-6, (
518+
"overall_p_value must differ from z-inference when df_survey is small"
519+
)
520+
521+
522+
# ── Test: Survey + controls (DID^X) ─────────────────────────────────
523+
524+
525+
class TestSurveyControls:
526+
"""Covariate-adjusted (DID^X) path must work with survey_design."""
527+
528+
def test_survey_plus_controls_runs(self, data_with_survey):
529+
"""Covariate-adjusted dCDH with survey_design produces finite ATT."""
530+
rng = np.random.default_rng(7)
531+
df_ = data_with_survey.copy()
532+
df_["x"] = rng.normal(0, 1.0, size=len(df_))
533+
sd = SurveyDesign(
534+
weights="pw", strata="stratum", psu="cluster", nest=True
535+
)
536+
r = ChaisemartinDHaultfoeuille(seed=1).fit(
537+
df_,
538+
outcome="outcome", group="group",
539+
time="period", treatment="treatment",
540+
controls=["x"], L_max=1, survey_design=sd,
541+
)
542+
assert np.isfinite(r.overall_att)
543+
assert r.survey_metadata is not None
544+
545+
546+
# ── Test: Survey + HonestDiD ────────────────────────────────────────
547+
548+
549+
class TestSurveyHonestDiD:
550+
"""HonestDiD bounds on survey-backed dCDH results must carry df_survey."""
551+
552+
def test_survey_honest_did_propagates_df(self, data_with_survey):
553+
"""results.honest_did_results.df_survey must match
554+
results.survey_metadata.df_survey (non-None propagation)."""
555+
import warnings
556+
557+
sd = SurveyDesign(
558+
weights="pw", strata="stratum", psu="cluster", nest=True
559+
)
560+
with warnings.catch_warnings():
561+
# dCDH HonestDiD emits a methodology-deviation warning
562+
warnings.simplefilter("ignore")
563+
r = ChaisemartinDHaultfoeuille(seed=1).fit(
564+
data_with_survey,
565+
outcome="outcome", group="group",
566+
time="period", treatment="treatment",
567+
L_max=2, honest_did=True, survey_design=sd,
568+
)
569+
if r.honest_did_results is None:
570+
pytest.skip("HonestDiD computation returned None on this fixture")
571+
assert r.survey_metadata is not None
572+
df_meta = r.survey_metadata.df_survey
573+
assert df_meta is not None
574+
# df_survey must propagate from survey_metadata into HonestDiD result
575+
assert r.honest_did_results.df_survey == df_meta

0 commit comments

Comments
 (0)