@@ -442,3 +442,134 @@ def test_zero_weight_cell_excluded(self, base_data):
442442 survey_design = sd ,
443443 )
444444 assert np .isfinite (result .overall_att )
445+
446+
447+ # ── Test: Delta overall surface threads survey df ───────────────────
448+
449+
class TestSurveyDeltaInference:
    """Verify the L_max>=2 cost-benefit delta surface uses survey df.

    Both tests fit the estimator with ``L_max=2`` and a stratified, clustered
    ``SurveyDesign``, recompute the two-sided p-value of
    ``overall_att / overall_se`` under a Student-t reference distribution with
    ``survey_metadata.df_survey`` degrees of freedom, and compare it with the
    reported ``overall_p_value``.
    """

    def test_survey_delta_uses_survey_df(self, data_with_survey):
        """Under L_max=2 with a survey design, overall_p_value must match
        t-distribution inference with df=df_survey (not z-inference).

        Skips when the fitted ``overall_se`` is non-finite or non-positive,
        because no test statistic can be formed in that case.
        """
        from scipy import stats

        sd = SurveyDesign(
            weights="pw", strata="stratum", psu="cluster", nest=True
        )
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            data_with_survey,
            outcome="outcome",
            group="group",
            time="period",
            treatment="treatment",
            L_max=2,
            survey_design=sd,
        )
        if not (np.isfinite(r.overall_se) and r.overall_se > 0):
            pytest.skip("delta not estimable on this fixture")

        assert r.survey_metadata is not None
        df_s = r.survey_metadata.df_survey
        assert df_s is not None and df_s > 0, (
            f"expected positive df_survey, got {df_s}"
        )

        t_stat = r.overall_att / r.overall_se
        # Survival function instead of 1 - cdf: same quantity, but without
        # the cancellation error of subtracting a value close to 1.
        p_t = 2.0 * stats.t.sf(abs(t_stat), df=df_s)
        # Reported p-value must match t-based (proving _df_survey was threaded)
        assert r.overall_p_value == pytest.approx(p_t, abs=1e-10)

    def test_survey_delta_t_differs_from_z(self, base_data):
        """With a small-df design (df~4), survey-t p-value must differ
        measurably from z p-value at the delta surface.

        The design is built on a copy of ``base_data``: unit weights, the
        sorted groups split into 2 strata, and each stratum split into 3
        clusters. Skips when ``overall_se`` is non-finite or non-positive.
        """
        from scipy import stats

        df_ = base_data.copy()
        df_["pw"] = 1.0

        # 2 strata × 3 clusters/stratum = 6 nested PSUs → df_survey = 4.
        # Split per stratum rather than dividing the global index, so the
        # layout holds for any group count >= 6 (not only multiples of 6)
        # and no cluster ever straddles the stratum boundary.
        groups = sorted(df_["group"].unique())
        n_g = len(groups)
        n_strata, psu_per_stratum = 2, 3
        assert n_g >= n_strata * psu_per_stratum, (
            f"need at least {n_strata * psu_per_stratum} groups to build the "
            f"design, got {n_g}"
        )
        half = (n_g + 1) // 2
        strata_map = {}
        psu_map = {}
        for s, members in enumerate((groups[:half], groups[half:])):
            size = len(members)
            for j, g in enumerate(members):
                strata_map[g] = s
                # Globally unique cluster ids: 0-2 in stratum 0, 3-5 in stratum 1.
                psu_map[g] = psu_per_stratum * s + (psu_per_stratum * j) // size
        df_["stratum"] = df_["group"].map(strata_map)
        df_["cluster"] = df_["group"].map(psu_map)

        sd = SurveyDesign(
            weights="pw", strata="stratum", psu="cluster", nest=True
        )
        r = ChaisemartinDHaultfoeuille(seed=1).fit(
            df_,
            outcome="outcome",
            group="group",
            time="period",
            treatment="treatment",
            L_max=2,
            survey_design=sd,
        )
        if not (np.isfinite(r.overall_se) and r.overall_se > 0):
            pytest.skip("delta not estimable on this fixture")
        assert r.survey_metadata is not None
        df_s = r.survey_metadata.df_survey
        assert df_s is not None and 0 < df_s < 30, (
            f"expected small df_survey for t-vs-z gap, got {df_s}"
        )

        t_stat = r.overall_att / r.overall_se
        p_t = 2.0 * stats.t.sf(abs(t_stat), df=df_s)
        p_z = 2.0 * stats.norm.sf(abs(t_stat))
        # Threaded p-value must match t, not z
        assert r.overall_p_value == pytest.approx(p_t, abs=1e-10)
        assert abs(r.overall_p_value - p_z) > 1e-6, (
            "overall_p_value must differ from z-inference when df_survey is small"
        )
520+
521+
522+ # ── Test: Survey + controls (DID^X) ─────────────────────────────────
523+
524+
class TestSurveyControls:
    """Covariate-adjusted (DID^X) path must work with survey_design."""

    def test_survey_plus_controls_runs(self, data_with_survey):
        """Fit with one control column plus a survey design.

        Adds a seeded standard-normal column ``x`` to a copy of the fixture,
        fits with ``controls=["x"]`` and ``L_max=1``, and checks that the
        overall ATT is finite and survey metadata is attached to the result.
        """
        noise_rng = np.random.default_rng(7)
        frame = data_with_survey.copy()
        frame["x"] = noise_rng.normal(0, 1.0, size=len(frame))

        design = SurveyDesign(
            weights="pw",
            strata="stratum",
            psu="cluster",
            nest=True,
        )
        estimator = ChaisemartinDHaultfoeuille(seed=1)
        fitted = estimator.fit(
            frame,
            outcome="outcome",
            group="group",
            time="period",
            treatment="treatment",
            controls=["x"],
            L_max=1,
            survey_design=design,
        )

        assert np.isfinite(fitted.overall_att)
        assert fitted.survey_metadata is not None
544+
545+
546+ # ── Test: Survey + HonestDiD ────────────────────────────────────────
547+
548+
class TestSurveyHonestDiD:
    """HonestDiD bounds on survey-backed dCDH results must carry df_survey."""

    def test_survey_honest_did_propagates_df(self, data_with_survey):
        """The HonestDiD result must report the same df_survey as the fit.

        Fits with ``L_max=2``, ``honest_did=True`` and a survey design, then
        checks that ``honest_did_results.df_survey`` equals the non-None
        ``survey_metadata.df_survey``. Skips when no HonestDiD result is
        produced on this fixture.
        """
        import warnings

        design = SurveyDesign(
            weights="pw",
            strata="stratum",
            psu="cluster",
            nest=True,
        )
        estimator = ChaisemartinDHaultfoeuille(seed=1)
        with warnings.catch_warnings():
            # dCDH HonestDiD emits a methodology-deviation warning
            warnings.simplefilter("ignore")
            fitted = estimator.fit(
                data_with_survey,
                outcome="outcome",
                group="group",
                time="period",
                treatment="treatment",
                L_max=2,
                honest_did=True,
                survey_design=design,
            )

        honest = fitted.honest_did_results
        if honest is None:
            pytest.skip("HonestDiD computation returned None on this fixture")

        assert fitted.survey_metadata is not None
        expected_df = fitted.survey_metadata.df_survey
        assert expected_df is not None
        # df_survey must propagate from survey_metadata into HonestDiD result
        assert honest.df_survey == expected_df
0 commit comments