@@ -1627,7 +1627,7 @@ def fit(
16271627
16281628 did_l_val = multi_horizon_dids [l_h ]["did_l" ]
16291629 _df_s = _effective_df_survey (resolved_survey , _replicate_n_valid_list )
1630- t_l , p_l , ci_l = safe_inference (did_l_val , se_l , alpha = self .alpha , df = _df_s )
1630+ t_l , p_l , ci_l = safe_inference (did_l_val , se_l , alpha = self .alpha , df = _inference_df ( _df_s , resolved_survey ) )
16311631 multi_horizon_inference [l_h ] = {
16321632 "effect" : did_l_val ,
16331633 "se" : se_l ,
@@ -1756,7 +1756,8 @@ def fit(
17561756 pl_val = pl_data ["placebo_l" ]
17571757 _df_s = _effective_df_survey (resolved_survey , _replicate_n_valid_list )
17581758 t_pl_l , p_pl_l , ci_pl_l = safe_inference (
1759- pl_val , se_pl_l , alpha = self .alpha , df = _df_s
1759+ pl_val , se_pl_l , alpha = self .alpha ,
1760+ df = _inference_df (_df_s , resolved_survey ),
17601761 )
17611762 placebo_horizon_inference [lag_l ] = {
17621763 "effect" : pl_val ,
@@ -1863,7 +1864,8 @@ def fit(
18631864 )
18641865 _df_survey = _effective_df_survey (resolved_survey , _replicate_n_valid_list )
18651866 overall_t , overall_p , overall_ci = safe_inference (
1866- overall_att , overall_se , alpha = self .alpha , df = _df_survey
1867+ overall_att , overall_se , alpha = self .alpha ,
1868+ df = _inference_df (_df_survey , resolved_survey ),
18671869 )
18681870
18691871 # Joiners SE (uses joiner-only centered IF; conservative bound)
@@ -1878,7 +1880,8 @@ def fit(
18781880 _replicate_n_valid_list .append (n_valid_joiners )
18791881 _df_survey = _effective_df_survey (resolved_survey , _replicate_n_valid_list )
18801882 joiners_t , joiners_p , joiners_ci = safe_inference (
1881- joiners_att , joiners_se , alpha = self .alpha , df = _df_survey
1883+ joiners_att , joiners_se , alpha = self .alpha ,
1884+ df = _inference_df (_df_survey , resolved_survey ),
18821885 )
18831886 else :
18841887 joiners_se , joiners_t , joiners_p , joiners_ci = (
@@ -1900,7 +1903,8 @@ def fit(
19001903 _replicate_n_valid_list .append (n_valid_leavers )
19011904 _df_survey = _effective_df_survey (resolved_survey , _replicate_n_valid_list )
19021905 leavers_t , leavers_p , leavers_ci = safe_inference (
1903- leavers_att , leavers_se , alpha = self .alpha , df = _df_survey
1906+ leavers_att , leavers_se , alpha = self .alpha ,
1907+ df = _inference_df (_df_survey , resolved_survey ),
19041908 )
19051909 else :
19061910 leavers_se , leavers_t , leavers_p , leavers_ci = (
@@ -2200,17 +2204,17 @@ def fit(
22002204 overall_se = br .overall_se
22012205 overall_p = br .overall_p_value if br .overall_p_value is not None else np .nan
22022206 overall_ci = br .overall_ci if br .overall_ci is not None else (np .nan , np .nan )
2203- overall_t = safe_inference (overall_att , overall_se , alpha = self .alpha , df = _df_survey )[0 ]
2207+ overall_t = safe_inference (overall_att , overall_se , alpha = self .alpha , df = _inference_df ( _df_survey , resolved_survey ) )[0 ]
22042208 if joiners_available and br .joiners_se is not None and np .isfinite (br .joiners_se ):
22052209 joiners_se = br .joiners_se
22062210 joiners_p = br .joiners_p_value if br .joiners_p_value is not None else np .nan
22072211 joiners_ci = br .joiners_ci if br .joiners_ci is not None else (np .nan , np .nan )
2208- joiners_t = safe_inference (joiners_att , joiners_se , alpha = self .alpha , df = _df_survey )[0 ]
2212+ joiners_t = safe_inference (joiners_att , joiners_se , alpha = self .alpha , df = _inference_df ( _df_survey , resolved_survey ) )[0 ]
22092213 if leavers_available and br .leavers_se is not None and np .isfinite (br .leavers_se ):
22102214 leavers_se = br .leavers_se
22112215 leavers_p = br .leavers_p_value if br .leavers_p_value is not None else np .nan
22122216 leavers_ci = br .leavers_ci if br .leavers_ci is not None else (np .nan , np .nan )
2213- leavers_t = safe_inference (leavers_att , leavers_se , alpha = self .alpha , df = _df_survey )[0 ]
2217+ leavers_t = safe_inference (leavers_att , leavers_se , alpha = self .alpha , df = _inference_df ( _df_survey , resolved_survey ) )[0 ]
22142218
22152219 # ------------------------------------------------------------------
22162220 # Step 20: Build the results dataclass
@@ -2372,7 +2376,8 @@ def fit(
23722376 if np .isfinite (delta_se ):
23732377 effective_overall_se = delta_se
23742378 effective_overall_t , effective_overall_p , effective_overall_ci = safe_inference (
2375- delta_val , delta_se , alpha = self .alpha , df = _df_survey
2379+ delta_val , delta_se , alpha = self .alpha ,
2380+ df = _inference_df (_df_survey , resolved_survey ),
23762381 )
23772382 else :
23782383 effective_overall_se = float ("nan" )
@@ -2406,7 +2411,8 @@ def fit(
24062411 # Fallback: NaN SE (Phase 1 path or missing IF)
24072412 pl_se = float ("nan" )
24082413 pl_t , pl_p , pl_ci = safe_inference (
2409- pl_data ["placebo_l" ], pl_se , alpha = self .alpha , df = _df_survey
2414+ pl_data ["placebo_l" ], pl_se , alpha = self .alpha ,
2415+ df = _inference_df (_df_survey , resolved_survey ),
24102416 )
24112417 placebo_event_study_dict [- lag_l ] = {
24122418 "effect" : pl_data ["placebo_l" ],
@@ -2457,7 +2463,8 @@ def fit(
24572463 bs_ci if bs_ci is not None else (np .nan , np .nan )
24582464 )
24592465 placebo_event_study_dict [neg_key ]["t_stat" ] = safe_inference (
2460- eff , bs_se , alpha = self .alpha , df = _df_survey
2466+ eff , bs_se , alpha = self .alpha ,
2467+ df = _inference_df (_df_survey , resolved_survey ),
24612468 )[0 ]
24622469
24632470 # Phase 2: build normalized_effects with SE
@@ -2470,7 +2477,7 @@ def fit(
24702477 # SE via delta method: SE(DID^n_l) = SE(DID_l) / delta^D_l
24712478 se_did_l = multi_horizon_se .get (l_h , float ("nan" ))
24722479 se_norm = se_did_l / denom if np .isfinite (denom ) and denom > 0 else float ("nan" )
2473- t_n , p_n , ci_n = safe_inference (eff , se_norm , alpha = self .alpha , df = _df_survey )
2480+ t_n , p_n , ci_n = safe_inference (eff , se_norm , alpha = self .alpha , df = _inference_df ( _df_survey , resolved_survey ) )
24742481 normalized_effects_out [l_h ] = {
24752482 "effect" : eff ,
24762483 "se" : se_norm ,
@@ -2529,7 +2536,8 @@ def fit(
25292536 else :
25302537 running_se_ub = float ("nan" )
25312538 cum_t , cum_p , cum_ci = safe_inference (
2532- cum_effect , running_se_ub , alpha = self .alpha , df = _df_survey
2539+ cum_effect , running_se_ub , alpha = self .alpha ,
2540+ df = _inference_df (_df_survey , resolved_survey ),
25332541 )
25342542 cumulated [l_h ] = {
25352543 "effect" : cum_effect ,
@@ -2683,21 +2691,73 @@ def fit(
26832691 effective_joiners_available = False
26842692 effective_leavers_available = False
26852693
2686- # Persist the final effective df_survey (post-heterogeneity,
2687- # post-all-IF-sites) into survey_metadata so downstream
2688- # consumers — HonestDiD bounds (honest_did.py:973 reads
2689- # results.survey_metadata.df_survey), exported metadata, and
2690- # users — all see the same df that the top-level dCDH inference
2691- # actually used. Before this assignment, survey_metadata carries
2692- # the design-level `resolved_survey.df_survey` from survey
2693- # resolution; under replicate designs the effective df may be
2694- # smaller (reduced via _replicate_n_valid_list). SurveyMetadata
2695- # is a mutable @dataclass (diff_diff/survey.py:681), so direct
2696- # attribute assignment is safe.
2697- if survey_metadata is not None :
2698- survey_metadata .df_survey = _effective_df_survey (
2699- resolved_survey , _replicate_n_valid_list
2694+ # R2 P1b: Finalize replicate-df propagation across public
2695+ # surfaces. When heterogeneity is active, its n_valid is
2696+ # appended to `_replicate_n_valid_list` AFTER the main
2697+ # surfaces (overall / joiners / leavers / event study /
2698+ # placebo horizon) have already computed their t/p/CI fields
2699+ # with an intermediate `_df_survey`. If heterogeneity reports
2700+ # the smallest n_valid, the main-surface inference would be
2701+ # anti-conservative relative to `survey_metadata.df_survey`
2702+ # and HonestDiD. Re-run safe_inference with the FINAL
2703+ # effective df so every surface agrees.
2704+ _final_eff_df = _effective_df_survey (
2705+ resolved_survey , _replicate_n_valid_list
2706+ )
2707+ if _replicate_n_valid_list :
2708+ _final_inf_df = _inference_df (_final_eff_df , resolved_survey )
2709+ overall_t , overall_p , overall_ci = safe_inference (
2710+ overall_att , overall_se ,
2711+ alpha = self .alpha , df = _final_inf_df ,
27002712 )
2713+ if joiners_available :
2714+ joiners_t , joiners_p , joiners_ci = safe_inference (
2715+ joiners_att , joiners_se ,
2716+ alpha = self .alpha , df = _final_inf_df ,
2717+ )
2718+ if leavers_available :
2719+ leavers_t , leavers_p , leavers_ci = safe_inference (
2720+ leavers_att , leavers_se ,
2721+ alpha = self .alpha , df = _final_inf_df ,
2722+ )
2723+ if multi_horizon_inference is not None :
2724+ for _lag_r2 , _info_r2 in list (multi_horizon_inference .items ()):
2725+ _t_r2 , _p_r2 , _ci_r2 = safe_inference (
2726+ _info_r2 ["effect" ], _info_r2 ["se" ],
2727+ alpha = self .alpha , df = _final_inf_df ,
2728+ )
2729+ _info_r2 ["t_stat" ] = _t_r2
2730+ _info_r2 ["p_value" ] = _p_r2
2731+ _info_r2 ["conf_int" ] = _ci_r2
2732+ if placebo_horizon_inference is not None :
2733+ for _lag_r2 , _info_r2 in list (placebo_horizon_inference .items ()):
2734+ _t_r2 , _p_r2 , _ci_r2 = safe_inference (
2735+ _info_r2 ["effect" ], _info_r2 ["se" ],
2736+ alpha = self .alpha , df = _final_inf_df ,
2737+ )
2738+ _info_r2 ["t_stat" ] = _t_r2
2739+ _info_r2 ["p_value" ] = _p_r2
2740+ _info_r2 ["conf_int" ] = _ci_r2
2741+ if heterogeneity_effects :
2742+ for _lag_r2 , _info_r2 in list (heterogeneity_effects .items ()):
2743+ if np .isfinite (_info_r2 ["se" ]):
2744+ _t_r2 , _p_r2 , _ci_r2 = safe_inference (
2745+ _info_r2 ["beta" ], _info_r2 ["se" ],
2746+ alpha = self .alpha , df = _final_inf_df ,
2747+ )
2748+ _info_r2 ["t_stat" ] = _t_r2
2749+ _info_r2 ["p_value" ] = _p_r2
2750+ _info_r2 ["conf_int" ] = _ci_r2
2751+
2752+ # Persist the final effective df_survey into survey_metadata so
2753+ # downstream consumers — HonestDiD bounds (honest_did.py:973
2754+ # reads results.survey_metadata.df_survey), exported metadata,
2755+ # and users — all see the same df that the recomputed
2756+ # inference above used. SurveyMetadata is a mutable @dataclass
2757+ # (diff_diff/survey.py:681), so direct attribute assignment is
2758+ # safe.
2759+ if survey_metadata is not None :
2760+ survey_metadata .df_survey = _final_eff_df
27012761
27022762 results = ChaisemartinDHaultfoeuilleResults (
27032763 overall_att = effective_overall_att ,
@@ -3496,17 +3556,18 @@ def _compute_heterogeneity_test(
34963556 obs_gids_raw = np .asarray (obs_survey_info ["group_ids" ])
34973557 obs_w_raw = np .asarray (obs_survey_info ["weights" ], dtype = np .float64 )
34983558 resolved = obs_survey_info ["resolved" ]
3499- # df_s starts from the design-level df (n_psu - n_strata under TSL,
3500- # R - 1 under replicate). It may be reduced further by replicate
3501- # failures reported via replicate_n_valid_list (this function
3502- # appends its own n_valid observations).
3503- if (
3504- replicate_n_valid_list is not None
3505- and len (replicate_n_valid_list ) > 0
3506- ):
3507- df_s = int (min (replicate_n_valid_list )) - 1
3508- else :
3509- df_s = resolved .df_survey if resolved is not None else None
3559+ # df_s starts from the shared effective df (base-capped by
3560+ # design df). Under replicate designs the helper handles the
3561+ # min(resolved.df_survey, min(n_valid) - 1) reduction and
3562+ # preserves None when the base df is undefined (QR-rank ≤ 1).
3563+ # Using the helper here — rather than re-deriving locally —
3564+ # keeps heterogeneity's df consistent with the main dCDH
3565+ # surfaces (R2 P1a). `list(... or [])` avoids accidental
3566+ # mutation of the caller's shared tracker at this site; the
3567+ # explicit append happens inside the horizon loop below.
3568+ df_s = _effective_df_survey (
3569+ resolved , list (replicate_n_valid_list or [])
3570+ )
35103571 # Contract: only obs whose group is in the canonical post-filter
35113572 # list contribute. Groups dropped upstream (Step 5b interior gaps,
35123573 # Step 6 multi-switch) appear in obs_gids_raw but must be
@@ -3674,9 +3735,17 @@ def _compute_heterogeneity_test(
36743735 if replicate_n_valid_list is not None :
36753736 replicate_n_valid_list .append (n_valid_het )
36763737 # Reduce df_s to reflect this horizon's n_valid.
3677- df_s_local = int (n_valid_het ) - 1 if df_s is None else min (
3678- df_s , int (n_valid_het ) - 1
3679- )
3738+ # R2 P1a: when the shared base-capped df_s is None
3739+ # (undefined base df, e.g., QR-rank ≤ 1), the
3740+ # heterogeneity df MUST stay None — per-site n_valid
3741+ # cannot rescue a rank-deficient design. The
3742+ # _inference_df wrapper at the safe_inference call
3743+ # below coerces None to 0 under replicate, forcing
3744+ # NaN inference.
3745+ if df_s is None :
3746+ df_s_local = None
3747+ else :
3748+ df_s_local = min (int (df_s ), int (n_valid_het ) - 1 )
36803749 else :
36813750 var_s = compute_survey_if_variance (psi_obs , resolved )
36823751 df_s_local = df_s
@@ -3686,7 +3755,8 @@ def _compute_heterogeneity_test(
36863755 else float ("nan" )
36873756 )
36883757 t_stat , p_val , ci = safe_inference (
3689- beta_het , se_het , alpha = alpha , df = df_s_local
3758+ beta_het , se_het , alpha = alpha ,
3759+ df = _inference_df (df_s_local , resolved ),
36903760 )
36913761
36923762 results [l_h ] = {
@@ -5006,6 +5076,36 @@ def _validate_group_constant_strata_psu(
50065076 )
50075077
50085078
5079+ def _inference_df (
5080+ effective_df : Optional [int ],
5081+ resolved_survey : Any ,
5082+ ) -> Optional [int ]:
5083+ """Coerce an effective-df value for use in ``safe_inference(df=...)``.
5084+
5085+ ``_effective_df_survey()`` returns ``None`` when the replicate
5086+ design's base df is undefined (QR-rank ≤ 1) or when the reduced
5087+ df would be < 1. ``safe_inference`` treats ``df=None`` as the
5088+ standard normal (z-inference) and only returns NaN for
5089+ ``df <= 0``. Under a replicate design, "undefined effective df"
5090+ MUST map to NaN inference per REGISTRY.md — NOT to z-inference.
5091+
5092+ Returns:
5093+ - ``effective_df`` when defined (t-inference with that df).
5094+ - ``0`` when ``effective_df is None`` AND ``resolved_survey`` uses
5095+ replicate variance — forces ``safe_inference`` to return NaN
5096+ t/p/CI via its ``df <= 0`` branch.
5097+ - ``None`` otherwise (no survey, or TSL design where the resolver
5098+ always returns an int df — z-inference is never reachable here).
5099+ """
5100+ if effective_df is not None :
5101+ return effective_df
5102+ if resolved_survey is None :
5103+ return None
5104+ if getattr (resolved_survey , "uses_replicate_variance" , False ):
5105+ return 0
5106+ return None
5107+
5108+
50095109def _effective_df_survey (
50105110 resolved_survey : Any ,
50115111 replicate_n_valid_list : List [int ],
0 commit comments