@@ -3516,55 +3516,14 @@ def joint_pretrends_test(
35163516 f"(base_period={ base_period !r} )."
35173517 )
35183518
3519- # ---- trends_lin: identify the consumed placebo (base_period - 1)
3520- # and drop it from the test set BEFORE aggregation.
3521- # The F-2 → F-1 evolution is "consumed" by the per-group slope
3522- # estimator: at t = base_period - 1 the detrended dy_t = dy_t -
3523- # (-1) × slope = (Y_{base-1} - Y_base) + (Y_base - Y_{base-1}) = 0
3524- # for every unit. Feeding that all-zero residual into
3525- # `stute_joint_pretest` would trip the exact-linear short-circuit
3526- # and report a mechanical p_value=1.0 — a confidently-non-rejecting
3527- # placebo that is actually no placebo at all. Drop it explicitly,
3528- # mirroring R's "max placebo lag reduces by 1" convention and our
3529- # HAD.fit `e=-2` drop. Emits UserWarning when this filter fires
3530- # so the caller knows their `pre_periods` was modified.
3519+ # ---- trends_lin: defer the consumed-placebo drop and base-1
3520+ # identification until AFTER the validator block runs, so that
3521+ # t_pre_list can drive both the non-terminal-base guard and the
3522+ # observed-period predecessor lookup. On panels with fewer than 3
3523+ # periods the validator does not run and F-2 cannot exist, so
3524+ # trends_lin is rejected up front, right after n_periods is computed.
35313525 base_minus_1_period : Any = None
35323526 pre_periods_effective = list (pre_periods )
3533- if trends_lin :
3534- for p , r in period_rank .items ():
3535- if r == base_rank - 1 :
3536- base_minus_1_period = p
3537- break
3538- if base_minus_1_period is None :
3539- raise ValueError (
3540- f"joint_pretrends_test(trends_lin=True) requires the "
3541- f"period immediately before base_period={ base_period !r} "
3542- f"to exist in the panel (rank { base_rank - 1 } ). The "
3543- f"per-group linear-trend slope Y[g, base] - Y[g, base-1] "
3544- f"is not identified without it. Available periods: "
3545- f"{ sorted (period_rank .keys (), key = lambda t : period_rank [t ])!r} ."
3546- )
3547- if base_minus_1_period in pre_periods_effective :
3548- warnings .warn (
3549- f"joint_pretrends_test(trends_lin=True): dropping "
3550- f"period { base_minus_1_period !r} from pre_periods — it "
3551- f"is the 'consumed' placebo (the F-2 → F-1 evolution "
3552- f"used by the per-group slope estimator), so under "
3553- f"trends_lin its detrended residual is mechanically "
3554- f"zero. R's `did_had(trends_lin=TRUE)` reduces max "
3555- f"placebo lag by 1 with the same effect." ,
3556- UserWarning ,
3557- stacklevel = 2 ,
3558- )
3559- pre_periods_effective = [t for t in pre_periods_effective if t != base_minus_1_period ]
3560- if len (pre_periods_effective ) == 0 :
3561- raise ValueError (
3562- f"joint_pretrends_test(trends_lin=True): no testable "
3563- f"placebo horizons remain after dropping the consumed "
3564- f"placebo at base_period - 1 = { base_minus_1_period !r} . "
3565- f"Pass at least one earlier pre-period (rank < "
3566- f"{ base_rank - 1 } ) when using trends_lin=True."
3567- )
35683527
35693528 # Event-study validation contract (paper Appendix B.2):
35703529 # When the panel has >= 3 distinct periods, always route through
@@ -3577,6 +3536,13 @@ def joint_pretrends_test(
35773536 # panels the validator does not apply; skip and fall through to the
35783537 # simpler balance/invariant guards in `_aggregate_for_joint_test`.
35793538 n_periods = int (data [time_col ].nunique ())
3539+ if trends_lin and n_periods < 3 :
3540+ raise ValueError (
3541+ f"joint_pretrends_test(trends_lin=True) requires a panel "
3542+ f"with at least 3 distinct time periods so the per-group "
3543+ f"slope Y[g, base] - Y[g, base - 1] is identified. Got "
3544+ f"n_periods={ n_periods } ."
3545+ )
35803546 data_filtered : pd .DataFrame = data
35813547 if n_periods >= 3 :
35823548 F_val , t_pre_list , _t_post_list , data_filtered , _filter_info = (
@@ -3610,6 +3576,68 @@ def joint_pretrends_test(
36103576 f"periods. Not-pre entries: { not_pre !r} . Validator's "
36113577 f"pre-period set: { list (t_pre_list )!r} ."
36123578 )
3579+ # PR #392 R3 P1 (non-terminal base guard): paper Eq 17 / Eq 18
3580+ # and R `DIDHAD::did_had(..., trends_lin=TRUE)` anchor the
3581+ # detrending at F-1 (the last validated pre-period) and use
3582+ # Y[F-1] - Y[F-2] as the slope. A direct caller passing
3583+ # base_period < F-1 (e.g. F-2) would compute a different slope
3584+ # at a different anchor, silently changing the methodology
3585+ # away from the documented Eq 17/18 construction. Reject
3586+ # explicitly. Workflow + HAD.fit always pass F-1; this check
3587+ # only fires on direct user calls with non-terminal bases.
3588+ if trends_lin and base_period != t_pre_list [- 1 ]:
3589+ raise ValueError (
3590+ f"joint_pretrends_test(trends_lin=True) requires "
3591+ f"base_period to equal the last validated pre-period "
3592+ f"({ t_pre_list [- 1 ]!r} , the canonical Eq 17 anchor "
3593+ f"F-1). Got base_period={ base_period !r} . Anchoring at "
3594+ f"any other pre-period would compute a different "
3595+ f"slope and detrending that does not match paper "
3596+ f"Eq 17 / Eq 18 or R DIDHAD::did_had(trends_lin=TRUE)."
3597+ )
3598+ # PR #392 R3 P1 (observed-period base-1 lookup) + R1 P0
3599+ # (consumed-placebo drop) consolidated:
3600+ # base_minus_1_period = t_pre_list[-2] (= F-2, the validated
3601+ # observed pre-period immediately before F-1). Using
3602+ # t_pre_list ensures correctness on ordered-categorical panels
3603+ # with unused intermediate levels (the validator's t_pre_list
3604+ # is built from observed contiguous pre-periods, not from the
3605+ # full dtype's category list). Then drop t_pre_list[-2] from
3606+ # pre_periods if present (the consumed placebo whose detrended
3607+ # residual is mechanically zero).
3608+ if trends_lin :
3609+ if len (t_pre_list ) < 2 :
3610+ raise ValueError (
3611+ f"joint_pretrends_test(trends_lin=True) requires "
3612+ f"at least 2 validated pre-periods so the per-"
3613+ f"group slope Y[g, F-1] - Y[g, F-2] is identified. "
3614+ f"Got t_pre_list={ list (t_pre_list )!r} ."
3615+ )
3616+ base_minus_1_period = t_pre_list [- 2 ]
3617+ if base_minus_1_period in pre_periods_effective :
3618+ warnings .warn (
3619+ f"joint_pretrends_test(trends_lin=True): dropping "
3620+ f"period { base_minus_1_period !r} from pre_periods "
3621+ f"— it is the 'consumed' placebo (the F-2 → F-1 "
3622+ f"evolution used by the per-group slope "
3623+ f"estimator), so under trends_lin its detrended "
3624+ f"residual is mechanically zero. R's "
3625+ f"`did_had(trends_lin=TRUE)` reduces max placebo "
3626+ f"lag by 1 with the same effect." ,
3627+ UserWarning ,
3628+ stacklevel = 2 ,
3629+ )
3630+ pre_periods_effective = [
3631+ t for t in pre_periods_effective if t != base_minus_1_period
3632+ ]
3633+ if len (pre_periods_effective ) == 0 :
3634+ raise ValueError (
3635+ f"joint_pretrends_test(trends_lin=True): no testable "
3636+ f"placebo horizons remain after dropping the consumed "
3637+ f"placebo at base_period - 1 = { base_minus_1_period !r} . "
3638+ f"Pass at least one earlier observed pre-period when "
3639+ f"using trends_lin=True."
3640+ )
36133641
36143642 d_arr , dy_by_horizon , _ = _aggregate_for_joint_test (
36153643 data_filtered ,
@@ -3915,6 +3943,14 @@ def joint_homogeneity_test(
39153943 # time-varying post-dose would make the per-horizon refit on
39163944 # `[1, D_g]` misspecify the regressor.
39173945 n_periods = int (data [time_col ].nunique ())
3946+ if trends_lin and n_periods < 3 :
3947+ raise ValueError (
3948+ f"joint_homogeneity_test(trends_lin=True) requires a "
3949+ f"panel with at least 3 distinct time periods so the "
3950+ f"per-group slope Y[g, base] - Y[g, base - 1] is "
3951+ f"identified. Got n_periods={ n_periods } ."
3952+ )
3953+ base_minus_1_period_validated : Any = None # set inside validator block under trends_lin
39183954 data_filtered : pd .DataFrame = data
39193955 if n_periods >= 3 :
39203956 F_val , t_pre_list , t_post_list , data_filtered , _filter_info = (
@@ -3946,6 +3982,30 @@ def joint_homogeneity_test(
39463982 f"periods. Not-post entries: { not_post !r} . Validator's "
39473983 f"post-period set: { list (t_post_list )!r} ."
39483984 )
3985+ # PR #392 R3 P1 (non-terminal base guard + observed-period
3986+ # base-1 lookup, twin of joint_pretrends_test). Eq 17 anchors
3987+ # at F-1 and uses Y[F-1] - Y[F-2] as slope; require base ==
3988+ # t_pre_list[-1] AND derive base-1 from t_pre_list[-2].
3989+ if trends_lin and base_period != t_pre_list [- 1 ]:
3990+ raise ValueError (
3991+ f"joint_homogeneity_test(trends_lin=True) requires "
3992+ f"base_period to equal the last validated pre-period "
3993+ f"({ t_pre_list [- 1 ]!r} , the canonical Eq 17 anchor "
3994+ f"F-1). Got base_period={ base_period !r} . Anchoring at "
3995+ f"any other pre-period would compute a different "
3996+ f"slope and detrending that does not match paper "
3997+ f"Eq 17 / page 32 or R DIDHAD::did_had(trends_lin=TRUE)."
3998+ )
3999+ if trends_lin and len (t_pre_list ) < 2 :
4000+ raise ValueError (
4001+ f"joint_homogeneity_test(trends_lin=True) requires "
4002+ f"at least 2 validated pre-periods so the per-group "
4003+ f"slope Y[g, F-1] - Y[g, F-2] is identified. Got "
4004+ f"t_pre_list={ list (t_pre_list )!r} ."
4005+ )
4006+ # Capture the validator's predecessor for downstream use.
4007+ if trends_lin :
4008+ base_minus_1_period_validated = t_pre_list [- 2 ]
39494009
39504010 d_arr , dy_by_horizon , _ = _aggregate_for_joint_test (
39514011 data_filtered ,
@@ -3988,20 +4048,13 @@ def joint_homogeneity_test(
39884048 # dy_t. The post-period delta = t_rank - base_rank > 0, so the
39894049 # subtraction extrapolates the linear trend FORWARD into post-periods.
39904050 if trends_lin :
3991- base_minus_1_period_h : Any = None
3992- for p , r in period_rank .items ():
3993- if r == base_rank - 1 :
3994- base_minus_1_period_h = p
3995- break
3996- if base_minus_1_period_h is None :
3997- raise ValueError (
3998- f"joint_homogeneity_test(trends_lin=True) requires the "
3999- f"period immediately before base_period={ base_period !r} "
4000- f"to exist in the panel (rank { base_rank - 1 } ). The "
4001- f"per-group linear-trend slope Y[g, base] - Y[g, base-1] "
4002- f"is not identified without it. Available periods: "
4003- f"{ sorted (period_rank .keys (), key = lambda t : period_rank [t ])!r} ."
4004- )
4051+ # PR #392 R3 P1: use the validator's t_pre_list[-2] as the
4052+ # predecessor (captured above as base_minus_1_period_validated).
4053+ # This is robust to ordered-categorical panels with unused
4054+ # intermediate levels because the validator builds t_pre_list
4055+ # from observed contiguous pre-periods, not the full dtype
4056+ # category list.
4057+ base_minus_1_period_h = base_minus_1_period_validated
40054058 slope_subset_h = data_filtered [
40064059 data_filtered [time_col ].isin ([base_period , base_minus_1_period_h ])
40074060 ]
0 commit comments