Skip to content

Commit 2c6cabf

Browse files
igerberclaude
andcommitted
Fix CI review Round 1: NaN-consistency, overall surface, rank_deficient_action
P0: Cumulated DID^{fd} SE now requires ALL component SEs to be finite; non-finite SE at any horizon propagates NaN (was silently dropped). P1: trends_linear + L_max>=2 overall surface now reports cumulated level effects from linear_trends_effects (was second-difference delta). cost_benefit_delta suppressed under trends_linear (meaningless on second-differences). P2: rank_deficient_action threaded through _compute_covariate_residualization and _compute_heterogeneity_test (was hardcoded "warn"). P3: fit() docstrings updated for controls, trends_linear, trends_nonparam (were stale "Reserved for Phase 3" text). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f580dae commit 2c6cabf

2 files changed

Lines changed: 99 additions & 15 deletions

File tree

diff_diff/chaisemartin_dhaultfoeuille.py

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -525,14 +525,18 @@ def fit(
525525
Must be a positive integer not exceeding the number of
526526
post-baseline periods in the panel.
527527
controls : list of str, optional
528-
**Reserved for Phase 3** (covariate adjustment via the
529-
residualization-style ``DID^X`` from Web Appendix Section 1.2
530-
of the dynamic paper).
528+
Column names for covariate adjustment via residualization-style
529+
``DID^X`` (Web Appendix Section 1.2). Requires ``L_max >= 1``.
530+
One ``theta_hat`` per baseline treatment value, estimated by
531+
OLS on not-yet-treated observations. NOT doubly-robust.
531532
trends_linear : bool, optional
532-
**Reserved for Phase 3** (group-specific linear trends via
533-
``DID^{fd}``).
534-
trends_nonparam : Any, optional
535-
**Reserved for Phase 3** (state-set-specific trends).
533+
If ``True``, estimate group-specific linear trends via
534+
``DID^{fd}`` (Web Appendix Section 1.3, Lemma 6). Requires
535+
``L_max >= 1`` and at least 3 time periods.
536+
trends_nonparam : str, optional
537+
Column name for state-set membership. Restricts the control
538+
pool to groups in the same set (Web Appendix Section 1.4).
539+
Requires ``L_max >= 1`` and time-invariant values per group.
536540
honest_did : bool, default=False
537541
**Reserved for Phase 3** (HonestDiD integration on placebos).
538542
survey_design : Any, optional
@@ -959,6 +963,7 @@ def fit(
959963
N_mat=N_mat,
960964
baselines=baselines,
961965
first_switch_idx=first_switch_idx_arr,
966+
rank_deficient_action=self.rank_deficient_action,
962967
)
963968
# Keep raw Y_mat for the per-period DID path (which does not
964969
# support covariate residualization - it uses binary joiner/leaver
@@ -2073,12 +2078,20 @@ def fit(
20732078
cum_effect = float(
20742079
np.sum(S_arr[eligible] * running_per_group[eligible]) / N_l
20752080
)
2076-
# SE: conservative upper bound (sum of per-horizon SEs)
2077-
running_se_ub = sum(
2078-
event_study_effects.get(ll, {}).get("se", 0.0)
2079-
for ll in range(1, l_h + 1)
2080-
if np.isfinite(event_study_effects.get(ll, {}).get("se", np.nan))
2081-
) if event_study_effects is not None else float("nan")
2081+
# SE: conservative upper bound (sum of per-horizon SEs).
2082+
# NaN-consistency: if ANY component SE up to horizon l is
2083+
# non-finite, the cumulated SE is NaN (not 0.0).
2084+
if event_study_effects is not None:
2085+
component_ses = [
2086+
event_study_effects.get(ll, {}).get("se", np.nan)
2087+
for ll in range(1, l_h + 1)
2088+
]
2089+
if all(np.isfinite(s) for s in component_ses):
2090+
running_se_ub = sum(component_ses)
2091+
else:
2092+
running_se_ub = float("nan")
2093+
else:
2094+
running_se_ub = float("nan")
20822095
cum_t, cum_p, cum_ci = safe_inference(
20832096
cum_effect, running_se_ub, alpha=self.alpha, df=None
20842097
)
@@ -2091,6 +2104,22 @@ def fit(
20912104
}
20922105
linear_trends_effects = cumulated if cumulated else None
20932106

2107+
# When trends_linear=True and L_max>=2, suppress cost_benefit_delta
2108+
# (which is computed on second-differences) and set overall_* from
2109+
# the cumulated level effects instead. This prevents the results
2110+
# surface from labeling a second-difference aggregate as delta^{fd}
2111+
# (a level-effect estimand).
2112+
if _is_trends_linear and L_max is not None and L_max >= 2:
2113+
cost_benefit_result = None
2114+
if linear_trends_effects:
2115+
max_h = max(linear_trends_effects.keys())
2116+
lt = linear_trends_effects[max_h]
2117+
effective_overall_att = lt["effect"]
2118+
effective_overall_se = lt["se"]
2119+
effective_overall_t = lt["t_stat"]
2120+
effective_overall_p = lt["p_value"]
2121+
effective_overall_ci = lt["conf_int"]
2122+
20942123
# ------------------------------------------------------------------
20952124
# Heterogeneity testing (Web Appendix Section 1.5, Lemma 7)
20962125
# ------------------------------------------------------------------
@@ -2130,6 +2159,7 @@ def fit(
21302159
X_het=X_het,
21312160
L_max=L_max,
21322161
alpha=self.alpha,
2162+
rank_deficient_action=self.rank_deficient_action,
21332163
)
21342164

21352165
twfe_weights_df = None
@@ -2634,6 +2664,7 @@ def _compute_covariate_residualization(
26342664
N_mat: np.ndarray,
26352665
baselines: np.ndarray,
26362666
first_switch_idx: np.ndarray,
2667+
rank_deficient_action: str = "warn",
26372668
) -> Tuple[np.ndarray, Dict[str, Any]]:
26382669
"""Residualize outcomes by partialling out covariates per baseline treatment.
26392670
@@ -2750,7 +2781,7 @@ def _compute_covariate_residualization(
27502781
design,
27512782
dY,
27522783
return_vcov=True,
2753-
rank_deficient_action="warn",
2784+
rank_deficient_action=rank_deficient_action,
27542785
)
27552786

27562787
# Extract covariate coefficients (first n_covariates entries)
@@ -2837,6 +2868,7 @@ def _compute_heterogeneity_test(
28372868
X_het: np.ndarray,
28382869
L_max: int,
28392870
alpha: float = 0.05,
2871+
rank_deficient_action: str = "warn",
28402872
) -> Dict[int, Dict[str, Any]]:
28412873
"""Test for heterogeneous treatment effects (Web Appendix Section 1.5).
28422874
@@ -2938,7 +2970,7 @@ def _compute_heterogeneity_test(
29382970
coefs, _residuals, vcov = solve_ols(
29392971
design, dep_arr,
29402972
return_vcov=True,
2941-
rank_deficient_action="warn",
2973+
rank_deficient_action=rank_deficient_action,
29422974
)
29432975

29442976
beta_het = float(coefs[0])

tests/test_chaisemartin_dhaultfoeuille.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2550,6 +2550,58 @@ def test_trends_with_covariates(self):
25502550
assert r.covariate_residuals is not None
25512551
assert r.linear_trends_effects is not None
25522552

2553+
def test_trends_linear_lmax2_overall_surface(self):
2554+
"""Overall surface under trends_linear + L_max>=2 uses cumulated level effects."""
2555+
df = self._make_panel_with_trends()
2556+
r = ChaisemartinDHaultfoeuille(seed=1).fit(
2557+
df, "outcome", "group", "period", "treatment",
2558+
L_max=3, trends_linear=True,
2559+
)
2560+
# overall_att should equal the cumulated level effect at max horizon
2561+
assert r.linear_trends_effects is not None
2562+
max_h = max(r.linear_trends_effects.keys())
2563+
cum_effect = r.linear_trends_effects[max_h]["effect"]
2564+
assert r.overall_att == pytest.approx(cum_effect, abs=1e-10)
2565+
# cost_benefit_delta should be suppressed (not computed on second-diffs)
2566+
assert r.cost_benefit_delta is None
2567+
2568+
def test_cumulated_se_nan_propagation(self):
2569+
"""Cumulated SE is NaN when a component horizon has NaN SE."""
2570+
# Create a panel where horizon 2 has no eligible switchers (NaN SE)
2571+
# but horizon 1 does. The cumulated effect at h=2 should have NaN SE.
2572+
rng = np.random.RandomState(77)
2573+
rows = []
2574+
for g in range(30):
2575+
group_fe = rng.normal(0, 1)
2576+
# Groups 0-9: switch at period 3 (enough pre-switch for trends)
2577+
# Groups 10-19: never switch (controls)
2578+
# Groups 20-29: switch at period 4 (only 1 post-switch period)
2579+
if g < 10:
2580+
switch_t = 3
2581+
elif g < 20:
2582+
switch_t = 99
2583+
else:
2584+
switch_t = 4
2585+
for t in range(5):
2586+
d = 1 if t >= switch_t else 0
2587+
y = group_fe + t + 3 * d + rng.normal(0, 0.3)
2588+
rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
2589+
df = pd.DataFrame(rows)
2590+
r = ChaisemartinDHaultfoeuille(seed=1).fit(
2591+
df, "outcome", "group", "period", "treatment",
2592+
L_max=2, trends_linear=True,
2593+
)
2594+
# If SE at horizon 1 is finite but horizon 2 is NaN,
2595+
# cumulated h=2 SE must be NaN (not 0.0)
2596+
if r.linear_trends_effects is not None and 2 in r.linear_trends_effects:
2597+
cum_se = r.linear_trends_effects[2]["se"]
2598+
es = r.event_study_effects
2599+
if es and 2 in es and not np.isfinite(es[2]["se"]):
2600+
assert not np.isfinite(cum_se), (
2601+
f"Cumulated SE should be NaN when component h=2 SE is NaN, "
2602+
f"got {cum_se}"
2603+
)
2604+
25532605

25542606
class TestStateSetTrends:
25552607
"""State-set-specific trends (ROADMAP item 3c)."""

0 commit comments

Comments
 (0)