Skip to content

Commit df6db2c

Browse files
igerberclaude
andcommitted
Fix CI review Round 1: non-binary guard, L_max>=1 gate, placebo SE docs
P0: Add explicit ValueError for non-binary treatment + L_max=None. P0: Change multi-horizon gate from L_max>=2 to L_max>=1 so per-group DID_{g,1} path activates at L_max=1 (handles non-binary correctly). Populate overall_att from per-group l=1 when per-period path yields NaN. P1: Update REGISTRY Notes to document placebo SE as library extension (paper's Theorem 1 is for DID_l, placebo IF applies same structure to backward outcome differences). Update results docstring. P2: Fix test_non_binary_treatment_accepted to assert ValueError, add test_non_binary_treatment_with_lmax regression. Update test_L_max_1 to test per-group path behavior (not cross-path equality, since per-group and per-period are documented different estimands). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent fb9367b commit df6db2c

4 files changed

Lines changed: 76 additions & 29 deletions

File tree

diff_diff/chaisemartin_dhaultfoeuille.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,13 @@ def fit(
915915
# via per-group DID_{g,l}) will compute the effects. Only raise if
916916
# L_max is also None (i.e., no fallback path).
917917
is_binary = set(np.unique(D_mat[~np.isnan(D_mat)])).issubset({0.0, 1.0})
918+
if not is_binary and L_max is None:
919+
raise ValueError(
920+
"Non-binary treatment requires L_max >= 1. The per-period DID "
921+
"path uses binary joiner/leaver categorization; set L_max to "
922+
"use the per-group DID_{g,l} building block which handles "
923+
"non-binary treatment."
924+
)
918925
if N_S == 0 and (L_max is None or is_binary):
919926
raise ValueError(
920927
"No switching cells found in the data after filtering: every "
@@ -1037,7 +1044,7 @@ def fit(
10371044
multi_horizon_se: Optional[Dict[int, float]] = None
10381045
multi_horizon_inference: Optional[Dict[int, Dict[str, Any]]] = None
10391046

1040-
if L_max is not None and L_max >= 2:
1047+
if L_max is not None and L_max >= 1:
10411048
multi_horizon_dids = _compute_multi_horizon_dids(
10421049
D_mat=D_mat,
10431050
Y_mat=Y_mat,
@@ -1161,7 +1168,7 @@ def fit(
11611168
normalized_effects_dict: Optional[Dict[int, Dict[str, Any]]] = None
11621169
cost_benefit_result: Optional[Dict[str, Any]] = None
11631170

1164-
if L_max is not None and L_max >= 2 and multi_horizon_dids is not None:
1171+
if L_max is not None and L_max >= 1 and multi_horizon_dids is not None:
11651172
# Dynamic placebos DID^{pl}_l
11661173
if self.placebo:
11671174
multi_horizon_placebos = _compute_multi_horizon_placebos(
@@ -1368,7 +1375,7 @@ def fit(
13681375

13691376
# Phase 1 per-period placebo (L_max=None): SE is NaN because the
13701377
# per-period DID_M^pl aggregation path does not have an IF
1371-
# derivation. Multi-horizon placebos (L_max >= 2) use the per-group
1378+
# derivation. Multi-horizon placebos (L_max >= 1) use the per-group
13721379
# placebo IF computed above and have valid SE.
13731380
placebo_se = float("nan")
13741381
placebo_t = float("nan")
@@ -1378,7 +1385,7 @@ def fit(
13781385
warnings.warn(
13791386
"Single-period placebo SE (L_max=None) is NaN. The "
13801387
"per-period DID_M^pl aggregation path does not have an "
1381-
"influence-function derivation. Use L_max >= 2 for "
1388+
"influence-function derivation. Use L_max >= 1 for "
13821389
"multi-horizon placebos with valid SE. The placebo "
13831390
"point estimate (results.placebo_effect) is still "
13841391
"meaningful.",
@@ -1416,7 +1423,7 @@ def fit(
14161423
placebo_horizon_if is not None
14171424
and multi_horizon_placebos is not None
14181425
and L_max is not None
1419-
and L_max >= 2
1426+
and L_max >= 1
14201427
):
14211428
singleton_baseline_set_pl_b = set(singleton_baseline_groups)
14221429
eligible_mask_pl_b = np.array(
@@ -1464,7 +1471,7 @@ def fit(
14641471
and multi_horizon_dids is not None
14651472
and multi_horizon_se is not None
14661473
and L_max is not None
1467-
and L_max >= 2
1474+
and L_max >= 1
14681475
):
14691476
singleton_baseline_set_b = set(singleton_baseline_groups)
14701477
eligible_mask_b = np.array(
@@ -1565,10 +1572,19 @@ def fit(
15651572
# Step 20: Build the results dataclass
15661573
# ------------------------------------------------------------------
15671574
# event_study_effects: when L_max is None, l=1 mirrors Phase 1
1568-
# DID_M (per-period path). When L_max >= 2, ALL horizons including
1575+
# DID_M (per-period path). When L_max >= 1, ALL horizons including
15691576
# l=1 use the per-group DID_{g,l} path for a consistent estimand.
15701577
if multi_horizon_inference is not None and 1 in multi_horizon_inference:
1571-
# Phase 2 mode: use per-group path for all horizons
1578+
# Per-group mode: use per-group path for all horizons.
1579+
# Also populate overall_att from l=1 when per-period path
1580+
# yielded NaN (non-binary treatment or no binary switchers).
1581+
if np.isnan(overall_att):
1582+
l1_inf = multi_horizon_inference[1]
1583+
overall_att = l1_inf["effect"]
1584+
overall_se = l1_inf["se"]
1585+
overall_t = l1_inf["t_stat"]
1586+
overall_p = l1_inf["p_value"]
1587+
overall_ci = l1_inf["conf_int"]
15721588
event_study_effects: Dict[int, Dict[str, Any]] = dict(multi_horizon_inference)
15731589
else:
15741590
# Phase 1 mode (L_max=None): l=1 from per-period path

diff_diff/chaisemartin_dhaultfoeuille_results.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,9 @@ class DCDHBootstrapResults:
5353
analytical variance for ``DID_l`` only, not for the per-period
5454
``DID_M^pl``. The ``placebo_se`` / ``placebo_ci`` / ``placebo_p_value``
5555
fields below remain ``None`` for Phase 1. Multi-horizon placebos
56-
(``L_max >= 2``) have valid SE via ``placebo_horizon_ses``.
56+
(``L_max >= 1``) have valid SE via ``placebo_horizon_ses`` - this is
57+
a library extension applying the same IF/variance structure to the
58+
placebo estimand (see REGISTRY.md dynamic placebo SE Note).
5759
5860
Attributes
5961
----------

docs/methodology/REGISTRY.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ Dynamic placebos `DID^{pl}_l` look backward from each group's reference period,
543543

544544
- **Note (Phase 2 cost-benefit delta SE):** When `L_max >= 2`, `overall_att` holds the cost-benefit `delta`. Its SE is computed via the delta method from per-horizon SEs: `SE(delta) = sqrt(sum w_l^2 * SE(DID_l)^2)`, treating horizons as independent (conservative under Assumption 8). When bootstrap is enabled, per-horizon bootstrap SEs flow through the delta-method formula, so `overall_se` reflects bootstrap-derived per-horizon uncertainty but the delta aggregation itself uses normal-theory (not bootstrap percentile). This is an intentional exception to the general bootstrap-inference-surface contract: `overall_p_value` and `overall_conf_int` for `delta` use `safe_inference(delta, delta_se)`, not percentile bootstrap, because the delta is a derived aggregate rather than a directly bootstrapped estimand.
545545

546-
- **Note (Phase 2 dynamic placebo SE):** Dynamic placebos `DID^{pl}_l` (negative horizons in `placebo_event_study`) ship as point estimates with `NaN` inference in Phase 2. The placebo influence-function derivation follows the same cohort-recentered structure as the positive horizons but requires a separate IF computation for the backward outcome differences, which is deferred. The placebo point estimates are meaningful for visual pre-trends inspection; formal placebo inference will be added in a follow-up. Bootstrap placebo inference plumbing exists in the mixin but is not wired.
546+
- **Note (dynamic placebo SE - library extension):** Dynamic placebos `DID^{pl}_l` (negative horizons in `placebo_event_study`) now have analytical SE and bootstrap SE when `L_max >= 1`. The placebo IF uses the same cohort-recentered structure as positive horizons, applied to backward outcome differences `Y_{g, F_g-1-l} - Y_{g, F_g-1}` with the dual-eligibility control pool (forward + backward observation required). The paper's Theorem 1 variance result is stated for `DID_l`, not `DID^{pl}_l` - this extension applies the same IF/variance structure to the placebo estimand as a library enhancement. The single-period placebo `DID_M^pl` (`L_max=None`) retains NaN SE because the per-period aggregation path has no IF derivation.
547547

548548
*Standard errors (Web Appendix Section 3.7.3 of the dynamic companion paper):*
549549

@@ -583,7 +583,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param
583583

584584
- **Note:** The analytical CI is **conservative** under Assumption 8 (independent groups) of the dynamic companion paper, and exact only under iid sampling. This is documented as a deliberate deviation from "default nominal coverage". The bootstrap CI uses the same conservative weighting and is provided for users who want a non-asymptotic alternative.
585585

586-
- **Note:** Placebo SE is intentionally `NaN` for both the single-lag `DID_M^pl` and the dynamic placebos `DID^{pl}_l`. The placebo influence-function derivation is deferred (see the Phase 2 dynamic placebo SE Note above). Placebo point estimates are meaningful for visual pre-trends inspection; inference fields stay NaN-consistent even when `n_bootstrap > 0`.
586+
- **Note:** Placebo SE is `NaN` for the single-period `DID_M^pl` (`L_max=None`). Multi-horizon placebos (`L_max >= 1`) have valid analytical SE and bootstrap SE via the placebo IF (see the dynamic placebo SE Note above).
587587

588588
- **Note:** When every variance-eligible group forms its own `(D_{g,1}, F_g, S_g)` cohort (a degenerate small-panel case where the cohort framework has zero degrees of freedom), the cohort-recentered plug-in formula is unidentified: cohort recentering subtracts the cohort mean from each group's `U^G_g`, and for singleton cohorts the centered value is exactly zero, so the centered influence function vector collapses to all zeros. The estimator returns `overall_se = NaN` with a `UserWarning` rather than silently collapsing to `0.0` (which would falsely imply infinite precision). The `DID_M` point estimate remains well-defined. The bootstrap path inherits the same degeneracy on these panels — the multiplier weights act on an all-zero vector, so the bootstrap distribution is also degenerate. **Deviation from R `DIDmultiplegtDYN`:** R returns a non-zero SE on the canonical 4-group worked example via small-sample sandwich machinery that Python does not implement. Both responses are valid for a degenerate case; Python's `NaN`+warning is the safer default. To get a non-degenerate SE, include more groups so cohorts have peers (real-world panels typically have `G >> K`).
589589

tests/test_chaisemartin_dhaultfoeuille.py

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,8 @@ def test_missing_column_raises_value_error(self):
130130
treatment="treatment",
131131
)
132132

133-
def test_non_binary_treatment_accepted(self):
134-
"""Non-binary treatment is now supported."""
133+
def test_non_binary_treatment_requires_lmax(self):
134+
"""Non-binary treatment without L_max raises ValueError."""
135135
df = pd.DataFrame(
136136
{
137137
"group": [1, 1, 2, 2],
@@ -141,6 +141,30 @@ def test_non_binary_treatment_accepted(self):
141141
}
142142
)
143143
est = ChaisemartinDHaultfoeuille()
144+
with pytest.raises(ValueError, match="Non-binary treatment requires L_max"):
145+
est.fit(
146+
df,
147+
outcome="outcome",
148+
group="group",
149+
time="period",
150+
treatment="treatment",
151+
)
152+
153+
def test_non_binary_treatment_with_lmax(self):
154+
"""Non-binary treatment works with L_max=1."""
155+
np.random.seed(77)
156+
rows = []
157+
for g in range(20):
158+
for t in range(6):
159+
d = 0 if t < 3 else 2 # non-binary jump
160+
y = 10 + t + d * 1.5 + np.random.randn() * 0.3
161+
rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
162+
for g in range(20, 40):
163+
for t in range(6):
164+
y = 10 + t + np.random.randn() * 0.3
165+
rows.append({"group": g, "period": t, "treatment": 0, "outcome": y})
166+
df = pd.DataFrame(rows)
167+
est = ChaisemartinDHaultfoeuille(twfe_diagnostic=False)
144168
with warnings.catch_warnings():
145169
warnings.simplefilter("ignore")
146170
results = est.fit(
@@ -149,6 +173,7 @@ def test_non_binary_treatment_accepted(self):
149173
group="group",
150174
time="period",
151175
treatment="treatment",
176+
L_max=1,
152177
)
153178
assert np.isfinite(results.overall_att)
154179

@@ -1795,23 +1820,27 @@ def test_L_max_none_preserves_phase1_behavior(self, data):
17951820
assert r.sup_t_bands is None
17961821
assert r.placebo_event_study is None
17971822

1798-
def test_L_max_1_equivalent_to_none(self, data):
1799-
"""L_max=1 produces same DID_1 as L_max=None."""
1823+
def test_L_max_1_uses_per_group_path(self, data):
1824+
"""L_max=1 uses the per-group DID_{g,1} path (same as L_max >= 2
1825+
uses for l=1). This is a different estimand from the per-period
1826+
DID_M path used by L_max=None - documented as a REGISTRY Note."""
18001827
est = ChaisemartinDHaultfoeuille(placebo=False, twfe_diagnostic=False)
1801-
r_none = est.fit(
1802-
data, outcome="outcome", group="group", time="period", treatment="treatment"
1803-
)
1804-
r_one = est.fit(
1805-
data,
1806-
outcome="outcome",
1807-
group="group",
1808-
time="period",
1809-
treatment="treatment",
1810-
L_max=1,
1811-
)
1812-
assert r_one.event_study_effects[1]["effect"] == pytest.approx(
1813-
r_none.event_study_effects[1]["effect"]
1814-
)
1828+
with warnings.catch_warnings():
1829+
warnings.simplefilter("ignore")
1830+
r_one = est.fit(
1831+
data,
1832+
outcome="outcome",
1833+
group="group",
1834+
time="period",
1835+
treatment="treatment",
1836+
L_max=1,
1837+
)
1838+
# Per-group path produces finite estimate and SE
1839+
assert np.isfinite(r_one.event_study_effects[1]["effect"])
1840+
assert np.isfinite(r_one.event_study_effects[1]["se"])
1841+
assert np.isfinite(r_one.overall_att)
1842+
# L_max=1 should have exactly 1 horizon
1843+
assert set(r_one.event_study_effects.keys()) == {1}
18151844

18161845
def test_L_max_populates_event_study_effects(self, data):
18171846
"""L_max=3 populates horizons {1, 2, 3} in event_study_effects."""

0 commit comments

Comments
 (0)