Skip to content

Commit e0286d2

Browse files
igerber and claude committed
Add Proposition 5 detection for unidentified long-run horizons
Distinguish Prop 5 horizons (treated observations exist, but the counterfactual is unidentified) from zero-observation horizons. Prop 5 horizons get n_obs > 0 with NaN inference and a warning, matching ImputationDiD behavior.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8187868 commit e0286d2

3 files changed

Lines changed: 80 additions & 20 deletions

File tree

diff_diff/two_stage.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1206,8 +1206,39 @@ def _stage2_event_study(
12061206
else:
12071207
balance_mask = np.ones(n, dtype=bool)
12081208

1209-
# Remove reference period from estimation horizons
1210-
est_horizons = [h for h in all_horizons if h != ref_period]
1209+
# Check Proposition 5: no never-treated units
1210+
has_never_treated = df["_never_treated"].any()
1211+
h_bar = np.inf
1212+
if not has_never_treated and len(treatment_groups) > 1:
1213+
h_bar = max(treatment_groups) - min(treatment_groups)
1214+
1215+
# Identify Prop 5 horizons and compute their actual treated obs counts.
1216+
# Treated obs have NaN y_tilde at these horizons (counterfactual
1217+
# unidentified), but actual_n counts them to distinguish from truly
1218+
# empty horizons. rel_times is NaN for untreated/never-treated obs
1219+
# (line ~653), so (rel_times == h) is False for them.
1220+
prop5_horizons = []
1221+
prop5_effects: Dict[int, Dict[str, Any]] = {}
1222+
if h_bar < np.inf:
1223+
for h in all_horizons:
1224+
if h == ref_period:
1225+
continue
1226+
if h >= h_bar:
1227+
actual_n = int(np.sum((rel_times == h) & omega_1_mask.values & balance_mask))
1228+
if actual_n > 0:
1229+
prop5_horizons.append(h)
1230+
prop5_effects[h] = {
1231+
"effect": np.nan,
1232+
"se": np.nan,
1233+
"t_stat": np.nan,
1234+
"p_value": np.nan,
1235+
"conf_int": (np.nan, np.nan),
1236+
"n_obs": actual_n,
1237+
}
1238+
1239+
# Remove reference period AND Prop 5 horizons from estimation
1240+
prop5_set = set(prop5_horizons)
1241+
est_horizons = [h for h in all_horizons if h != ref_period and h not in prop5_set]
12111242

12121243
if len(est_horizons) == 0:
12131244
# No horizons to estimate — return just reference period
@@ -1308,6 +1339,17 @@ def _stage2_event_study(
13081339
"n_obs": n_obs,
13091340
}
13101341

1342+
# Add Proposition 5 entries (unidentified horizons with n_obs > 0)
1343+
event_study_effects.update(prop5_effects)
1344+
1345+
if prop5_horizons:
1346+
warnings.warn(
1347+
f"Horizons {prop5_horizons} are not identified without "
1348+
f"never-treated units (Proposition 5). Set to NaN.",
1349+
UserWarning,
1350+
stacklevel=2,
1351+
)
1352+
13111353
return event_study_effects
13121354

13131355
def _stage2_group(
@@ -1869,6 +1911,15 @@ def _run_bootstrap(
18691911
balance_mask = np.ones(n, dtype=bool)
18701912

18711913
est_horizons = [h for h in all_horizons if h != ref_period]
1914+
1915+
# Filter out Prop 5 horizons (same logic as _stage2_event_study)
1916+
has_never_treated = df["_never_treated"].any()
1917+
h_bar_boot = np.inf
1918+
if not has_never_treated and len(treatment_groups) > 1:
1919+
h_bar_boot = max(treatment_groups) - min(treatment_groups)
1920+
if h_bar_boot < np.inf:
1921+
est_horizons = [h for h in est_horizons if h < h_bar_boot]
1922+
18721923
if est_horizons:
18731924
horizon_to_col = {h: j for j, h in enumerate(est_horizons)}
18741925
k_es = len(est_horizons)
@@ -1911,6 +1962,8 @@ def _run_bootstrap(
19111962
for h in original_event_study:
19121963
if original_event_study[h].get("n_obs", 0) == 0:
19131964
continue
1965+
if np.isnan(original_event_study[h]["effect"]):
1966+
continue # Skip Prop 5 and other NaN-effect horizons
19141967
if h not in horizon_to_col:
19151968
continue
19161969
j = horizon_to_col[h]

docs/methodology/REGISTRY.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -619,8 +619,8 @@ Our implementation uses multiplier bootstrap on the GMM influence function: clus
619619
- **NaN inference for undefined statistics:** t_stat uses NaN when SE is non-finite or zero; p_value and CI also NaN. Matches CallawaySantAnna/ImputationDiD NaN convention.
620620
- **Event study aggregation:** Horizon-specific effects use the same two-stage procedure with horizon indicator dummies in Stage 2. Unidentified horizons (e.g., long-run effects without never-treated units, per Proposition 5 of Borusyak et al. 2024) produce NaN.
621621
- **balance_e with no qualifying cohorts:** If no cohorts have sufficient pre/post coverage for the requested `balance_e`, a warning is emitted and event study results contain only the reference period.
622-
- **No never-treated units:** Long-run effects may be unidentified (same limitation as ImputationDiD). Warning emitted for affected horizons.
623-
- **Zero-observation horizons after filtering:** When `balance_e` or NaN `y_tilde` filtering results in zero observations for some event study horizons, those horizons produce NaN for all inference fields (effect, SE, t-stat, p-value, CI) with n_obs=0. This differs from the Proposition 5 case (unidentified long-run effects) which has observations but unidentified counterfactual.
622+
- **No never-treated units (Proposition 5):** When there are no never-treated units and more than one treatment cohort, event study horizons h >= h_bar are unidentified per Proposition 5 of Borusyak et al. (2024), where h_bar = max(groups) - min(groups) is the gap between the latest and earliest cohort first-treatment periods (e.g., cohorts first treated at periods 3, 5, and 7 give h_bar = 4, so horizons 4 and above are unidentified). Such horizons that contain treated observations produce NaN for all inference fields with n_obs > 0 (treated observations exist but the counterfactual is unidentified), and a warning lists the affected horizons. Matches ImputationDiD behavior. This Proposition 5 handling applies to event study horizons only, not to cohort aggregation — a cohort whose treated observations all fall at Prop 5 horizons naturally gets n_obs=0 in group effects because all of its y_tilde values are NaN.
623+
- **Zero-observation horizons after filtering:** When `balance_e` or NaN `y_tilde` filtering results in zero observations for some non-Prop-5 event study horizons, those horizons produce NaN for all inference fields (effect, SE, t-stat, p-value, CI) with n_obs=0.
624624
- **Zero-observation cohorts in group effects:** If all treated observations for a cohort have NaN `y_tilde` (excluded from estimation), that cohort's group effect is NaN with n_obs=0.
625625

626626
**Reference implementation(s):**

tests/test_two_stage.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -794,35 +794,42 @@ def test_balance_e_empty_cohorts_warns(self):
794794
ref_key = list(results.event_study_effects.keys())[0]
795795
assert results.event_study_effects[ref_key]["n_obs"] == 0
796796

797-
def test_event_study_nan_for_zero_obs_horizons(self):
798-
"""Zero-observation horizons from NaN y_tilde produce NaN inference."""
797+
def test_proposition_5_nan_for_long_run_horizons(self):
798+
"""Prop 5 horizons have n_obs > 0 but NaN inference (unidentified)."""
799799
# No never-treated: cohorts 3, 5, 7; periods 0-9.
800-
# Periods 7-9 have zero untreated obs → NaN y_tilde.
801-
# Horizon 4 = cohort 3 at period 7 (NaN) + cohort 5 at period 9 (NaN) → 0 obs.
802-
# Horizons 5, 6 = cohort 3 at periods 8, 9 (NaN) → 0 obs.
803-
# Horizons 0-3 have valid observations from multiple cohorts.
800+
# h_bar = max(groups) - min(groups) = 7 - 3 = 4.
801+
# Horizons 0-3: identified, valid effects.
802+
# Horizons 4, 5, 6: Prop 5 unidentified — treated obs exist but
803+
# counterfactual is unidentified without never-treated units.
804804
data = generate_test_data(never_treated_frac=0.0)
805-
results = TwoStageDiD().fit(
806-
data,
807-
outcome="outcome",
808-
unit="unit",
809-
time="time",
810-
first_treat="first_treat",
811-
aggregate="event_study",
812-
)
805+
806+
with warnings.catch_warnings(record=True) as w:
807+
warnings.simplefilter("always")
808+
results = TwoStageDiD().fit(
809+
data,
810+
outcome="outcome",
811+
unit="unit",
812+
time="time",
813+
first_treat="first_treat",
814+
aggregate="event_study",
815+
)
813816

814817
assert results.event_study_effects is not None
815818

819+
# Check Prop 5 warning was emitted
820+
prop5_warnings = [x for x in w if "not identified without never-treated" in str(x.message)]
821+
assert len(prop5_warnings) > 0, "Proposition 5 warning should be emitted"
822+
816823
# Horizons 0-3 should have observations and finite effects
817824
for h in range(0, 4):
818825
eff = results.event_study_effects[h]
819826
assert eff["n_obs"] > 0, f"Horizon {h} should have observations"
820827
assert np.isfinite(eff["effect"]), f"Horizon {h} effect should be finite"
821828

822-
# Horizons 4, 5, 6 should have zero obs and NaN inference
829+
# Horizons 4, 5, 6: Prop 5 — n_obs > 0 but NaN inference
823830
for h in [4, 5, 6]:
824831
eff = results.event_study_effects[h]
825-
assert eff["n_obs"] == 0, f"Horizon {h} should have 0 observations"
832+
assert eff["n_obs"] > 0, f"Horizon {h} should have n_obs > 0 (Prop 5)"
826833
assert np.isnan(eff["effect"]), f"Horizon {h} effect should be NaN"
827834
assert np.isnan(eff["se"]), f"Horizon {h} SE should be NaN"
828835
assert np.isnan(eff["t_stat"]), f"Horizon {h} t_stat should be NaN"

0 commit comments

Comments
 (0)