Fix CI review Round 3: L_max=1 bootstrap sync, DID_1 label, docs alignment

igerber · claude · igerber · commit e59840d2eefe · 2026-04-12T19:39:44.000-04:00
P1: Sync overall_* from event_study_effects[1] AFTER bootstrap propagation
    so bootstrap SE/p/CI flow to top-level surface for L_max=1.
P1: Label overall estimand as "DID_1" (not "DID_M") when L_max=1 in
    __repr__. Update REGISTRY and results docstrings to document L_max=1
    as per-group DID_1 path.
P2: Add binary and non-binary L_max=1 bootstrap regression tests asserting
    overall_* == event_study_effects[1].
P3: Update TODO.md, REGISTRY L_max >= 2 references to L_max >= 1.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/TODO.md b/TODO.md
@@ -56,7 +56,7 @@ Deferred items from PR reviews that were not addressed before merge.
 
 | Issue | Location | PR | Priority |
 |-------|----------|----|----------|
-| dCDH: Phase 1 per-period placebo DID_M^pl has NaN SE (no IF derivation for the per-period aggregation path). Multi-horizon placebos (L_max >= 2) have valid SE. | `chaisemartin_dhaultfoeuille.py` | #294 | Low |
+| dCDH: Phase 1 per-period placebo DID_M^pl has NaN SE (no IF derivation for the per-period aggregation path). Multi-horizon placebos (L_max >= 1) have valid SE. | `chaisemartin_dhaultfoeuille.py` | #294 | Low |
 | dCDH: Parity test SE/CI assertions only cover pure-direction scenarios; mixed-direction SE comparison is structurally apples-to-oranges (cell-count vs obs-count weighting). | `test_chaisemartin_dhaultfoeuille_parity.py` | #294 | Low |
 | CallawaySantAnna: consider materializing NaN entries for non-estimable (g,t) cells in group_time_effects dict (currently omitted with consolidated warning); would require updating downstream consumers (event study, balance_e, aggregation) | `staggered.py` | #256 | Low |
 | ImputationDiD dense `(A0'A0).toarray()` scales O((U+T+K)^2), OOM risk on large panels | `imputation.py` | #141 | Medium (deferred — only triggers when sparse solver fails) |
diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -1649,6 +1649,21 @@ def fit(
                             eff + crit * se,
                         )
 
+        # When L_max >= 1 and the per-group path is active, sync
+        # overall_* from event_study_effects[1] AFTER bootstrap propagation
+        # so that bootstrap SE/p/CI flow to the top-level surface.
+        if (
+            L_max is not None
+            and L_max >= 1
+            and 1 in event_study_effects
+        ):
+            es1 = event_study_effects[1]
+            overall_att = es1["effect"]
+            overall_se = es1["se"]
+            overall_t = es1["t_stat"]
+            overall_p = es1["p_value"]
+            overall_ci = es1["conf_int"]
+
         # Phase 2: override overall_att with cost-benefit delta when L_max > 1
         effective_overall_att = overall_att
         effective_overall_se = overall_se
diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py
@@ -303,9 +303,11 @@ class ChaisemartinDHaultfoeuilleResults:
         Significance level used for confidence intervals.
     event_study_effects : dict, optional
         Populated with horizon ``1`` when ``L_max=None``, or horizons
-        ``1..L_max`` when ``L_max >= 2``.
+        ``1..L_max`` when ``L_max >= 1``. When ``L_max >= 1``, uses the
+        per-group ``DID_{g,l}`` path; when ``L_max=None``, uses the
+        per-period ``DID_M`` path.
     normalized_effects : dict, optional
-        Normalized estimator ``DID^n_l``. Populated when ``L_max >= 2``.
+        Normalized estimator ``DID^n_l``. Populated when ``L_max >= 1``.
     cost_benefit_delta : dict, optional
         Cost-benefit aggregate ``delta``. Populated when ``L_max >= 2``.
     sup_t_bands : dict, optional
@@ -410,7 +412,12 @@ class ChaisemartinDHaultfoeuilleResults:
     def __repr__(self) -> str:
         """Concise string representation."""
         sig = _get_significance_stars(self.overall_p_value)
-        label = "delta" if self.L_max is not None and self.L_max >= 2 else "DID_M"
+        if self.L_max is not None and self.L_max >= 2:
+            label = "delta"
+        elif self.L_max is not None and self.L_max == 1:
+            label = "DID_1"
+        else:
+            label = "DID_M"
         return (
             f"ChaisemartinDHaultfoeuilleResults("
             f"{label}={self.overall_att:.4f}{sig}, "
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -517,7 +517,7 @@ DID_M^pl = (1/N_S^pl) * sum_{t>=3} (
 
 *Phase 2: Multi-horizon event study (Equation 3 and 5 of the dynamic companion paper):*
 
-When `L_max >= 2`, the estimator computes the per-group building block `DID_{g,l}` and the aggregate `DID_l` for each horizon:
+When `L_max=None`, the per-period `DID_M` path is used. When `L_max=1`, `overall_att` holds `DID_1` (the per-group estimand, not the per-period `DID_M`). When `L_max >= 2`, `overall_att` holds the cost-benefit delta. When `L_max >= 1`, the estimator computes the per-group building block `DID_{g,l}` and the aggregate `DID_l` for each horizon:
 
 ```
 DID_{g,l} = Y_{g, F_g-1+l} - Y_{g, F_g-1}
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -1820,6 +1820,24 @@ def test_L_max_none_preserves_phase1_behavior(self, data):
         assert r.sup_t_bands is None
         assert r.placebo_event_study is None
 
+    def test_L_max_1_bootstrap_overall_matches_es1(self, data, ci_params):
+        """With L_max=1 + bootstrap, overall_* must match event_study_effects[1]."""
+        n_boot = ci_params.bootstrap(99)
+        est = ChaisemartinDHaultfoeuille(
+            placebo=False, twfe_diagnostic=False, n_bootstrap=n_boot, seed=42
+        )
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = est.fit(
+                data, outcome="outcome", group="group", time="period",
+                treatment="treatment", L_max=1,
+            )
+        es1 = r.event_study_effects[1]
+        assert r.overall_att == es1["effect"]
+        assert r.overall_se == es1["se"]
+        assert r.overall_p_value == es1["p_value"]
+        assert r.overall_conf_int == es1["conf_int"]
+
     def test_L_max_1_uses_per_group_path(self, data):
         """L_max=1 uses the per-group DID_{g,1} path (same as L_max >= 2
         uses for l=1). This is a different estimand from the per-period
@@ -2370,7 +2388,8 @@ def test_mixed_binary_nonbinary_panel_lmax1(self):
         assert r.overall_att == r.event_study_effects[1]["effect"]
 
     def test_nonbinary_bootstrap(self, ci_params):
-        """Non-binary panel with bootstrap should produce finite event study SEs."""
+        """Non-binary panel with bootstrap: finite event study SEs AND
+        top-level overall_* matches event_study_effects[1]."""
         np.random.seed(66)
         n_boot = ci_params.bootstrap(99)
         rows = []
@@ -2397,6 +2416,11 @@ def test_nonbinary_bootstrap(self, ci_params):
         assert r.bootstrap_results.event_study_ses is not None
         assert 1 in r.bootstrap_results.event_study_ses
         assert np.isfinite(r.bootstrap_results.event_study_ses[1])
+        # Top-level overall_* must match event_study_effects[1]
+        es1 = r.event_study_effects[1]
+        assert r.overall_att == es1["effect"]
+        assert r.overall_se == es1["se"]
+        assert r.overall_p_value == es1["p_value"]
 
     def test_twfe_diagnostic_skipped_nonbinary(self):
         """TWFE diagnostic should be skipped (with warning) for non-binary."""