Fix CI review Round 1: non-binary guard, L_max>=1 gate, placebo SE docs

igerber · claude · igerber · commit df6db2c415fc · 2026-04-12T19:02:03.000-04:00
P0: Add explicit ValueError for non-binary treatment + L_max=None.
P0: Change multi-horizon gate from L_max>=2 to L_max>=1 so per-group
    DID_{g,1} path activates at L_max=1 (handles non-binary correctly).
    Populate overall_att from per-group l=1 when per-period path yields NaN.
P1: Update REGISTRY Notes to document placebo SE as library extension
    (paper's Theorem 1 is for DID_l, placebo IF applies same structure
    to backward outcome differences). Update results docstring.
P2: Rename test_non_binary_treatment_accepted to
    test_non_binary_treatment_requires_lmax and make it assert ValueError;
    add test_non_binary_treatment_with_lmax regression.
    Rename test_L_max_1_equivalent_to_none to
    test_L_max_1_uses_per_group_path and test per-group path behavior
    (not cross-path equality, since per-group and per-period are
    documented as different estimands).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -915,6 +915,13 @@ def fit(
         # via per-group DID_{g,l}) will compute the effects. Only raise if
         # L_max is also None (i.e., no fallback path).
         is_binary = set(np.unique(D_mat[~np.isnan(D_mat)])).issubset({0.0, 1.0})
+        if not is_binary and L_max is None:
+            raise ValueError(
+                "Non-binary treatment requires L_max >= 1. The per-period DID "
+                "path uses binary joiner/leaver categorization; set L_max to "
+                "use the per-group DID_{g,l} building block which handles "
+                "non-binary treatment."
+            )
         if N_S == 0 and (L_max is None or is_binary):
             raise ValueError(
                 "No switching cells found in the data after filtering: every "
@@ -1037,7 +1044,7 @@ def fit(
         multi_horizon_se: Optional[Dict[int, float]] = None
         multi_horizon_inference: Optional[Dict[int, Dict[str, Any]]] = None
 
-        if L_max is not None and L_max >= 2:
+        if L_max is not None and L_max >= 1:
             multi_horizon_dids = _compute_multi_horizon_dids(
                 D_mat=D_mat,
                 Y_mat=Y_mat,
@@ -1161,7 +1168,7 @@ def fit(
         normalized_effects_dict: Optional[Dict[int, Dict[str, Any]]] = None
         cost_benefit_result: Optional[Dict[str, Any]] = None
 
-        if L_max is not None and L_max >= 2 and multi_horizon_dids is not None:
+        if L_max is not None and L_max >= 1 and multi_horizon_dids is not None:
             # Dynamic placebos DID^{pl}_l
             if self.placebo:
                 multi_horizon_placebos = _compute_multi_horizon_placebos(
@@ -1368,7 +1375,7 @@ def fit(
 
         # Phase 1 per-period placebo (L_max=None): SE is NaN because the
         # per-period DID_M^pl aggregation path does not have an IF
-        # derivation. Multi-horizon placebos (L_max >= 2) use the per-group
+        # derivation. Multi-horizon placebos (L_max >= 1) use the per-group
         # placebo IF computed above and have valid SE.
         placebo_se = float("nan")
         placebo_t = float("nan")
@@ -1378,7 +1385,7 @@ def fit(
             warnings.warn(
                 "Single-period placebo SE (L_max=None) is NaN. The "
                 "per-period DID_M^pl aggregation path does not have an "
-                "influence-function derivation. Use L_max >= 2 for "
+                "influence-function derivation. Use L_max >= 1 for "
                 "multi-horizon placebos with valid SE. The placebo "
                 "point estimate (results.placebo_effect) is still "
                 "meaningful.",
@@ -1416,7 +1423,7 @@ def fit(
                 placebo_horizon_if is not None
                 and multi_horizon_placebos is not None
                 and L_max is not None
-                and L_max >= 2
+                and L_max >= 1
             ):
                 singleton_baseline_set_pl_b = set(singleton_baseline_groups)
                 eligible_mask_pl_b = np.array(
@@ -1464,7 +1471,7 @@ def fit(
                 and multi_horizon_dids is not None
                 and multi_horizon_se is not None
                 and L_max is not None
-                and L_max >= 2
+                and L_max >= 1
             ):
                 singleton_baseline_set_b = set(singleton_baseline_groups)
                 eligible_mask_b = np.array(
@@ -1565,10 +1572,19 @@ def fit(
         # Step 20: Build the results dataclass
         # ------------------------------------------------------------------
         # event_study_effects: when L_max is None, l=1 mirrors Phase 1
-        # DID_M (per-period path). When L_max >= 2, ALL horizons including
+        # DID_M (per-period path). When L_max >= 1, ALL horizons including
         # l=1 use the per-group DID_{g,l} path for a consistent estimand.
         if multi_horizon_inference is not None and 1 in multi_horizon_inference:
-            # Phase 2 mode: use per-group path for all horizons
+            # Per-group mode: use per-group path for all horizons.
+            # Also populate overall_att from l=1 when per-period path
+            # yielded NaN (non-binary treatment or no binary switchers).
+            if np.isnan(overall_att):
+                l1_inf = multi_horizon_inference[1]
+                overall_att = l1_inf["effect"]
+                overall_se = l1_inf["se"]
+                overall_t = l1_inf["t_stat"]
+                overall_p = l1_inf["p_value"]
+                overall_ci = l1_inf["conf_int"]
             event_study_effects: Dict[int, Dict[str, Any]] = dict(multi_horizon_inference)
         else:
             # Phase 1 mode (L_max=None): l=1 from per-period path
diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py
@@ -53,7 +53,9 @@ class DCDHBootstrapResults:
     analytical variance for ``DID_l`` only, not for the per-period
     ``DID_M^pl``. The ``placebo_se`` / ``placebo_ci`` / ``placebo_p_value``
     fields below remain ``None`` for Phase 1. Multi-horizon placebos
-    (``L_max >= 2``) have valid SE via ``placebo_horizon_ses``.
+    (``L_max >= 1``) have valid SE via ``placebo_horizon_ses`` - this is
+    a library extension applying the same IF/variance structure to the
+    placebo estimand (see REGISTRY.md dynamic placebo SE Note).
 
     Attributes
     ----------
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -543,7 +543,7 @@ Dynamic placebos `DID^{pl}_l` look backward from each group's reference period,
 
 - **Note (Phase 2 cost-benefit delta SE):** When `L_max >= 2`, `overall_att` holds the cost-benefit `delta`. Its SE is computed via the delta method from per-horizon SEs: `SE(delta) = sqrt(sum w_l^2 * SE(DID_l)^2)`, treating horizons as independent (conservative under Assumption 8). When bootstrap is enabled, per-horizon bootstrap SEs flow through the delta-method formula, so `overall_se` reflects bootstrap-derived per-horizon uncertainty but the delta aggregation itself uses normal-theory (not bootstrap percentile). This is an intentional exception to the general bootstrap-inference-surface contract: `overall_p_value` and `overall_conf_int` for `delta` use `safe_inference(delta, delta_se)`, not percentile bootstrap, because the delta is a derived aggregate rather than a directly bootstrapped estimand.
 
-- **Note (Phase 2 dynamic placebo SE):** Dynamic placebos `DID^{pl}_l` (negative horizons in `placebo_event_study`) ship as point estimates with `NaN` inference in Phase 2. The placebo influence-function derivation follows the same cohort-recentered structure as the positive horizons but requires a separate IF computation for the backward outcome differences, which is deferred. The placebo point estimates are meaningful for visual pre-trends inspection; formal placebo inference will be added in a follow-up. Bootstrap placebo inference plumbing exists in the mixin but is not wired.
+- **Note (dynamic placebo SE - library extension):** Dynamic placebos `DID^{pl}_l` (negative horizons in `placebo_event_study`) now have analytical SE and bootstrap SE when `L_max >= 1`. The placebo IF uses the same cohort-recentered structure as positive horizons, applied to backward outcome differences `Y_{g, F_g-1-l} - Y_{g, F_g-1}` with the dual-eligibility control pool (forward + backward observation required). The paper's Theorem 1 variance result is stated for `DID_l`, not `DID^{pl}_l` - this extension applies the same IF/variance structure to the placebo estimand as a library enhancement. The single-period placebo `DID_M^pl` (`L_max=None`) retains NaN SE because the per-period aggregation path has no IF derivation.
 
 *Standard errors (Web Appendix Section 3.7.3 of the dynamic companion paper):*
 
@@ -583,7 +583,7 @@ Alternative: Multiplier bootstrap clustered at group via the `n_bootstrap` param
 
 - **Note:** The analytical CI is **conservative** under Assumption 8 (independent groups) of the dynamic companion paper, and exact only under iid sampling. This is documented as a deliberate deviation from "default nominal coverage". The bootstrap CI uses the same conservative weighting and is provided for users who want a non-asymptotic alternative.
 
-- **Note:** Placebo SE is intentionally `NaN` for both the single-lag `DID_M^pl` and the dynamic placebos `DID^{pl}_l`. The placebo influence-function derivation is deferred (see the Phase 2 dynamic placebo SE Note above). Placebo point estimates are meaningful for visual pre-trends inspection; inference fields stay NaN-consistent even when `n_bootstrap > 0`.
+- **Note:** Placebo SE is `NaN` for the single-period `DID_M^pl` (`L_max=None`). Multi-horizon placebos (`L_max >= 1`) have valid analytical SE and bootstrap SE via the placebo IF (see the dynamic placebo SE Note above).
 
 - **Note:** When every variance-eligible group forms its own `(D_{g,1}, F_g, S_g)` cohort (a degenerate small-panel case where the cohort framework has zero degrees of freedom), the cohort-recentered plug-in formula is unidentified: cohort recentering subtracts the cohort mean from each group's `U^G_g`, and for singleton cohorts the centered value is exactly zero, so the centered influence function vector collapses to all zeros. The estimator returns `overall_se = NaN` with a `UserWarning` rather than silently collapsing to `0.0` (which would falsely imply infinite precision). The `DID_M` point estimate remains well-defined. The bootstrap path inherits the same degeneracy on these panels — the multiplier weights act on an all-zero vector, so the bootstrap distribution is also degenerate. **Deviation from R `DIDmultiplegtDYN`:** R returns a non-zero SE on the canonical 4-group worked example via small-sample sandwich machinery that Python does not implement. Both responses are valid for a degenerate case; Python's `NaN`+warning is the safer default. To get a non-degenerate SE, include more groups so cohorts have peers (real-world panels typically have `G >> K`).
 
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -130,8 +130,8 @@ def test_missing_column_raises_value_error(self):
                 treatment="treatment",
             )
 
-    def test_non_binary_treatment_accepted(self):
-        """Non-binary treatment is now supported."""
+    def test_non_binary_treatment_requires_lmax(self):
+        """Non-binary treatment without L_max raises ValueError."""
         df = pd.DataFrame(
             {
                 "group": [1, 1, 2, 2],
@@ -141,6 +141,30 @@ def test_non_binary_treatment_accepted(self):
             }
         )
         est = ChaisemartinDHaultfoeuille()
+        with pytest.raises(ValueError, match="Non-binary treatment requires L_max"):
+            est.fit(
+                df,
+                outcome="outcome",
+                group="group",
+                time="period",
+                treatment="treatment",
+            )
+
+    def test_non_binary_treatment_with_lmax(self):
+        """Non-binary treatment works with L_max=1."""
+        np.random.seed(77)
+        rows = []
+        for g in range(20):
+            for t in range(6):
+                d = 0 if t < 3 else 2  # non-binary jump
+                y = 10 + t + d * 1.5 + np.random.randn() * 0.3
+                rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
+        for g in range(20, 40):
+            for t in range(6):
+                y = 10 + t + np.random.randn() * 0.3
+                rows.append({"group": g, "period": t, "treatment": 0, "outcome": y})
+        df = pd.DataFrame(rows)
+        est = ChaisemartinDHaultfoeuille(twfe_diagnostic=False)
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             results = est.fit(
@@ -149,6 +173,7 @@ def test_non_binary_treatment_accepted(self):
                 group="group",
                 time="period",
                 treatment="treatment",
+                L_max=1,
             )
         assert np.isfinite(results.overall_att)
 
@@ -1795,23 +1820,27 @@ def test_L_max_none_preserves_phase1_behavior(self, data):
         assert r.sup_t_bands is None
         assert r.placebo_event_study is None
 
-    def test_L_max_1_equivalent_to_none(self, data):
-        """L_max=1 produces same DID_1 as L_max=None."""
+    def test_L_max_1_uses_per_group_path(self, data):
+        """L_max=1 uses the per-group DID_{g,1} path (same as L_max >= 2
+        uses for l=1). This is a different estimand from the per-period
+        DID_M path used by L_max=None - documented as a REGISTRY Note."""
         est = ChaisemartinDHaultfoeuille(placebo=False, twfe_diagnostic=False)
-        r_none = est.fit(
-            data, outcome="outcome", group="group", time="period", treatment="treatment"
-        )
-        r_one = est.fit(
-            data,
-            outcome="outcome",
-            group="group",
-            time="period",
-            treatment="treatment",
-            L_max=1,
-        )
-        assert r_one.event_study_effects[1]["effect"] == pytest.approx(
-            r_none.event_study_effects[1]["effect"]
-        )
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r_one = est.fit(
+                data,
+                outcome="outcome",
+                group="group",
+                time="period",
+                treatment="treatment",
+                L_max=1,
+            )
+        # Per-group path produces finite estimate and SE
+        assert np.isfinite(r_one.event_study_effects[1]["effect"])
+        assert np.isfinite(r_one.event_study_effects[1]["se"])
+        assert np.isfinite(r_one.overall_att)
+        # L_max=1 should have exactly 1 horizon
+        assert set(r_one.event_study_effects.keys()) == {1}
 
     def test_L_max_populates_event_study_effects(self, data):
         """L_max=3 populates horizons {1, 2, 3} in event_study_effects."""