Fix CI review Round 2: mixed panel overall_att, TWFE guard, bootstrap N_S=0

igerber · claude · igerber · commit 966a00d10446 · 2026-04-12T19:23:49.000-04:00
P0: When L_max >= 1, always set overall_att from per-group DID_{g,1}
    (not conditional on NaN). Fixes mixed binary/non-binary panels where
    per-period N_S > 0 but excludes non-binary switches.
P1: Gate TWFE diagnostic and twowayfeweights() to binary-only treatment.
    Emit warning on fit(), raise ValueError on standalone helper.
P1: Refactor _compute_dcdh_bootstrap() to skip scalar DID_M when
    divisor_overall <= 0 but still process multi_horizon_inputs and
    placebo_horizon_inputs. Fixes non-binary bootstrap path.
P2: Add regressions for mixed 0->1/0->2 panel at L_max=1, non-binary
    bootstrap, and TWFE diagnostic skip on non-binary.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -608,7 +608,18 @@ def fit(
         #          the same _validate_and_aggregate_to_cells() output.
         # ------------------------------------------------------------------
         twfe_diagnostic_payload = None
-        if self.twfe_diagnostic:
+        # TWFE diagnostic assumes binary treatment (d_arr == 1 for
+        # treated mask). Skip for non-binary data with a warning.
+        is_binary_pre = set(cell["d_gt"].unique()).issubset({0.0, 1.0, 0, 1})
+        if self.twfe_diagnostic and not is_binary_pre:
+            warnings.warn(
+                "TWFE diagnostic (twfe_diagnostic=True) is not supported for "
+                "non-binary treatment. The diagnostic assumes binary {0, 1} "
+                "treatment. Skipping TWFE diagnostic for this fit.",
+                UserWarning,
+                stacklevel=2,
+            )
+        elif self.twfe_diagnostic:
             try:
                 twfe_diagnostic_payload = _compute_twfe_diagnostic(
                     cell=cell,
@@ -1576,15 +1587,16 @@ def fit(
         # l=1 use the per-group DID_{g,l} path for a consistent estimand.
         if multi_horizon_inference is not None and 1 in multi_horizon_inference:
             # Per-group mode: use per-group path for all horizons.
-            # Also populate overall_att from l=1 when per-period path
-            # yielded NaN (non-binary treatment or no binary switchers).
-            if np.isnan(overall_att):
-                l1_inf = multi_horizon_inference[1]
-                overall_att = l1_inf["effect"]
-                overall_se = l1_inf["se"]
-                overall_t = l1_inf["t_stat"]
-                overall_p = l1_inf["p_value"]
-                overall_ci = l1_inf["conf_int"]
+            # When L_max >= 1, the per-group DID_{g,1} is the correct
+            # estimand for overall_att (not the binary-only per-period
+            # DID_M). This handles both pure non-binary (N_S=0) and
+            # mixed binary/non-binary panels (N_S > 0 but incomplete).
+            l1_inf = multi_horizon_inference[1]
+            overall_att = l1_inf["effect"]
+            overall_se = l1_inf["se"]
+            overall_t = l1_inf["t_stat"]
+            overall_p = l1_inf["p_value"]
+            overall_ci = l1_inf["conf_int"]
             event_study_effects: Dict[int, Dict[str, Any]] = dict(multi_horizon_inference)
         else:
             # Phase 1 mode (L_max=None): l=1 from per-period path
@@ -3656,6 +3668,14 @@ def twowayfeweights(
         time=time,
         treatment=treatment,
     )
+    # TWFE diagnostic assumes binary treatment (d_arr == 1 for treated mask).
+    if not set(cell["d_gt"].unique()).issubset({0.0, 1.0, 0, 1}):
+        raise ValueError(
+            "twowayfeweights() requires binary treatment {0, 1}. "
+            "Non-binary treatment is supported by fit() with L_max >= 1 "
+            "but the TWFE diagnostic (Theorem 1 of AER 2020) assumes "
+            "binary treatment."
+        )
     return _compute_twfe_diagnostic(
         cell=cell,
         group_col=group,
diff --git a/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py b/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py
@@ -19,7 +19,6 @@
 produce a bootstrap distribution per target.
 """
 
-import warnings
 from typing import TYPE_CHECKING, Dict, Optional, Tuple
 
 import numpy as np
@@ -159,29 +158,29 @@ def _compute_dcdh_bootstrap(
                 f"u_centered_overall length ({u_centered_overall.shape[0]}) does not "
                 f"match n_groups_for_overall ({n_groups_for_overall})"
             )
-        if divisor_overall <= 0:
-            warnings.warn(
-                f"_compute_dcdh_bootstrap: divisor_overall={divisor_overall} <= 0; "
-                "returning all-NaN bootstrap results.",
-                RuntimeWarning,
-                stacklevel=2,
-            )
-            return _empty_bootstrap_results(self.n_bootstrap, self.bootstrap_weights, self.alpha)
-
         rng = np.random.default_rng(self.seed)
 
         # --- Overall DID_M ---
-        overall_se, overall_ci, overall_p, overall_dist = _bootstrap_one_target(
-            u_centered=u_centered_overall,
-            divisor=divisor_overall,
-            original=original_overall,
-            n_bootstrap=self.n_bootstrap,
-            weight_type=self.bootstrap_weights,
-            alpha=self.alpha,
-            rng=rng,
-            context="dCDH overall DID_M bootstrap",
-            return_distribution=True,
-        )
+        # Skip the scalar DID_M bootstrap when divisor_overall <= 0
+        # (e.g., pure non-binary panels where N_S=0), but continue
+        # to process multi_horizon_inputs and placebo_horizon_inputs.
+        if divisor_overall > 0:
+            overall_se, overall_ci, overall_p, overall_dist = _bootstrap_one_target(
+                u_centered=u_centered_overall,
+                divisor=divisor_overall,
+                original=original_overall,
+                n_bootstrap=self.n_bootstrap,
+                weight_type=self.bootstrap_weights,
+                alpha=self.alpha,
+                rng=rng,
+                context="dCDH overall DID_M bootstrap",
+                return_distribution=True,
+            )
+        else:
+            overall_se = np.nan
+            overall_ci = (np.nan, np.nan)
+            overall_p = np.nan
+            overall_dist = None
 
         results = DCDHBootstrapResults(
             n_bootstrap=self.n_bootstrap,
@@ -398,15 +397,3 @@ def _bootstrap_one_target(
     return se, ci, p_value, (boot_dist if return_distribution else None)
 
 
-def _empty_bootstrap_results(
-    n_bootstrap: int, weight_type: str, alpha: float
-) -> DCDHBootstrapResults:
-    """Return an all-NaN bootstrap results object as a graceful fallback."""
-    return DCDHBootstrapResults(
-        n_bootstrap=n_bootstrap,
-        weight_type=weight_type,
-        alpha=alpha,
-        overall_se=np.nan,
-        overall_ci=(np.nan, np.nan),
-        overall_p_value=np.nan,
-    )
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -1671,18 +1671,18 @@ def test_twowayfeweights_rejects_nan_outcome(self):
                 treatment="treatment",
             )
 
-    def test_twowayfeweights_accepts_non_binary_treatment(self):
-        """Non-binary treatment is now supported."""
+    def test_twowayfeweights_rejects_non_binary_treatment(self):
+        """TWFE diagnostic requires binary treatment."""
         data = generate_reversible_did_data(n_groups=20, n_periods=4, seed=1)
         data.loc[data.index[0], "treatment"] = 2  # non-binary
-        result = twowayfeweights(
-            data,
-            outcome="outcome",
-            group="group",
-            time="period",
-            treatment="treatment",
-        )
-        assert result is not None
+        with pytest.raises(ValueError, match="binary treatment"):
+            twowayfeweights(
+                data,
+                outcome="outcome",
+                group="group",
+                time="period",
+                treatment="treatment",
+            )
 
     def test_twowayfeweights_rejects_nan_group(self):
         data = generate_reversible_did_data(n_groups=20, n_periods=4, seed=1)
@@ -2333,6 +2333,96 @@ def test_monotone_multi_step_dropped(self):
         # Group 0 (0->1->2, 2 change periods) should be dropped
         assert r.n_groups_dropped_crossers >= 1
 
+    def test_mixed_binary_nonbinary_panel_lmax1(self):
+        """Mixed panel with both 0->1 and 0->2 switches at L_max=1.
+        overall_att should use the per-group path (includes all switches),
+        not the per-period path (binary-only)."""
+        np.random.seed(88)
+        rows = []
+        # Binary switchers: 0->1
+        for g in range(10):
+            for t in range(6):
+                d = 0 if t < 3 else 1
+                y = 10 + t + d * 2 + np.random.randn() * 0.3
+                rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
+        # Non-binary switchers: 0->2
+        for g in range(10, 20):
+            for t in range(6):
+                d = 0 if t < 3 else 2
+                y = 10 + t + d * 1.5 + np.random.randn() * 0.3
+                rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
+        # Controls
+        for g in range(20, 40):
+            for t in range(6):
+                y = 10 + t + np.random.randn() * 0.3
+                rows.append({"group": g, "period": t, "treatment": 0, "outcome": y})
+        df = pd.DataFrame(rows)
+        est = ChaisemartinDHaultfoeuille(twfe_diagnostic=False)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = est.fit(
+                df, outcome="outcome", group="group", time="period",
+                treatment="treatment", L_max=1,
+            )
+        # overall_att should be from per-group path (includes both 0->1 and 0->2)
+        assert np.isfinite(r.overall_att)
+        # event_study_effects[1] and overall_att should be the same estimand
+        assert r.overall_att == r.event_study_effects[1]["effect"]
+
+    def test_nonbinary_bootstrap(self, ci_params):
+        """Non-binary panel with bootstrap should produce finite event study SEs."""
+        np.random.seed(66)
+        n_boot = ci_params.bootstrap(99)
+        rows = []
+        for g in range(20):
+            for t in range(6):
+                d = 0 if t < 3 else 2
+                y = 10 + t + d * 1.5 + np.random.randn() * 0.3
+                rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
+        for g in range(20, 40):
+            for t in range(6):
+                y = 10 + t + np.random.randn() * 0.3
+                rows.append({"group": g, "period": t, "treatment": 0, "outcome": y})
+        df = pd.DataFrame(rows)
+        est = ChaisemartinDHaultfoeuille(
+            twfe_diagnostic=False, n_bootstrap=n_boot, seed=42
+        )
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            r = est.fit(
+                df, outcome="outcome", group="group", time="period",
+                treatment="treatment", L_max=1,
+            )
+        assert r.bootstrap_results is not None
+        assert r.bootstrap_results.event_study_ses is not None
+        assert 1 in r.bootstrap_results.event_study_ses
+        assert np.isfinite(r.bootstrap_results.event_study_ses[1])
+
+    def test_twfe_diagnostic_skipped_nonbinary(self):
+        """TWFE diagnostic should be skipped (with warning) for non-binary."""
+        np.random.seed(77)
+        rows = []
+        for g in range(20):
+            for t in range(6):
+                d = 0 if t < 3 else 2
+                y = 10 + t + d + np.random.randn() * 0.3
+                rows.append({"group": g, "period": t, "treatment": d, "outcome": y})
+        for g in range(20, 40):
+            for t in range(6):
+                y = 10 + t + np.random.randn() * 0.3
+                rows.append({"group": g, "period": t, "treatment": 0, "outcome": y})
+        df = pd.DataFrame(rows)
+        est = ChaisemartinDHaultfoeuille(twfe_diagnostic=True)
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            r = est.fit(
+                df, outcome="outcome", group="group", time="period",
+                treatment="treatment", L_max=1,
+            )
+        twfe_warnings = [x for x in w if "TWFE diagnostic" in str(x.message)]
+        assert len(twfe_warnings) >= 1
+        assert r.twfe_weights is None  # diagnostic was skipped
+
     def test_normalized_effects_general_formula(self):
         """For non-binary treatment, normalized denominator uses actual dose change."""
         np.random.seed(99)