Address PR review: edge case fixes for TwoStageDiD

igerber · claude · igerber · commit bc2eb77fb418 · 2026-02-16T13:20:38.000-05:00
- Always-treated warning now lists affected unit IDs (truncated at 10)
- Bootstrap handles NaN y_tilde: masks NaN obs in static, event study,
  and group bootstrap paths; returns None when all treated obs are NaN
- balance_e warns when no cohorts qualify and returns only the reference
  period, instead of silently falling back to all (unbalanced) cohorts
- Add 3 edge case tests and REGISTRY.md update

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/diff_diff/two_stage.py b/diff_diff/two_stage.py
@@ -593,18 +593,19 @@ def fit(
         # Check for always-treated units
         min_time = df[time].min()
         always_treated_mask = (~df["_never_treated"]) & (df[first_treat] <= min_time)
-        n_always_treated = df.loc[always_treated_mask, unit].nunique()
+        always_treated_units = df.loc[always_treated_mask, unit].unique()
+        n_always_treated = len(always_treated_units)
         if n_always_treated > 0:
+            unit_list = ", ".join(str(u) for u in always_treated_units[:10])
+            suffix = f" (and {n_always_treated - 10} more)" if n_always_treated > 10 else ""
             warnings.warn(
                 f"{n_always_treated} unit(s) are treated in all observed periods "
-                f"(first_treat <= {min_time}). These units have no untreated "
-                "observations and cannot contribute to the counterfactual model. "
-                "Excluding from estimation.",
+                f"(first_treat <= {min_time}): [{unit_list}{suffix}]. "
+                "These units have no untreated observations and cannot contribute "
+                "to the counterfactual model. Excluding from estimation.",
                 UserWarning,
                 stacklevel=2,
             )
-            # Exclude always-treated units
-            always_treated_units = df.loc[always_treated_mask, unit].unique()
             df = df[~df[unit].isin(always_treated_units)].copy()
 
         # Treatment indicator with anticipation
@@ -1183,11 +1184,25 @@ def _stage2_event_study(
                 for g, horizons in cohort_rel_times.items():
                     if required_range.issubset(horizons):
                         balanced_cohorts.add(g)
-            balance_mask = (
-                df[first_treat].isin(balanced_cohorts).values
-                if balanced_cohorts
-                else np.ones(n, dtype=bool)
-            )
+            if not balanced_cohorts:
+                warnings.warn(
+                    f"No cohorts satisfy balance_e={balance_e} requirement. "
+                    "Event study results will contain only the reference period. "
+                    "Consider reducing balance_e.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+                return {
+                    ref_period: {
+                        "effect": 0.0,
+                        "se": 0.0,
+                        "t_stat": np.nan,
+                        "p_value": np.nan,
+                        "conf_int": (0.0, 0.0),
+                        "n_obs": 0,
+                    }
+                }
+            balance_mask = df[first_treat].isin(balanced_cohorts).values
         else:
             balance_mask = np.ones(n, dtype=bool)
 
@@ -1724,7 +1739,7 @@ def _run_bootstrap(
         original_event_study: Optional[Dict[int, Dict[str, Any]]],
         original_group: Optional[Dict[Any, Dict[str, Any]]],
         aggregate: Optional[str],
-    ) -> TwoStageBootstrapResults:
+    ) -> Optional[TwoStageBootstrapResults]:
         """Run multiplier bootstrap on GMM influence function."""
         if self.n_bootstrap < 50:
             warnings.warn(
@@ -1738,12 +1753,23 @@ def _run_bootstrap(
 
         from diff_diff.staggered_bootstrap import _generate_bootstrap_weights_batch
 
-        y_tilde = df["_y_tilde"].values
+        y_tilde = df["_y_tilde"].values.copy()  # .copy() to avoid mutating df column
         n = len(df)
         cluster_ids = df[cluster_var].values
 
+        # Handle NaN y_tilde (from unidentified FEs) — matches _stage2_static logic
+        nan_mask = ~np.isfinite(y_tilde)
+        if nan_mask.any():
+            y_tilde[nan_mask] = 0.0
+
         # --- Static specification bootstrap ---
-        D = omega_1_mask.values.astype(float)
+        D = omega_1_mask.values.astype(float)  # .astype() already creates a copy
+        D[nan_mask] = 0.0  # Exclude NaN y_tilde obs from bootstrap estimation
+
+        # Degenerate case: all treated obs have NaN y_tilde
+        if D.sum() == 0:
+            return None
+
         X_2_static = D.reshape(-1, 1)
         coef_static = solve_ols(X_2_static, y_tilde, return_vcov=False)[0]
         eps_2_static = y_tilde - X_2_static @ coef_static
@@ -1811,11 +1837,10 @@ def _run_bootstrap(
                     for g, horizons in cohort_rel_times.items():
                         if required_range.issubset(horizons):
                             balanced_cohorts.add(g)
-                balance_mask = (
-                    df[first_treat].isin(balanced_cohorts).values
-                    if balanced_cohorts
-                    else np.ones(n, dtype=bool)
-                )
+                if not balanced_cohorts:
+                    all_horizons = []  # No qualifying cohorts -> skip event study bootstrap
+                else:
+                    balance_mask = df[first_treat].isin(balanced_cohorts).values
             else:
                 balance_mask = np.ones(n, dtype=bool)
 
@@ -1827,6 +1852,8 @@ def _run_bootstrap(
                 for i in range(n):
                     if not balance_mask[i]:
                         continue
+                    if nan_mask[i]:
+                        continue  # NaN y_tilde -> exclude from bootstrap event study
                     h = rel_times[i]
                     if np.isfinite(h):
                         h_int = int(h)
@@ -1890,6 +1917,8 @@ def _run_bootstrap(
             treated_mask = omega_1_mask.values
             for i in range(n):
                 if treated_mask[i]:
+                    if nan_mask[i]:
+                        continue  # NaN y_tilde -> exclude from group bootstrap
                     g = ft_vals[i]
                     if g in group_to_col:
                         X_2_grp[i, group_to_col[g]] = 1.0
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -618,6 +618,7 @@ Our implementation uses multiplier bootstrap on the GMM influence function: clus
 - **NaN y_tilde handling:** When Stage 1 FE are unidentified for some observations, the residualized outcome `y_tilde` is NaN. These observations are zeroed out (excluded) from the Stage 2 regression and variance computation, matching the treatment of unimputable observations in ImputationDiD.
 - **NaN inference for undefined statistics:** t_stat uses NaN when SE is non-finite or zero; p_value and CI also NaN. Matches CallawaySantAnna/ImputationDiD NaN convention.
 - **Event study aggregation:** Horizon-specific effects use the same two-stage procedure with horizon indicator dummies in Stage 2. Unidentified horizons (e.g., long-run effects without never-treated units, per Proposition 5 of Borusyak et al. 2024) produce NaN.
+- **balance_e with no qualifying cohorts:** If no cohorts have sufficient pre/post coverage for the requested `balance_e`, a warning is emitted and event study results contain only the reference period.
 - **No never-treated units:** Long-run effects may be unidentified (same limitation as ImputationDiD). Warning emitted for affected horizons.
 
 **Reference implementation(s):**
diff --git a/tests/test_two_stage.py b/tests/test_two_stage.py
@@ -723,6 +723,77 @@ def test_horizon_max(self):
             if results.event_study_effects[h].get("n_obs", 0) > 0:
                 assert abs(h) <= 2
 
+    def test_always_treated_warning_lists_unit_ids(self):
+        """Always-treated warning should include affected unit IDs."""
+        data = generate_test_data()
+
+        # Add two always-treated units (first_treat before min_time=0)
+        always_treated = pd.DataFrame(
+            {
+                "unit": np.repeat([997, 998], 10),
+                "time": np.tile(np.arange(10), 2),
+                "outcome": np.random.default_rng(42).standard_normal(20),
+                "first_treat": np.repeat([-1, -2], 10),
+            }
+        )
+        data_with_always = pd.concat([data, always_treated], ignore_index=True)
+
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            TwoStageDiD().fit(
+                data_with_always,
+                outcome="outcome",
+                unit="unit",
+                time="time",
+                first_treat="first_treat",
+            )
+            always_warns = [x for x in w if "treated in all observed periods" in str(x.message)]
+            assert len(always_warns) == 1
+            msg = str(always_warns[0].message)
+            assert "997" in msg
+            assert "998" in msg
+
+    def test_bootstrap_with_nan_y_tilde(self, ci_params):
+        """Bootstrap should handle NaN y_tilde from unidentified FEs."""
+        # No never-treated units: cohorts 3, 5, 7 on periods 0-9 means
+        # periods 7-9 have zero untreated obs -> NaN y_tilde
+        data = generate_test_data(never_treated_frac=0.0)
+        n_boot = ci_params.bootstrap(20)
+
+        results = TwoStageDiD(n_bootstrap=n_boot).fit(
+            data,
+            outcome="outcome",
+            unit="unit",
+            time="time",
+            first_treat="first_treat",
+        )
+
+        assert np.isfinite(results.overall_att)
+        assert results.overall_se > 0
+
+    def test_balance_e_empty_cohorts_warns(self):
+        """Unreasonably large balance_e should warn when no cohorts qualify."""
+        data = generate_test_data()
+
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            results = TwoStageDiD().fit(
+                data,
+                outcome="outcome",
+                unit="unit",
+                time="time",
+                first_treat="first_treat",
+                aggregate="event_study",
+                balance_e=100,  # No cohort can satisfy this
+            )
+            balance_warns = [x for x in w if "No cohorts satisfy" in str(x.message)]
+            assert len(balance_warns) > 0
+
+        # Event study should contain only the reference period
+        assert len(results.event_study_effects) == 1
+        ref_key = list(results.event_study_effects.keys())[0]
+        assert results.event_study_effects[ref_key]["n_obs"] == 0
+
 
 # =============================================================================
 # TestTwoStageDiDParameters