Address PR #350 CI review round 4: P1 unused categorical levels trip balance check

igerber · claude · igerber · commit 1f044e7c417c · 2026-04-22T09:16:58.000-04:00
**P1:** Round 3 added `observed=False` to the balance-check groupby to
silence a pandas FutureWarning, but that introduced a false-rejection bug:
when `time_col` is an ordered categorical whose declared category levels
extend beyond the observed periods, `observed=False` materializes
zero-count unit-period cells for the unused levels, the `counts != 1`
guard fires on those cells, and the balance check rejects an otherwise
balanced panel.

Fix: switched to `observed=True`. This tells categorical groupby to
count only OBSERVED unit-period cells, matching the `periods_list`
(observed uniques) that the rest of the validator is keyed to. No
change for numeric / datetime time columns.

**Test added:** `test_ordered_categorical_with_unused_levels_accepted`
declares categories `["pre0", "pre1", "pre2", "post1", "post2", "post3"]`
but only observes `{"pre1", "pre2", "post1", "post2"}`; asserts the
fit succeeds with `F="post1"`, `event_times=[-2, 0, 1]`, and
`n_units == G`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/had.py b/diff_diff/had.py
@@ -1110,9 +1110,15 @@ def _sort_key(x: Any) -> Tuple[bool, Any]:
                 )
 
     # Balanced panel on the (possibly-filtered) data: every unit appears
-    # exactly once per period. ``observed=False`` preserves current
-    # behavior on categorical time columns (pandas' default is changing).
-    counts = data_filtered.groupby([unit_col, time_col], observed=False).size()
+    # exactly once per period. ``observed=True`` tells categorical
+    # groupby to count only OBSERVED unit-period cells. Without it, a
+    # time_col with an ordered-categorical dtype carrying extra unused
+    # category levels (beyond the periods actually present in the data)
+    # would expand to zero-count cells and the balance check would
+    # falsely reject valid panels. The rest of the validator is keyed
+    # to ``periods_list`` (observed unique values) so this stays
+    # consistent.
+    counts = data_filtered.groupby([unit_col, time_col], observed=True).size()
     if (counts != 1).any():
         n_bad = int((counts != 1).sum())
         raise ValueError(
diff --git a/tests/test_had.py b/tests/test_had.py
@@ -2865,6 +2865,48 @@ def test_unordered_string_time_col_rejected(self):
                 panel, "outcome", "dose", "period", "unit", aggregate="event_study"
             )
 
+    def test_ordered_categorical_with_unused_levels_accepted(self):
+        """Ordered categorical with extra unused category levels fits.
+
+        Covers CI reviewer round 4 P1: the balanced-panel check must
+        use ``observed=True`` on categorical groupby so unused category
+        levels don't expand to zero-count cells and falsely trip the
+        balance guard.
+        """
+        rng = np.random.default_rng(0)
+        G = 40
+        # Observed periods: pre1, pre2, post1, post2
+        # Declared categories: ALSO include pre0 (unused) and post3 (unused)
+        all_categories = ["pre0", "pre1", "pre2", "post1", "post2", "post3"]
+        observed = ["pre1", "pre2", "post1", "post2"]
+        cat_dtype = pd.CategoricalDtype(categories=all_categories, ordered=True)
+        rows = []
+        d_post = rng.uniform(0.1, 1.0, G)
+        d_post[0] = 0.0
+        for g in range(G):
+            for label in observed:
+                dose = d_post[g] if label in ("post1", "post2") else 0.0
+                rows.append(
+                    {
+                        "unit": g,
+                        "period": label,
+                        "dose": dose,
+                        "outcome": rng.standard_normal(),
+                    }
+                )
+        panel = pd.DataFrame(rows)
+        panel["period"] = panel["period"].astype(cat_dtype)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", UserWarning)
+            result = HeterogeneousAdoptionDiD(design="auto").fit(
+                panel, "outcome", "dose", "period", "unit", aggregate="event_study"
+            )
+        # F should be post1 (first observed post-period); event_times
+        # should be [-2, 0, 1] (e=-1 for anchor pre2 is skipped).
+        assert result.F == "post1"
+        assert result.event_times.tolist() == [-2, 0, 1]
+        assert result.n_units == G
+
     def test_ordered_categorical_time_col_accepted(self):
         """Ordered categorical time dtype passes the ordered-time check."""
         rng = np.random.default_rng(0)