Use pd.api.types.is_numeric_dtype for nullable dtype support

igerber · claude · igerber · commit 50ab3bdf372e · 2026-04-07T14:20:38.000-04:00
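Replace np.issubdtype with pd.api.types.is_numeric_dtype so pandas
nullable extension dtypes (Int64, Float64) are accepted as numeric.
Note that is_numeric_dtype also returns True for bool columns, which
the previous np.issubdtype(..., np.number) check rejected.
Add regression test with Float64 outcome column.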

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/prep.py b/diff_diff/prep.py
@@ -1541,7 +1541,7 @@ def aggregate_survey(
     # --- Precompute full-length outcome/covariate arrays ---
     n_total = len(data)
     all_vars = outcome_cols + cov_cols
-    non_numeric = [v for v in all_vars if not np.issubdtype(data[v].dtype, np.number)]
+    non_numeric = [v for v in all_vars if not pd.api.types.is_numeric_dtype(data[v])]
     if non_numeric:
         raise ValueError(
             f"Non-numeric column(s) in outcomes/covariates: {non_numeric}. "
diff --git a/tests/test_prep.py b/tests/test_prep.py
@@ -2463,6 +2463,21 @@ def test_error_non_numeric_outcome(self, micro_data, design):
                 survey_design=design,
             )
 
+    def test_nullable_numeric_dtypes(self):
+        """Nullable Float64 outcome is accepted as numeric; Int64 is not exercised here."""
+        data = pd.DataFrame(
+            {
+                "geo": np.repeat(["A", "B"], 10),
+                "time": np.ones(20, dtype=int),
+                "wt": np.ones(20),
+                "y": pd.array(np.random.RandomState(1).normal(0, 1, 20), dtype="Float64"),
+            }
+        )
+        design = SurveyDesign(weights="wt")
+        panel, _ = aggregate_survey(data, by=["geo", "time"], outcomes="y", survey_design=design)
+        assert len(panel) == 2  # two (geo, time) cells: "A" and "B", both at time 1
+        assert panel["y_mean"].notna().all()  # no missing y_mean in any returned row
+
     def test_error_empty_data(self, design):
         """Empty DataFrame raises ValueError."""
         empty = pd.DataFrame(columns=["state", "year", "y", "wt", "stratum", "cluster"])