Tighten validation and R parity test assertions

igerber · claude · igerber · commit 8ce7c5d893cb · 2026-03-30T11:44:39.000-04:00
- Validate exact period-set equality (not just counts) for balanced panel
- Reject non-finite outcomes (Inf/-Inf) and missing or non-finite covariates up front
- R parity tests now assert GT vector lengths before comparing ATT/SE
  values; the ATT tests also assert (g,t) label identity

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/staggered_triple_diff.py b/diff_diff/staggered_triple_diff.py
@@ -624,6 +624,23 @@ def _validate_inputs(
         for col in [outcome, first_treat, eligibility]:
             if df[col].isna().any():
                 raise ValueError(f"Column '{col}' contains missing values.")
+
+        # Reject non-finite outcomes (Inf/-Inf)
+        if not np.all(np.isfinite(df[outcome])):
+            raise ValueError(
+                f"Column '{outcome}' contains non-finite values (Inf/-Inf). "
+                "All outcome values must be finite."
+            )
+
+        # Reject missing (NaN) or non-finite (Inf/-Inf) covariates
+        if covariates:
+            for cov in covariates:
+                if df[cov].isna().any():
+                    raise ValueError(f"Covariate '{cov}' contains missing values.")
+                if not np.all(np.isfinite(df[cov])):
+                    raise ValueError(
+                        f"Covariate '{cov}' contains non-finite values."
+                    )
         if df[eligibility].nunique() < 2:
             raise ValueError(
                 "Need both eligible (Q=1) and ineligible (Q=0) units. "
@@ -638,16 +655,16 @@ def _validate_inputs(
                 f"{int(dup.sum())} duplicates detected. Panel must have unique rows."
             )
 
-        # Check balanced panel — every unit observed in every period
-        all_periods = df[time].unique()
-        n_global_periods = len(all_periods)
-        periods_per_unit = df.groupby(unit)[time].nunique()
-        incomplete = periods_per_unit[periods_per_unit < n_global_periods]
-        if len(incomplete) > 0:
+        # Check balanced panel — every unit observed in exactly the global period set
+        global_periods = set(df[time].unique())
+        n_global_periods = len(global_periods)
+        unit_period_sets = df.groupby(unit)[time].apply(set)
+        mismatched = unit_period_sets[unit_period_sets != global_periods]
+        if len(mismatched) > 0:
             raise ValueError(
                 "Unbalanced panel detected. All units must be observed in "
                 f"all {n_global_periods} periods. "
-                f"Found {len(incomplete)} units with fewer periods."
+                f"Found {len(mismatched)} units with different period sets."
             )
 
         # Check time-invariant first_treat
diff --git a/tests/test_methodology_staggered_triple_diff.py b/tests/test_methodology_staggered_triple_diff.py
@@ -152,7 +152,14 @@ def test_gt_att_matches_r(self, r_results, key):
         res = _run_python(data, r["est_method"], r["control_group"])
 
         py_gt = sorted(res.group_time_effects.items())
+        r_gt = list(zip(r["gt_groups"], r["gt_periods"]))
+        assert len(py_gt) == len(r["gt_att"]), (
+            f"{key}: Python has {len(py_gt)} GT cells, R has {len(r['gt_att'])}"
+        )
         for i, ((g, t), eff) in enumerate(py_gt):
+            assert (g, t) == (r_gt[i][0], r_gt[i][1]), (
+                f"{key}: GT cell mismatch at index {i}: Python=({g},{t}), R={r_gt[i]}"
+            )
             _assert_close(
                 eff["effect"], r["gt_att"][i],
                 ATT_RTOL, ATT_ATOL,
@@ -166,6 +173,9 @@ def test_gt_se_matches_r(self, r_results, key):
         res = _run_python(data, r["est_method"], r["control_group"])
 
         py_gt = sorted(res.group_time_effects.items())
+        assert len(py_gt) == len(r["gt_se"]), (
+            f"{key}: Python has {len(py_gt)} GT cells, R has {len(r['gt_se'])}"
+        )
         for i, ((g, t), eff) in enumerate(py_gt):
             _assert_close(
                 eff["se"], r["gt_se"][i],
@@ -197,7 +207,14 @@ def test_gt_att_matches_r(self, r_results, key):
         res = _run_python(data, r["est_method"], r["control_group"])
 
         py_gt = sorted(res.group_time_effects.items())
+        r_gt = list(zip(r["gt_groups"], r["gt_periods"]))
+        assert len(py_gt) == len(r["gt_att"]), (
+            f"{key}: Python has {len(py_gt)} GT cells, R has {len(r['gt_att'])}"
+        )
         for i, ((g, t), eff) in enumerate(py_gt):
+            assert (g, t) == (r_gt[i][0], r_gt[i][1]), (
+                f"{key}: GT cell mismatch at index {i}: Python=({g},{t}), R={r_gt[i]}"
+            )
             _assert_close(
                 eff["effect"], r["gt_att"][i],
                 ATT_RTOL, ATT_ATOL,
@@ -211,6 +228,9 @@ def test_gt_se_matches_r(self, r_results, key):
         res = _run_python(data, r["est_method"], r["control_group"])
 
         py_gt = sorted(res.group_time_effects.items())
+        assert len(py_gt) == len(r["gt_se"]), (
+            f"{key}: Python has {len(py_gt)} GT cells, R has {len(r['gt_se'])}"
+        )
         for i, ((g, t), eff) in enumerate(py_gt):
             _assert_close(
                 eff["se"], r["gt_se"][i],