Address CI AI review: reject negative first_treat in ContinuousDiD

igerber · claude · igerber · commit 2ace61d7286d · 2026-04-19T08:41:36.000-04:00
CI AI re-review flagged (P1) that the previous commit claimed "-inf
will be rejected by downstream validators" in both the code comment
and REGISTRY.md, but no such validator existed. After the `+inf -> 0`
normalization, `first_treat < 0` units fell out of both the treated
(g > 0) and never-treated (g == 0) masks, so the affected units were
silently excluded from the estimator — exactly the axis-E silent
failure the PR was closing.

- ContinuousDiD.fit() now validates `first_treat < 0` explicitly
  post-normalization and raises ValueError with the row count. -inf,
  -2, and any other negative value are all rejected.
- REGISTRY.md note rewritten to match the implemented behavior.
- Existing -inf test replaced with one that asserts
  `pytest.raises(ValueError)` matching the row-count message, plus
  a positive regression test confirming +inf warning stays silent
  on panels with only valid 0/positive `first_treat` values.
- tests/test_utils.py::test_silent_on_balanced_panel tightened: the
  balanced-panel silence assertion now filters on any warning
  containing "dropped", so a regression that changed the warning
  label would no longer hide a genuine drop signal.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/continuous_did.py b/diff_diff/continuous_did.py
@@ -233,9 +233,9 @@ def fit(
         # recategorization here would shift the control composition (axis-E
         # silent coercion). Only positive infinity is recoded (to match the
         # existing `.replace([np.inf, float("inf")], 0)` semantics on the
-        # next line); `-inf` is neither counted here nor recoded, so a
-        # downstream validator will reject it if present.
-        inf_mask = np.isposinf(df[first_treat].values)
+        # next line).
+        first_treat_vals = df[first_treat].values
+        inf_mask = np.isposinf(first_treat_vals)
         n_inf_first_treat = int(inf_mask.sum())
         if n_inf_first_treat > 0:
             warnings.warn(
@@ -245,6 +245,19 @@ def fit(
                 UserWarning,
                 stacklevel=2,
             )
+        # Reject negative first_treat values (including -inf) explicitly.
+        # Without this guard they would survive preprocessing but fall out of
+        # both the treated (g > 0) and never-treated (g == 0) masks, silently
+        # excluding the affected units.
+        negative_mask = first_treat_vals < 0
+        n_negative_first_treat = int(negative_mask.sum())
+        if n_negative_first_treat > 0:
+            raise ValueError(
+                f"{n_negative_first_treat} row(s) have negative '{first_treat}' "
+                f"values (including -inf). Valid values are 0 (never-treated) "
+                f"or a positive treatment period; such units would otherwise "
+                f"be silently excluded from both treated and control pools."
+            )
         df[first_treat] = df[first_treat].replace([np.inf, float("inf")], 0)
 
         # Drop units with positive first_treat but zero dose (R convention)
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -720,7 +720,7 @@ See `docs/methodology/continuous-did.md` Section 4 for full details.
 - [ ] Lowest-dose-as-control (Remark 3.1)
 - [x] Survey design support (Phase 3): weighted B-spline OLS, TSL on influence functions; bootstrap+survey supported (Phase 6)
 - **Note:** ContinuousDiD bootstrap with survey weights supported (Phase 6) via PSU-level multiplier weights
-- **Note:** The R-style convention of coding never-treated units as `first_treat=inf` is still accepted and normalized to `first_treat=0` internally, but the estimator now emits a `UserWarning` reporting the row count so the silent recategorization is surfaced (axis-E silent coercion under the Phase 2 audit). Only `+inf` is recoded (matching the R convention); `-inf` passes through untouched and will be rejected by downstream validators. Pass `0` directly to avoid the warning.
+- **Note:** The R-style convention of coding never-treated units as `first_treat=inf` is still accepted and normalized to `first_treat=0` internally, but the estimator now emits a `UserWarning` reporting the row count so the silent recategorization is surfaced (axis-E silent coercion under the Phase 2 audit). Only `+inf` is recoded (matching the R convention). Any **negative** `first_treat` value (including `-inf`) raises `ValueError` with the row count, since such units would otherwise silently fall out of both the treated (`g > 0`) and never-treated (`g == 0`) masks. Pass `0` directly for never-treated units to avoid the warning.
 - **Note:** Rows where `first_treat=0` (never-treated) carry a nonzero `dose` are silently zeroed for internal consistency (never-treated cells must have `D=0` in the dose response). The estimator now emits a `UserWarning` with the affected row count before the zeroing, so unintended nonzero doses on never-treated rows are no longer absorbed without a signal (axis-E silent coercion).
 
 ---
diff --git a/tests/test_continuous_did.py b/tests/test_continuous_did.py
@@ -723,16 +723,20 @@ def test_clean_never_treated_doses_silent(self):
         ]
         assert coerce_warnings == []
 
-    def test_negative_inf_first_treat_does_not_trigger_recategorization_warning(self):
-        """-inf first_treat is NOT recoded to 0 by `.replace([inf, float("inf")], 0)`,
-        so the recategorization warning (which used to count both +inf and -inf
-        via np.isinf) must not fire for -inf rows."""
-        import warnings
+    def test_negative_first_treat_raises_with_row_count(self):
+        """Negative `first_treat` (including -inf) must raise ValueError with
+        the affected row count. Without this guard the affected units fall
+        out of both the treated (g > 0) and never-treated (g == 0) masks and
+        are silently excluded from the estimator."""
         rows = []
         for unit in range(4):
-            # Unit 0 carries -inf (not recoded, so downstream validation should
-            # see it as-is). Others are untreated with dose=0.
-            ft = -np.inf if unit == 0 else 0.0
+            # Unit 0: -inf. Unit 1: -2. Others: valid (0 or positive).
+            if unit == 0:
+                ft = -np.inf
+            elif unit == 1:
+                ft = -2.0
+            else:
+                ft = 0.0
             for t in range(1, 4):
                 rows.append({
                     "unit": unit, "period": t, "outcome": float(unit + t),
@@ -741,6 +745,28 @@ def test_negative_inf_first_treat_does_not_trigger_recategorization_warning(self
         data = pd.DataFrame(rows)
         est = ContinuousDiD()
 
+        with pytest.raises(
+            ValueError,
+            match=r"6 row\(s\) have negative 'first_treat' values",
+        ):
+            est.fit(data, "outcome", "unit", "period", "first_treat", "dose")
+
+    def test_positive_inf_warning_silent_when_no_inf(self):
+        """+inf warning is gated on +inf rows only; panels with only valid
+        non-negative values (including just 0 and positive periods) must
+        never trigger the recategorization warning."""
+        import warnings
+        rows = []
+        for unit in range(4):
+            ft = 0.0 if unit < 2 else 2.0
+            for t in range(1, 4):
+                rows.append({
+                    "unit": unit, "period": t, "outcome": float(unit + t),
+                    "first_treat": ft, "dose": 0.0 if unit < 2 else 1.0,
+                })
+        data = pd.DataFrame(rows)
+        est = ContinuousDiD()
+
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter("always")
             try:
@@ -749,9 +775,7 @@ def test_negative_inf_first_treat_does_not_trigger_recategorization_warning(self
                 pass
 
         inf_warnings = [x for x in w if "inf in 'first_treat'" in str(x.message)]
-        assert inf_warnings == [], (
-            "-inf must not trigger the +inf recategorization warning"
-        )
+        assert inf_warnings == []
 
     def test_inf_first_treat_warning_counts_rows_not_units(self):
         """The warning counts affected rows (not units). On a panel with
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -849,9 +849,9 @@ def test_silent_on_balanced_panel(self):
                 treatment_group="treated", unit="unit",
             )
 
-        drop_warnings = [
-            x for x in w if "check_parallel_trends dropped" in str(x.message)
-        ]
+        # Generic filter on "dropped" catches both the old and new label so a
+        # regression in the label wouldn't hide a real silent-drop warning.
+        drop_warnings = [x for x in w if "dropped" in str(x.message).lower()]
         assert drop_warnings == []
 
     def test_warns_on_nan_outcomes_with_excess_drop_count(self):