Tighten drift test: round-based endpoint pins + exact warning-set check

igerber · claude · igerber · commit 6a663289ddec · 2026-04-25T15:13:26.000-04:00
CI review surfaced two refinements:

1. Endpoint bands like `11.0 <= ci_low <= 11.6` would still pass values
   rounding to several different one-decimal displays (11.0, 11.1, ...,
   11.6) while the notebook prose stays at "11.3", "12.8", "11.4",
   "13.3", "11.5", "13.6". Replace those with `round(ci_low, 1) == 11.3`
   etc. - directly pins the displayed rounding so any drift past the
   tenth fails the test.

2. The warning tests didn't pin the notebook's full warning contract.
   `event_study_results` suppressed A7 for fixture cleanliness while
   the docstring claimed "A7 visible". Two changes:
   - Fix the fixture docstring to acknowledge A7 is muted there for
     value-checking tests, with the notebook's actual warning-policy
     contract validated separately
   - Add `test_event_study_warning_policy_matches_notebook` that
     mirrors the notebook's exact filter (only matmul-pattern
     RuntimeWarnings silenced) and asserts the resulting warning set:
     exactly one UserWarning (A7 leavers-present, the one the markdown
     explains) and zero RuntimeWarnings. If a future library change
     emits an unexpected warning on this code path, the test fails.

12 tests pass in ~0.07s (was 11).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/tests/test_t19_marketing_pulse_drift.py b/tests/test_t19_marketing_pulse_drift.py
@@ -76,8 +76,11 @@ def phase1_results(panel):
 
 @pytest.fixture(scope="module")
 def event_study_results(panel):
-    """Event-study fit: L_max=2 + multiplier bootstrap. Same warning
-    treatment as the notebook (Accelerate matmul filter; A7 visible)."""
+    """Event-study fit: L_max=2 + multiplier bootstrap. The A7
+    UserWarning is intentionally muted here so the fixture is quiet
+    for the value-checking tests below; the notebook's actual
+    warning-policy contract (A7 visible, only matmul filtered) is
+    validated separately by `test_event_study_warning_policy_matches_notebook`."""
     with warnings.catch_warnings():
         warnings.filterwarnings(
             "ignore",
@@ -123,13 +126,12 @@ def test_overall_ci_covers_truth(phase1_results):
 
 
 def test_overall_ci_endpoints_match_quoted(phase1_results):
-    """Section 3 narrative quotes '95% CI: 11.3 to 12.8'. Lock the
-    rounded endpoints so prose drift fails this test."""
+    """Section 3 narrative quotes '95% CI: 11.3 to 12.8'. Pin the
+    one-decimal display exactly so any drift past the displayed
+    rounding fails this test."""
     ci_low, ci_high = phase1_results.overall_conf_int
-    # CI lower endpoint rounds to 11.3 -> band covers 11.0..11.6
-    assert 11.0 <= ci_low <= 11.6, ci_low
-    # CI upper endpoint rounds to 12.8 -> band covers 12.5..13.1
-    assert 12.5 <= ci_high <= 13.1, ci_high
+    assert round(ci_low, 1) == 11.3, ci_low
+    assert round(ci_high, 1) == 12.8, ci_high
 
 
 def test_joiners_leavers_consistent(phase1_results):
@@ -154,14 +156,14 @@ def test_event_study_horizons_cover_truth(event_study_results):
 
 def test_event_study_ci_endpoints_match_quoted(event_study_results):
     """Section 4 narrative quotes l=1 CI [11.4, 13.3] and l=2 CI
-    [11.5, 13.6]. Lock the rounded endpoints so prose drift fails."""
+    [11.5, 13.6]. Pin the one-decimal display exactly."""
     es = event_study_results.event_study_effects
     # l=1 CI [11.4, 13.3]
-    assert 11.1 <= es[1]["conf_int"][0] <= 11.7, es[1]["conf_int"]
-    assert 13.0 <= es[1]["conf_int"][1] <= 13.6, es[1]["conf_int"]
+    assert round(es[1]["conf_int"][0], 1) == 11.4, es[1]["conf_int"]
+    assert round(es[1]["conf_int"][1], 1) == 13.3, es[1]["conf_int"]
     # l=2 CI [11.5, 13.6]
-    assert 11.2 <= es[2]["conf_int"][0] <= 11.8, es[2]["conf_int"]
-    assert 13.3 <= es[2]["conf_int"][1] <= 13.9, es[2]["conf_int"]
+    assert round(es[2]["conf_int"][0], 1) == 11.5, es[2]["conf_int"]
+    assert round(es[2]["conf_int"][1], 1) == 13.6, es[2]["conf_int"]
 
 
 def test_event_study_significance(event_study_results):
@@ -211,6 +213,48 @@ def test_assumption7_warning_fires_as_expected(panel):
     assert len(a7_warnings) >= 1, [str(w.message)[:80] for w in ws]
 
 
+def test_event_study_warning_policy_matches_notebook(panel):
+    """Mirror the notebook's exact warning policy on the visible
+    event-study fit and assert the resulting warning set matches the
+    documented contract: exactly one UserWarning (the A7 leavers-present
+    warning that the notebook's markdown explains), and zero
+    RuntimeWarnings (matmul-pattern ones filtered; everything else
+    surfaces). If the library starts emitting an unexpected warning on
+    this code path, this test fails and the notebook prose may need to
+    be updated."""
+    with warnings.catch_warnings(record=True) as ws:
+        warnings.simplefilter("always")
+        # MIRROR the notebook's narrow filter exactly (no np.errstate, no
+        # blanket A7 suppression).
+        warnings.filterwarnings(
+            "ignore",
+            message=r".*encountered in matmul",
+            category=RuntimeWarning,
+        )
+        model = DCDH(
+            twfe_diagnostic=False, placebo=True, n_bootstrap=199, seed=42
+        )
+        model.fit(
+            panel,
+            outcome="sessions",
+            group="market_id",
+            time="week",
+            treatment="promo_on",
+            L_max=2,
+        )
+    user_warnings = [w for w in ws if w.category is UserWarning]
+    runtime_warnings = [w for w in ws if w.category is RuntimeWarning]
+    # Exactly one UserWarning, and it's the documented A7 warning.
+    assert len(user_warnings) == 1, [str(w.message)[:120] for w in user_warnings]
+    msg = str(user_warnings[0].message)
+    assert "Assumption 7" in msg, msg
+    assert "leavers present" in msg, msg
+    # All RuntimeWarnings should be the matmul pattern (filtered) - so
+    # zero remaining. If a new RuntimeWarning fires from somewhere else,
+    # this fails.
+    assert len(runtime_warnings) == 0, [str(w.message)[:120] for w in runtime_warnings]
+
+
 def test_a11_warning_does_not_fire():
     """The notebook claims this seed/DGP is in the A11-clean regime
     (no warning fires). If a library change starts triggering A11 on