Fix remaining step-numbering drift and NaN check for avg_att

igerber · claude · igerber · commit 6ca617ea1f26 · 2026-03-28T15:29:05.000-04:00
P1: Align complete example and Bacon cross-reference to canonical
numbering (Step 3=Test PT, Step 4=Choose estimator, Step 5=Estimate).
Complete example now executes check_parallel_trends() and prints
cluster count before estimation. Evaluation rubric S3-S5 labels
updated to match.

P2: _check_nan_att() now checks .avg_att (MultiPeriodDiDResults) in
addition to .att and .overall_att. Added regression test.

P2: Added TODO.md entry for extending snippet smoke tests to .txt
AI guides (deferred, low priority).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/TODO.md b/TODO.md
@@ -79,6 +79,7 @@ Deferred items from PR reviews that were not addressed before merge.
 | ~376 `duplicate object description` Sphinx warnings — caused by autodoc `:members:` on dataclass attributes within manual API pages (not from autosummary stubs); fix requires restructuring `docs/api/*.rst` pages to avoid documenting the same attribute via both `:members:` and inline `autosummary` tables | `docs/api/*.rst` | — | Low |
 | Plotly renderers silently ignore styling kwargs (marker, markersize, linewidth, capsize, ci_linewidth) that the matplotlib backend honors; thread them through or reject when `backend="plotly"` | `visualization/_event_study.py`, `_diagnostic.py`, `_power.py` | #222 | Medium |
 | Survey bootstrap test coverage: add FPC census zero-variance, single-PSU NaN, full-design bootstrap for CS/ContinuousDiD/EfficientDiD, and TROP Rao-Wu vs block bootstrap equivalence tests | `tests/test_survey_phase*.py` | #237 | Medium |
+| Doc-snippet smoke tests only cover `.rst` files; new `.txt` AI guides are outside CI validation | `tests/test_doc_snippets.py` | #239 | Low |
 
 ---
 
diff --git a/diff_diff/practitioner.py b/diff_diff/practitioner.py
@@ -591,10 +591,12 @@ def _handle_generic(results: Any):
 # ---------------------------------------------------------------------------
 def _check_nan_att(results: Any) -> List[str]:
     """Return warnings if ATT is NaN."""
-    # Check both .att (DiDResults) and .overall_att (staggered results)
+    # Check .att (DiDResults), .overall_att (staggered), .avg_att (MultiPeriod)
     att = getattr(results, "att", None)
     if att is None:
         att = getattr(results, "overall_att", None)
+    if att is None:
+        att = getattr(results, "avg_att", None)
     if att is not None and isinstance(att, float) and math.isnan(att):
         return [
             "Estimation produced NaN ATT — check data preparation and "
diff --git a/docs/llms-practitioner.txt b/docs/llms-practitioner.txt
@@ -255,7 +255,7 @@ for test_name, result in placebo.items():
 
 ### Bacon decomposition (for TWFE users)
 If you used TWFE, always run BaconDecomposition to check whether the
-estimate is contaminated by forbidden comparisons (see Step 3).
+estimate is contaminated by forbidden comparisons (see Step 4).
 
 ---
 
@@ -389,16 +389,21 @@ data = load_mpdta()
 # Step 2: Assumptions — PT-GT-NYT (not-yet-treated parallel trends),
 #          no anticipation, doubly robust for conditional PT
 
-# Step 3: Estimator selection — staggered adoption → CS (primary), SA (robustness)
-# First, diagnose TWFE bias:
+# Step 3: Test parallel trends
+pt = check_parallel_trends(data, outcome='lemp', time='year',
+                           treatment_group='first_treat')
+print(f"Pre-trends p-value: {pt['p_value']:.4f}")
+
+# Step 4: Choose estimator — staggered adoption → CS (primary), SA (robustness)
+# Diagnose TWFE bias first:
 bacon = BaconDecomposition()
 bacon_result = bacon.fit(data, outcome='lemp', unit='countyreal',
                          time='year', first_treat='first_treat')
 print(bacon_result.summary())
 
-# Step 4: Inference — cluster at county level (treatment assignment unit)
-
-# Step 5: Estimate
+# Step 5: Estimate (cluster at county level — treatment assignment unit)
+n_clusters = data['countyreal'].nunique()
+print(f"Clusters: {n_clusters} -> {'cluster-robust SEs' if n_clusters >= 50 else 'wild bootstrap'}")
 cs = CallawaySantAnna(
     control_group='not_yet_treated', estimation_method='dr',
     cluster='countyreal',
diff --git a/docs/practitioner-guide-evaluation.md b/docs/practitioner-guide-evaluation.md
@@ -25,9 +25,9 @@ instances with no shared context.
 |------|-------------|---|---|---|
 | S1 | Define target parameter | Not mentioned | Mentions ATT types | Explicitly defines weighted/unweighted, policy question |
 | S2 | State assumptions | Not mentioned | Mentions parallel trends | Formally names PT variant (PT-GT-NYT etc.) |
-| S3 | Appropriate estimator | Uses naive TWFE | Uses CS but no diagnostic | CS + Bacon diagnostic, explains choice |
-| S4 | Inference approach | Not discussed | Clusters SEs | Clusters + discusses alternatives (wild bootstrap) |
-| S5 | Estimation | No code | Partial code | Complete, working code |
+| S3 | Test parallel trends | Not done | Informal check (event study eyeball) | Runs check_parallel_trends / equivalence_test_trends |
+| S4 | Choose estimator | Uses naive TWFE | Uses CS but no diagnostic | CS + Bacon diagnostic, explains choice |
+| S5 | Estimate (with cluster check) | No code | Partial code | Complete code with cluster count check |
 | S6 | Sensitivity analysis | Not done | Mentions but doesn't run | Runs HonestDiD and/or placebo tests |
 | S7 | Heterogeneity | Not done | Some aggregation | Group + event study + subgroup |
 | S8 | Robustness | Not done | Compares 2 estimators | 3+ estimators + with/without covariates |
diff --git a/tests/test_practitioner.py b/tests/test_practitioner.py
@@ -341,6 +341,15 @@ def test_nan_att_produces_warning(self):
         assert len(output["warnings"]) > 0
         assert any("NaN" in w for w in output["warnings"])
 
+    def test_nan_avg_att_multi_period(self):
+        """MultiPeriodDiDResults uses avg_att, not att."""
+        from diff_diff.results import MultiPeriodDiDResults
+
+        r = MultiPeriodDiDResults.__new__(MultiPeriodDiDResults)
+        r.avg_att = float("nan")
+        output = practitioner_next_steps(r, verbose=False)
+        assert any("NaN" in w for w in output["warnings"])
+
 
 # ---------------------------------------------------------------------------
 # Tests: Bacon handler warnings