Skip to content

Commit 51bcf36

Browse files
igerber and claude committed
Make guidance estimator-specific for PT tests and placebo
P1: ContinuousDiD and TripleDifference handlers now emit honest "no built-in formal test" guidance instead of the invalid generic check_parallel_trends(), which assumes a single binary treatment. P1: Staggered handlers now use _placebo_step(staggered=True), which recommends specification-based falsification (control group/anticipation/subsample comparisons) instead of run_all_placebo_tests() (which refits a basic 2x2 DiD and is invalid for staggered designs). P2: _check_nan_att() now coerces to float() before testing, handling numpy scalars (np.float64(nan)) in addition to Python float("nan"). Docs updated: llms.txt Step 6 and llms-practitioner.txt placebo section now note that run_all_placebo_tests is for simple 2x2 only. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent bd8e565 commit 51bcf36

3 files changed

Lines changed: 74 additions & 14 deletions

File tree

diff_diff/practitioner.py

Lines changed: 66 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,27 @@ def _honest_did_step() -> Dict[str, Any]:
188188
)
189189

190190

191-
def _placebo_step() -> Dict[str, Any]:
191+
def _placebo_step(staggered: bool = False) -> Dict[str, Any]:
192+
if staggered:
193+
return _step(
194+
baker_step=6,
195+
label="Run sensitivity/falsification checks",
196+
why=(
197+
"For staggered designs, run_all_placebo_tests() is not "
198+
"directly applicable (it refits a basic 2x2 DiD). Instead, "
199+
"compare results across control group choices "
200+
"(never_treated vs not_yet_treated), anticipation settings, "
201+
"and subsample restrictions as falsification checks."
202+
),
203+
code=(
204+
"# Staggered falsification: compare across specifications\n"
205+
"# - Re-estimate with control_group='never_treated' vs 'not_yet_treated'\n"
206+
"# - Re-estimate with anticipation=1 to check no-anticipation\n"
207+
"# - Drop one cohort at a time for leave-one-out sensitivity"
208+
),
209+
priority="medium",
210+
step_name="sensitivity",
211+
)
192212
return _step(
193213
baker_step=6,
194214
label="Run placebo tests",
@@ -328,7 +348,7 @@ def _handle_cs(results: Any):
328348
def _handle_sa(results: Any):
329349
steps = [
330350
_parallel_trends_step(staggered=True),
331-
_placebo_step(),
351+
_placebo_step(staggered=True),
332352
_robustness_compare_step("CS, BJS, or Gardner"),
333353
_covariates_step(),
334354
]
@@ -339,7 +359,7 @@ def _handle_sa(results: Any):
339359
def _handle_imputation(results: Any):
340360
steps = [
341361
_parallel_trends_step(staggered=True),
342-
_placebo_step(),
362+
_placebo_step(staggered=True),
343363
_robustness_compare_step("CS, SA, or Gardner"),
344364
_covariates_step(),
345365
]
@@ -350,7 +370,7 @@ def _handle_imputation(results: Any):
350370
def _handle_two_stage(results: Any):
351371
steps = [
352372
_parallel_trends_step(staggered=True),
353-
_placebo_step(),
373+
_placebo_step(staggered=True),
354374
_robustness_compare_step("CS, BJS, or SA"),
355375
_covariates_step(),
356376
]
@@ -361,7 +381,7 @@ def _handle_two_stage(results: Any):
361381
def _handle_stacked(results: Any):
362382
steps = [
363383
_parallel_trends_step(staggered=True),
364-
_placebo_step(),
384+
_placebo_step(staggered=True),
365385
_step(
366386
baker_step=7,
367387
label="Check sub-experiment balance",
@@ -396,7 +416,7 @@ def _handle_synthetic(results: Any):
396416
),
397417
step_name="sensitivity",
398418
),
399-
_placebo_step(),
419+
_placebo_step(staggered=True),
400420
_step(
401421
baker_step=8,
402422
label="Compare with TROP or staggered estimators",
@@ -433,7 +453,7 @@ def _handle_trop(results: Any):
433453
),
434454
step_name="sensitivity",
435455
),
436-
_placebo_step(),
456+
_placebo_step(staggered=True),
437457
_robustness_compare_step("SyntheticDiD or CS"),
438458
]
439459
warnings = _check_nan_att(results)
@@ -443,7 +463,7 @@ def _handle_trop(results: Any):
443463
def _handle_efficient(results: Any):
444464
steps = [
445465
_parallel_trends_step(staggered=True),
446-
_placebo_step(),
466+
_placebo_step(staggered=True),
447467
_step(
448468
baker_step=7,
449469
label="Run Hausman pretest (PT-All vs PT-Post)",
@@ -468,7 +488,21 @@ def _handle_efficient(results: Any):
468488

469489
def _handle_continuous(results: Any):
470490
steps = [
471-
_parallel_trends_step(),
491+
_step(
492+
baker_step=3,
493+
label="Assess parallel trends for continuous treatment",
494+
why=(
495+
"ContinuousDiD has dose-specific parallel trends assumptions "
496+
"(PT/SPT) that differ from the binary treatment case. No "
497+
"built-in formal test exists; inspect dose-specific "
498+
"pre-treatment outcome trends across dose groups manually."
499+
),
500+
code=(
501+
"# No built-in formal PT test for continuous treatment.\n"
502+
"# Inspect pre-treatment outcome trends by dose group."
503+
),
504+
step_name="parallel_trends",
505+
),
472506
_step(
473507
baker_step=7,
474508
label="Plot dose-response curve",
@@ -501,8 +535,23 @@ def _handle_continuous(results: Any):
501535

502536
def _handle_triple(results: Any):
503537
steps = [
504-
_parallel_trends_step(),
505-
_placebo_step(),
538+
_step(
539+
baker_step=3,
540+
label="Assess parallel trends for triple difference",
541+
why=(
542+
"DDD requires parallel trends along two dimensions "
543+
"(treatment eligibility and treatment exposure). The "
544+
"generic check_parallel_trends() only tests a single "
545+
"binary comparison. Inspect pre-treatment trends "
546+
"separately for each dimension."
547+
),
548+
code=(
549+
"# No built-in formal DDD PT test.\n"
550+
"# Inspect pre-treatment trends in the treatment and\n"
551+
"# eligibility dimensions separately."
552+
),
553+
step_name="parallel_trends",
554+
),
506555
_step(
507556
baker_step=7,
508557
label="Test within-group placebo",
@@ -616,7 +665,12 @@ def _check_nan_att(results: Any) -> List[str]:
616665
att = getattr(results, "overall_att", None)
617666
if att is None:
618667
att = getattr(results, "avg_att", None)
619-
if att is not None and isinstance(att, float) and math.isnan(att):
668+
if att is not None:
669+
try:
670+
att = float(att)
671+
except (TypeError, ValueError):
672+
return []
673+
if att is not None and math.isnan(att):
620674
return [
621675
"Estimation produced NaN ATT — check data preparation and "
622676
"model specification before proceeding with diagnostics."

docs/llms-practitioner.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,10 +257,16 @@ print(honest.summary())
257257
honest_sd = compute_honest_did(results, method='smoothness', M=0.5)
258258
```
259259

260-
### Placebo tests
260+
### Placebo tests (simple 2x2 designs only)
261+
`run_all_placebo_tests()` refits a basic 2x2 DiD — it is valid for simple
262+
designs but NOT for staggered adoption. For staggered designs, use
263+
specification-based falsification: compare results across control group
264+
choices, anticipation settings, and cohort subsets.
265+
261266
```python
262267
from diff_diff import run_all_placebo_tests
263268

269+
# For simple 2x2 designs:
264270
placebo = run_all_placebo_tests(
265271
data, outcome='y', treatment='treated', time='period',
266272
unit='unit_id', pre_periods=[1, 2, 3], post_periods=[4, 5, 6],

docs/llms.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ diagnostic steps produces unreliable results.
2020
3. **Test parallel trends** — simple 2x2: `check_parallel_trends()`, `equivalence_test_trends()`; staggered: inspect CS event-study pre-period coefficients (generic PT tests are invalid for staggered designs). Insignificant pre-trends do NOT prove PT holds.
2121
4. **Choose estimator** — staggered adoption → CS/SA/BJS (NOT plain TWFE); few treated units → SDiD/TROP; simple 2x2 → DiD. Run `BaconDecomposition` to diagnose TWFE bias.
2222
5. **Estimate** — `estimator.fit(data, ...)`. Always print the cluster count first and choose inference method based on the result (cluster-robust if >= 50 clusters, wild bootstrap if fewer).
23-
6. **Sensitivity analysis** — `compute_honest_did(results)` for bounds under PT violations, `run_all_placebo_tests()` for falsification, Bacon decomposition for TWFE.
23+
6. **Sensitivity analysis** — `compute_honest_did(results)` for bounds under PT violations (MultiPeriodDiD/CS only), `run_all_placebo_tests()` for 2x2 falsification, specification comparisons for staggered designs.
2424
7. **Heterogeneity** — `aggregate='group'` for cohort effects, `aggregate='event_study'` for dynamic effects, subgroup re-estimation.
2525
8. **Robustness** — compare 2-3 estimators (CS vs SA vs BJS), MUST report with and without covariates (shows whether conditioning drives identification), present pre-trends and sensitivity bounds.
2626

0 commit comments

Comments
 (0)