Skip to content

Commit 311a7be

Browse files
igerberclaude
andcommitted
Address fifth round of CI review findings on PR #318
- P0: ``_extract_headline`` now detects ``bootstrap_results is not None`` and ``n_bootstrap > 0`` in addition to ``inference_method`` / ``bootstrap_distribution`` / ``variance_method`` / ``df_survey``. Many staggered / continuous / dCDH result classes copy bootstrap- derived se/p/conf_int into their top-level fields without advertising ``inference_method``; alpha override must preserve their fitted CI rather than silently swapping in a normal-approximation interval. - P1: ``DiagnosticReport._check_sensitivity`` wraps the HonestDiD call in ``warnings.catch_warnings(record=True)`` and propagates captured messages onto the returned section dict. ``run_all`` aggregates per-section warnings into the top-level ``warnings`` list so both DR and BR surface them. CallawaySantAnna fits with ``base_period='varying'`` are preemptively skipped at the applicability gate with a methodology-critical reason, since HonestDiD explicitly warns those bounds are not valid for interpretation. BR renders the skip as a warning-severity caveat under a new ``sensitivity_skipped`` topic. - P1: ``_describe_assumption`` now gives ``ChaisemartinDHaultfoeuilleResults`` a source-backed description of transition-based identification (joiners / leavers / stable-control transitions, DID_M / DID_l building blocks, non-binary dose matching, reversible treatment) rather than generic group-time ATT PT text. - P2: README example now uses ``CallawaySantAnna(base_period='universal')`` so the advertised one-call sensitivity path actually runs. Both ``cs_fit`` fixtures updated likewise. - Regressions: ``TestBootstrapResultsAndNBootstrapDetection`` (four cases incl. dCDH-shaped stub and the analytic zero-bootstrap guard), ``TestDCDHAssumptionTransitionBased`` (source-faithful language assertions), ``TestCSVaryingBaseSensitivitySkipped`` (DR schema reason + BR caveat surfacing). 150 -> 115 targeted tests passing; black / ruff / mypy clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 345f65c commit 311a7be

5 files changed

Lines changed: 340 additions & 17 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ diff-diff ships two preview classes, `BusinessReport` and `DiagnosticReport`, th
9999
```python
100100
from diff_diff import CallawaySantAnna, BusinessReport
101101

102-
cs = CallawaySantAnna().fit(
102+
cs = CallawaySantAnna(base_period="universal").fit(
103103
df, outcome="revenue", unit="store", time="month",
104104
first_treat="first_treat", aggregate="event_study",
105105
)

diff_diff/business_report.py

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,12 +364,32 @@ def _extract_headline(self, dr_schema: Optional[Dict[str, Any]]) -> Dict[str, An
364364
)
365365
variance_method = getattr(r, "variance_method", None)
366366

367+
# Many staggered / continuous / dCDH result classes copy
368+
# bootstrap-derived se/p/conf_int directly into their top-level
369+
# fields and do not advertise ``inference_method`` or
370+
# ``bootstrap_distribution``. Instead they expose either a
371+
# populated ``bootstrap_results`` sub-object (CS, SA, Imputation,
372+
# TwoStage, EfficientDiD, StaggeredTripleDiff, dCDH) or an
373+
# ``n_bootstrap`` field set > 0 (ContinuousDiD, plus the above
374+
# when applicable). Treat both as bootstrap markers so an
375+
# ``alpha`` override does not silently swap a percentile /
376+
# multiplier-bootstrap CI for a normal-approximation one.
377+
has_bootstrap_results = getattr(r, "bootstrap_results", None) is not None
378+
raw_n_bootstrap = getattr(r, "n_bootstrap", 0)
379+
has_n_bootstrap = (
380+
isinstance(raw_n_bootstrap, (int, float))
381+
and np.isfinite(raw_n_bootstrap)
382+
and raw_n_bootstrap > 0
383+
)
384+
367385
# Any non-analytic inference surface that stores a sampling /
368386
# resampling distribution (wild cluster bootstrap, percentile
369387
# bootstrap, jackknife, placebo) should preserve its native CI.
370388
bootstrap_like = (
371389
inference_method in {"bootstrap", "wild_bootstrap"}
372390
or has_bootstrap_dist
391+
or has_bootstrap_results
392+
or has_n_bootstrap
373393
or variance_method in {"bootstrap", "jackknife", "placebo"}
374394
)
375395
finite_df = isinstance(df_survey, (int, float)) and df_survey > 0
@@ -663,6 +683,36 @@ def _describe_assumption(estimator_name: str) -> Dict[str, Any]:
663683
"covariates are used."
664684
),
665685
}
686+
if estimator_name == "ChaisemartinDHaultfoeuilleResults":
687+
# de Chaisemartin & D'Haultfoeuille (2020, 2024) — identification is
688+
# transition-based across (joiner, leaver, stable-control) cells
689+
# around each switching period, not a group-time ATT parallel-
690+
# trends restriction. Writing up dCDH as "parallel trends across
691+
# treatment cohorts" was flagged as a source-faithfulness bug in
692+
# PR #318 review; REGISTRY.md §ChaisemartinDHaultfoeuille is
693+
# explicit about the transition-set construction.
694+
return {
695+
"parallel_trends_variant": "transition_based",
696+
"no_anticipation": True,
697+
"description": (
698+
"Identification is transition-based (de Chaisemartin & "
699+
"D'Haultfoeuille 2020; dynamic companion 2024). At each "
700+
"switching period, the estimator contrasts joiners "
701+
"(D:0->1), leavers (D:1->0), and stable-treated / "
702+
"stable-untreated control cells that share the same "
703+
"treatment state across adjacent periods, yielding the "
704+
"contemporaneous ``DID_M`` and per-horizon ``DID_l`` / "
705+
"``DID_{g,l}`` building blocks. The identifying "
706+
"restriction is parallel trends within each transition's "
707+
"stable-control cell (not a single group-time ATT PT "
708+
"condition across all cohorts) plus no anticipation; "
709+
"with non-binary treatment the stable-control match is "
710+
"additionally on exact baseline dose ``D_{g,1}``. "
711+
"Reversible treatment is natively supported, unlike the "
712+
"absorbing-treatment designs that rely on a fixed "
713+
"treatment-onset cohort."
714+
),
715+
}
666716
if estimator_name in {
667717
"CallawaySantAnnaResults",
668718
"SunAbrahamResults",
@@ -671,7 +721,6 @@ def _describe_assumption(estimator_name: str) -> Dict[str, Any]:
671721
"StackedDiDResults",
672722
"EfficientDiDResults",
673723
"WooldridgeDiDResults",
674-
"ChaisemartinDHaultfoeuilleResults",
675724
}:
676725
return {
677726
"parallel_trends_variant": "conditional_or_group_time",
@@ -825,6 +874,42 @@ def _build_caveats(
825874
}
826875
)
827876

877+
# Sensitivity was skipped for methodology reasons (e.g., CS fit with
878+
# ``base_period='varying'`` — HonestDiD bounds are not interpretable
879+
# there). Surface the reason as a warning-severity caveat so readers
880+
# do not assume the headline is robust across the R-R grid.
881+
if sens.get("status") == "skipped":
882+
reason = sens.get("reason")
883+
if isinstance(reason, str) and reason:
884+
caveats.append(
885+
{
886+
"severity": "warning",
887+
"topic": "sensitivity_skipped",
888+
"message": ("HonestDiD sensitivity was not run on this fit. " + reason),
889+
}
890+
)
891+
892+
# Non-fatal warnings captured from delegated diagnostics
893+
# (e.g., HonestDiD's bootstrap diag-covariance fallback, dropped
894+
# non-consecutive horizons on dCDH). DR already records these in
895+
# ``schema["warnings"]``; mirror the methodology-critical ones
896+
# into BR's caveat list so summary/full-report prose can surface
897+
# them without readers having to inspect the DR schema.
898+
for msg in dr_schema.get("warnings", []) or []:
899+
if not isinstance(msg, str) or not msg:
900+
continue
901+
# Skip alpha-override and design-effect messages already
902+
# covered by dedicated caveats above.
903+
lower = msg.lower()
904+
if "sensitivity:" in lower or "pretrends_power:" in lower:
905+
caveats.append(
906+
{
907+
"severity": "info",
908+
"topic": "diagnostic_warning",
909+
"message": msg,
910+
}
911+
)
912+
828913
# Unit mismatch caveat (log_points + unit override).
829914
unit_kind = headline.get("unit_kind")
830915
if unit_kind == "log_points":

diff_diff/diagnostic_report.py

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,25 @@ def _instance_skip_reason(self, check: str) -> Optional[str]:
500500
# Precomputed sensitivity always unlocks this check.
501501
if "sensitivity" in self._precomputed:
502502
return None
503+
# CallawaySantAnna with ``base_period='varying'`` (the default)
504+
# produces consecutive-comparison pre-period coefficients;
505+
# HonestDiD explicitly warns those bounds are not valid for
506+
# interpreted sensitivity. Skip at the applicability gate so
507+
# BR/DR do not narrate the grid as robustness. Users opting
508+
# in can pass ``precomputed={'sensitivity': ...}`` or re-fit
509+
# with ``base_period='universal'``.
510+
if name == "CallawaySantAnnaResults":
511+
base_period = getattr(r, "base_period", "universal")
512+
if base_period != "universal":
513+
return (
514+
"HonestDiD on CallawaySantAnna requires "
515+
"``base_period='universal'`` for valid interpretation "
516+
"(Rambachan-Roth bounds are not comparable across the "
517+
"consecutive pre-period comparisons produced by "
518+
f"``base_period={base_period!r}``). Re-fit with "
519+
"``CallawaySantAnna(base_period='universal')`` or pass "
520+
"``precomputed={'sensitivity': ...}`` to opt in."
521+
)
503522
# dCDH uses ``placebo_event_study`` as its pre-period surface,
504523
# which HonestDiD consumes via a dedicated branch. Accept the
505524
# fit when that attribute is populated.
@@ -625,6 +644,21 @@ def _execute(self) -> DiagnosticReportResults:
625644
if section.get("status") == "error":
626645
reason = section.get("reason") or "diagnostic raised an exception"
627646
top_warnings.append(f"{check}: {reason}")
647+
# Surface non-fatal warnings captured by delegated diagnostics
648+
# (e.g., HonestDiD's "base_period='varying' is not valid for
649+
# interpretation" on CallawaySantAnna, or the diag-covariance
650+
# fallback on bootstrap-fitted CS). These rode up on each
651+
# section's ``warnings`` field and must not be swallowed.
652+
section_warnings = section.get("warnings")
653+
if isinstance(section_warnings, (list, tuple)):
654+
for msg in section_warnings:
655+
if msg is None:
656+
continue
657+
top_warnings.append(f"{check}: {msg}")
658+
# Some sections (e.g., sensitivity skipped for varying-base CS)
659+
# also surface methodology-critical context via ``reason`` even
660+
# though ``status != "error"``. We do not duplicate those here
661+
# — the section's own status/reason is the authoritative record.
628662

629663
schema: Dict[str, Any] = {
630664
"schema_version": DIAGNOSTIC_REPORT_SCHEMA_VERSION,
@@ -994,29 +1028,48 @@ def _check_sensitivity(self) -> Dict[str, Any]:
9941028
"method": "estimator_native",
9951029
}
9961030

1031+
# Varying-base CS gate: handled at ``_instance_skip_reason``, so
1032+
# this code path is not reached for a varying-base CS fit unless
1033+
# the user passed ``precomputed={'sensitivity': ...}`` (handled
1034+
# above). Kept here as a comment anchor; see _instance_skip_reason.
1035+
1036+
import warnings as _warnings
1037+
9971038
try:
9981039
from typing import cast
9991040

10001041
from diff_diff.honest_did import HonestDiD
10011042

1002-
# The sensitivity_method string is validated at runtime by
1003-
# HonestDiD; the Literal annotation is for static typing only.
1004-
honest = HonestDiD(
1005-
method=cast(Any, self._sensitivity_method),
1006-
alpha=self._alpha,
1007-
)
1008-
sens = honest.sensitivity_analysis(
1009-
self._results,
1010-
M_grid=list(self._sensitivity_M_grid),
1011-
)
1043+
# Capture any non-fatal UserWarnings HonestDiD emits (bootstrap
1044+
# diag-covariance fallback on CS, library-extension note on
1045+
# dCDH, dropped non-consecutive horizons, etc.) so BR/DR do not
1046+
# silently narrate sensitivity as clean when the helper
1047+
# flagged caveats. The try/except below still handles fatal
1048+
# errors; captured warnings ride on the returned dict.
1049+
with _warnings.catch_warnings(record=True) as caught:
1050+
_warnings.simplefilter("always")
1051+
# The sensitivity_method string is validated at runtime by
1052+
# HonestDiD; the Literal annotation is for static typing only.
1053+
honest = HonestDiD(
1054+
method=cast(Any, self._sensitivity_method),
1055+
alpha=self._alpha,
1056+
)
1057+
sens = honest.sensitivity_analysis(
1058+
self._results,
1059+
M_grid=list(self._sensitivity_M_grid),
1060+
)
10121061
except Exception as exc: # noqa: BLE001
10131062
return {
10141063
"status": "error",
10151064
"method": self._sensitivity_method,
10161065
"reason": f"HonestDiD.sensitivity_analysis raised " f"{type(exc).__name__}: {exc}",
10171066
}
10181067

1019-
return self._format_sensitivity_results(sens)
1068+
captured = [str(w.message) for w in caught if issubclass(w.category, Warning)]
1069+
formatted = self._format_sensitivity_results(sens)
1070+
if captured:
1071+
formatted["warnings"] = captured
1072+
return formatted
10201073

10211074
def _format_sensitivity_results(self, sens: Any) -> Dict[str, Any]:
10221075
grid = []

0 commit comments

Comments
 (0)