igerber
diff --git a/README.md b/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/diff_diff/business_report.py b/diff_diff/business_report.py
Lines changed: 86 additions & 1 deletion
diff --git a/diff_diff/diagnostic_report.py b/diff_diff/diagnostic_report.py
Lines changed: 64 additions & 11 deletions
@@ -99,7 +99,7 @@ diff-diff ships two preview classes, `BusinessReport` and `DiagnosticReport`, th
 ```python
 from diff_diff import CallawaySantAnna, BusinessReport
 
-cs = CallawaySantAnna().fit(
+cs = CallawaySantAnna(base_period="universal").fit(
     df, outcome="revenue", unit="store", time="month",
     first_treat="first_treat", aggregate="event_study",
 )
 
@@ -364,12 +364,32 @@ def _extract_headline(self, dr_schema: Optional[Dict[str, Any]]) -> Dict[str, An
             )
             variance_method = getattr(r, "variance_method", None)
 
+            # Many staggered / continuous / dCDH result classes copy
+            # bootstrap-derived se/p/conf_int directly into their top-level
+            # fields and do not advertise ``inference_method`` or
+            # ``bootstrap_distribution``. Instead they expose either a
+            # populated ``bootstrap_results`` sub-object (CS, SA, Imputation,
+            # TwoStage, EfficientDiD, StaggeredTripleDiff, dCDH) or an
+            # ``n_bootstrap`` field set > 0 (ContinuousDiD, plus the above
+            # when applicable). Treat both as bootstrap markers so an
+            # ``alpha`` override does not silently swap a percentile /
+            # multiplier-bootstrap CI for a normal-approximation one.
+            has_bootstrap_results = getattr(r, "bootstrap_results", None) is not None
+            raw_n_bootstrap = getattr(r, "n_bootstrap", 0)
+            has_n_bootstrap = (
+                isinstance(raw_n_bootstrap, (int, float))
+                and np.isfinite(raw_n_bootstrap)
+                and raw_n_bootstrap > 0
+            )
+
             # Any non-analytic inference surface that stores a sampling /
             # resampling distribution (wild cluster bootstrap, percentile
             # bootstrap, jackknife, placebo) should preserve its native CI.
             bootstrap_like = (
                 inference_method in {"bootstrap", "wild_bootstrap"}
                 or has_bootstrap_dist
+                or has_bootstrap_results
+                or has_n_bootstrap
                 or variance_method in {"bootstrap", "jackknife", "placebo"}
             )
             finite_df = isinstance(df_survey, (int, float)) and df_survey > 0
@@ -663,6 +683,36 @@ def _describe_assumption(estimator_name: str) -> Dict[str, Any]:
                 "covariates are used."
             ),
         }
+    if estimator_name == "ChaisemartinDHaultfoeuilleResults":
+        # de Chaisemartin & D'Haultfoeuille (2020, 2024) — identification is
+        # transition-based across (joiner, leaver, stable-control) cells
+        # around each switching period, not a group-time ATT parallel-
+        # trends restriction. Writing up dCDH as "parallel trends across
+        # treatment cohorts" was flagged as a source-faithfulness bug in
+        # PR #318 review; REGISTRY.md §ChaisemartinDHaultfoeuille is
+        # explicit about the transition-set construction.
+        return {
+            "parallel_trends_variant": "transition_based",
+            "no_anticipation": True,
+            "description": (
+                "Identification is transition-based (de Chaisemartin & "
+                "D'Haultfoeuille 2020; dynamic companion 2024). At each "
+                "switching period, the estimator contrasts joiners "
+                "(D:0->1), leavers (D:1->0), and stable-treated / "
+                "stable-untreated control cells that share the same "
+                "treatment state across adjacent periods, yielding the "
+                "contemporaneous ``DID_M`` and per-horizon ``DID_l`` / "
+                "``DID_{g,l}`` building blocks. The identifying "
+                "restriction is parallel trends within each transition's "
+                "stable-control cell (not a single group-time ATT PT "
+                "condition across all cohorts) plus no anticipation; "
+                "with non-binary treatment the stable-control match is "
+                "additionally on exact baseline dose ``D_{g,1}``. "
+                "Reversible treatment is natively supported, unlike the "
+                "absorbing-treatment designs that rely on a fixed "
+                "treatment-onset cohort."
+            ),
+        }
     if estimator_name in {
         "CallawaySantAnnaResults",
         "SunAbrahamResults",
@@ -671,7 +721,6 @@ def _describe_assumption(estimator_name: str) -> Dict[str, Any]:
         "StackedDiDResults",
         "EfficientDiDResults",
         "WooldridgeDiDResults",
-        "ChaisemartinDHaultfoeuilleResults",
     }:
         return {
             "parallel_trends_variant": "conditional_or_group_time",
@@ -825,6 +874,42 @@ def _build_caveats(
                     }
                 )
 
+        # Sensitivity was skipped for methodology reasons (e.g., CS fit with
+        # ``base_period='varying'`` — HonestDiD bounds are not interpretable
+        # there). Surface the reason as a warning-severity caveat so readers
+        # do not assume the headline is robust across the R-R grid.
+        if sens.get("status") == "skipped":
+            reason = sens.get("reason")
+            if isinstance(reason, str) and reason:
+                caveats.append(
+                    {
+                        "severity": "warning",
+                        "topic": "sensitivity_skipped",
+                        "message": ("HonestDiD sensitivity was not run on this fit. " + reason),
+                    }
+                )
+
+        # Non-fatal warnings captured from delegated diagnostics
+        # (e.g., HonestDiD's bootstrap diag-covariance fallback, dropped
+        # non-consecutive horizons on dCDH). DR already records these in
+        # ``schema["warnings"]``; mirror the methodology-critical ones
+        # into BR's caveat list so summary/full-report prose can surface
+        # them without readers having to inspect the DR schema.
+        for msg in dr_schema.get("warnings", []) or []:
+            if not isinstance(msg, str) or not msg:
+                continue
+            # Mirror only sensitivity / pretrends_power messages; alpha-override
+            # and design-effect messages are covered by dedicated caveats above.
+            lower = msg.lower()
+            if "sensitivity:" in lower or "pretrends_power:" in lower:
+                caveats.append(
+                    {
+                        "severity": "info",
+                        "topic": "diagnostic_warning",
+                        "message": msg,
+                    }
+                )
+
     # Unit mismatch caveat (log_points + unit override).
     unit_kind = headline.get("unit_kind")
     if unit_kind == "log_points":
 
@@ -500,6 +500,25 @@ def _instance_skip_reason(self, check: str) -> Optional[str]:
             # Precomputed sensitivity always unlocks this check.
             if "sensitivity" in self._precomputed:
                 return None
+            # CallawaySantAnna with ``base_period='varying'`` (the default)
+            # produces consecutive-comparison pre-period coefficients;
+            # HonestDiD explicitly warns that bounds built on them are not
+            # valid for interpretation. Skip at the applicability gate so
+            # BR/DR do not narrate the grid as robustness. Users opting
+            # in can pass ``precomputed={'sensitivity': ...}`` or re-fit
+            # with ``base_period='universal'``.
+            if name == "CallawaySantAnnaResults":
+                base_period = getattr(r, "base_period", "universal")
+                if base_period != "universal":
+                    return (
+                        "HonestDiD on CallawaySantAnna requires "
+                        "``base_period='universal'`` for valid interpretation "
+                        "(Rambachan-Roth bounds are not comparable across the "
+                        "consecutive pre-period comparisons produced by "
+                        f"``base_period={base_period!r}``). Re-fit with "
+                        "``CallawaySantAnna(base_period='universal')`` or pass "
+                        "``precomputed={'sensitivity': ...}`` to opt in."
+                    )
             # dCDH uses ``placebo_event_study`` as its pre-period surface,
             # which HonestDiD consumes via a dedicated branch. Accept the
             # fit when that attribute is populated.
@@ -625,6 +644,21 @@ def _execute(self) -> DiagnosticReportResults:
             if section.get("status") == "error":
                 reason = section.get("reason") or "diagnostic raised an exception"
                 top_warnings.append(f"{check}: {reason}")
+            # Surface non-fatal warnings captured by delegated diagnostics
+            # (e.g., HonestDiD's "base_period='varying' is not valid for
+            # interpretation" on CallawaySantAnna, or the diag-covariance
+            # fallback on bootstrap-fitted CS). These rode up on each
+            # section's ``warnings`` field and must not be swallowed.
+            section_warnings = section.get("warnings")
+            if isinstance(section_warnings, (list, tuple)):
+                for msg in section_warnings:
+                    if msg is None:
+                        continue
+                    top_warnings.append(f"{check}: {msg}")
+            # Some sections (e.g., sensitivity skipped for varying-base CS)
+            # also surface methodology-critical context via ``reason`` even
+            # though ``status != "error"``. We do not duplicate those here
+            # — the section's own status/reason is the authoritative record.
 
         schema: Dict[str, Any] = {
             "schema_version": DIAGNOSTIC_REPORT_SCHEMA_VERSION,
@@ -994,29 +1028,48 @@ def _check_sensitivity(self) -> Dict[str, Any]:
                 "method": "estimator_native",
             }
 
+        # Varying-base CS gate: handled at ``_instance_skip_reason``, so
+        # this code path is not reached for a varying-base CS fit unless
+        # the user passed ``precomputed={'sensitivity': ...}`` (handled
+        # above). Kept here as a comment anchor; see _instance_skip_reason.
+
+        import warnings as _warnings
+
         try:
             from typing import cast
 
             from diff_diff.honest_did import HonestDiD
 
-            # The sensitivity_method string is validated at runtime by
-            # HonestDiD; the Literal annotation is for static typing only.
-            honest = HonestDiD(
-                method=cast(Any, self._sensitivity_method),
-                alpha=self._alpha,
-            )
-            sens = honest.sensitivity_analysis(
-                self._results,
-                M_grid=list(self._sensitivity_M_grid),
-            )
+            # Capture any non-fatal warnings HonestDiD emits (bootstrap
+            # diag-covariance fallback on CS, library-extension note on
+            # dCDH, dropped non-consecutive horizons, etc.) so BR/DR do not
+            # silently narrate sensitivity as clean when the helper
+            # flagged caveats. The try/except below still handles fatal
+            # errors; captured warnings ride on the returned dict.
+            with _warnings.catch_warnings(record=True) as caught:
+                _warnings.simplefilter("always")
+                # The sensitivity_method string is validated at runtime by
+                # HonestDiD; the Literal annotation is for static typing only.
+                honest = HonestDiD(
+                    method=cast(Any, self._sensitivity_method),
+                    alpha=self._alpha,
+                )
+                sens = honest.sensitivity_analysis(
+                    self._results,
+                    M_grid=list(self._sensitivity_M_grid),
+                )
         except Exception as exc:  # noqa: BLE001
             return {
                 "status": "error",
                 "method": self._sensitivity_method,
                 "reason": f"HonestDiD.sensitivity_analysis raised " f"{type(exc).__name__}: {exc}",
             }
 
-        return self._format_sensitivity_results(sens)
+        captured = [str(w.message) for w in caught if issubclass(w.category, Warning)]
+        formatted = self._format_sensitivity_results(sens)
+        if captured:
+            formatted["warnings"] = captured
+        return formatted
 
     def _format_sensitivity_results(self, sens: Any) -> Dict[str, Any]:
         grid = []
Original file line number	Diff line number	Diff line change
@@ -99,7 +99,7 @@ diff-diff ships two preview classes, `BusinessReport` and `DiagnosticReport`, th
`99`	`99`	```python
`100`	`100`	`from diff_diff import CallawaySantAnna, BusinessReport`
`101`	`101`
`102`		`-cs = CallawaySantAnna().fit(`
	`102`	`+cs = CallawaySantAnna(base_period="universal").fit(`
`103`	`103`	`df, outcome="revenue", unit="store", time="month",`
`104`	`104`	`first_treat="first_treat", aggregate="event_study",`
`105`	`105`	`)`