Address twenty-ninth round of CI review findings on PR #318

igerber · claude · igerber · commit 59d7df7ef81a · 2026-04-19T11:53:32.000-04:00
P3 code quality (power_reason provenance). REPORTING.md lines
118-125 say the ``pretrends_power`` fallback reason is recorded on
the BR pre-trends block, but ``_lift_pre_trends`` only carried the
enum status and dropped the reason. Downstream schema consumers saw
``power_status="not_applicable"`` with no explanation — e.g., on
``StackedDiDResults`` / ``EfficientDiDResults`` /
``StaggeredTripleDiffResults`` / ``WooldridgeDiDResults`` /
``ChaisemartinDHaultfoeuilleResults`` fits where the power adapter
is not yet available.

Add a dedicated ``power_reason`` field alongside the existing
``power_status`` enum (additive, no breaking change) and update
``REPORTING.md`` to describe both fields.

P3 docs / tests (DR prose for survey PT variants). Round-28 added
the ``_survey`` suffix and ``df_denom`` to ``_pt_event_study``, and
BR's method-aware helpers were updated to recognize the variants.
``DiagnosticReport``'s own ``_pt_subject_phrase`` / ``_pt_stat_label``
prose helpers were not, so DR ``summary()`` / ``full_report()``
still rendered the generic "Pre-treatment data" subject on
survey-backed fits. Recognize ``joint_wald_survey`` and
``joint_wald_event_study_survey`` alongside the non-survey variants:
the subject is still the pre-period event-study coefficient vector and
the statistic label stays ``joint p`` (the survey F-reference
correction changes the reference distribution, not the test).

Tests: 2 new regression tests.

  * ``test_lift_pre_trends_exposes_power_reason`` under
    ``TestSurveyPTProsePropagation``: a fake DR block with a skipped
    power section surfaces both the enum status and the plain-English
    reason on the BR schema.
  * ``test_dr_prose_uses_event_study_subject_for_survey_pt`` under
    ``TestJointWaldAlignment``: DR's own subject / stat-label helpers
    return the event-study phrasing and ``joint p`` for both
    survey variants.

244 BR / DR / practitioner tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/business_report.py b/diff_diff/business_report.py
@@ -704,6 +704,15 @@ def _lift_pre_trends(dr: Optional[Dict[str, Any]]) -> Dict[str, Any]:
         # CI review on PR #318).
         "df_denom": pt.get("df_denom"),
         "power_status": pp.get("status"),
+        # Dedicated reason field so schema consumers see the fallback
+        # explanation when ``compute_pretrends_power`` cannot run
+        # (``status in {"skipped", "error", "not_applicable"}``).
+        # REPORTING.md lines 118-125 promise this provenance; round-29
+        # P3 CI review on PR #318 flagged that only the enum status was
+        # being exposed and the reason was dropped at the lift boundary.
+        # ``power_status`` stays the machine-readable enum; ``power_reason``
+        # carries the plain-English explanation.
+        "power_reason": pp.get("reason"),
         "power_tier": pp.get("tier"),
         "mdv": pp.get("mdv"),
         "mdv_share_of_att": pp.get("mdv_share_of_att"),
diff --git a/diff_diff/diagnostic_report.py b/diff_diff/diagnostic_report.py
@@ -2540,7 +2540,21 @@ def _pt_subject_phrase(method: Optional[str]) -> str:
         return "The pre-period slope-difference test"
     if method == "hausman":
         return "The Hausman PT-All vs PT-Post pretest"
-    if method in {"joint_wald", "joint_wald_event_study", "joint_wald_no_vcov", "bonferroni"}:
+    if method in {
+        "joint_wald",
+        "joint_wald_event_study",
+        "joint_wald_no_vcov",
+        "bonferroni",
+        # Survey-aware event-study PT variants use an F(k, df_survey)
+        # reference rather than chi-square(k); the subject is still the
+        # pre-period event-study coefficient vector — only the
+        # reference distribution changes (round-28 / round-29 CI
+        # review on PR #318). Recognizing the ``_survey`` suffix here
+        # lets DR prose match the BR prose and the REPORTING.md
+        # contract.
+        "joint_wald_survey",
+        "joint_wald_event_study_survey",
+    }:
         return "Pre-treatment event-study coefficients"
     if method == "synthetic_fit":
         return "The synthetic-control pre-treatment fit"
@@ -2555,9 +2569,19 @@ def _pt_stat_label(method: Optional[str]) -> Optional[str]:
     Wald / Bonferroni paths take a joint p-value (``joint p``); the 2x2
     slope-difference and Hausman paths are single-statistic tests
     (``p``). Design-enforced paths return ``None`` so the sentence
-    omits a statistic.
+    omits a statistic. Survey F-reference variants remain joint tests
+    on the pre-period coefficient vector and keep the ``joint p``
+    label — the correction is a different reference distribution, not
+    a different test.
     """
-    if method in {"joint_wald", "joint_wald_event_study", "joint_wald_no_vcov", "bonferroni"}:
+    if method in {
+        "joint_wald",
+        "joint_wald_event_study",
+        "joint_wald_no_vcov",
+        "bonferroni",
+        "joint_wald_survey",
+        "joint_wald_event_study_survey",
+    }:
         return "joint p"
     if method in {"slope_difference", "hausman"}:
         return "p"
diff --git a/docs/methodology/REPORTING.md b/docs/methodology/REPORTING.md
@@ -120,7 +120,10 @@ not new inference.
   `ChaisemartinDHaultfoeuilleResults`) do not yet have a power
   adapter and therefore render the `no_detected_violation` tier as
   `underpowered` with the fallback reason recorded in
-  `schema["pre_trends"]["power_status"]`. BusinessReport then reads
+  `schema["pre_trends"]["power_reason"]` (plain-English explanation)
+  while `schema["pre_trends"]["power_status"]` carries the
+  machine-readable enum (`"ran"` / `"skipped"` / `"error"` /
+  `"not_applicable"`). BusinessReport then reads
   `mdv_share_of_att = mdv / abs(att)` and selects a tier:
 
   - `< 0.25` &rarr; `well_powered` &mdash; "the test has 80% power to
diff --git a/tests/test_business_report.py b/tests/test_business_report.py
@@ -2506,6 +2506,42 @@ def test_lift_pre_trends_preserves_df_denom(self):
         assert lifted["method"] == "joint_wald_event_study_survey"
         assert lifted["df_denom"] == 30.0
 
+    def test_lift_pre_trends_exposes_power_reason(self):
+        """Round-29 P3 regression: when ``compute_pretrends_power`` cannot
+        run, REPORTING.md lines 118-125 promise the fallback reason is
+        recorded in the BR pre-trends block. Previously only the enum
+        status surfaced and the reason was dropped at the lift
+        boundary; the new ``power_reason`` field carries the
+        plain-English explanation alongside the existing enum
+        ``power_status``.
+        """
+        from diff_diff.business_report import _lift_pre_trends
+
+        fake_dr = {
+            "parallel_trends": {
+                "status": "ran",
+                "method": "joint_wald_event_study",
+                "joint_p_value": 0.35,
+                "n_pre_periods": 3,
+                "verdict": "no_detected_violation",
+            },
+            "pretrends_power": {
+                "status": "not_applicable",
+                "reason": (
+                    "StackedDiDResults does not yet have a "
+                    "compute_pretrends_power adapter."
+                ),
+            },
+        }
+        lifted = _lift_pre_trends(fake_dr)
+        # Machine-readable status preserved.
+        assert lifted["power_status"] == "not_applicable"
+        # Plain-English reason now exposed on the schema.
+        assert lifted["power_reason"] == (
+            "StackedDiDResults does not yet have a "
+            "compute_pretrends_power adapter."
+        )
+
     def test_survey_pt_method_stat_label_uses_joint_p(self):
         from diff_diff.business_report import (
             _pt_method_stat_label,
diff --git a/tests/test_diagnostic_report.py b/tests/test_diagnostic_report.py
@@ -887,6 +887,35 @@ def test_precomputed_survey_pt_replay_preserves_df_denom(self, cs_fit):
         assert pt["df_denom"] == 20.0
         assert pt["df"] == 3
 
+    def test_dr_prose_uses_event_study_subject_for_survey_pt(self):
+        """Round-29 P3 regression: DR's own ``_pt_subject_phrase`` /
+        ``_pt_stat_label`` helpers previously didn't recognize the
+        ``_survey`` variants, so summary / full_report prose fell
+        through to the generic "Pre-treatment data" wording — BR's
+        helpers were fixed last round but DR's were not. The survey
+        variants must render with the event-study subject and the
+        ``joint p`` label; the F-reference correction is a different
+        reference distribution, not a different test.
+        """
+        from diff_diff.diagnostic_report import (
+            _pt_stat_label,
+            _pt_subject_phrase,
+        )
+
+        for method in (
+            "joint_wald_survey",
+            "joint_wald_event_study_survey",
+        ):
+            assert (
+                _pt_subject_phrase(method)
+                == "Pre-treatment event-study coefficients"
+            ), (
+                f"DR subject for {method!r} must match the non-survey "
+                f"event-study phrasing; got "
+                f"{_pt_subject_phrase(method)!r}"
+            )
+            assert _pt_stat_label(method) == "joint p"
+
     def test_joint_wald_ignores_non_finite_survey_df(self):
         """If ``df_survey`` is NaN / inf / non-positive, fall back to
         chi-square (no finite-sample correction available).