Skip to content

Commit 8838303

Browse files
igerberclaude
andcommitted
Address thirty-seventh round of CI review findings on PR #318
P1 methodology (StaggeredTripleDiff never_treated surfaced composite total as control). ``_extract_sample`` swapped to ``n_never_enabled`` only inside the dynamic ``notyettreated`` branch; under ``control_group="never_treated"`` it left ``n_control = n_control_units``. But ``staggered_triple_diff.py:384`` and REGISTRY.md §StaggeredTriple- Difference (line 1730) define ``n_control_units`` as a composite total that also includes eligibility-denied / larger-cohort cells — the valid fixed comparison is only the never-enabled cohort. BR's schema / ``summary()`` / ``full_report()`` therefore misrepresented the comparison count on the ``nevertreated`` path. Added a dedicated branch that fires whenever the canonical control is ``nevertreated`` on ``StaggeredTripleDiffResults`` and surfaces ``n_never_enabled`` as the fixed comparison tally (``n_control`` set to None), regardless of whether the ``is_dynamic_control`` branch runs. P1 code quality (broken CI clause on undefined inference). ``_render_headline_sentence`` gated CI rendering on ``isinstance(lo, (int, float))`` — which accepts ``NaN`` because ``NaN`` is a float — so a fit with a finite point estimate but undefined CI endpoints (survey-df collapse, zero effective clusters, ...) rendered ``(... 95% CI: undefined to undefined)`` in both ``summary()`` and ``full_report()``. DR's own headline renderer already gated on ``np.isfinite`` (round-36 fix); BR now mirrors that: * both CI bounds finite -> usual ``(ci_level% CI: lo to hi)``; * at least one bound supplied but not finite -> explicit ``(inference unavailable: confidence interval is undefined for this fit)`` trailer; * bounds absent -> no trailer. Tests: 5 new regressions across two classes. * ``TestStaggeredTripleDiffNeverTreatedFixedComparison`` (2 tests): schema-level ``n_control is None`` / ``n_never_enabled == 300`` on the ``never_treated`` mode, plus a prose assertion that the composite 500 does not appear as "500 control" in ``summary()``; * ``TestBRHeadlineOmitsBrokenCIOnUndefinedInference`` (2 tests): NaN-CI stub renders ``summary()`` and ``full_report()`` without "undefined to undefined" / "CI: nan" fragments and with explicit "inference unavailable" language. 278 BR / DR / practitioner tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 6b251af commit 8838303

2 files changed

Lines changed: 145 additions & 1 deletion

File tree

diff_diff/business_report.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,20 @@ def _extract_sample(self) -> Dict[str, Any]:
568568
is_dynamic_control = (
569569
_canonical_control == "notyettreated" or is_stacked_dynamic
570570
)
571+
# StaggeredTripleDiff comparison-group contract:
572+
# ``n_control_units`` is a composite total that also includes
573+
# the eligibility-denied / larger-cohort cells. Regardless of
574+
# the ``control_group`` mode the valid fixed comparison is the
575+
# never-enabled cohort (``staggered_triple_diff.py:384``,
576+
# REGISTRY.md §StaggeredTripleDifference line 1730). Round-37
577+
# P1 CI review on PR #318: under ``control_group="never_treated"``
578+
# (i.e., ``_canonical_control == "nevertreated"``) the composite
579+
# total was being narrated as "control". Surface
580+
# ``n_never_enabled`` instead on both the ``nevertreated`` and
581+
# the dynamic ``notyettreated`` modes.
582+
if name == "StaggeredTripleDiffResults" and _canonical_control == "nevertreated":
583+
n_never_enabled = _safe_int(getattr(r, "n_never_enabled", None))
584+
n_control = None
571585
if is_dynamic_control:
572586
if name == "StaggeredTripleDiffResults":
573587
n_never_enabled = _safe_int(getattr(r, "n_never_enabled", None))
@@ -1661,11 +1675,31 @@ def _render_headline_sentence(schema: Dict[str, Any]) -> str:
16611675
magnitude = _format_value(abs(effect), unit, unit_kind)
16621676
lo = h.get("ci_lower")
16631677
hi = h.get("ci_upper")
1678+
# Round-37 P1 CI review on PR #318: on a finite point estimate
1679+
# whose CI bounds are NaN (undefined inference — survey-df
1680+
# collapse, zero effective clusters, etc.), the previous isinstance
1681+
# check passed because ``NaN`` is a ``float`` and the sentence
1682+
# rendered ``(... 95% CI: undefined to undefined)``. Gate on
1683+
# ``np.isfinite`` like DR's own headline renderer already does;
1684+
# add an explicit inference-unavailable trailer instead of the
1685+
# broken CI clause.
16641686
ci_str = ""
1665-
if isinstance(lo, (int, float)) and isinstance(hi, (int, float)):
1687+
ci_finite = (
1688+
isinstance(lo, (int, float))
1689+
and isinstance(hi, (int, float))
1690+
and np.isfinite(lo)
1691+
and np.isfinite(hi)
1692+
)
1693+
if ci_finite:
16661694
lo_s = _format_value(lo, unit, unit_kind)
16671695
hi_s = _format_value(hi, unit, unit_kind)
16681696
ci_str = f" ({h.get('ci_level', 95)}% CI: {lo_s} to {hi_s})"
1697+
elif isinstance(lo, (int, float)) or isinstance(hi, (int, float)):
1698+
# At least one bound was supplied but not finite -> inference
1699+
# undefined. Replace the CI clause with an explicit marker so
1700+
# downstream prose does not claim a confidence interval that
1701+
# is not actually available.
1702+
ci_str = " (inference unavailable: confidence interval is undefined for this fit)"
16691703
by_clause = f" by {magnitude}" if effect != 0 else ""
16701704
return f"{treatment.capitalize()} {verb} {outcome}{by_clause}{ci_str}."
16711705

tests/test_business_report.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1288,6 +1288,116 @@ def test_stacked_fit_persists_anticipation(self):
12881288
assert a["anticipation_periods"] == 1
12891289

12901290

1291+
class TestStaggeredTripleDiffNeverTreatedFixedComparison:
1292+
"""Round-37 P1 CI review on PR #318: ``StaggeredTripleDiffResults``
1293+
stores ``n_control_units`` as a composite total that also includes
1294+
the eligibility-denied cohorts. The valid fixed comparison under
1295+
``control_group="never_treated"`` is the never-enabled cohort
1296+
(``staggered_triple_diff.py:384``, REGISTRY.md §StaggeredTripleDifference
1297+
line 1730). BR was previously narrating the composite total as
1298+
"control" on the ``nevertreated`` mode; the fix surfaces
1299+
``n_never_enabled`` as the fixed comparison count on that path
1300+
too (the dynamic ``notyettreated`` path was already correct).
1301+
"""
1302+
1303+
@staticmethod
1304+
def _stub(control_group: str):
1305+
class StaggeredTripleDiffResults:
1306+
pass
1307+
1308+
stub = StaggeredTripleDiffResults()
1309+
stub.overall_att = 1.0
1310+
stub.overall_se = 0.2
1311+
stub.overall_p_value = 0.001
1312+
stub.overall_conf_int = (0.6, 1.4)
1313+
stub.alpha = 0.05
1314+
stub.n_obs = 800
1315+
stub.n_treated = 100
1316+
stub.n_control_units = 500 # composite total
1317+
stub.n_never_enabled = 300 # fixed never-enabled subset
1318+
stub.event_study_effects = None
1319+
stub.survey_metadata = None
1320+
stub.control_group = control_group
1321+
return stub
1322+
1323+
def test_never_treated_mode_surfaces_never_enabled_not_composite_total(self):
1324+
sample = BusinessReport(
1325+
self._stub("never_treated"), auto_diagnostics=False
1326+
).to_dict()["sample"]
1327+
# Composite total must not be surfaced as the fixed control
1328+
# count on the ``nevertreated`` path.
1329+
assert sample["n_control"] is None, (
1330+
f"n_control must not carry the composite n_control_units "
1331+
f"total on StaggeredTripleDiff(control_group='never_treated'); "
1332+
f"got sample={sample!r}"
1333+
)
1334+
assert sample["n_never_enabled"] == 300
1335+
1336+
def test_never_treated_mode_summary_does_not_narrate_composite_as_control(self):
1337+
summary = BusinessReport(
1338+
self._stub("never_treated"), auto_diagnostics=False
1339+
).summary()
1340+
# The composite total must not appear as "500 control" in prose.
1341+
import re
1342+
1343+
assert not re.search(r"\b500\s+control", summary), (
1344+
f"BR summary must not narrate the composite n_control_units "
1345+
f"total as 'control' on StaggeredTripleDiff(control_group="
1346+
f"'never_treated'); got: {summary!r}"
1347+
)
1348+
1349+
1350+
class TestBRHeadlineOmitsBrokenCIOnUndefinedInference:
1351+
"""Round-37 P1 CI review on PR #318: ``_extract_headline`` preserves
1352+
the fit's native CI even when it is undefined (e.g., survey-df
1353+
collapse produces finite ATT but NaN CI endpoints). The renderer
1354+
previously gated on ``isinstance(lo, (int, float))``, which accepts
1355+
``NaN`` (a float) and rendered ``95% CI: undefined to undefined``.
1356+
Gate on ``np.isfinite`` instead, and emit an explicit
1357+
"inference unavailable" trailer when at least one bound is
1358+
non-finite. DR's own headline renderer already handled this
1359+
correctly (round-36 fix).
1360+
"""
1361+
1362+
@staticmethod
1363+
def _stub_nan_ci():
1364+
class DiDResults:
1365+
pass
1366+
1367+
stub = DiDResults()
1368+
stub.att = 1.0
1369+
stub.se = float("nan")
1370+
stub.t_stat = float("nan")
1371+
stub.p_value = float("nan")
1372+
stub.conf_int = (float("nan"), float("nan"))
1373+
stub.alpha = 0.05
1374+
stub.n_obs = 200
1375+
stub.n_treated = 100
1376+
stub.n_control = 100
1377+
stub.survey_metadata = None
1378+
return stub
1379+
1380+
def test_summary_does_not_render_undefined_ci_interval(self):
1381+
summary = BusinessReport(
1382+
self._stub_nan_ci(), auto_diagnostics=False
1383+
).summary()
1384+
lower = summary.lower()
1385+
# Must not render the broken CI interval fragment.
1386+
assert "undefined to undefined" not in lower, summary
1387+
assert "95% ci: nan" not in lower
1388+
# Must explicitly flag that inference is unavailable.
1389+
assert "inference unavailable" in lower
1390+
1391+
def test_full_report_does_not_render_undefined_ci_interval(self):
1392+
md = BusinessReport(
1393+
self._stub_nan_ci(), auto_diagnostics=False
1394+
).full_report()
1395+
lower = md.lower()
1396+
assert "undefined to undefined" not in lower
1397+
assert "95% ci: nan" not in lower
1398+
assert "inference unavailable" in lower
1399+
1400+
12911401
class TestStackedCleanControlSurfacesInSampleBlock:
12921402
"""Pre-emptive audit regression: ``StackedDiD`` exposes its control-
12931403
group choice as ``clean_control`` (the public Wing-Freedman-

0 commit comments

Comments
 (0)