Skip to content

Commit dcfb4fe

Browse files
igerberclaude
andcommitted
Address forty-fifth round of CI review findings on PR #318
Round-45 landed one P1 methodology finding: ``BusinessReport`` emitted the Bacon "re-estimate with a heterogeneity-robust estimator (CS / SA / BJS / Gardner)" caveat on every fit whose Bacon block had ``forbidden_weight > 0.10``, including fits that were already produced by one of those robust estimators. Goodman-Bacon is explicitly a decomposition of TWFE weights (``bacon.py`` header, Goodman-Bacon 2021). On a displayed fit that is already heterogeneity-robust (CS / SA / BJS / Gardner / Wooldridge / EfficientDiD / Stacked / dCDH / TripleDifference / StaggeredTripleDiff / SDiD / TROP), a high forbidden-weight share is a statement about what TWFE WOULD have done on this rollout, not a claim that the displayed estimator needs replacement. DR partly preserved this in its prose with an "if not already in use" guard; BR dropped that distinction and rendered the stronger recommendation in stakeholder-facing caveats / full reports. Fix (``business_report.py`` ``_build_caveats``): - Introduce ``_TWFE_STYLE_RESULTS = {DiDResults, MultiPeriodDiDResults, TwoWayFixedEffectsResults}`` — the fits for which the switch-to- robust recommendation is load-bearing. - Keep the original message for TWFE-style fits. - Rephrase for already-robust fits: "TWFE benchmark would be materially biased on this rollout; the displayed estimator is already heterogeneity-robust, so this is a statement about the rollout design (avoid reporting TWFE alongside this fit), not about the current result's validity." Tests: 3 new regressions in ``TestBaconCaveatEstimatorAware``: - CS-like fit with high forbidden-weight does NOT recommend switching. - Spot-check the same rule across SA / Imputation / TwoStage / Stacked / Wooldridge / dCDH / EfficientDiD. - ``MultiPeriodDiDResults`` (TWFE event-study) DOES keep the switch recommendation. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 1085e72 commit dcfb4fe

2 files changed

Lines changed: 203 additions & 9 deletions

File tree

diff_diff/business_report.py

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
import re
4040
from dataclasses import dataclass
41-
from typing import Any, Dict, List, Optional, Union
41+
from typing import Any, Dict, FrozenSet, List, Optional, Union
4242

4343
import numpy as np
4444

@@ -1521,23 +1521,56 @@ def _build_caveats(
15211521
)
15221522

15231523
# Bacon forbidden comparisons.
1524+
# Round-45 P1 CI review on PR #318: Goodman-Bacon is a
1525+
# decomposition of TWFE weights (see ``bacon.py`` header and
1526+
# Goodman-Bacon 2021). On fits already produced by a
1527+
# heterogeneity-robust estimator (CS / SA / BJS / Gardner /
1528+
# Wooldridge / EfficientDiD / Stacked / dCDH / TripleDifference /
1529+
# StaggeredTripleDiff / SDiD / TROP), a high forbidden-weight share
1530+
# says "TWFE would have been materially biased on this rollout",
1531+
# not "the displayed estimator needs to be replaced" — the
1532+
# displayed estimator is already robust to the heterogeneity that
1533+
# Bacon flags. DR partly preserves this with "if not already in
1534+
# use" prose; BR must carry the same distinction through to the
1535+
# caveat. The TWFE-style estimators whose results route through
1536+
# Bacon and for which the "switch to a robust estimator"
1537+
# recommendation is load-bearing are the DiDResults-type fits; all
1538+
# other result classes are already robust.
1539+
_TWFE_STYLE_RESULTS: FrozenSet[str] = frozenset(
1540+
{"DiDResults", "MultiPeriodDiDResults", "TwoWayFixedEffectsResults"}
1541+
)
15241542
if dr_schema:
15251543
bacon = dr_schema.get("bacon") or {}
15261544
if bacon.get("status") == "ran":
15271545
fw = bacon.get("forbidden_weight")
15281546
if isinstance(fw, (int, float)) and fw > 0.10:
1547+
_estimator_name = type(_results).__name__
1548+
if _estimator_name in _TWFE_STYLE_RESULTS:
1549+
bacon_message = (
1550+
f"Goodman-Bacon decomposition places {fw:.0%} "
1551+
"of implicit TWFE weight on 'forbidden' "
1552+
"later-vs-earlier comparisons. TWFE may be "
1553+
"materially biased under heterogeneous effects. "
1554+
"Re-estimate with a heterogeneity-robust "
1555+
"estimator (CS / SA / BJS / Gardner)."
1556+
)
1557+
else:
1558+
bacon_message = (
1559+
f"Goodman-Bacon decomposition places {fw:.0%} "
1560+
"of TWFE weight on 'forbidden' later-vs-earlier "
1561+
"comparisons. A TWFE benchmark on this rollout "
1562+
"would be materially biased under heterogeneous "
1563+
"effects; the displayed estimator is already "
1564+
"heterogeneity-robust, so this is a statement "
1565+
"about the rollout design (avoid reporting TWFE "
1566+
"alongside this fit), not about the current "
1567+
"result's validity."
1568+
)
15291569
caveats.append(
15301570
{
15311571
"severity": "warning",
15321572
"topic": "bacon_contamination",
1533-
"message": (
1534-
f"Goodman-Bacon decomposition places {fw:.0%} "
1535-
"of implicit TWFE weight on 'forbidden' "
1536-
"later-vs-earlier comparisons. TWFE may be "
1537-
"materially biased under heterogeneous effects. "
1538-
"Re-estimate with a heterogeneity-robust "
1539-
"estimator (CS / SA / BJS / Gardner)."
1540-
),
1573+
"message": bacon_message,
15411574
}
15421575
)
15431576

tests/test_business_report.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3975,6 +3975,167 @@ def test_br_rejects_both_passthrough_inputs_names_them(self):
39753975
assert "precomputed['sensitivity']" in msg
39763976

39773977

3978+
class TestBaconCaveatEstimatorAware:
3979+
"""Round-45 P1 CI review on PR #318: Goodman-Bacon decomposes TWFE
3980+
weights. On fits already produced by a heterogeneity-robust
3981+
estimator (CS / SA / BJS / Gardner / Wooldridge / EfficientDiD /
3982+
Stacked / dCDH / TripleDifference / StaggeredTripleDiff / SDiD /
3983+
TROP), a high forbidden-weight share says "TWFE would have been
3984+
materially biased on this rollout", not "the displayed estimator
3985+
needs to be replaced" — the displayed fit is already robust.
3986+
3987+
BR's caveat must be estimator-aware: keep the "switch to a robust
3988+
estimator" recommendation for TWFE-style fits only.
3989+
"""
3990+
3991+
@staticmethod
3992+
def _bacon_schema_with_high_forbidden_weight():
3993+
"""Build a fake ``DiagnosticReportResults`` whose schema carries
3994+
a Bacon block with ``forbidden_weight > 0.10`` so BR's caveat
3995+
builder fires on the Bacon branch."""
3996+
from diff_diff.diagnostic_report import DiagnosticReportResults
3997+
3998+
schema = {
3999+
"schema_version": "1.0",
4000+
"estimator": {"class_name": "Stub", "display_name": "Stub"},
4001+
"headline_metric": {},
4002+
"parallel_trends": {"status": "skipped", "reason": "stub"},
4003+
"pretrends_power": {"status": "skipped", "reason": "stub"},
4004+
"sensitivity": {"status": "skipped", "reason": "stub"},
4005+
"placebo": {"status": "skipped", "reason": "stub"},
4006+
"bacon": {
4007+
"status": "ran",
4008+
"twfe_estimate": 1.2,
4009+
"weight_by_type": {
4010+
"treated_vs_never": 0.5,
4011+
"earlier_vs_later": 0.1,
4012+
"later_vs_earlier": 0.4,
4013+
},
4014+
"forbidden_weight": 0.4, # > 0.10 threshold
4015+
"verdict": "materially_contaminated",
4016+
"n_timing_groups": 3,
4017+
},
4018+
"design_effect": {"status": "skipped", "reason": "stub"},
4019+
"heterogeneity": {"status": "skipped", "reason": "stub"},
4020+
"epv": {"status": "skipped", "reason": "stub"},
4021+
"estimator_native_diagnostics": {"status": "not_applicable"},
4022+
"skipped": {},
4023+
"warnings": [],
4024+
"overall_interpretation": "",
4025+
"next_steps": [],
4026+
}
4027+
return DiagnosticReportResults(
4028+
schema=schema,
4029+
interpretation="",
4030+
applicable_checks=("bacon",),
4031+
skipped_checks={},
4032+
warnings=(),
4033+
)
4034+
4035+
@staticmethod
4036+
def _make_cs_like_stub(class_name: str):
4037+
cls = type(class_name, (), {})
4038+
obj = cls()
4039+
obj.overall_att = 1.0
4040+
obj.overall_se = 0.2
4041+
obj.overall_p_value = 0.001
4042+
obj.overall_conf_int = (0.6, 1.4)
4043+
obj.alpha = 0.05
4044+
obj.n_obs = 500
4045+
obj.n_treated = 100
4046+
obj.n_control_units = 400
4047+
obj.survey_metadata = None
4048+
obj.event_study_effects = None
4049+
obj.inference_method = "analytical"
4050+
return obj
4051+
4052+
@staticmethod
4053+
def _make_twfe_style_stub():
4054+
class MultiPeriodDiDResults:
4055+
pass
4056+
4057+
obj = MultiPeriodDiDResults()
4058+
obj.avg_att = 1.0
4059+
obj.avg_se = 0.2
4060+
obj.avg_p_value = 0.001
4061+
obj.avg_conf_int = (0.6, 1.4)
4062+
obj.alpha = 0.05
4063+
obj.n_obs = 500
4064+
obj.n_treated = 100
4065+
obj.n_control = 400
4066+
obj.survey_metadata = None
4067+
obj.pre_period_effects = None
4068+
obj.inference_method = "analytical"
4069+
return obj
4070+
4071+
def test_cs_like_fit_does_not_recommend_switching_estimators(self):
4072+
"""On an already-robust CS-style fit with high forbidden
4073+
Bacon weight, BR must not recommend switching to a robust
4074+
estimator — the displayed fit IS already robust."""
4075+
stub = self._make_cs_like_stub("CallawaySantAnnaResults")
4076+
dr = self._bacon_schema_with_high_forbidden_weight()
4077+
br = BusinessReport(stub, diagnostics=dr)
4078+
caveats = br.caveats()
4079+
bacon_caveats = [c for c in caveats if c.get("topic") == "bacon_contamination"]
4080+
assert len(bacon_caveats) == 1, (
4081+
f"High forbidden-weight Bacon must surface a caveat. " f"Got caveats: {caveats!r}"
4082+
)
4083+
msg = bacon_caveats[0]["message"].lower()
4084+
# Must NOT tell the user to switch estimators.
4085+
assert "re-estimate with a heterogeneity-robust" not in msg, (
4086+
f"CS is already heterogeneity-robust; must not recommend "
4087+
f"switching. Got message: {msg!r}"
4088+
)
4089+
# Must frame this as a TWFE benchmark problem / rollout-design
4090+
# statement, not a displayed-fit-validity problem.
4091+
assert (
4092+
"heterogeneity-robust" in msg
4093+
or "already" in msg
4094+
or "twfe benchmark" in msg
4095+
or "rollout design" in msg
4096+
), f"CS Bacon caveat must reframe as rollout/TWFE issue. Got: {msg!r}"
4097+
# And the full-report rendering must reflect the softer wording.
4098+
md = br.full_report()
4099+
assert "Re-estimate with a heterogeneity-robust estimator" not in md, md
4100+
4101+
def test_other_robust_estimators_also_avoid_switch_recommendation(self):
4102+
"""Spot-check the same rule holds for multiple
4103+
heterogeneity-robust estimators on the Bacon path."""
4104+
for class_name in (
4105+
"SunAbrahamResults",
4106+
"ImputationDiDResults",
4107+
"TwoStageDiDResults",
4108+
"StackedDiDResults",
4109+
"WooldridgeDiDResults",
4110+
"ChaisemartinDHaultfoeuilleResults",
4111+
"EfficientDiDResults",
4112+
):
4113+
stub = self._make_cs_like_stub(class_name)
4114+
dr = self._bacon_schema_with_high_forbidden_weight()
4115+
br = BusinessReport(stub, diagnostics=dr)
4116+
msgs = [c["message"] for c in br.caveats() if c.get("topic") == "bacon_contamination"]
4117+
assert msgs, f"{class_name}: Bacon caveat must fire"
4118+
assert (
4119+
"Re-estimate with a heterogeneity-robust estimator" not in msgs[0]
4120+
), f"{class_name} is already robust; must not recommend switching. Got: {msgs[0]!r}"
4121+
4122+
def test_twfe_style_fit_keeps_switch_recommendation(self):
4123+
"""The switch-to-robust recommendation is load-bearing for
4124+
genuinely TWFE-style fits and must be preserved there."""
4125+
stub = self._make_twfe_style_stub()
4126+
dr = self._bacon_schema_with_high_forbidden_weight()
4127+
br = BusinessReport(stub, diagnostics=dr)
4128+
caveats = br.caveats()
4129+
bacon_caveats = [c for c in caveats if c.get("topic") == "bacon_contamination"]
4130+
assert len(bacon_caveats) == 1
4131+
msg = bacon_caveats[0]["message"]
4132+
# TWFE-style fit: keep the explicit switch recommendation.
4133+
assert "Re-estimate with a heterogeneity-robust estimator" in msg, (
4134+
f"MultiPeriodDiDResults (TWFE event-study) must keep the "
4135+
f"switch-to-robust recommendation. Got: {msg!r}"
4136+
)
4137+
4138+
39784139
class TestBusinessReportSurveyDesignPassthrough:
39794140
"""Round-40 P1 CI review on PR #318: ``BusinessReport`` must accept
39804141
``survey_design`` and forward it to the auto-constructed

0 commit comments

Comments
 (0)