Skip to content

Commit 5a50834

Browse files
committed
PR #454 R4 polish: replicate-weight skip path + wrapper docstring mirror
R4 verdict was Looks good with two P3 polish items (plus the standing R-parity-goldens follow-up, which is tracked and non-blocking). Both addressed: 1. Replicate-weight skip path (P3 Code Quality): the R1 skip-vs-error fix on `DiagnosticReport._check_bacon` only handled the within-unit- varying-survey ValueError case. Replicate-weight survey designs raise NotImplementedError from `BaconDecomposition.fit()` and were still falling through to the generic exception handler as `status="error"`. Added an `except NotImplementedError` branch that returns a structured skip with a reason naming the supported alternative (TSL-based design via SurveyDesign + precomputed= escape hatch). New regression at `TestBaconSurveyDesignNarrowing::test_diagnostic_report_skips_with_structured_reason_on_replicate_weights`. 2. Wrapper docstring mirror (P3 Documentation/Tests): the public `bacon_decompose()` and `TwoWayFixedEffects.decompose()` wrappers had short `first_treat` parameter descriptions that mentioned only the 0/np.inf never-treated convention, omitting the new always-treated remap + sentinel reservation contract that lives in `BaconDecomposition.fit()`. Mirrored the expanded contract into both wrappers (with a pointer to the full docstring on the class) so users reading wrapper help see the behavior change. Tests: 62 pass in test_methodology_bacon.py + test_bacon.py (one new regression), 3 skipped (R parity); 98 pass across the broader bacon/decompose surface.
1 parent d35fe91 commit 5a50834

4 files changed

Lines changed: 90 additions & 3 deletions

File tree

diff_diff/bacon.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,8 +1250,19 @@ def bacon_decompose(
12501250
time : str
12511251
Name of time period column.
12521252
first_treat : str
1253-
Name of column indicating when unit was first treated.
1254-
Use 0 (or np.inf) for never-treated units.
1253+
Name of column indicating when unit was first treated. The
1254+
values ``0`` and ``np.inf`` are **reserved as never-treated
1255+
sentinels**; a real treatment cohort with ``first_treat == 0``
1256+
would be folded into ``U`` and should be re-labeled to a
1257+
non-sentinel value before fitting. Units whose ``first_treat``
1258+
is at or before the first observable period
1259+
(``first_treat <= min(time)``, excluding the sentinels) are
1260+
automatically remapped to the ``U`` (untreated) bucket per
1261+
Goodman-Bacon (2021) footnote 11, with a ``UserWarning``. See
1262+
``BaconDecomposition.fit()`` for the full contract and
1263+
``BaconDecompositionResults.n_always_treated_remapped`` for the
1264+
count. The user's original ``first_treat`` column is preserved
1265+
unchanged.
12551266
weights : str, default="exact"
12561267
Weight calculation method:
12571268

diff_diff/diagnostic_report.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1774,6 +1774,31 @@ def _check_bacon(self) -> Dict[str, Any]:
17741774
"status": "error",
17751775
"reason": f"bacon_decompose raised {type(exc).__name__}: {exc}",
17761776
}
1777+
except NotImplementedError as exc:
1778+
# PR #454 R4 P3: ``BaconDecomposition.fit()`` raises
1779+
# ``NotImplementedError`` for replicate-weight survey designs
1780+
# (bacon.py rejects them because Bacon is a diagnostic that
1781+
# does not compute replicate-based variance). Match the
1782+
# within-unit-varying skip pattern above: emit a structured
1783+
# skip naming the ``precomputed={'bacon': ...}`` escape hatch
1784+
# so survey-backed reports with replicate-weight designs
1785+
# produce an actionable skip rather than an opaque
1786+
# ``status="error"`` block.
1787+
return {
1788+
"status": "skipped",
1789+
"reason": (
1790+
"Survey design uses replicate weights, which the "
1791+
"Bacon decomposition does not support (bacon is a "
1792+
"diagnostic and does not compute replicate-based "
1793+
"variance). To populate this section, run "
1794+
"``bacon_decompose(data, ..., "
1795+
"survey_design=SurveyDesign(weights=..., strata=..., "
1796+
"psu=..., fpc=...))`` with a TSL-based design and "
1797+
"pass via "
1798+
"``DiagnosticReport(..., precomputed={'bacon': result})``. "
1799+
f"Rejection detail: {exc}"
1800+
),
1801+
}
17771802
except Exception as exc: # noqa: BLE001
17781803
return {
17791804
"status": "error",

diff_diff/twfe.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -702,7 +702,19 @@ def decompose(
702702
Name of time period column.
703703
first_treat : str
704704
Name of column indicating when each unit was first treated.
705-
Use 0 (or np.inf) for never-treated units.
705+
The values ``0`` and ``np.inf`` are **reserved as
706+
never-treated sentinels**; a real treatment cohort with
707+
``first_treat == 0`` would be folded into ``U`` and should
708+
be re-labeled to a non-sentinel value before fitting. Units
709+
whose ``first_treat`` is at or before the first observable
710+
period (``first_treat <= min(time)``, excluding the
711+
sentinels) are automatically remapped to the ``U``
712+
(untreated) bucket per Goodman-Bacon (2021) footnote 11
713+
with a ``UserWarning``. See ``BaconDecomposition.fit()`` for
714+
the full contract and
715+
``BaconDecompositionResults.n_always_treated_remapped`` for
716+
the count. The user's original ``first_treat`` column is
717+
preserved unchanged.
706718
weights : str, default="exact"
707719
Weight calculation method:
708720

tests/test_methodology_bacon.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,45 @@ def test_explicit_approximate_accepts_time_varying_weights(self) -> None:
10671067
assert abs(total - 1.0) < 0.01
10681068
assert np.isfinite(results.twfe_estimate)
10691069

1070+
def test_diagnostic_report_skips_with_structured_reason_on_replicate_weights(
1071+
self,
1072+
) -> None:
1073+
"""PR #454 R4 P3 regression: ``DiagnosticReport._check_bacon``
1074+
emits ``status="skipped"`` (not ``"error"``) when the survey
1075+
design uses replicate weights, which Bacon rejects with
1076+
``NotImplementedError`` upstream. The skip reason names the
1077+
``precomputed={'bacon': ...}`` escape hatch and points users at
1078+
a TSL-based survey design as the supported alternative.
1079+
"""
1080+
from diff_diff import DiagnosticReport, SurveyDesign
1081+
1082+
df, _ = self._time_varying_survey_panel()
1083+
df["rep_w1"] = 1.0
1084+
df["rep_w2"] = 1.0
1085+
sd_rep = SurveyDesign(
1086+
weights="w",
1087+
replicate_weights=["rep_w1", "rep_w2"],
1088+
replicate_method="BRR",
1089+
)
1090+
1091+
class _Stub:
1092+
pass
1093+
1094+
dr = DiagnosticReport(
1095+
_Stub(),
1096+
data=df,
1097+
outcome="y",
1098+
unit="unit",
1099+
time="time",
1100+
first_treat="first_treat",
1101+
survey_design=sd_rep,
1102+
)
1103+
block = dr._check_bacon()
1104+
assert block["status"] == "skipped"
1105+
reason = block["reason"]
1106+
assert "replicate weights" in reason
1107+
assert "precomputed" in reason
1108+
10701109
def test_diagnostic_report_skips_with_structured_reason_on_time_varying_survey(
10711110
self,
10721111
) -> None:

0 commit comments

Comments
 (0)