Skip to content

Commit 7df7dc2

Browse files
igerberclaude
andcommitted
Address PR #401 R0 review (1 P1 + 1 P2 + 1 P3)
P1 (blocker): the convenience helper `chaisemartin_dhaultfoeuille()` hard-coded the constructor kwarg allowlist without `paths_of_interest`, so the new kwarg fell through to `fit()` and raised TypeError. Replace the static set with a signature-derived split via `inspect.signature(ChaisemartinDHaultfoeuille.__init__)` so future constructor params cannot drift out of sync. Add helper-level regression `test_convenience_function_routes_paths_of_interest_to_init` parallel to the existing `test_convenience_function_matches_class`. P2: under `paths_of_interest`, `frequency_rank` was assigned from `enumerate(selected_paths)`, which produces user-selection order rather than true frequency rank. Decouple the iteration order from the rank field: keep `selected_paths` iteration in user order (insertion order preserved on `path_effects.keys()`), but compute `frequency_rank` as the within-selected-paths rank by descending group count (lex tiebreak on the path tuple). Under `by_path=k`, `selected_paths` is already sorted by descending frequency so the two coincide; under `paths_of_interest`, frequency_rank now reflects true observed-count rank regardless of user order. Add regression `test_paths_of_interest_frequency_rank_is_true_frequency`. P3: docs/source-docstring binary-only language on per-path disaggregation cleaned up: - class summary at chaisemartin_dhaultfoeuille.py:367-370 (drop "binary treatment", add `paths_of_interest` mention) - docs/api/chaisemartin_dhaultfoeuille.rst:14-22 (add `paths_of_interest` and explicit "binary or integer-coded discrete" language) - diff_diff/chaisemartin_dhaultfoeuille_results.py:394-452 (`path_effects`, `path_placebo_event_study`, `path_cumulated_event_study`, `path_sup_t_bands` activator clauses now read "by_path is a positive int OR paths_of_interest is set") - docs/methodology/REGISTRY.md:671 checklist line (add "or integer-coded discrete" + `paths_of_interest`) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 1d64724 commit 7df7dc2

5 files changed

Lines changed: 119 additions & 33 deletions

File tree

diff_diff/chaisemartin_dhaultfoeuille.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -366,8 +366,10 @@ class ChaisemartinDHaultfoeuille(ChaisemartinDHaultfoeuilleBootstrapMixin):
366366
HonestDiD sensitivity integration on placebos via ``honest_did=True``
367367
- Per-path event-study disaggregation via ``by_path=k`` (top-k most
368368
common observed treatment paths within the window
369-
``[F_g-1, F_g-1+L_max]``; requires ``drop_larger_lower=False`` and
370-
binary treatment)
369+
``[F_g-1, F_g-1+L_max]``; requires ``drop_larger_lower=False``;
370+
supports binary or integer-coded discrete treatment) or via
371+
``paths_of_interest=[(...), ...]`` for an explicit user-specified
372+
path subset (Python-only API; mutex with ``by_path=k``)
371373
- Survey support via ``survey_design=``: pweight with strata/PSU/FPC
372374
via Taylor Series Linearization (analytical) or replicate-weight
373375
variance (BRR/Fay/JK1/JKn/SDR)
@@ -5842,7 +5844,20 @@ def _compute_path_effects(
58425844

58435845
path_effects: Dict[Tuple[int, ...], Dict[str, Any]] = {}
58445846

5845-
for rank, path in enumerate(selected_paths, start=1):
5847+
# `frequency_rank` is the within-selected-paths rank by descending
5848+
# group count (lex tiebreak on the path tuple). Decoupled from the
5849+
# iteration order over `selected_paths` so that under
5850+
# `paths_of_interest` (user-specified order) the rank still
5851+
# reflects true frequency. Under `by_path=k`, `selected_paths` is
5852+
# already sorted by descending frequency so ranks coincide with
5853+
# iteration order.
5854+
rank_sorted_paths = sorted(
5855+
selected_paths,
5856+
key=lambda p: (-path_to_count[p], p),
5857+
)
5858+
path_to_freq_rank = {p: i + 1 for i, p in enumerate(rank_sorted_paths)}
5859+
5860+
for path in selected_paths:
58465861
switcher_mask = path_to_group_mask[path]
58475862
n_path_groups = int(switcher_mask.sum())
58485863

@@ -5931,7 +5946,7 @@ def _compute_path_effects(
59315946

59325947
path_effects[path] = {
59335948
"n_groups": n_path_groups,
5934-
"frequency_rank": rank,
5949+
"frequency_rank": path_to_freq_rank[path],
59355950
"horizons": horizons,
59365951
}
59375952

@@ -8136,17 +8151,12 @@ def chaisemartin_dhaultfoeuille(
81368151
-------
81378152
ChaisemartinDHaultfoeuilleResults
81388153
"""
8154+
import inspect
8155+
81398156
init_keys = {
8140-
"alpha",
8141-
"cluster",
8142-
"n_bootstrap",
8143-
"bootstrap_weights",
8144-
"seed",
8145-
"placebo",
8146-
"twfe_diagnostic",
8147-
"drop_larger_lower",
8148-
"by_path",
8149-
"rank_deficient_action",
8157+
name
8158+
for name, p in inspect.signature(ChaisemartinDHaultfoeuille.__init__).parameters.items()
8159+
if p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD) and name != "self"
81508160
}
81518161
init_kwargs = {k: v for k, v in kwargs.items() if k in init_keys}
81528162
fit_kwargs = {k: v for k, v in kwargs.items() if k not in init_keys}

diff_diff/chaisemartin_dhaultfoeuille_results.py

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -394,10 +394,15 @@ class ChaisemartinDHaultfoeuilleResults:
394394
path_effects : dict, optional
395395
Per-path event-study effects keyed by observed treatment
396396
trajectory (tuple of int). Populated when ``by_path`` is a
397-
positive int at estimator construction. Each entry holds
397+
positive int OR ``paths_of_interest`` is a list of int tuples
398+
at estimator construction. Each entry holds
398399
``{"n_groups": int, "frequency_rank": int,
399400
"horizons": {l: {"effect", "se", "t_stat", "p_value",
400-
"conf_int", "n_obs"}}}`` for ``l = 1..L_max``.
401+
"conf_int", "n_obs"}}}`` for ``l = 1..L_max``. Under
402+
``paths_of_interest``, dict-insertion order matches the user-
403+
specified path order; ``frequency_rank`` is the within-
404+
selected-paths rank by descending observed-group count
405+
(decoupled from iteration order).
401406
path_placebo_event_study : dict, optional
402407
Per-path backward-horizon placebos ``DID^{pl}_{path, l}`` for
403408
``l = 1..L_max``, keyed by observed treatment trajectory (tuple
@@ -407,11 +412,12 @@ class ChaisemartinDHaultfoeuilleResults:
407412
**path_placebo_event_study[p]}`` view is well-formed across
408413
forward and backward horizons. Each inner entry holds
409414
``{"effect", "se", "t_stat", "p_value", "conf_int", "n_obs"}``.
410-
Populated when ``by_path`` is a positive int AND
411-
``placebo=True`` AND ``L_max >= 1``. Empty-state contract
412-
mirrors ``path_effects``: ``None`` when ``by_path + placebo``
413-
was not requested; ``{}`` when requested but no observed path
414-
has a complete window ``[F_g-1, F_g-1+L_max]`` within the
415+
Populated when (``by_path`` is a positive int OR
416+
``paths_of_interest`` is set) AND ``placebo=True`` AND
417+
``L_max >= 1``. Empty-state contract mirrors ``path_effects``:
418+
``None`` when ``by_path / paths_of_interest + placebo`` was
419+
not requested; ``{}`` when requested but no observed path has
420+
a complete window ``[F_g-1, F_g-1+L_max]`` within the
415421
panel (the same regime where ``path_effects`` returns ``{}``,
416422
with the same ``UserWarning`` at fit-time). Downstream callers
417423
should distinguish the two states. Inherits the cross-path
@@ -424,9 +430,9 @@ class ChaisemartinDHaultfoeuilleResults:
424430
keyed by observed treatment trajectory (tuple of int). Inner
425431
dict is keyed by horizon directly (no ``"horizons"`` wrapper);
426432
each entry holds ``{"effect", "se", "t_stat", "p_value",
427-
"conf_int", "n_obs"}``. Populated when ``by_path`` is a
428-
positive int AND ``trends_linear=True`` AND ``L_max >= 1``;
429-
``None`` otherwise. Mirrors the global ``linear_trends_effects``
433+
"conf_int", "n_obs"}``. Populated when (``by_path`` is a
434+
positive int OR ``paths_of_interest`` is set) AND
435+
``trends_linear=True`` AND ``L_max >= 1``; ``None`` otherwise. Mirrors the global ``linear_trends_effects``
430436
cumulation: SE on the cumulated layer is the conservative
431437
upper bound (sum of per-horizon component SEs from
432438
``path_effects[path]["horizons"][l]["se"]``, NaN-consistent).
@@ -443,7 +449,8 @@ class ChaisemartinDHaultfoeuilleResults:
443449
observed treatment trajectory (tuple of int). Each entry holds
444450
``{"crit_value": float, "alpha": float, "n_bootstrap": int,
445451
"method": str, "n_valid_horizons": int}``. Populated when
446-
``by_path`` is a positive int AND ``n_bootstrap > 0``. The
452+
(``by_path`` is a positive int OR ``paths_of_interest`` is
453+
set) AND ``n_bootstrap > 0``. The
447454
band itself is applied per-horizon as ``cband_conf_int`` on
448455
``path_effects[path]["horizons"][l]`` and rendered as
449456
``cband_lower`` / ``cband_upper`` columns on
@@ -585,9 +592,9 @@ class ChaisemartinDHaultfoeuilleResults:
585592
# conservative upper bound (sum of per-horizon component SEs,
586593
# NaN-consistent), matching the global `linear_trends_effects`
587594
# convention.
588-
path_cumulated_event_study: Optional[
589-
Dict[Tuple[int, ...], Dict[int, Dict[str, Any]]]
590-
] = field(default=None, repr=False)
595+
path_cumulated_event_study: Optional[Dict[Tuple[int, ...], Dict[int, Dict[str, Any]]]] = field(
596+
default=None, repr=False
597+
)
591598
# Per-path joint sup-t simultaneous-band metadata. Keyed by path
592599
# tuple; each entry holds `{"crit_value", "alpha", "n_bootstrap",
593600
# "method", "n_valid_horizons"}`. Populated when `by_path` is a
@@ -1337,9 +1344,7 @@ def _render_path_effects_section(
13371344
):
13381345
cum_horizons = self.path_cumulated_event_study[path]
13391346
if cum_horizons:
1340-
lines.append(
1341-
" Cumulated Level Effects (DID^{fd}, trends_linear):"
1342-
)
1347+
lines.append(" Cumulated Level Effects (DID^{fd}, trends_linear):")
13431348
for l_h in sorted(cum_horizons.keys()):
13441349
ce = cum_horizons[l_h]
13451350
lines.append(

docs/api/chaisemartin_dhaultfoeuille.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ integration on placebos; survey support via Taylor-series linearization
1919
``by_path=k`` (mirrors R ``did_multiplegt_dyn(..., by_path=k)``,
2020
including per-path backward placebos and per-path joint sup-t
2121
simultaneous bands when ``n_bootstrap > 0`` — Python-only extension
22-
beyond R, which provides no joint bands at any surface).
22+
beyond R, which provides no joint bands at any surface) or via
23+
``paths_of_interest=[(...), ...]`` for an explicit user-specified
24+
path subset (Python-only API; mutex with ``by_path``). ``by_path``
25+
supports binary or integer-coded discrete (D in Z) treatment.
2326

2427
The estimator:
2528

docs/methodology/REGISTRY.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,7 @@ The guard is fired by `_survey_se_from_group_if` (analytical and replicate) and
668668
- [x] State-set-specific trends via control-pool restriction (Web Appendix Section 1.4)
669669
- [x] Heterogeneity testing via saturated OLS (Web Appendix Section 1.5, Lemma 7)
670670
- [x] Design-2 switch-in/switch-out descriptive wrapper (Web Appendix Section 1.6)
671-
- [x] `by_path` per-path event-study disaggregation (binary treatment, joiners/leavers IF precedent; mirrors R `did_multiplegt_dyn(..., by_path=k)`)
671+
- [x] `by_path` per-path event-study disaggregation (binary or integer-coded discrete treatment, joiners/leavers IF precedent; mirrors R `did_multiplegt_dyn(..., by_path=k)`); plus `paths_of_interest=[(...), ...]` for user-specified path subsets (Python-only API; mutex with `by_path`)
672672
- [x] HonestDiD (Rambachan-Roth 2023) integration on placebo + event study surface
673673
- [x] Survey design support: pweight with strata/PSU/FPC via Taylor Series Linearization (analytical) **or replicate-weight variance (BRR/Fay/JK1/JKn/SDR)**, covering the main ATT surface, covariate adjustment (DID^X), heterogeneity testing, the TWFE diagnostic (fit and standalone `twowayfeweights()` helper), and HonestDiD bounds. Opt-in **PSU-level Hall-Mammen wild bootstrap** is also supported via `n_bootstrap > 0`.
674674
- **Note (Survey IF expansion — library convention):** Survey IF expansion is a library extension not in the dCDH papers (the paper's plug-in variance assumes iid sampling). The library convention builds observation-level `psi_i` by proportionally distributing per-group IF mass within weight share: either at the group level (`psi_i = U_centered[g] * w_i / W_g`, the previous convention) or at the per-`(g, t)` cell level via the cell-period allocator shipped in this release. Cell-level expansion: decompose `U[g]` into per-period attributions `U[g, t]`, cohort-center each column independently, then expand to observation level as `psi_i = U_centered_per_period[g_i, t_i] * (w_i / W_{g_i, t_i})`. Binder (1983) stratified-PSU variance aggregates the resulting `psi` at PSU level. **Post-period attribution convention:** each transition term in the IF sum (of the form `role_weight * (Y_{g, t} - Y_{g, t-1})` for DID_M or `S_g * (Y_{g, out} - Y_{g, ref})` for DID_l) is attributed as a single *difference* to the POST-period cell, not split into a `+Y_post` / `-Y_pre` pair across two cells. This is a library *convention*, not a theorem — adopted because it preserves the group-sum, PSU-sum, and cohort-sum identities of the previous group-level expansion (so Binder variance coincides with the group-level variance under the auto-injected `psu=group`) and because Monte Carlo coverage at nominal 95% is empirically close to nominal on a DGP where PSUs vary across the cells of each group (see `tests/test_dcdh_cell_period_coverage.py`). A covariance-aware two-cell allocator is a plausible alternative and may be worth exploring if future designs motivate an explicit observation-level IF derivation; the method currently in the library is **not derived from the observation-level survey linearization of the contrast** and makes no stronger claim than "coverage is approximately nominal under the tested DGPs and the group-sum identity holds exactly." Under within-group-constant PSU (the pre-allocator accepted input), per-cell sums telescope to `U_centered[g]` and Binder variance is byte-identical (up to single-ULP floating-point noise) to the previous group-level expansion. **Strata and PSU must be constant within each `(g, t)` cell** (trivially satisfied in one-obs-per-cell panels — the canonical dCDH structure); variation **across cells of a group** is supported by the allocator. Within-group-varying **weights** are supported as before. When `survey_design.psu` is not specified, `fit()` auto-injects `psu=<group column>` so the TSL variance, `df_survey`, and t-based inference match the per-group PSU structure. **Strata that vary across cells of a group require either an explicit `psu=<col>` or the original `SurveyDesign(..., nest=True)` flag** — under `nest=True` the resolver combines `(stratum, psu)` into globally-unique labels, so the auto-injected `psu=<group>` is re-labeled per stratum and the cell allocator proceeds. Only the `nest=False` + varying-strata + omitted-psu combination is rejected up front with a targeted `ValueError` at `fit()` time (the synthesized PSU column would reuse group labels across strata and trip the cross-stratum PSU uniqueness check in `SurveyDesign.resolve()`). Under replicate-weight designs, the same cell-level `psi_i` is aggregated via Rao-Wu weight-ratio rescaling (`compute_replicate_if_variance` at `diff_diff/survey.py:1681`) rather than the Binder TSL formula. All five methods (BRR/Fay/JK1/JKn/SDR) are supported method-agnostically through the unified helper; the effective `df_survey` is reduced to `min(n_valid) - 1` across IF sites when some replicate solves fail (matching `efficient_did.py:1133-1135` and `triple_diff.py:676-686` precedents). Under DID^X, the first-stage residualization coefficient `theta_hat` is computed once on full-sample weights and treated as fixed (FWL plug-in IF convention) — per-replicate refits of `theta_hat` are not performed. **Post-period attribution extends to heterogeneity (Binder TSL branch only):** the heterogeneity WLS coefficient IF `ψ_g = inv(X'WX)[1,:] @ x_g * W_g * r_g` is attributed in full to the single post-period cell `(g, out_idx)` at each horizon (same single-cell convention as DID_l), then expanded as `ψ_i = ψ_g * (w_i / W_{g, out_idx})`, and fed through `compute_survey_if_variance`. Under PSU=group the PSU-level aggregate telescopes to `ψ_g`, so Binder variance is byte-identical relative to the pre-cell-period release; under within-group-varying PSU mass lands in the post-period PSU. **Replicate-weight branch keeps the legacy group-level allocator** `ψ_i = ψ_g * (w_i / W_g)` because `compute_replicate_if_variance` computes `θ_r = sum_i ratio_ir * ψ_i` at observation level and is therefore not PSU-telescoping: redistributing mass onto the post-period cell would silently change the replicate SE whenever a replicate column's ratios vary within a group (the library accepts arbitrary per-row replicate matrices, not just PSU-aligned ones). The legacy allocator preserves byte-identity of the replicate SE for every previously-supported fit. Replicate + within-group-varying PSU is unreachable by construction (`SurveyDesign` rejects `replicate_weights` combined with explicit `strata/psu/fpc`).

tests/test_chaisemartin_dhaultfoeuille.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,48 @@ def test_convenience_function_matches_class(self):
222222
assert results_class.overall_att == pytest.approx(results_fn.overall_att)
223223
assert results_class.overall_se == pytest.approx(results_fn.overall_se)
224224

225+
def test_convenience_function_routes_paths_of_interest_to_init(self):
226+
"""`paths_of_interest` is an __init__ kwarg; the convenience helper
227+
must split it out of `**kwargs` rather than letting it fall through
228+
to fit() (which would raise TypeError). Regression for the
229+
signature-derived split."""
230+
df = _by_path_three_path_data()
231+
results_class = ChaisemartinDHaultfoeuille(
232+
drop_larger_lower=False,
233+
paths_of_interest=[(0, 1, 1, 1), (0, 1, 0, 0)],
234+
twfe_diagnostic=False,
235+
seed=42,
236+
)
237+
with warnings.catch_warnings():
238+
warnings.simplefilter("ignore", UserWarning)
239+
r_class = results_class.fit(
240+
df,
241+
outcome="outcome",
242+
group="group",
243+
time="period",
244+
treatment="treatment",
245+
L_max=3,
246+
)
247+
r_fn = chaisemartin_dhaultfoeuille(
248+
df,
249+
outcome="outcome",
250+
group="group",
251+
time="period",
252+
treatment="treatment",
253+
drop_larger_lower=False,
254+
paths_of_interest=[(0, 1, 1, 1), (0, 1, 0, 0)],
255+
twfe_diagnostic=False,
256+
seed=42,
257+
L_max=3,
258+
)
259+
# Both surfaces produce identical per-path effects.
260+
assert list(r_fn.path_effects.keys()) == list(r_class.path_effects.keys())
261+
for path in r_fn.path_effects:
262+
for l_h, vals in r_fn.path_effects[path]["horizons"].items():
263+
assert vals["effect"] == pytest.approx(
264+
r_class.path_effects[path]["horizons"][l_h]["effect"]
265+
)
266+
225267
def test_minimal_computation_path(self):
226268
# Disable everything optional; verify still works
227269
data = generate_reversible_did_data(n_groups=30, n_periods=4, seed=1)
@@ -8606,6 +8648,32 @@ def test_paths_of_interest_preserves_user_order(self):
86068648
# Insertion order preserved.
86078649
assert list(res.path_effects.keys()) == user_order
86088650

8651+
def test_paths_of_interest_frequency_rank_is_true_frequency(self):
8652+
"""`frequency_rank` must reflect descending count, NOT user-list
8653+
order. Regression for the R0 P2 finding: previously the rank
8654+
field was assigned from `enumerate(selected_paths)` which gave
8655+
user-selection order under `paths_of_interest`."""
8656+
df = _by_path_three_path_data()
8657+
# _by_path_three_path_data: (0,1,1,1) has 3 groups, (0,1,0,0) has 2,
8658+
# (0,1,1,0) has 1. User passes the lowest-frequency path first.
8659+
user_order = [(0, 1, 0, 0), (0, 1, 1, 1)]
8660+
est = ChaisemartinDHaultfoeuille(
8661+
drop_larger_lower=False,
8662+
paths_of_interest=user_order,
8663+
twfe_diagnostic=False,
8664+
seed=42,
8665+
)
8666+
with warnings.catch_warnings():
8667+
warnings.simplefilter("ignore", UserWarning)
8668+
res = est.fit(
8669+
df, outcome="outcome", group="group", time="period",
8670+
treatment="treatment", L_max=3,
8671+
)
8672+
# (0,1,1,1) has higher frequency → rank 1
8673+
# (0,1,0,0) has lower frequency → rank 2
8674+
assert res.path_effects[(0, 1, 1, 1)]["frequency_rank"] == 1
8675+
assert res.path_effects[(0, 1, 0, 0)]["frequency_rank"] == 2
8676+
86098677
def test_unobserved_path_warns_and_omits(self):
86108678
df = _by_path_three_path_data()
86118679
est = ChaisemartinDHaultfoeuille(

0 commit comments

Comments
 (0)