Address PR #401 R0 review (1 P1 + 1 P2 + 1 P3)

igerber · claude · igerber · commit 7df7dc258b4d · 2026-05-09T10:43:38.000-04:00
P1 (blocker): the convenience helper `chaisemartin_dhaultfoeuille()`
hard-coded the constructor kwarg allowlist without `paths_of_interest`,
so the new kwarg fell through to `fit()` and raised TypeError. Replace
the static set with a signature-derived split via
`inspect.signature(ChaisemartinDHaultfoeuille.__init__)` so future
constructor params cannot drift out of sync. Add helper-level
regression `test_convenience_function_routes_paths_of_interest_to_init`
parallel to the existing `test_convenience_function_matches_class`.

P2: under `paths_of_interest`, `frequency_rank` was assigned from
`enumerate(selected_paths)`, which produces user-selection order
rather than true frequency rank. Decouple the iteration order from
the rank field: keep `selected_paths` iteration in user order
(insertion order preserved on `path_effects.keys()`), but compute
`frequency_rank` as the within-selected-paths rank by descending
group count (lex tiebreak on the path tuple). Under `by_path=k`,
`selected_paths` is already sorted by descending frequency so the
two coincide; under `paths_of_interest`, `frequency_rank` now reflects
the observed-count rank among the selected paths, regardless of user
order. Add regression
`test_paths_of_interest_frequency_rank_is_true_frequency`.

P3: clean up binary-only language about per-path disaggregation in
docs and source docstrings:
- class summary at diff_diff/chaisemartin_dhaultfoeuille.py:367-370
  (replace the "binary treatment" requirement with "binary or
  integer-coded discrete treatment", add `paths_of_interest` mention)
- docs/api/chaisemartin_dhaultfoeuille.rst:14-22 (add
  `paths_of_interest` and explicit "binary or integer-coded discrete"
  language)
- diff_diff/chaisemartin_dhaultfoeuille_results.py:394-452
  (`path_effects`, `path_placebo_event_study`,
  `path_cumulated_event_study`, `path_sup_t_bands` activator clauses
  now read "by_path is a positive int OR paths_of_interest is set")
- docs/methodology/REGISTRY.md:671 checklist line (add
  "or integer-coded discrete" + `paths_of_interest`)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -366,8 +366,10 @@ class ChaisemartinDHaultfoeuille(ChaisemartinDHaultfoeuilleBootstrapMixin):
       HonestDiD sensitivity integration on placebos via ``honest_did=True``
     - Per-path event-study disaggregation via ``by_path=k`` (top-k most
       common observed treatment paths within the window
-      ``[F_g-1, F_g-1+L_max]``; requires ``drop_larger_lower=False`` and
-      binary treatment)
+      ``[F_g-1, F_g-1+L_max]``; requires ``drop_larger_lower=False``;
+      supports binary or integer-coded discrete treatment) or via
+      ``paths_of_interest=[(...), ...]`` for an explicit user-specified
+      path subset (Python-only API; mutex with ``by_path=k``)
     - Survey support via ``survey_design=``: pweight with strata/PSU/FPC
       via Taylor Series Linearization (analytical) or replicate-weight
       variance (BRR/Fay/JK1/JKn/SDR)
@@ -5842,7 +5844,20 @@ def _compute_path_effects(
 
     path_effects: Dict[Tuple[int, ...], Dict[str, Any]] = {}
 
-    for rank, path in enumerate(selected_paths, start=1):
+    # `frequency_rank` is the within-selected-paths rank by descending
+    # group count (lex tiebreak on the path tuple). Decoupled from the
+    # iteration order over `selected_paths` so that under
+    # `paths_of_interest` (user-specified order) the rank still
+    # reflects true frequency. Under `by_path=k`, `selected_paths` is
+    # already sorted by descending frequency so ranks coincide with
+    # iteration order.
+    rank_sorted_paths = sorted(
+        selected_paths,
+        key=lambda p: (-path_to_count[p], p),
+    )
+    path_to_freq_rank = {p: i + 1 for i, p in enumerate(rank_sorted_paths)}
+
+    for path in selected_paths:
         switcher_mask = path_to_group_mask[path]
         n_path_groups = int(switcher_mask.sum())
 
@@ -5931,7 +5946,7 @@ def _compute_path_effects(
 
         path_effects[path] = {
             "n_groups": n_path_groups,
-            "frequency_rank": rank,
+            "frequency_rank": path_to_freq_rank[path],
             "horizons": horizons,
         }
 
@@ -8136,17 +8151,12 @@ def chaisemartin_dhaultfoeuille(
     -------
     ChaisemartinDHaultfoeuilleResults
     """
+    import inspect
+
     init_keys = {
-        "alpha",
-        "cluster",
-        "n_bootstrap",
-        "bootstrap_weights",
-        "seed",
-        "placebo",
-        "twfe_diagnostic",
-        "drop_larger_lower",
-        "by_path",
-        "rank_deficient_action",
+        name
+        for name, p in inspect.signature(ChaisemartinDHaultfoeuille.__init__).parameters.items()
+        if p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD) and name != "self"
     }
     init_kwargs = {k: v for k, v in kwargs.items() if k in init_keys}
     fit_kwargs = {k: v for k, v in kwargs.items() if k not in init_keys}
diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py
@@ -394,10 +394,15 @@ class ChaisemartinDHaultfoeuilleResults:
     path_effects : dict, optional
         Per-path event-study effects keyed by observed treatment
         trajectory (tuple of int). Populated when ``by_path`` is a
-        positive int at estimator construction. Each entry holds
+        positive int OR ``paths_of_interest`` is a list of int tuples
+        at estimator construction. Each entry holds
         ``{"n_groups": int, "frequency_rank": int,
         "horizons": {l: {"effect", "se", "t_stat", "p_value",
-        "conf_int", "n_obs"}}}`` for ``l = 1..L_max``.
+        "conf_int", "n_obs"}}}`` for ``l = 1..L_max``. Under
+        ``paths_of_interest``, dict-insertion order matches the user-
+        specified path order; ``frequency_rank`` is the within-
+        selected-paths rank by descending observed-group count
+        (decoupled from iteration order).
     path_placebo_event_study : dict, optional
         Per-path backward-horizon placebos ``DID^{pl}_{path, l}`` for
         ``l = 1..L_max``, keyed by observed treatment trajectory (tuple
@@ -407,11 +412,12 @@ class ChaisemartinDHaultfoeuilleResults:
         **path_placebo_event_study[p]}`` view is well-formed across
         forward and backward horizons. Each inner entry holds
         ``{"effect", "se", "t_stat", "p_value", "conf_int", "n_obs"}``.
-        Populated when ``by_path`` is a positive int AND
-        ``placebo=True`` AND ``L_max >= 1``. Empty-state contract
-        mirrors ``path_effects``: ``None`` when ``by_path + placebo``
-        was not requested; ``{}`` when requested but no observed path
-        has a complete window ``[F_g-1, F_g-1+L_max]`` within the
+        Populated when (``by_path`` is a positive int OR
+        ``paths_of_interest`` is set) AND ``placebo=True`` AND
+        ``L_max >= 1``. Empty-state contract mirrors ``path_effects``:
+        ``None`` when ``by_path / paths_of_interest + placebo`` was
+        not requested; ``{}`` when requested but no observed path has
+        a complete window ``[F_g-1, F_g-1+L_max]`` within the
         panel (the same regime where ``path_effects`` returns ``{}``,
         with the same ``UserWarning`` at fit-time). Downstream callers
         should distinguish the two states. Inherits the cross-path
@@ -424,9 +430,9 @@ class ChaisemartinDHaultfoeuilleResults:
         keyed by observed treatment trajectory (tuple of int). Inner
         dict is keyed by horizon directly (no ``"horizons"`` wrapper);
         each entry holds ``{"effect", "se", "t_stat", "p_value",
-        "conf_int", "n_obs"}``. Populated when ``by_path`` is a
-        positive int AND ``trends_linear=True`` AND ``L_max >= 1``;
-        ``None`` otherwise. Mirrors the global ``linear_trends_effects``
+        "conf_int", "n_obs"}``. Populated when (``by_path`` is a
+        positive int OR ``paths_of_interest`` is set) AND
+        ``trends_linear=True`` AND ``L_max >= 1``; ``None`` otherwise. Mirrors the global ``linear_trends_effects``
         cumulation: SE on the cumulated layer is the conservative
         upper bound (sum of per-horizon component SEs from
         ``path_effects[path]["horizons"][l]["se"]``, NaN-consistent).
@@ -443,7 +449,8 @@ class ChaisemartinDHaultfoeuilleResults:
         observed treatment trajectory (tuple of int). Each entry holds
         ``{"crit_value": float, "alpha": float, "n_bootstrap": int,
         "method": str, "n_valid_horizons": int}``. Populated when
-        ``by_path`` is a positive int AND ``n_bootstrap > 0``. The
+        (``by_path`` is a positive int OR ``paths_of_interest`` is
+        set) AND ``n_bootstrap > 0``. The
         band itself is applied per-horizon as ``cband_conf_int`` on
         ``path_effects[path]["horizons"][l]`` and rendered as
         ``cband_lower`` / ``cband_upper`` columns on
@@ -585,9 +592,9 @@ class ChaisemartinDHaultfoeuilleResults:
     # conservative upper bound (sum of per-horizon component SEs,
     # NaN-consistent), matching the global `linear_trends_effects`
     # convention.
-    path_cumulated_event_study: Optional[
-        Dict[Tuple[int, ...], Dict[int, Dict[str, Any]]]
-    ] = field(default=None, repr=False)
+    path_cumulated_event_study: Optional[Dict[Tuple[int, ...], Dict[int, Dict[str, Any]]]] = field(
+        default=None, repr=False
+    )
     # Per-path joint sup-t simultaneous-band metadata. Keyed by path
     # tuple; each entry holds `{"crit_value", "alpha", "n_bootstrap",
     # "method", "n_valid_horizons"}`. Populated when `by_path` is a
@@ -1337,9 +1344,7 @@ def _render_path_effects_section(
             ):
                 cum_horizons = self.path_cumulated_event_study[path]
                 if cum_horizons:
-                    lines.append(
-                        "  Cumulated Level Effects (DID^{fd}, trends_linear):"
-                    )
+                    lines.append("  Cumulated Level Effects (DID^{fd}, trends_linear):")
                     for l_h in sorted(cum_horizons.keys()):
                         ce = cum_horizons[l_h]
                         lines.append(
diff --git a/docs/api/chaisemartin_dhaultfoeuille.rst b/docs/api/chaisemartin_dhaultfoeuille.rst
@@ -19,7 +19,10 @@ integration on placebos; survey support via Taylor-series linearization
 ``by_path=k`` (mirrors R ``did_multiplegt_dyn(..., by_path=k)``,
 including per-path backward placebos and per-path joint sup-t
 simultaneous bands when ``n_bootstrap > 0`` — Python-only extension
-beyond R, which provides no joint bands at any surface).
+beyond R, which provides no joint bands at any surface) or via
+``paths_of_interest=[(...), ...]`` for an explicit user-specified
+path subset (Python-only API; mutex with ``by_path``). ``by_path``
+supports binary or integer-coded discrete (D in Z) treatment.
 
 The estimator:
 
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -668,7 +668,7 @@ The guard is fired by `_survey_se_from_group_if` (analytical and replicate) and
 - [x] State-set-specific trends via control-pool restriction (Web Appendix Section 1.4)
 - [x] Heterogeneity testing via saturated OLS (Web Appendix Section 1.5, Lemma 7)
 - [x] Design-2 switch-in/switch-out descriptive wrapper (Web Appendix Section 1.6)
-- [x] `by_path` per-path event-study disaggregation (binary treatment, joiners/leavers IF precedent; mirrors R `did_multiplegt_dyn(..., by_path=k)`)
+- [x] `by_path` per-path event-study disaggregation (binary or integer-coded discrete treatment, joiners/leavers IF precedent; mirrors R `did_multiplegt_dyn(..., by_path=k)`); plus `paths_of_interest=[(...), ...]` for user-specified path subsets (Python-only API; mutex with `by_path`)
 - [x] HonestDiD (Rambachan-Roth 2023) integration on placebo + event study surface
 - [x] Survey design support: pweight with strata/PSU/FPC via Taylor Series Linearization (analytical) **or replicate-weight variance (BRR/Fay/JK1/JKn/SDR)**, covering the main ATT surface, covariate adjustment (DID^X), heterogeneity testing, the TWFE diagnostic (fit and standalone `twowayfeweights()` helper), and HonestDiD bounds. Opt-in **PSU-level Hall-Mammen wild bootstrap** is also supported via `n_bootstrap > 0`.
 - **Note (Survey IF expansion — library convention):** Survey IF expansion is a library extension not in the dCDH papers (the paper's plug-in variance assumes iid sampling). The library convention builds observation-level `psi_i` by proportionally distributing per-group IF mass within weight share: either at the group level (`psi_i = U_centered[g] * w_i / W_g`, the previous convention) or at the per-`(g, t)` cell level via the cell-period allocator shipped in this release. Cell-level expansion: decompose `U[g]` into per-period attributions `U[g, t]`, cohort-center each column independently, then expand to observation level as `psi_i = U_centered_per_period[g_i, t_i] * (w_i / W_{g_i, t_i})`. Binder (1983) stratified-PSU variance aggregates the resulting `psi` at PSU level. **Post-period attribution convention:** each transition term in the IF sum (of the form `role_weight * (Y_{g, t} - Y_{g, t-1})` for DID_M or `S_g * (Y_{g, out} - Y_{g, ref})` for DID_l) is attributed as a single *difference* to the POST-period cell, not split into a `+Y_post` / `-Y_pre` pair across two cells. This is a library *convention*, not a theorem — adopted because it preserves the group-sum, PSU-sum, and cohort-sum identities of the previous group-level expansion (so Binder variance coincides with the group-level variance under the auto-injected `psu=group`) and because Monte Carlo coverage at nominal 95% is empirically close to nominal on a DGP where PSUs vary across the cells of each group (see `tests/test_dcdh_cell_period_coverage.py`). A covariance-aware two-cell allocator is a plausible alternative and may be worth exploring if future designs motivate an explicit observation-level IF derivation; the method currently in the library is **not derived from the observation-level survey linearization of the contrast** and makes no stronger claim than "coverage is approximately nominal under the tested DGPs and the group-sum identity holds exactly." 
Under within-group-constant PSU (the pre-allocator accepted input), per-cell sums telescope to `U_centered[g]` and Binder variance is byte-identical (up to single-ULP floating-point noise) to the previous group-level expansion. **Strata and PSU must be constant within each `(g, t)` cell** (trivially satisfied in one-obs-per-cell panels — the canonical dCDH structure); variation **across cells of a group** is supported by the allocator. Within-group-varying **weights** are supported as before. When `survey_design.psu` is not specified, `fit()` auto-injects `psu=<group column>` so the TSL variance, `df_survey`, and t-based inference match the per-group PSU structure. **Strata that vary across cells of a group require either an explicit `psu=<col>` or the original `SurveyDesign(..., nest=True)` flag** — under `nest=True` the resolver combines `(stratum, psu)` into globally-unique labels, so the auto-injected `psu=<group>` is re-labeled per stratum and the cell allocator proceeds. Only the `nest=False` + varying-strata + omitted-psu combination is rejected up front with a targeted `ValueError` at `fit()` time (the synthesized PSU column would reuse group labels across strata and trip the cross-stratum PSU uniqueness check in `SurveyDesign.resolve()`). Under replicate-weight designs, the same cell-level `psi_i` is aggregated via Rao-Wu weight-ratio rescaling (`compute_replicate_if_variance` at `diff_diff/survey.py:1681`) rather than the Binder TSL formula. All five methods (BRR/Fay/JK1/JKn/SDR) are supported method-agnostically through the unified helper; the effective `df_survey` is reduced to `min(n_valid) - 1` across IF sites when some replicate solves fail (matching `efficient_did.py:1133-1135` and `triple_diff.py:676-686` precedents). Under DID^X, the first-stage residualization coefficient `theta_hat` is computed once on full-sample weights and treated as fixed (FWL plug-in IF convention) — per-replicate refits of `theta_hat` are not performed. 
**Post-period attribution extends to heterogeneity (Binder TSL branch only):** the heterogeneity WLS coefficient IF `ψ_g = inv(X'WX)[1,:] @ x_g * W_g * r_g` is attributed in full to the single post-period cell `(g, out_idx)` at each horizon (same single-cell convention as DID_l), then expanded as `ψ_i = ψ_g * (w_i / W_{g, out_idx})`, and fed through `compute_survey_if_variance`. Under PSU=group the PSU-level aggregate telescopes to `ψ_g`, so Binder variance is byte-identical relative to the pre-cell-period release; under within-group-varying PSU mass lands in the post-period PSU. **Replicate-weight branch keeps the legacy group-level allocator** `ψ_i = ψ_g * (w_i / W_g)` because `compute_replicate_if_variance` computes `θ_r = sum_i ratio_ir * ψ_i` at observation level and is therefore not PSU-telescoping: redistributing mass onto the post-period cell would silently change the replicate SE whenever a replicate column's ratios vary within a group (the library accepts arbitrary per-row replicate matrices, not just PSU-aligned ones). The legacy allocator preserves byte-identity of the replicate SE for every previously-supported fit. Replicate + within-group-varying PSU is unreachable by construction (`SurveyDesign` rejects `replicate_weights` combined with explicit `strata/psu/fpc`).
diff --git a/tests/test_chaisemartin_dhaultfoeuille.py b/tests/test_chaisemartin_dhaultfoeuille.py
@@ -222,6 +222,48 @@ def test_convenience_function_matches_class(self):
         assert results_class.overall_att == pytest.approx(results_fn.overall_att)
         assert results_class.overall_se == pytest.approx(results_fn.overall_se)
 
+    def test_convenience_function_routes_paths_of_interest_to_init(self):
+        """`paths_of_interest` is an __init__ kwarg; the convenience helper
+        must split it out of `**kwargs` rather than letting it fall through
+        to fit() (which would raise TypeError). Regression for the
+        signature-derived split."""
+        df = _by_path_three_path_data()
+        results_class = ChaisemartinDHaultfoeuille(
+            drop_larger_lower=False,
+            paths_of_interest=[(0, 1, 1, 1), (0, 1, 0, 0)],
+            twfe_diagnostic=False,
+            seed=42,
+        )
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", UserWarning)
+            r_class = results_class.fit(
+                df,
+                outcome="outcome",
+                group="group",
+                time="period",
+                treatment="treatment",
+                L_max=3,
+            )
+            r_fn = chaisemartin_dhaultfoeuille(
+                df,
+                outcome="outcome",
+                group="group",
+                time="period",
+                treatment="treatment",
+                drop_larger_lower=False,
+                paths_of_interest=[(0, 1, 1, 1), (0, 1, 0, 0)],
+                twfe_diagnostic=False,
+                seed=42,
+                L_max=3,
+            )
+        # Both surfaces produce identical per-path effects.
+        assert list(r_fn.path_effects.keys()) == list(r_class.path_effects.keys())
+        for path in r_fn.path_effects:
+            for l_h, vals in r_fn.path_effects[path]["horizons"].items():
+                assert vals["effect"] == pytest.approx(
+                    r_class.path_effects[path]["horizons"][l_h]["effect"]
+                )
+
     def test_minimal_computation_path(self):
         # Disable everything optional; verify still works
         data = generate_reversible_did_data(n_groups=30, n_periods=4, seed=1)
@@ -8606,6 +8648,32 @@ def test_paths_of_interest_preserves_user_order(self):
         # Insertion order preserved.
         assert list(res.path_effects.keys()) == user_order
 
+    def test_paths_of_interest_frequency_rank_is_true_frequency(self):
+        """`frequency_rank` must reflect descending count, NOT user-list
+        order. Regression for the R0 P2 finding: previously the rank
+        field was assigned from `enumerate(selected_paths)` which gave
+        user-selection order under `paths_of_interest`."""
+        df = _by_path_three_path_data()
+        # _by_path_three_path_data: (0,1,1,1) has 3 groups, (0,1,0,0) has 2,
+        # (0,1,1,0) has 1. User passes the lowest-frequency path first.
+        user_order = [(0, 1, 0, 0), (0, 1, 1, 1)]
+        est = ChaisemartinDHaultfoeuille(
+            drop_larger_lower=False,
+            paths_of_interest=user_order,
+            twfe_diagnostic=False,
+            seed=42,
+        )
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", UserWarning)
+            res = est.fit(
+                df, outcome="outcome", group="group", time="period",
+                treatment="treatment", L_max=3,
+            )
+        # (0,1,1,1) has higher frequency → rank 1
+        # (0,1,0,0) has lower frequency → rank 2
+        assert res.path_effects[(0, 1, 1, 1)]["frequency_rank"] == 1
+        assert res.path_effects[(0, 1, 0, 0)]["frequency_rank"] == 2
+
     def test_unobserved_path_warns_and_omits(self):
         df = _by_path_three_path_data()
         est = ChaisemartinDHaultfoeuille(