Fix Round 4: delta SE on analytical path, shared bootstrap, equal-cell Note

igerber · claude · igerber · commit 41bc1e8c3ca5 · 2026-04-12T13:52:38.000-04:00
- Compute delta-method SE regardless of bootstrap (was gated on
  `bootstrap_results is not None`, leaving the analytical path with NaN)
- Generate one shared bootstrap weight matrix for all horizons so
  the sup-t band is a valid joint multiplier-bootstrap band
- Add REGISTRY Note for Phase 2 equal-cell weighting deviation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -1506,20 +1506,18 @@ def fit(
                 # Cost-benefit delta SE: compute from per-horizon bootstrap
                 # distributions if available (delta = sum w_l * DID_l, so
                 # delta_b = sum w_l * DID_l_b for each bootstrap rep).
-                delta_se = float("nan")
-                if bootstrap_results is not None and bootstrap_results.event_study_ses is not None:
-                    # The mixin stores overall_dist for l=1; we need
-                    # per-horizon distributions which were computed but
-                    # not all stored. Use the delta-method SE as fallback:
-                    # Var(delta) = sum_l w_l^2 * Var(DID_l) for indep.
-                    weights = cost_benefit_result.get("weights", {})
-                    var_delta = 0.0
-                    for l_w, w_l in weights.items():
-                        se_l = event_study_effects.get(l_w, {}).get("se", float("nan"))
-                        if np.isfinite(se_l):
-                            var_delta += (w_l * se_l) ** 2
-                    if var_delta > 0:
-                        delta_se = float(np.sqrt(var_delta))
+                # Delta-method SE: Var(delta) = sum w_l^2 * Var(DID_l)
+                # (treating horizons as independent, conservative under
+                # Assumption 8). Works on both analytical and bootstrap
+                # SEs since event_study_effects[l]["se"] holds whichever
+                # was propagated.
+                weights = cost_benefit_result.get("weights", {})
+                var_delta = 0.0
+                for l_w, w_l in weights.items():
+                    se_l = event_study_effects.get(l_w, {}).get("se", float("nan"))
+                    if np.isfinite(se_l):
+                        var_delta += (w_l * se_l) ** 2
+                delta_se = float(np.sqrt(var_delta)) if var_delta > 0 else float("nan")
 
                 if np.isfinite(delta_se):
                     effective_overall_se = delta_se
diff --git a/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py b/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py
@@ -251,25 +251,36 @@ def _compute_dcdh_bootstrap(
                 results.placebo_ci = ci_pl
                 results.placebo_p_value = p_pl
 
-        # --- Phase 2: Multi-horizon bootstrap ---
+        # --- Phase 2: Multi-horizon bootstrap with shared weight matrix ---
+        # Generate ONE shared (n_bootstrap, n_groups) weight matrix so all
+        # horizons use the same bootstrap draw, making the sup-t statistic
+        # a valid joint multiplier-bootstrap band.
         if multi_horizon_inputs is not None:
             es_ses: Dict[int, float] = {}
             es_cis: Dict[int, Tuple[float, float]] = {}
             es_pvals: Dict[int, float] = {}
             es_dists: Dict[int, np.ndarray] = {}
 
+            # Shared weight matrix sized for the group set
+            n_groups_mh = n_groups_for_overall
+            shared_weights = _generate_bootstrap_weights_batch(
+                n_bootstrap=self.n_bootstrap,
+                n_units=n_groups_mh,
+                weight_type=self.bootstrap_weights,
+                rng=rng,
+            )
+
             for l_h, (u_h, n_h, eff_h) in sorted(multi_horizon_inputs.items()):
                 if u_h.size > 0 and n_h > 0:
-                    se_h, ci_h, p_h, dist_h = _bootstrap_one_target(
-                        u_centered=u_h,
-                        divisor=n_h,
-                        original=eff_h,
-                        n_bootstrap=self.n_bootstrap,
-                        weight_type=self.bootstrap_weights,
+                    # Use the shared weight matrix truncated to u_h length
+                    w_h = shared_weights[:, : u_h.size]
+                    deviations = (w_h @ u_h) / n_h
+                    dist_h = deviations + eff_h
+
+                    se_h, ci_h, p_h = _compute_effect_bootstrap_stats(
+                        original_effect=eff_h,
+                        boot_dist=dist_h,
                         alpha=self.alpha,
-                        rng=rng,
-                        context=f"dCDH horizon l={l_h} bootstrap",
-                        return_distribution=True,
                     )
                     es_ses[l_h] = se_h
                     es_cis[l_h] = ci_h
@@ -280,9 +291,7 @@ def _compute_dcdh_bootstrap(
             results.event_study_cis = es_cis
             results.event_study_p_values = es_pvals
 
-            # Sup-t simultaneous confidence bands (CallawaySantAnna pattern
-            # from staggered_bootstrap.py:497-533): for each bootstrap rep,
-            # compute the max absolute t-stat across horizons.
+            # Sup-t simultaneous confidence bands using the shared draws.
             valid_horizons = [
                 l_h
                 for l_h in es_dists
@@ -292,7 +301,6 @@ def _compute_dcdh_bootstrap(
                 boot_matrix = np.array([es_dists[l_h] for l_h in valid_horizons])
                 effects_vec = np.array([multi_horizon_inputs[l_h][2] for l_h in valid_horizons])
                 ses_vec = np.array([es_ses[l_h] for l_h in valid_horizons])
-                # sup_t_dist[b] = max_l |(boot_l[b] - DID_l) / SE_l|
                 t_stats = np.abs((boot_matrix - effects_vec[:, None]) / ses_vec[:, None])
                 sup_t_dist = np.max(t_stats, axis=0)
                 finite_mask = np.isfinite(sup_t_dist)
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -533,6 +533,8 @@ Cost-benefit aggregate `delta = sum_l w_l * DID_l` (Lemma 4) where `w_l` are non
 
 Dynamic placebos `DID^{pl}_l` look backward from each group's reference period, with a dual eligibility condition: `F_g - 1 - l >= 1` AND `F_g - 1 + l <= T_g`.
 
+- **Note (Phase 2 equal-cell weighting, deviation from R `DIDmultiplegtDYN`):** The Phase 1 equal-cell weighting contract carries forward to all Phase 2 estimands (`DID_l`, `DID^{pl}_l`, `DID^n_l`, `delta`). Each `(g, t)` cell contributes equally regardless of within-cell observation count. On individual-level inputs with uneven cell sizes, this produces a different estimand than R `DIDmultiplegtDYN`, which weights by cell size. The parity tests use one-observation-per-cell generators, for which equal-cell and cell-size weighting coincide, so parity holds. See the Phase 1 weighting Note above for the full rationale.
+
 - **Note (Phase 2 `<50%` switcher warning):** When fewer than 50% of the l=1 switchers contribute at a far horizon l, `fit()` emits a `UserWarning`. The paper recommends not reporting such horizons (Favara-Imbs application, footnote 14).
 
 - **Note (Phase 2 Assumption 7 and cost-benefit delta):** Assumption 7 (`D_{g,t} >= D_{g,1}`) is required for the single-sign cost-benefit interpretation. When leavers are present (binary: 1->0 groups violate Assumption 7), the estimator emits a `UserWarning` and provides `delta_joiners` / `delta_leavers` separately on `results.cost_benefit_delta`.