igerber
diff --git a/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎diff_diff/bootstrap_utils.py‎
Lines changed: 132 additions & 0 deletions b/‎diff_diff/bootstrap_utils.py‎
Lines changed: 132 additions & 0 deletions
diff --git a/‎diff_diff/had.py‎
Lines changed: 8 additions & 33 deletions b/‎diff_diff/had.py‎
Lines changed: 8 additions & 33 deletions
diff --git a/‎diff_diff/had_pretests.py‎
Lines changed: 46 additions & 50 deletions b/‎diff_diff/had_pretests.py‎
Lines changed: 46 additions & 50 deletions
@@ -17,6 +17,7 @@
     "generate_bootstrap_weights_batch",
     "generate_bootstrap_weights_batch_numpy",
     "generate_survey_multiplier_weights_batch",
+    "apply_stratum_centering",
     "generate_rao_wu_weights",
     "generate_rao_wu_weights_batch",
     "compute_percentile_ci",
@@ -654,6 +655,137 @@ def generate_survey_multiplier_weights_batch(
     return weights, psu_ids
 
 
+def apply_stratum_centering(
+    tensor: np.ndarray,
+    resolved_survey: "ResolvedSurveyDesign",
+    psu_ids: np.ndarray,
+    psu_axis: int = 0,
+) -> np.ndarray:
+    """Within-stratum demean + sqrt(n_h/(n_h-1)) Bessel rescale along the
+    PSU axis of a tensor. Mutates ``tensor`` in place AND returns it.
+
+    Shared by the HAD sup-t event-study bootstrap
+    (``had._sup_t_multiplier_bootstrap``, PSU axis = 0 on the
+    PSU-aggregated influence tensor ``Psi_psu`` of shape ``(n_psu,
+    n_horizons)``) AND the HAD Stute survey-bootstrap family
+    (``stute_test``, ``stute_joint_pretest``, PSU axis = 1 on the
+    multiplier matrix ``psu_mults`` of shape ``(n_bootstrap, n_psu)``).
+    Same algebra applied at different points in the two pipelines:
+
+    - HAD sup-t is a multiplier bootstrap on a precomputed influence
+      tensor. The correction is applied to the tensor before
+      ``perturbations = psu_weights @ Psi_psu``.
+    - Stute is a wild residual bootstrap with refit-in-loop and a
+      nonlinear functional. The correction is applied to the multipliers
+      before the per-obs broadcast ``eta_obs = psu_mults[b,
+      psu_col_idx]``.
+
+    Locks the algebraic identity architecturally (so future drift in
+    one site cannot silently diverge from the other) and makes both
+    sites consume the same battle-tested code path.
+
+    The combined correction makes ``Var_xi[xi @ Psi_psu]`` match the
+    analytical Binder-TSL stratified target
+    ``V = sum_h (1 - f_h) (n_h / (n_h - 1)) sum_j (psi_hj - psi_h_bar)²``
+    exactly (the ``(1 - f_h)`` factor is already baked into
+    ``psu_mults`` by :func:`generate_survey_multiplier_weights_batch`;
+    this helper bakes the remaining ``(n_h / (n_h - 1))`` factor and
+    enforces the within-stratum-zero centering required for cluster
+    wild bootstrap consistency under stratification).
+
+    See REGISTRY § HeterogeneousAdoptionDiD —
+    "Note (Stute stratified survey-bootstrap calibration)" for the
+    derivation.
+
+    Parameters
+    ----------
+    tensor : np.ndarray
+        Tensor with the PSU dimension on ``psu_axis``. Modified
+        in-place, so it must have a floating dtype (the in-place
+        updates cannot cast float results back into an integer array).
+    resolved_survey : ResolvedSurveyDesign
+        Resolved survey design. Provides ``.psu`` (per-unit PSU IDs;
+        may be None for the implicit-PSU case where each unit is its
+        own PSU) and ``.strata`` (per-unit stratum labels; may be None
+        for the single-implicit-stratum case).
+    psu_ids : np.ndarray, shape ``(n_psu,)``
+        Unique PSU identifiers aligned to the ``psu_axis`` of
+        ``tensor``. Output of
+        :func:`generate_survey_multiplier_weights_batch`. Matched
+        against ``resolved_survey.psu`` after ``int()`` conversion.
+        Only consulted when both ``.strata`` and ``.psu`` are set.
+    psu_axis : int, default 0
+        Axis of ``tensor`` along which PSUs are indexed. 0 for HAD
+        sup-t ``Psi_psu`` (shape ``(n_psu, n_horizons)``); 1 for Stute
+        ``psu_mults`` (shape ``(n_bootstrap, n_psu)``).
+
+    Returns
+    -------
+    np.ndarray
+        Same object as ``tensor`` (in-place mutation; returned for
+        chaining). Strata with fewer than two PSUs are skipped (their
+        entries are left unchanged) to avoid a divide-by-zero in
+        ``sqrt(n_h / (n_h - 1))``; under ``lonely_psu='remove'`` /
+        ``'certainty'`` their multipliers are already zeroed by
+        :func:`generate_survey_multiplier_weights_batch`'s lonely-PSU
+        handling.
+
+    Raises
+    ------
+    KeyError
+        If ``resolved_survey.psu`` contains an ID absent from
+        ``psu_ids``.
+    ValueError
+        If, for a stratified design with explicit PSUs, some position
+        on ``psu_axis`` is not matched by any unit in
+        ``resolved_survey.psu`` (its stratum label would otherwise be
+        read from uninitialized memory).
+
+    Notes
+    -----
+    Under ``strata=None``, the correction is applied uniformly with a
+    single implicit stratum (``n_h = n_psu``) — demean across all PSUs
+    along ``psu_axis`` and rescale by ``sqrt(n_psu / (n_psu - 1))``.
+    This is the standard small-sample correction for an iid cluster
+    wild bootstrap (Wu 1986; Liu 1988) and is the same correction
+    ``had._sup_t_multiplier_bootstrap`` applies through this helper.
+
+    The Stute call site has historically NOT applied this correction
+    (pre-PR Phase 4.5 C). Lifting the gate on stratified designs +
+    introducing this shared helper makes the non-strata Stute path
+    apply the correction uniformly, which is a deliberate calibration
+    improvement (~1-2% p-value shift for typical ``n_psu``) — see
+    REGISTRY § "Note (Stute stratified survey-bootstrap calibration)".
+    """
+    n_psu = tensor.shape[psu_axis]
+
+    if resolved_survey.strata is None:
+        # Single implicit stratum — demean across all PSUs along
+        # psu_axis, rescale by sqrt(n_psu / (n_psu - 1)).
+        if n_psu >= 2:
+            tensor -= tensor.mean(axis=psu_axis, keepdims=True)
+            tensor *= np.sqrt(n_psu / (n_psu - 1))
+        return tensor
+
+    strata = np.asarray(resolved_survey.strata)
+    if resolved_survey.psu is None:
+        # Each unit is its own PSU; psu_ids = arange(n_psu). The labels
+        # are only read below, so no defensive copy is needed.
+        psu_stratum = strata
+    else:
+        # Build PSU -> stratum map from the FIRST unit of each PSU.
+        # Constant-within-PSU by the SurveyDesign.resolve contract —
+        # assert defensively in tests.
+        psu_id_to_col = {int(p): c for c, p in enumerate(psu_ids)}
+        unit_psu = np.asarray(resolved_survey.psu)
+        unit_col = np.array(
+            [psu_id_to_col[int(p)] for p in unit_psu], dtype=np.intp
+        )
+        # return_index yields the first occurrence of each column, i.e.
+        # the first unit belonging to each PSU.
+        cols, first_unit = np.unique(unit_col, return_index=True)
+        if cols.size != n_psu:
+            # Without this guard the unmatched entries of the np.empty
+            # buffer below would hold arbitrary stratum labels.
+            raise ValueError(
+                "apply_stratum_centering: every position on psu_axis "
+                "must be matched by at least one unit in "
+                f"resolved_survey.psu (matched {cols.size} of {n_psu})."
+            )
+        psu_stratum = np.empty(n_psu, dtype=strata.dtype)
+        psu_stratum[cols] = strata[first_unit]
+
+    for h in np.unique(psu_stratum):
+        mask_h = psu_stratum == h
+        n_h = int(mask_h.sum())
+        if n_h < 2:
+            # Singleton / empty stratum: caller's lonely_psu logic
+            # has already zeroed the corresponding multipliers (or
+            # the contribution is zero by construction). Skip to
+            # avoid a divide-by-zero on sqrt(n_h / 0).
+            continue
+        slc = [slice(None)] * tensor.ndim
+        slc[psu_axis] = mask_h
+        slc = tuple(slc)
+        # Boolean-mask indexing returns a copy: transform the copy,
+        # then scatter it back into the caller's tensor once.
+        block = tensor[slc]
+        block -= block.mean(axis=psu_axis, keepdims=True)
+        block *= np.sqrt(n_h / (n_h - 1))
+        tensor[slc] = block
+
+    return tensor
+
+
 def generate_rao_wu_weights(
     resolved_survey: "ResolvedSurveyDesign",
     rng: np.random.Generator,
 
@@ -2084,6 +2084,7 @@ def _sup_t_multiplier_bootstrap(
         caller.
     """
     from diff_diff.bootstrap_utils import (
+        apply_stratum_centering,
         generate_bootstrap_weights_batch,
         generate_survey_multiplier_weights_batch,
     )
@@ -2169,39 +2170,13 @@ def _sup_t_multiplier_bootstrap(
             # Each unit is its own PSU (psu_ids = np.arange(n_units)).
             Psi_psu = influence_matrix.copy()
 
-        if resolved_survey.strata is not None:
-            strata = np.asarray(resolved_survey.strata)
-            # Build PSU -> stratum map (strata constant-within-PSU by
-            # SurveyDesign.resolve contract).
-            psu_stratum = np.empty(n_psu, dtype=strata.dtype)
-            if resolved_survey.psu is not None:
-                seen = np.zeros(n_psu, dtype=bool)
-                unit_psu = np.asarray(resolved_survey.psu)
-                for i in range(n_units):
-                    col = psu_id_to_col[int(unit_psu[i])]
-                    if not seen[col]:
-                        psu_stratum[col] = strata[i]
-                        seen[col] = True
-            else:
-                psu_stratum = strata.copy()
-
-            for h in np.unique(psu_stratum):
-                mask_h = psu_stratum == h
-                n_h = int(mask_h.sum())
-                if n_h < 2:
-                    # Singleton / empty stratum contributes 0 variance
-                    # regardless; the helper's lonely-PSU logic already
-                    # zeros those multipliers. Skip centering to avoid
-                    # a divide-by-zero on sqrt(n_h / (n_h - 1)).
-                    continue
-                Psi_psu[mask_h] -= Psi_psu[mask_h].mean(axis=0, keepdims=True)
-                Psi_psu[mask_h] *= np.sqrt(n_h / (n_h - 1))
-        else:
-            # Single implicit stratum — demean across all PSUs, scale by
-            # sqrt(n_psu / (n_psu - 1)).
-            if n_psu >= 2:
-                Psi_psu -= Psi_psu.mean(axis=0, keepdims=True)
-                Psi_psu *= np.sqrt(n_psu / (n_psu - 1))
+        # Stratum centering + Bessel rescale on the PSU-aggregated
+        # influence tensor. Shared with the HAD Stute survey-bootstrap
+        # family (had_pretests.stute_test / stute_joint_pretest) which
+        # applies the same algebra to PSU multipliers instead — see
+        # ``apply_stratum_centering`` docstring and REGISTRY
+        # § "Note (Stute stratified survey-bootstrap calibration)".
+        apply_stratum_centering(Psi_psu, resolved_survey, psu_ids, psu_axis=0)
 
         # PSU-level perturbations: (B, H) = (B, n_psu) @ (n_psu, H).
         # No (1/n) prefactor — Psi_psu_scaled is already on the θ̂-scale
 
@@ -76,7 +76,10 @@
 import pandas as pd
 from scipy import stats
 
-from diff_diff.bootstrap_utils import generate_survey_multiplier_weights_batch
+from diff_diff.bootstrap_utils import (
+    apply_stratum_centering,
+    generate_survey_multiplier_weights_batch,
+)
 from diff_diff.had import (
     _aggregate_first_difference,
     _aggregate_unit_resolved_survey,
@@ -1912,36 +1915,19 @@ def stute_test(
         # CvM recompute. Routes via synthetic trivial ResolvedSurveyDesign
         # for the weights= shortcut to share the same kernel.
         resolved_for_boot = survey if survey is not None else make_pweight_design(w_arr)
-        # R10 P1: reject stratified designs explicitly until a derived
-        # Stute-specific correction lands. The HAD sup-t bootstrap
-        # (had.py:2120+) applies a within-stratum demean +
-        # sqrt(n_h/(n_h-1)) small-sample correction AFTER
-        # generate_survey_multiplier_weights_batch returns, to make the
-        # bootstrap variance match the Binder-TSL stratified target.
-        # That same correction has NOT been derived for the Stute CvM
-        # functional, so applying the helper's raw multipliers directly
-        # to residual perturbations on stratified designs leaves the
-        # bootstrap p-value silently miscalibrated. Pweight-only,
-        # PSU-only, and FPC-only designs are still supported (the
-        # helper's output is appropriately scaled for those).
-        if resolved_for_boot.strata is not None:
-            raise NotImplementedError(
-                "stute_test: SurveyDesign(strata=...) with stratified "
-                "sampling is not yet supported. The Stute CvM bootstrap "
-                "calibration on stratified designs requires a within-"
-                "stratum demean + sqrt(n_h/(n_h-1)) small-sample "
-                "correction analogous to the HAD sup-t bootstrap, but "
-                "the matching derivation for the Stute functional has "
-                "not been completed. Pweight-only or PSU-only "
-                "(SurveyDesign(weights=..., psu=...)) designs are "
-                "supported; pre-process stratified designs to remove "
-                "the strata column or wait for the derivation in a "
-                "follow-up PR."
-            )
-        # R5 P1: reject lonely_psu='adjust' singleton-strata designs
-        # explicitly (now redundant with the strata guard above; kept
-        # for defense in depth and for residual non-stratified
-        # singleton-strata edge cases).
+        # Stratified designs are supported via the standard stratified
+        # clustered wild-bootstrap correction on the PSU multipliers
+        # (within-stratum demean + sqrt(n_h/(n_h-1)) Bessel rescale),
+        # applied uniformly before the per-obs broadcast eta_obs =
+        # psu_mults[b, psu_col_idx] below. See REGISTRY
+        # § "Note (Stute stratified survey-bootstrap calibration)" and
+        # ``apply_stratum_centering`` (bootstrap_utils.py) for the
+        # derivation; the same helper backs the HAD sup-t event-study
+        # bootstrap at had.py:2151+.
+        # R5 P1: reject lonely_psu='adjust' singleton-strata designs.
+        # This pseudo-stratum centering transform has not been derived
+        # for the Stute CvM (same gap as the HAD sup-t deviation at
+        # REGISTRY § 'Note (HAD sup-t lonely_psu="adjust") deviation').
         if _has_lonely_psu_adjust_singletons(resolved_for_boot):
             raise NotImplementedError(
                 "stute_test: SurveyDesign(lonely_psu='adjust') with "
@@ -1988,6 +1974,15 @@ def stute_test(
         psu_mults, psu_ids = generate_survey_multiplier_weights_batch(
             n_bootstrap, resolved_for_boot, weight_type="mammen", rng=rng
         )
+        # Stratum centering + Bessel rescale on the PSU multipliers
+        # before broadcast. Same algebra as the HAD sup-t bootstrap at
+        # had.py:2151+ (applied to the influence tensor there), but
+        # applied here to ``psu_mults`` because the Stute bootstrap is a
+        # wild-residual / refit-in-loop bootstrap (no precomputed
+        # influence tensor exists). See REGISTRY § "Note (Stute
+        # stratified survey-bootstrap calibration)" for the derivation
+        # and the non-strata calibration shift it introduces.
+        apply_stratum_centering(psu_mults, resolved_for_boot, psu_ids, psu_axis=1)
         # Build per-obs PSU-column index. When psu is None (trivial path),
         # each obs is its own PSU and psu_ids = arange(G) - so psu_col_idx
         # is just arange(G).
@@ -3253,25 +3248,18 @@ def stute_joint_pretest(
         # vector-valued empirical-process unit-level dependence (paper
         # convention) AND PSU clustering (Krieger-Pfeffermann 1997).
         resolved_for_boot = survey if survey is not None else make_pweight_design(w_arr)
-        # R10 P1: reject stratified designs explicitly until a derived
-        # Stute-specific correction lands (mirrors stute_test
-        # single-horizon).
-        if resolved_for_boot.strata is not None:
-            raise NotImplementedError(
-                "stute_joint_pretest: SurveyDesign(strata=...) with "
-                "stratified sampling is not yet supported. The Stute "
-                "CvM bootstrap calibration on stratified designs "
-                "requires a within-stratum demean + sqrt(n_h/(n_h-1)) "
-                "small-sample correction analogous to the HAD sup-t "
-                "bootstrap, but the matching derivation for the joint "
-                "Stute functional has not been completed. Pweight-only "
-                "or PSU-only designs are supported; pre-process "
-                "stratified designs to remove the strata column or wait "
-                "for the derivation in a follow-up PR."
-            )
-        # R5 P1: reject lonely_psu='adjust' singleton-strata designs
-        # explicitly (now redundant with the strata guard above; kept
-        # for defense in depth).
+        # Stratified designs are supported via the standard stratified
+        # clustered wild-bootstrap correction on the PSU multipliers
+        # (within-stratum demean + sqrt(n_h/(n_h-1)) Bessel rescale),
+        # applied uniformly before the per-obs broadcast eta_obs =
+        # psu_mults[b, psu_col_idx] below. The joint variant shares the
+        # SAME multiplier row across horizons within each replicate, so
+        # the stratum correction applies once and inherits across
+        # horizons (preserving cross-horizon empirical-process
+        # dependence per Hlávka & Hušková 2020 § 3). See REGISTRY
+        # § "Note (Stute stratified survey-bootstrap calibration)".
+        # R5 P1: reject lonely_psu='adjust' singleton-strata designs.
+        # Same pseudo-stratum centering gap as stute_test / HAD sup-t.
         if _has_lonely_psu_adjust_singletons(resolved_for_boot):
             raise NotImplementedError(
                 "stute_joint_pretest: SurveyDesign(lonely_psu='adjust') "
@@ -3314,6 +3302,14 @@ def stute_joint_pretest(
         psu_mults, psu_ids = generate_survey_multiplier_weights_batch(
             n_bootstrap, resolved_for_boot, weight_type="mammen", rng=rng
         )
+        # Stratum centering + Bessel rescale on the PSU multipliers
+        # before broadcast. Single application here (shared with the
+        # per-horizon loop below) propagates the same centered
+        # multipliers across all horizons in each replicate, preserving
+        # the joint Stute's cross-horizon empirical-process dependence.
+        # See REGISTRY § "Note (Stute stratified survey-bootstrap
+        # calibration)".
+        apply_stratum_centering(psu_mults, resolved_for_boot, psu_ids, psu_axis=1)
         if resolved_for_boot.psu is None:
             psu_col_idx = np.arange(G)
         else: