44
The dCDH papers prescribe only the analytical cohort-recentered plug-in
variance from Web Appendix Section 3.7.3 of the dynamic companion paper.
7- This module adds an opt-in multiplier bootstrap clustered at the group
8- level, matching the inference convention used by ``CallawaySantAnna``,
9- ``ImputationDiD``, and ``TwoStageDiD``. The bootstrap is a library
10- extension, not a paper requirement, and is documented as such in
11- ``REGISTRY.md``.
7+ This module adds an opt-in multiplier bootstrap, clustered at the group
8+ level by default (matching the inference convention used by
9+ ``CallawaySantAnna``, ``ImputationDiD``, and ``TwoStageDiD``). Under
10+ ``survey_design`` with an explicitly-coarser PSU, the bootstrap switches
11+ to PSU-level Hall-Mammen wild clustering: each PSU draws a single
12+ multiplier and all groups within that PSU share it
13+ (see ``_generate_psu_or_group_weights`` and ``_map_for_target`` below,
14+ plus the REGISTRY.md ``ChaisemartinDHaultfoeuille`` Note on survey +
15+ bootstrap). Under the default auto-inject ``psu=group`` each group is
16+ its own PSU and the identity-map fast path reproduces the original
17+ group-level behavior bit-for-bit. The bootstrap is a library extension,
18+ not a paper requirement, and is documented as such in ``REGISTRY.md``.
1219
The mixin operates on **pre-computed cohort-centered influence-function
values**: the main estimator class computes per-group ``U^G_g`` values
produce a bootstrap distribution per target.
2027"""
2128
22- from typing import TYPE_CHECKING , Dict , Optional , Tuple
29+ from typing import TYPE_CHECKING , Any , Dict , Optional , Tuple
2330
2431import numpy as np
2532
@@ -70,6 +77,9 @@ def _compute_dcdh_bootstrap(
7077 # --- Phase 2: multi-horizon inputs ---
7178 multi_horizon_inputs : Optional [Dict [int , Tuple [np .ndarray , int , float ]]] = None ,
7279 placebo_horizon_inputs : Optional [Dict [int , Tuple [np .ndarray , int , float ]]] = None ,
80+ # --- Survey: PSU-level bootstrap under survey designs ---
81+ group_id_to_psu_code : Optional [Dict [Any , int ]] = None ,
82+ eligible_group_ids : Optional [np .ndarray ] = None ,
7383 ) -> DCDHBootstrapResults :
7484 """
7585 Compute multiplier-bootstrap inference for all dCDH targets.
@@ -160,6 +170,14 @@ def _compute_dcdh_bootstrap(
160170 )
161171 rng = np .random .default_rng (self .seed )
162172
173+ # PSU label for each bootstrap weight column is derived from
174+ # the group's ID via `_map_for_target`, not from positional
175+ # truncation. All current dCDH bootstrap targets use the
176+ # variance-eligible group ordering (`eligible_group_ids`); if a
177+ # future target uses a different ordering, add a dedicated
178+ # group-IDs parameter for it rather than reusing the overall
179+ # eligible list.
180+
163181 # --- Overall DID_M ---
164182 # Skip the scalar DID_M bootstrap when divisor_overall <= 0
165183 # (e.g., pure non-binary panels where N_S=0), but continue
@@ -175,6 +193,11 @@ def _compute_dcdh_bootstrap(
175193 rng = rng ,
176194 context = "dCDH overall DID_M bootstrap" ,
177195 return_distribution = True ,
196+ group_to_psu_map = _map_for_target (
197+ u_centered_overall .shape [0 ],
198+ group_id_to_psu_code ,
199+ eligible_group_ids ,
200+ ),
178201 )
179202 else :
180203 overall_se = np .nan
@@ -206,6 +229,9 @@ def _compute_dcdh_bootstrap(
206229 rng = rng ,
207230 context = "dCDH joiners DID_+ bootstrap" ,
208231 return_distribution = False ,
232+ group_to_psu_map = _map_for_target (
233+ u_j .size , group_id_to_psu_code , eligible_group_ids ,
234+ ),
209235 )
210236 results .joiners_se = se_j
211237 results .joiners_ci = ci_j
@@ -225,6 +251,9 @@ def _compute_dcdh_bootstrap(
225251 rng = rng ,
226252 context = "dCDH leavers DID_- bootstrap" ,
227253 return_distribution = False ,
254+ group_to_psu_map = _map_for_target (
255+ u_l .size , group_id_to_psu_code , eligible_group_ids ,
256+ ),
228257 )
229258 results .leavers_se = se_l
230259 results .leavers_ci = ci_l
@@ -244,6 +273,9 @@ def _compute_dcdh_bootstrap(
244273 rng = rng ,
245274 context = "dCDH placebo DID_M^pl bootstrap" ,
246275 return_distribution = False ,
276+ group_to_psu_map = _map_for_target (
277+ u_pl .size , group_id_to_psu_code , eligible_group_ids ,
278+ ),
247279 )
248280 results .placebo_se = se_pl
249281 results .placebo_ci = ci_pl
@@ -259,19 +291,47 @@ def _compute_dcdh_bootstrap(
259291 es_pvals : Dict [int , float ] = {}
260292 es_dists : Dict [int , np .ndarray ] = {}
261293
262- # Shared weight matrix sized for the group set
294+ # Shared weight matrix sized for the group set. Under PSU-level
295+ # bootstrap (Hall-Mammen wild PSU), weights are drawn once per
296+ # PSU and broadcast to groups so all groups in the same PSU
297+ # share a multiplier within a single bootstrap replicate —
298+ # preserving the sup-t joint distribution across horizons.
263299 n_groups_mh = n_groups_for_overall
264- shared_weights = _generate_bootstrap_weights_batch (
300+ shared_weights = _generate_psu_or_group_weights (
265301 n_bootstrap = self .n_bootstrap ,
266- n_units = n_groups_mh ,
302+ n_groups_target = n_groups_mh ,
267303 weight_type = self .bootstrap_weights ,
268304 rng = rng ,
305+ group_to_psu_map = _map_for_target (
306+ n_groups_mh , group_id_to_psu_code , eligible_group_ids ,
307+ ),
269308 )
270309
271310 for l_h , (u_h , n_h , eff_h ) in sorted (multi_horizon_inputs .items ()):
272311 if u_h .size > 0 and n_h > 0 :
273- # Use the shared weight matrix truncated to u_h length
274- w_h = shared_weights [:, : u_h .size ]
312+ # Under the current contract every horizon's IF
313+ # vector uses the variance-eligible group ordering
314+ # from `eligible_group_ids`, so the shared weight
315+ # matrix is already at the right shape. Assert
316+ # this invariant so any future refactor that
317+ # introduces horizon-specific masking fails loudly
318+ # rather than silently misaligning PSU clusters via
319+ # positional truncation.
320+ if u_h .size != n_groups_mh :
321+ raise ValueError (
322+ f"Multi-horizon bootstrap: horizon { l_h } "
323+ f"IF vector has { u_h .size } entries but "
324+ f"shared weight matrix has { n_groups_mh } "
325+ f"columns. dCDH's contract requires every "
326+ f"horizon to use the variance-eligible "
327+ f"group ordering; to support a horizon "
328+ f"with a different ordering, thread "
329+ f"target-specific group IDs through "
330+ f"`multi_horizon_inputs` and project the "
331+ f"shared PSU draws onto the horizon's own "
332+ f"ordering via `_map_for_target`."
333+ )
334+ w_h = shared_weights
275335 deviations = (w_h @ u_h ) / n_h
276336 dist_h = deviations + eff_h
277337
@@ -324,6 +384,9 @@ def _compute_dcdh_bootstrap(
324384 rng = rng ,
325385 context = f"dCDH placebo l={ l_h } bootstrap" ,
326386 return_distribution = False ,
387+ group_to_psu_map = _map_for_target (
388+ u_h .size , group_id_to_psu_code , eligible_group_ids ,
389+ ),
327390 )
328391 pl_ses [l_h ] = se_h
329392 pl_cis [l_h ] = ci_h
@@ -341,6 +404,125 @@ def _compute_dcdh_bootstrap(
341404# =============================================================================
342405
343406
407+ def _map_for_target (
408+ target_size : int ,
409+ group_id_to_psu_code : Optional [Dict [Any , int ]],
410+ eligible_group_ids : Optional [np .ndarray ],
411+ ) -> Optional [np .ndarray ]:
412+ """Build a PSU map for a bootstrap target from IDs (not positions).
413+
414+ The caller passes:
415+ - ``group_id_to_psu_code``: a dict mapping each variance-eligible
416+ group ID to its dense PSU code (built once in ``fit()``).
417+ - ``eligible_group_ids``: the ordered list of group IDs that
418+ correspond to the current target's ``u_centered`` vector.
419+
420+ Returns an integer array of length ``target_size`` where entry
421+ ``i`` is the PSU code for the ``i``-th contributing group.
422+
423+ Returns ``None`` when no PSU information is available (plain
424+ multiplier-bootstrap path — identity across targets).
425+
426+ Raises ``ValueError`` if ``target_size`` does not match
427+ ``len(eligible_group_ids)``: every current dCDH bootstrap target
428+ uses the variance-eligible group ordering, so any size mismatch
429+ signals that a caller introduced a target whose group subset
430+ diverges and should pass its own ``target_group_ids`` rather than
431+ reusing the overall eligible list. Also raises ``ValueError`` if
432+ any group ID is missing from the dict (signaling misalignment
433+ between the target's IF vector and the map's keys).
434+ """
435+ if group_id_to_psu_code is None or eligible_group_ids is None :
436+ return None
437+ if target_size != len (eligible_group_ids ):
438+ raise ValueError (
439+ f"Bootstrap target size ({ target_size } ) does not match "
440+ f"eligible_group_ids length ({ len (eligible_group_ids )} ). "
441+ "dCDH's bootstrap contract requires all current targets to "
442+ "use the variance-eligible group ordering; if a new target "
443+ "has a different ordering, pass target-specific group IDs "
444+ "to _map_for_target rather than reusing eligible_group_ids."
445+ )
446+ try :
447+ return np .array (
448+ [group_id_to_psu_code [gid ] for gid in eligible_group_ids ],
449+ dtype = np .int64 ,
450+ )
451+ except KeyError as e :
452+ raise ValueError (
453+ f"Group ID { e .args [0 ]!r} in eligible_group_ids has no entry "
454+ f"in group_id_to_psu_code — PSU map is misaligned with the "
455+ f"bootstrap target's group set."
456+ ) from e
457+
458+
def _generate_psu_or_group_weights(
    n_bootstrap: int,
    n_groups_target: int,
    weight_type: str,
    rng: np.random.Generator,
    group_to_psu_map: Optional[np.ndarray],
) -> np.ndarray:
    """Generate a group-level weight matrix, possibly via PSU broadcasting.

    When ``group_to_psu_map`` is ``None`` or is the identity (each group
    is its own PSU), generates weights at the group level directly —
    bit-identical to the pre-PSU-bootstrap contract.

    When ``group_to_psu_map`` has fewer unique values than
    ``n_groups_target`` (strictly coarser PSU than group), generates
    weights at the PSU level and broadcasts to groups via the map.
    This is the Hall-Mammen wild PSU bootstrap.

    Parameters
    ----------
    n_bootstrap, weight_type, rng
        Passed through to generate_bootstrap_weights_batch.
    n_groups_target : int
        Number of groups contributing to the target's IF vector.
    group_to_psu_map : np.ndarray or None
        Dense integer PSU indices of shape ``(n_groups_target,)``.
        ``None`` triggers the group-level path.

    Returns
    -------
    np.ndarray
        Shape ``(n_bootstrap, n_groups_target)`` multiplier weights.

    Raises
    ------
    ValueError
        If ``group_to_psu_map`` length disagrees with ``n_groups_target``.
    """
    if group_to_psu_map is None:
        return _generate_bootstrap_weights_batch(
            n_bootstrap=n_bootstrap,
            n_units=n_groups_target,
            weight_type=weight_type,
            rng=rng,
        )
    if len(group_to_psu_map) != n_groups_target:
        raise ValueError(
            f"group_to_psu_map length ({len(group_to_psu_map)}) does not "
            f"match n_groups_target ({n_groups_target})."
        )
    # Identity detection must count *distinct* PSU codes, not `max+1`:
    # a sparse map like [5, 5] has max+1 = 6 > 2 groups, yet both groups
    # share PSU 5 and must share a multiplier. `np.unique` with
    # return_inverse both counts distinct PSUs and densifies the codes.
    # For the documented dense-code contract (codes 0..n_psu-1) this is
    # bit-identical to the original behavior: same n_psu, same RNG
    # draws, same broadcast.
    unique_psus, dense_map = np.unique(group_to_psu_map, return_inverse=True)
    n_psu = int(unique_psus.size)
    if n_psu == n_groups_target:
        # Identity (each group its own PSU) — skip the broadcast for a
        # bit-identical fast path matching the pre-PSU-bootstrap behavior.
        return _generate_bootstrap_weights_batch(
            n_bootstrap=n_bootstrap,
            n_units=n_groups_target,
            weight_type=weight_type,
            rng=rng,
        )
    # Hall-Mammen wild PSU bootstrap: draw n_psu multipliers, broadcast
    # via the dense index map so all groups in the same PSU share a
    # multiplier. Preserves clustered sampling structure.
    psu_weights = _generate_bootstrap_weights_batch(
        n_bootstrap=n_bootstrap,
        n_units=n_psu,
        weight_type=weight_type,
        rng=rng,
    )
    return psu_weights[:, dense_map]
524+
525+
344526def _bootstrap_one_target (
345527 u_centered : np .ndarray ,
346528 divisor : int ,
@@ -351,6 +533,7 @@ def _bootstrap_one_target(
351533 rng : np .random .Generator ,
352534 context : str ,
353535 return_distribution : bool ,
536+ group_to_psu_map : Optional [np .ndarray ] = None ,
354537) -> Tuple [float , Tuple [float , float ], float , Optional [np .ndarray ]]:
355538 """
356539 Run the multiplier bootstrap for a single dCDH target.
@@ -367,16 +550,24 @@ def _bootstrap_one_target(
367550 sample mean of the bootstrap distribution should be approximately
368551 zero, not the original effect). The original effect is passed
369552 separately as the centering point for the percentile p-value.
553+
554+ When ``group_to_psu_map`` is provided (length ``len(u_centered)``,
555+ dense integer PSU indices), multiplier weights are generated at the
556+ PSU level and broadcast to groups so all groups in the same PSU
557+ receive the same bootstrap multiplier. This is the Hall-Mammen wild
558+ PSU bootstrap; it reduces to the group-level bootstrap when each
559+ group is its own PSU (identity map).
370560 """
371561 n_groups_target = u_centered .shape [0 ]
372562 if n_groups_target == 0 or divisor == 0 :
373563 return np .nan , (np .nan , np .nan ), np .nan , None
374564
375- weight_matrix = _generate_bootstrap_weights_batch (
565+ weight_matrix = _generate_psu_or_group_weights (
376566 n_bootstrap = n_bootstrap ,
377- n_units = n_groups_target ,
567+ n_groups_target = n_groups_target ,
378568 weight_type = weight_type ,
379569 rng = rng ,
570+ group_to_psu_map = group_to_psu_map ,
380571 )
381572
382573 # Each bootstrap replicate: (1 / divisor) * sum_g w_b[g] * u_centered[g]
0 commit comments