@@ -483,37 +483,64 @@ HeterogeneousAdoptionDiD (HAD) Issues
483483**Problem:** ``HeterogeneousAdoptionDiD`` resolves ``target_parameter`` to
484484``"WAS_d_lower"`` when you expected ``"WAS"`` (or vice versa).
485485
486- **Cause: ** HAD auto-detects the design path from the dose distribution. The
487- ``_detect_design `` rule resolves to Design 1' (``continuous_at_zero ``,
488- targets WAS) when EITHER ``d.min() == 0 `` exactly OR ``d.min() `` is a small
489- positive value below ``0.01 * median(|d|) `` (the small-share-of-treated
490- escape clause). Otherwise (``d.min() `` larger than that threshold) the
491- estimator routes to Design 1, with a further check for mass-point structure
492- (modal fraction at ``d.min() `` exceeding 2% routes to ``mass_point ``;
493- otherwise ``continuous_near_d_lower ``); both Design 1 paths target
494- ``WAS_{d_lower} ``. So a Design 1 resolution only fires when ``d.min() ``
495- is meaningfully positive relative to the dose scale.
486+ **Cause:** HAD auto-detects the design path from the unit-level
487+ post-treatment dose ``D_{g,F}`` (the dose at the first treated period
488+ ``F``, one value per unit), NOT from the full panel ``dose`` column. The
489+ panel column carries structural pre-period zeros (HAD requires
490+ ``D_{g,t} = 0`` for ``t < F``), so ``had_data['dose'].min()`` is always
491+ zero on a valid HAD panel and tells you nothing about the resolved
492+ design. ``_detect_design`` then resolves on ``D_{g,F}`` and picks Design
493+ 1' (``continuous_at_zero``, targets WAS) when EITHER
494+ ``D_{g,F}.min() == 0`` exactly OR ``D_{g,F}.min()`` is a small positive
495+ value below ``0.01 * median(|D_{g,F}|)`` (the small-share-of-treated
496+ escape clause). Otherwise the estimator routes to Design 1, with a
497+ further check for mass-point structure (modal fraction at ``D_{g,F}.min()``
498+ exceeding 2% routes to ``mass_point``; otherwise
499+ ``continuous_near_d_lower``); both Design 1 paths target ``WAS_{d_lower}``.
496500
497501**Solutions: **
498502
499503.. code-block :: python
500504
501- # Inspect the dose support before fitting
502505 import numpy as np
503- d = data[' dose' ].to_numpy()
504- print (data[' dose' ].describe())
505- print (f " d.min() = { d.min():.6g } ; "
506- f " 0.01 * median(|d|) = { 0.01 * np.median(np.abs(d)):.6g } ; "
507- f " d.min() < threshold => Design 1' (WAS) " )
506+ import pandas as pd
507+ from diff_diff import HeterogeneousAdoptionDiD
508+
509+ # Build a HAD-shape panel: D=0 in pre-periods (t < F), D > 0 only at F+.
510+ rng = np.random.default_rng(42 )
511+ G, F, T = 200 , 4 , 5
512+ doses = rng.beta(0.5 , 1.0 , size = G)
513+ rows = []
514+ for g in range (G):
515+ for t in range (1 , T + 1 ):
516+ y = (rng.normal()
517+ + (doses[g] + doses[g] ** 2 ) * (t >= F)
518+ + rng.normal(0 , 0.5 ))
519+ d = doses[g] if t >= F else 0.0
520+ rows.append({' unit' : g, ' period' : t, ' y' : y, ' dose' : d})
521+ had_data = pd.DataFrame(rows)
522+
523+ # Inspect the support the detector actually uses: per-unit dose at the
524+ # first treated period F. Pre-period zeros on the panel column are
525+ # structural and ignored by `_detect_design()`.
526+ d_at_F = had_data.loc[had_data[' period' ] == F].set_index(' unit' )[' dose' ]
527+ print (d_at_F.describe())
528+ d_min = float (d_at_F.min())
529+ d_thr = 0.01 * float (np.median(np.abs(d_at_F)))
530+ print(f"D_{{g,F}}.min() = {d_min:.6g}; "
531+       f"0.01 * median(|D_{{g,F}}|) = {d_thr:.6g}; "
532+       f"D_{{g,F}}.min() < threshold => Design 1' (WAS)")
508533
509534 # Check the resolved estimand after fitting
510- results = est.fit(data, outcome_col = ' y' , unit_col = ' unit' ,
511- time_col = ' period' , dose_col = ' dose' )
535+ est = HeterogeneousAdoptionDiD()
536+ results = est.fit(had_data, outcome_col = ' y' , unit_col = ' unit' ,
537+ time_col = ' period' , dose_col = ' dose' ,
538+ aggregate = ' event_study' )
512539 print (f " Resolved: { results.target_parameter} " )
513540
514- # If you intend Design 1' but `d .min()` exceeds the threshold, verify
515- # the dose-variable encoding (e.g. log-transformed doses where 0 was
516- # mapped to a small positive value larger than 1% of the median).
541+ # If you intend Design 1' but `D_{g,F}.min()` exceeds the threshold,
542+ # verify the dose-variable encoding (e.g. log-transformed doses where
543+ # 0 was mapped to a small positive value larger than 1% of the median).
517544
518545 "Mass-point design selected"
519546~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -536,6 +563,37 @@ SE path is not used here).
536563
537564.. code-block :: python
538565
566+ import numpy as np
567+ import pandas as pd
568+ from diff_diff import HeterogeneousAdoptionDiD
569+
570+ # Build a HAD panel with a heavy boundary mass at d_lower so the modal
571+ # fraction at D_{g,F}.min() (per-unit dose at period F, not the panel
572+ # column's min of 0) exceeds 2% and `_detect_design` picks `mass_point`.
573+ rng = np.random.default_rng(42 )
574+ G, F, T = 200 , 4 , 5
575+ d_lower = 0.5
576+ mass_frac = 0.3
577+ doses = np.where(
578+ rng.uniform(size = G) < mass_frac,
579+ d_lower,
580+ rng.uniform(d_lower + 0.1 , 2.0 , size = G),
581+ )
582+ rows = []
583+ for g in range (G):
584+ for t in range (1 , T + 1 ):
585+ y = (rng.normal()
586+ + doses[g] * (t >= F)
587+ + rng.normal(0 , 0.5 ))
588+ d = doses[g] if t >= F else 0.0
589+ rows.append({' unit' : g, ' period' : t, ' y' : y, ' dose' : d})
590+ had_data = pd.DataFrame(rows)
591+
592+ est = HeterogeneousAdoptionDiD()
593+ results = est.fit(had_data, outcome_col = ' y' , unit_col = ' unit' ,
594+ time_col = ' period' , dose_col = ' dose' ,
595+ aggregate = ' event_study' )
596+
539597 # Inspect the resolved design
540598 print (f " Design: { results.design} " ) # 'mass_point' here
541599
@@ -593,6 +651,30 @@ a ``UserWarning``). The fit raises only when the panel is staggered
593651
594652.. code-block :: python
595653
654+ import numpy as np
655+ import pandas as pd
656+
657+ # Build a staggered HAD panel for this example: 120 units, three
658+ # cohorts (30 never-treated + 30 treated at period 5 + 60 treated at
659+ # period 8). Dose is zero pre-treatment per unit and a constant
660+ # positive value post-treatment, so the first_treat / dose-path
661+ # consistency validator passes. The 60-unit last cohort gives the
662+ # boundary local-linear estimator enough distinct dose values to fit.
663+ np.random.seed(42 )
664+ n_units, n_periods = 120 , 10
665+ first_treat_per_unit = np.array([0 ] * 30 + [5 ] * 30 + [8 ] * 60 )
666+ dose_per_unit = np.where(
667+ first_treat_per_unit > 0 , np.random.uniform(0.5 , 2.0 , n_units), 0.0
668+ )
669+ rows = []
670+ for u in range (n_units):
671+ ft = first_treat_per_unit[u]
672+ for t in range (n_periods):
673+ d_ut = dose_per_unit[u] if (ft > 0 and t >= ft) else 0.0
674+ y_ut = (d_ut > 0 ) * dose_per_unit[u] * 0.5 + np.random.normal()
675+ rows.append((u, t, d_ut, ft, y_ut))
676+ data = pd.DataFrame(rows, columns = [" unit" , " period" , " dose" , " first_treat" , " y" ])
677+
596678 # Primary remedy: pass `first_treat_col` so the estimator auto-filters
597679 # to the last-treatment cohort + never-treated and emits a UserWarning.
598680 est = HeterogeneousAdoptionDiD()
0 commit comments