Skip to content

Commit 8e1282b

Browse files
authored
Merge pull request #396 from igerber/fix/doc-snippet-had-bugs
Fix latent doc-snippet bugs from PR #389 (HAD ecosystem)
2 parents b560c80 + 0328b4a commit 8e1282b

3 files changed

Lines changed: 123 additions & 24 deletions

File tree

docs/r_comparison.rst

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,11 +237,28 @@ identification assumptions (the design path is auto-detected separately by
237237

238238
.. code-block:: python
239239
240+
import numpy as np
241+
import pandas as pd
240242
from diff_diff import HeterogeneousAdoptionDiD
241243
244+
# Build a HAD-shape panel: D=0 in pre-periods (t < F), D > 0 only from F onward (t >= F).
245+
rng = np.random.default_rng(42)
246+
G, F, T = 200, 4, 5
247+
doses = rng.beta(0.5, 1.0, size=G)
248+
rows = []
249+
for g in range(G):
250+
for t in range(1, T + 1):
251+
y = (rng.normal()
252+
+ (doses[g] + doses[g] ** 2) * (t >= F)
253+
+ rng.normal(0, 0.5))
254+
d = doses[g] if t >= F else 0.0
255+
rows.append({'unit': g, 'period': t, 'y': y, 'dose': d})
256+
had_data = pd.DataFrame(rows)
257+
242258
est = HeterogeneousAdoptionDiD()
243-
results = est.fit(data, outcome_col='y', unit_col='unit',
244-
time_col='period', dose_col='dose')
259+
results = est.fit(had_data, outcome_col='y', unit_col='unit',
260+
time_col='period', dose_col='dose',
261+
aggregate='event_study')
245262
246263
Key Differences
247264
---------------

docs/troubleshooting.rst

Lines changed: 103 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -483,37 +483,64 @@ HeterogeneousAdoptionDiD (HAD) Issues
483483
**Problem:** ``HeterogeneousAdoptionDiD`` resolves ``target_parameter`` to
484484
``"WAS_d_lower"`` when you expected ``"WAS"`` (or vice versa).
485485

486-
**Cause:** HAD auto-detects the design path from the dose distribution. The
487-
``_detect_design`` rule resolves to Design 1' (``continuous_at_zero``,
488-
targets WAS) when EITHER ``d.min() == 0`` exactly OR ``d.min()`` is a small
489-
positive value below ``0.01 * median(|d|)`` (the small-share-of-treated
490-
escape clause). Otherwise (``d.min()`` larger than that threshold) the
491-
estimator routes to Design 1, with a further check for mass-point structure
492-
(modal fraction at ``d.min()`` exceeding 2% routes to ``mass_point``;
493-
otherwise ``continuous_near_d_lower``); both Design 1 paths target
494-
``WAS_{d_lower}``. So a Design 1 resolution only fires when ``d.min()``
495-
is meaningfully positive relative to the dose scale.
486+
**Cause:** HAD auto-detects the design path from the unit-level
487+
post-treatment dose ``D_{g,F}`` (the dose at the first treated period
488+
``F``, one value per unit), NOT from the full panel ``dose`` column. The
489+
panel column carries structural pre-period zeros (HAD requires
490+
``D_{g,t} = 0`` for ``t < F``), so ``had_data['dose'].min()`` is always
491+
zero on a valid HAD panel and tells you nothing about the resolved
492+
design. ``_detect_design`` then resolves on ``D_{g,F}`` and picks Design
493+
1' (``continuous_at_zero``, targets WAS) when EITHER
494+
``D_{g,F}.min() == 0`` exactly OR ``D_{g,F}.min()`` is a small positive
495+
value below ``0.01 * median(|D_{g,F}|)`` (the small-share-of-treated
496+
escape clause). Otherwise the estimator routes to Design 1, with a
497+
further check for mass-point structure (modal fraction at ``D_{g,F}.min()``
498+
exceeding 2% routes to ``mass_point``; otherwise
499+
``continuous_near_d_lower``); both Design 1 paths target ``WAS_{d_lower}``.
496500

497501
**Solutions:**
498502

499503
.. code-block:: python
500504
501-
# Inspect the dose support before fitting
502505
import numpy as np
503-
d = data['dose'].to_numpy()
504-
print(data['dose'].describe())
505-
print(f"d.min() = {d.min():.6g}; "
506-
f"0.01 * median(|d|) = {0.01 * np.median(np.abs(d)):.6g}; "
507-
f"d.min() < threshold => Design 1' (WAS)")
506+
import pandas as pd
507+
from diff_diff import HeterogeneousAdoptionDiD
508+
509+
# Build a HAD-shape panel: D=0 in pre-periods (t < F), D > 0 only from F onward (t >= F).
510+
rng = np.random.default_rng(42)
511+
G, F, T = 200, 4, 5
512+
doses = rng.beta(0.5, 1.0, size=G)
513+
rows = []
514+
for g in range(G):
515+
for t in range(1, T + 1):
516+
y = (rng.normal()
517+
+ (doses[g] + doses[g] ** 2) * (t >= F)
518+
+ rng.normal(0, 0.5))
519+
d = doses[g] if t >= F else 0.0
520+
rows.append({'unit': g, 'period': t, 'y': y, 'dose': d})
521+
had_data = pd.DataFrame(rows)
522+
523+
# Inspect the support the detector actually uses: per-unit dose at the
524+
# first treated period F. Pre-period zeros on the panel column are
525+
# structural and ignored by `_detect_design()`.
526+
d_at_F = had_data.loc[had_data['period'] == F].set_index('unit')['dose']
527+
print(d_at_F.describe())
528+
d_min = float(d_at_F.min())
529+
d_thr = 0.01 * float(np.median(np.abs(d_at_F)))
530+
print(f"D_{{g,F}}.min() = {d_min:.6g}; "
531+
f"0.01 * median(|D_{{g,F}}|) = {d_thr:.6g}; "
532+
f"D_{{g,F}}.min() < threshold => Design 1' (WAS)")
508533
509534
# Check the resolved estimand after fitting
510-
results = est.fit(data, outcome_col='y', unit_col='unit',
511-
time_col='period', dose_col='dose')
535+
est = HeterogeneousAdoptionDiD()
536+
results = est.fit(had_data, outcome_col='y', unit_col='unit',
537+
time_col='period', dose_col='dose',
538+
aggregate='event_study')
512539
print(f"Resolved: {results.target_parameter}")
513540
514-
# If you intend Design 1' but `d.min()` exceeds the threshold, verify
515-
# the dose-variable encoding (e.g. log-transformed doses where 0 was
516-
# mapped to a small positive value larger than 1% of the median).
541+
# If you intend Design 1' but `D_{g,F}.min()` exceeds the threshold,
542+
# verify the dose-variable encoding (e.g. log-transformed doses where
543+
# 0 was mapped to a small positive value larger than 1% of the median).
517544
518545
"Mass-point design selected"
519546
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -536,6 +563,37 @@ SE path is not used here).
536563

537564
.. code-block:: python
538565
566+
import numpy as np
567+
import pandas as pd
568+
from diff_diff import HeterogeneousAdoptionDiD
569+
570+
# Build a HAD panel with a heavy boundary mass at d_lower so the
571+
# modal fraction at D_{g,F}.min() exceeds 2% and `_detect_design` resolves
572+
# to `mass_point`.
573+
rng = np.random.default_rng(42)
574+
G, F, T = 200, 4, 5
575+
d_lower = 0.5
576+
mass_frac = 0.3
577+
doses = np.where(
578+
rng.uniform(size=G) < mass_frac,
579+
d_lower,
580+
rng.uniform(d_lower + 0.1, 2.0, size=G),
581+
)
582+
rows = []
583+
for g in range(G):
584+
for t in range(1, T + 1):
585+
y = (rng.normal()
586+
+ doses[g] * (t >= F)
587+
+ rng.normal(0, 0.5))
588+
d = doses[g] if t >= F else 0.0
589+
rows.append({'unit': g, 'period': t, 'y': y, 'dose': d})
590+
had_data = pd.DataFrame(rows)
591+
592+
est = HeterogeneousAdoptionDiD()
593+
results = est.fit(had_data, outcome_col='y', unit_col='unit',
594+
time_col='period', dose_col='dose',
595+
aggregate='event_study')
596+
539597
# Inspect the resolved design
540598
print(f"Design: {results.design}") # 'mass_point' here
541599
@@ -593,6 +651,30 @@ a ``UserWarning``). The fit raises only when the panel is staggered
593651

594652
.. code-block:: python
595653
654+
import numpy as np
655+
import pandas as pd
656+
657+
# Build a staggered HAD panel for this example: 120 units, three
658+
# cohorts (30 never-treated + 30 treated at period 5 + 60 treated at
659+
# period 8). Dose is zero pre-treatment per unit and a constant
660+
# positive value post-treatment, so the first_treat / dose-path
661+
# consistency validator passes. The 60-unit last cohort gives the
662+
# boundary local-linear estimator enough distinct dose values to fit.
663+
np.random.seed(42)
664+
n_units, n_periods = 120, 10
665+
first_treat_per_unit = np.array([0] * 30 + [5] * 30 + [8] * 60)
666+
dose_per_unit = np.where(
667+
first_treat_per_unit > 0, np.random.uniform(0.5, 2.0, n_units), 0.0
668+
)
669+
rows = []
670+
for u in range(n_units):
671+
ft = first_treat_per_unit[u]
672+
for t in range(n_periods):
673+
d_ut = dose_per_unit[u] if (ft > 0 and t >= ft) else 0.0
674+
y_ut = (d_ut > 0) * dose_per_unit[u] * 0.5 + np.random.normal()
675+
rows.append((u, t, d_ut, ft, y_ut))
676+
data = pd.DataFrame(rows, columns=["unit", "period", "dose", "first_treat", "y"])
677+
596678
# Primary remedy: pass `first_treat_col` so the estimator auto-filters
597679
# to the last-treatment cohort + never-treated and emits a UserWarning.
598680
est = HeterogeneousAdoptionDiD()

tests/test_doc_snippets.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ def _restore_datasets_module():
365365
"r_comparison:block2",
366366
"r_comparison:block3",
367367
"r_comparison:block4",
368-
"r_comparison:block6",
368+
"r_comparison:block7",
369369
"troubleshooting:block8",
370370
}
371371

0 commit comments

Comments
 (0)