Skip to content

Commit c320dde

Browse files
igerberclaude
andcommitted
Address PR #346 CI review round 2: 2 P1s + 1 P2
**P1 #1 (Methodology): continuous_near_d_lower on mass-point samples** When a user explicitly forced design="continuous_near_d_lower" on a sample that actually satisfies the >2% modal-fraction mass-point criterion, the downstream regressor shift (D - d_lower) would move the support minimum to zero on the shifted scale. Phase 1c's mass-point rejection guard only fires when d.min() > 0 (_validate_had_inputs), so the silent coercion ran the nonparametric local-linear estimator on a sample the paper (Section 3.2.4) requires to use the 2SLS branch, producing the wrong estimand. Fix: `HeterogeneousAdoptionDiD.fit()` now runs the modal-fraction check on the ORIGINAL (unshifted) d_arr when the user explicitly selects design="continuous_near_d_lower". If the fraction at d.min() exceeds 2%, the fit raises ValueError pointing to design="mass_point" or design="auto". design="auto" is unaffected (_detect_design already correctly resolves such samples to mass_point). **P1 #2 (Code Quality): first_treat_col validator not dtype-agnostic** The previous validator called `.astype(np.float64)` and `int(v)` on grouped first_treat values, which crashed on otherwise-supported string-labelled two-period panels (period in {"A","B"}, first_treat in {0, "B"}). Rewrote using `pd.isna()` for missingness and raw-value set-membership against `{0, t_post}` with no numeric coercion. **P2 (Maintainability): cluster-applied mass-point stored wrong vcov_type** When cluster was supplied, `_fit_mass_point_2sls` unconditionally switches to the CR1 cluster-robust sandwich, but the result object stored the REQUESTED family ("hc1" or "classical") as `vcov_type`. `summary()` rendered correctly via the cluster_name branch, but `to_dict()` and downstream programmatic consumers saw the stale requested label. Fixed: when cluster is supplied, `vcov_type` is stored as `"cr1"` regardless of the requested family. Renamed the local variable from `vcov_effective` to `vcov_requested` to separate the input from the effective family. Updated the `HeterogeneousAdoptionDiDResults.summary()` branch so the cluster rendering still works with the new stored value. **Tests added (+8 regression):** - TestValidateHadPanel.test_first_treat_col_with_string_periods - TestValidateHadPanel.test_first_treat_col_dtype_agnostic_rejects_invalid_string - TestContinuousPathRejectsMassPoint (2 tests) - TestMassPointClusterLabel (4 tests: cr1 stored when clustered, base family when unclustered, classical+cluster collapses to cr1, to_dict shows effective family) Targeted regression: 126 HAD tests + 505 total across Phase 1 and adjacent surfaces, all green. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 8c38f07 commit c320dde

2 files changed

Lines changed: 174 additions & 15 deletions

File tree

diff_diff/had.py

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -189,10 +189,15 @@ class HeterogeneousAdoptionDiDResults:
189189
``"analytical_nonparametric"`` (continuous designs) or
190190
``"analytical_2sls"`` (mass-point).
191191
vcov_type : str or None
192-
Variance-covariance family used. ``None`` on continuous paths
193-
(they use the CCT-2014 robust SE from Phase 1c, not the library's
194-
``vcov_type`` enum). Mass-point: one of ``"classical"`` or
195-
``"hc1"`` (CR1 when ``cluster`` is supplied).
192+
Effective variance-covariance family used. ``None`` on continuous
193+
paths (they use the CCT-2014 robust SE from Phase 1c, not the
194+
library's ``vcov_type`` enum). Mass-point: ``"classical"`` or
195+
``"hc1"`` when ``cluster`` is not supplied, and ``"cr1"``
196+
whenever ``cluster`` is supplied (cluster-robust CR1 is computed
197+
regardless of the requested ``vcov_type`` because
198+
classical/hc1 + cluster collapses to the same CR1 sandwich).
199+
Downstream consumers reading ``result.to_dict()`` can inspect
200+
this field directly to determine the effective SE family.
196201
cluster_name : str or None
197202
Column name of the cluster variable on the mass-point path when
198203
cluster-robust SE is requested. ``None`` otherwise.
@@ -269,9 +274,12 @@ def summary(self) -> str:
269274
lines.append(f"{'Strictly above d_lower:':<30} {self.n_above_d_lower:>20}")
270275
lines.append(f"{'Inference method:':<30} {self.inference_method:>20}")
271276
if self.vcov_type is not None:
272-
label = self.vcov_type
273277
if self.cluster_name is not None:
278+
# Cluster-robust (CR1): the stored vcov_type is already "cr1",
279+
# but render with the cluster column for clarity.
274280
label = f"CR1 at {self.cluster_name}"
281+
else:
282+
label = self.vcov_type
275283
lines.append(f"{'Variance:':<30} {label:>20}")
276284
if self.bandwidth_diagnostics is not None:
277285
bw = self.bandwidth_diagnostics
@@ -476,20 +484,24 @@ def _validate_had_panel(
476484
# domain check that catches typos and staggered-timing mix-ups; it
477485
# does NOT cross-validate first_treat against post-period dose
478486
# (D_{g, t_post} remains the primary signal). Extended cross-checks
479-
# are queued for a follow-up PR.
487+
# are queued for a follow-up PR. The check is DTYPE-AGNOSTIC: it uses
488+
# pd.isna() for missingness and raw-value membership against
489+
# {0, t_post} so that string-labelled periods (e.g., ("A", "B")) with
490+
# first_treat in {0, "B"} are supported.
480491
if first_treat_col is not None:
481-
ft = data.groupby(unit_col)[first_treat_col].first().to_numpy()
482-
if np.any(np.isnan(ft.astype(np.float64, copy=False))):
492+
ft_series = data.groupby(unit_col)[first_treat_col].first()
493+
if bool(ft_series.isna().any()):
483494
raise ValueError(
484495
f"first_treat_col={first_treat_col!r} contains NaN. "
485496
f"Use 0 for never-treated units and t_post for treated."
486497
)
487498
valid_values = {0, t_post}
488-
bad = [v for v in np.unique(ft) if int(v) not in valid_values]
499+
observed = set(ft_series.unique().tolist())
500+
bad = sorted(observed - valid_values, key=lambda x: str(x))
489501
if bad:
490502
raise ValueError(
491503
f"first_treat_col={first_treat_col!r} contains value(s) "
492-
f"{bad} outside the allowed set {{0, {t_post}}} for a "
504+
f"{bad} outside the allowed set {{0, {t_post!r}}} for a "
493505
f"two-period HAD panel. Staggered timing with multiple "
494506
f"cohorts is Phase 2b."
495507
)
@@ -1042,6 +1054,34 @@ def fit(
10421054
else:
10431055
d_lower_val = float(d_lower_arg)
10441056

1057+
# ---- Original-scale mass-point check before the regressor shift ----
1058+
# When a user explicitly forces design="continuous_near_d_lower"
1059+
# on a sample that is actually a mass-point sample (modal fraction
1060+
# at d.min() > 2% per paper Section 3.2.4), the downstream code
1061+
# would shift D - d_lower, making the support minimum zero on the
1062+
# shifted scale and suppressing the Phase 1c mass-point rejection
1063+
# guard (_validate_had_inputs only checks modal fraction when
1064+
# d.min() > 0). That silent coercion produces wrong CIs for a
1065+
# mass-point sample by running the nonparametric estimator where
1066+
# the paper's 2SLS branch is required. Front-door reject.
1067+
if resolved_design == "continuous_near_d_lower":
1068+
d_min_orig = float(d_arr.min())
1069+
if d_min_orig > 0:
1070+
eps_mp = 1e-12 * max(1.0, abs(d_min_orig))
1071+
at_d_min_mask_orig = np.abs(d_arr - d_min_orig) <= eps_mp
1072+
modal_fraction_orig = float(np.mean(at_d_min_mask_orig))
1073+
if modal_fraction_orig > _MASS_POINT_THRESHOLD:
1074+
raise ValueError(
1075+
f"design='continuous_near_d_lower' cannot be used on a "
1076+
f"mass-point sample (modal fraction {modal_fraction_orig:.4f} "
1077+
f"at d.min()={d_min_orig!r} exceeds the "
1078+
f"{_MASS_POINT_THRESHOLD:.2f} threshold from paper Section "
1079+
f"3.2.4). Use design='mass_point' (Wald-IV / 2SLS) or "
1080+
f"design='auto' which will auto-detect. Forcing the "
1081+
f"continuous path on a mass-point sample would produce "
1082+
f"the wrong estimand."
1083+
)
1084+
10451085
# ---- d_lower contract for Design 1 paths ----
10461086
# Paper Sections 3.2.2-3.2.4 define the Design 1 estimators at
10471087
# d_lower = support infimum of D_{g,2}. For the mass-point path,
@@ -1148,21 +1188,25 @@ def fit(
11481188
elif resolved_design == "mass_point":
11491189
if vcov_type_arg is None:
11501190
# Backward-compat: robust=True -> hc1, robust=False -> classical.
1151-
vcov_effective = "hc1" if robust_arg else "classical"
1191+
vcov_requested = "hc1" if robust_arg else "classical"
11521192
else:
1153-
vcov_effective = vcov_type_arg.lower()
1154-
# Explicit cluster + hc1 becomes CR1 inside _fit_mass_point_2sls.
1193+
vcov_requested = vcov_type_arg.lower()
11551194
att, se = _fit_mass_point_2sls(
11561195
d_arr,
11571196
dy_arr,
11581197
d_lower_val,
11591198
cluster_arr,
1160-
vcov_effective,
1199+
vcov_requested,
11611200
)
11621201
bc_fit = None
11631202
bw_diag = None
11641203
inference_method = "analytical_2sls"
1165-
vcov_label = vcov_effective
1204+
# Store the EFFECTIVE variance family so downstream consumers
1205+
# (to_dict, to_dataframe, summary) see the actual SE type that
1206+
# was computed. When cluster is supplied, _fit_mass_point_2sls
1207+
# unconditionally computes CR1 regardless of vcov_requested
1208+
# (e.g. classical+cluster -> CR1), so we surface that here.
1209+
vcov_label = "cr1" if cluster_arg is not None else vcov_requested
11661210
cluster_label = cluster_arg if cluster_arg is not None else None
11671211
else:
11681212
raise ValueError(f"Internal error: unhandled design={resolved_design!r}.")

tests/test_had.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1404,3 +1404,118 @@ def test_rejects_string_periods_gracefully(self):
14041404
t_pre, t_post = _validate_had_panel(panel, "outcome", "dose", "period", "unit", None)
14051405
assert t_pre == "A"
14061406
assert t_post == "B"
1407+
1408+
def test_first_treat_col_with_string_periods(self):
1409+
"""Review P1: first_treat_col validator must be dtype-agnostic.
1410+
1411+
With string periods ("A", "B") and first_treat_col values in
1412+
{0, "B"}, the validator must not attempt numeric coercion.
1413+
"""
1414+
d, dy = _dgp_continuous_at_zero(100, seed=0)
1415+
panel = _make_panel(d, dy, periods=("A", "B"))
1416+
# 50 units never-treated (first_treat=0), 50 treated (first_treat="B")
1417+
ft_unit = np.array([0 if i % 2 == 0 else "B" for i in range(100)], dtype=object)
1418+
panel["ft"] = np.repeat(ft_unit, 2)
1419+
t_pre, t_post = _validate_had_panel(panel, "outcome", "dose", "period", "unit", "ft")
1420+
assert t_pre == "A"
1421+
assert t_post == "B"
1422+
1423+
def test_first_treat_col_dtype_agnostic_rejects_invalid_string(self):
1424+
"""Mix string periods + invalid first_treat_col string -> ValueError."""
1425+
d, dy = _dgp_continuous_at_zero(100, seed=0)
1426+
panel = _make_panel(d, dy, periods=("A", "B"))
1427+
# Invalid: "Z" is neither 0 nor "B"
1428+
ft_unit = np.array([0 if i % 2 == 0 else "Z" for i in range(100)], dtype=object)
1429+
panel["ft"] = np.repeat(ft_unit, 2)
1430+
with pytest.raises(ValueError, match="first_treat_col"):
1431+
_validate_had_panel(panel, "outcome", "dose", "period", "unit", "ft")
1432+
1433+
1434+
# =============================================================================
1435+
# Review P1: continuous_near_d_lower on a true mass-point sample rejects
1436+
# =============================================================================
1437+
1438+
1439+
class TestContinuousPathRejectsMassPoint:
1440+
"""Explicit override to continuous_near_d_lower on a mass-point sample
1441+
must raise before the regressor shift, otherwise the Phase 1c
1442+
mass-point guard (which fires only on d.min() > 0) is bypassed.
1443+
"""
1444+
1445+
def test_continuous_near_on_mass_point_sample_raises(self):
1446+
d, dy = _dgp_mass_point(500, seed=0)
1447+
panel = _make_panel(d, dy)
1448+
est = HeterogeneousAdoptionDiD(design="continuous_near_d_lower")
1449+
with pytest.raises(ValueError, match=r"mass-point sample|mass_point"):
1450+
est.fit(panel, "outcome", "dose", "period", "unit")
1451+
1452+
def test_continuous_near_on_continuous_sample_runs(self):
1453+
"""Sanity: the pre-shift check does NOT reject valid continuous samples."""
1454+
d, dy = _dgp_continuous_near_d_lower(500, seed=0)
1455+
panel = _make_panel(d, dy)
1456+
with warnings.catch_warnings():
1457+
warnings.simplefilter("ignore", UserWarning)
1458+
r = HeterogeneousAdoptionDiD(design="continuous_near_d_lower").fit(
1459+
panel, "outcome", "dose", "period", "unit"
1460+
)
1461+
assert np.isfinite(r.att)
1462+
1463+
1464+
# =============================================================================
1465+
# Review P2: cluster-applied mass-point stores vcov_type="cr1"
1466+
# =============================================================================
1467+
1468+
1469+
class TestMassPointClusterLabel:
1470+
def test_cluster_stores_cr1(self):
1471+
"""to_dict() / downstream consumers see 'cr1' not 'hc1' when clustered."""
1472+
d, dy = _dgp_mass_point(200, seed=0)
1473+
cluster_unit = np.repeat(np.arange(50), 4)
1474+
panel = _make_panel(d, dy, extra_cols={"state": cluster_unit})
1475+
with warnings.catch_warnings():
1476+
warnings.simplefilter("ignore", UserWarning)
1477+
r = HeterogeneousAdoptionDiD(design="mass_point", cluster="state").fit(
1478+
panel, "outcome", "dose", "period", "unit"
1479+
)
1480+
assert r.vcov_type == "cr1"
1481+
assert r.cluster_name == "state"
1482+
1483+
def test_no_cluster_stores_base_family(self):
1484+
"""Unclustered mass-point keeps 'hc1' or 'classical' label."""
1485+
d, dy = _dgp_mass_point(200, seed=0)
1486+
panel = _make_panel(d, dy)
1487+
with warnings.catch_warnings():
1488+
warnings.simplefilter("ignore", UserWarning)
1489+
r_hc1 = HeterogeneousAdoptionDiD(design="mass_point", vcov_type="hc1").fit(
1490+
panel, "outcome", "dose", "period", "unit"
1491+
)
1492+
r_cl = HeterogeneousAdoptionDiD(design="mass_point", vcov_type="classical").fit(
1493+
panel, "outcome", "dose", "period", "unit"
1494+
)
1495+
assert r_hc1.vcov_type == "hc1"
1496+
assert r_cl.vcov_type == "classical"
1497+
1498+
def test_cluster_with_classical_collapses_to_cr1(self):
1499+
"""classical + cluster is CR1 in practice; label reflects that."""
1500+
d, dy = _dgp_mass_point(200, seed=0)
1501+
cluster_unit = np.repeat(np.arange(50), 4)
1502+
panel = _make_panel(d, dy, extra_cols={"state": cluster_unit})
1503+
with warnings.catch_warnings():
1504+
warnings.simplefilter("ignore", UserWarning)
1505+
r = HeterogeneousAdoptionDiD(
1506+
design="mass_point", vcov_type="classical", cluster="state"
1507+
).fit(panel, "outcome", "dose", "period", "unit")
1508+
assert r.vcov_type == "cr1"
1509+
1510+
def test_to_dict_shows_effective_family(self):
1511+
d, dy = _dgp_mass_point(200, seed=0)
1512+
cluster_unit = np.repeat(np.arange(50), 4)
1513+
panel = _make_panel(d, dy, extra_cols={"state": cluster_unit})
1514+
with warnings.catch_warnings():
1515+
warnings.simplefilter("ignore", UserWarning)
1516+
r = HeterogeneousAdoptionDiD(design="mass_point", cluster="state").fit(
1517+
panel, "outcome", "dose", "period", "unit"
1518+
)
1519+
result_dict = r.to_dict()
1520+
assert result_dict["vcov_type"] == "cr1"
1521+
assert result_dict["cluster_name"] == "state"

0 commit comments

Comments
 (0)