Skip to content

Commit d907eca

Browse files
igerberclaude
andcommitted
Thread vcov_type through MultiPeriodDiD and TwoWayFixedEffects
CI review caught that Phase 1a wired vcov_type into DifferenceInDifferences __init__/get_params but not into the overridden fit() paths on MultiPeriodDiD and TwoWayFixedEffects, so `vcov_type="hc2_bm"` on either silently produced HC1 inference. Summary output also mislabeled wild-bootstrap inference with the analytical variance family. - diff_diff/estimators.py MultiPeriodDiD.fit: pass vcov_type=self.vcov_type into the analytical solve_ols call; remove the `not self.robust` homoskedastic fallback (subsumed by compute_robust_vcov's classical branch). When vcov_type="hc2_bm" and no survey design, compute Bell-McCaffrey Satterthwaite DOF via _compute_bm_dof_from_contrasts for both per-coefficient period effects AND the post-period-average contrast; fall back to the shared analytical df otherwise. Store vcov_type and cluster_name on MultiPeriodDiDResults. - diff_diff/twfe.py: forward self.robust and self.vcov_type into the two LinearRegression instantiations; store vcov_type and the TWFE auto- cluster label (or explicit self.cluster) on DiDResults. - diff_diff/linalg.py: split _compute_bm_dof_oneway into a contrast-aware helper _compute_bm_dof_from_contrasts(X, bread, h_diag, contrasts) so MultiPeriodDiD can request BM DOF for the avg_att linear combination. The per-coefficient wrapper now delegates to the shared helper with contrasts=I_k. - diff_diff/results.py DiDResults.summary and MultiPeriodDiDResults: gate the Variance family label on inference_method == "analytical" so wild-bootstrap output is no longer mislabeled; add vcov_type, cluster_name, inference_method, n_bootstrap, n_clusters fields to MultiPeriodDiDResults for symmetry with DiDResults and to drive the summary label. - tests/test_estimators_vcov_type.py: add five end-to-end tests exercising the previously-untested paths - MultiPeriodDiD classical vs hc1 SE differ; MultiPeriodDiD hc2_bm CI is finite; TWFE hc1 vs hc2_bm SE differ (CR1 vs CR2); TWFE records the unit auto-cluster label in summary; wild-bootstrap with cluster suppresses the Variance line. All 209 Phase 1a suites plus 145 estimator regression tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent a56e388 commit d907eca

5 files changed

Lines changed: 312 additions & 54 deletions

File tree

diff_diff/estimators.py

Lines changed: 83 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,6 +1303,7 @@ def fit( # type: ignore[override]
13031303
rank_deficient_action=self.rank_deficient_action,
13041304
weights=survey_weights,
13051305
weight_type=survey_weight_type,
1306+
vcov_type=self.vcov_type,
13061307
)
13071308

13081309
# Compute survey vcov if applicable
@@ -1423,25 +1424,70 @@ def _refit_mp_absorb(w_r):
14231424
)
14241425
df = None
14251426

1426-
# For non-robust, non-clustered case, we need homoskedastic vcov
1427-
# solve_ols returns HC1 by default, so compute homoskedastic if needed
1428-
if not self.robust and self.cluster is None and survey_weights is None:
1429-
n = len(y)
1430-
mse = np.sum(residuals**2) / (n - k_effective)
1431-
# Use solve() instead of inv() for numerical stability
1432-
# Only compute for identified columns (non-NaN coefficients)
1433-
identified_mask = ~np.isnan(coefficients)
1434-
if np.all(identified_mask):
1435-
vcov = np.linalg.solve(X.T @ X, mse * np.eye(X.shape[1]))
1436-
else:
1437-
# For rank-deficient case, compute vcov on reduced matrix then expand
1438-
X_reduced = X[:, identified_mask]
1439-
vcov_reduced = np.linalg.solve(
1440-
X_reduced.T @ X_reduced, mse * np.eye(X_reduced.shape[1])
1427+
# Note: the prior homoskedastic-vcov fallback conditioned on
1428+
# `not self.robust` has been subsumed by the vcov_type dispatch in
1429+
# solve_ols above, which routes vcov_type="classical" through
1430+
# compute_robust_vcov's classical branch (identical math). The
1431+
# explicit branch is no longer needed; vcov above already matches the
1432+
# requested variance family.
1433+
1434+
# For hc2_bm with a non-survey fit, compute per-coefficient and
1435+
# per-contrast Bell-McCaffrey Satterthwaite DOF so period-specific
1436+
# effects and the post-period average use correct small-sample DOF
1437+
# rather than the shared n-k fallback.
1438+
_bm_dof_per_coef: Optional[np.ndarray] = None
1439+
_bm_dof_avg: Optional[float] = None
1440+
if (
1441+
self.vcov_type == "hc2_bm"
1442+
and not _use_survey_vcov
1443+
and vcov is not None
1444+
and not np.all(np.isnan(coefficients))
1445+
):
1446+
from diff_diff.linalg import (
1447+
_compute_bm_dof_from_contrasts,
1448+
_compute_hat_diagonals,
1449+
)
1450+
1451+
_identified = ~np.isnan(coefficients)
1452+
_kept = np.where(_identified)[0]
1453+
if len(_kept) > 0:
1454+
X_kept = X[:, _kept]
1455+
bread_kept = X_kept.T @ (
1456+
X_kept * survey_weights[:, np.newaxis]
1457+
if survey_weights is not None
1458+
else X_kept
1459+
)
1460+
h_diag_kept = _compute_hat_diagonals(
1461+
X_kept, bread_kept, weights=survey_weights
1462+
)
1463+
# Build the contrast matrix: one column per identified coefficient
1464+
# plus one column for the post-period average contrast (1/n_post
1465+
# on each post-period interaction column, 0 elsewhere).
1466+
n_kept = len(_kept)
1467+
# Post-period contrast in full-width k dims, then subset to kept
1468+
post_contrast_full = np.zeros(X.shape[1])
1469+
_n_post = len(post_periods)
1470+
if _n_post > 0:
1471+
for _p in post_periods:
1472+
post_contrast_full[interaction_indices[_p]] = 1.0 / _n_post
1473+
post_contrast_kept = post_contrast_full[_kept]
1474+
contrasts = np.column_stack(
1475+
[np.eye(n_kept), post_contrast_kept[:, np.newaxis]]
14411476
)
1442-
# Expand to full size with NaN for dropped columns
1443-
vcov = np.full((X.shape[1], X.shape[1]), np.nan)
1444-
vcov[np.ix_(identified_mask, identified_mask)] = vcov_reduced
1477+
_dof_all = _compute_bm_dof_from_contrasts(
1478+
X_kept,
1479+
bread_kept,
1480+
h_diag_kept,
1481+
contrasts,
1482+
weights=survey_weights,
1483+
)
1484+
# Expand per-coefficient DOF back to full width (NaN for dropped).
1485+
_bm_dof_per_coef = np.full(X.shape[1], np.nan)
1486+
_bm_dof_per_coef[_kept] = _dof_all[:n_kept]
1487+
# Post-period average: last contrast column.
1488+
# Only meaningful if all post-period coefs are identified.
1489+
if np.all(_identified[[interaction_indices[p] for p in post_periods]]):
1490+
_bm_dof_avg = float(_dof_all[-1])
14451491

14461492
# Extract period-specific treatment effects for ALL non-reference periods
14471493
period_effects = {}
@@ -1453,7 +1499,14 @@ def _refit_mp_absorb(w_r):
14531499
idx = interaction_indices[period]
14541500
effect = coefficients[idx]
14551501
se = np.sqrt(vcov[idx, idx])
1456-
t_stat, p_value, conf_int = safe_inference(effect, se, alpha=self.alpha, df=df)
1502+
# Prefer per-coefficient BM DOF when available (hc2_bm path);
1503+
# otherwise fall back to the shared analytical df.
1504+
period_df = df
1505+
if _bm_dof_per_coef is not None and np.isfinite(_bm_dof_per_coef[idx]):
1506+
period_df = float(_bm_dof_per_coef[idx])
1507+
t_stat, p_value, conf_int = safe_inference(
1508+
effect, se, alpha=self.alpha, df=period_df
1509+
)
14571510

14581511
period_effects[period] = PeriodEffect(
14591512
period=period,
@@ -1497,8 +1550,11 @@ def _refit_mp_absorb(w_r):
14971550
avg_conf_int = (np.nan, np.nan)
14981551
else:
14991552
avg_se = float(np.sqrt(avg_var))
1553+
# Prefer the contrast-specific BM DOF for the post-period average
1554+
# when hc2_bm is in use; otherwise fall back to the shared df.
1555+
_avg_df = _bm_dof_avg if _bm_dof_avg is not None else df
15001556
avg_t_stat, avg_p_value, avg_conf_int = safe_inference(
1501-
avg_att, avg_se, alpha=self.alpha, df=df
1557+
avg_att, avg_se, alpha=self.alpha, df=_avg_df
15021558
)
15031559

15041560
# Count observations (use raw counts to avoid demeaned values from absorb)
@@ -1530,6 +1586,13 @@ def _refit_mp_absorb(w_r):
15301586
reference_period=reference_period,
15311587
interaction_indices=interaction_indices,
15321588
survey_metadata=survey_metadata,
1589+
vcov_type=self.vcov_type,
1590+
cluster_name=self.cluster,
1591+
n_clusters=(
1592+
len(np.unique(effective_cluster_ids))
1593+
if effective_cluster_ids is not None
1594+
else None
1595+
),
15331596
)
15341597

15351598
self._coefficients = coefficients

diff_diff/linalg.py

Lines changed: 57 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,69 +1341,96 @@ def _compute_cr2_bm(
13411341
return vcov, dof_vec
13421342

13431343

1344-
def _compute_bm_dof_oneway(
1344+
def _compute_bm_dof_from_contrasts(
13451345
X: np.ndarray,
13461346
bread_matrix: np.ndarray,
13471347
h_diag: np.ndarray,
1348+
contrasts: np.ndarray,
13481349
weights: Optional[np.ndarray] = None,
13491350
) -> np.ndarray:
1350-
"""Per-coefficient Bell-McCaffrey (Imbens-Kolesar 2016) DOF vector.
1351+
"""Per-contrast Bell-McCaffrey (Imbens-Kolesar 2016) Satterthwaite DOF.
13511352
1352-
For contrast ``c_j = e_j`` (the j-th standard basis vector), define
1353-
``q_j = X (X'WX)^{-1} c_j`` (length ``n``). Under a homoskedastic null,
1354-
the HC2 variance estimator for ``c_j' beta`` has a weighted-chi-squared
1353+
For each column ``c`` of ``contrasts`` (shape ``(k, m)``), define
1354+
``q = X (X'WX)^{-1} c`` (length ``n``). Under a homoskedastic null, the
1355+
HC2 variance estimator for ``c' beta`` has a weighted-chi-squared
13551356
distribution; matching mean and variance via Satterthwaite gives
13561357
1357-
DOF_j = (sum_i q_j(i)^2)^2 / sum_{i,k} a_j(i) a_j(k) M_{ik}^2
1358+
DOF(c) = (sum_i q(i)^2)^2 / sum_{i, k} a(i) a(k) M_{ik}^2
1359+
1360+
where ``M = I - H`` and ``a(i) = q(i)^2 / (1 - h_ii)``. Using the idempotent
1361+
identity ``M^2 = M``, ``trace(B) = sum_i q(i)^2`` matches the numerator.
1362+
1363+
Allocates an ``(n, n)`` temporary for ``M`` so the cost is ``O(n^2 k)`` for
1364+
the hat build plus ``O(n^2 m)`` for the per-contrast sums. Practical for
1365+
``n < 10_000``; larger designs should switch to a scores-based formulation
1366+
(tracked in TODO.md).
13581367
1359-
where ``M = I - H`` and ``a_j(i) = q_j(i)^2 / (1 - h_ii)``. Using the
1360-
identity ``M^2 = M`` (M is idempotent), ``trace(B) = sum_i q_j(i)^2``
1361-
which matches the numerator.
1368+
Parameters
1369+
----------
1370+
X : ndarray of shape (n, k)
1371+
bread_matrix : ndarray of shape (k, k) == (X'WX) or (X'X)
1372+
h_diag : ndarray of shape (n,), hat-matrix diagonals (already weighted)
1373+
contrasts : ndarray of shape (k, m). Pass ``np.eye(k)`` for per-coefficient DOF.
1374+
weights : optional weights (shape ``(n,)``) used to build the weighted hat
1375+
matrix. When ``None``, unweighted.
13621376
1363-
Allocates an ``(n, n)`` temporary for the sum and so is ``O(n^2 k)``.
1364-
Practical for ``n < 10_000``; larger designs should switch to a
1365-
scores-based formulation (tracked in TODO.md).
1377+
Returns
1378+
-------
1379+
ndarray of shape (m,) of Satterthwaite DOF per contrast column. NaN when
1380+
``den <= 0`` (degenerate case).
13661381
"""
13671382
n, k = X.shape
1368-
# q_cols[:, j] = X (bread_inv e_j) is column j of X bread_inv^T. Since
1369-
# bread_matrix is symmetric, bread_inv^T = bread_inv, so q_cols = X bread_inv.
1383+
if contrasts.ndim != 2 or contrasts.shape[0] != k:
1384+
raise ValueError(
1385+
f"contrasts must have shape (k={k}, m); got {contrasts.shape}"
1386+
)
13701387
try:
1371-
q_cols = np.linalg.solve(bread_matrix, np.eye(k)) # (k, k), bread^{-1}
1388+
bread_inv_c = np.linalg.solve(bread_matrix, contrasts)
13721389
except np.linalg.LinAlgError as e:
13731390
if "Singular" in str(e):
13741391
raise ValueError(
13751392
"Design matrix is rank-deficient (singular X'X matrix). "
13761393
"Cannot compute Bell-McCaffrey DOF."
13771394
) from e
13781395
raise
1379-
# q_ij = X @ bread_inv has shape (n, k)
1380-
q = X @ q_cols
1381-
# M = I - H where H = X (X'WX)^{-1} X' (or its weighted analogue). For DOF,
1382-
# the relevant M is the residual-maker under the same weighting used for the
1383-
# hat diagonals, so H_ij = w_j * x_i' (X'WX)^{-1} x_j when weights are
1384-
# present. Build H explicitly (O(n^2 k) memory/time).
1396+
# q has shape (n, m); column j is X @ (bread_inv @ contrasts[:, j]).
1397+
q = X @ bread_inv_c
1398+
# Build the weighted residual-maker M = I - H once.
13851399
if weights is not None:
13861400
H = X @ np.linalg.solve(bread_matrix, (X * weights[:, np.newaxis]).T)
13871401
else:
13881402
H = X @ np.linalg.solve(bread_matrix, X.T)
13891403
M = np.eye(n) - H
1390-
M_sq = M * M # elementwise square; also equal to M*M^T when M is symmetric
1391-
1392-
# Guard 1 - h_ii away from zero so `a` stays finite. The calling function
1393-
# has already warned/fallback-handled the h_ii > 1 case; this is a
1394-
# float-stability belt-and-suspenders.
1404+
M_sq = M * M # elementwise square
13951405
one_minus_h = np.maximum(1.0 - h_diag, 1e-10)
1396-
dof = np.empty(k)
1397-
for j in range(k):
1398-
qj = q[:, j]
1399-
qj_sq = qj * qj
1406+
m = contrasts.shape[1]
1407+
dof = np.empty(m)
1408+
for j in range(m):
1409+
qj_sq = q[:, j] * q[:, j]
14001410
num = qj_sq.sum() ** 2
14011411
a_j = qj_sq / one_minus_h
14021412
den = float(a_j @ M_sq @ a_j)
14031413
dof[j] = num / den if den > 0 else np.nan
14041414
return dof
14051415

14061416

1417+
def _compute_bm_dof_oneway(
1418+
X: np.ndarray,
1419+
bread_matrix: np.ndarray,
1420+
h_diag: np.ndarray,
1421+
weights: Optional[np.ndarray] = None,
1422+
) -> np.ndarray:
1423+
"""Per-coefficient Bell-McCaffrey DOF vector (Imbens-Kolesar 2016).
1424+
1425+
Thin wrapper over :func:`_compute_bm_dof_from_contrasts` with
1426+
``contrasts = I_k``, so each column picks out one coefficient.
1427+
"""
1428+
k = X.shape[1]
1429+
return _compute_bm_dof_from_contrasts(
1430+
X, bread_matrix, h_diag, np.eye(k), weights=weights
1431+
)
1432+
1433+
14071434
def _compute_robust_vcov_numpy(
14081435
X: np.ndarray,
14091436
residuals: np.ndarray,

diff_diff/results.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,10 @@ def summary(self, alpha: Optional[float] = None) -> str:
192192
if self.n_clusters is not None:
193193
lines.append(f"{'Number of clusters:':<25} {self.n_clusters:>10}")
194194

195-
# Add variance family label (vcov_type) when set.
196-
if self.vcov_type is not None:
195+
# Add variance family label (vcov_type) only when inference was analytical.
196+
# For wild-bootstrap etc. the reported SE/CI come from resampling, so the
197+
# analytical variance family would mislabel the actual inference source.
198+
if self.vcov_type is not None and self.inference_method == "analytical":
197199
label = _format_vcov_label(
198200
self.vcov_type,
199201
cluster_name=self.cluster_name,
@@ -426,6 +428,14 @@ class MultiPeriodDiDResults:
426428
interaction_indices: Optional[Dict[Any, int]] = field(default=None, repr=False)
427429
# Survey design metadata (SurveyMetadata instance from diff_diff.survey)
428430
survey_metadata: Optional[Any] = field(default=None)
431+
# Inference method (always "analytical" today for MultiPeriodDiD; included for
432+
# symmetry with DiDResults and so summary() can gate the Variance label).
433+
inference_method: str = field(default="analytical")
434+
n_bootstrap: Optional[int] = field(default=None)
435+
n_clusters: Optional[int] = field(default=None)
436+
# Variance-covariance family and cluster column for summary() labeling.
437+
vcov_type: Optional[str] = field(default=None)
438+
cluster_name: Optional[str] = field(default=None)
429439

430440
def __repr__(self) -> str:
431441
"""Concise string representation."""
@@ -493,6 +503,17 @@ def summary(self, alpha: Optional[float] = None) -> str:
493503
sm = self.survey_metadata
494504
lines.extend(_format_survey_block(sm, 80))
495505

506+
# Variance family label (only when inference was analytical).
507+
if self.vcov_type is not None and self.inference_method == "analytical":
508+
label = _format_vcov_label(
509+
self.vcov_type,
510+
cluster_name=self.cluster_name,
511+
n_clusters=self.n_clusters,
512+
n_obs=self.n_obs,
513+
)
514+
if label is not None:
515+
lines.append(f"{'Variance:':<25} {label:>50}")
516+
496517
# Pre-period effects (parallel trends test)
497518
pre_effects = {p: pe for p, pe in self.period_effects.items() if p in self.pre_periods}
498519
if pre_effects:

diff_diff/twfe.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,21 +216,22 @@ def fit( # type: ignore[override]
216216
if self.rank_deficient_action == "error":
217217
reg = LinearRegression(
218218
include_intercept=False,
219-
robust=True,
219+
robust=self.robust,
220220
cluster_ids=survey_cluster_ids if self.inference != "wild_bootstrap" else None,
221221
alpha=self.alpha,
222222
rank_deficient_action="error",
223223
weights=survey_weights,
224224
weight_type=survey_weight_type,
225225
survey_design=_lr_survey_twfe,
226+
vcov_type=self.vcov_type,
226227
).fit(X, y, df_adjustment=df_adjustment)
227228
else:
228229
# Suppress generic warning, TWFE provides context-specific messages below
229230
with warnings.catch_warnings():
230231
warnings.filterwarnings("ignore", message="Rank-deficient design matrix")
231232
reg = LinearRegression(
232233
include_intercept=False,
233-
robust=True,
234+
robust=self.robust,
234235
cluster_ids=(
235236
survey_cluster_ids if self.inference != "wild_bootstrap" else None
236237
),
@@ -239,6 +240,7 @@ def fit( # type: ignore[override]
239240
weights=survey_weights,
240241
weight_type=survey_weight_type,
241242
survey_design=_lr_survey_twfe,
243+
vcov_type=self.vcov_type,
242244
).fit(X, y, df_adjustment=df_adjustment)
243245

244246
coefficients = reg.coefficients_
@@ -362,6 +364,10 @@ def _refit_twfe(w_r):
362364
n_bootstrap_used = self._bootstrap_results.n_bootstrap
363365
n_clusters_used = self._bootstrap_results.n_clusters
364366

367+
# Cluster label for summary: TWFE auto-clusters at unit level when
368+
# self.cluster is None, so report that explicitly.
369+
_twfe_cluster_label = self.cluster if self.cluster is not None else unit
370+
365371
self.results_ = DiDResults(
366372
att=att,
367373
se=se,
@@ -381,6 +387,8 @@ def _refit_twfe(w_r):
381387
n_bootstrap=n_bootstrap_used,
382388
n_clusters=n_clusters_used,
383389
survey_metadata=survey_metadata,
390+
vcov_type=self.vcov_type,
391+
cluster_name=_twfe_cluster_label,
384392
)
385393

386394
self.is_fitted_ = True

0 commit comments

Comments
 (0)