Skip to content

Commit 2cf5ebe

Browse files
igerberclaude
andcommitted
Codex re-review round 4: propagate vcov_type / conley_lag_cutoff to to_dict
P1 (Maintainability): the prior commit added `conley_lag_cutoff` to the result dataclasses and summary(), but `DiDResults.to_dict()` and `MultiPeriodDiDResults.to_dict()` still omitted it. Downstream programmatic consumers (notebooks, adapters, pipelines) that serialize results to dicts couldn't tell which Conley variant produced the SEs. Fix: both `to_dict()` methods now include `vcov_type`, `cluster_name`, and `conley_lag_cutoff` when set. Conditional emission preserves the existing behavior for non-conley / non-cluster fits (no new keys appear in the serialized dict for unrelated estimators). Regression: `test_twfe_conley_to_dict_carries_lag_cutoff` and `test_multi_period_did_conley_to_dict_carries_lag_cutoff` fit a TWFE + MPD Conley panel and assert `to_dict()` exposes the expected fields. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent bcada5c commit 2cf5ebe

2 files changed

Lines changed: 79 additions & 0 deletions

File tree

diff_diff/results.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,14 @@ def to_dict(self) -> Dict[str, Any]:
284284
"r_squared": self.r_squared,
285285
"inference_method": self.inference_method,
286286
}
287+
# Variance-family metadata: included only when set, so existing
288+
# dict consumers see no new keys for non-conley / non-cluster fits.
289+
if self.vcov_type is not None:
290+
result["vcov_type"] = self.vcov_type
291+
if self.cluster_name is not None:
292+
result["cluster_name"] = self.cluster_name
293+
if self.conley_lag_cutoff is not None:
294+
result["conley_lag_cutoff"] = self.conley_lag_cutoff
287295
if self.n_bootstrap is not None:
288296
result["n_bootstrap"] = self.n_bootstrap
289297
if self.n_clusters is not None:
@@ -717,6 +725,14 @@ def to_dict(self) -> Dict[str, Any]:
717725
"r_squared": self.r_squared,
718726
"reference_period": self.reference_period,
719727
}
728+
# Variance-family metadata: included only when set, so existing
729+
# dict consumers see no new keys for non-conley / non-cluster fits.
730+
if self.vcov_type is not None:
731+
result["vcov_type"] = self.vcov_type
732+
if self.cluster_name is not None:
733+
result["cluster_name"] = self.cluster_name
734+
if self.conley_lag_cutoff is not None:
735+
result["conley_lag_cutoff"] = self.conley_lag_cutoff
720736

721737
# Add period-specific effects
722738
for period, pe in self.period_effects.items():

tests/test_conley_vcov.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,51 @@ def test_multi_period_did_conley_with_datetime64_time(self):
12431243
se_dt = np.sqrt(np.diag(res_dt.vcov))
12441244
np.testing.assert_allclose(se_dt, se_int, atol=1e-10)
12451245

1246+
def test_multi_period_did_conley_to_dict_carries_lag_cutoff(self):
1247+
"""Closes Codex re-review round 4 P1 (Maintainability) on MPD:
1248+
serialized `to_dict()` must include `vcov_type` and
1249+
`conley_lag_cutoff` so downstream programmatic consumers can tell
1250+
which Conley variant produced the SEs."""
1251+
import pandas as pd
1252+
1253+
from diff_diff import MultiPeriodDiD
1254+
1255+
rng = np.random.default_rng(seed=41)
1256+
n_units = 10
1257+
rows = []
1258+
for u in range(n_units):
1259+
lat = rng.uniform(-30, 30)
1260+
lon = rng.uniform(-100, 100)
1261+
for t in range(3):
1262+
rows.append(
1263+
{
1264+
"unit": u,
1265+
"time": t,
1266+
"y": rng.normal(),
1267+
"treated": int(u < 5),
1268+
"lat": lat,
1269+
"lon": lon,
1270+
}
1271+
)
1272+
df_mp = pd.DataFrame(rows)
1273+
res = MultiPeriodDiD(
1274+
vcov_type="conley",
1275+
conley_coords=("lat", "lon"),
1276+
conley_cutoff_km=2000.0,
1277+
conley_lag_cutoff=2,
1278+
).fit(
1279+
df_mp,
1280+
outcome="y",
1281+
treatment="treated",
1282+
time="time",
1283+
post_periods=[1, 2],
1284+
unit="unit",
1285+
reference_period=0,
1286+
)
1287+
d = res.to_dict()
1288+
assert d["vcov_type"] == "conley"
1289+
assert d["conley_lag_cutoff"] == 2
1290+
12461291
def test_multi_period_did_conley_missing_coords_raises(self):
12471292
"""MPD + vcov_type='conley' without conley_coords raises a clean
12481293
ValueError instead of a raw TypeError on `self.conley_coords[0]`.
@@ -1421,6 +1466,24 @@ def test_twfe_conley_summary_emits_conley_label(self, panel):
14211466
# The result dataclass also carries the lag for programmatic access.
14221467
assert res.conley_lag_cutoff == 1
14231468

1469+
def test_twfe_conley_to_dict_carries_lag_cutoff(self, panel):
1470+
"""Closes Codex re-review round 4 P1 (Maintainability): the
1471+
serialized `to_dict()` must include `vcov_type` and
1472+
`conley_lag_cutoff` so downstream programmatic consumers (notebooks,
1473+
adapters, pipelines) can tell which Conley variant produced the SEs
1474+
without re-deriving from the summary string."""
1475+
from diff_diff import TwoWayFixedEffects
1476+
1477+
res = TwoWayFixedEffects(
1478+
vcov_type="conley",
1479+
conley_coords=("lat", "lon"),
1480+
conley_cutoff_km=2000.0,
1481+
conley_lag_cutoff=1,
1482+
).fit(panel, outcome="y", treatment="treated", time="time", unit="unit")
1483+
d = res.to_dict()
1484+
assert d["vcov_type"] == "conley"
1485+
assert d["conley_lag_cutoff"] == 1
1486+
14241487
def test_twfe_conley_non_numeric_time_fails(self, panel):
14251488
"""TWFE's `_treatment_post = treated * time` design step requires
14261489
numeric `time`. Non-numeric labels (datetime64, pd.Period, strings)

0 commit comments

Comments
 (0)