Skip to content

Commit eea4463

Browse files
igerber and claude committed
Codex CI R2: DiD front-door conley_coords validation
P1 [new in R2] — DiD.fit() didn't validate conley_coords shape or column existence; malformed tuples or missing column names fell through to an opaque IndexError/KeyError downstream. Mirrors the unit-column guard from R1.

Adds two checks in the DiD Conley block:
- conley_coords must be a 2-element tuple/list of strings (raises ValueError if the arity is wrong or any element is not a string).
- Each named column must exist in `data` (raises ValueError naming the missing column).

Adds two regression tests in TestConleyEstimatorIntegration:
- test_did_conley_unknown_coord_column_raises
- test_did_conley_malformed_coord_tuple_raises (covers a 1-element tuple and a non-string element)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent be1b527 commit eea4463

2 files changed

Lines changed: 71 additions & 0 deletions

File tree

diff_diff/estimators.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -413,6 +413,22 @@ def fit(
413413
"conley_coords=(lat_col, lon_col) and conley_cutoff_km "
414414
"on the constructor."
415415
)
416+
# Validate conley_coords is a 2-element tuple/list of strings
417+
# and both columns exist on `data`. Without these guards, a
418+
# malformed tuple or missing column fell through to an opaque
419+
# IndexError / pandas KeyError downstream. Codex CI R2 P1.
420+
if (
421+
not isinstance(self.conley_coords, (tuple, list))
422+
or len(self.conley_coords) != 2
423+
or not all(isinstance(c, str) for c in self.conley_coords)
424+
):
425+
raise ValueError(
426+
"conley_coords must be a 2-element tuple/list of column "
427+
f"names (lat_col, lon_col); got {self.conley_coords!r}."
428+
)
429+
for _coord_col in self.conley_coords:
430+
if _coord_col not in data.columns:
431+
raise ValueError(f"conley_coords column '{_coord_col}' not found in data.")
416432
if survey_design is not None:
417433
raise NotImplementedError(
418434
"DifferenceInDifferences(vcov_type='conley') + survey_design "

tests/test_conley_vcov.py

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1099,6 +1099,61 @@ def test_did_conley_unknown_unit_column_raises(self, two_period_panel):
10991099
unit="missing_unit",
11001100
)
11011101

1102+
def test_did_conley_unknown_coord_column_raises(self, two_period_panel):
    """A ``conley_coords`` entry naming an absent column is rejected by ``fit()``.

    With ``vcov_type='conley'`` and ``conley_coords=("missing_lat", "lon")``,
    the call is expected to raise a ValueError whose message names the
    missing column. Codex CI R2 P1.
    """
    from diff_diff import DifferenceInDifferences

    estimator_kwargs = {
        "vcov_type": "conley",
        "conley_coords": ("missing_lat", "lon"),
        "conley_cutoff_km": 2000.0,
        "conley_lag_cutoff": 1,
    }
    fit_kwargs = {
        "outcome": "y",
        "treatment": "treated",
        "time": "time",
        "unit": "unit",
    }
    expected_message = "conley_coords column 'missing_lat' not found"

    # Construction stays inside the context manager so the guarded region
    # covers both the constructor call and fit().
    with pytest.raises(ValueError, match=expected_message):
        DifferenceInDifferences(**estimator_kwargs).fit(two_period_panel, **fit_kwargs)
1121+
1122+
def test_did_conley_malformed_coord_tuple_raises(self, two_period_panel):
    """Malformed ``conley_coords`` values are rejected by ``fit()`` with ValueError.

    Two malformed shapes are exercised in order: a 1-element tuple (wrong
    arity) and a 2-element tuple whose second element is not a string.
    Both are expected to produce the same "2-element tuple/list of column"
    message. Codex CI R2 P1.
    """
    from diff_diff import DifferenceInDifferences

    malformed_cases = (
        ("lat",),  # wrong arity (1-element tuple)
        ("lat", 0),  # non-string element
    )
    for bad_coords in malformed_cases:
        with pytest.raises(ValueError, match="2-element tuple/list of column"):
            DifferenceInDifferences(
                vcov_type="conley",
                conley_coords=bad_coords,  # type: ignore[arg-type]
                conley_cutoff_km=2000.0,
                conley_lag_cutoff=1,
            ).fit(
                two_period_panel,
                outcome="y",
                treatment="treated",
                time="time",
                unit="unit",
            )
1156+
11021157
def test_did_conley_missing_lag_cutoff_raises(self, two_period_panel):
11031158
"""vcov_type='conley' without conley_lag_cutoff raises ValueError."""
11041159
from diff_diff import DifferenceInDifferences

0 commit comments

Comments
 (0)