Skip to content

Commit faed8a3

Browse files
igerber and claude committed
Fix pandas compat and add aggregate/summary test (round 5)
- P1: Replace pandas-2.2-only groupby.apply(include_groups=False) with pandas-1.3-compatible pd.Series.groupby().sum() in OLS zero-weight guard
- P2: Add test for survey aggregate() with df_survey inference and summary() survey block display

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7df4115 commit faed8a3

2 files changed

Lines changed: 22 additions & 4 deletions

File tree

diff_diff/wooldridge.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -652,11 +652,9 @@ def _fit_ols(
652652

653653
# Guard: zero-weight unit/time groups cause 0/0 in within_transform
654654
if survey_weights is not None and np.any(survey_weights == 0):
655+
sw_series = pd.Series(survey_weights, index=sample.index)
655656
for grp_col, grp_label in [(unit, "unit"), (time, "time period")]:
656-
grp_sums = sample.groupby(grp_col).apply(
657-
lambda g: survey_weights[g.index].sum(),
658-
include_groups=False,
659-
)
657+
grp_sums = sw_series.groupby(sample[grp_col]).sum()
660658
zero_grps = grp_sums[grp_sums == 0].index.tolist()
661659
if zero_grps:
662660
raise ValueError(

tests/test_wooldridge.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1573,3 +1573,23 @@ def test_ols_survey_non_range_index(self, survey_panel):
15731573
df, outcome="y", unit="unit", time="time",
15741574
cohort="cohort", survey_design=sd,
15751575
)
1576+
1577+
def test_survey_aggregate_and_summary(self, survey_panel):
1578+
"""Survey aggregate() uses df_survey and summary() shows survey block."""
1579+
from diff_diff.survey import SurveyDesign
1580+
sd = SurveyDesign(weights="weight", strata="stratum", psu="unit")
1581+
r = WooldridgeDiD().fit(
1582+
survey_panel, outcome="y", unit="unit", time="time",
1583+
cohort="cohort", survey_design=sd,
1584+
)
1585+
# aggregate() should use t-distribution with survey df
1586+
r.aggregate("group")
1587+
assert r.group_effects is not None
1588+
assert r._df_survey is not None
1589+
for eff in r.group_effects.values():
1590+
assert np.isfinite(eff["p_value"])
1591+
1592+
# summary() should include survey design block
1593+
s = r.summary()
1594+
assert "Survey Design" in s
1595+
assert "pweight" in s

0 commit comments

Comments
 (0)