Skip to content

Commit 20f37e1

Browse files
igerber and claude committed
Fix EfficientDiD fweight consistency and BaconDecomposition index safety from PR #226 review (round 5)
- EfficientDiD: use the resolved survey weights directly for unit-level estimation (Omega*, EIF, cohort fractions) instead of a separately renormalized raw column, ensuring fweight/aweight consistency with TSL.
- BaconDecomposition: store survey weights as a DataFrame column for safe label-based subsetting in _recompute_exact_weights, preventing out-of-bounds errors on non-default DataFrame indexes.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c47a91a commit 20f37e1

2 files changed

Lines changed: 10 additions & 14 deletions

File tree

diff_diff/bacon.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -690,8 +690,11 @@ def _recompute_exact_weights(
690690
within-group variance of the treatment indicator.
691691
"""
692692
n_total_obs = len(df)
693-
w = weights if weights is not None else np.ones(n_total_obs)
694-
w_total = np.sum(w)
693+
w_arr = weights if weights is not None else np.ones(n_total_obs)
694+
# Store weights as a column for safe label-based subsetting
695+
df = df.copy()
696+
df["_sw"] = w_arr
697+
w_total = np.sum(w_arr)
695698
n_total_units = df[unit].nunique()
696699

697700
for comp in comparisons:
@@ -742,7 +745,7 @@ def _recompute_exact_weights(
742745
continue
743746

744747
# Weighted observation counts for the 2x2 sample
745-
w_22 = w[df_22.index]
748+
w_22 = df_22["_sw"].values
746749
w_22_sum = np.sum(w_22)
747750

748751
# Sample share of this comparison (weighted)

diff_diff/efficient_did.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -429,17 +429,10 @@ def fit(
429429
# by taking the first observation per unit (balanced panel, so
430430
# weights should be constant within unit).
431431
unit_level_weights: Optional[np.ndarray] = None
432-
if survey_weights is not None:
433-
# survey_weights is obs-level from _resolve_survey_for_fit
434-
# Build a unit-level weight vector aligned with all_units ordering
435-
w_col = survey_design.weights if survey_design.weights else None
436-
if w_col is not None:
437-
w_series = df.groupby(unit)[w_col].first()
438-
else:
439-
w_series = pd.Series(1.0, index=df[unit].unique())
440-
# Normalize unit-level weights (sum = n_units)
441-
raw_unit_w = w_series.reindex(all_units).values.astype(float)
442-
unit_level_weights = raw_unit_w * (n_units / np.sum(raw_unit_w))
432+
if resolved_survey is not None:
433+
# Use the resolved survey's weights (already normalized per weight_type)
434+
# subset to unit level via _unit_first_panel_row (aligned to all_units)
435+
unit_level_weights = self._unit_resolved_survey.weights
443436

444437
cohort_fractions: Dict[float, float] = {}
445438
if unit_level_weights is not None:

0 commit comments

Comments
 (0)