Fix AI review: remove over-restrictive group-constant validation, vectorize IF expansion

igerber · claude · igerber · commit d7ddb19d88ac · 2026-04-15T21:00:18.000-04:00
- Remove _validate_group_constant_survey() call - the IF expansion
  psi_i = U[g] * (w_i / W_g) handles observation-level variation in
  weights, strata, and PSU within groups correctly
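- Vectorize the W_g totals and psi expansion in _survey_se_from_group_if
  using np.unique + np.bincount (was Python loops over all observations);
  the group -> U lookup is still a per-observation list comprehension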
- Replace test_rejects_varying_weights_within_group with two positive
  tests: varying weights accepted, and varying weights change ATT
  (time-varying noise to survive first-differencing)
- Remove unused survey_weight_type variable

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/chaisemartin_dhaultfoeuille.py b/diff_diff/chaisemartin_dhaultfoeuille.py
@@ -630,12 +630,9 @@ def fit(
         # ------------------------------------------------------------------
         # Step 3: Survey resolution
         # ------------------------------------------------------------------
-        from diff_diff.survey import (
-            _resolve_survey_for_fit,
-            _validate_group_constant_survey,
-        )
+        from diff_diff.survey import _resolve_survey_for_fit
 
-        resolved_survey, survey_weights, survey_weight_type, survey_metadata = (
+        resolved_survey, survey_weights, _, survey_metadata = (
             _resolve_survey_for_fit(survey_design, data, "analytical")
         )
 
@@ -653,8 +650,9 @@ def fit(
                     "Use strata/PSU/FPC for design-based inference via Taylor "
                     "Series Linearization."
                 )
-            # Validate survey columns are constant within groups.
-            _validate_group_constant_survey(data, group, survey_design)
+            # No group-constant survey validation: the IF expansion
+            # psi_i = U[g] * (w_i / W_g) handles observation-level
+            # variation in weights, strata, and PSU within groups.
 
         # Design-2 precondition: requires drop_larger_lower=False
         if design2 and self.drop_larger_lower:
@@ -4593,27 +4591,21 @@ def _survey_se_from_group_if(
     group_ids = obs_survey_info["group_ids"]
     weights = obs_survey_info["weights"]
     resolved = obs_survey_info["resolved"]
-    n_obs = len(group_ids)
 
-    # Build group → U_centered lookup
-    group_to_u = {}
-    for idx, gid in enumerate(eligible_groups):
-        group_to_u[gid] = U_centered[idx]
+    # Build group → U_centered lookup (plain dict keyed by group id)
+    group_to_u = {gid: U_centered[idx] for idx, gid in enumerate(eligible_groups)}
+
+    # Map group IFs to observation level
+    u_obs = np.array([group_to_u.get(gid, 0.0) for gid in group_ids])
 
-    # Compute per-group weight totals W_g
-    group_to_w_total: Dict[Any, float] = {}
-    for i in range(n_obs):
-        gid = group_ids[i]
-        group_to_w_total[gid] = group_to_w_total.get(gid, 0.0) + weights[i]
+    # Compute per-group weight totals W_g via bincount
+    unique_gids, inverse = np.unique(group_ids, return_inverse=True)
+    w_totals_per_group = np.bincount(inverse, weights=weights)
+    w_obs_total = w_totals_per_group[inverse]
 
     # Expand to observation level: psi_i = U[g] * (w_i / W_g)
-    psi = np.zeros(n_obs)
-    for i in range(n_obs):
-        gid = group_ids[i]
-        u_val = group_to_u.get(gid, 0.0)
-        w_total = group_to_w_total.get(gid, 1.0)
-        if w_total > 0:
-            psi[i] = u_val * (weights[i] / w_total)
+    safe_w = np.where(w_obs_total > 0, w_obs_total, 1.0)
+    psi = u_obs * (weights / safe_w)
 
     variance = compute_survey_if_variance(psi, resolved)
     if not np.isfinite(variance) or variance < 0:
diff --git a/tests/test_survey_dcdh.py b/tests/test_survey_dcdh.py
@@ -264,21 +264,58 @@ def test_rejects_aweight(self, base_data):
                 survey_design=sd,
             )
 
-    def test_rejects_varying_weights_within_group(self, base_data):
-        """Weights must be constant within groups."""
+    def test_varying_weights_within_group_accepted(self, base_data):
+        """Observation-level weights varying within groups are valid."""
+        # Create multi-obs cells with varying weights
+        rng = np.random.default_rng(1)
         df = base_data.copy()
-        # Assign different weights to different observations in the same group
-        df["pw"] = np.random.default_rng(1).uniform(0.5, 3.0, size=len(df))
+        df2 = base_data.copy()
+        df2["outcome"] = df2["outcome"] + rng.normal(0, 0.5, size=len(df2))
+        multi = pd.concat([df, df2], ignore_index=True)
+        # Observation-level weights (vary within group)
+        multi["pw"] = rng.uniform(0.5, 3.0, size=len(multi))
         sd = SurveyDesign(weights="pw")
-        with pytest.raises(ValueError, match="varies within groups"):
-            ChaisemartinDHaultfoeuille().fit(
-                df,
-                outcome="outcome",
-                group="group",
-                time="period",
-                treatment="treatment",
-                survey_design=sd,
-            )
+        # Should succeed - no group-constant restriction
+        result = ChaisemartinDHaultfoeuille(seed=1).fit(
+            multi,
+            outcome="outcome",
+            group="group",
+            time="period",
+            treatment="treatment",
+            survey_design=sd,
+        )
+        assert np.isfinite(result.overall_att)
+
+    def test_varying_weights_change_att(self, base_data):
+        """With multi-obs cells and varying weights, ATT differs from unweighted.
+
+        dCDH uses first differences Y_{g,t} - Y_{g,t-1}, so group-constant
+        noise cancels. The noise must vary across both group AND time for
+        weighted cell means to affect the ATT via different first differences.
+        """
+        rng = np.random.default_rng(42)
+        df = base_data.copy()
+        df2 = base_data.copy()
+        # Per-observation noise (varies by group AND time)
+        df2["outcome"] = df2["outcome"] + rng.normal(0, 3.0, size=len(df2))
+        multi = pd.concat([df, df2], ignore_index=True)
+        # Give first copy weight=1, second copy weight=10
+        multi["pw"] = np.where(np.arange(len(multi)) < len(df), 1.0, 10.0)
+        sd = SurveyDesign(weights="pw")
+        result_plain = ChaisemartinDHaultfoeuille(seed=1).fit(
+            multi, outcome="outcome", group="group",
+            time="period", treatment="treatment",
+        )
+        result_survey = ChaisemartinDHaultfoeuille(seed=1).fit(
+            multi, outcome="outcome", group="group",
+            time="period", treatment="treatment",
+            survey_design=sd,
+        )
+        # Weighted cell means with time-varying noise produce different
+        # first differences -> different ATT
+        assert result_plain.overall_att != pytest.approx(
+            result_survey.overall_att, abs=0.01
+        )
 
     def test_rejects_replicate_weights(self, base_data):
         """Replicate weight variance not yet supported."""