Restore replicate-weight dispatch in aggregation, raise on zero-weight never-treated

igerber · claude · igerber · commit 42d3dffdd426 · 2026-04-04T12:19:44.000-04:00
- Add uses_replicate_variance branching in _aggregate_overall() and
  _aggregate_event_study() to route replicate designs to
  compute_replicate_if_variance() instead of compute_survey_if_variance()
- Change zero-weight never-treated guard from warning to ValueError for
  covariates path — DR nuisance estimation requires positive-weight controls
- Add test_zero_weight_never_treated_raises for the new error path

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
@@ -608,11 +608,10 @@ def fit(
 
         # Guard: never-treated with zero survey weight → no valid comparisons
         if cohort_fractions.get(np.inf, 0.0) <= 0 and use_covariates:
-            warnings.warn(
-                "Never-treated group has zero survey weight; no valid "
-                "comparisons possible for covariates path.",
-                UserWarning,
-                stacklevel=2,
+            raise ValueError(
+                "Never-treated group has zero survey weight. The doubly "
+                "robust covariates path requires a never-treated control "
+                "group with positive survey weight for nuisance estimation."
             )
 
         # ----- Covariate preparation (if provided) -----
@@ -1274,17 +1273,27 @@ def _aggregate_overall(
             keepers, effects, unit_cohorts, cohort_fractions, n_units,
             unit_weights=self._unit_level_weights,
         )
-        # Compute SE: survey path uses score-level psi + compute_survey_if_variance
-        # to avoid double-weighting (compute_survey_vcov applies w_i internally,
-        # which would double-weight the survey-weighted WIF term).
+        # Compute SE: survey path uses score-level psi to avoid double-weighting
+        # (compute_survey_vcov applies w_i internally, which would double-weight
+        # the survey-weighted WIF term). Dispatch replicate vs TSL.
         if self._unit_resolved_survey is not None:
             uw = self._unit_level_weights
             total_w = float(np.sum(uw))
-            # Score-level: standard = w*eif/sum(w), wif already has w_i in indicator
             psi_total = uw * agg_eif / total_w + wif / total_w
-            from diff_diff.survey import compute_survey_if_variance
 
-            variance = compute_survey_if_variance(psi_total, self._unit_resolved_survey)
+            if (hasattr(self._unit_resolved_survey, 'uses_replicate_variance')
+                    and self._unit_resolved_survey.uses_replicate_variance):
+                from diff_diff.survey import compute_replicate_if_variance
+
+                variance, _ = compute_replicate_if_variance(
+                    psi_total, self._unit_resolved_survey
+                )
+            else:
+                from diff_diff.survey import compute_survey_if_variance
+
+                variance = compute_survey_if_variance(
+                    psi_total, self._unit_resolved_survey
+                )
             se = float(np.sqrt(max(variance, 0.0))) if np.isfinite(variance) else np.nan
         else:
             agg_eif_total = agg_eif + wif
@@ -1387,9 +1396,20 @@ def _aggregate_event_study(
                 uw = self._unit_level_weights
                 total_w = float(np.sum(uw))
                 psi_total = uw * agg_eif / total_w + wif_e / total_w
-                from diff_diff.survey import compute_survey_if_variance
 
-                variance = compute_survey_if_variance(psi_total, self._unit_resolved_survey)
+                if (hasattr(self._unit_resolved_survey, 'uses_replicate_variance')
+                        and self._unit_resolved_survey.uses_replicate_variance):
+                    from diff_diff.survey import compute_replicate_if_variance
+
+                    variance, _ = compute_replicate_if_variance(
+                        psi_total, self._unit_resolved_survey
+                    )
+                else:
+                    from diff_diff.survey import compute_survey_if_variance
+
+                    variance = compute_survey_if_variance(
+                        psi_total, self._unit_resolved_survey
+                    )
                 agg_se = float(np.sqrt(max(variance, 0.0))) if np.isfinite(variance) else np.nan
             else:
                 agg_eif = agg_eif + wif_e
diff --git a/tests/test_survey_phase3.py b/tests/test_survey_phase3.py
@@ -1088,14 +1088,12 @@ def test_bootstrap_se_in_ballpark_of_analytical(self, cov_survey_data):
         )
 
     def test_zero_weight_cohort_skipped(self, cov_survey_data):
-        """Zero-weight cohort should be skipped with a warning."""
+        """Zero-weight treated cohort should be skipped with a warning."""
         from diff_diff import EfficientDiD
 
         # Set early cohort (first_treat=4) weights to exactly zero
         cov_survey_data = cov_survey_data.copy()
         cov_survey_data.loc[cov_survey_data["first_treat"] == 4, "weight"] = 0.0
-        # Need small positive weight for pweight validation (can't be all zero)
-        # Keep remaining cohorts with positive weights
         sd = SurveyDesign(weights="weight")
         with pytest.warns(UserWarning, match="zero survey weight"):
             result = EfficientDiD(n_bootstrap=0).fit(
@@ -1107,6 +1105,21 @@ def test_zero_weight_cohort_skipped(self, cov_survey_data):
         assert np.isfinite(result.overall_att)
         assert np.isfinite(result.overall_se)
 
+    def test_zero_weight_never_treated_raises(self, cov_survey_data):
+        """Zero-weight never-treated group should raise ValueError."""
+        from diff_diff import EfficientDiD
+
+        cov_survey_data = cov_survey_data.copy()
+        cov_survey_data.loc[cov_survey_data["first_treat"] == 0, "weight"] = 0.0
+        sd = SurveyDesign(weights="weight")
+        with pytest.raises(ValueError, match="zero survey weight"):
+            EfficientDiD(n_bootstrap=0).fit(
+                cov_survey_data,
+                "outcome", "unit", "time", "first_treat",
+                covariates=["x1"],
+                survey_design=sd,
+            )
+
 
 # =============================================================================
 # Scale Invariance (applies to all estimators)