Address PR #355 R7 P1 + P3: fit-time positive-mass guard + doc wording fix

igerber · claude · igerber · commit 5515fbe7f24c · 2026-04-24T07:48:03.000-04:00
R7 P1: the per-draw zero-mass retry in ``_bootstrap_se`` (PR #355 R2 P0) only covers bootstrap draws, not the fit-time ATT. Survey weights are non-negative post-resolve() but all-zero mass on either arm is a valid input that encodes an unidentified target population. Without a fit-time guard the downstream ``np.average(Y, weights=w_treated)`` and ``omega_eff = unit_weights * w_control`` normalizations would hit 0/0 and silently propagate NaN through the bootstrap / placebo / jackknife dispatchers. Front-door the case: after ``w_control`` / ``w_treated`` are sourced from the resolved unit-level design, raise ``ValueError`` if either arm's total mass is <= 0. Covers both pweight-only and strata/PSU/FPC paths. Three regression tests added: ``test_fit_raises_on_zero_total_treated_survey_mass``, ``test_fit_raises_on_zero_total_control_survey_mass``, and ``test_fit_raises_on_zero_treated_mass_under_full_design``. R7 P3: the SDID row in ``docs/choosing_estimator.rst`` said "pweight only (placebo / jackknife); full (bootstrap)" in the **Weights** column, conflating weight-type support (fweight / aweight / pweight) with design-element support (strata / PSU / FPC). The code still hard-rejects non-pweight survey designs on every variance method. Narrow the wording to "pweight only" and leave "Via bootstrap" in the Strata/PSU/FPC column to describe design-element support. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py
@@ -456,6 +456,34 @@ def fit(  # type: ignore[override]
             w_treated = resolved_survey_unit.weights[n_control_for_split:].astype(
                 np.float64
             )
+            # Front-door positive-mass guard (PR #355 R7 P1). Survey weights
+            # are non-negative post-resolve() (survey.py L171-L176 rejects
+            # negatives), but all-zero mass on either arm is reachable — the
+            # user can assign unit survey weights of 0 to every treated or
+            # every control unit, which encodes an unidentified target
+            # population. The fit-time ATT formulas downstream
+            # (``np.average(..., weights=w_treated)`` around L551-L582 and
+            # ``omega_eff = unit_weights * w_control`` in the bootstrap /
+            # placebo / jackknife dispatchers) would otherwise hit 0/0
+            # normalization or propagate NaNs silently. The bootstrap loop
+            # already has per-draw zero-mass retries for degenerate resamples
+            # (PR #355 R2 P0); this guard is the fit-time analogue.
+            if w_control.sum() <= 0:
+                raise ValueError(
+                    "Survey-weighted control arm has zero total mass "
+                    f"(sum of w_control = {w_control.sum():.3g}). "
+                    "Every control unit has survey weight 0, so the target "
+                    "population is unidentified. Drop units with zero weight, "
+                    "or omit survey_design if unweighted estimation is intended."
+                )
+            if w_treated.sum() <= 0:
+                raise ValueError(
+                    "Survey-weighted treated arm has zero total mass "
+                    f"(sum of w_treated = {w_treated.sum():.3g}). "
+                    "Every treated unit has survey weight 0, so the target "
+                    "population is unidentified. Drop units with zero weight, "
+                    "or omit survey_design if unweighted estimation is intended."
+                )
         else:
             w_treated = None
             w_control = None
diff --git a/docs/choosing_estimator.rst b/docs/choosing_estimator.rst
@@ -783,7 +783,7 @@ estimation. The depth of support varies by estimator:
      - Full (analytical)
      - Multiplier at PSU
    * - ``SyntheticDiD``
-     - pweight only (placebo / jackknife); full (bootstrap)
+     - pweight only
      - Via bootstrap
      - --
      - Hybrid pairs-bootstrap + Rao-Wu rescaled (bootstrap only)
diff --git a/tests/test_methodology_sdid.py b/tests/test_methodology_sdid.py
@@ -842,6 +842,73 @@ def capturing_helper(Y_pre_c, Y_pre_t_mean, rw, *args, **kwargs):
                 ),
             )
 
+    def test_fit_raises_on_zero_total_treated_survey_mass(self):
+        """Fit-time positive-mass guard: zero treated survey mass raises.
+
+        ``SurveyDesign.resolve()`` accepts non-negative unit weights
+        (``survey.py`` L171-L176), so a user can legitimately assign unit
+        survey weights of 0 to every treated unit — encoding an
+        unidentified target population. Without the front-door guard, the
+        fit-time survey-weighted ATT (``np.average(Y, weights=w_treated)``)
+        would hit ``0/0`` and silently propagate NaN into the bootstrap
+        loop, defeating the per-draw zero-mass retry (PR #355 R2 P0).
+        Regression against PR #355 R7 P1: the guard must fire before the
+        bootstrap is even dispatched.
+        """
+        from diff_diff.survey import SurveyDesign
+
+        df = _make_panel(n_control=10, n_treated=3, seed=42)
+        # Every treated unit gets weight 0; controls keep positive weight.
+        df["wt"] = np.where(df["treated"] == 1, 0.0, 1.0)
+        with pytest.raises(ValueError, match=r"treated arm has zero total mass"):
+            SyntheticDiD(variance_method="bootstrap", n_bootstrap=20, seed=1).fit(
+                df, outcome="outcome", treatment="treated",
+                unit="unit", time="period",
+                post_periods=[5, 6, 7],
+                survey_design=SurveyDesign(weights="wt"),
+            )
+
+    def test_fit_raises_on_zero_total_control_survey_mass(self):
+        """Fit-time positive-mass guard: zero control survey mass raises.
+
+        Mirror of the treated-arm case (PR #355 R7 P1). Downstream
+        ``omega_eff = unit_weights * w_control / (unit_weights * w_control).sum()``
+        would hit 0/0; the guard front-doors.
+        """
+        from diff_diff.survey import SurveyDesign
+
+        df = _make_panel(n_control=10, n_treated=3, seed=42)
+        df["wt"] = np.where(df["treated"] == 0, 0.0, 1.0)
+        with pytest.raises(ValueError, match=r"control arm has zero total mass"):
+            SyntheticDiD(variance_method="bootstrap", n_bootstrap=20, seed=1).fit(
+                df, outcome="outcome", treatment="treated",
+                unit="unit", time="period",
+                post_periods=[5, 6, 7],
+                survey_design=SurveyDesign(weights="wt"),
+            )
+
+    def test_fit_raises_on_zero_treated_mass_under_full_design(self):
+        """Fit-time positive-mass guard fires under a strata/PSU design too.
+
+        The guard sources w_control / w_treated from the **resolved
+        unit-level** design (PR #355 R4 P0), so zero total treated mass
+        under a strata + PSU configuration (no FPC is set in this test) must
+        fire the same ValueError as the pweight-only case (PR #355 R7 P1).
+        """
+        from diff_diff.survey import SurveyDesign
+
+        df = _make_panel(n_control=10, n_treated=3, seed=42)
+        df["wt"] = np.where(df["treated"] == 1, 0.0, 1.0)
+        df["stratum"] = df["unit"] % 2
+        df["psu"] = df["unit"]
+        with pytest.raises(ValueError, match=r"treated arm has zero total mass"):
+            SyntheticDiD(variance_method="bootstrap", n_bootstrap=20, seed=1).fit(
+                df, outcome="outcome", treatment="treated",
+                unit="unit", time="period",
+                post_periods=[5, 6, 7],
+                survey_design=SurveyDesign(weights="wt", strata="stratum", psu="psu"),
+            )
+
     def test_bootstrap_scale_invariance_under_pweight_rescaling(self):
         """Survey-bootstrap SE / p / CI are invariant to a global pweight rescaling.