Fix CI review: survey-weighted jackknife guards, benchmark timing, docstring

igerber · claude · igerber · commit b606f642eaf8 · 2026-04-15T20:59:48.000-04:00
P1: Add effective-support guards checking composed weights (omega * w_control)
and treated survey weights, plus per-iteration zero-sum guards in LOO loops.
P2: Restore R benchmark total_seconds to placebo-only (matches se field).
P3: Update fit() docstring for non-bootstrap survey rejection, add 4 survey
jackknife tests, update REGISTRY.md edge cases.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/benchmarks/R/benchmark_synthdid.R b/benchmarks/R/benchmark_synthdid.R
@@ -87,7 +87,7 @@ se_jk_matrix <- vcov(tau_hat, method = "jackknife")
 se_jackknife <- as.numeric(sqrt(se_jk_matrix[1, 1]))
 se_jk_time <- as.numeric(difftime(Sys.time(), se_jk_start, units = "secs"))
 
-total_time <- estimation_time + se_time + se_jk_time
+total_time <- estimation_time + se_time  # placebo only, matches `se` field
 
 # Compute noise level and regularization (to match Python's auto-computed values)
 N0 <- setup$N0
diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py
@@ -223,8 +223,9 @@ def fit(  # type: ignore[override]
         survey_design : SurveyDesign, optional
             Survey design specification. Only pweight weight_type is supported.
             Strata/PSU/FPC are supported via Rao-Wu rescaled bootstrap when
-            variance_method='bootstrap'. Placebo variance does not support
-            strata/PSU/FPC; use variance_method='bootstrap' for full designs.
+            variance_method='bootstrap'. Non-bootstrap variance methods
+            (placebo, jackknife) do not support strata/PSU/FPC; use
+            variance_method='bootstrap' for full designs.
 
         Returns
         -------
@@ -1210,6 +1211,29 @@ def _jackknife_se(
             )
             return np.nan, np.array([])
 
+        # --- Effective-support guards for survey-weighted path ---
+        if w_control is not None:
+            effective_control = unit_weights * w_control
+            if np.sum(effective_control > 0) <= 1:
+                warnings.warn(
+                    "Jackknife variance requires more than 1 control unit with "
+                    "positive effective weight (omega * survey_weight). "
+                    "Consider variance_method='placebo'.",
+                    UserWarning,
+                    stacklevel=3,
+                )
+                return np.nan, np.array([])
+
+        if w_treated is not None and np.sum(w_treated > 0) <= 1:
+            warnings.warn(
+                "Jackknife variance requires more than 1 treated unit with "
+                "positive survey weight. "
+                "Consider variance_method='placebo'.",
+                UserWarning,
+                stacklevel=3,
+            )
+            return np.nan, np.array([])
+
         jackknife_estimates = np.empty(n)
 
         # --- Precompute treated means (constant across control-LOO) ---
@@ -1238,6 +1262,10 @@ def _jackknife_se(
             # Compose with survey weights if present
             if w_control is not None:
                 omega_jk = omega_jk * w_control[mask]
+                if omega_jk.sum() == 0:
+                    jackknife_estimates[j] = np.nan
+                    mask[j] = True
+                    continue
                 omega_jk = omega_jk / omega_jk.sum()
 
             jackknife_estimates[j] = compute_sdid_estimator(
@@ -1259,6 +1287,10 @@ def _jackknife_se(
             # Recompute treated means from remaining units
             if w_treated is not None:
                 w_t_jk = w_treated[mask]
+                if w_t_jk.sum() == 0:
+                    jackknife_estimates[n_control + k] = np.nan
+                    mask[k] = True
+                    continue
                 t_pre_mean = np.average(
                     Y_pre_treated[:, mask], axis=1, weights=w_t_jk
                 )
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -1510,6 +1510,7 @@ Convergence criterion: stop when objective decrease < min_decrease² (default mi
 - **Jackknife with single treated unit**: Returns NaN SE. Cannot leave-one-out with N_tr=1; R returns NA for the same condition.
 - **Jackknife with single nonzero-weight control**: Returns NaN SE. Leaving out the only effective control is not meaningful.
 - **Jackknife with non-finite LOO estimate**: Returns NaN SE. Unlike bootstrap/placebo, jackknife is deterministic and cannot skip failed iterations; NaN propagates through `var()` (matches R behavior).
+- **Jackknife with survey weights**: Guards check effective positive support after composition (omega * w_control > 0 for controls, w_treated > 0 for treated units), not raw Frank-Wolfe (FW) counts. Returns NaN SE if there are fewer than 2 effective controls or fewer than 2 positive-weight treated units. Per-iteration zero-sum guards return NaN for an individual LOO iteration when the remaining composed weights sum to zero.
 - **Note:** Survey support: weights, strata, PSU, and FPC are all supported. Full-design surveys use Rao-Wu rescaled bootstrap (Phase 6); non-bootstrap variance methods (`variance_method="placebo"` or `"jackknife"`) require weights-only (strata/PSU/FPC require bootstrap). Both sides weighted per WLS regression interpretation: treated-side means are survey-weighted (Frank-Wolfe target and ATT formula); control-side synthetic weights are composed with survey weights post-optimization (ω_eff = ω * w_co, renormalized). Frank-Wolfe optimization itself is unweighted — survey importance enters after trajectory-matching. Covariate residualization uses WLS with survey weights. Placebo, jackknife, and bootstrap SE preserve survey weights on both sides.
 
 **Reference implementation(s):**
diff --git a/tests/test_methodology_sdid.py b/tests/test_methodology_sdid.py
@@ -611,6 +611,80 @@ def test_jackknife_n_bootstrap_none_in_results(self):
         )
         assert results.n_bootstrap is None
 
+    def test_jackknife_with_pweights(self):
+        """Jackknife should produce finite SE with survey pweights."""
+        from diff_diff.survey import SurveyDesign
+
+        df = _make_panel(n_control=15, n_treated=3, seed=42)
+        # Add unit-constant survey weights
+        unit_weights = {u: 1.0 + u * 0.1 for u in df["unit"].unique()}
+        df["weight"] = df["unit"].map(unit_weights)
+
+        sdid = SyntheticDiD(variance_method="jackknife", seed=42)
+        results = sdid.fit(
+            df, outcome="outcome", treatment="treated",
+            unit="unit", time="period",
+            post_periods=list(range(5, 8)),
+            survey_design=SurveyDesign(weights="weight"),
+        )
+        assert results.se > 0
+        assert np.isfinite(results.se)
+        assert results.variance_method == "jackknife"
+
+    def test_jackknife_zero_effective_control_nan(self):
+        """Zero-weight controls after composition -> NaN SE."""
+        from diff_diff.survey import SurveyDesign
+
+        # 3 controls, 2 treated. Set all but 1 control survey weight to 0
+        # so effective support <= 1.
+        df = _make_panel(n_control=3, n_treated=2, seed=42)
+        weights = {}
+        control_units = sorted(df.loc[df["treated"] == 0, "unit"].unique())
+        treated_units = sorted(df.loc[df["treated"] == 1, "unit"].unique())
+        # Only first control gets positive weight
+        for i, u in enumerate(control_units):
+            weights[u] = 1.0 if i == 0 else 0.0
+        for u in treated_units:
+            weights[u] = 1.0
+        df["weight"] = df["unit"].map(weights)
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            sdid = SyntheticDiD(variance_method="jackknife", seed=42)
+            results = sdid.fit(
+                df, outcome="outcome", treatment="treated",
+                unit="unit", time="period",
+                post_periods=list(range(5, 7)),
+                survey_design=SurveyDesign(weights="weight"),
+            )
+        assert np.isnan(results.se)
+
+    def test_jackknife_zero_treated_weight_nan(self):
+        """Single positive-weight treated unit with survey -> NaN SE."""
+        from diff_diff.survey import SurveyDesign
+
+        df = _make_panel(n_control=10, n_treated=2, seed=42)
+        weights = {}
+        treated_units = sorted(df.loc[df["treated"] == 1, "unit"].unique())
+        control_units = sorted(df.loc[df["treated"] == 0, "unit"].unique())
+        for u in control_units:
+            weights[u] = 1.0
+        # Only first treated unit gets positive weight
+        weights[treated_units[0]] = 1.0
+        weights[treated_units[1]] = 0.0
+        df["weight"] = df["unit"].map(weights)
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            sdid = SyntheticDiD(variance_method="jackknife", seed=42)
+            results = sdid.fit(
+                df, outcome="outcome", treatment="treated",
+                unit="unit", time="period",
+                post_periods=list(range(5, 7)),
+                survey_design=SurveyDesign(weights="weight"),
+            )
+        assert np.isnan(results.se)
+
 
 # =============================================================================
 # Jackknife SE - R Golden Value Parity
diff --git a/tests/test_survey_phase5.py b/tests/test_survey_phase5.py
@@ -198,7 +198,21 @@ def test_full_design_bootstrap_smoke(self, sdid_survey_data, survey_design_full)
     def test_full_design_placebo_raises(self, sdid_survey_data, survey_design_full):
         """Placebo variance with full design raises NotImplementedError."""
         est = SyntheticDiD(variance_method="placebo", n_bootstrap=50, seed=42)
-        with pytest.raises(NotImplementedError, match="placebo.*does not support strata/PSU/FPC"):
+        with pytest.raises(NotImplementedError, match="does not support strata/PSU/FPC"):
+            est.fit(
+                sdid_survey_data,
+                outcome="outcome",
+                treatment="treated",
+                unit="unit",
+                time="time",
+                post_periods=[6, 7, 8, 9],
+                survey_design=survey_design_full,
+            )
+
+    def test_full_design_jackknife_raises(self, sdid_survey_data, survey_design_full):
+        """Jackknife variance with full design raises NotImplementedError."""
+        est = SyntheticDiD(variance_method="jackknife", seed=42)
+        with pytest.raises(NotImplementedError, match="does not support strata/PSU/FPC"):
             est.fit(
                 sdid_survey_data,
                 outcome="outcome",