Fix EIF centering for per-unit weights, add overlap diagnostics

igerber · claude · igerber · commit 2c7770291f39 · 2026-03-21T17:12:19.000-04:00
P0 fix: EIF now centers on scalar ATT (EIF_i = w_i @ gen_out_i - ATT)
instead of per-pair means, ensuring mean(EIF) ≈ 0 when weights vary
by unit. Added unit test verifying mean-zero property.

P1 fix: Add overlap warning when sieve ratios require clipping, with
test asserting the warning fires on near-separation covariates.

P1 fix: Document unconditional-pi Omega* as deviation in REGISTRY.md;
downgrade "efficient" claim in docstring for covariate path.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
@@ -54,7 +54,9 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
     sample means and covariances.  With covariates, uses the doubly robust
     path: sieve-based propensity score ratios (Eq 4.1-4.2) with AIC/BIC
     selection, OLS outcome regression, and kernel-smoothed conditional
-    Omega*(X) for per-unit efficient weights.
+    Omega*(X) for per-unit efficient weights.  The conditional Omega*
+    currently uses unconditional cohort fractions rather than per-unit
+    conditional propensities (see REGISTRY.md deviation note).
 
     Parameters
     ----------
@@ -534,7 +536,8 @@ def fit(
                         att_gt = np.nan
 
                     # EIF with per-unit weights (Remark 4.2: plug-in valid)
-                    eif_vals = compute_eif_cov(per_unit_w, gen_out, y_hat, n_units)
+                    # Center on scalar ATT, not per-pair means (ensures mean(EIF) ≈ 0)
+                    eif_vals = compute_eif_cov(per_unit_w, gen_out, att_gt, n_units)
                     eif_by_gt[(g, t)] = eif_vals
                 else:
                     # No-covariates path (closed-form)
diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py
@@ -235,6 +235,19 @@ def estimate_propensity_ratio_sieve(
             best_ic = ic_val
             best_ratio = r_hat.copy()
 
+    # Overlap diagnostics: warn if ratios require significant clipping
+    n_extreme = int(np.sum((best_ratio < 1.0 / ratio_clip) | (best_ratio > ratio_clip)))
+    if n_extreme > 0:
+        pct = 100.0 * n_extreme / n_units
+        warnings.warn(
+            f"Sieve propensity ratios for {n_extreme} of {n_units} units "
+            f"({pct:.1f}%) were outside [{1.0/ratio_clip:.2f}, {ratio_clip:.1f}] "
+            f"and will be clipped. This may indicate overlap assumption "
+            f"violations (near-zero propensity scores for some covariate values).",
+            UserWarning,
+            stacklevel=2,
+        )
+
     # Clip: population ratio p_g(X)/p_{g'}(X) is non-negative
     best_ratio = np.clip(best_ratio, 1.0 / ratio_clip, ratio_clip)
 
@@ -626,44 +639,46 @@ def compute_per_unit_weights(
 def compute_eif_cov(
     weights: np.ndarray,
     generated_outcomes: np.ndarray,
-    y_hat_mean: np.ndarray,
+    att_gt: float,
     n_units: int,
 ) -> np.ndarray:
     """Per-unit efficient influence function from DR generated outcomes.
 
     Supports both global weights ``(H,)`` and per-unit weights ``(n_units, H)``.
 
-    The plug-in EIF treats estimated per-unit weights w(X_i) as fixed.
-    This is valid under Neyman orthogonality (Remark 4.2): estimation
-    error in the conditional Omega*(X) weights is second-order and does
-    not affect the first-order asymptotics of the EIF.
+    For global weights: ``EIF_i = w @ (gen_out_i - y_bar) = w @ gen_out_i - ATT``
+    For per-unit weights: ``EIF_i = w(X_i) @ gen_out_i - ATT``
+
+    In both cases the EIF centers on the scalar ATT estimate, ensuring
+    ``mean(EIF) ≈ 0``. The plug-in EIF treats estimated per-unit weights
+    as fixed, valid under Neyman orthogonality (Remark 4.2).
 
     Parameters
     ----------
     weights : ndarray, shape (H,) or (n_units, H)
         Efficient combination weights.
     generated_outcomes : ndarray, shape (n_units, H)
         Per-unit generated outcomes.
-    y_hat_mean : ndarray, shape (H,)
-        Sample average of generated outcomes per pair.
+    att_gt : float
+        Scalar ATT estimate for this (g, t) cell.
     n_units : int
         Total number of units.
 
     Returns
     -------
     eif : ndarray, shape (n_units,)
-        EIF value for every unit.
+        EIF value for every unit. Sample mean is approximately zero.
     """
     if weights.size == 0:
         return np.zeros(n_units)
 
-    centered = generated_outcomes - y_hat_mean  # (n_units, H)
-
     if weights.ndim == 1:
-        # Global weights: (n_units,) = (n_units, H) @ (H,)
-        eif = centered @ weights
+        # Global weights: w @ gen_out_i for each unit
+        weighted_scores = generated_outcomes @ weights  # (n_units,)
     else:
-        # Per-unit weights: element-wise multiply then sum
-        eif = np.sum(weights * centered, axis=1)
+        # Per-unit weights: w_i @ gen_out_i for each unit
+        weighted_scores = np.sum(weights * generated_outcomes, axis=1)
 
+    # Center on the scalar ATT estimate (ensures mean(EIF) ≈ 0)
+    eif = weighted_scores - att_gt
     return eif
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -671,6 +671,7 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - [x] Overlap diagnostics for propensity score ratios
 - **Note:** Sieve ratio estimation uses polynomial basis functions (total degree up to K) with AIC/BIC model selection. The paper describes sieve estimators generally without specifying a particular basis family; polynomial sieves are a standard choice (Section 4, Eq 4.2). Negative sieve ratio predictions are clipped to a small positive value since the population ratio p_g(X)/p_{g'}(X) is non-negative.
 - **Note:** Kernel-smoothed conditional covariance Omega*(X) uses Gaussian kernel with Silverman's rule-of-thumb bandwidth by default. The paper specifies kernel smoothing (step 5, Section 4) without mandating a particular kernel or bandwidth selection method.
+- **Note (deviation from source):** The conditional covariance Omega*(X) scales each term by unconditional cohort fractions pi_g rather than conditional generalized propensities p_g(X) as in Eq 3.12. Implementing the full conditional propensity scaling requires per-unit group probability estimation (algorithm step 4: s_hat_{g'}(X) = 1/p_{g'}(X) via convex minimization), which is deferred. The unconditional-pi approximation is consistent under double robustness but does not achieve the full conditional efficiency bound of Eq 3.12.
 
 ---
 
diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py
@@ -1348,14 +1348,9 @@ def test_shuffled_units_match_ordered(self):
             f"vs shuffled={r_shuffled.overall_att:.6f}"
         )
 
-    def test_extreme_covariates_still_valid(self):
-        """Extreme covariates (near-separation) should still produce valid results.
-
-        The sieve ratio estimator clips extreme ratios; conditional Omega*
-        handles the resulting variation in weights gracefully.
-        """
+    def test_extreme_covariates_warns_overlap(self):
+        """Extreme covariates should trigger overlap warning and still produce valid results."""
         df = _make_covariate_panel(n_units=300, seed=77)
-        # Create a covariate that nearly separates treated from control
         rng = np.random.default_rng(77)
         units = df["unit"].unique()
         n_units = len(units)
@@ -1367,12 +1362,27 @@ def test_extreme_covariates_still_valid(self):
         )
         sep_map = dict(zip(units, sep_vals))
         df["x_sep"] = df["unit"].map(sep_map)
-        result = EfficientDiD(pt_assumption="post").fit(
-            df, "y", "unit", "time", "first_treat", covariates=["x_sep"]
-        )
+        with pytest.warns(UserWarning, match="overlap|clipped|propensity"):
+            result = EfficientDiD(pt_assumption="post").fit(
+                df, "y", "unit", "time", "first_treat", covariates=["x_sep"]
+            )
         assert np.isfinite(result.overall_att)
         assert result.overall_se > 0
 
+    def test_eif_mean_approximately_zero(self):
+        """EIF with per-unit weights should have sample mean ≈ 0."""
+        from diff_diff.efficient_did_covariates import compute_eif_cov
+
+        rng = np.random.default_rng(42)
+        n, H = 200, 3
+        gen_out = rng.normal(0, 1, (n, H))
+        # Non-constant per-unit weights (each row sums to 1)
+        raw_w = rng.exponential(1, (n, H))
+        per_unit_w = raw_w / raw_w.sum(axis=1, keepdims=True)
+        att = float(np.mean(np.sum(per_unit_w * gen_out, axis=1)))
+        eif = compute_eif_cov(per_unit_w, gen_out, att, n)
+        assert abs(np.mean(eif)) < 1e-10, f"EIF mean should be ≈ 0, got {np.mean(eif):.2e}"
+
 
 class TestCovariatesBootstrap:
     """Tier 2: bootstrap with covariates."""