From 678c51ed3a136a9a78ad7011b4cee1c15bf2546d Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 14:48:20 -0400
Subject: [PATCH 01/12] Add doubly robust covariates path to EfficientDiD
 estimator

Implements Phase 2 of the Chen, Sant'Anna & Xie (2025) methodology:
when covariates are provided, uses outcome regression (OLS) and
propensity score ratios (logit) for doubly robust ATT estimation.
The estimator is consistent if either the outcome model or the
propensity model is correctly specified.

New module efficient_did_covariates.py contains the DR math functions.
The no-covariates closed-form path remains unchanged as the default.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 TODO.md                               |   2 +
 diff_diff/efficient_did.py            | 244 +++++++++++++-----
 diff_diff/efficient_did_covariates.py | 351 ++++++++++++++++++++++++++
 diff_diff/efficient_did_results.py    |   5 +-
 docs/methodology/REGISTRY.md          |   6 +-
 tests/helpers/edid_dgp.py             |  59 ++++-
 tests/test_efficient_did.py           | 243 +++++++++++++++++-
 7 files changed, 838 insertions(+), 72 deletions(-)
 create mode 100644 diff_diff/efficient_did_covariates.py

diff --git a/TODO.md b/TODO.md
index 41dda2c6..7e78854f 100644
--- a/TODO.md
+++ b/TODO.md
@@ -53,6 +53,8 @@ Deferred items from PR reviews that were not addressed before merge.
 | EfficientDiD: warn when cohort share is very small (< 2 units or < 1% of sample) — inverted in Omega*/EIF | `efficient_did_weights.py` | #192 | Low |
 | EfficientDiD: API docs / tutorial page for new public estimator | `docs/` | #192 | Medium |
 | Multi-absorb weighted demeaning needs iterative alternating projections for N > 1 absorbed FE with survey weights; unweighted multi-absorb also uses single-pass (pre-existing, exact only for balanced panels) | `estimators.py` | #218 | Medium |
+| EfficientDiD: sieve-based propensity ratio estimation (Eq 4.1-4.2) with AIC/BIC selection — currently uses standard logit | `efficient_did_covariates.py` | — | Low |
+| EfficientDiD: kernel-smoothed conditional covariance Omega*(X) for X-dependent efficient weights — currently uses unconditional Omega* | `efficient_did_covariates.py` | — | Low |
 | TripleDifference power: `generate_ddd_data` is a fixed 2×2×2 cross-sectional DGP — no multi-period or unbalanced-group support. Add a `generate_ddd_panel_data` for panel DDD power analysis. | `prep_dgp.py`, `power.py` | #208 | Low |
 
 #### Performance
diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
index 5ae23f63..84d26c6b 100644
--- a/diff_diff/efficient_did.py
+++ b/diff_diff/efficient_did.py
@@ -2,7 +2,7 @@
 Efficient Difference-in-Differences estimator.
 
 Implements the semiparametrically efficient ATT estimator from
-Chen, Sant'Anna & Xie (2025), Phase 1 (no covariates).
+Chen, Sant'Anna & Xie (2025).
 
 The estimator achieves the efficiency bound by optimally weighting
 across pre-treatment periods and comparison groups via the inverse of
@@ -22,6 +22,13 @@
     EDiDBootstrapResults,
     EfficientDiDBootstrapMixin,
 )
+from diff_diff.efficient_did_covariates import (
+    compute_eif_cov,
+    compute_generated_outcomes_cov,
+    compute_omega_star_cov,
+    estimate_outcome_regression,
+    estimate_propensity_ratio,
+)
 from diff_diff.efficient_did_results import EfficientDiDResults
 from diff_diff.efficient_did_weights import (
     compute_efficient_weights,
@@ -41,8 +48,10 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
 
     Achieves the semiparametric efficiency bound for ATT(g,t) in
     difference-in-differences settings with staggered treatment adoption.
-    Phase 1 supports the **no-covariates** path only — a closed-form
-    estimator using within-group sample means and covariances.
+
+    Without covariates, uses a closed-form estimator based on within-group
+    sample means and covariances.  With covariates, uses the doubly robust
+    path: outcome regression via OLS plus propensity score ratios via logit.
 
     Parameters
     ----------
@@ -64,6 +73,9 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
     anticipation : int, default 0
         Number of anticipation periods (shifts the effective treatment
         boundary forward by this amount).
+    pscore_trim : float, default 0.01
+        Propensity scores are clipped to ``[pscore_trim, 1-pscore_trim]``
+        before ratio computation.  Only used when covariates are provided.
 
     Examples
     --------
@@ -83,6 +95,7 @@ def __init__(
         bootstrap_weights: str = "rademacher",
         seed: Optional[int] = None,
         anticipation: int = 0,
+        pscore_trim: float = 0.01,
     ):
         if cluster is not None:
             raise NotImplementedError(
@@ -96,6 +109,7 @@ def __init__(
         self.bootstrap_weights = bootstrap_weights
         self.seed = seed
         self.anticipation = anticipation
+        self.pscore_trim = pscore_trim
         self.is_fitted_ = False
         self.results_: Optional[EfficientDiDResults] = None
         self._validate_params()
@@ -110,6 +124,8 @@ def _validate_params(self) -> None:
                 f"bootstrap_weights must be one of {valid_weights}, "
                 f"got '{self.bootstrap_weights}'"
             )
+        if not (0 < self.pscore_trim < 0.5):
+            raise ValueError(f"pscore_trim must be in (0, 0.5), got {self.pscore_trim}")
 
     # -- sklearn compatibility ------------------------------------------------
 
@@ -123,6 +139,7 @@ def get_params(self) -> Dict[str, Any]:
             "n_bootstrap": self.n_bootstrap,
             "bootstrap_weights": self.bootstrap_weights,
             "seed": self.seed,
+            "pscore_trim": self.pscore_trim,
         }
 
     def set_params(self, **params: Any) -> "EfficientDiD":
@@ -164,7 +181,9 @@ def fit(
             Column indicating first treatment period.
             Use 0 or ``np.inf`` for never-treated units.
         covariates : list of str, optional
-            Not implemented in Phase 1.  Raises ``NotImplementedError``.
+            Column names for time-invariant unit-level covariates.
+            When provided, uses the doubly robust path (outcome regression
+            + propensity score ratios).
         aggregate : str, optional
             ``None``, ``"simple"``, ``"event_study"``, ``"group"``, or
             ``"all"``.
@@ -180,16 +199,13 @@ def fit(
         ValueError
             Missing columns, unbalanced panel, non-absorbing treatment,
             or PT-Post without a never-treated group.
-        NotImplementedError
-            If ``covariates`` is provided (Phase 2).
         """
         self._validate_params()
 
-        if covariates is not None:
-            raise NotImplementedError(
-                "Covariates are not yet supported in EfficientDiD (Phase 1). "
-                "The with-covariates path will be added in Phase 2."
-            )
+        # Normalize empty covariates list to None (use nocov path)
+        if covariates is not None and len(covariates) == 0:
+            covariates = None
+        use_covariates = covariates is not None
 
         # ----- Validate inputs -----
         required_cols = [outcome, unit, time, first_treat]
@@ -299,6 +315,45 @@ def fit(
             cohort_fractions[g] = float(np.sum(cohort_masks[g])) / n_units
         cohort_fractions[np.inf] = float(np.sum(never_treated_mask)) / n_units
 
+        # ----- Covariate preparation (if provided) -----
+        covariate_matrix: Optional[np.ndarray] = None
+        m_hat_cache: Dict[Tuple, np.ndarray] = {}
+        r_hat_cache: Dict[Tuple[float, float], np.ndarray] = {}
+
+        if use_covariates:
+            assert covariates is not None  # for type narrowing
+
+            # Validate covariate columns exist
+            missing_cov = [c for c in covariates if c not in data.columns]
+            if missing_cov:
+                raise ValueError(f"Missing covariate columns: {missing_cov}")
+
+            # Validate no NaN/Inf in covariates
+            for col_name in covariates:
+                non_finite_cov = ~np.isfinite(pd.to_numeric(df[col_name], errors="coerce"))
+                if non_finite_cov.any():
+                    n_bad = int(non_finite_cov.sum())
+                    raise ValueError(
+                        f"Found {n_bad} non-finite value(s) in covariate column "
+                        f"'{col_name}'. Covariates must be finite."
+                    )
+
+            # Validate time-invariance: covariates must be constant within each unit
+            for col_name in covariates:
+                cov_nunique = df.groupby(unit)[col_name].nunique()
+                varying = cov_nunique[cov_nunique > 1]
+                if len(varying) > 0:
+                    uid = varying.index[0]
+                    raise ValueError(
+                        f"Covariate '{col_name}' varies over time for unit {uid}. "
+                        "EfficientDiD requires time-invariant covariates. "
+                        "Extract base-period values before calling fit()."
+                    )
+
+            # Extract unit-level covariate matrix from period_1 observations
+            base_df = df[df[time] == period_1].set_index(unit).reindex(all_units)
+            covariate_matrix = base_df[list(covariates)].values.astype(float)
+
         # ----- Core estimation: ATT(g, t) for each target -----
         # Precompute per-group unit counts (avoid repeated np.sum in loop)
         n_treated_per_g = {g: int(np.sum(cohort_masks[g])) for g in treatment_groups}
@@ -368,55 +423,129 @@ def fit(
                     eif_by_gt[(g, t)] = np.zeros(n_units)
                     continue
 
-                # Omega* matrix
-                omega = compute_omega_star_nocov(
-                    target_g=g,
-                    target_t=t,
-                    valid_pairs=pairs,
-                    outcome_wide=outcome_wide,
-                    cohort_masks=cohort_masks,
-                    never_treated_mask=never_treated_mask,
-                    period_to_col=period_to_col,
-                    period_1_col=effective_p1_col,
-                    cohort_fractions=cohort_fractions,
-                )
-
-                # Efficient weights (also returns condition number)
-                weights, _, cond_num = compute_efficient_weights(omega)
-                stored_weights[(g, t)] = weights
-                if omega.size > 0:
-                    stored_cond[(g, t)] = cond_num
+                if use_covariates:
+                    assert covariate_matrix is not None
+                    t_col_val = period_to_col[t]
+
+                    # Lazily populate nuisance caches for this (g, t)
+                    for gp, tpre in pairs:
+                        tpre_col_val = period_to_col[tpre]
+                        # m_{inf, t, tpre}(X)
+                        key_inf_t = (np.inf, t_col_val, tpre_col_val)
+                        if key_inf_t not in m_hat_cache:
+                            m_hat_cache[key_inf_t] = estimate_outcome_regression(
+                                outcome_wide,
+                                covariate_matrix,
+                                never_treated_mask,
+                                t_col_val,
+                                tpre_col_val,
+                            )
+                        # m_{g', tpre, 1}(X)
+                        key_gp_tpre = (gp, tpre_col_val, effective_p1_col)
+                        if key_gp_tpre not in m_hat_cache:
+                            gp_mask_for_reg = (
+                                never_treated_mask if np.isinf(gp) else cohort_masks[gp]
+                            )
+                            m_hat_cache[key_gp_tpre] = estimate_outcome_regression(
+                                outcome_wide,
+                                covariate_matrix,
+                                gp_mask_for_reg,
+                                tpre_col_val,
+                                effective_p1_col,
+                            )
+                        # r_{g, inf}(X) and r_{g, g'}(X)
+                        for comp in {np.inf, gp}:
+                            rkey = (g, comp)
+                            if rkey not in r_hat_cache:
+                                comp_mask = (
+                                    never_treated_mask if np.isinf(comp) else cohort_masks[comp]
+                                )
+                                r_hat_cache[rkey] = estimate_propensity_ratio(
+                                    covariate_matrix,
+                                    cohort_masks[g],
+                                    comp_mask,
+                                    pscore_trim=self.pscore_trim,
+                                )
+
+                    # Per-unit DR generated outcomes: shape (n_units, H)
+                    gen_out = compute_generated_outcomes_cov(
+                        target_g=g,
+                        target_t=t,
+                        valid_pairs=pairs,
+                        outcome_wide=outcome_wide,
+                        cohort_masks=cohort_masks,
+                        never_treated_mask=never_treated_mask,
+                        period_to_col=period_to_col,
+                        period_1_col=effective_p1_col,
+                        cohort_fractions=cohort_fractions,
+                        m_hat_cache=m_hat_cache,
+                        r_hat_cache=r_hat_cache,
+                    )
 
-                # Generated outcomes
-                y_hat = compute_generated_outcomes_nocov(
-                    target_g=g,
-                    target_t=t,
-                    valid_pairs=pairs,
-                    outcome_wide=outcome_wide,
-                    cohort_masks=cohort_masks,
-                    never_treated_mask=never_treated_mask,
-                    period_to_col=period_to_col,
-                    period_1_col=effective_p1_col,
-                )
+                    # Average per pair → scalar generated outcomes
+                    y_hat = np.mean(gen_out, axis=0)  # shape (H,)
+
+                    # Unconditional Omega* from per-unit generated outcomes
+                    omega = compute_omega_star_cov(gen_out)
+
+                    # Efficient weights
+                    weights, _, cond_num = compute_efficient_weights(omega)
+                    stored_weights[(g, t)] = weights
+                    if omega.size > 0:
+                        stored_cond[(g, t)] = cond_num
+
+                    # ATT(g,t) = w @ y_hat
+                    att_gt = float(weights @ y_hat) if len(weights) > 0 else np.nan
+
+                    # EIF from DR generated outcomes
+                    eif_vals = compute_eif_cov(weights, gen_out, y_hat, n_units)
+                    eif_by_gt[(g, t)] = eif_vals
+                else:
+                    # No-covariates path (closed-form)
+                    omega = compute_omega_star_nocov(
+                        target_g=g,
+                        target_t=t,
+                        valid_pairs=pairs,
+                        outcome_wide=outcome_wide,
+                        cohort_masks=cohort_masks,
+                        never_treated_mask=never_treated_mask,
+                        period_to_col=period_to_col,
+                        period_1_col=effective_p1_col,
+                        cohort_fractions=cohort_fractions,
+                    )
 
-                # ATT(g,t) = w @ y_hat
-                att_gt = float(weights @ y_hat) if len(weights) > 0 else np.nan
+                    weights, _, cond_num = compute_efficient_weights(omega)
+                    stored_weights[(g, t)] = weights
+                    if omega.size > 0:
+                        stored_cond[(g, t)] = cond_num
+
+                    y_hat = compute_generated_outcomes_nocov(
+                        target_g=g,
+                        target_t=t,
+                        valid_pairs=pairs,
+                        outcome_wide=outcome_wide,
+                        cohort_masks=cohort_masks,
+                        never_treated_mask=never_treated_mask,
+                        period_to_col=period_to_col,
+                        period_1_col=effective_p1_col,
+                    )
 
-                # EIF
-                eif_vals = compute_eif_nocov(
-                    target_g=g,
-                    target_t=t,
-                    weights=weights,
-                    valid_pairs=pairs,
-                    outcome_wide=outcome_wide,
-                    cohort_masks=cohort_masks,
-                    never_treated_mask=never_treated_mask,
-                    period_to_col=period_to_col,
-                    period_1_col=effective_p1_col,
-                    cohort_fractions=cohort_fractions,
-                    n_units=n_units,
-                )
-                eif_by_gt[(g, t)] = eif_vals
+                    att_gt = float(weights @ y_hat) if len(weights) > 0 else np.nan
+
+                    eif_vals = compute_eif_nocov(
+                        target_g=g,
+                        target_t=t,
+                        weights=weights,
+                        valid_pairs=pairs,
+                        outcome_wide=outcome_wide,
+                        cohort_masks=cohort_masks,
+                        never_treated_mask=never_treated_mask,
+                        period_to_col=period_to_col,
+                        period_1_col=effective_p1_col,
+                        cohort_fractions=cohort_fractions,
+                        n_units=n_units,
+                    )
+                    eif_by_gt[(g, t)] = eif_vals
 
                 # Analytical SE = sqrt(mean(EIF^2) / n)  [paper p.21]
                 se_gt = float(np.sqrt(np.mean(eif_vals**2) / n_units))
@@ -557,6 +686,7 @@ def fit(
             omega_condition_numbers=stored_cond if stored_cond else None,
             influence_functions=None,  # can store full EIF matrix if needed
             bootstrap_results=bootstrap_results,
+            estimation_path="dr" if use_covariates else "nocov",
         )
         self.is_fitted_ = True
         return self.results_
diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py
new file mode 100644
index 00000000..b5f4aa6c
--- /dev/null
+++ b/diff_diff/efficient_did_covariates.py
@@ -0,0 +1,351 @@
+"""
+Doubly robust math for the Efficient DiD estimator (with covariates).
+
+Implements the with-covariates path from Chen, Sant'Anna & Xie (2025):
+outcome regression via OLS, propensity score ratios via logistic regression,
+doubly robust generated outcomes (Eq 4.4), and the efficient influence
+function for analytical standard errors.
+
+All functions are pure (no state), operating on pre-pivoted numpy arrays.
+"""
+
+import warnings
+from typing import Dict, List, Tuple
+
+import numpy as np
+
+from diff_diff.linalg import (
+    _check_propensity_diagnostics,
+    solve_logit,
+    solve_ols,
+)
+
+
+def estimate_outcome_regression(
+    outcome_wide: np.ndarray,
+    covariate_matrix: np.ndarray,
+    group_mask: np.ndarray,
+    t_col: int,
+    tpre_col: int,
+) -> np.ndarray:
+    """Estimate conditional mean outcome change m_hat(X) for a comparison group.
+
+    Regresses ``(Y_t - Y_{tpre})`` on ``X`` within the units identified by
+    ``group_mask`` using OLS.  Returns predicted values ``m_hat(X_i)`` for
+    **all** units (extrapolated from the within-group fit).
+
+    This implements ``m_hat_{g',t,tpre}(X) = E[Y_t - Y_{tpre} | G=g', X]``.
+
+    Parameters
+    ----------
+    outcome_wide : ndarray, shape (n_units, n_periods)
+        Pivoted outcome matrix.
+    covariate_matrix : ndarray, shape (n_units, n_covariates)
+        Unit-level (time-invariant) covariates.
+    group_mask : ndarray of bool, shape (n_units,)
+        Mask selecting units in the comparison group.
+    t_col, tpre_col : int
+        Column indices in ``outcome_wide`` for the two time periods.
+
+    Returns
+    -------
+    m_hat : ndarray, shape (n_units,)
+        Predicted ``E[Y_t - Y_{tpre} | X]`` for every unit.
+    """
+    # Dependent variable: outcome change within the comparison group
+    Y_group = outcome_wide[group_mask]
+    delta_y = Y_group[:, t_col] - Y_group[:, tpre_col]
+
+    # Design matrix with intercept for the group
+    X_group = covariate_matrix[group_mask]
+    X_design = np.column_stack([np.ones(len(X_group)), X_group])
+
+    # Fit OLS — we only need coefficients, not vcov
+    coef, _, _ = solve_ols(
+        X_design,
+        delta_y,
+        return_vcov=False,
+        rank_deficient_action="warn",
+    )
+
+    # Predict for all units
+    X_all = np.column_stack([np.ones(len(covariate_matrix)), covariate_matrix])
+
+    # Handle NaN coefficients from rank-deficient fits: set NaN coefs to 0
+    # so prediction degrades gracefully (those terms contribute nothing)
+    coef_safe = np.where(np.isfinite(coef), coef, 0.0)
+    m_hat = X_all @ coef_safe
+
+    # Guard against non-finite predictions
+    non_finite = ~np.isfinite(m_hat)
+    if non_finite.any():
+        n_bad = int(non_finite.sum())
+        warnings.warn(
+            f"Outcome regression produced {n_bad} non-finite prediction(s). "
+            "Setting to 0.0 (equivalent to no covariate adjustment).",
+            UserWarning,
+            stacklevel=2,
+        )
+        m_hat[non_finite] = 0.0
+
+    return m_hat
+
+
+def estimate_propensity_ratio(
+    covariate_matrix: np.ndarray,
+    mask_g: np.ndarray,
+    mask_gp: np.ndarray,
+    pscore_trim: float = 0.01,
+) -> np.ndarray:
+    r"""Estimate propensity score ratio r_{g,g'}(X) = p_g(X) / p_{g'}(X).
+
+    Fits binary logistic regression on units in ``{g, g'}`` with ``D=1``
+    for ``G=g`` and ``D=0`` for ``G=g'``.  The fitted probability is
+    ``pscore = P(G=g | G in {g,g'}, X)``, and the ratio is computed as
+    ``pscore / (1 - pscore)`` (conditional odds).
+
+    On logit failure (convergence, separation, LinAlgError), falls back to
+    the unconditional population fraction ratio ``n_g / n_{g'}``.
+
+    Parameters
+    ----------
+    covariate_matrix : ndarray, shape (n_units, n_covariates)
+        Unit-level covariates.
+    mask_g : ndarray of bool, shape (n_units,)
+        Mask for the target treatment group.
+    mask_gp : ndarray of bool, shape (n_units,)
+        Mask for the comparison group.
+    pscore_trim : float, default 0.01
+        Propensity scores are clipped to ``[pscore_trim, 1-pscore_trim]``
+        before ratio computation.
+
+    Returns
+    -------
+    ratio : ndarray, shape (n_units,)
+        Estimated ``r_{g,g'}(X_i)`` for every unit (extrapolated from
+        the fit on ``{g, g'}`` units).
+    """
+    n_g = int(np.sum(mask_g))
+    n_gp = int(np.sum(mask_gp))
+    n_units = len(covariate_matrix)
+
+    # Stack covariates for the two groups
+    combined_mask = mask_g | mask_gp
+    X_combined = covariate_matrix[combined_mask]
+    # Treatment indicator: 1 for group g, 0 for group g'
+    D = np.concatenate([np.ones(n_g), np.zeros(n_gp)])
+
+    try:
+        beta, pscore_combined = solve_logit(X_combined, D, rank_deficient_action="warn")
+        _check_propensity_diagnostics(pscore_combined, pscore_trim)
+
+        # Predict for all units using the logit coefficients
+        X_all_with_intercept = np.column_stack([np.ones(n_units), covariate_matrix])
+        # Handle NaN coefficients from rank-deficient fits
+        beta_safe = np.where(np.isfinite(beta), beta, 0.0)
+        z = X_all_with_intercept @ beta_safe
+        z = np.clip(z, -500, 500)
+        pscore_all = 1.0 / (1.0 + np.exp(-z))
+
+    except (np.linalg.LinAlgError, ValueError):
+        warnings.warn(
+            "Propensity score estimation failed for a group pair. "
+            "Falling back to unconditional population fraction ratio.",
+            UserWarning,
+            stacklevel=2,
+        )
+        # Fallback: constant ratio n_g / n_gp for all units
+        flat_p = n_g / (n_g + n_gp) if (n_g + n_gp) > 0 else 0.5
+        pscore_all = np.full(n_units, flat_p)
+
+    # Trim propensity scores
+    pscore_all = np.clip(pscore_all, pscore_trim, 1.0 - pscore_trim)
+
+    # Ratio: pscore / (1 - pscore) = conditional odds
+    ratio = pscore_all / (1.0 - pscore_all)
+    return ratio
+
+
+def compute_generated_outcomes_cov(
+    target_g: float,
+    target_t: float,
+    valid_pairs: List[Tuple[float, float]],
+    outcome_wide: np.ndarray,
+    cohort_masks: Dict[float, np.ndarray],
+    never_treated_mask: np.ndarray,
+    period_to_col: Dict[float, int],
+    period_1_col: int,
+    cohort_fractions: Dict[float, float],
+    m_hat_cache: Dict[Tuple, np.ndarray],
+    r_hat_cache: Dict[Tuple[float, float], np.ndarray],
+    never_treated_val: float = np.inf,
+) -> np.ndarray:
+    """Compute per-unit doubly robust generated outcomes (Eq 4.4).
+
+    For each valid pair ``(g', t_pre)`` and each unit ``i``, three terms::
+
+        Term 1 (treated):
+            (G_{g,i} / pi_g) * (Y_{i,t} - Y_{i,1}
+                - m_{inf,t,tpre}(X_i) - m_{g',tpre,1}(X_i))
+
+        Term 2 (never-treated):
+            -r_{g,inf}(X_i) * (G_{inf,i} / pi_g)
+                * (Y_{i,t} - Y_{i,tpre} - m_{inf,t,tpre}(X_i))
+
+        Term 3 (comparison cohort):
+            -r_{g,g'}(X_i) * (G_{g',i} / pi_g)
+                * (Y_{i,tpre} - Y_{i,1} - m_{g',tpre,1}(X_i))
+
+    Parameters
+    ----------
+    target_g, target_t : float
+        Target group-time.
+    valid_pairs : list of (g', t_pre)
+        Valid comparison pairs.
+    outcome_wide : ndarray, shape (n_units, n_periods)
+    cohort_masks : dict
+        ``{cohort: bool_mask}``
+    never_treated_mask : ndarray of bool
+    period_to_col : dict
+    period_1_col : int
+        Column index of effective baseline period (Y_1).
+    cohort_fractions : dict
+        ``{cohort: n_cohort / n}``
+    m_hat_cache : dict
+        Outcome regression predictions, keyed by
+        ``(comparison_group, t_col, tpre_col)``.
+    r_hat_cache : dict
+        Propensity score ratios, keyed by ``(target_g, comparison_g)``.
+    never_treated_val : float
+        Sentinel for the never-treated group.
+
+    Returns
+    -------
+    gen_out : ndarray, shape (n_units, H)
+        Per-unit generated outcome for each valid pair.
+    """
+    H = len(valid_pairs)
+    n_units = outcome_wide.shape[0]
+    if H == 0:
+        return np.empty((n_units, 0))
+
+    t_col = period_to_col[target_t]
+    y1_col = period_1_col
+
+    g_mask = cohort_masks[target_g]
+    pi_g = cohort_fractions[target_g]
+
+    gen_out = np.zeros((n_units, H))
+
+    for j, (gp, tpre) in enumerate(valid_pairs):
+        tpre_col = period_to_col[tpre]
+
+        # Retrieve cached nuisance parameters
+        # m_{inf, t, tpre}(X)
+        m_inf_t_tpre = m_hat_cache[(never_treated_val, t_col, tpre_col)]
+        # m_{g', tpre, 1}(X)
+        m_gp_tpre_1 = m_hat_cache[(gp, tpre_col, y1_col)]
+        # r_{g, inf}(X)
+        r_g_inf = r_hat_cache[(target_g, never_treated_val)]
+        # r_{g, g'}(X)
+        r_g_gp = r_hat_cache[(target_g, gp)]
+
+        # ------- Term 1: treated units (G = g) -------
+        if pi_g > 0:
+            Y_t_minus_Y1 = outcome_wide[g_mask, t_col] - outcome_wide[g_mask, y1_col]
+            residual_treated = Y_t_minus_Y1 - m_inf_t_tpre[g_mask] - m_gp_tpre_1[g_mask]
+            gen_out[g_mask, j] += (1.0 / pi_g) * residual_treated
+
+        # ------- Term 2: never-treated units (G = inf) -------
+        pi_inf = cohort_fractions.get(never_treated_val, 0.0)
+        if pi_inf > 0:
+            Y_t_minus_Ytpre = (
+                outcome_wide[never_treated_mask, t_col] - outcome_wide[never_treated_mask, tpre_col]
+            )
+            residual_inf = Y_t_minus_Ytpre - m_inf_t_tpre[never_treated_mask]
+            gen_out[never_treated_mask, j] -= (
+                r_g_inf[never_treated_mask] * (1.0 / pi_g) * residual_inf
+            )
+
+        # ------- Term 3: comparison cohort units (G = g') -------
+        if np.isinf(gp):
+            gp_mask = never_treated_mask
+        else:
+            gp_mask = cohort_masks[gp]
+        pi_gp = cohort_fractions.get(gp, 0.0)
+        if pi_gp > 0:
+            Y_tpre_minus_Y1 = outcome_wide[gp_mask, tpre_col] - outcome_wide[gp_mask, y1_col]
+            residual_gp = Y_tpre_minus_Y1 - m_gp_tpre_1[gp_mask]
+            gen_out[gp_mask, j] -= r_g_gp[gp_mask] * (1.0 / pi_g) * residual_gp
+
+    return gen_out
+
+
+def compute_omega_star_cov(
+    generated_outcomes: np.ndarray,
+) -> np.ndarray:
+    """Unconditional sample covariance of per-unit DR generated outcomes.
+
+    Uses ``ddof=1`` for consistency with ``_sample_cov()`` in the nocov path.
+
+    Parameters
+    ----------
+    generated_outcomes : ndarray, shape (n_units, H)
+        Per-unit generated outcomes from :func:`compute_generated_outcomes_cov`.
+
+    Returns
+    -------
+    omega : ndarray, shape (H, H)
+        Covariance matrix.
+    """
+    n, H = generated_outcomes.shape
+    if H == 0:
+        return np.empty((0, 0))
+    if n < 2:
+        return np.zeros((H, H))
+
+    # Demean
+    means = generated_outcomes.mean(axis=0)  # shape (H,)
+    centered = generated_outcomes - means  # shape (n, H)
+
+    # Sample covariance with ddof=1
+    omega = (centered.T @ centered) / (n - 1)
+    return omega
+
+
+def compute_eif_cov(
+    weights: np.ndarray,
+    generated_outcomes: np.ndarray,
+    y_hat_mean: np.ndarray,
+    n_units: int,
+) -> np.ndarray:
+    """Per-unit efficient influence function from DR generated outcomes.
+
+    ``EIF_i = sum_j w_j * (Y_hat_{j,i} - y_hat_j)``
+
+    Parameters
+    ----------
+    weights : ndarray, shape (H,)
+        Efficient combination weights.
+    generated_outcomes : ndarray, shape (n_units, H)
+        Per-unit generated outcomes.
+    y_hat_mean : ndarray, shape (H,)
+        Sample average of generated outcomes per pair.
+    n_units : int
+        Total number of units.
+
+    Returns
+    -------
+    eif : ndarray, shape (n_units,)
+        EIF value for every unit.
+    """
+    H = len(weights)
+    if H == 0:
+        return np.zeros(n_units)
+
+    # Demeaned generated outcomes: (n_units, H)
+    centered = generated_outcomes - y_hat_mean
+
+    # Weighted sum across pairs: (n_units,)
+    eif = centered @ weights
+    return eif
diff --git a/diff_diff/efficient_did_results.py b/diff_diff/efficient_did_results.py
index 1135f9b4..0d307a40 100644
--- a/diff_diff/efficient_did_results.py
+++ b/diff_diff/efficient_did_results.py
@@ -104,13 +104,15 @@ class EfficientDiDResults:
     )
     influence_functions: Optional["np.ndarray"] = field(default=None, repr=False)
     bootstrap_results: Optional["EDiDBootstrapResults"] = field(default=None, repr=False)
+    estimation_path: str = "nocov"
 
     def __repr__(self) -> str:
         sig = _get_significance_stars(self.overall_p_value)
+        path = "DR" if self.estimation_path == "dr" else "nocov"
         return (
             f"EfficientDiDResults(ATT={self.overall_att:.4f}{sig}, "
             f"SE={self.overall_se:.4f}, "
-            f"pt={self.pt_assumption}, "
+            f"pt={self.pt_assumption}, path={path}, "
             f"n_groups={len(self.groups)}, "
             f"n_periods={len(self.time_periods)})"
         )
@@ -131,6 +133,7 @@ def summary(self, alpha: Optional[float] = None) -> str:
             f"{'Treatment cohorts:':<30} {len(self.groups):>10}",
             f"{'Time periods:':<30} {len(self.time_periods):>10}",
             f"{'PT assumption:':<30} {self.pt_assumption:>10}",
+            f"{'Estimation path:':<30} {'doubly robust' if self.estimation_path == 'dr' else 'no covariates':>10}",
         ]
         if self.anticipation > 0:
             lines.append(f"{'Anticipation periods:':<30} {self.anticipation:>10}")
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 9ba4d511..11e288ce 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -657,7 +657,7 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - [x] Implements two-step semiparametric estimator (Equation 4.3)
 - [x] Supports both PT-Post (just-identified) and PT-All (overidentified) regimes
 - [x] Computes efficient weights from conditional covariance matrix inverse
-- [ ] Doubly robust: consistent if either outcome regression or propensity score ratio is correct
+- [x] Doubly robust: consistent if either outcome regression or propensity score ratio is correct
 - [x] No-covariates case uses closed-form sample means/covariances (no tuning)
 - [ ] With covariates: sieve-based propensity ratio estimation with AIC/BIC selection
 - [ ] Kernel-smoothed conditional covariance estimation
@@ -667,7 +667,9 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - [ ] Hausman-type pre-test for PT-All vs PT-Post (Theorem A.1)
 - [x] Each ATT(g,t) can be estimated independently (parallelizable)
 - [x] Absorbing treatment validation
-- [ ] Overlap diagnostics for propensity score ratios
+- [x] Overlap diagnostics for propensity score ratios
+- **Deviation from paper:** Propensity score ratios estimated via standard logistic regression rather than sieve-based convex minimization (Eq 4.1-4.2). Standard logit is a valid approach under the doubly robust property. Sieve estimation with AIC/BIC selection may be added for improved efficiency in a future version.
+- **Deviation from paper:** Unconditional covariance matrix Omega* used for efficient weights across (g', t_pre) pairs, rather than kernel-smoothed conditional covariance Omega*(X). This gives a valid doubly robust estimator with correct EIF-based SEs. Conditional weights (X-dependent) may be added later for full semiparametric efficiency.
 
 ---
 
diff --git a/tests/helpers/edid_dgp.py b/tests/helpers/edid_dgp.py
index 6fa6200c..103a7304 100644
--- a/tests/helpers/edid_dgp.py
+++ b/tests/helpers/edid_dgp.py
@@ -13,11 +13,30 @@
 N_PERIODS = 11
 
 
-def make_compustat_dgp(n_units=400, n_periods=N_PERIODS, rho=0.0, seed=42):
+def make_compustat_dgp(
+    n_units=400,
+    n_periods=N_PERIODS,
+    rho=0.0,
+    seed=42,
+    add_covariates=False,
+    covariate_effect=0.5,
+    confounding_strength=0.0,
+):
     """Simplified Compustat-style DGP from Section 5.2.
 
     Groups: G=5 (~1/3), G=8 (~1/3), G=inf (~1/3).
     ATT(5,t) = 0.154*(t-4), ATT(8,t) = 0.093*(t-7).
+
+    Parameters
+    ----------
+    add_covariates : bool
+        If True, adds ``x1`` (continuous) and ``x2`` (binary) columns.
+    covariate_effect : float
+        Coefficient of ``x1`` on outcome (explains residual variance).
+    confounding_strength : float
+        ``x1`` also shifts the group-mean outcome differentially by group,
+        creating confounding where parallel trends hold conditional on X
+        but not unconditionally.  Set > 0 to test the DR path.
     """
     rng = np.random.default_rng(seed)
     n_t = n_periods
@@ -28,6 +47,10 @@ def make_compustat_dgp(n_units=400, n_periods=N_PERIODS, rho=0.0, seed=42):
     ft[:n_g5] = 5
     ft[n_g5 : n_g5 + n_g8] = 8
 
+    # Generate unit-level covariates (time-invariant)
+    x1 = rng.normal(0, 1, n_units)
+    x2 = rng.binomial(1, 0.5, n_units)
+
     units = np.repeat(np.arange(n_units), n_t)
     times = np.tile(np.arange(1, n_t + 1), n_units)
     ft_col = np.repeat(ft, n_t)
@@ -57,9 +80,31 @@ def make_compustat_dgp(n_units=400, n_periods=N_PERIODS, rho=0.0, seed=42):
 
     y = unit_fe + time_fe + tau + eps_flat
 
-    return pd.DataFrame(
-        {"unit": units, "time": times, "first_treat": ft_col, "y": y}
-    )
+    if add_covariates:
+        # Covariate effect on outcome (explains residual variance)
+        x1_expanded = np.repeat(x1, n_t)
+        y += covariate_effect * x1_expanded
+
+        if confounding_strength > 0:
+            # Confounding: x1 interacts with group membership to create
+            # differential trends. Parallel trends hold conditional on X
+            # but not unconditionally.
+            for i in range(n_units):
+                g = ft[i]
+                if np.isinf(g):
+                    continue
+                for t_idx in range(n_t):
+                    t_val = t_idx + 1
+                    # Group-specific time trend that depends on x1
+                    y[i * n_t + t_idx] += confounding_strength * x1[i] * (t_val - 1) / n_t
+
+    result = pd.DataFrame({"unit": units, "time": times, "first_treat": ft_col, "y": y})
+
+    if add_covariates:
+        result["x1"] = np.repeat(x1, n_t)
+        result["x2"] = np.repeat(x2, n_t)
+
+    return result
 
 
 def true_es_avg():
@@ -68,11 +113,7 @@ def true_es_avg():
     all_e = range(0, max(max_e.values()) + 1)
     es_values = []
     for e in all_e:
-        contributing = [
-            coef * (e + 1)
-            for g, coef in ATT_COEFS.items()
-            if e <= max_e[g]
-        ]
+        contributing = [coef * (e + 1) for g, coef in ATT_COEFS.items() if e <= max_e[g]]
         if contributing:
             es_values.append(np.mean(contributing))
     return np.mean(es_values)
diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py
index 3b727348..fd005793 100644
--- a/tests/test_efficient_did.py
+++ b/tests/test_efficient_did.py
@@ -250,10 +250,10 @@ def test_absorbing_treatment_validation(self):
         with pytest.raises(ValueError, match="Non-absorbing"):
             EfficientDiD().fit(df, "y", "unit", "time", "first_treat")
 
-    def test_covariates_not_implemented(self):
+    def test_missing_covariate_column_raises(self):
         df = _make_simple_panel()
-        with pytest.raises(NotImplementedError, match="covariates"):
-            EfficientDiD().fit(df, "y", "unit", "time", "first_treat", covariates=["y"])
+        with pytest.raises(ValueError, match="Missing covariate columns"):
+            EfficientDiD().fit(df, "y", "unit", "time", "first_treat", covariates=["nonexistent"])
 
     def test_missing_columns(self):
         df = _make_simple_panel()
@@ -1061,3 +1061,240 @@ def test_cohort_drop_warning(self):
         groups_present = {g for (g, t) in result.group_time_effects}
         assert 2.0 not in groups_present, "g=2 should have been dropped"
         assert 4.0 in groups_present, "g=4 should still be present"
+
+
+# =============================================================================
+# Covariate Tests
+# =============================================================================
+
+
+def _make_covariate_panel(
+    n_units=300,
+    n_periods=11,
+    seed=42,
+    covariate_effect=0.5,
+    confounding_strength=0.0,
+):
+    """Helper: staggered panel with time-invariant covariates.
+
+    Uses n_periods=11 (default) so both treatment groups g=5 and g=8 are valid.
+    """
+    return make_compustat_dgp(
+        n_units=n_units,
+        n_periods=n_periods,
+        rho=0.0,
+        seed=seed,
+        add_covariates=True,
+        covariate_effect=covariate_effect,
+        confounding_strength=confounding_strength,
+    )
+
+
+class TestCovariatesBasic:
+    """Tier 1: basic covariate path correctness."""
+
+    def test_covariates_fit_produces_results(self):
+        """Smoke test: fit with covariates returns valid results."""
+        df = _make_covariate_panel()
+        result = EfficientDiD(pt_assumption="post").fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x1", "x2"]
+        )
+        assert isinstance(result, EfficientDiDResults)
+        assert result.estimation_path == "dr"
+        assert np.isfinite(result.overall_att)
+        assert result.overall_se > 0
+        assert len(result.group_time_effects) > 0
+        for (g, t), eff in result.group_time_effects.items():
+            assert np.isfinite(eff["effect"])
+            # Baseline cells (t == g-1 under PT-Post) have SE=0 by construction
+            if t >= g:
+                assert eff["se"] > 0, f"SE=0 for post-treatment cell ({g}, {t})"
+
+    def test_nocov_match_when_irrelevant(self):
+        """Random noise covariates should give ~same ATT as nocov."""
+        df = _make_covariate_panel(covariate_effect=0.0)
+        edid = EfficientDiD(pt_assumption="post")
+        r_nocov = edid.fit(df, "y", "unit", "time", "first_treat")
+        r_cov = EfficientDiD(pt_assumption="post").fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x1", "x2"]
+        )
+        # ATT should be close (not identical due to nuisance estimation noise)
+        assert (
+            abs(r_cov.overall_att - r_nocov.overall_att) < 0.3
+        ), f"DR ATT {r_cov.overall_att:.4f} too far from nocov {r_nocov.overall_att:.4f}"
+
+    def test_covariates_produce_valid_se(self):
+        """DR path with covariates explaining variance produces valid SE."""
+        df = _make_covariate_panel(covariate_effect=2.0, n_units=600)
+        r_cov = EfficientDiD(pt_assumption="post").fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x1"]
+        )
+        # DR SE should be positive and finite
+        assert r_cov.overall_se > 0
+        assert np.isfinite(r_cov.overall_se)
+        # ATT should be close to the nocov estimate (no confounding)
+        r_nocov = EfficientDiD(pt_assumption="post").fit(df, "y", "unit", "time", "first_treat")
+        assert abs(r_cov.overall_att - r_nocov.overall_att) < 0.2
+
+    def test_covariates_recover_effect_under_confounding(self):
+        """DGP with confounding: DR should recover true ATT better than nocov."""
+        df = _make_covariate_panel(
+            n_units=600,
+            covariate_effect=1.0,
+            confounding_strength=1.0,
+            seed=123,
+        )
+        r_nocov = EfficientDiD(pt_assumption="post").fit(df, "y", "unit", "time", "first_treat")
+        r_cov = EfficientDiD(pt_assumption="post").fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x1"]
+        )
+        # Both should produce finite results
+        assert np.isfinite(r_nocov.overall_att)
+        assert np.isfinite(r_cov.overall_att)
+        # DR should be closer to the known treatment effect structure
+        # (we can't compute exact true ATT for confounded DGP easily,
+        # but we verify the DR path runs without error and produces
+        # different results from nocov)
+        assert r_cov.estimation_path == "dr"
+        assert r_nocov.estimation_path == "nocov"
+
+    def test_empty_covariates_uses_nocov(self):
+        """covariates=[] should normalize to nocov path."""
+        df = _make_covariate_panel()
+        result = EfficientDiD(pt_assumption="post").fit(
+            df, "y", "unit", "time", "first_treat", covariates=[]
+        )
+        assert result.estimation_path == "nocov"
+
+
+class TestCovariateValidation:
+    """Tier 1: input validation for covariates."""
+
+    def test_missing_covariate_column_raises(self):
+        df = _make_covariate_panel()
+        with pytest.raises(ValueError, match="Missing covariate columns"):
+            EfficientDiD().fit(df, "y", "unit", "time", "first_treat", covariates=["nonexistent"])
+
+    def test_nan_covariates_raises(self):
+        df = _make_covariate_panel()
+        df.loc[0, "x1"] = np.nan
+        with pytest.raises(ValueError, match="non-finite"):
+            EfficientDiD().fit(df, "y", "unit", "time", "first_treat", covariates=["x1"])
+
+    def test_pscore_trim_validation(self):
+        with pytest.raises(ValueError, match="pscore_trim"):
+            EfficientDiD(pscore_trim=0.0)
+        with pytest.raises(ValueError, match="pscore_trim"):
+            EfficientDiD(pscore_trim=0.5)
+        with pytest.raises(ValueError, match="pscore_trim"):
+            EfficientDiD(pscore_trim=-0.1)
+
+    def test_pscore_trim_in_get_params(self):
+        edid = EfficientDiD(pscore_trim=0.05)
+        params = edid.get_params()
+        assert "pscore_trim" in params
+        assert params["pscore_trim"] == 0.05
+
+    def test_time_varying_covariates_raises(self):
+        df = _make_covariate_panel()
+        # Make x1 vary over time for one unit
+        mask = (df["unit"] == 0) & (df["time"] == 2)
+        df.loc[mask, "x1"] = 999.0
+        with pytest.raises(ValueError, match="varies over time"):
+            EfficientDiD().fit(df, "y", "unit", "time", "first_treat", covariates=["x1"])
+
+
+class TestCovariatesPTAssumptions:
+    """Tier 2: covariates under different PT assumptions."""
+
+    def test_covariates_pt_post(self):
+        df = _make_covariate_panel()
+        result = EfficientDiD(pt_assumption="post").fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x1"]
+        )
+        assert isinstance(result, EfficientDiDResults)
+        assert result.estimation_path == "dr"
+        assert np.isfinite(result.overall_att)
+
+    def test_covariates_pt_all(self):
+        df = _make_covariate_panel()
+        result = EfficientDiD(pt_assumption="all").fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x1"]
+        )
+        assert isinstance(result, EfficientDiDResults)
+        assert result.estimation_path == "dr"
+        assert np.isfinite(result.overall_att)
+
+
+class TestCovariatesEdgeCases:
+    """Tier 2: edge cases for covariate path."""
+
+    def test_single_covariate(self):
+        df = _make_covariate_panel()
+        result = EfficientDiD(pt_assumption="post").fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x1"]
+        )
+        assert np.isfinite(result.overall_att)
+
+    def test_binary_covariate(self):
+        df = _make_covariate_panel()
+        result = EfficientDiD(pt_assumption="post").fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x2"]
+        )
+        assert np.isfinite(result.overall_att)
+
+    def test_many_covariates(self):
+        """Multiple covariates including derived ones."""
+        df = _make_covariate_panel()
+        # Create a unit-level covariate (must be time-invariant)
+        rng = np.random.default_rng(99)
+        units = df["unit"].unique()
+        x3_map = dict(
+            zip(units, df.groupby("unit")["x1"].first() * 0.5 + rng.normal(0, 0.1, len(units)))
+        )
+        df["x3"] = df["unit"].map(x3_map)
+        result = EfficientDiD(pt_assumption="post").fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x1", "x2", "x3"]
+        )
+        assert np.isfinite(result.overall_att)
+
+    def test_near_separation_trimming_warns(self):
+        """Near-separation covariates should trigger overlap warning."""
+        df = _make_covariate_panel(n_units=300, seed=77)
+        # Create a covariate that nearly separates treated from control
+        rng = np.random.default_rng(77)
+        units = df["unit"].unique()
+        n_units = len(units)
+        ft_map = df.groupby("unit")["first_treat"].first()
+        sep_vals = np.where(
+            ft_map.values < np.inf,
+            5.0 + rng.normal(0, 0.01, n_units),  # treated: ~5
+            -5.0 + rng.normal(0, 0.01, n_units),  # control: ~-5
+        )
+        sep_map = dict(zip(units, sep_vals))
+        df["x_sep"] = df["unit"].map(sep_map)
+        with warnings.catch_warnings(record=True):
+            warnings.simplefilter("always")
+            result = EfficientDiD(pt_assumption="post").fit(
+                df, "y", "unit", "time", "first_treat", covariates=["x_sep"]
+            )
+        # Should still produce valid results despite trimming
+        assert np.isfinite(result.overall_att)
+        assert result.overall_se > 0
+
+
+class TestCovariatesBootstrap:
+    """Tier 2: bootstrap with covariates."""
+
+    def test_bootstrap_with_covariates_smoke(self):
+        """Bootstrap with covariates produces valid inference."""
+        df = _make_covariate_panel(n_units=300)
+        result = EfficientDiD(pt_assumption="post", n_bootstrap=99, seed=42).fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x1"]
+        )
+        assert result.bootstrap_results is not None
+        assert np.isfinite(result.overall_att)
+        assert result.overall_se > 0
+        ci = result.overall_conf_int
+        assert ci[0] < ci[1], "CI lower must be less than upper"
+        assert np.isfinite(result.overall_p_value)

From 02537476753671800dbd9ef04a2777398ce013af Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 15:09:07 -0400
Subject: [PATCH 02/12] Address AI review: fix propensity ratio label
 alignment, add pscore_trim to results

P0 fix: estimate_propensity_ratio() now derives D labels from
mask_g[combined_mask] instead of concatenating ones/zeros, ensuring
correct alignment regardless of unit ordering. Also short-circuits
r_{g,g}(X) = 1 for same-cohort comparisons under PT-All.

P1 fix: pscore_trim is now preserved in EfficientDiDResults, matching
the staggered_results pattern for parameter traceability.

P1 fix: add shuffled-unit regression test that would have caught the
label bug, plus aggregation tests (event_study, group, all) and
PT-All bootstrap test for full covariate-path coverage.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/efficient_did.py            |  1 +
 diff_diff/efficient_did_covariates.py |  8 ++-
 diff_diff/efficient_did_results.py    |  1 +
 tests/test_efficient_did.py           | 90 +++++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
index 84d26c6b..a7944e93 100644
--- a/diff_diff/efficient_did.py
+++ b/diff_diff/efficient_did.py
@@ -687,6 +687,7 @@ def fit(
             influence_functions=None,  # can store full EIF matrix if needed
             bootstrap_results=bootstrap_results,
             estimation_path="dr" if use_covariates else "nocov",
+            pscore_trim=self.pscore_trim,
         )
         self.is_fitted_ = True
         return self.results_
diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py
index b5f4aa6c..134b6b27 100644
--- a/diff_diff/efficient_did_covariates.py
+++ b/diff_diff/efficient_did_covariates.py
@@ -129,11 +129,15 @@ def estimate_propensity_ratio(
     n_gp = int(np.sum(mask_gp))
     n_units = len(covariate_matrix)
 
+    # Short-circuit: r_{g,g}(X) = 1 for same-cohort comparisons (PT-All)
+    if np.array_equal(mask_g, mask_gp):
+        return np.ones(n_units)
+
     # Stack covariates for the two groups
     combined_mask = mask_g | mask_gp
     X_combined = covariate_matrix[combined_mask]
-    # Treatment indicator: 1 for group g, 0 for group g'
-    D = np.concatenate([np.ones(n_g), np.zeros(n_gp)])
+    # Treatment indicator: derive from mask_g so labels align with row order
+    D = mask_g[combined_mask].astype(float)
 
     try:
         beta, pscore_combined = solve_logit(X_combined, D, rank_deficient_action="warn")
diff --git a/diff_diff/efficient_did_results.py b/diff_diff/efficient_did_results.py
index 0d307a40..812451c0 100644
--- a/diff_diff/efficient_did_results.py
+++ b/diff_diff/efficient_did_results.py
@@ -105,6 +105,7 @@ class EfficientDiDResults:
     influence_functions: Optional["np.ndarray"] = field(default=None, repr=False)
     bootstrap_results: Optional["EDiDBootstrapResults"] = field(default=None, repr=False)
     estimation_path: str = "nocov"
+    pscore_trim: float = 0.01
 
     def __repr__(self) -> str:
         sig = _get_significance_stars(self.overall_p_value)
diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py
index fd005793..a0bcf7af 100644
--- a/tests/test_efficient_did.py
+++ b/tests/test_efficient_did.py
@@ -1225,6 +1225,51 @@ def test_covariates_pt_all(self):
         assert result.estimation_path == "dr"
         assert np.isfinite(result.overall_att)
 
+    def test_covariates_aggregate_event_study(self):
+        df = _make_covariate_panel()
+        result = EfficientDiD(pt_assumption="post").fit(
+            df,
+            "y",
+            "unit",
+            "time",
+            "first_treat",
+            covariates=["x1"],
+            aggregate="event_study",
+        )
+        assert result.event_study_effects is not None
+        assert len(result.event_study_effects) > 0
+        for e, eff in result.event_study_effects.items():
+            assert np.isfinite(eff["effect"])
+
+    def test_covariates_aggregate_group(self):
+        df = _make_covariate_panel()
+        result = EfficientDiD(pt_assumption="post").fit(
+            df,
+            "y",
+            "unit",
+            "time",
+            "first_treat",
+            covariates=["x1"],
+            aggregate="group",
+        )
+        assert result.group_effects is not None
+        assert len(result.group_effects) > 0
+
+    def test_covariates_aggregate_all(self):
+        df = _make_covariate_panel()
+        result = EfficientDiD(pt_assumption="post").fit(
+            df,
+            "y",
+            "unit",
+            "time",
+            "first_treat",
+            covariates=["x1"],
+            aggregate="all",
+        )
+        assert result.event_study_effects is not None
+        assert result.group_effects is not None
+        assert np.isfinite(result.overall_att)
+
 
 class TestCovariatesEdgeCases:
     """Tier 2: edge cases for covariate path."""
@@ -1258,6 +1303,33 @@ def test_many_covariates(self):
         )
         assert np.isfinite(result.overall_att)
 
+    def test_shuffled_units_match_ordered(self):
+        """Shuffled unit ordering must produce same ATT as original ordering.
+
+        Regression test for P0 label-alignment bug in estimate_propensity_ratio:
+        D labels must follow the row order of combined_mask, not assume
+        g-units come before g'-units.
+        """
+        df_ordered = _make_covariate_panel(n_units=300, seed=55)
+        # Shuffle: randomize unit IDs so cohorts are interleaved
+        rng = np.random.default_rng(55)
+        df_shuffled = df_ordered.copy()
+        units = df_shuffled["unit"].unique()
+        perm = rng.permutation(len(units))
+        unit_map = dict(zip(units, perm))
+        df_shuffled["unit"] = df_shuffled["unit"].map(unit_map)
+        df_shuffled = df_shuffled.sort_values(["unit", "time"]).reset_index(drop=True)
+
+        edid = EfficientDiD(pt_assumption="post")
+        r_ordered = edid.fit(df_ordered, "y", "unit", "time", "first_treat", covariates=["x1"])
+        r_shuffled = EfficientDiD(pt_assumption="post").fit(
+            df_shuffled, "y", "unit", "time", "first_treat", covariates=["x1"]
+        )
+        assert abs(r_ordered.overall_att - r_shuffled.overall_att) < 1e-10, (
+            f"ATT mismatch: ordered={r_ordered.overall_att:.6f} "
+            f"vs shuffled={r_shuffled.overall_att:.6f}"
+        )
+
     def test_near_separation_trimming_warns(self):
         """Near-separation covariates should trigger overlap warning."""
         df = _make_covariate_panel(n_units=300, seed=77)
@@ -1298,3 +1370,21 @@ def test_bootstrap_with_covariates_smoke(self):
         ci = result.overall_conf_int
         assert ci[0] < ci[1], "CI lower must be less than upper"
         assert np.isfinite(result.overall_p_value)
+
+    def test_covariates_pt_all_bootstrap(self):
+        """PT-All + bootstrap + covariates end-to-end."""
+        df = _make_covariate_panel(n_units=300)
+        result = EfficientDiD(pt_assumption="all", n_bootstrap=99, seed=42).fit(
+            df,
+            "y",
+            "unit",
+            "time",
+            "first_treat",
+            covariates=["x1"],
+            aggregate="all",
+        )
+        assert result.bootstrap_results is not None
+        assert result.event_study_effects is not None
+        assert result.group_effects is not None
+        assert np.isfinite(result.overall_att)
+        assert result.overall_se > 0

From 86492b0272d9d5a9613e6f25f35be9ec7c78b3a4 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 16:02:33 -0400
Subject: [PATCH 03/12] Address AI review round 2: document fallback deviation,
 fix confounding DGP

- Document propensity-ratio constant-fallback as deviation in REGISTRY.md
- Fix confounding DGP: X-dependent trends apply to all groups (not just
  treated), x1 distribution shifts by group; conditional PT now holds
- Test asserts DR bias < nocov bias against known true ATT
- Add logit-failure regression test via mock
- Clarify docstring: covariate path uses unconditional Omega*, does not
  achieve full semiparametric efficiency bound
- Fix pre-existing E402 lint: move edid_dgp import to top of file

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/efficient_did.py   |  4 +++
 docs/methodology/REGISTRY.md |  1 +
 tests/helpers/edid_dgp.py    | 27 +++++++++++---------
 tests/test_efficient_did.py  | 49 ++++++++++++++++++++++++++----------
 4 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
index a7944e93..02d53b03 100644
--- a/diff_diff/efficient_did.py
+++ b/diff_diff/efficient_did.py
@@ -52,6 +52,10 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
     Without covariates, uses a closed-form estimator based on within-group
     sample means and covariances.  With covariates, uses the doubly robust
     path: outcome regression via OLS plus propensity score ratios via logit.
+    The covariate path uses unconditional Omega* for pair weights (not the
+    kernel-smoothed conditional Omega*(X) from the paper), so it does not
+    achieve the full semiparametric efficiency bound but remains consistent
+    and doubly robust.
 
     Parameters
     ----------
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 11e288ce..4f1c2899 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -618,6 +618,7 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - **Single pre-treatment period (g=2)**: `V*_{gt}(X)` is 1x1, efficient weights are trivially 1, estimator collapses to standard DiD with single baseline
 - **Rank deficiency in `V*_{gt}(X)` or `Omega*_{gt}(X)`**: Inverse does not exist if outcome changes are linearly dependent conditional on covariates. Detect via matrix condition number; fall back to pseudoinverse or standard estimator
 - **Near-zero propensity scores**: Ratio `p_g(X)/p_{g'}(X)` explodes. Overlap assumption (O) rules this out in population; implement trimming or warn on finite-sample instability
+- **Note:** On propensity score estimation failure (logit convergence/separation), the estimator falls back to the unconditional population fraction ratio pi_g / pi_{g'}, matching the CallawaySantAnna fallback pattern (staggered.py:1516-1525). This replaces X-dependent propensity ratios with a constant, reducing to the no-covariates generated outcome for the affected (g', t_pre) pair. The DR property ensures consistency as long as the outcome regression is correctly specified.
 - **All units eventually treated**: Last cohort serves as "never-treated" by dropping last time period (Phase 1: raises ValueError; last-cohort-as-control fallback planned for Phase 2)
 - **Negative weights**: Explicitly stated as harmless for bias and beneficial for precision; arise from efficiency optimization under overidentification (Section 5.2)
 - **PT-Post regime (just-identified)**: Under PT-Post, EDiD automatically reduces to standard single-baseline estimator (Corollary 3.2). No downside to using EDiD -- it subsumes standard estimators
diff --git a/tests/helpers/edid_dgp.py b/tests/helpers/edid_dgp.py
index 103a7304..b79959ed 100644
--- a/tests/helpers/edid_dgp.py
+++ b/tests/helpers/edid_dgp.py
@@ -48,7 +48,15 @@ def make_compustat_dgp(
     ft[n_g5 : n_g5 + n_g8] = 8
 
     # Generate unit-level covariates (time-invariant)
-    x1 = rng.normal(0, 1, n_units)
+    # When confounding_strength > 0, shift x1 distribution by group so
+    # treated units have higher x1 on average (selection on observables).
+    if confounding_strength > 0:
+        x1 = np.empty(n_units)
+        x1[:n_g5] = rng.normal(1.0, 1.0, n_g5)  # G=5: higher x1
+        x1[n_g5 : n_g5 + n_g8] = rng.normal(0.5, 1.0, n_g8)  # G=8: moderate
+        x1[n_g5 + n_g8 :] = rng.normal(0.0, 1.0, n_units - n_g5 - n_g8)  # G=inf: baseline
+    else:
+        x1 = rng.normal(0, 1, n_units)
     x2 = rng.binomial(1, 0.5, n_units)
 
     units = np.repeat(np.arange(n_units), n_t)
@@ -86,17 +94,12 @@ def make_compustat_dgp(
         y += covariate_effect * x1_expanded
 
         if confounding_strength > 0:
-            # Confounding: x1 interacts with group membership to create
-            # differential trends. Parallel trends hold conditional on X
-            # but not unconditionally.
-            for i in range(n_units):
-                g = ft[i]
-                if np.isinf(g):
-                    continue
-                for t_idx in range(n_t):
-                    t_val = t_idx + 1
-                    # Group-specific time trend that depends on x1
-                    y[i * n_t + t_idx] += confounding_strength * x1[i] * (t_val - 1) / n_t
+            # Confounding: x1-dependent time trend applied to ALL units.
+            # Conditional on x1, all groups share the same trend → conditional PT holds.
+            # Unconditional PT fails because treated/control have different x1 distributions.
+            x1_expanded = np.repeat(x1, n_t)
+            time_trend = np.tile(np.arange(n_t), n_units) / n_t
+            y += confounding_strength * x1_expanded * time_trend
 
     result = pd.DataFrame({"unit": units, "time": times, "first_treat": ft_col, "y": y})
 
diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py
index a0bcf7af..84af5156 100644
--- a/tests/test_efficient_did.py
+++ b/tests/test_efficient_did.py
@@ -13,6 +13,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+from edid_dgp import make_compustat_dgp
 
 from diff_diff import CallawaySantAnna, EDiD, EfficientDiD
 from diff_diff.efficient_did_results import EfficientDiDResults
@@ -112,9 +113,6 @@ def _make_staggered_panel(
     )
 
 
-from edid_dgp import make_compustat_dgp
-
-
 def _make_compustat_dgp(n_units=400, n_periods=11, rho=0.0, seed=42):
     """Delegate to shared DGP in edid_dgp.py."""
     return make_compustat_dgp(n_units=n_units, n_periods=n_periods, rho=rho, seed=seed)
@@ -1137,26 +1135,33 @@ def test_covariates_produce_valid_se(self):
         assert abs(r_cov.overall_att - r_nocov.overall_att) < 0.2
 
     def test_covariates_recover_effect_under_confounding(self):
-        """DGP with confounding: DR should recover true ATT better than nocov."""
+        """DGP with confounding: DR should recover true ATT closer to truth than nocov.
+
+        The DGP adds x1-dependent time trends to ALL units and shifts x1
+        distribution by group, so unconditional PT fails but conditional PT holds.
+        True ATT is unchanged by confounding (only levels shift, not treatment).
+        """
+        from edid_dgp import true_overall_att
+
+        true_att = true_overall_att()
         df = _make_covariate_panel(
-            n_units=600,
+            n_units=900,
             covariate_effect=1.0,
-            confounding_strength=1.0,
+            confounding_strength=2.0,
             seed=123,
         )
         r_nocov = EfficientDiD(pt_assumption="post").fit(df, "y", "unit", "time", "first_treat")
         r_cov = EfficientDiD(pt_assumption="post").fit(
             df, "y", "unit", "time", "first_treat", covariates=["x1"]
         )
-        # Both should produce finite results
         assert np.isfinite(r_nocov.overall_att)
         assert np.isfinite(r_cov.overall_att)
-        # DR should be closer to the known treatment effect structure
-        # (we can't compute exact true ATT for confounded DGP easily,
-        # but we verify the DR path runs without error and produces
-        # different results from nocov)
-        assert r_cov.estimation_path == "dr"
-        assert r_nocov.estimation_path == "nocov"
+        # DR should be closer to the true ATT than nocov
+        bias_nocov = abs(r_nocov.overall_att - true_att)
+        bias_cov = abs(r_cov.overall_att - true_att)
+        assert (
+            bias_cov < bias_nocov
+        ), f"DR bias ({bias_cov:.4f}) should be smaller than nocov bias ({bias_nocov:.4f})"
 
     def test_empty_covariates_uses_nocov(self):
         """covariates=[] should normalize to nocov path."""
@@ -1303,6 +1308,24 @@ def test_many_covariates(self):
         )
         assert np.isfinite(result.overall_att)
 
+    def test_logit_failure_fallback(self):
+        """Logit failure should fall back to population fraction ratio with warning."""
+        from unittest.mock import patch
+
+        df = _make_covariate_panel(n_units=300, seed=88)
+        # Patch solve_logit to raise, simulating convergence failure
+        with patch(
+            "diff_diff.efficient_did_covariates.solve_logit",
+            side_effect=np.linalg.LinAlgError("Simulated logit failure"),
+        ):
+            with pytest.warns(UserWarning, match="Propensity score estimation failed"):
+                result = EfficientDiD(pt_assumption="post").fit(
+                    df, "y", "unit", "time", "first_treat", covariates=["x1"]
+                )
+        # Should still produce valid finite results via fallback
+        assert np.isfinite(result.overall_att)
+        assert result.overall_se > 0
+
     def test_shuffled_units_match_ordered(self):
         """Shuffled unit ordering must produce same ATT as original ordering.
 

From 74d7723c43c86a6c8783c74abe132d2960f6bda0 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 16:48:24 -0400
Subject: [PATCH 04/12] Replace logit with sieve ratio estimation and add
 conditional Omega*

Implements paper-faithful Section 4 nuisance estimation:

Sieve-based propensity ratio (Eq 4.1-4.2):
- Closed-form linear system per polynomial degree K
- AIC/BIC model selection across sieve dimensions
- Ratio clipping to [1/ratio_clip, ratio_clip] for positivity
- Eliminates logit convergence/separation issues entirely

Kernel-smoothed conditional Omega*(X) (Eq 3.12):
- Nadaraya-Watson estimator with Gaussian kernel and local means
- Silverman bandwidth by default, user-overridable
- Per-unit efficient weights w(X_i) from Omega*(X_i) inverse
- Plug-in EIF valid under Neyman orthogonality (Remark 4.2)

New params: sieve_k_max, sieve_criterion, ratio_clip, kernel_bandwidth
Removed: pscore_trim (replaced by ratio_clip)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 TODO.md                               |   2 -
 diff_diff/efficient_did.py            |  97 +++--
 diff_diff/efficient_did_covariates.py | 560 ++++++++++++++++++++------
 diff_diff/efficient_did_results.py    |   5 +-
 docs/methodology/REGISTRY.md          |  10 +-
 tests/test_efficient_did.py           |  68 ++--
 6 files changed, 544 insertions(+), 198 deletions(-)

diff --git a/TODO.md b/TODO.md
index 7e78854f..41dda2c6 100644
--- a/TODO.md
+++ b/TODO.md
@@ -53,8 +53,6 @@ Deferred items from PR reviews that were not addressed before merge.
 | EfficientDiD: warn when cohort share is very small (< 2 units or < 1% of sample) — inverted in Omega*/EIF | `efficient_did_weights.py` | #192 | Low |
 | EfficientDiD: API docs / tutorial page for new public estimator | `docs/` | #192 | Medium |
 | Multi-absorb weighted demeaning needs iterative alternating projections for N > 1 absorbed FE with survey weights; unweighted multi-absorb also uses single-pass (pre-existing, exact only for balanced panels) | `estimators.py` | #218 | Medium |
-| EfficientDiD: sieve-based propensity ratio estimation (Eq 4.1-4.2) with AIC/BIC selection — currently uses standard logit | `efficient_did_covariates.py` | — | Low |
-| EfficientDiD: kernel-smoothed conditional covariance Omega*(X) for X-dependent efficient weights — currently uses unconditional Omega* | `efficient_did_covariates.py` | — | Low |
 | TripleDifference power: `generate_ddd_data` is a fixed 2×2×2 cross-sectional DGP — no multi-period or unbalanced-group support. Add a `generate_ddd_panel_data` for panel DDD power analysis. | `prep_dgp.py`, `power.py` | #208 | Low |
 
 #### Performance
diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
index 02d53b03..25e1a658 100644
--- a/diff_diff/efficient_did.py
+++ b/diff_diff/efficient_did.py
@@ -25,9 +25,10 @@
 from diff_diff.efficient_did_covariates import (
     compute_eif_cov,
     compute_generated_outcomes_cov,
-    compute_omega_star_cov,
+    compute_omega_star_conditional,
+    compute_per_unit_weights,
     estimate_outcome_regression,
-    estimate_propensity_ratio,
+    estimate_propensity_ratio_sieve,
 )
 from diff_diff.efficient_did_results import EfficientDiDResults
 from diff_diff.efficient_did_weights import (
@@ -51,11 +52,9 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
 
     Without covariates, uses a closed-form estimator based on within-group
     sample means and covariances.  With covariates, uses the doubly robust
-    path: outcome regression via OLS plus propensity score ratios via logit.
-    The covariate path uses unconditional Omega* for pair weights (not the
-    kernel-smoothed conditional Omega*(X) from the paper), so it does not
-    achieve the full semiparametric efficiency bound but remains consistent
-    and doubly robust.
+    path: sieve-based propensity score ratios (Eq 4.1-4.2) with AIC/BIC
+    selection, OLS outcome regression, and kernel-smoothed conditional
+    Omega*(X) for per-unit efficient weights.
 
     Parameters
     ----------
@@ -77,9 +76,16 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
     anticipation : int, default 0
         Number of anticipation periods (shifts the effective treatment
         boundary forward by this amount).
-    pscore_trim : float, default 0.01
-        Propensity scores are clipped to ``[pscore_trim, 1-pscore_trim]``
-        before ratio computation.  Only used when covariates are provided.
+    sieve_k_max : int or None
+        Maximum polynomial degree for sieve ratio estimation. None = auto
+        (``min(floor(n_gp^{1/5}), 5)``). Only used with covariates.
+    sieve_criterion : str, default ``"bic"``
+        Information criterion for sieve degree selection: ``"aic"`` or ``"bic"``.
+    ratio_clip : float, default 20.0
+        Clip sieve propensity ratios to ``[1/ratio_clip, ratio_clip]``.
+    kernel_bandwidth : float or None
+        Bandwidth for Gaussian kernel in conditional Omega* estimation.
+        None = Silverman's rule-of-thumb (automatic).
 
     Examples
     --------
@@ -99,7 +105,10 @@ def __init__(
         bootstrap_weights: str = "rademacher",
         seed: Optional[int] = None,
         anticipation: int = 0,
-        pscore_trim: float = 0.01,
+        sieve_k_max: Optional[int] = None,
+        sieve_criterion: str = "bic",
+        ratio_clip: float = 20.0,
+        kernel_bandwidth: Optional[float] = None,
     ):
         if cluster is not None:
             raise NotImplementedError(
@@ -113,7 +122,10 @@ def __init__(
         self.bootstrap_weights = bootstrap_weights
         self.seed = seed
         self.anticipation = anticipation
-        self.pscore_trim = pscore_trim
+        self.sieve_k_max = sieve_k_max
+        self.sieve_criterion = sieve_criterion
+        self.ratio_clip = ratio_clip
+        self.kernel_bandwidth = kernel_bandwidth
         self.is_fitted_ = False
         self.results_: Optional[EfficientDiDResults] = None
         self._validate_params()
@@ -128,8 +140,12 @@ def _validate_params(self) -> None:
                 f"bootstrap_weights must be one of {valid_weights}, "
                 f"got '{self.bootstrap_weights}'"
             )
-        if not (0 < self.pscore_trim < 0.5):
-            raise ValueError(f"pscore_trim must be in (0, 0.5), got {self.pscore_trim}")
+        if self.sieve_criterion not in ("aic", "bic"):
+            raise ValueError(
+                f"sieve_criterion must be 'aic' or 'bic', got '{self.sieve_criterion}'"
+            )
+        if self.ratio_clip <= 1.0:
+            raise ValueError(f"ratio_clip must be > 1.0, got {self.ratio_clip}")
 
     # -- sklearn compatibility ------------------------------------------------
 
@@ -143,7 +159,10 @@ def get_params(self) -> Dict[str, Any]:
             "n_bootstrap": self.n_bootstrap,
             "bootstrap_weights": self.bootstrap_weights,
             "seed": self.seed,
-            "pscore_trim": self.pscore_trim,
+            "sieve_k_max": self.sieve_k_max,
+            "sieve_criterion": self.sieve_criterion,
+            "ratio_clip": self.ratio_clip,
+            "kernel_bandwidth": self.kernel_bandwidth,
         }
 
     def set_params(self, **params: Any) -> "EfficientDiD":
@@ -457,18 +476,20 @@ def fit(
                                 tpre_col_val,
                                 effective_p1_col,
                             )
-                        # r_{g, inf}(X) and r_{g, g'}(X)
+                        # r_{g, inf}(X) and r_{g, g'}(X) via sieve (Eq 4.1-4.2)
                         for comp in {np.inf, gp}:
                             rkey = (g, comp)
                             if rkey not in r_hat_cache:
                                 comp_mask = (
                                     never_treated_mask if np.isinf(comp) else cohort_masks[comp]
                                 )
-                                r_hat_cache[rkey] = estimate_propensity_ratio(
+                                r_hat_cache[rkey] = estimate_propensity_ratio_sieve(
                                     covariate_matrix,
                                     cohort_masks[g],
                                     comp_mask,
-                                    pscore_trim=self.pscore_trim,
+                                    k_max=self.sieve_k_max,
+                                    criterion=self.sieve_criterion,
+                                    ratio_clip=self.ratio_clip,
                                 )
 
                     # Per-unit DR generated outcomes: shape (n_units, H)
@@ -486,23 +507,34 @@ def fit(
                         r_hat_cache=r_hat_cache,
                     )
 
-                    # Average per pair → scalar generated outcomes
                     y_hat = np.mean(gen_out, axis=0)  # shape (H,)
 
-                    # Unconditional Omega* from per-unit generated outcomes
-                    omega = compute_omega_star_cov(gen_out)
+                    # Conditional Omega*(X): (n_units, H, H)
+                    omega_cond = compute_omega_star_conditional(
+                        target_g=g,
+                        target_t=t,
+                        valid_pairs=pairs,
+                        outcome_wide=outcome_wide,
+                        cohort_masks=cohort_masks,
+                        never_treated_mask=never_treated_mask,
+                        period_to_col=period_to_col,
+                        period_1_col=effective_p1_col,
+                        cohort_fractions=cohort_fractions,
+                        covariate_matrix=covariate_matrix,
+                        bandwidth=self.kernel_bandwidth,
+                    )
 
-                    # Efficient weights
-                    weights, _, cond_num = compute_efficient_weights(omega)
-                    stored_weights[(g, t)] = weights
-                    if omega.size > 0:
-                        stored_cond[(g, t)] = cond_num
+                    # Per-unit weights: (n_units, H)
+                    per_unit_w = compute_per_unit_weights(omega_cond)
 
-                    # ATT(g,t) = w @ y_hat
-                    att_gt = float(weights @ y_hat) if len(weights) > 0 else np.nan
+                    # ATT = mean_i( w(X_i) @ gen_out[i] )
+                    if per_unit_w.shape[1] > 0:
+                        att_gt = float(np.mean(np.sum(per_unit_w * gen_out, axis=1)))
+                    else:
+                        att_gt = np.nan
 
-                    # EIF from DR generated outcomes
-                    eif_vals = compute_eif_cov(weights, gen_out, y_hat, n_units)
+                    # EIF with per-unit weights (Remark 4.2: plug-in valid)
+                    eif_vals = compute_eif_cov(per_unit_w, gen_out, y_hat, n_units)
                     eif_by_gt[(g, t)] = eif_vals
                 else:
                     # No-covariates path (closed-form)
@@ -691,7 +723,10 @@ def fit(
             influence_functions=None,  # can store full EIF matrix if needed
             bootstrap_results=bootstrap_results,
             estimation_path="dr" if use_covariates else "nocov",
-            pscore_trim=self.pscore_trim,
+            sieve_k_max=self.sieve_k_max,
+            sieve_criterion=self.sieve_criterion,
+            ratio_clip=self.ratio_clip,
+            kernel_bandwidth=self.kernel_bandwidth,
         )
         self.is_fitted_ = True
         return self.results_
diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py
index 134b6b27..c48b1b1e 100644
--- a/diff_diff/efficient_did_covariates.py
+++ b/diff_diff/efficient_did_covariates.py
@@ -2,7 +2,8 @@
 Doubly robust math for the Efficient DiD estimator (with covariates).
 
 Implements the with-covariates path from Chen, Sant'Anna & Xie (2025):
-outcome regression via OLS, propensity score ratios via logistic regression,
+outcome regression via OLS, sieve-based propensity score ratios (Eq 4.1-4.2),
+kernel-smoothed conditional Omega*(X) for per-unit efficient weights,
 doubly robust generated outcomes (Eq 4.4), and the efficient influence
 function for analytical standard errors.
 
@@ -10,15 +11,18 @@
 """
 
 import warnings
-from typing import Dict, List, Tuple
+from itertools import combinations_with_replacement
+from math import comb
+from typing import Dict, List, Optional, Tuple
 
 import numpy as np
+from scipy.spatial.distance import cdist
 
-from diff_diff.linalg import (
-    _check_propensity_diagnostics,
-    solve_logit,
-    solve_ols,
-)
+from diff_diff.linalg import solve_ols
+
+# ---------------------------------------------------------------------------
+# Outcome regression
+# ---------------------------------------------------------------------------
 
 
 def estimate_outcome_regression(
@@ -52,15 +56,12 @@ def estimate_outcome_regression(
     m_hat : ndarray, shape (n_units,)
         Predicted ``E[Y_t - Y_{tpre} | X]`` for every unit.
     """
-    # Dependent variable: outcome change within the comparison group
     Y_group = outcome_wide[group_mask]
     delta_y = Y_group[:, t_col] - Y_group[:, tpre_col]
 
-    # Design matrix with intercept for the group
     X_group = covariate_matrix[group_mask]
     X_design = np.column_stack([np.ones(len(X_group)), X_group])
 
-    # Fit OLS — we only need coefficients, not vcov
     coef, _, _ = solve_ols(
         X_design,
         delta_y,
@@ -68,15 +69,10 @@ def estimate_outcome_regression(
         rank_deficient_action="warn",
     )
 
-    # Predict for all units
     X_all = np.column_stack([np.ones(len(covariate_matrix)), covariate_matrix])
-
-    # Handle NaN coefficients from rank-deficient fits: set NaN coefs to 0
-    # so prediction degrades gracefully (those terms contribute nothing)
     coef_safe = np.where(np.isfinite(coef), coef, 0.0)
     m_hat = X_all @ coef_safe
 
-    # Guard against non-finite predictions
     non_finite = ~np.isfinite(m_hat)
     if non_finite.any():
         n_bad = int(non_finite.sum())
@@ -91,83 +87,163 @@ def estimate_outcome_regression(
     return m_hat
 
 
-def estimate_propensity_ratio(
+# ---------------------------------------------------------------------------
+# Sieve-based propensity ratio estimation (Eq 4.1-4.2)
+# ---------------------------------------------------------------------------
+
+
+def _polynomial_sieve_basis(X: np.ndarray, degree: int) -> np.ndarray:
+    """Build polynomial sieve basis up to total degree K.
+
+    For d covariates and degree K, includes all monomials
+    ``X_1^{a_1} * ... * X_d^{a_d}`` where ``a_1 + ... + a_d <= K``,
+    including the intercept term (degree 0).
+
+    Standardizes X to zero mean, unit variance for numerical stability.
+
+    Parameters
+    ----------
+    X : ndarray, shape (n, d)
+        Covariate matrix.
+    degree : int
+        Maximum total polynomial degree.
+
+    Returns
+    -------
+    basis : ndarray, shape (n, n_basis)
+        Sieve basis matrix. ``n_basis = C(K+d, d)``.
+    """
+    n, d = X.shape
+
+    # Standardize for numerical stability
+    X_mean = X.mean(axis=0)
+    X_std = X.std(axis=0)
+    X_std[X_std < 1e-10] = 1.0  # avoid division by zero for constant columns
+    X_s = (X - X_mean) / X_std
+
+    # Build monomials: enumerate all (a_1, ..., a_d) with sum <= degree
+    columns = [np.ones(n)]  # degree-0 (intercept)
+    for total_deg in range(1, degree + 1):
+        for exponents in combinations_with_replacement(range(d), total_deg):
+            col = np.ones(n)
+            for idx in exponents:
+                col = col * X_s[:, idx]
+            columns.append(col)
+
+    return np.column_stack(columns)
+
+
+def estimate_propensity_ratio_sieve(
     covariate_matrix: np.ndarray,
     mask_g: np.ndarray,
     mask_gp: np.ndarray,
-    pscore_trim: float = 0.01,
+    k_max: Optional[int] = None,
+    criterion: str = "bic",
+    ratio_clip: float = 20.0,
 ) -> np.ndarray:
-    r"""Estimate propensity score ratio r_{g,g'}(X) = p_g(X) / p_{g'}(X).
+    r"""Estimate propensity ratio via sieve convex minimization (Eq 4.1-4.2).
+
+    Solves for each sieve degree K = 1, ..., k_max:
+
+    .. math::
+        \hat\beta_K = \arg\min_{\beta} \frac{1}{n}
+            \sum_i \bigl[ G_{g',i} (\psi^K(X_i)'\beta)^2
+            - 2 G_{g,i} (\psi^K(X_i)'\beta) \bigr]
 
-    Fits binary logistic regression on units in ``{g, g'}`` with ``D=1``
-    for ``G=g`` and ``D=0`` for ``G=g'``.  The fitted probability is
-    ``pscore = P(G=g | G in {g,g'}, X)``, and the ratio is computed as
-    ``pscore / (1 - pscore)`` (conditional odds).
+    The FOC gives a closed-form linear system (no iterative optimization):
+    ``(Psi_{g'}' Psi_{g'}) beta = Psi_g.sum(axis=0)``.
 
-    On logit failure (convergence, separation, LinAlgError), falls back to
-    the unconditional population fraction ratio ``n_g / n_{g'}``.
+    Selects K via AIC/BIC: ``IC(K) = 2*loss(K) + C_n*K/n``.
+
+    On singular basis: tries lower K.  Short-circuits r_{g,g}(X) = 1.
 
     Parameters
     ----------
     covariate_matrix : ndarray, shape (n_units, n_covariates)
-        Unit-level covariates.
     mask_g : ndarray of bool, shape (n_units,)
-        Mask for the target treatment group.
+        Target treatment group mask.
     mask_gp : ndarray of bool, shape (n_units,)
-        Mask for the comparison group.
-    pscore_trim : float, default 0.01
-        Propensity scores are clipped to ``[pscore_trim, 1-pscore_trim]``
-        before ratio computation.
+        Comparison group mask.
+    k_max : int or None
+        Maximum polynomial degree. None = ``min(floor(n_gp^{1/5}), 5)``.
+    criterion : str
+        ``"aic"`` or ``"bic"``.
+    ratio_clip : float
+        Clip ratios to ``[1/ratio_clip, ratio_clip]``.
 
     Returns
     -------
     ratio : ndarray, shape (n_units,)
-        Estimated ``r_{g,g'}(X_i)`` for every unit (extrapolated from
-        the fit on ``{g, g'}`` units).
+        Estimated ``r_{g,g'}(X_i)`` for every unit.
     """
-    n_g = int(np.sum(mask_g))
-    n_gp = int(np.sum(mask_gp))
     n_units = len(covariate_matrix)
+    n_gp = int(np.sum(mask_gp))
 
     # Short-circuit: r_{g,g}(X) = 1 for same-cohort comparisons (PT-All)
     if np.array_equal(mask_g, mask_gp):
         return np.ones(n_units)
 
-    # Stack covariates for the two groups
-    combined_mask = mask_g | mask_gp
-    X_combined = covariate_matrix[combined_mask]
-    # Treatment indicator: derive from mask_g so labels align with row order
-    D = mask_g[combined_mask].astype(float)
-
-    try:
-        beta, pscore_combined = solve_logit(X_combined, D, rank_deficient_action="warn")
-        _check_propensity_diagnostics(pscore_combined, pscore_trim)
-
-        # Predict for all units using the logit coefficients
-        X_all_with_intercept = np.column_stack([np.ones(n_units), covariate_matrix])
-        # Handle NaN coefficients from rank-deficient fits
-        beta_safe = np.where(np.isfinite(beta), beta, 0.0)
-        z = X_all_with_intercept @ beta_safe
-        z = np.clip(z, -500, 500)
-        pscore_all = 1.0 / (1.0 + np.exp(-z))
-
-    except (np.linalg.LinAlgError, ValueError):
-        warnings.warn(
-            "Propensity score estimation failed for a group pair. "
-            "Falling back to unconditional population fraction ratio.",
-            UserWarning,
-            stacklevel=2,
-        )
-        # Fallback: constant ratio n_g / n_gp for all units
-        flat_p = n_g / (n_g + n_gp) if (n_g + n_gp) > 0 else 0.5
-        pscore_all = np.full(n_units, flat_p)
+    d = covariate_matrix.shape[1]
+
+    # Default k_max: use comparison group size, not total n
+    if k_max is None:
+        k_max = min(int(n_gp**0.2), 5)
+    k_max = max(k_max, 1)
+
+    # Penalty multiplier for IC
+    n_total = int(np.sum(mask_g)) + n_gp
+    c_n = 2.0 if criterion == "aic" else np.log(max(n_total, 2))
+
+    best_ic = np.inf
+    best_ratio = np.ones(n_units)  # fallback: constant ratio 1
+
+    for K in range(1, k_max + 1):
+        n_basis = comb(K + d, d)
+
+        # Cap K so basis dimension < n_gp (avoid singular system)
+        if n_basis >= n_gp:
+            break
+
+        basis_all = _polynomial_sieve_basis(covariate_matrix, K)
+        Psi_gp = basis_all[mask_gp]  # (n_gp, n_basis)
+        Psi_g = basis_all[mask_g]  # (n_g, n_basis)
+
+        # Normal equations: (Psi_gp' Psi_gp) beta = Psi_g.sum(axis=0)
+        A = Psi_gp.T @ Psi_gp
+        b = Psi_g.sum(axis=0)
 
-    # Trim propensity scores
-    pscore_all = np.clip(pscore_all, pscore_trim, 1.0 - pscore_trim)
+        try:
+            beta = np.linalg.solve(A, b)
+        except np.linalg.LinAlgError:
+            continue  # singular — try next K
 
-    # Ratio: pscore / (1 - pscore) = conditional odds
-    ratio = pscore_all / (1.0 - pscore_all)
-    return ratio
+        # Check for NaN/Inf in solution
+        if not np.all(np.isfinite(beta)):
+            continue
+
+        # Predicted ratio for all units
+        r_hat = basis_all @ beta
+
+        # IC selection: loss at optimum = -(1/n) * b'beta
+        # Derivation: L(beta) = (1/n)(beta'A*beta - 2*b'beta).
+        # At optimum A*beta = b, so beta'A*beta = b'beta.
+        # Therefore L = (1/n)(b'beta - 2*b'beta) = -(1/n)*b'beta.
+        loss = -float(b @ beta) / n_total
+        ic_val = 2.0 * loss + c_n * n_basis / n_total
+
+        if ic_val < best_ic:
+            best_ic = ic_val
+            best_ratio = r_hat.copy()
+
+    # Clip: population ratio p_g(X)/p_{g'}(X) is non-negative
+    best_ratio = np.clip(best_ratio, 1.0 / ratio_clip, ratio_clip)
+
+    return best_ratio
+
+
+# ---------------------------------------------------------------------------
+# Doubly robust generated outcomes (Eq 4.4)
+# ---------------------------------------------------------------------------
 
 
 def compute_generated_outcomes_cov(
@@ -200,29 +276,6 @@ def compute_generated_outcomes_cov(
             -r_{g,g'}(X_i) * (G_{g',i} / pi_g)
                 * (Y_{i,tpre} - Y_{i,1} - m_{g',tpre,1}(X_i))
 
-    Parameters
-    ----------
-    target_g, target_t : float
-        Target group-time.
-    valid_pairs : list of (g', t_pre)
-        Valid comparison pairs.
-    outcome_wide : ndarray, shape (n_units, n_periods)
-    cohort_masks : dict
-        ``{cohort: bool_mask}``
-    never_treated_mask : ndarray of bool
-    period_to_col : dict
-    period_1_col : int
-        Column index of effective baseline period (Y_1).
-    cohort_fractions : dict
-        ``{cohort: n_cohort / n}``
-    m_hat_cache : dict
-        Outcome regression predictions, keyed by
-        ``(comparison_group, t_col, tpre_col)``.
-    r_hat_cache : dict
-        Propensity score ratios, keyed by ``(target_g, comparison_g)``.
-    never_treated_val : float
-        Sentinel for the never-treated group.
-
     Returns
     -------
     gen_out : ndarray, shape (n_units, H)
@@ -244,23 +297,18 @@ def compute_generated_outcomes_cov(
     for j, (gp, tpre) in enumerate(valid_pairs):
         tpre_col = period_to_col[tpre]
 
-        # Retrieve cached nuisance parameters
-        # m_{inf, t, tpre}(X)
         m_inf_t_tpre = m_hat_cache[(never_treated_val, t_col, tpre_col)]
-        # m_{g', tpre, 1}(X)
         m_gp_tpre_1 = m_hat_cache[(gp, tpre_col, y1_col)]
-        # r_{g, inf}(X)
         r_g_inf = r_hat_cache[(target_g, never_treated_val)]
-        # r_{g, g'}(X)
         r_g_gp = r_hat_cache[(target_g, gp)]
 
-        # ------- Term 1: treated units (G = g) -------
+        # Term 1: treated units
         if pi_g > 0:
             Y_t_minus_Y1 = outcome_wide[g_mask, t_col] - outcome_wide[g_mask, y1_col]
             residual_treated = Y_t_minus_Y1 - m_inf_t_tpre[g_mask] - m_gp_tpre_1[g_mask]
             gen_out[g_mask, j] += (1.0 / pi_g) * residual_treated
 
-        # ------- Term 2: never-treated units (G = inf) -------
+        # Term 2: never-treated units
         pi_inf = cohort_fractions.get(never_treated_val, 0.0)
         if pi_inf > 0:
             Y_t_minus_Ytpre = (
@@ -271,7 +319,7 @@ def compute_generated_outcomes_cov(
                 r_g_inf[never_treated_mask] * (1.0 / pi_g) * residual_inf
             )
 
-        # ------- Term 3: comparison cohort units (G = g') -------
+        # Term 3: comparison cohort units
         if np.isinf(gp):
             gp_mask = never_treated_mask
         else:
@@ -285,38 +333,296 @@ def compute_generated_outcomes_cov(
     return gen_out
 
 
-def compute_omega_star_cov(
-    generated_outcomes: np.ndarray,
+# ---------------------------------------------------------------------------
+# Kernel-smoothed conditional Omega* (Eq 3.12)
+# ---------------------------------------------------------------------------
+
+
+def _silverman_bandwidth(X: np.ndarray) -> float:
+    """Silverman's rule-of-thumb bandwidth for d-dimensional X.
+
+    ``h = (4 / (d + 2))^{1/(d+4)} * median_std * n^{-1/(d+4)}``
+    """
+    n, d = X.shape
+    stds = np.std(X, axis=0)
+    stds[stds < 1e-10] = 1.0
+    median_std = float(np.median(stds))
+    h = (4.0 / (d + 2)) ** (1.0 / (d + 4)) * median_std * n ** (-1.0 / (d + 4))
+    return max(h, 1e-10)
+
+
+def _kernel_weights_matrix(
+    X_all: np.ndarray,
+    X_group: np.ndarray,
+    bandwidth: float,
 ) -> np.ndarray:
-    """Unconditional sample covariance of per-unit DR generated outcomes.
+    """Gaussian kernel weight matrix.
 
-    Uses ``ddof=1`` for consistency with ``_sample_cov()`` in the nocov path.
+    Returns shape ``(n_all, n_group)`` where entry ``[i, j]`` is the
+    normalized kernel weight ``K_h(X_group[j], X_all[i])``.
+
+    Each row sums to 1 (Nadaraya-Watson normalization).
+    """
+    # Squared distances: (n_all, n_group)
+    dist_sq = cdist(X_all, X_group, metric="sqeuclidean")
+    # Gaussian kernel
+    raw = np.exp(-dist_sq / (2.0 * bandwidth**2))
+    # Normalize each row
+    row_sums = raw.sum(axis=1, keepdims=True)
+    row_sums[row_sums < 1e-15] = 1.0  # avoid division by zero
+    return raw / row_sums
+
+
+def _kernel_weighted_cov(
+    A: np.ndarray,
+    B: np.ndarray,
+    W: np.ndarray,
+) -> np.ndarray:
+    """Kernel-weighted local covariance.
 
     Parameters
     ----------
-    generated_outcomes : ndarray, shape (n_units, H)
-        Per-unit generated outcomes from :func:`compute_generated_outcomes_cov`.
+    A : ndarray, shape (n_group,)
+    B : ndarray, shape (n_group,)
+    W : ndarray, shape (n_all, n_group)
+        Normalized kernel weights (rows sum to 1).
+
+    Returns
+    -------
+    cov : ndarray, shape (n_all,)
+        ``Cov_hat(A, B | X_i)`` for each target unit i.
+    """
+    # Local means: (n_all,)
+    A_local = W @ A
+    B_local = W @ B
+
+    # Centered products: (n_all, n_group)
+    A_centered = A[np.newaxis, :] - A_local[:, np.newaxis]  # (n_all, n_group)
+    B_centered = B[np.newaxis, :] - B_local[:, np.newaxis]
+
+    # Weighted local covariance: (n_all,)
+    cov = np.sum(W * A_centered * B_centered, axis=1)
+    return cov
+
+
+def compute_omega_star_conditional(
+    target_g: float,
+    target_t: float,
+    valid_pairs: List[Tuple[float, float]],
+    outcome_wide: np.ndarray,
+    cohort_masks: Dict[float, np.ndarray],
+    never_treated_mask: np.ndarray,
+    period_to_col: Dict[float, int],
+    period_1_col: int,
+    cohort_fractions: Dict[float, float],
+    covariate_matrix: np.ndarray,
+    bandwidth: Optional[float] = None,
+    never_treated_val: float = np.inf,
+) -> np.ndarray:
+    r"""Kernel-smoothed conditional Omega\*(X_i) for each unit (Eq 3.12).
+
+    Estimates the five-term conditional covariance matrix using
+    Nadaraya-Watson kernel regression with Gaussian kernel and
+    local (kernel-weighted) means.
+
+    Parameters
+    ----------
+    target_g, target_t : float
+        Target group-time.
+    valid_pairs : list of (g', t_pre)
+    outcome_wide : ndarray, shape (n_units, n_periods)
+    cohort_masks, never_treated_mask, period_to_col, period_1_col,
+    cohort_fractions : pre-computed data structures
+    covariate_matrix : ndarray, shape (n_units, n_covariates)
+    bandwidth : float or None
+        Kernel bandwidth. None = Silverman's rule.
+    never_treated_val : float
 
     Returns
     -------
-    omega : ndarray, shape (H, H)
-        Covariance matrix.
+    omega : ndarray, shape (n_units, H, H)
+        Per-unit conditional covariance matrices.
     """
-    n, H = generated_outcomes.shape
+    H = len(valid_pairs)
+    n_units = outcome_wide.shape[0]
     if H == 0:
-        return np.empty((0, 0))
-    if n < 2:
-        return np.zeros((H, H))
+        return np.empty((n_units, 0, 0))
+
+    if bandwidth is None:
+        bandwidth = _silverman_bandwidth(covariate_matrix)
+
+    t_col = period_to_col[target_t]
+    y1_col = period_1_col
+
+    g_mask = cohort_masks[target_g]
+    pi_g = cohort_fractions[target_g]
+
+    Y_inf = outcome_wide[never_treated_mask]
+    X_inf = covariate_matrix[never_treated_mask]
+    pi_inf = cohort_fractions.get(never_treated_val, 0.0)
+
+    # Scalability warning
+    if n_units > 5000:
+        warnings.warn(
+            f"Conditional Omega* estimation with n={n_units} is expensive "
+            f"(O(n^2 * H^2)). Consider using fewer units or unconditional Omega*.",
+            UserWarning,
+            stacklevel=2,
+        )
+
+    # Pre-compute kernel weight matrices per group (reused across pairs)
+    # W_g[i, j] = normalized K_h(X_g[j], X_all[i])
+    Y_g = outcome_wide[g_mask]
+    X_g = covariate_matrix[g_mask]
+    Yg_t_minus_1 = Y_g[:, t_col] - Y_g[:, y1_col]
 
-    # Demean
-    means = generated_outcomes.mean(axis=0)  # shape (H,)
-    centered = generated_outcomes - means  # shape (n, H)
+    W_g = _kernel_weights_matrix(covariate_matrix, X_g, bandwidth)
+    W_inf = _kernel_weights_matrix(covariate_matrix, X_inf, bandwidth)
+
+    # Pre-compute per-pair differenced arrays for never-treated
+    inf_t_minus_tpre = {}
+    for _, tpre in valid_pairs:
+        tpre_col = period_to_col[tpre]
+        if tpre_col not in inf_t_minus_tpre:
+            inf_t_minus_tpre[tpre_col] = Y_inf[:, t_col] - Y_inf[:, tpre_col]
+
+    # Pre-compute kernel weights for comparison cohorts
+    W_gp_cache: Dict[float, np.ndarray] = {}
+    gp_outcomes_cache: Dict[float, np.ndarray] = {}
+
+    omega = np.zeros((n_units, H, H))
+
+    # Term 1: (1/pi_g) * Cov(Y_t-Y_1, Y_t-Y_1 | G=g, X) — same for all (j,k)
+    if pi_g > 0:
+        term1 = (1.0 / pi_g) * _kernel_weighted_cov(Yg_t_minus_1, Yg_t_minus_1, W_g)
+    else:
+        term1 = np.zeros(n_units)
+
+    for j in range(H):
+        gp_j, tpre_j = valid_pairs[j]
+        tpre_j_col = period_to_col[tpre_j]
+
+        for k in range(j, H):
+            gp_k, tpre_k = valid_pairs[k]
+            tpre_k_col = period_to_col[tpre_k]
+
+            val = term1.copy()
+
+            # Term 2: (1/pi_inf) * Cov(Y_t-Y_{tpre_j}, Y_t-Y_{tpre_k} | G=inf, X)
+            if pi_inf > 0:
+                val += (1.0 / pi_inf) * _kernel_weighted_cov(
+                    inf_t_minus_tpre[tpre_j_col],
+                    inf_t_minus_tpre[tpre_k_col],
+                    W_inf,
+                )
+
+            # Term 3: -1{g==g'_j}/pi_g * Cov(Y_t-Y_1, Y_{tpre_j}-Y_1 | G=g, X)
+            if gp_j == target_g and pi_g > 0:
+                g_tpre_j = Y_g[:, tpre_j_col] - Y_g[:, y1_col]
+                val -= (1.0 / pi_g) * _kernel_weighted_cov(Yg_t_minus_1, g_tpre_j, W_g)
+
+            # Term 4: -1{g==g'_k}/pi_g * Cov(Y_t-Y_1, Y_{tpre_k}-Y_1 | G=g, X)
+            if gp_k == target_g and pi_g > 0:
+                g_tpre_k = Y_g[:, tpre_k_col] - Y_g[:, y1_col]
+                val -= (1.0 / pi_g) * _kernel_weighted_cov(Yg_t_minus_1, g_tpre_k, W_g)
+
+            # Term 5: 1{g'_j==g'_k}/pi_{g'_j} * Cov(Y_{tpre_j}-Y_1, Y_{tpre_k}-Y_1 | G=g'_j, X)
+            if gp_j == gp_k:
+                if np.isinf(gp_j):
+                    if pi_inf > 0:
+                        inf_tpre_j = Y_inf[:, tpre_j_col] - Y_inf[:, y1_col]
+                        inf_tpre_k = Y_inf[:, tpre_k_col] - Y_inf[:, y1_col]
+                        val += (1.0 / pi_inf) * _kernel_weighted_cov(inf_tpre_j, inf_tpre_k, W_inf)
+                else:
+                    pi_gp_j = cohort_fractions.get(gp_j, 0.0)
+                    if pi_gp_j > 0:
+                        if gp_j not in W_gp_cache:
+                            X_gp = covariate_matrix[cohort_masks[gp_j]]
+                            W_gp_cache[gp_j] = _kernel_weights_matrix(
+                                covariate_matrix, X_gp, bandwidth
+                            )
+                            gp_outcomes_cache[gp_j] = outcome_wide[cohort_masks[gp_j]]
+                        W_gp = W_gp_cache[gp_j]
+                        Y_gp = gp_outcomes_cache[gp_j]
+                        gp_tpre_j = Y_gp[:, tpre_j_col] - Y_gp[:, y1_col]
+                        gp_tpre_k = Y_gp[:, tpre_k_col] - Y_gp[:, y1_col]
+                        val += (1.0 / pi_gp_j) * _kernel_weighted_cov(gp_tpre_j, gp_tpre_k, W_gp)
+
+            omega[:, j, k] = val
+            if j != k:
+                omega[:, k, j] = val
 
-    # Sample covariance with ddof=1
-    omega = (centered.T @ centered) / (n - 1)
     return omega
 
 
+# ---------------------------------------------------------------------------
+# Per-unit efficient weights from conditional Omega*
+# ---------------------------------------------------------------------------
+
+
+def compute_per_unit_weights(
+    omega_conditional: np.ndarray,
+    cond_threshold: float = 1e12,
+) -> np.ndarray:
+    """Per-unit efficient weights from conditional Omega* inverse.
+
+    ``w(X_i) = 1' Omega*(X_i)^{-1} / (1' Omega*(X_i)^{-1} 1)``
+
+    Falls back to pseudoinverse per unit if condition number exceeds threshold.
+
+    Parameters
+    ----------
+    omega_conditional : ndarray, shape (n_units, H, H)
+        Per-unit conditional covariance matrices.
+    cond_threshold : float
+        Condition number threshold for pseudoinverse fallback.
+
+    Returns
+    -------
+    weights : ndarray, shape (n_units, H)
+        Per-unit efficient combination weights (each row sums to 1).
+    """
+    n_units, H, _ = omega_conditional.shape
+    if H == 0:
+        return np.empty((n_units, 0))
+    if H == 1:
+        return np.ones((n_units, 1))
+
+    ones = np.ones(H)
+    weights = np.zeros((n_units, H))
+
+    for i in range(n_units):
+        omega_i = omega_conditional[i]
+
+        if np.allclose(omega_i, 0.0):
+            weights[i] = ones / H
+            continue
+
+        cond = float(np.linalg.cond(omega_i))
+        if cond > cond_threshold:
+            omega_inv = np.linalg.pinv(omega_i)
+        else:
+            try:
+                omega_inv = np.linalg.inv(omega_i)
+            except np.linalg.LinAlgError:
+                omega_inv = np.linalg.pinv(omega_i)
+
+        numerator = ones @ omega_inv
+        denominator = numerator @ ones
+
+        if abs(denominator) < 1e-15:
+            weights[i] = ones / H
+        else:
+            weights[i] = numerator / denominator
+
+    return weights
+
+
+# ---------------------------------------------------------------------------
+# EIF computation
+# ---------------------------------------------------------------------------
+
+
 def compute_eif_cov(
     weights: np.ndarray,
     generated_outcomes: np.ndarray,
@@ -325,11 +631,16 @@ def compute_eif_cov(
 ) -> np.ndarray:
     """Per-unit efficient influence function from DR generated outcomes.
 
-    ``EIF_i = sum_j w_j * (Y_hat_{j,i} - y_hat_j)``
+    Supports both global weights ``(H,)`` and per-unit weights ``(n_units, H)``.
+
+    The plug-in EIF treats estimated per-unit weights w(X_i) as fixed.
+    This is valid under Neyman orthogonality (Remark 4.2): estimation
+    error in the conditional Omega*(X) weights is second-order and does
+    not affect the first-order asymptotics of the EIF.
 
     Parameters
     ----------
-    weights : ndarray, shape (H,)
+    weights : ndarray, shape (H,) or (n_units, H)
         Efficient combination weights.
     generated_outcomes : ndarray, shape (n_units, H)
         Per-unit generated outcomes.
@@ -343,13 +654,16 @@ def compute_eif_cov(
     eif : ndarray, shape (n_units,)
         EIF value for every unit.
     """
-    H = len(weights)
-    if H == 0:
+    if weights.size == 0:
         return np.zeros(n_units)
 
-    # Demeaned generated outcomes: (n_units, H)
-    centered = generated_outcomes - y_hat_mean
+    centered = generated_outcomes - y_hat_mean  # (n_units, H)
+
+    if weights.ndim == 1:
+        # Global weights: (n_units,) = (n_units, H) @ (H,)
+        eif = centered @ weights
+    else:
+        # Per-unit weights: element-wise multiply then sum
+        eif = np.sum(weights * centered, axis=1)
 
-    # Weighted sum across pairs: (n_units,)
-    eif = centered @ weights
     return eif
diff --git a/diff_diff/efficient_did_results.py b/diff_diff/efficient_did_results.py
index 812451c0..88554298 100644
--- a/diff_diff/efficient_did_results.py
+++ b/diff_diff/efficient_did_results.py
@@ -105,7 +105,10 @@ class EfficientDiDResults:
     influence_functions: Optional["np.ndarray"] = field(default=None, repr=False)
     bootstrap_results: Optional["EDiDBootstrapResults"] = field(default=None, repr=False)
     estimation_path: str = "nocov"
-    pscore_trim: float = 0.01
+    sieve_k_max: Optional[int] = None
+    sieve_criterion: str = "bic"
+    ratio_clip: float = 20.0
+    kernel_bandwidth: Optional[float] = None
 
     def __repr__(self) -> str:
         sig = _get_significance_stars(self.overall_p_value)
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 4f1c2899..c1f61934 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -618,7 +618,7 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - **Single pre-treatment period (g=2)**: `V*_{gt}(X)` is 1x1, efficient weights are trivially 1, estimator collapses to standard DiD with single baseline
 - **Rank deficiency in `V*_{gt}(X)` or `Omega*_{gt}(X)`**: Inverse does not exist if outcome changes are linearly dependent conditional on covariates. Detect via matrix condition number; fall back to pseudoinverse or standard estimator
 - **Near-zero propensity scores**: Ratio `p_g(X)/p_{g'}(X)` explodes. Overlap assumption (O) rules this out in population; implement trimming or warn on finite-sample instability
-- **Note:** On propensity score estimation failure (logit convergence/separation), the estimator falls back to the unconditional population fraction ratio pi_g / pi_{g'}, matching the CallawaySantAnna fallback pattern (staggered.py:1516-1525). This replaces X-dependent propensity ratios with a constant, reducing to the no-covariates generated outcome for the affected (g', t_pre) pair. The DR property ensures consistency as long as the outcome regression is correctly specified.
+- **Note:** On sieve ratio estimation failure (singular basis matrix at all K values), the estimator falls back to a constant ratio of 1 for all units. This is an exception-triggered fallback — the outcome regression adjustment remains active, so the generated outcomes (Eq 4.4) still incorporate covariate information via the m_hat terms. The DR property ensures consistency as long as the outcome regression is correctly specified.
 - **All units eventually treated**: Last cohort serves as "never-treated" by dropping last time period (Phase 1: raises ValueError; last-cohort-as-control fallback planned for Phase 2)
 - **Negative weights**: Explicitly stated as harmless for bias and beneficial for precision; arise from efficiency optimization under overidentification (Section 5.2)
 - **PT-Post regime (just-identified)**: Under PT-Post, EDiD automatically reduces to standard single-baseline estimator (Corollary 3.2). No downside to using EDiD -- it subsumes standard estimators
@@ -660,8 +660,8 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - [x] Computes efficient weights from conditional covariance matrix inverse
 - [x] Doubly robust: consistent if either outcome regression or propensity score ratio is correct
 - [x] No-covariates case uses closed-form sample means/covariances (no tuning)
-- [ ] With covariates: sieve-based propensity ratio estimation with AIC/BIC selection
-- [ ] Kernel-smoothed conditional covariance estimation
+- [x] With covariates: sieve-based propensity ratio estimation with AIC/BIC selection
+- [x] Kernel-smoothed conditional covariance estimation
 - [x] Analytical SE from EIF sample variance
 - [ ] Cluster bootstrap SE option (recommended for small samples)
 - [x] Event-study aggregation ES(e) with cohort-size weights
@@ -669,8 +669,8 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - [x] Each ATT(g,t) can be estimated independently (parallelizable)
 - [x] Absorbing treatment validation
 - [x] Overlap diagnostics for propensity score ratios
-- **Deviation from paper:** Propensity score ratios estimated via standard logistic regression rather than sieve-based convex minimization (Eq 4.1-4.2). Standard logit is a valid approach under the doubly robust property. Sieve estimation with AIC/BIC selection may be added for improved efficiency in a future version.
-- **Deviation from paper:** Unconditional covariance matrix Omega* used for efficient weights across (g', t_pre) pairs, rather than kernel-smoothed conditional covariance Omega*(X). This gives a valid doubly robust estimator with correct EIF-based SEs. Conditional weights (X-dependent) may be added later for full semiparametric efficiency.
+- **Note:** Sieve ratio estimation uses polynomial basis functions (total degree up to K) with AIC/BIC model selection. The paper describes sieve estimators generally without specifying a particular basis family; polynomial sieves are a standard choice (Section 4, Eq 4.2). Negative sieve ratio predictions are clipped to a small positive value since the population ratio p_g(X)/p_{g'}(X) is non-negative.
+- **Note:** Kernel-smoothed conditional covariance Omega*(X) uses Gaussian kernel with Silverman's rule-of-thumb bandwidth by default. The paper specifies kernel smoothing (step 5, Section 4) without mandating a particular kernel or bandwidth selection method.
 
 ---
 
diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py
index 84af5156..863e0a16 100644
--- a/tests/test_efficient_did.py
+++ b/tests/test_efficient_did.py
@@ -1186,19 +1186,23 @@ def test_nan_covariates_raises(self):
         with pytest.raises(ValueError, match="non-finite"):
             EfficientDiD().fit(df, "y", "unit", "time", "first_treat", covariates=["x1"])
 
-    def test_pscore_trim_validation(self):
-        with pytest.raises(ValueError, match="pscore_trim"):
-            EfficientDiD(pscore_trim=0.0)
-        with pytest.raises(ValueError, match="pscore_trim"):
-            EfficientDiD(pscore_trim=0.5)
-        with pytest.raises(ValueError, match="pscore_trim"):
-            EfficientDiD(pscore_trim=-0.1)
-
-    def test_pscore_trim_in_get_params(self):
-        edid = EfficientDiD(pscore_trim=0.05)
+    def test_ratio_clip_validation(self):
+        with pytest.raises(ValueError, match="ratio_clip"):
+            EfficientDiD(ratio_clip=0.5)
+        with pytest.raises(ValueError, match="ratio_clip"):
+            EfficientDiD(ratio_clip=1.0)
+
+    def test_sieve_criterion_validation(self):
+        with pytest.raises(ValueError, match="sieve_criterion"):
+            EfficientDiD(sieve_criterion="invalid")
+
+    def test_new_params_in_get_params(self):
+        edid = EfficientDiD(sieve_k_max=3, sieve_criterion="aic", ratio_clip=10.0)
         params = edid.get_params()
-        assert "pscore_trim" in params
-        assert params["pscore_trim"] == 0.05
+        assert params["sieve_k_max"] == 3
+        assert params["sieve_criterion"] == "aic"
+        assert params["ratio_clip"] == 10.0
+        assert "kernel_bandwidth" in params
 
     def test_time_varying_covariates_raises(self):
         df = _make_covariate_panel()
@@ -1308,21 +1312,12 @@ def test_many_covariates(self):
         )
         assert np.isfinite(result.overall_att)
 
-    def test_logit_failure_fallback(self):
-        """Logit failure should fall back to population fraction ratio with warning."""
-        from unittest.mock import patch
-
+    def test_sieve_ratio_produces_valid_results(self):
+        """Sieve ratio estimation produces finite ATT with valid ratios."""
         df = _make_covariate_panel(n_units=300, seed=88)
-        # Patch solve_logit to raise, simulating convergence failure
-        with patch(
-            "diff_diff.efficient_did_covariates.solve_logit",
-            side_effect=np.linalg.LinAlgError("Simulated logit failure"),
-        ):
-            with pytest.warns(UserWarning, match="Propensity score estimation failed"):
-                result = EfficientDiD(pt_assumption="post").fit(
-                    df, "y", "unit", "time", "first_treat", covariates=["x1"]
-                )
-        # Should still produce valid finite results via fallback
+        result = EfficientDiD(pt_assumption="post", sieve_k_max=3, sieve_criterion="bic").fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x1"]
+        )
         assert np.isfinite(result.overall_att)
         assert result.overall_se > 0
 
@@ -1353,8 +1348,12 @@ def test_shuffled_units_match_ordered(self):
             f"vs shuffled={r_shuffled.overall_att:.6f}"
         )
 
-    def test_near_separation_trimming_warns(self):
-        """Near-separation covariates should trigger overlap warning."""
+    def test_extreme_covariates_still_valid(self):
+        """Extreme covariates (near-separation) should still produce valid results.
+
+        The sieve ratio estimator clips extreme ratios; conditional Omega*
+        handles the resulting variation in weights gracefully.
+        """
         df = _make_covariate_panel(n_units=300, seed=77)
         # Create a covariate that nearly separates treated from control
         rng = np.random.default_rng(77)
@@ -1363,17 +1362,14 @@ def test_near_separation_trimming_warns(self):
         ft_map = df.groupby("unit")["first_treat"].first()
         sep_vals = np.where(
             ft_map.values < np.inf,
-            5.0 + rng.normal(0, 0.01, n_units),  # treated: ~5
-            -5.0 + rng.normal(0, 0.01, n_units),  # control: ~-5
+            5.0 + rng.normal(0, 0.01, n_units),
+            -5.0 + rng.normal(0, 0.01, n_units),
         )
         sep_map = dict(zip(units, sep_vals))
         df["x_sep"] = df["unit"].map(sep_map)
-        with warnings.catch_warnings(record=True):
-            warnings.simplefilter("always")
-            result = EfficientDiD(pt_assumption="post").fit(
-                df, "y", "unit", "time", "first_treat", covariates=["x_sep"]
-            )
-        # Should still produce valid results despite trimming
+        result = EfficientDiD(pt_assumption="post").fit(
+            df, "y", "unit", "time", "first_treat", covariates=["x_sep"]
+        )
         assert np.isfinite(result.overall_att)
         assert result.overall_se > 0
 

From 2c7770291f394064750cabc739fbafe3165a99bb Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 17:12:19 -0400
Subject: [PATCH 05/12] Fix EIF centering for per-unit weights, add overlap
 diagnostics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

P0 fix: EIF now centers on scalar ATT (EIF_i = w_i @ gen_out_i - ATT)
instead of per-pair means, ensuring mean(EIF) ≈ 0 when weights vary
by unit. Added unit test verifying mean-zero property.

P1 fix: Add overlap warning when sieve ratios require clipping, with
test asserting the warning fires on near-separation covariates.

P1 fix: Document unconditional-pi Omega* as deviation in REGISTRY.md;
downgrade "efficient" claim in docstring for covariate path.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/efficient_did.py            |  7 +++--
 diff_diff/efficient_did_covariates.py | 43 ++++++++++++++++++---------
 docs/methodology/REGISTRY.md          |  1 +
 tests/test_efficient_did.py           | 30 ++++++++++++-------
 4 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
index 25e1a658..1cfc4d8d 100644
--- a/diff_diff/efficient_did.py
+++ b/diff_diff/efficient_did.py
@@ -54,7 +54,9 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
     sample means and covariances.  With covariates, uses the doubly robust
     path: sieve-based propensity score ratios (Eq 4.1-4.2) with AIC/BIC
     selection, OLS outcome regression, and kernel-smoothed conditional
-    Omega*(X) for per-unit efficient weights.
+    Omega*(X) for per-unit efficient weights.  The conditional Omega*
+    currently uses unconditional cohort fractions rather than per-unit
+    conditional propensities (see REGISTRY.md deviation note).
 
     Parameters
     ----------
@@ -534,7 +536,8 @@ def fit(
                         att_gt = np.nan
 
                     # EIF with per-unit weights (Remark 4.2: plug-in valid)
-                    eif_vals = compute_eif_cov(per_unit_w, gen_out, y_hat, n_units)
+                    # Center on scalar ATT, not per-pair means (ensures mean(EIF) ≈ 0)
+                    eif_vals = compute_eif_cov(per_unit_w, gen_out, att_gt, n_units)
                     eif_by_gt[(g, t)] = eif_vals
                 else:
                     # No-covariates path (closed-form)
diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py
index c48b1b1e..a9c80a6d 100644
--- a/diff_diff/efficient_did_covariates.py
+++ b/diff_diff/efficient_did_covariates.py
@@ -235,6 +235,19 @@ def estimate_propensity_ratio_sieve(
             best_ic = ic_val
             best_ratio = r_hat.copy()
 
+    # Overlap diagnostics: warn if ratios require significant clipping
+    n_extreme = int(np.sum((best_ratio < 1.0 / ratio_clip) | (best_ratio > ratio_clip)))
+    if n_extreme > 0:
+        pct = 100.0 * n_extreme / n_units
+        warnings.warn(
+            f"Sieve propensity ratios for {n_extreme} of {n_units} units "
+            f"({pct:.1f}%) were outside [{1.0/ratio_clip:.2f}, {ratio_clip:.1f}] "
+            f"and will be clipped. This may indicate overlap assumption "
+            f"violations (near-zero propensity scores for some covariate values).",
+            UserWarning,
+            stacklevel=2,
+        )
+
     # Clip: population ratio p_g(X)/p_{g'}(X) is non-negative
     best_ratio = np.clip(best_ratio, 1.0 / ratio_clip, ratio_clip)
 
@@ -626,17 +639,19 @@ def compute_per_unit_weights(
 def compute_eif_cov(
     weights: np.ndarray,
     generated_outcomes: np.ndarray,
-    y_hat_mean: np.ndarray,
+    att_gt: float,
     n_units: int,
 ) -> np.ndarray:
     """Per-unit efficient influence function from DR generated outcomes.
 
     Supports both global weights ``(H,)`` and per-unit weights ``(n_units, H)``.
 
-    The plug-in EIF treats estimated per-unit weights w(X_i) as fixed.
-    This is valid under Neyman orthogonality (Remark 4.2): estimation
-    error in the conditional Omega*(X) weights is second-order and does
-    not affect the first-order asymptotics of the EIF.
+    For global weights: ``EIF_i = w @ (gen_out_i - y_bar) = w @ gen_out_i - ATT``
+    For per-unit weights: ``EIF_i = w(X_i) @ gen_out_i - ATT``
+
+    In both cases the EIF centers on the scalar ATT estimate, ensuring
+    ``mean(EIF) ≈ 0``. The plug-in EIF treats estimated per-unit weights
+    as fixed, valid under Neyman orthogonality (Remark 4.2).
 
     Parameters
     ----------
@@ -644,26 +659,26 @@ def compute_eif_cov(
         Efficient combination weights.
     generated_outcomes : ndarray, shape (n_units, H)
         Per-unit generated outcomes.
-    y_hat_mean : ndarray, shape (H,)
-        Sample average of generated outcomes per pair.
+    att_gt : float
+        Scalar ATT estimate for this (g, t) cell.
     n_units : int
         Total number of units.
 
     Returns
     -------
     eif : ndarray, shape (n_units,)
-        EIF value for every unit.
+        EIF value for every unit. Sample mean is approximately zero.
     """
     if weights.size == 0:
         return np.zeros(n_units)
 
-    centered = generated_outcomes - y_hat_mean  # (n_units, H)
-
     if weights.ndim == 1:
-        # Global weights: (n_units,) = (n_units, H) @ (H,)
-        eif = centered @ weights
+        # Global weights: w @ gen_out_i for each unit
+        weighted_scores = generated_outcomes @ weights  # (n_units,)
     else:
-        # Per-unit weights: element-wise multiply then sum
-        eif = np.sum(weights * centered, axis=1)
+        # Per-unit weights: w_i @ gen_out_i for each unit
+        weighted_scores = np.sum(weights * generated_outcomes, axis=1)
 
+    # Center on the scalar ATT estimate (ensures mean(EIF) ≈ 0)
+    eif = weighted_scores - att_gt
     return eif
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index c1f61934..d46418c5 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -671,6 +671,7 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - [x] Overlap diagnostics for propensity score ratios
 - **Note:** Sieve ratio estimation uses polynomial basis functions (total degree up to K) with AIC/BIC model selection. The paper describes sieve estimators generally without specifying a particular basis family; polynomial sieves are a standard choice (Section 4, Eq 4.2). Negative sieve ratio predictions are clipped to a small positive value since the population ratio p_g(X)/p_{g'}(X) is non-negative.
 - **Note:** Kernel-smoothed conditional covariance Omega*(X) uses Gaussian kernel with Silverman's rule-of-thumb bandwidth by default. The paper specifies kernel smoothing (step 5, Section 4) without mandating a particular kernel or bandwidth selection method.
+- **Note (deviation from source):** The conditional covariance Omega*(X) scales each term by unconditional cohort fractions pi_g rather than conditional generalized propensities p_g(X) as in Eq 3.12. Implementing the full conditional propensity scaling requires per-unit group probability estimation (algorithm step 4: s_hat_{g'}(X) = 1/p_{g'}(X) via convex minimization), which is deferred. The unconditional-pi approximation is consistent under double robustness but does not achieve the full conditional efficiency bound of Eq 3.12.
 
 ---
 
diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py
index 863e0a16..920ff993 100644
--- a/tests/test_efficient_did.py
+++ b/tests/test_efficient_did.py
@@ -1348,14 +1348,9 @@ def test_shuffled_units_match_ordered(self):
             f"vs shuffled={r_shuffled.overall_att:.6f}"
         )
 
-    def test_extreme_covariates_still_valid(self):
-        """Extreme covariates (near-separation) should still produce valid results.
-
-        The sieve ratio estimator clips extreme ratios; conditional Omega*
-        handles the resulting variation in weights gracefully.
-        """
+    def test_extreme_covariates_warns_overlap(self):
+        """Extreme covariates should trigger overlap warning and still produce valid results."""
         df = _make_covariate_panel(n_units=300, seed=77)
-        # Create a covariate that nearly separates treated from control
         rng = np.random.default_rng(77)
         units = df["unit"].unique()
         n_units = len(units)
@@ -1367,12 +1362,27 @@ def test_extreme_covariates_still_valid(self):
         )
         sep_map = dict(zip(units, sep_vals))
         df["x_sep"] = df["unit"].map(sep_map)
-        result = EfficientDiD(pt_assumption="post").fit(
-            df, "y", "unit", "time", "first_treat", covariates=["x_sep"]
-        )
+        with pytest.warns(UserWarning, match="overlap|clipped|propensity"):
+            result = EfficientDiD(pt_assumption="post").fit(
+                df, "y", "unit", "time", "first_treat", covariates=["x_sep"]
+            )
         assert np.isfinite(result.overall_att)
         assert result.overall_se > 0
 
+    def test_eif_mean_approximately_zero(self):
+        """EIF with per-unit weights should have sample mean ≈ 0."""
+        from diff_diff.efficient_did_covariates import compute_eif_cov
+
+        rng = np.random.default_rng(42)
+        n, H = 200, 3
+        gen_out = rng.normal(0, 1, (n, H))
+        # Non-constant per-unit weights (each row sums to 1)
+        raw_w = rng.exponential(1, (n, H))
+        per_unit_w = raw_w / raw_w.sum(axis=1, keepdims=True)
+        att = float(np.mean(np.sum(per_unit_w * gen_out, axis=1)))
+        eif = compute_eif_cov(per_unit_w, gen_out, att, n)
+        assert abs(np.mean(eif)) < 1e-10, f"EIF mean should be ≈ 0, got {np.mean(eif):.2e}"
+
 
 class TestCovariatesBootstrap:
     """Tier 2: bootstrap with covariates."""

From 7608264bf1d0be401abcc8579719b9ea236b1931 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 17:23:27 -0400
Subject: [PATCH 06/12] Tighten DR tuning parameter validation

Validate kernel_bandwidth (None or finite > 0), ratio_clip (finite > 1.0),
and sieve_k_max (None or positive int) in _validate_params(). Rejects
NaN, Inf, zero, and negative values with clear ValueError messages.

Add pytest.raises tests for all invalid parameter combinations.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/efficient_did.py  | 16 ++++++++++++++--
 tests/test_efficient_did.py | 26 ++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
index 1cfc4d8d..e624ea57 100644
--- a/diff_diff/efficient_did.py
+++ b/diff_diff/efficient_did.py
@@ -146,8 +146,20 @@ def _validate_params(self) -> None:
             raise ValueError(
                 f"sieve_criterion must be 'aic' or 'bic', got '{self.sieve_criterion}'"
             )
-        if self.ratio_clip <= 1.0:
-            raise ValueError(f"ratio_clip must be > 1.0, got {self.ratio_clip}")
+        if not (np.isfinite(self.ratio_clip) and self.ratio_clip > 1.0):
+            raise ValueError(f"ratio_clip must be finite and > 1.0, got {self.ratio_clip}")
+        if self.kernel_bandwidth is not None:
+            if not (np.isfinite(self.kernel_bandwidth) and self.kernel_bandwidth > 0):
+                raise ValueError(
+                    f"kernel_bandwidth must be finite and > 0 (or None for auto), "
+                    f"got {self.kernel_bandwidth}"
+                )
+        if self.sieve_k_max is not None:
+            if not (isinstance(self.sieve_k_max, (int, np.integer)) and self.sieve_k_max > 0):
+                raise ValueError(
+                    f"sieve_k_max must be a positive integer (or None for auto), "
+                    f"got {self.sieve_k_max}"
+                )
 
     # -- sklearn compatibility ------------------------------------------------
 
diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py
index 920ff993..82470531 100644
--- a/tests/test_efficient_did.py
+++ b/tests/test_efficient_did.py
@@ -1191,6 +1191,32 @@ def test_ratio_clip_validation(self):
             EfficientDiD(ratio_clip=0.5)
         with pytest.raises(ValueError, match="ratio_clip"):
             EfficientDiD(ratio_clip=1.0)
+        with pytest.raises(ValueError, match="ratio_clip"):
+            EfficientDiD(ratio_clip=np.nan)
+        with pytest.raises(ValueError, match="ratio_clip"):
+            EfficientDiD(ratio_clip=np.inf)
+
+    def test_kernel_bandwidth_validation(self):
+        with pytest.raises(ValueError, match="kernel_bandwidth"):
+            EfficientDiD(kernel_bandwidth=0.0)
+        with pytest.raises(ValueError, match="kernel_bandwidth"):
+            EfficientDiD(kernel_bandwidth=-1.0)
+        with pytest.raises(ValueError, match="kernel_bandwidth"):
+            EfficientDiD(kernel_bandwidth=np.nan)
+        with pytest.raises(ValueError, match="kernel_bandwidth"):
+            EfficientDiD(kernel_bandwidth=np.inf)
+        # None is valid (auto bandwidth)
+        edid = EfficientDiD(kernel_bandwidth=None)
+        assert edid.kernel_bandwidth is None
+
+    def test_sieve_k_max_validation(self):
+        with pytest.raises(ValueError, match="sieve_k_max"):
+            EfficientDiD(sieve_k_max=0)
+        with pytest.raises(ValueError, match="sieve_k_max"):
+            EfficientDiD(sieve_k_max=-1)
+        # None is valid (auto)
+        edid = EfficientDiD(sieve_k_max=None)
+        assert edid.sieve_k_max is None
 
     def test_sieve_criterion_validation(self):
         with pytest.raises(ValueError, match="sieve_criterion"):

From 16dc2f9f5540bd40ee559f78d9bf3b4198151108 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 17:43:41 -0400
Subject: [PATCH 07/12] Implement sieve inverse propensity for conditional
 Omega* (Eq 3.12 step 4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace unconditional cohort fractions pi_g in Omega*(X) with per-unit
sieve-estimated inverse propensities s_hat_{g'}(X) = 1/p_{g'}(X), matching
Eq 3.12 and the paper's algorithm step 4.

The inverse propensity sieve uses the same polynomial basis and AIC/BIC
selection as the ratio estimator. FOC: (Psi_{g'}' Psi_{g'}) beta =
Psi_all.sum(axis=0) — same closed-form linear system structure.

This eliminates the last documented methodology deviation from the paper.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/efficient_did.py            |  26 +++-
 diff_diff/efficient_did_covariates.py | 181 +++++++++++++++++++-------
 docs/methodology/REGISTRY.md          |   2 +-
 3 files changed, 159 insertions(+), 50 deletions(-)

diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
index e624ea57..22a996f6 100644
--- a/diff_diff/efficient_did.py
+++ b/diff_diff/efficient_did.py
@@ -27,6 +27,7 @@
     compute_generated_outcomes_cov,
     compute_omega_star_conditional,
     compute_per_unit_weights,
+    estimate_inverse_propensity_sieve,
     estimate_outcome_regression,
     estimate_propensity_ratio_sieve,
 )
@@ -53,10 +54,9 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
     Without covariates, uses a closed-form estimator based on within-group
     sample means and covariances.  With covariates, uses the doubly robust
     path: sieve-based propensity score ratios (Eq 4.1-4.2) with AIC/BIC
-    selection, OLS outcome regression, and kernel-smoothed conditional
-    Omega*(X) for per-unit efficient weights.  The conditional Omega*
-    currently uses unconditional cohort fractions rather than per-unit
-    conditional propensities (see REGISTRY.md deviation note).
+    selection, OLS outcome regression, sieve-estimated inverse propensities
+    (algorithm step 4), and kernel-smoothed conditional Omega*(X) with
+    per-unit efficient weights (Eq 3.12).
 
     Parameters
     ----------
@@ -356,6 +356,7 @@ def fit(
         covariate_matrix: Optional[np.ndarray] = None
         m_hat_cache: Dict[Tuple, np.ndarray] = {}
         r_hat_cache: Dict[Tuple[float, float], np.ndarray] = {}
+        s_hat_cache: Dict[float, np.ndarray] = {}  # inverse propensities per group
 
         if use_covariates:
             assert covariates is not None  # for type narrowing
@@ -523,7 +524,21 @@ def fit(
 
                     y_hat = np.mean(gen_out, axis=0)  # shape (H,)
 
-                    # Conditional Omega*(X): (n_units, H, H)
+                    # Inverse propensity estimation (algorithm step 4)
+                    # s_hat_{g'}(X) = 1/p_{g'}(X) for Eq 3.12 scaling
+                    for group_id in {g, np.inf} | {gp for gp, _ in pairs}:
+                        if group_id not in s_hat_cache:
+                            group_mask_s = (
+                                never_treated_mask if np.isinf(group_id) else cohort_masks[group_id]
+                            )
+                            s_hat_cache[group_id] = estimate_inverse_propensity_sieve(
+                                covariate_matrix,
+                                group_mask_s,
+                                k_max=self.sieve_k_max,
+                                criterion=self.sieve_criterion,
+                            )
+
+                    # Conditional Omega*(X) with per-unit propensities (Eq 3.12)
                     omega_cond = compute_omega_star_conditional(
                         target_g=g,
                         target_t=t,
@@ -535,6 +550,7 @@ def fit(
                         period_1_col=effective_p1_col,
                         cohort_fractions=cohort_fractions,
                         covariate_matrix=covariate_matrix,
+                        s_hat_cache=s_hat_cache,
                         bandwidth=self.kernel_bandwidth,
                     )
 
diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py
index a9c80a6d..80723904 100644
--- a/diff_diff/efficient_did_covariates.py
+++ b/diff_diff/efficient_did_covariates.py
@@ -254,6 +254,97 @@ def estimate_propensity_ratio_sieve(
     return best_ratio
 
 
+# ---------------------------------------------------------------------------
+# Sieve-based inverse propensity estimation (Algorithm step 4)
+# ---------------------------------------------------------------------------
+
+
+def estimate_inverse_propensity_sieve(
+    covariate_matrix: np.ndarray,
+    group_mask: np.ndarray,
+    k_max: Optional[int] = None,
+    criterion: str = "bic",
+) -> np.ndarray:
+    r"""Estimate s_{g'}(X) = 1/p_{g'}(X) via sieve convex minimization.
+
+    Solves for each sieve degree K:
+
+    .. math::
+        \hat\beta_K = \arg\min_\beta \frac{1}{n}
+            \sum_i \bigl[ G_{g',i} (\psi^K(X_i)'\beta)^2
+            - 2 (\psi^K(X_i)'\beta) \bigr]
+
+    FOC: ``(Psi_{g'}' Psi_{g'}) beta = Psi_all.sum(axis=0)``
+
+    This is the same structure as the ratio estimator but with all
+    units on the RHS (not just group g), following the paper's
+    algorithm step 4.
+
+    Parameters
+    ----------
+    covariate_matrix : ndarray, shape (n_units, n_covariates)
+    group_mask : ndarray of bool, shape (n_units,)
+        Mask for the group whose inverse propensity to estimate.
+    k_max : int or None
+        Maximum polynomial degree. None = auto.
+    criterion : str
+        ``"aic"`` or ``"bic"``.
+
+    Returns
+    -------
+    s_hat : ndarray, shape (n_units,)
+        Estimated ``1/p_{g'}(X_i)`` for every unit. Clipped to [1, n].
+    """
+    n_units = len(covariate_matrix)
+    n_group = int(np.sum(group_mask))
+    d = covariate_matrix.shape[1]
+
+    if n_group == 0:
+        return np.ones(n_units)
+
+    if k_max is None:
+        k_max = min(int(n_group**0.2), 5)
+    k_max = max(k_max, 1)
+
+    c_n = 2.0 if criterion == "aic" else np.log(max(n_units, 2))
+
+    best_ic = np.inf
+    best_s = np.full(n_units, n_units / n_group)  # fallback: unconditional
+
+    for K in range(1, k_max + 1):
+        n_basis = comb(K + d, d)
+        if n_basis >= n_group:
+            break
+
+        basis_all = _polynomial_sieve_basis(covariate_matrix, K)
+        Psi_gp = basis_all[group_mask]
+
+        A = Psi_gp.T @ Psi_gp
+        # RHS: sum of basis over ALL units (not just one group)
+        b = basis_all.sum(axis=0)
+
+        try:
+            beta = np.linalg.solve(A, b)
+        except np.linalg.LinAlgError:
+            continue
+        if not np.all(np.isfinite(beta)):
+            continue
+
+        s_hat = basis_all @ beta
+
+        # IC: loss = -(1/n) * b'beta (same derivation as ratio estimator)
+        loss = -float(b @ beta) / n_units
+        ic_val = 2.0 * loss + c_n * n_basis / n_units
+
+        if ic_val < best_ic:
+            best_ic = ic_val
+            best_s = s_hat.copy()
+
+    # s = 1/p must be >= 1 (since p <= 1) and bounded above
+    best_s = np.clip(best_s, 1.0, float(n_units))
+    return best_s
+
+
 # ---------------------------------------------------------------------------
 # Doubly robust generated outcomes (Eq 4.4)
 # ---------------------------------------------------------------------------
@@ -429,6 +520,7 @@ def compute_omega_star_conditional(
     period_1_col: int,
     cohort_fractions: Dict[float, float],
     covariate_matrix: np.ndarray,
+    s_hat_cache: Dict[float, np.ndarray],
     bandwidth: Optional[float] = None,
     never_treated_val: float = np.inf,
 ) -> np.ndarray:
@@ -436,7 +528,9 @@ def compute_omega_star_conditional(
 
     Estimates the five-term conditional covariance matrix using
     Nadaraya-Watson kernel regression with Gaussian kernel and
-    local (kernel-weighted) means.
+    local (kernel-weighted) means.  Scales each term by per-unit
+    conditional inverse propensities ``s_hat_g(X_i) = 1/p_g(X_i)``
+    (algorithm step 4), matching the paper's Eq 3.12.
 
     Parameters
     ----------
@@ -447,6 +541,9 @@ def compute_omega_star_conditional(
     cohort_masks, never_treated_mask, period_to_col, period_1_col,
     cohort_fractions : pre-computed data structures
     covariate_matrix : ndarray, shape (n_units, n_covariates)
+    s_hat_cache : dict
+        Inverse propensity estimates ``{group: s_hat(X_i)}`` where each
+        value is shape ``(n_units,)``. Keyed by group identifier.
     bandwidth : float or None
         Kernel bandwidth. None = Silverman's rule.
     never_treated_val : float
@@ -468,23 +565,27 @@ def compute_omega_star_conditional(
     y1_col = period_1_col
 
     g_mask = cohort_masks[target_g]
-    pi_g = cohort_fractions[target_g]
 
     Y_inf = outcome_wide[never_treated_mask]
     X_inf = covariate_matrix[never_treated_mask]
-    pi_inf = cohort_fractions.get(never_treated_val, 0.0)
+
+    # Per-unit inverse propensities from sieve estimation (Eq 3.12)
+    s_g = s_hat_cache.get(target_g, np.full(n_units, 1.0 / max(cohort_fractions[target_g], 1e-10)))
+    s_inf = s_hat_cache.get(
+        never_treated_val,
+        np.full(n_units, 1.0 / max(cohort_fractions.get(never_treated_val, 1e-10), 1e-10)),
+    )
 
     # Scalability warning
     if n_units > 5000:
         warnings.warn(
             f"Conditional Omega* estimation with n={n_units} is expensive "
-            f"(O(n^2 * H^2)). Consider using fewer units or unconditional Omega*.",
+            f"(O(n^2 * H^2)). Consider using fewer units.",
             UserWarning,
             stacklevel=2,
         )
 
-    # Pre-compute kernel weight matrices per group (reused across pairs)
-    # W_g[i, j] = normalized K_h(X_g[j], X_all[i])
+    # Pre-compute kernel weight matrices per group
     Y_g = outcome_wide[g_mask]
     X_g = covariate_matrix[g_mask]
     Yg_t_minus_1 = Y_g[:, t_col] - Y_g[:, y1_col]
@@ -492,24 +593,19 @@ def compute_omega_star_conditional(
     W_g = _kernel_weights_matrix(covariate_matrix, X_g, bandwidth)
     W_inf = _kernel_weights_matrix(covariate_matrix, X_inf, bandwidth)
 
-    # Pre-compute per-pair differenced arrays for never-treated
     inf_t_minus_tpre = {}
     for _, tpre in valid_pairs:
         tpre_col = period_to_col[tpre]
         if tpre_col not in inf_t_minus_tpre:
             inf_t_minus_tpre[tpre_col] = Y_inf[:, t_col] - Y_inf[:, tpre_col]
 
-    # Pre-compute kernel weights for comparison cohorts
     W_gp_cache: Dict[float, np.ndarray] = {}
     gp_outcomes_cache: Dict[float, np.ndarray] = {}
 
     omega = np.zeros((n_units, H, H))
 
-    # Term 1: (1/pi_g) * Cov(Y_t-Y_1, Y_t-Y_1 | G=g, X) — same for all (j,k)
-    if pi_g > 0:
-        term1 = (1.0 / pi_g) * _kernel_weighted_cov(Yg_t_minus_1, Yg_t_minus_1, W_g)
-    else:
-        term1 = np.zeros(n_units)
+    # Term 1: s_g(X) * Cov(Y_t-Y_1, Y_t-Y_1 | G=g, X) — same for all (j,k)
+    term1 = s_g * _kernel_weighted_cov(Yg_t_minus_1, Yg_t_minus_1, W_g)
 
     for j in range(H):
         gp_j, tpre_j = valid_pairs[j]
@@ -521,45 +617,42 @@ def compute_omega_star_conditional(
 
             val = term1.copy()
 
-            # Term 2: (1/pi_inf) * Cov(Y_t-Y_{tpre_j}, Y_t-Y_{tpre_k} | G=inf, X)
-            if pi_inf > 0:
-                val += (1.0 / pi_inf) * _kernel_weighted_cov(
-                    inf_t_minus_tpre[tpre_j_col],
-                    inf_t_minus_tpre[tpre_k_col],
-                    W_inf,
-                )
+            # Term 2: s_inf(X) * Cov(Y_t-Y_{tpre_j}, Y_t-Y_{tpre_k} | G=inf, X)
+            val += s_inf * _kernel_weighted_cov(
+                inf_t_minus_tpre[tpre_j_col],
+                inf_t_minus_tpre[tpre_k_col],
+                W_inf,
+            )
 
-            # Term 3: -1{g==g'_j}/pi_g * Cov(Y_t-Y_1, Y_{tpre_j}-Y_1 | G=g, X)
-            if gp_j == target_g and pi_g > 0:
+            # Term 3: -1{g==g'_j} * s_g(X) * Cov(Y_t-Y_1, Y_{tpre_j}-Y_1 | G=g, X)
+            if gp_j == target_g:
                 g_tpre_j = Y_g[:, tpre_j_col] - Y_g[:, y1_col]
-                val -= (1.0 / pi_g) * _kernel_weighted_cov(Yg_t_minus_1, g_tpre_j, W_g)
+                val -= s_g * _kernel_weighted_cov(Yg_t_minus_1, g_tpre_j, W_g)
 
-            # Term 4: -1{g==g'_k}/pi_g * Cov(Y_t-Y_1, Y_{tpre_k}-Y_1 | G=g, X)
-            if gp_k == target_g and pi_g > 0:
+            # Term 4: -1{g==g'_k} * s_g(X) * Cov(Y_t-Y_1, Y_{tpre_k}-Y_1 | G=g, X)
+            if gp_k == target_g:
                 g_tpre_k = Y_g[:, tpre_k_col] - Y_g[:, y1_col]
-                val -= (1.0 / pi_g) * _kernel_weighted_cov(Yg_t_minus_1, g_tpre_k, W_g)
+                val -= s_g * _kernel_weighted_cov(Yg_t_minus_1, g_tpre_k, W_g)
 
-            # Term 5: 1{g'_j==g'_k}/pi_{g'_j} * Cov(Y_{tpre_j}-Y_1, Y_{tpre_k}-Y_1 | G=g'_j, X)
+            # Term 5: 1{g'_j==g'_k} * s_{g'_j}(X) * Cov(...)
             if gp_j == gp_k:
                 if np.isinf(gp_j):
-                    if pi_inf > 0:
-                        inf_tpre_j = Y_inf[:, tpre_j_col] - Y_inf[:, y1_col]
-                        inf_tpre_k = Y_inf[:, tpre_k_col] - Y_inf[:, y1_col]
-                        val += (1.0 / pi_inf) * _kernel_weighted_cov(inf_tpre_j, inf_tpre_k, W_inf)
+                    inf_tpre_j = Y_inf[:, tpre_j_col] - Y_inf[:, y1_col]
+                    inf_tpre_k = Y_inf[:, tpre_k_col] - Y_inf[:, y1_col]
+                    val += s_inf * _kernel_weighted_cov(inf_tpre_j, inf_tpre_k, W_inf)
                 else:
-                    pi_gp_j = cohort_fractions.get(gp_j, 0.0)
-                    if pi_gp_j > 0:
-                        if gp_j not in W_gp_cache:
-                            X_gp = covariate_matrix[cohort_masks[gp_j]]
-                            W_gp_cache[gp_j] = _kernel_weights_matrix(
-                                covariate_matrix, X_gp, bandwidth
-                            )
-                            gp_outcomes_cache[gp_j] = outcome_wide[cohort_masks[gp_j]]
-                        W_gp = W_gp_cache[gp_j]
-                        Y_gp = gp_outcomes_cache[gp_j]
-                        gp_tpre_j = Y_gp[:, tpre_j_col] - Y_gp[:, y1_col]
-                        gp_tpre_k = Y_gp[:, tpre_k_col] - Y_gp[:, y1_col]
-                        val += (1.0 / pi_gp_j) * _kernel_weighted_cov(gp_tpre_j, gp_tpre_k, W_gp)
+                    s_gp_j = s_hat_cache.get(
+                        gp_j, np.full(n_units, 1.0 / max(cohort_fractions.get(gp_j, 1e-10), 1e-10))
+                    )
+                    if gp_j not in W_gp_cache:
+                        X_gp = covariate_matrix[cohort_masks[gp_j]]
+                        W_gp_cache[gp_j] = _kernel_weights_matrix(covariate_matrix, X_gp, bandwidth)
+                        gp_outcomes_cache[gp_j] = outcome_wide[cohort_masks[gp_j]]
+                    W_gp = W_gp_cache[gp_j]
+                    Y_gp = gp_outcomes_cache[gp_j]
+                    gp_tpre_j = Y_gp[:, tpre_j_col] - Y_gp[:, y1_col]
+                    gp_tpre_k = Y_gp[:, tpre_k_col] - Y_gp[:, y1_col]
+                    val += s_gp_j * _kernel_weighted_cov(gp_tpre_j, gp_tpre_k, W_gp)
 
             omega[:, j, k] = val
             if j != k:
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index d46418c5..18f4159e 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -671,7 +671,7 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - [x] Overlap diagnostics for propensity score ratios
 - **Note:** Sieve ratio estimation uses polynomial basis functions (total degree up to K) with AIC/BIC model selection. The paper describes sieve estimators generally without specifying a particular basis family; polynomial sieves are a standard choice (Section 4, Eq 4.2). Negative sieve ratio predictions are clipped to a small positive value since the population ratio p_g(X)/p_{g'}(X) is non-negative.
 - **Note:** Kernel-smoothed conditional covariance Omega*(X) uses Gaussian kernel with Silverman's rule-of-thumb bandwidth by default. The paper specifies kernel smoothing (step 5, Section 4) without mandating a particular kernel or bandwidth selection method.
-- **Note (deviation from source):** The conditional covariance Omega*(X) scales each term by unconditional cohort fractions pi_g rather than conditional generalized propensities p_g(X) as in Eq 3.12. Implementing the full conditional propensity scaling requires per-unit group probability estimation (algorithm step 4: s_hat_{g'}(X) = 1/p_{g'}(X) via convex minimization), which is deferred. The unconditional-pi approximation is consistent under double robustness but does not achieve the full conditional efficiency bound of Eq 3.12.
+- **Note:** Conditional covariance Omega*(X) scales each term by per-unit sieve-estimated inverse propensities s_hat_{g'}(X) = 1/p_{g'}(X) (algorithm step 4), matching Eq 3.12. The inverse propensity estimation uses the same polynomial sieve convex minimization as the ratio estimator.
 
 ---
 

From f94f499c148485e84b135fd5a25ba19454a43d49 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 17:54:00 -0400
Subject: [PATCH 08/12] Add sieve fallback warnings, fallback tests, and
 results docstring

P1 fix: estimate_inverse_propensity_sieve() now emits UserWarning on
all-K fallback (matching the ratio sieve pattern). Documented in
REGISTRY.md alongside the existing ratio fallback note.

Add TestSieveFallbacks class with targeted tests forcing tiny groups
(n_basis >= n_group) for both ratio and inverse propensity sieve helpers.

Extend EfficientDiDResults attribute docs to cover estimation_path,
sieve_k_max, sieve_criterion, ratio_clip, and kernel_bandwidth.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/efficient_did_covariates.py |  9 +++++++
 diff_diff/efficient_did_results.py    | 10 ++++++++
 docs/methodology/REGISTRY.md          |  3 ++-
 tests/test_efficient_did.py           | 36 +++++++++++++++++++++++++++
 4 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py
index 80723904..37dcf3a8 100644
--- a/diff_diff/efficient_did_covariates.py
+++ b/diff_diff/efficient_did_covariates.py
@@ -340,6 +340,15 @@ def estimate_inverse_propensity_sieve(
             best_ic = ic_val
             best_s = s_hat.copy()
 
+    # Warn if no sieve fit succeeded (falling back to unconditional)
+    if best_ic == np.inf:
+        warnings.warn(
+            "Inverse propensity sieve estimation failed for all K values. "
+            "Falling back to unconditional n/n_group scaling.",
+            UserWarning,
+            stacklevel=2,
+        )
+
     # s = 1/p must be >= 1 (since p <= 1) and bounded above
     best_s = np.clip(best_s, 1.0, float(n_units))
     return best_s
diff --git a/diff_diff/efficient_did_results.py b/diff_diff/efficient_did_results.py
index 88554298..35393466 100644
--- a/diff_diff/efficient_did_results.py
+++ b/diff_diff/efficient_did_results.py
@@ -75,6 +75,16 @@ class EfficientDiDResults:
         Stored EIF matrix for bootstrap / manual SE computation.
     bootstrap_results : EDiDBootstrapResults, optional
         Bootstrap inference results.
+    estimation_path : str
+        ``"nocov"`` or ``"dr"`` — which estimation path was used.
+    sieve_k_max : int or None
+        Maximum polynomial degree for sieve ratio estimation.
+    sieve_criterion : str
+        Information criterion used (``"aic"`` or ``"bic"``).
+    ratio_clip : float
+        Clipping bound for sieve propensity ratios.
+    kernel_bandwidth : float or None
+        Bandwidth used for kernel-smoothed conditional Omega*.
     """
 
     group_time_effects: Dict[Tuple[Any, Any], Dict[str, Any]]
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 18f4159e..84e4ee7a 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -618,7 +618,8 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - **Single pre-treatment period (g=2)**: `V*_{gt}(X)` is 1x1, efficient weights are trivially 1, estimator collapses to standard DiD with single baseline
 - **Rank deficiency in `V*_{gt}(X)` or `Omega*_{gt}(X)`**: Inverse does not exist if outcome changes are linearly dependent conditional on covariates. Detect via matrix condition number; fall back to pseudoinverse or standard estimator
 - **Near-zero propensity scores**: Ratio `p_g(X)/p_{g'}(X)` explodes. Overlap assumption (O) rules this out in population; implement trimming or warn on finite-sample instability
-- **Note:** On sieve ratio estimation failure (singular basis matrix at all K values), the estimator falls back to a constant ratio of 1 for all units. This is an exception-triggered fallback — the outcome regression adjustment remains active, so the generated outcomes (Eq 4.4) still incorporate covariate information via the m_hat terms. The DR property ensures consistency as long as the outcome regression is correctly specified.
+- **Note:** On sieve ratio estimation failure (singular basis matrix at all K values), the estimator falls back to a constant ratio of 1 for all units with a UserWarning. The outcome regression adjustment remains active, so the generated outcomes (Eq 4.4) still incorporate covariate information via the m_hat terms. The DR property ensures consistency as long as the outcome regression is correctly specified.
+- **Note:** On sieve inverse propensity estimation failure (algorithm step 4), the estimator falls back to unconditional n/n_group scaling with a UserWarning, which reduces to the unconditional Omega* approximation for the affected group. Both sieve fallbacks are exception-triggered and logged.
 - **All units eventually treated**: Last cohort serves as "never-treated" by dropping last time period (Phase 1: raises ValueError; last-cohort-as-control fallback planned for Phase 2)
 - **Negative weights**: Explicitly stated as harmless for bias and beneficial for precision; arise from efficiency optimization under overidentification (Section 5.2)
 - **PT-Post regime (just-identified)**: Under PT-Post, EDiD automatically reduces to standard single-baseline estimator (Corollary 3.2). No downside to using EDiD -- it subsumes standard estimators
diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py
index 82470531..a9d676a8 100644
--- a/tests/test_efficient_did.py
+++ b/tests/test_efficient_did.py
@@ -1443,3 +1443,39 @@ def test_covariates_pt_all_bootstrap(self):
         assert result.group_effects is not None
         assert np.isfinite(result.overall_att)
         assert result.overall_se > 0
+
+
+class TestSieveFallbacks:
+    """Tier 2: sieve estimation failure fallbacks."""
+
+    def test_ratio_sieve_fallback_tiny_group(self):
+        """When comparison group is too small for any basis, fall back to constant ratio."""
+        from diff_diff.efficient_did_covariates import estimate_propensity_ratio_sieve
+
+        rng = np.random.default_rng(42)
+        n = 100
+        X = rng.normal(0, 1, (n, 3))  # 3 covariates
+        mask_g = np.zeros(n, dtype=bool)
+        mask_g[:50] = True
+        # Tiny comparison group: only 2 units (fewer than any basis dimension)
+        mask_gp = np.zeros(n, dtype=bool)
+        mask_gp[50:52] = True
+        ratio = estimate_propensity_ratio_sieve(X, mask_g, mask_gp, k_max=3)
+        # Should produce valid ratios (fallback to constant 1)
+        assert np.all(np.isfinite(ratio))
+
+    def test_inverse_propensity_sieve_fallback_warns(self):
+        """When group is too small for sieve, fall back with warning."""
+        from diff_diff.efficient_did_covariates import estimate_inverse_propensity_sieve
+
+        rng = np.random.default_rng(42)
+        n = 100
+        X = rng.normal(0, 1, (n, 5))  # 5 covariates
+        # Tiny group: only 2 units
+        mask = np.zeros(n, dtype=bool)
+        mask[:2] = True
+        with pytest.warns(UserWarning, match="Inverse propensity sieve estimation failed"):
+            s_hat = estimate_inverse_propensity_sieve(X, mask, k_max=3)
+        assert np.all(np.isfinite(s_hat))
+        # Should fall back to unconditional n/n_group = 100/2 = 50
+        assert np.allclose(s_hat, 50.0)

From 24b6b2a1ebee8813877cc36803cd03ebeefb37a0 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 18:18:09 -0400
Subject: [PATCH 09/12] Add warning on ratio sieve all-K fallback

Symmetric with the inverse propensity fallback: emit UserWarning when
no sieve candidate succeeds and ratio falls back to constant 1.
Updated test to assert the warning fires.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/efficient_did_covariates.py | 10 ++++++++++
 tests/test_efficient_did.py           | 10 ++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py
index 37dcf3a8..f39c5710 100644
--- a/diff_diff/efficient_did_covariates.py
+++ b/diff_diff/efficient_did_covariates.py
@@ -235,6 +235,16 @@ def estimate_propensity_ratio_sieve(
             best_ic = ic_val
             best_ratio = r_hat.copy()
 
+    # Warn if no sieve fit succeeded (falling back to constant ratio 1)
+    if best_ic == np.inf:
+        warnings.warn(
+            "Propensity ratio sieve estimation failed for all K values. "
+            "Falling back to constant ratio of 1 (no ratio adjustment). "
+            "The DR estimator relies on outcome regression only.",
+            UserWarning,
+            stacklevel=2,
+        )
+
     # Overlap diagnostics: warn if ratios require significant clipping
     n_extreme = int(np.sum((best_ratio < 1.0 / ratio_clip) | (best_ratio > ratio_clip)))
     if n_extreme > 0:
diff --git a/tests/test_efficient_did.py b/tests/test_efficient_did.py
index a9d676a8..e304f033 100644
--- a/tests/test_efficient_did.py
+++ b/tests/test_efficient_did.py
@@ -1448,8 +1448,8 @@ def test_covariates_pt_all_bootstrap(self):
 class TestSieveFallbacks:
     """Tier 2: sieve estimation failure fallbacks."""
 
-    def test_ratio_sieve_fallback_tiny_group(self):
-        """When comparison group is too small for any basis, fall back to constant ratio."""
+    def test_ratio_sieve_fallback_tiny_group_warns(self):
+        """When comparison group is too small for any basis, fall back with warning."""
         from diff_diff.efficient_did_covariates import estimate_propensity_ratio_sieve
 
         rng = np.random.default_rng(42)
@@ -1460,9 +1460,11 @@ def test_ratio_sieve_fallback_tiny_group(self):
         # Tiny comparison group: only 2 units (fewer than any basis dimension)
         mask_gp = np.zeros(n, dtype=bool)
         mask_gp[50:52] = True
-        ratio = estimate_propensity_ratio_sieve(X, mask_g, mask_gp, k_max=3)
-        # Should produce valid ratios (fallback to constant 1)
+        with pytest.warns(UserWarning, match="Propensity ratio sieve estimation failed"):
+            ratio = estimate_propensity_ratio_sieve(X, mask_g, mask_gp, k_max=3)
         assert np.all(np.isfinite(ratio))
+        # Fallback: constant ratio of 1 (clipped to [1/ratio_clip, ratio_clip])
+        assert np.allclose(ratio, 1.0)
 
     def test_inverse_propensity_sieve_fallback_warns(self):
         """When group is too small for sieve, fall back with warning."""

From 7a86d431267905ac78f18e3cefa02ceeea7bfee2 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 18:30:27 -0400
Subject: [PATCH 10/12] Fix registry fallback notes to match actual trigger
 conditions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Broaden sieve fallback descriptions from "singular basis matrix" /
"exception-triggered" to "when no sieve degree K succeeds" — covers
both the n_basis >= n_group early-exit and singular system cases.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/methodology/REGISTRY.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 84e4ee7a..7ddaf22c 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -618,8 +618,8 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - **Single pre-treatment period (g=2)**: `V*_{gt}(X)` is 1x1, efficient weights are trivially 1, estimator collapses to standard DiD with single baseline
 - **Rank deficiency in `V*_{gt}(X)` or `Omega*_{gt}(X)`**: Inverse does not exist if outcome changes are linearly dependent conditional on covariates. Detect via matrix condition number; fall back to pseudoinverse or standard estimator
 - **Near-zero propensity scores**: Ratio `p_g(X)/p_{g'}(X)` explodes. Overlap assumption (O) rules this out in population; implement trimming or warn on finite-sample instability
-- **Note:** On sieve ratio estimation failure (singular basis matrix at all K values), the estimator falls back to a constant ratio of 1 for all units with a UserWarning. The outcome regression adjustment remains active, so the generated outcomes (Eq 4.4) still incorporate covariate information via the m_hat terms. The DR property ensures consistency as long as the outcome regression is correctly specified.
-- **Note:** On sieve inverse propensity estimation failure (algorithm step 4), the estimator falls back to unconditional n/n_group scaling with a UserWarning, which reduces to the unconditional Omega* approximation for the affected group. Both sieve fallbacks are exception-triggered and logged.
+- **Note:** When no sieve degree K succeeds for ratio estimation (basis dimension exceeds comparison group size, or all linear systems are singular), the estimator falls back to a constant ratio of 1 for all units with a UserWarning. The outcome regression adjustment remains active, so the generated outcomes (Eq 4.4) still incorporate covariate information via the m_hat terms. The DR property ensures consistency as long as the outcome regression is correctly specified.
+- **Note:** When no sieve degree K succeeds for inverse propensity estimation (algorithm step 4), the estimator falls back to unconditional n/n_group scaling with a UserWarning, which reduces to the unconditional Omega* approximation for the affected group.
 - **All units eventually treated**: Last cohort serves as "never-treated" by dropping last time period (Phase 1: raises ValueError; last-cohort-as-control fallback planned for Phase 2)
 - **Negative weights**: Explicitly stated as harmless for bias and beneficial for precision; arise from efficiency optimization under overidentification (Section 5.2)
 - **PT-Post regime (just-identified)**: Under PT-Post, EDiD automatically reduces to standard single-baseline estimator (Corollary 3.2). No downside to using EDiD -- it subsumes standard estimators

From 56fa2e74dfa004ef634926ccfa71f7885c2b0d77 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 18:47:33 -0400
Subject: [PATCH 11/12] Document OLS outcome regression as working-model choice
 in REGISTRY/docstrings

The paper's Section 4 describes flexible nonparametric nuisance estimation
for m_hat, while our implementation uses linear OLS. Add REGISTRY.md note
and align class/module docstrings: the DR property ensures consistency,
but OLS does not generically guarantee the semiparametric efficiency bound
unless the conditional mean is linear in covariates.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/efficient_did.py            | 21 ++++++++++++---------
 diff_diff/efficient_did_covariates.py |  8 +++++++-
 docs/methodology/REGISTRY.md          |  1 +
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
index 22a996f6..f9db70ff 100644
--- a/diff_diff/efficient_did.py
+++ b/diff_diff/efficient_did.py
@@ -48,15 +48,18 @@
 class EfficientDiD(EfficientDiDBootstrapMixin):
     """Efficient DiD estimator (Chen, Sant'Anna & Xie 2025).
 
-    Achieves the semiparametric efficiency bound for ATT(g,t) in
-    difference-in-differences settings with staggered treatment adoption.
-
-    Without covariates, uses a closed-form estimator based on within-group
-    sample means and covariances.  With covariates, uses the doubly robust
-    path: sieve-based propensity score ratios (Eq 4.1-4.2) with AIC/BIC
-    selection, OLS outcome regression, sieve-estimated inverse propensities
-    (algorithm step 4), and kernel-smoothed conditional Omega*(X) with
-    per-unit efficient weights (Eq 3.12).
+    Without covariates, achieves the semiparametric efficiency bound for
+    ATT(g,t) using a closed-form estimator based on within-group sample
+    means and covariances.
+
+    With covariates, uses a doubly robust path: sieve-based propensity
+    score ratios (Eq 4.1-4.2), OLS outcome regression, sieve-estimated
+    inverse propensities (algorithm step 4), and kernel-smoothed
+    conditional Omega*(X) with per-unit efficient weights (Eq 3.12).
+    The DR property ensures consistency if either the OLS outcome model
+    or the sieve propensity ratio is correctly specified.  The OLS
+    working model for outcome regressions does not generically guarantee
+    the semiparametric efficiency bound (see REGISTRY.md).
 
     Parameters
     ----------
diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py
index f39c5710..784effa6 100644
--- a/diff_diff/efficient_did_covariates.py
+++ b/diff_diff/efficient_did_covariates.py
@@ -2,11 +2,17 @@
 Doubly robust math for the Efficient DiD estimator (with covariates).
 
 Implements the with-covariates path from Chen, Sant'Anna & Xie (2025):
-outcome regression via OLS, sieve-based propensity score ratios (Eq 4.1-4.2),
+OLS outcome regression (linear working model), sieve-based propensity
+score ratios (Eq 4.1-4.2), sieve-based inverse propensities (step 4),
 kernel-smoothed conditional Omega*(X) for per-unit efficient weights,
 doubly robust generated outcomes (Eq 4.4), and the efficient influence
 function for analytical standard errors.
 
+The DR property ensures consistency if either the OLS outcome model or
+the sieve propensity ratio is correctly specified.  The OLS working model
+does not generically guarantee the semiparametric efficiency bound unless
+the conditional mean is linear in covariates (see REGISTRY.md).
+
 All functions are pure (no state), operating on pre-pivoted numpy arrays.
 """
 
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 7ddaf22c..42d7a022 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -673,6 +673,7 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - **Note:** Sieve ratio estimation uses polynomial basis functions (total degree up to K) with AIC/BIC model selection. The paper describes sieve estimators generally without specifying a particular basis family; polynomial sieves are a standard choice (Section 4, Eq 4.2). Negative sieve ratio predictions are clipped to a small positive value since the population ratio p_g(X)/p_{g'}(X) is non-negative.
 - **Note:** Kernel-smoothed conditional covariance Omega*(X) uses Gaussian kernel with Silverman's rule-of-thumb bandwidth by default. The paper specifies kernel smoothing (step 5, Section 4) without mandating a particular kernel or bandwidth selection method.
 - **Note:** Conditional covariance Omega*(X) scales each term by per-unit sieve-estimated inverse propensities s_hat_{g'}(X) = 1/p_{g'}(X) (algorithm step 4), matching Eq 3.12. The inverse propensity estimation uses the same polynomial sieve convex minimization as the ratio estimator.
+- **Note:** Outcome regressions m_hat_{g',t,tpre}(X) use linear OLS working models. The paper's Section 4 describes flexible nonparametric nuisance estimation (sieve regression, kernel smoothing, or ML methods). The DR property ensures consistency if either the OLS outcome model or the sieve propensity ratio is correctly specified, but the linear OLS specification does not generically guarantee attainment of the semiparametric efficiency bound unless the conditional mean is linear in the covariates.
 
 ---
 

From a3a7909307cc27c6afa42f72adbfc8bf527f2fda Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Sat, 21 Mar 2026 19:19:17 -0400
Subject: [PATCH 12/12] Add overlap warning for inverse propensity clipping,
 align module docstring

P1 fix: estimate_inverse_propensity_sieve() now warns when s_hat values
are clipped to [1, n], matching the ratio path's overlap diagnostics.
Documented in REGISTRY.md.

P3 fix: module-level docstring in efficient_did.py now qualifies the
covariate path, consistent with the class docstring and REGISTRY.md.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 diff_diff/efficient_did.py            | 12 ++++++------
 diff_diff/efficient_did_covariates.py | 12 ++++++++++++
 docs/methodology/REGISTRY.md          |  2 +-
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/diff_diff/efficient_did.py b/diff_diff/efficient_did.py
index f9db70ff..aa047a0a 100644
--- a/diff_diff/efficient_did.py
+++ b/diff_diff/efficient_did.py
@@ -1,13 +1,13 @@
 """
 Efficient Difference-in-Differences estimator.
 
-Implements the semiparametrically efficient ATT estimator from
-Chen, Sant'Anna & Xie (2025).
+Implements the ATT estimator from Chen, Sant'Anna & Xie (2025).
+Without covariates, achieves the semiparametric efficiency bound via
+closed-form within-group covariances.  With covariates, uses a doubly
+robust path with OLS outcome regression, sieve propensity ratios, and
+kernel-smoothed conditional Omega*(X) (see class docstring for caveats).
 
-The estimator achieves the efficiency bound by optimally weighting
-across pre-treatment periods and comparison groups via the inverse of
-the within-group covariance matrix Omega*.  Under the stronger PT-All
-assumption the model is overidentified and EDiD exploits this for
+Under PT-All the model is overidentified and EDiD exploits this for
 tighter inference; under PT-Post it reduces to the standard
 single-baseline estimator (Callaway-Sant'Anna).
 """
diff --git a/diff_diff/efficient_did_covariates.py b/diff_diff/efficient_did_covariates.py
index 784effa6..9e3052c2 100644
--- a/diff_diff/efficient_did_covariates.py
+++ b/diff_diff/efficient_did_covariates.py
@@ -365,6 +365,18 @@ def estimate_inverse_propensity_sieve(
             stacklevel=2,
         )
 
+    # Overlap diagnostics: warn if s_hat values require clipping
+    n_clipped = int(np.sum((best_s < 1.0) | (best_s > float(n_units))))
+    if n_clipped > 0:
+        pct = 100.0 * n_clipped / n_units
+        warnings.warn(
+            f"Inverse propensity estimates for {n_clipped} of {n_units} units "
+            f"({pct:.1f}%) were outside [1, {n_units}] and will be clipped. "
+            f"This may indicate overlap assumption violations.",
+            UserWarning,
+            stacklevel=2,
+        )
+
     # s = 1/p must be >= 1 (since p <= 1) and bounded above
     best_s = np.clip(best_s, 1.0, float(n_units))
     return best_s
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 42d7a022..289acc43 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -672,7 +672,7 @@ where `q_{g,e} = pi_g / sum_{g' in G_{trt,e}} pi_{g'}`.
 - [x] Overlap diagnostics for propensity score ratios
 - **Note:** Sieve ratio estimation uses polynomial basis functions (total degree up to K) with AIC/BIC model selection. The paper describes sieve estimators generally without specifying a particular basis family; polynomial sieves are a standard choice (Section 4, Eq 4.2). Negative sieve ratio predictions are clipped to a small positive value since the population ratio p_g(X)/p_{g'}(X) is non-negative.
 - **Note:** Kernel-smoothed conditional covariance Omega*(X) uses Gaussian kernel with Silverman's rule-of-thumb bandwidth by default. The paper specifies kernel smoothing (step 5, Section 4) without mandating a particular kernel or bandwidth selection method.
-- **Note:** Conditional covariance Omega*(X) scales each term by per-unit sieve-estimated inverse propensities s_hat_{g'}(X) = 1/p_{g'}(X) (algorithm step 4), matching Eq 3.12. The inverse propensity estimation uses the same polynomial sieve convex minimization as the ratio estimator.
+- **Note:** Conditional covariance Omega*(X) scales each term by per-unit sieve-estimated inverse propensities s_hat_{g'}(X) = 1/p_{g'}(X) (algorithm step 4), matching Eq 3.12. The inverse propensity estimation uses the same polynomial sieve convex minimization as the ratio estimator. Estimated s_hat values are clipped to [1, n] with a UserWarning when clipping binds, mirroring the ratio path's overlap diagnostics.
 - **Note:** Outcome regressions m_hat_{g',t,tpre}(X) use linear OLS working models. The paper's Section 4 describes flexible nonparametric nuisance estimation (sieve regression, kernel smoothing, or ML methods). The DR property ensures consistency if either the OLS outcome model or the sieve propensity ratio is correctly specified, but the linear OLS specification does not generically guarantee attainment of the semiparametric efficiency bound unless the conditional mean is linear in the covariates.
 
 ---