Fix EPV denominator: use predictor count excluding intercept (Peduzzi convention)

igerber · claude · igerber · commit 00635ee2b884 · 2026-04-02T06:48:43.000-04:00
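Peduzzi et al. (1996) define EPV (Events Per Variable) in terms of
independent predictor variables, which do not include the intercept.
Change the denominator from k_solve (which includes the intercept
column) to n_predictors = k_solve - 1. The reported diagnostics_out["k"],
the low-EPV warning text, diagnose_propensity(), and the EPV formulas in
REGISTRY.md are updated to match.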

Also fix the TripleDifference propensity-score fallback warning to name
the correct API keyword, estimation_method (not est_method).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py
@@ -1301,20 +1301,23 @@ def solve_logit(
     n_pos_y = int(np.sum(y_eff))
     n_neg_y = n_eff - n_pos_y
     n_events = min(n_pos_y, n_neg_y)
-    epv = n_events / k_solve if k_solve > 0 else float("inf")
+    # Peduzzi et al. (1996) define EPV using predictor variables, excluding
+    # the intercept. k_solve includes the intercept column, so use k_solve - 1.
+    n_predictors = k_solve - 1  # exclude intercept
+    epv = n_events / n_predictors if n_predictors > 0 else float("inf")
 
     if diagnostics_out is not None:
         diagnostics_out["epv"] = epv
         diagnostics_out["n_events"] = n_events
-        diagnostics_out["k"] = k_solve
+        diagnostics_out["k"] = n_predictors
         diagnostics_out["is_low"] = epv < epv_threshold
 
     if epv < epv_threshold:
         ctx = f" for {context_label}" if context_label else ""
         msg = (
             f"Low Events Per Variable (EPV = {epv:.1f}) in propensity score "
             f"model{ctx}. {n_events} minority-class observations for "
-            f"{k_solve} parameters (including intercept). "
+            f"{n_predictors} predictor variable(s). "
             f"Peduzzi et al. (1996) recommend EPV >= {epv_threshold:.0f}. "
             f"Estimates may be unreliable (overfitting, biased coefficients, "
             f"inflated standard errors). "
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
@@ -456,7 +456,7 @@ def diagnose_propensity(
         never_treated_mask = precomputed["never_treated_mask"]
         unit_cohorts = precomputed["unit_cohorts"]
         n_covariates = len(covariates)
-        n_params = n_covariates + 1  # +1 for intercept
+        n_params = n_covariates  # predictor count, excluding intercept (Peduzzi convention)
 
         rows = []
         for g in sorted(cohort_masks.keys()):
diff --git a/diff_diff/triple_diff.py b/diff_diff/triple_diff.py
@@ -1041,7 +1041,7 @@ def _estimate_ddd_decomposition(
                             f"Propensity score estimation failed for subgroup "
                             f"{j} vs 4; dropping covariates and using "
                             f"unconditional probability. "
-                            f"Consider est_method='reg' to avoid propensity "
+                            f"Consider estimation_method='reg' to avoid propensity "
                             f"scores entirely.",
                             UserWarning,
                             stacklevel=3,
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -405,7 +405,7 @@ The multiplier bootstrap uses random weights w_i with E[w]=0 and Var(w)=1:
   - Trimming: Propensity scores clipped to `[pscore_trim, 1-pscore_trim]` (default
     0.01) before weight computation. Warning emitted when scores are trimmed.
   - **Events Per Variable (EPV) diagnostics:** Per-cohort EPV =
-    min(n_treated, n_control) / (n_covariates + 1) checked before IRLS.
+    min(n_treated, n_control) / n_covariates checked before IRLS.
     Default threshold: 10 (Peduzzi et al. 1996). Warns when EPV < threshold;
     errors when `rank_deficient_action="error"`. Pre-estimation check via
     `diagnose_propensity()`. Results stored in `results.epv_diagnostics`.
@@ -1247,7 +1247,7 @@ has no additional effect.
   function), emits UserWarning. When `rank_deficient_action="error"`, errors
   are always re-raised regardless of `pscore_fallback`.
 - **Events Per Variable (EPV) diagnostics:** Per-logit EPV =
-  min(n_subgroup_j, n_subgroup_4) / (n_covariates + 1) checked before IRLS.
+  min(n_subgroup_j, n_subgroup_4) / n_covariates checked before IRLS.
   Default threshold: 10 (Peduzzi et al. 1996). Warns when EPV < threshold;
   errors when `rank_deficient_action="error"`.
 - **Note:** `pscore_fallback` default changed from unconditional to error.
@@ -1295,7 +1295,7 @@ has no additional effect.
 - Warns if no valid comparison groups exist for a (g, t) pair (skips that pair)
 - Propensity score overlap enforced by clipping at `pscore_trim` (default 0.01)
 - **Events Per Variable (EPV) diagnostics:** Per-DiD EPV =
-  min(n_subgroup_j, n_subgroup_4) / (n_covariates + 1) checked before IRLS.
+  min(n_subgroup_j, n_subgroup_4) / n_covariates checked before IRLS.
   Default threshold: 10 (Peduzzi et al. 1996). Warns when EPV < threshold;
   errors when `rank_deficient_action="error"`.
 - **Note:** When multiple comparison cohorts `g_c` contribute to the same
diff --git a/tests/test_linalg.py b/tests/test_linalg.py
@@ -1772,8 +1772,8 @@ def test_epv_diagnostics_out_populated(self):
         assert "k" in diag
         assert "is_low" in diag
         assert diag["n_events"] == 20  # minority class
-        assert diag["k"] == 5  # 4 covariates + intercept
-        assert abs(diag["epv"] - 4.0) < 0.01
+        assert diag["k"] == 4  # 4 predictor variables (excluding intercept)
+        assert abs(diag["epv"] - 5.0) < 0.01  # 20 events / 4 predictors
         assert diag["is_low"] is True
 
     def test_epv_uses_post_drop_k(self):
@@ -1794,9 +1794,9 @@ def test_epv_uses_post_drop_k(self):
             solve_logit(X, y, diagnostics_out=diag, rank_deficient_action="silent")
 
-        # Should be 3 params (2 kept covariates + intercept), not 4
+        # Should be 2 predictors (2 kept covariates, intercept excluded), not 3
-        assert diag["k"] == 3
+        assert diag["k"] == 2  # 2 kept predictor variables (excluding intercept)
         assert diag["n_events"] == 30
-        assert abs(diag["epv"] - 10.0) < 0.01
+        assert abs(diag["epv"] - 15.0) < 0.01  # 30 events / 2 predictors
 
     def test_epv_uses_positive_weight_sample(self):
         """EPV computed on positive-weight sample, not padded rows."""
@@ -1828,7 +1828,7 @@ def test_epv_uses_positive_weight_sample(self):
 
         # EPV should reflect the 10-event effective sample, not 260
         assert diag["n_events"] == 10  # min(10, 190) from real sample
-        assert diag["epv"] == 10 / 5  # 10 events / 5 params = 2.0
+        assert diag["epv"] == 10 / 4  # 10 events / 4 predictors = 2.5
         assert diag["is_low"] is True