Commit b1e0237

Merge pull request #165 from igerber/tech-debt-paydown
Address tech debt from code reviews (PRs #115-#159)
2 parents: edbb5ca + 969ae82

17 files changed
Lines changed: 397 additions & 325 deletions

README.md

Lines changed: 3 additions & 3 deletions
@@ -1983,7 +1983,7 @@ TROP(
     max_iter=100,        # Max iterations for factor estimation
     tol=1e-6,            # Convergence tolerance
     alpha=0.05,          # Significance level for CIs
-    n_bootstrap=200,     # Bootstrap replications
+    n_bootstrap=200,     # Bootstrap replications (minimum 2; TROP requires bootstrap for SEs)
     seed=None            # Random seed
 )
 ```
@@ -2064,8 +2064,6 @@ SunAbraham(
 | `time` | str | Time period column |
 | `first_treat` | str | Column with first treatment period (0 for never-treated) |
 | `covariates` | list | Covariate column names |
-| `min_pre_periods` | int | Minimum pre-treatment periods to include |
-| `min_post_periods` | int | Minimum post-treatment periods to include |
 
 ### SunAbrahamResults
 
@@ -2105,6 +2103,7 @@ ImputationDiD(
     alpha=0.05,                      # Significance level for CIs
     cluster=None,                    # Column for cluster-robust SEs
     n_bootstrap=0,                   # Bootstrap iterations (0 = analytical)
+    bootstrap_weights='rademacher',  # 'rademacher', 'mammen', or 'webb'
     seed=None,                       # Random seed
     rank_deficient_action='warn',    # 'warn', 'error', or 'silent'
     horizon_max=None,                # Max event-study horizon
@@ -2159,6 +2158,7 @@ TwoStageDiD(
     alpha=0.05,                      # Significance level for CIs
     cluster=None,                    # Column for cluster-robust SEs (defaults to unit)
     n_bootstrap=0,                   # Bootstrap iterations (0 = analytical GMM SEs)
+    bootstrap_weights='rademacher',  # 'rademacher', 'mammen', or 'webb'
     seed=None,                       # Random seed
     rank_deficient_action='warn',    # 'warn', 'error', or 'silent'
     horizon_max=None,                # Max event-study horizon
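For context, the three `bootstrap_weights` options documented above are standard multiplier-bootstrap distributions. A minimal sketch of how such weights could be drawn; the package's actual generator (`_generate_bootstrap_weights_batch`) is not shown in this diff, so treat the function below as an illustration under assumed textbook parameterizations, not the library's implementation:

```python
import numpy as np

def draw_multiplier_weights(n_bootstrap, n_clusters, kind, rng):
    """Draw an (n_bootstrap, n_clusters) matrix of mean-0, variance-1 weights."""
    if kind == "rademacher":
        # +1 or -1 with equal probability
        return rng.choice([-1.0, 1.0], size=(n_bootstrap, n_clusters))
    if kind == "mammen":
        # Mammen two-point distribution: -(sqrt(5)-1)/2 with prob
        # (sqrt(5)+1)/(2*sqrt(5)), else (sqrt(5)+1)/2
        phi = (1.0 + np.sqrt(5.0)) / 2.0
        p = (np.sqrt(5.0) + 1.0) / (2.0 * np.sqrt(5.0))
        draws = rng.random((n_bootstrap, n_clusters))
        return np.where(draws < p, 1.0 - phi, phi)
    if kind == "webb":
        # Webb six-point distribution, each point with probability 1/6
        points = np.array([-np.sqrt(1.5), -1.0, -np.sqrt(0.5),
                           np.sqrt(0.5), 1.0, np.sqrt(1.5)])
        return rng.choice(points, size=(n_bootstrap, n_clusters))
    raise ValueError(f"unknown bootstrap_weights: {kind!r}")

rng = np.random.default_rng(0)
w = draw_multiplier_weights(2000, 50, "mammen", rng)
print(w.mean(), w.var())  # both should be close to 0 and 1 respectively
```

All three distributions have mean 0 and variance 1, which is what makes them interchangeable multipliers; Webb weights are often preferred when the number of clusters is small.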

TODO.md

Lines changed: 11 additions & 11 deletions
@@ -28,7 +28,7 @@ Target: < 1000 lines per module for maintainability.
 | ~~`trop.py`~~ | ~~2904~~ ~2560 | ✅ Partially split: results extracted to `trop_results.py` (~340 lines) |
 | ~~`imputation.py`~~ | ~~2480~~ ~1740 | ✅ Split into imputation.py, imputation_results.py, imputation_bootstrap.py |
 | ~~`two_stage.py`~~ | ~~2209~~ ~1490 | ✅ Split into two_stage.py, two_stage_results.py, two_stage_bootstrap.py |
-| `utils.py` | 1879 | Monitor -- legacy placebo functions stay to avoid circular imports |
+| `utils.py` | 1780 | Monitor -- legacy placebo function removed |
 | `visualization.py` | 1678 | Monitor -- growing but cohesive |
 | `linalg.py` | 1537 | Monitor -- unified backend, splitting would hurt cohesion |
 | `honest_did.py` | 1511 | Acceptable |
@@ -58,18 +58,18 @@ Deferred items from PR reviews that were not addressed before merge.
 
 | Issue | Location | PR | Priority |
 |-------|----------|----|----------|
-| TwoStageDiD & ImputationDiD bootstrap hardcodes Rademacher only; no `bootstrap_weights` parameter unlike CallawaySantAnna | `two_stage_bootstrap.py`, `imputation_bootstrap.py` | #156, #141 | Medium |
-| TwoStageDiD GMM score logic duplicated between analytic/bootstrap with inconsistent NaN/overflow handling | `two_stage.py`, `two_stage_bootstrap.py` | #156 | Medium |
-| ImputationDiD weight construction duplicated between aggregation and bootstrap (drift risk) -- has explicit code comment acknowledging duplication | `imputation.py`, `imputation_bootstrap.py` | #141 | Medium |
-| ImputationDiD dense `(A0'A0).toarray()` scales O((U+T+K)^2), OOM risk on large panels | `imputation.py` | #141 | Medium |
+| ~~TwoStageDiD & ImputationDiD bootstrap hardcodes Rademacher only; no `bootstrap_weights` parameter unlike CallawaySantAnna~~ | ~~`two_stage_bootstrap.py`, `imputation_bootstrap.py`~~ | ~~#156, #141~~ | ✅ Fixed: Added `bootstrap_weights` parameter to both estimators |
+| ~~TwoStageDiD GMM score logic duplicated between analytic/bootstrap with inconsistent NaN/overflow handling~~ | ~~`two_stage.py`, `two_stage_bootstrap.py`~~ | ~~#156~~ | ✅ Fixed: Unified via `_compute_gmm_scores()` static method |
+| ~~ImputationDiD weight construction duplicated between aggregation and bootstrap (drift risk)~~ | ~~`imputation.py`, `imputation_bootstrap.py`~~ | ~~#141~~ | ✅ Fixed: Extracted `_compute_target_weights()` helper in `imputation_bootstrap.py` |
+| ImputationDiD dense `(A0'A0).toarray()` scales O((U+T+K)^2), OOM risk on large panels | `imputation.py` | #141 | Medium (deferred -- only triggers when sparse solver fails; fixing requires sparse least-squares alternatives) |
 
 #### Performance
 
 | Issue | Location | PR | Priority |
 |-------|----------|----|----------|
-| TwoStageDiD per-column `.toarray()` in loop for cluster scores | `two_stage_bootstrap.py` | #156 | Medium |
+| ~~TwoStageDiD per-column `.toarray()` in loop for cluster scores~~ | ~~`two_stage_bootstrap.py`~~ | ~~#156~~ | ✅ Fixed: Single `.toarray()` call replaces per-column loop |
 | ImputationDiD event-study SEs recompute full conservative variance per horizon (should cache A0/A1 factorization) | `imputation.py` | #141 | Low |
-| Legacy `compute_placebo_effects` uses deprecated projected-gradient weights (marked deprecated, users directed to `SyntheticDiD`) | `utils.py:1689-1691` | #145 | Low |
+| ~~Legacy `compute_placebo_effects` uses deprecated projected-gradient weights~~ | ~~`utils.py:1689-1691`~~ | ~~#145~~ | ✅ Fixed: Removed function entirely |
 | Rust faer SVD ndarray-to-faer conversion overhead (minimal vs SVD cost) | `rust/src/linalg.rs:67` | #115 | Low |
 
 #### Testing/Docs
@@ -78,11 +78,11 @@ Deferred items from PR reviews that were not addressed before merge.
 |-------|----------|----|----------|
 | Tutorial notebooks not executed in CI | `docs/tutorials/*.ipynb` | #159 | Low |
 | ~~TwoStageDiD `test_nan_propagation` is a no-op~~ | ~~`tests/test_two_stage.py:643-652`~~ | ~~#156~~ | ✅ Fixed |
-| ImputationDiD bootstrap + covariate path untested | `tests/test_imputation.py` | #141 | Low |
-| TROP `n_bootstrap >= 2` validation missing (can yield 0/NaN SE silently) | `trop.py:462` | #124 | Low |
-| SunAbraham deprecated `min_pre_periods`/`min_post_periods` still in `fit()` docstring | `sun_abraham.py:458-487` | #153 | Low |
+| ~~ImputationDiD bootstrap + covariate path untested~~ | ~~`tests/test_imputation.py`~~ | ~~#141~~ | ✅ Fixed: Added `test_bootstrap_with_covariates` |
+| ~~TROP `n_bootstrap >= 2` validation missing (can yield 0/NaN SE silently)~~ | ~~`trop.py:462`~~ | ~~#124~~ | ✅ Fixed: Added `ValueError` for `n_bootstrap < 2` |
+| ~~SunAbraham deprecated `min_pre_periods`/`min_post_periods` still in `fit()` docstring~~ | ~~`sun_abraham.py:458-487`~~ | ~~#153~~ | ✅ Fixed: Removed deprecated params from `fit()` |
 | R comparison tests spawn separate `Rscript` per test (slow CI) | `tests/test_methodology_twfe.py:294` | #139 | Low |
-| Rust TROP bootstrap SE returns 0.0 instead of NaN for <2 samples | `rust/src/trop.rs:1038-1054` | #115 | Low |
+| ~~Rust TROP bootstrap SE returns 0.0 instead of NaN for <2 samples~~ | ~~`rust/src/trop.rs:1038-1054`~~ | ~~#115~~ | ✅ Already fixed: Returns `f64::NAN` at `rust/src/trop.rs:1034` |
 
 ---
 
diff_diff/imputation.py

Lines changed: 13 additions & 16 deletions
@@ -22,7 +22,7 @@
 from scipy import sparse, stats
 from scipy.sparse.linalg import spsolve
 
-from diff_diff.imputation_bootstrap import ImputationDiDBootstrapMixin
+from diff_diff.imputation_bootstrap import ImputationDiDBootstrapMixin, _compute_target_weights
 from diff_diff.imputation_results import ImputationBootstrapResults, ImputationDiDResults  # noqa: F401 (re-export)
 from diff_diff.linalg import solve_ols
 from diff_diff.utils import safe_inference
@@ -63,6 +63,8 @@ class ImputationDiD(ImputationDiDBootstrapMixin):
     n_bootstrap : int, default=0
         Number of bootstrap iterations. If 0, uses analytical inference
         (conservative variance from Theorem 3).
+    bootstrap_weights : str, default="rademacher"
+        Type of bootstrap weights: "rademacher", "mammen", or "webb".
     seed : int, optional
         Random seed for reproducibility.
     rank_deficient_action : str, default="warn"
@@ -126,6 +128,7 @@ def __init__(
         alpha: float = 0.05,
         cluster: Optional[str] = None,
         n_bootstrap: int = 0,
+        bootstrap_weights: str = "rademacher",
         seed: Optional[int] = None,
         rank_deficient_action: str = "warn",
         horizon_max: Optional[int] = None,
@@ -136,6 +139,11 @@ def __init__(
                 f"rank_deficient_action must be 'warn', 'error', or 'silent', "
                 f"got '{rank_deficient_action}'"
             )
+        if bootstrap_weights not in ("rademacher", "mammen", "webb"):
+            raise ValueError(
+                f"bootstrap_weights must be 'rademacher', 'mammen', or 'webb', "
+                f"got '{bootstrap_weights}'"
+            )
         if aux_partition not in ("cohort_horizon", "cohort", "horizon"):
             raise ValueError(
                 f"aux_partition must be 'cohort_horizon', 'cohort', or 'horizon', "
@@ -146,6 +154,7 @@ def __init__(
         self.alpha = alpha
         self.cluster = cluster
         self.n_bootstrap = n_bootstrap
+        self.bootstrap_weights = bootstrap_weights
         self.seed = seed
         self.rank_deficient_action = rank_deficient_action
         self.horizon_max = horizon_max
@@ -1359,15 +1368,7 @@ def _aggregate_event_study(
            effect = float(np.mean(valid_tau))
 
            # Compute SE via conservative variance with horizon-specific weights
-           weights_h = np.zeros(int(omega_1_mask.sum()))
-           # Map h_mask (relative to df_1) to weights array
-           h_indices_in_omega1 = np.where(h_mask)[0]
-           n_valid = len(valid_tau)
-           # Only weight valid (finite) observations
-           finite_mask = np.isfinite(tau_hat[h_mask])
-           valid_h_indices = h_indices_in_omega1[finite_mask]
-           for idx in valid_h_indices:
-               weights_h[idx] = 1.0 / n_valid
+           weights_h, n_valid = _compute_target_weights(tau_hat, h_mask)
 
            se = self._compute_conservative_variance(
                df=df,
@@ -1477,12 +1478,7 @@ def _aggregate_group(
            effect = float(np.mean(valid_tau))
 
            # Compute SE with group-specific weights
-           weights_g = np.zeros(int(omega_1_mask.sum()))
-           finite_mask = np.isfinite(tau_hat) & g_mask
-           g_indices = np.where(finite_mask)[0]
-           n_valid = len(valid_tau)
-           for idx in g_indices:
-               weights_g[idx] = 1.0 / n_valid
+           weights_g, _ = _compute_target_weights(tau_hat, g_mask)
 
            se = self._compute_conservative_variance(
                df=df,
@@ -1664,6 +1660,7 @@ def get_params(self) -> Dict[str, Any]:
            "alpha": self.alpha,
            "cluster": self.cluster,
            "n_bootstrap": self.n_bootstrap,
+           "bootstrap_weights": self.bootstrap_weights,
            "seed": self.seed,
            "rank_deficient_action": self.rank_deficient_action,
            "horizon_max": self.horizon_max,
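Both sides of the `_aggregate_group` change compute the same weights; that equivalence can be checked standalone, with `_compute_target_weights` copied from the hunk that adds it to `imputation_bootstrap.py` and toy inputs standing in for real estimator state:

```python
import numpy as np

def _compute_target_weights(tau_hat, target_mask):
    # Helper as added in this commit (body copied from the diff)
    finite_target = np.isfinite(tau_hat) & target_mask
    n_valid = int(finite_target.sum())
    weights = np.zeros(len(tau_hat))
    if n_valid > 0:
        weights[np.where(finite_target)[0]] = 1.0 / n_valid
    return weights, n_valid

# Toy per-observation effects (one NaN) and a "group" mask
tau_hat = np.array([0.5, np.nan, 1.5, 2.0, 0.0])
g_mask = np.array([True, True, True, False, True])

# Old loop-based construction (the deleted side of the diff)
finite_mask = np.isfinite(tau_hat) & g_mask
valid_tau = tau_hat[finite_mask]
weights_g = np.zeros(len(tau_hat))
g_indices = np.where(finite_mask)[0]
n_valid = len(valid_tau)
for idx in g_indices:
    weights_g[idx] = 1.0 / n_valid

new_weights, new_n = _compute_target_weights(tau_hat, g_mask)
print(np.allclose(weights_g, new_weights), new_n)  # True 3
```

The NaN and out-of-group positions get weight 0, and the remaining weights sum to 1, so the aggregated effect stays an equal-weighted mean over valid observations.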

diff_diff/imputation_bootstrap.py

Lines changed: 41 additions & 18 deletions
@@ -19,6 +19,39 @@
 ]
 
 
+def _compute_target_weights(
+    tau_hat: np.ndarray,
+    target_mask: np.ndarray,
+) -> "tuple[np.ndarray, int]":
+    """
+    Equal weights for finite tau_hat observations within target_mask.
+
+    Used by both aggregation and bootstrap paths to avoid weight logic
+    duplication.
+
+    Parameters
+    ----------
+    tau_hat : np.ndarray
+        Per-observation treatment effects (may contain NaN).
+    target_mask : np.ndarray
+        Boolean mask selecting the target subset within tau_hat.
+
+    Returns
+    -------
+    weights : np.ndarray
+        Weight array (same length as tau_hat). 1/n_valid for finite
+        observations in target_mask, 0 elsewhere.
+    n_valid : int
+        Number of finite observations in the target subset.
+    """
+    finite_target = np.isfinite(tau_hat) & target_mask
+    n_valid = int(finite_target.sum())
+    weights = np.zeros(len(tau_hat))
+    if n_valid > 0:
+        weights[np.where(finite_target)[0]] = 1.0 / n_valid
+    return weights, n_valid
+
+
 class ImputationDiDBootstrapMixin:
     """Mixin providing bootstrap inference methods for ImputationDiD."""
 
@@ -91,7 +124,8 @@ def _precompute_bootstrap_psi(
 
        For each aggregation target (overall, per-horizon, per-group), computes
        psi_i = sum_t v_it * epsilon_tilde_it for each cluster. The multiplier
-       bootstrap then perturbs these psi sums with Rademacher weights.
+       bootstrap then perturbs these psi sums with multiplier weights
+       (rademacher/mammen/webb; configurable via ``bootstrap_weights``).
 
        Computational cost scales with the number of aggregation targets, since
        each target requires its own v_untreated computation (weight-dependent).
@@ -120,13 +154,10 @@ def _precompute_bootstrap_psi(
        result["overall"] = (overall_psi, cluster_ids)
 
        # Event study: per-horizon weights
-       # NOTE: weight logic duplicated from _aggregate_event_study.
-       # If weight scheme changes there, update here too.
        if event_study_effects:
            result["event_study"] = {}
            df_1 = df.loc[omega_1_mask]
            rel_times = df_1["_rel_time"].values
-           n_omega_1 = int(omega_1_mask.sum())
 
            # Balanced cohort mask (same logic as _aggregate_event_study)
            balanced_mask = None
@@ -150,37 +181,28 @@ def _precompute_bootstrap_psi(
                h_mask = rel_times == h
                if balanced_mask is not None:
                    h_mask = h_mask & balanced_mask
-               weights_h = np.zeros(n_omega_1)
-               finite_h = np.isfinite(tau_hat) & h_mask
-               n_valid_h = int(finite_h.sum())
+               weights_h, n_valid_h = _compute_target_weights(tau_hat, h_mask)
                if n_valid_h == 0:
                    continue
-               weights_h[np.where(finite_h)[0]] = 1.0 / n_valid_h
 
                psi_h, _ = self._compute_cluster_psi_sums(**common, weights=weights_h)
                result["event_study"][h] = psi_h
 
        # Group effects: per-group weights
-       # NOTE: weight logic duplicated from _aggregate_group.
-       # If weight scheme changes there, update here too.
        if group_effects:
            result["group"] = {}
            df_1 = df.loc[omega_1_mask]
            cohorts = df_1[first_treat].values
-           n_omega_1 = int(omega_1_mask.sum())
 
            for g in group_effects:
                if group_effects[g].get("n_obs", 0) == 0:
                    continue
                if not np.isfinite(group_effects[g].get("effect", np.nan)):
                    continue
                g_mask = cohorts == g
-               weights_g = np.zeros(n_omega_1)
-               finite_g = np.isfinite(tau_hat) & g_mask
-               n_valid_g = int(finite_g.sum())
+               weights_g, n_valid_g = _compute_target_weights(tau_hat, g_mask)
                if n_valid_g == 0:
                    continue
-               weights_g[np.where(finite_g)[0]] = 1.0 / n_valid_g
 
                psi_g, _ = self._compute_cluster_psi_sums(**common, weights=weights_g)
                result["group"][g] = psi_g
@@ -197,7 +219,8 @@ def _run_bootstrap(
        """
        Run multiplier bootstrap on pre-computed influence function sums.
 
-       Uses T_b = sum_i w_b_i * psi_i where w_b_i are Rademacher weights
+       Uses T_b = sum_i w_b_i * psi_i where w_b_i are multiplier weights
+       (rademacher/mammen/webb; configurable via ``bootstrap_weights``)
        and psi_i are cluster-level influence function sums from Theorem 3.
        SE = std(T_b, ddof=1).
        """
@@ -216,7 +239,7 @@ def _run_bootstrap(
 
        # Generate ALL weights upfront: shape (n_bootstrap, n_clusters)
        all_weights = _generate_bootstrap_weights_batch(
-           self.n_bootstrap, n_clusters, "rademacher", rng
+           self.n_bootstrap, n_clusters, self.bootstrap_weights, rng
        )
 
        # Overall ATT bootstrap draws
@@ -295,7 +318,7 @@ def _run_bootstrap(
 
        return ImputationBootstrapResults(
            n_bootstrap=self.n_bootstrap,
-           weight_type="rademacher",
+           weight_type=self.bootstrap_weights,
            alpha=self.alpha,
            overall_att_se=overall_se,
            overall_att_ci=overall_ci,
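The `_run_bootstrap` docstring above describes the multiplier scheme: T_b = sum_i w_b_i * psi_i, with SE = std(T_b, ddof=1). A self-contained sketch of that aggregation step with toy psi values (in the estimator, the cluster-level influence sums come from `_precompute_bootstrap_psi`; the percentile CI shown here is one common choice, not necessarily the package's):

```python
import numpy as np

rng = np.random.default_rng(42)

# Toy cluster-level influence-function sums psi_i; made-up numbers
# standing in for the Theorem 3 quantities.
psi = rng.normal(size=20)

n_bootstrap = 500
# Rademacher multipliers, shape (n_bootstrap, n_clusters)
all_weights = rng.choice([-1.0, 1.0], size=(n_bootstrap, len(psi)))

# T_b = sum_i w_b_i * psi_i, one draw per bootstrap replication
t_draws = all_weights @ psi

se = np.std(t_draws, ddof=1)
ci = (np.quantile(t_draws, 0.025), np.quantile(t_draws, 0.975))
print(se, ci)
```

Because the multipliers are independent with mean 0 and variance 1, Var(T_b) = sum_i psi_i^2 exactly, so the bootstrap SE should land near sqrt(sum_i psi_i^2) for large `n_bootstrap`.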

diff_diff/imputation_results.py

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ class ImputationBootstrapResults:
    n_bootstrap : int
        Number of bootstrap iterations.
    weight_type : str
-       Type of bootstrap weights (currently "rademacher" only).
+       Type of bootstrap weights: "rademacher", "mammen", or "webb".
    alpha : float
        Significance level used for confidence intervals.
    overall_att_se : float

diff_diff/sun_abraham.py

Lines changed: 0 additions & 22 deletions
@@ -433,8 +433,6 @@ def fit(
        time: str,
        first_treat: str,
        covariates: Optional[List[str]] = None,
-       min_pre_periods: int = 1,
-       min_post_periods: int = 1,
    ) -> SunAbrahamResults:
        """
        Fit the Sun-Abraham estimator using saturated regression.
@@ -454,10 +452,6 @@ def fit(
            Use 0 (or np.inf) for never-treated units.
        covariates : list, optional
            List of covariate column names to include in regression.
-       min_pre_periods : int, default=1
-           **Deprecated**: Accepted but ignored. Will be removed in a future version.
-       min_post_periods : int, default=1
-           **Deprecated**: Accepted but ignored. Will be removed in a future version.
 
        Returns
        -------
@@ -469,22 +463,6 @@ def fit(
        ValueError
            If required columns are missing or data validation fails.
        """
-       # Deprecation warnings for unimplemented parameters
-       if min_pre_periods != 1:
-           warnings.warn(
-               "min_pre_periods is not yet implemented and will be ignored. "
-               "This parameter will be removed in a future version.",
-               FutureWarning,
-               stacklevel=2,
-           )
-       if min_post_periods != 1:
-           warnings.warn(
-               "min_post_periods is not yet implemented and will be ignored. "
-               "This parameter will be removed in a future version.",
-               FutureWarning,
-               stacklevel=2,
-           )
-
        # Validate inputs
        required_cols = [outcome, unit, time, first_treat]
        if covariates:

diff_diff/trop.py

Lines changed: 7 additions & 1 deletion
@@ -93,7 +93,7 @@ class TROP:
    alpha : float, default=0.05
        Significance level for confidence intervals.
    n_bootstrap : int, default=200
-       Number of bootstrap replications for variance estimation.
+       Number of bootstrap replications for variance estimation. Must be >= 2.
    seed : int, optional
        Random seed for reproducibility.
 
@@ -156,6 +156,12 @@ def __init__(
        self.lambda_unit_grid = lambda_unit_grid or [0.0, 0.1, 0.5, 1.0, 2.0, 5.0]
        self.lambda_nn_grid = lambda_nn_grid or [0.0, 0.01, 0.1, 1.0, 10.0]
 
+       if n_bootstrap < 2:
+           raise ValueError(
+               "n_bootstrap must be >= 2 for TROP (bootstrap variance "
+               "estimation is always used)"
+           )
+
        self.max_iter = max_iter
        self.tol = tol
        self.alpha = alpha
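The guard added above exists because a sample standard deviation with `ddof=1` is undefined for fewer than two draws; NumPy returns NaN, which is the behavior the Rust side now mirrors with `f64::NAN`. A quick illustration:

```python
import warnings
import numpy as np

one_draw = np.array([0.7])

with warnings.catch_warnings():
    # np.std emits a RuntimeWarning when degrees of freedom <= 0
    warnings.simplefilter("ignore", RuntimeWarning)
    se = np.std(one_draw, ddof=1)

print(se)  # nan: the variance denominator n - ddof is zero
```

Validating `n_bootstrap >= 2` up front turns that silent NaN (or a misleading 0.0) into an immediate, explicit error at construction time.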
