igerber
diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py
Lines changed: 86 additions & 28 deletions
diff --git a/diff_diff/wooldridge.py b/diff_diff/wooldridge.py
Lines changed: 69 additions & 20 deletions
@@ -1204,9 +1204,7 @@ def solve_logit(
         if np.any(weights < 0):
             raise ValueError("weights must be non-negative")
         if np.sum(weights) <= 0:
-            raise ValueError(
-                "weights sum to zero — no observations have positive weight"
-            )
+            raise ValueError("weights sum to zero — no observations have positive weight")
 
     # Validate rank_deficient_action
     valid_actions = {"warn", "error", "silent"}
@@ -1882,7 +1880,9 @@ def fit(
                     kept_cols = np.where(~nan_mask)[0]
                     if len(kept_cols) > 0:
                         vcov_reduced, _n_valid_rep = compute_replicate_vcov(
-                            X[:, kept_cols], y, coefficients[kept_cols],
+                            X[:, kept_cols],
+                            y,
+                            coefficients[kept_cols],
                             _effective_survey_design,
                             weight_type=self.weight_type,
                         )
@@ -1892,7 +1892,10 @@ def fit(
                         _n_valid_rep = 0
                 else:
                     vcov, _n_valid_rep = compute_replicate_vcov(
-                        X, y, coefficients, _effective_survey_design,
+                        X,
+                        y,
+                        coefficients,
+                        _effective_survey_design,
                         weight_type=self.weight_type,
                     )
                 # Store effective replicate df only when replicates were dropped
@@ -1948,7 +1951,7 @@ def fit(
             if isinstance(_effective_survey_design, ResolvedSurveyDesign):
                 self.survey_df_ = _effective_survey_design.df_survey
                 # Override with effective replicate df if available
-                if hasattr(self, '_replicate_df') and self._replicate_df is not None:
+                if hasattr(self, "_replicate_df") and self._replicate_df is not None:
                     self.survey_df_ = self._replicate_df
 
         return self
@@ -1964,10 +1967,9 @@ def compute_deff(self, coefficient_names=None):
         DEFFDiagnostics
         """
         self._check_fitted()
-        if not (hasattr(self, 'survey_design') and self.survey_design is not None):
+        if not (hasattr(self, "survey_design") and self.survey_design is not None):
             raise ValueError(
-                "compute_deff() requires a survey design. "
-                "Fit with survey_design= first."
+                "compute_deff() requires a survey design. " "Fit with survey_design= first."
             )
         from diff_diff.survey import compute_deff_diagnostics
 
@@ -1980,17 +1982,23 @@ def compute_deff(self, coefficient_names=None):
                 k = len(self.coefficients_)
                 nan_arr = np.full(k, np.nan)
                 from diff_diff.survey import DEFFDiagnostics
+
                 return DEFFDiagnostics(
-                    deff=nan_arr, effective_n=nan_arr.copy(),
-                    srs_se=nan_arr.copy(), survey_se=nan_arr.copy(),
+                    deff=nan_arr,
+                    effective_n=nan_arr.copy(),
+                    srs_se=nan_arr.copy(),
+                    survey_se=nan_arr.copy(),
                     coefficient_names=coefficient_names,
                 )
             # Compute on kept columns only
             X_kept = self._X[:, kept]
             vcov_kept = self.vcov_[np.ix_(kept, kept)]
             deff_kept = compute_deff_diagnostics(
-                X_kept, self.residuals_, vcov_kept,
-                self.weights, weight_type=self.weight_type,
+                X_kept,
+                self.residuals_,
+                vcov_kept,
+                self.weights,
+                weight_type=self.weight_type,
             )
             # Expand back to full size with NaN for dropped
             k = len(self.coefficients_)
@@ -2003,15 +2011,21 @@ def compute_deff(self, coefficient_names=None):
             full_srs_se[kept] = deff_kept.srs_se
             full_survey_se[kept] = deff_kept.survey_se
             from diff_diff.survey import DEFFDiagnostics
+
             return DEFFDiagnostics(
-                deff=full_deff, effective_n=full_eff_n,
-                srs_se=full_srs_se, survey_se=full_survey_se,
+                deff=full_deff,
+                effective_n=full_eff_n,
+                srs_se=full_srs_se,
+                survey_se=full_survey_se,
                 coefficient_names=coefficient_names,
             )
 
         return compute_deff_diagnostics(
-            self._X, self.residuals_, self.vcov_,
-            self.weights, weight_type=self.weight_type,
+            self._X,
+            self.residuals_,
+            self.vcov_,
+            self.weights,
+            weight_type=self.weight_type,
             coefficient_names=coefficient_names,
         )
 
@@ -2108,24 +2122,31 @@ def get_inference(
             effective_df = df
         elif self.survey_df_ is not None:
             effective_df = self.survey_df_
-        elif (hasattr(self, 'survey_design') and self.survey_design is not None
-              and hasattr(self.survey_design, 'uses_replicate_variance')
-              and self.survey_design.uses_replicate_variance):
+        elif (
+            hasattr(self, "survey_design")
+            and self.survey_design is not None
+            and hasattr(self.survey_design, "uses_replicate_variance")
+            and self.survey_design.uses_replicate_variance
+        ):
             # Replicate design with undefined df (rank <= 1) — NaN inference
             warnings.warn(
                 "Replicate design has undefined survey d.f. (rank <= 1). "
                 "Inference fields will be NaN.",
-                UserWarning, stacklevel=2,
+                UserWarning,
+                stacklevel=2,
             )
             effective_df = 0  # Forces NaN from t-distribution
         else:
             effective_df = self.df_
 
         # Warn if df is non-positive and fall back to normal distribution
         # (skip for replicate designs — df=0 is intentional for NaN inference)
-        _is_replicate = (hasattr(self, 'survey_design') and self.survey_design is not None
-                         and hasattr(self.survey_design, 'uses_replicate_variance')
-                         and self.survey_design.uses_replicate_variance)
+        _is_replicate = (
+            hasattr(self, "survey_design")
+            and self.survey_design is not None
+            and hasattr(self.survey_design, "uses_replicate_variance")
+            and self.survey_design.uses_replicate_variance
+        )
         if effective_df is not None and effective_df <= 0 and not _is_replicate:
             import warnings
 
@@ -2350,6 +2371,7 @@ def solve_poisson(
     max_iter: int = 200,
     tol: float = 1e-8,
     init_beta: Optional[np.ndarray] = None,
+    rank_deficient_action: str = "warn",
 ) -> Tuple[np.ndarray, np.ndarray]:
     """Poisson IRLS (Newton-Raphson with log link).
 
@@ -2365,15 +2387,38 @@ def solve_poisson(
     init_beta : optional starting coefficient vector; if None, zeros are used
         with the first column treated as the intercept and initialized to
         log(mean(y)) to improve convergence for large-scale outcomes.
+    rank_deficient_action : {"warn", "error", "silent"}
+        How to handle rank-deficient design matrices. Mirrors solve_ols/solve_logit.
 
     Returns
     -------
-    beta : (k,) coefficient vector
+    beta : (k,) coefficient vector (NaN for dropped columns if rank-deficient)
     W : (n,) final fitted means mu_hat (weights for sandwich vcov)
     """
+    n, k_orig = X.shape
+
+    # Rank-deficiency detection (same pattern as solve_logit/solve_ols)
+    kept_cols = np.arange(k_orig)
+    rank, dropped_cols, _pivot = _detect_rank_deficiency(X)
+    if len(dropped_cols) > 0:
+        if rank_deficient_action == "error":
+            raise ValueError(
+                f"Rank-deficient design matrix: {len(dropped_cols)} collinear columns detected."
+            )
+        if rank_deficient_action == "warn":
+            warnings.warn(
+                f"Rank-deficient design matrix: dropping {len(dropped_cols)} of {k_orig} columns. "
+                f"Coefficients for these columns are set to NA.",
+                UserWarning,
+                stacklevel=2,
+            )
+        dropped_set = set(int(d) for d in dropped_cols)
+        kept_cols = np.array([i for i in range(k_orig) if i not in dropped_set])
+        X = X[:, kept_cols]
+
     n, k = X.shape
     if init_beta is not None:
-        beta = init_beta.copy()
+        beta = init_beta[kept_cols].copy() if len(dropped_cols) > 0 else init_beta.copy()
     else:
         beta = np.zeros(k)
         # Initialise the intercept to log(mean(y)) so the first IRLS step
@@ -2385,11 +2430,17 @@ def solve_poisson(
     for _ in range(max_iter):
         eta = np.clip(X @ beta, -500, 500)
         mu = np.exp(eta)
-        score = X.T @ (y - mu)                   # gradient of log-likelihood
-        hess = X.T @ (mu[:, None] * X)           # -Hessian = X'WX, W=diag(mu)
+        score = X.T @ (y - mu)  # gradient of log-likelihood
+        hess = X.T @ (mu[:, None] * X)  # -Hessian = X'WX, W=diag(mu)
         try:
             delta = np.linalg.solve(hess + 1e-12 * np.eye(k), score)
         except np.linalg.LinAlgError:
+            warnings.warn(
+                "solve_poisson: Hessian is singular at iteration. "
+                "Design matrix may be rank-deficient.",
+                RuntimeWarning,
+                stacklevel=2,
+            )
             break
         # Damped step: cap the maximum coefficient change to avoid overshooting
         max_step = np.max(np.abs(delta))
@@ -2407,4 +2458,11 @@ def solve_poisson(
             stacklevel=2,
         )
     mu_final = np.exp(np.clip(X @ beta, -500, 500))
+
+    # Expand back to full size if columns were dropped
+    if len(dropped_cols) > 0:
+        beta_full = np.full(k_orig, np.nan)
+        beta_full[kept_cols] = beta
+        beta = beta_full
+
     return beta, mu_final
@@ -77,26 +77,21 @@ def _filter_sample(
 ) -> pd.DataFrame:
     """Return the analysis sample following jwdid selection rules.
 
-    For "not_yet_treated": keep all observations from treated units (pre- and
-    post-treatment) plus all never-treated and not-yet-treated observations.
-    For "never_treated": keep only post-treatment observations from treated
-    units (t >= g - anticipation) plus all never-treated observations.
-    Pre-treatment observations from treated units are excluded so they do not
-    serve as implicit controls in the regression baseline.
+    Every row of a treated unit (cohort > 0) is kept, pre- and post-treatment,
+    for proper FE estimation, and never-treated rows (cohort == 0) are kept for
+    either control_group setting. control_group mainly changes the interaction
+    matrix structure (see _build_interaction_matrix).
     """
     df = data.copy()
     # Normalise never-treated: fill NaN cohort with 0
     df[cohort] = df[cohort].fillna(0)
 
+    treated_mask = df[cohort] > 0
+
     if control_group == "never_treated":
-        # Post-treatment obs from treated units + all never-treated obs.
-        # Pre-treatment obs from treated units are excluded so the
-        # counterfactual is identified solely from never-treated units.
-        treated_mask = (df[cohort] > 0) & (df[time] >= df[cohort] - anticipation)
         control_mask = df[cohort] == 0
     else:  # not_yet_treated
-        # All treated-unit obs + never-treated + not-yet-treated obs
-        treated_mask = df[cohort] > 0
+        # Never-treated rows plus rows not yet treated at time t (cohort > t)
         control_mask = (df[cohort] == 0) | (df[cohort] > df[time])
 
     return df[treated_mask | control_mask].copy()
@@ -107,9 +102,20 @@ def _build_interaction_matrix(
     cohort: str,
     time: str,
     anticipation: int,
+    control_group: str = "not_yet_treated",
 ) -> Tuple[np.ndarray, List[str], List[Tuple[Any, Any]]]:
     """Build the saturated cohort×time interaction design matrix.
 
+    For ``not_yet_treated``: only post-treatment cells (t >= g - anticipation).
+    Pre-treatment obs from treated units sit in the regression baseline alongside
+    not-yet-treated controls.
+
+    For ``never_treated``: ALL (g, t) pairs for each treated cohort. This
+    "absorbs" pre-treatment obs from treated units into their own indicators so
+    they do not serve as implicit controls in the baseline. Only never-treated
+    observations remain in the omitted category. Pre-treatment coefficients
+    (t < g) serve as placebo/pre-trend tests.
+
     Returns
     -------
     X_int : (n, n_cells) binary indicator matrix
@@ -127,7 +133,7 @@ def _build_interaction_matrix(
 
     for g in groups:
         for t in times:
-            if t >= g - anticipation:
+            if control_group == "never_treated" or t >= g - anticipation:
                 indicator = ((cohort_vals == g) & (time_vals == t)).astype(float)
                 cols.append(indicator)
                 col_names.append(f"g{g}_t{t}")
@@ -315,16 +321,46 @@ def fit(
         df = data.copy()
         df[cohort] = df[cohort].fillna(0)
 
+        # 0. Reject bootstrap for nonlinear methods (not implemented)
+        if self.n_bootstrap > 0 and self.method != "ols":
+            raise ValueError(
+                f"Bootstrap inference is only supported for method='ols'. "
+                f"Got method={self.method!r} with n_bootstrap={self.n_bootstrap}. "
+                f"Set n_bootstrap=0 for analytic SEs."
+            )
+
         # 1. Filter to analysis sample
         sample = _filter_sample(df, unit, time, cohort, self.control_group, self.anticipation)
 
+        # 1b. Identification checks
+        groups = sorted(g for g in sample[cohort].unique() if g > 0)
+        if len(groups) == 0:
+            raise ValueError(
+                "No treated cohorts found in data. Ensure the cohort column "
+                "contains values > 0 for treated units."
+            )
+        if self.control_group == "never_treated" and not (sample[cohort] == 0).any():
+            raise ValueError(
+                "control_group='never_treated' but no never-treated units "
+                "(cohort == 0) found. Use 'not_yet_treated' or add "
+                "never-treated units."
+            )
+
         # 2. Build interaction matrix
         X_int, int_col_names, gt_keys = _build_interaction_matrix(
-            sample, cohort=cohort, time=time, anticipation=self.anticipation
+            sample,
+            cohort=cohort,
+            time=time,
+            anticipation=self.anticipation,
+            control_group=self.control_group,
         )
+        if X_int.shape[1] == 0:
+            raise ValueError(
+                "No valid treatment cells found. Check that treated units "
+                "have post-treatment observations in the data."
+            )
 
         # 3. Covariates
-        groups = sorted(g for g in sample[cohort].unique() if g > 0)
         X_cov = _prepare_covariates(
             sample,
             exovar=exovar,
@@ -444,6 +480,9 @@ def _fit_ols(
         for idx, (g, t) in enumerate(gt_keys):
             if idx >= len(coefs):
                 break
+            # Skip cells whose coefficient was dropped (rank deficiency)
+            if np.isnan(coefs[idx]):
+                continue
             att = float(coefs[idx])
             se = float(np.sqrt(max(vcov[idx, idx], 0.0))) if vcov is not None else float("nan")
             t_stat, p_value, conf_int = safe_inference(att, se, alpha=self.alpha)
@@ -456,10 +495,14 @@ def _fit_ols(
             }
             gt_weights[(g, t)] = int(((sample[cohort] == g) & (sample[time] == t)).sum())
 
-        # Extract vcov submatrix for beta_{g,t} only
-        n_gt = len(gt_keys)
-        gt_vcov = vcov[:n_gt, :n_gt] if vcov is not None else None
-        gt_keys_ordered = list(gt_keys)
+        # Extract vcov submatrix for identified β_{g,t} only (skip NaN/dropped)
+        gt_keys_ordered = list(gt_effects.keys())
+        if vcov is not None and gt_keys_ordered:
+            # Map from gt_keys_ordered to original indices in the coef vector
+            orig_indices = [i for i, k in enumerate(gt_keys) if k in gt_effects]
+            gt_vcov = vcov[np.ix_(orig_indices, orig_indices)]
+        else:
+            gt_vcov = None
 
         # 8. Simple aggregation (always computed)
         overall = _compute_weighted_agg(
@@ -721,7 +764,13 @@ def _fit_poisson(
         cluster_col = self.cluster if self.cluster else unit
         cluster_ids = sample[cluster_col].values
 
-        beta, mu_hat = solve_poisson(X_full, y)
+        beta, mu_hat = solve_poisson(X_full, y, rank_deficient_action=self.rank_deficient_action)
+
+        # Handle rank-deficient designs: zero out NaN entries so downstream
+        # matrix ops don't propagate NaN (dropped columns contribute nothing)
+        nan_mask = np.isnan(beta)
+        if np.any(nan_mask):
+            beta = np.where(nan_mask, 0.0, beta)
 
         # QMLE sandwich vcov via shared linalg backend
         resids = y - mu_hat