Skip to content

Commit df5d62a

Browse files
igerber and claude committed
Propagate rank_deficient_action to all estimators (P2)
Complete parameter propagation for rank_deficient_action across all estimators that use OLS internally:

- TwoWayFixedEffects: Pass to LinearRegression, conditional warning suppression based on setting
- CallawaySantAnna: Add parameter to __init__, pass to _linear_regression helper in _outcome_regression and _doubly_robust methods
- SunAbraham: Add parameter to __init__, pass to LinearRegression in _fit_saturated_regression
- TripleDifference: Add parameter to __init__, pass to solve_ols and LinearRegression in regression methods, also update convenience function

All estimators now support:

- "warn" (default): Issue warning and drop linearly dependent columns
- "error": Raise ValueError on rank deficiency
- "silent": Drop columns silently without warning

Tests added for each estimator verifying error/silent behavior.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent f35e57c commit df5d62a

8 files changed

Lines changed: 372 additions & 17 deletions

File tree

diff_diff/staggered.py

Lines changed: 32 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ def gradient(beta: np.ndarray) -> np.ndarray:
109109
def _linear_regression(
110110
X: np.ndarray,
111111
y: np.ndarray,
112+
rank_deficient_action: str = "warn",
112113
) -> Tuple[np.ndarray, np.ndarray]:
113114
"""
114115
Fit OLS regression.
@@ -119,6 +120,11 @@ def _linear_regression(
119120
Feature matrix (n_samples, n_features). Intercept added automatically.
120121
y : np.ndarray
121122
Outcome variable.
123+
rank_deficient_action : str, default "warn"
124+
Action when design matrix is rank-deficient:
125+
- "warn": Issue warning and drop linearly dependent columns (default)
126+
- "error": Raise ValueError
127+
- "silent": Drop columns silently without warning
122128
123129
Returns
124130
-------
@@ -132,7 +138,10 @@ def _linear_regression(
132138
X_with_intercept = np.column_stack([np.ones(n), X])
133139

134140
# Use unified OLS backend (no vcov needed)
135-
beta, residuals, _ = solve_ols(X_with_intercept, y, return_vcov=False)
141+
beta, residuals, _ = solve_ols(
142+
X_with_intercept, y, return_vcov=False,
143+
rank_deficient_action=rank_deficient_action,
144+
)
136145

137146
return beta, residuals
138147

@@ -195,6 +204,11 @@ class CallawaySantAnna(
195204
Use ``bootstrap_weights`` instead. Will be removed in v2.0.
196205
seed : int, optional
197206
Random seed for reproducibility.
207+
rank_deficient_action : str, default="warn"
208+
Action when design matrix is rank-deficient (linearly dependent columns):
209+
- "warn": Issue warning and drop linearly dependent columns (default)
210+
- "error": Raise ValueError
211+
- "silent": Drop columns silently without warning
198212
199213
Attributes
200214
----------
@@ -277,6 +291,7 @@ def __init__(
277291
bootstrap_weights: Optional[str] = None,
278292
bootstrap_weight_type: Optional[str] = None,
279293
seed: Optional[int] = None,
294+
rank_deficient_action: str = "warn",
280295
):
281296
import warnings
282297

@@ -312,6 +327,12 @@ def __init__(
312327
f"got '{bootstrap_weights}'"
313328
)
314329

330+
if rank_deficient_action not in ["warn", "error", "silent"]:
331+
raise ValueError(
332+
f"rank_deficient_action must be 'warn', 'error', or 'silent', "
333+
f"got '{rank_deficient_action}'"
334+
)
335+
315336
self.control_group = control_group
316337
self.anticipation = anticipation
317338
self.estimation_method = estimation_method
@@ -322,6 +343,7 @@ def __init__(
322343
# Keep bootstrap_weight_type for backward compatibility
323344
self.bootstrap_weight_type = bootstrap_weights
324345
self.seed = seed
346+
self.rank_deficient_action = rank_deficient_action
325347

326348
self.is_fitted_ = False
327349
self.results_: Optional[CallawaySantAnnaResults] = None
@@ -778,7 +800,10 @@ def _outcome_regression(
778800
if X_treated is not None and X_control is not None and X_treated.shape[1] > 0:
779801
# Covariate-adjusted outcome regression
780802
# Fit regression on control units: E[Delta Y | X, D=0]
781-
beta, residuals = _linear_regression(X_control, control_change)
803+
beta, residuals = _linear_regression(
804+
X_control, control_change,
805+
rank_deficient_action=self.rank_deficient_action,
806+
)
782807

783808
# Predict counterfactual for treated units
784809
X_treated_with_intercept = np.column_stack([np.ones(n_t), X_treated])
@@ -938,7 +963,10 @@ def _doubly_robust(
938963
if X_treated is not None and X_control is not None and X_treated.shape[1] > 0:
939964
# Doubly robust estimation with covariates
940965
# Step 1: Outcome regression - fit E[Delta Y | X] on control
941-
beta, _ = _linear_regression(X_control, control_change)
966+
beta, _ = _linear_regression(
967+
X_control, control_change,
968+
rank_deficient_action=self.rank_deficient_action,
969+
)
942970

943971
# Predict counterfactual for both treated and control
944972
X_treated_with_intercept = np.column_stack([np.ones(n_t), X_treated])
@@ -1014,6 +1042,7 @@ def get_params(self) -> Dict[str, Any]:
10141042
# Deprecated but kept for backward compatibility
10151043
"bootstrap_weight_type": self.bootstrap_weight_type,
10161044
"seed": self.seed,
1045+
"rank_deficient_action": self.rank_deficient_action,
10171046
}
10181047

10191048
def set_params(self, **params) -> "CallawaySantAnna":

diff_diff/sun_abraham.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -319,6 +319,11 @@ class SunAbraham:
319319
If 0, uses analytical cluster-robust standard errors.
320320
seed : int, optional
321321
Random seed for reproducibility.
322+
rank_deficient_action : str, default="warn"
323+
Action when design matrix is rank-deficient (linearly dependent columns):
324+
- "warn": Issue warning and drop linearly dependent columns (default)
325+
- "error": Raise ValueError
326+
- "silent": Drop columns silently without warning
322327
323328
Attributes
324329
----------
@@ -395,19 +400,27 @@ def __init__(
395400
cluster: Optional[str] = None,
396401
n_bootstrap: int = 0,
397402
seed: Optional[int] = None,
403+
rank_deficient_action: str = "warn",
398404
):
399405
if control_group not in ["never_treated", "not_yet_treated"]:
400406
raise ValueError(
401407
f"control_group must be 'never_treated' or 'not_yet_treated', "
402408
f"got '{control_group}'"
403409
)
404410

411+
if rank_deficient_action not in ["warn", "error", "silent"]:
412+
raise ValueError(
413+
f"rank_deficient_action must be 'warn', 'error', or 'silent', "
414+
f"got '{rank_deficient_action}'"
415+
)
416+
405417
self.control_group = control_group
406418
self.anticipation = anticipation
407419
self.alpha = alpha
408420
self.cluster = cluster
409421
self.n_bootstrap = n_bootstrap
410422
self.seed = seed
423+
self.rank_deficient_action = rank_deficient_action
411424

412425
self.is_fitted_ = False
413426
self.results_: Optional[SunAbrahamResults] = None
@@ -756,6 +769,7 @@ def _fit_saturated_regression(
756769
include_intercept=False, # Already demeaned, no intercept needed
757770
robust=True,
758771
cluster_ids=cluster_ids,
772+
rank_deficient_action=self.rank_deficient_action,
759773
).fit(X, y)
760774

761775
coefficients = reg.coefficients_
@@ -1153,6 +1167,7 @@ def get_params(self) -> Dict[str, Any]:
11531167
"cluster": self.cluster,
11541168
"n_bootstrap": self.n_bootstrap,
11551169
"seed": self.seed,
1170+
"rank_deficient_action": self.rank_deficient_action,
11561171
}
11571172

11581173
def set_params(self, **params) -> "SunAbraham":

diff_diff/triple_diff.py

Lines changed: 33 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -330,6 +330,7 @@ def gradient(beta: np.ndarray) -> np.ndarray:
330330
def _linear_regression(
331331
X: np.ndarray,
332332
y: np.ndarray,
333+
rank_deficient_action: str = "warn",
333334
) -> Tuple[np.ndarray, np.ndarray, float]:
334335
"""
335336
Fit OLS regression.
@@ -340,6 +341,11 @@ def _linear_regression(
340341
Feature matrix (n_samples, n_features). Intercept added automatically.
341342
y : np.ndarray
342343
Outcome variable.
344+
rank_deficient_action : str, default "warn"
345+
Action when design matrix is rank-deficient:
346+
- "warn": Issue warning and drop linearly dependent columns (default)
347+
- "error": Raise ValueError
348+
- "silent": Drop columns silently without warning
343349
344350
Returns
345351
-------
@@ -355,7 +361,8 @@ def _linear_regression(
355361

356362
# Use unified OLS backend
357363
beta, residuals, fitted, _ = solve_ols(
358-
X_with_intercept, y, return_fitted=True, return_vcov=False
364+
X_with_intercept, y, return_fitted=True, return_vcov=False,
365+
rank_deficient_action=rank_deficient_action,
359366
)
360367

361368
# Compute R-squared
@@ -400,6 +407,11 @@ class TripleDifference:
400407
pscore_trim : float, default=0.01
401408
Trimming threshold for propensity scores. Scores below this value
402409
or above (1 - pscore_trim) are clipped to avoid extreme weights.
410+
rank_deficient_action : str, default="warn"
411+
Action when design matrix is rank-deficient (linearly dependent columns):
412+
- "warn": Issue warning and drop linearly dependent columns (default)
413+
- "error": Raise ValueError
414+
- "silent": Drop columns silently without warning
403415
404416
Attributes
405417
----------
@@ -478,17 +490,24 @@ def __init__(
478490
cluster: Optional[str] = None,
479491
alpha: float = 0.05,
480492
pscore_trim: float = 0.01,
493+
rank_deficient_action: str = "warn",
481494
):
482495
if estimation_method not in ("dr", "reg", "ipw"):
483496
raise ValueError(
484497
f"estimation_method must be 'dr', 'reg', or 'ipw', "
485498
f"got '{estimation_method}'"
486499
)
500+
if rank_deficient_action not in ["warn", "error", "silent"]:
501+
raise ValueError(
502+
f"rank_deficient_action must be 'warn', 'error', or 'silent', "
503+
f"got '{rank_deficient_action}'"
504+
)
487505
self.estimation_method = estimation_method
488506
self.robust = robust
489507
self.cluster = cluster
490508
self.alpha = alpha
491509
self.pscore_trim = pscore_trim
510+
self.rank_deficient_action = rank_deficient_action
492511

493512
self.is_fitted_ = False
494513
self.results_: Optional[TripleDifferenceResults] = None
@@ -744,6 +763,7 @@ def _regression_adjustment(
744763
include_intercept=False, # Intercept already in design_matrix
745764
robust=self.robust,
746765
alpha=self.alpha,
766+
rank_deficient_action=self.rank_deficient_action,
747767
).fit(design_matrix, y)
748768

749769
# ATT is the coefficient on G*P*T (index 7)
@@ -937,7 +957,10 @@ def _doubly_robust(
937957
if np.sum(mask) > 1:
938958
X_cell = np.column_stack([X[mask], T[mask]])
939959
try:
940-
_, fitted, _ = _linear_regression(X_cell, y[mask])
960+
_, fitted, _ = _linear_regression(
961+
X_cell, y[mask],
962+
rank_deficient_action=self.rank_deficient_action,
963+
)
941964
mu_fitted[mask] = fitted
942965
except Exception:
943966
mu_fitted[mask] = np.mean(y[mask])
@@ -1166,6 +1189,7 @@ def get_params(self) -> Dict[str, Any]:
11661189
"cluster": self.cluster,
11671190
"alpha": self.alpha,
11681191
"pscore_trim": self.pscore_trim,
1192+
"rank_deficient_action": self.rank_deficient_action,
11691193
}
11701194

11711195
def set_params(self, **params) -> "TripleDifference":
@@ -1223,6 +1247,7 @@ def triple_difference(
12231247
robust: bool = True,
12241248
cluster: Optional[str] = None,
12251249
alpha: float = 0.05,
1250+
rank_deficient_action: str = "warn",
12261251
) -> TripleDifferenceResults:
12271252
"""
12281253
Estimate Triple Difference (DDD) treatment effect.
@@ -1256,6 +1281,11 @@ def triple_difference(
12561281
Column name for cluster-robust standard errors.
12571282
alpha : float, default=0.05
12581283
Significance level for confidence intervals.
1284+
rank_deficient_action : str, default="warn"
1285+
Action when design matrix is rank-deficient:
1286+
- "warn": Issue warning and drop linearly dependent columns (default)
1287+
- "error": Raise ValueError
1288+
- "silent": Drop columns silently without warning
12591289
12601290
Returns
12611291
-------
@@ -1280,6 +1310,7 @@ def triple_difference(
12801310
robust=robust,
12811311
cluster=cluster,
12821312
alpha=alpha,
1313+
rank_deficient_action=rank_deficient_action,
12831314
)
12841315
return estimator.fit(
12851316
data=data,

diff_diff/twfe.py

Lines changed: 27 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -128,16 +128,29 @@ def fit( # type: ignore[override]
128128
# For wild bootstrap, we don't need cluster SEs from the initial fit
129129
cluster_ids = data[cluster_var].values
130130

131-
# Suppress rank-deficiency warning from solve_ols - TWFE handles its own messaging
132-
# with more context-specific error/warning messages
133-
with warnings.catch_warnings():
134-
warnings.filterwarnings("ignore", message="Rank-deficient design matrix")
131+
# Pass rank_deficient_action to LinearRegression
132+
# If "error", let LinearRegression raise immediately
133+
# If "warn" or "silent", suppress generic warning and use TWFE's context-specific
134+
# error/warning messages (more informative for panel data)
135+
if self.rank_deficient_action == "error":
135136
reg = LinearRegression(
136-
include_intercept=False, # Intercept already in X
137-
robust=True, # TWFE always uses robust/cluster SEs
137+
include_intercept=False,
138+
robust=True,
138139
cluster_ids=cluster_ids if self.inference != "wild_bootstrap" else None,
139140
alpha=self.alpha,
141+
rank_deficient_action="error",
140142
).fit(X, y, df_adjustment=df_adjustment)
143+
else:
144+
# Suppress generic warning, TWFE provides context-specific messages below
145+
with warnings.catch_warnings():
146+
warnings.filterwarnings("ignore", message="Rank-deficient design matrix")
147+
reg = LinearRegression(
148+
include_intercept=False,
149+
robust=True,
150+
cluster_ids=cluster_ids if self.inference != "wild_bootstrap" else None,
151+
alpha=self.alpha,
152+
rank_deficient_action="silent",
153+
).fit(X, y, df_adjustment=df_adjustment)
141154

142155
coefficients = reg.coefficients_
143156
residuals = reg.residuals_
@@ -171,12 +184,14 @@ def fit( # type: ignore[override]
171184
else:
172185
# Only covariates are dropped - this is a warning, not an error
173186
# The ATT can still be estimated
174-
warnings.warn(
175-
f"Some covariates are collinear and were dropped: "
176-
f"{', '.join(dropped_names)}. The treatment effect is still identified.",
177-
UserWarning,
178-
stacklevel=2,
179-
)
187+
# Respect rank_deficient_action setting for warning
188+
if self.rank_deficient_action == "warn":
189+
warnings.warn(
190+
f"Some covariates are collinear and were dropped: "
191+
f"{', '.join(dropped_names)}. The treatment effect is still identified.",
192+
UserWarning,
193+
stacklevel=2,
194+
)
180195

181196
# Get inference - either from bootstrap or analytical
182197
if self.inference == "wild_bootstrap":

tests/test_estimators.py

Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1058,6 +1058,58 @@ def test_twfe_treatment_collinearity_raises_error(self):
10581058
# If treatment column is dropped, should get informative error
10591059
assert "collinear" in str(e).lower() or "Treatment effect cannot be identified" in str(e)
10601060

1061+
def test_rank_deficient_action_error_raises(self, twfe_panel_data):
1062+
"""Test that rank_deficient_action='error' raises ValueError on collinear data."""
1063+
from diff_diff.estimators import TwoWayFixedEffects
1064+
1065+
# Add a covariate that is perfectly collinear with post
1066+
twfe_panel_data = twfe_panel_data.copy()
1067+
twfe_panel_data["collinear_cov"] = twfe_panel_data["post"].copy()
1068+
1069+
twfe = TwoWayFixedEffects(rank_deficient_action="error")
1070+
with pytest.raises(ValueError, match="rank-deficient"):
1071+
twfe.fit(
1072+
twfe_panel_data,
1073+
outcome="outcome",
1074+
treatment="treated",
1075+
time="post",
1076+
unit="unit",
1077+
covariates=["collinear_cov"]
1078+
)
1079+
1080+
def test_rank_deficient_action_silent_no_warning(self, twfe_panel_data):
1081+
"""Test that rank_deficient_action='silent' produces no warning."""
1082+
import warnings
1083+
from diff_diff.estimators import TwoWayFixedEffects
1084+
1085+
# Add a covariate that is perfectly collinear with another
1086+
twfe_panel_data = twfe_panel_data.copy()
1087+
twfe_panel_data["size"] = np.random.normal(100, 10, len(twfe_panel_data))
1088+
twfe_panel_data["size_dup"] = twfe_panel_data["size"].copy() # Perfect collinearity
1089+
1090+
twfe = TwoWayFixedEffects(rank_deficient_action="silent")
1091+
1092+
with warnings.catch_warnings(record=True) as w:
1093+
warnings.simplefilter("always")
1094+
results = twfe.fit(
1095+
twfe_panel_data,
1096+
outcome="outcome",
1097+
treatment="treated",
1098+
time="post",
1099+
unit="unit",
1100+
covariates=["size", "size_dup"]
1101+
)
1102+
1103+
# No warnings about rank deficiency or collinearity should be emitted
1104+
rank_warnings = [x for x in w if "Rank-deficient" in str(x.message)
1105+
or "rank-deficient" in str(x.message).lower()
1106+
or "collinear" in str(x.message).lower()]
1107+
assert len(rank_warnings) == 0, f"Expected no rank warnings, got {rank_warnings}"
1108+
1109+
# Should still get valid results
1110+
assert results is not None
1111+
assert twfe.is_fitted_
1112+
10611113

10621114
class TestClusterRobustSE:
10631115
"""Tests for cluster-robust standard errors."""

0 commit comments

Comments
 (0)