Skip to content

Commit bf38119

Browse files
igerber and claude committed
Use pivoted QR for replicate df and NaN inference for rank<=1 designs
Replace unpivoted QR with scipy pivoted QR (LAPACK dgeqp3), matching R's qr(..., tol=1e-5) column-pivoting semantics for rank computation. When a replicate design has undefined df (rank <= 1), force NaN inference instead of falling through to z-distribution or model-based df. This applies to LinearRegression.get_inference() and TripleDifference.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4ba002f commit bf38119

3 files changed

Lines changed: 22 additions & 4 deletions

File tree

diff_diff/linalg.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2029,6 +2029,16 @@ def get_inference(
20292029
effective_df = df
20302030
elif self.survey_df_ is not None:
20312031
effective_df = self.survey_df_
2032+
elif (hasattr(self, '_survey_design') and self._survey_design is not None
2033+
and hasattr(self._survey_design, 'uses_replicate_variance')
2034+
and self._survey_design.uses_replicate_variance):
2035+
# Replicate design with undefined df (rank <= 1) — NaN inference
2036+
warnings.warn(
2037+
"Replicate design has undefined survey d.f. (rank <= 1). "
2038+
"Inference fields will be NaN.",
2039+
UserWarning, stacklevel=2,
2040+
)
2041+
effective_df = 0 # Forces NaN from t-distribution
20322042
else:
20332043
effective_df = self.df_
20342044

diff_diff/survey.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -590,10 +590,13 @@ def df_survey(self) -> Optional[int]:
590590
analysis_weights = self.replicate_weights
591591
else:
592592
analysis_weights = self.replicate_weights * self.weights[:, np.newaxis]
593-
# Use QR decomposition with R-compatible tolerance (1e-5)
594-
Q, R_mat = np.linalg.qr(analysis_weights, mode='reduced')
593+
# Pivoted QR with R-compatible tolerance, matching R's
594+
# qr(..., tol=1e-5) which uses column pivoting (LAPACK dgeqp3)
595+
from scipy.linalg import qr as scipy_qr
596+
_, R_mat, _ = scipy_qr(analysis_weights, pivoting=True, mode='economic')
597+
diag_abs = np.abs(np.diag(R_mat))
595598
tol = 1e-5
596-
rank = int(np.sum(np.abs(np.diag(R_mat)) > tol * np.abs(np.diag(R_mat)).max()))
599+
rank = int(np.sum(diag_abs > tol * diag_abs.max())) if diag_abs.max() > 0 else 0
597600
df = rank - 1
598601
return df if df > 0 else None
599602
if self.psu is not None and self.n_psu > 0:

diff_diff/triple_diff.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,12 @@ def fit(
590590
survey_metadata.df_survey = self._replicate_n_valid - 1
591591
# df <= 0 means insufficient rank for t-based inference
592592
if df is not None and df <= 0:
593-
df = None
593+
df = 0 # Forces NaN from t-distribution
594+
elif (resolved_survey is not None
595+
and hasattr(resolved_survey, 'uses_replicate_variance')
596+
and resolved_survey.uses_replicate_variance):
597+
# Replicate design with undefined df (rank <= 1) — NaN inference
598+
df = 0 # Forces NaN from t-distribution
594599
else:
595600
df = n_obs - 8 # Approximate df (8 cell means)
596601
if covariates:

0 commit comments

Comments
 (0)