Skip to content

Commit 9e576c0

Browse files
committed
Revert untested Rust backend changes
The Rust backend optimizations (Cholesky factorization, reduced allocations) could not be tested in CI due to missing OpenBLAS library. Reverting these changes to keep v2.0.1 focused on tested Python improvements only. The Rust optimizations remain in TODO.md for future implementation when proper testing infrastructure is available.
1 parent aacc54a commit 9e576c0

4 files changed

Lines changed: 62 additions & 77 deletions

File tree

TODO.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ Enhancements for `honest_did.py`:
9292

9393
Deferred from PR #58 code review (can be done post-merge):
9494

95-
- [x] **Matrix inversion efficiency** (`rust/src/linalg.rs`): ~~Use Cholesky factorization for symmetric positive-definite matrices instead of column-by-column solve~~ (completed in v2.0.1)
96-
- [x] **Reduce bootstrap allocations** (`rust/src/bootstrap.rs`): ~~Currently uses `Vec<Vec<f64>>` → flatten → `Array2` which allocates twice.~~ Now allocates directly into pre-allocated buffer. (completed in v2.0.1)
95+
- [ ] **Matrix inversion efficiency** (`rust/src/linalg.rs:180-194`): Use Cholesky factorization for symmetric positive-definite matrices instead of column-by-column solve
96+
- [ ] **Reduce bootstrap allocations** (`rust/src/bootstrap.rs`): Currently uses `Vec<Vec<f64>>` → flatten → `Array2` which allocates twice. Should allocate directly into ndarray.
9797
- [ ] **Consider static BLAS linking** (`rust/Cargo.toml`): Currently requires system BLAS libraries. Consider `openblas-static` or `intel-mkl-static` features for easier distribution.
9898

9999
---

rust/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "diff_diff_rust"
3-
version = "2.0.1"
3+
version = "2.0.0"
44
edition = "2021"
55
description = "Rust backend for diff-diff DiD library"
66
license = "MIT"

rust/src/bootstrap.rs

Lines changed: 53 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -51,20 +51,19 @@ pub fn generate_bootstrap_weights_batch<'py>(
5151
///
5252
/// E[w] = 0, Var[w] = 1
5353
fn generate_rademacher_batch(n_bootstrap: usize, n_units: usize, seed: u64) -> Array2<f64> {
54-
// Pre-allocate flat array directly (single allocation instead of Vec<Vec<f64>> + flatten)
55-
let total_size = n_bootstrap * n_units;
56-
let mut flat = vec![0.0_f64; total_size];
57-
58-
// Generate weights in parallel, writing directly to pre-allocated buffer
59-
flat.par_chunks_mut(n_units)
60-
.enumerate()
61-
.for_each(|(i, row)| {
54+
// Generate weights in parallel using rayon
55+
let rows: Vec<Vec<f64>> = (0..n_bootstrap)
56+
.into_par_iter()
57+
.map(|i| {
6258
let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed.wrapping_add(i as u64));
63-
for val in row.iter_mut() {
64-
*val = if rng.gen::<bool>() { 1.0 } else { -1.0 };
65-
}
66-
});
67-
59+
(0..n_units)
60+
.map(|_| if rng.gen::<bool>() { 1.0 } else { -1.0 })
61+
.collect()
62+
})
63+
.collect();
64+
65+
// Convert to ndarray
66+
let flat: Vec<f64> = rows.into_iter().flatten().collect();
6867
Array2::from_shape_vec((n_bootstrap, n_units), flat).unwrap()
6968
}
7069

@@ -84,24 +83,23 @@ fn generate_mammen_batch(n_bootstrap: usize, n_units: usize, seed: u64) -> Array
8483
// Probability of negative value
8584
let prob_neg = (sqrt5 + 1.0) / (2.0 * sqrt5); // ≈ 0.724
8685

87-
// Pre-allocate flat array directly (single allocation)
88-
let total_size = n_bootstrap * n_units;
89-
let mut flat = vec![0.0_f64; total_size];
90-
91-
// Generate weights in parallel, writing directly to pre-allocated buffer
92-
flat.par_chunks_mut(n_units)
93-
.enumerate()
94-
.for_each(|(i, row)| {
86+
let rows: Vec<Vec<f64>> = (0..n_bootstrap)
87+
.into_par_iter()
88+
.map(|i| {
9589
let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed.wrapping_add(i as u64));
96-
for val in row.iter_mut() {
97-
*val = if rng.gen::<f64>() < prob_neg {
98-
val_neg
99-
} else {
100-
val_pos
101-
};
102-
}
103-
});
104-
90+
(0..n_units)
91+
.map(|_| {
92+
if rng.gen::<f64>() < prob_neg {
93+
val_neg
94+
} else {
95+
val_pos
96+
}
97+
})
98+
.collect()
99+
})
100+
.collect();
101+
102+
let flat: Vec<f64> = rows.into_iter().flatten().collect();
105103
Array2::from_shape_vec((n_bootstrap, n_units), flat).unwrap()
106104
}
107105

@@ -120,33 +118,32 @@ fn generate_webb_batch(n_bootstrap: usize, n_units: usize, seed: u64) -> Array2<
120118
// Equal probability for each of 6 values: 1/6 each
121119
let prob = 1.0 / 6.0;
122120

123-
// Pre-allocate flat array directly (single allocation)
124-
let total_size = n_bootstrap * n_units;
125-
let mut flat = vec![0.0_f64; total_size];
126-
127-
// Generate weights in parallel, writing directly to pre-allocated buffer
128-
flat.par_chunks_mut(n_units)
129-
.enumerate()
130-
.for_each(|(i, row)| {
121+
let rows: Vec<Vec<f64>> = (0..n_bootstrap)
122+
.into_par_iter()
123+
.map(|i| {
131124
let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed.wrapping_add(i as u64));
132-
for val in row.iter_mut() {
133-
let u = rng.gen::<f64>();
134-
*val = if u < prob {
135-
-val1
136-
} else if u < 2.0 * prob {
137-
-val2
138-
} else if u < 3.0 * prob {
139-
-val3
140-
} else if u < 4.0 * prob {
141-
val3
142-
} else if u < 5.0 * prob {
143-
val2
144-
} else {
145-
val1
146-
};
147-
}
148-
});
149-
125+
(0..n_units)
126+
.map(|_| {
127+
let u = rng.gen::<f64>();
128+
if u < prob {
129+
-val1
130+
} else if u < 2.0 * prob {
131+
-val2
132+
} else if u < 3.0 * prob {
133+
-val3
134+
} else if u < 4.0 * prob {
135+
val3
136+
} else if u < 5.0 * prob {
137+
val2
138+
} else {
139+
val1
140+
}
141+
})
142+
.collect()
143+
})
144+
.collect();
145+
146+
let flat: Vec<f64> = rows.into_iter().flatten().collect();
150147
Array2::from_shape_vec((n_bootstrap, n_units), flat).unwrap()
151148
}
152149

rust/src/linalg.rs

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//! - Cluster-robust variance-covariance estimation
77
88
use ndarray::{Array1, Array2, ArrayView1, ArrayView2};
9-
use ndarray_linalg::{Cholesky, LeastSquaresSvd, Solve, UPLO};
9+
use ndarray_linalg::{LeastSquaresSvd, Solve};
1010
use numpy::{IntoPyArray, PyArray1, PyArray2, PyReadonlyArray1, PyReadonlyArray2};
1111
use pyo3::prelude::*;
1212
use std::collections::HashMap;
@@ -190,30 +190,18 @@ fn compute_robust_vcov_internal(
190190
}
191191
}
192192

193-
/// Invert a symmetric positive-definite matrix using Cholesky factorization.
194-
///
195-
/// For symmetric positive-definite matrices like X'X, Cholesky factorization
196-
/// (A = L L') is more efficient than general LU decomposition, requiring
197-
/// approximately half the operations.
193+
/// Invert a symmetric positive-definite matrix.
198194
fn invert_symmetric(a: &Array2<f64>) -> PyResult<Array2<f64>> {
199195
let n = a.nrows();
200-
201-
// Compute Cholesky factorization: A = L L' where L is lower triangular
202-
let factorized = a.cholesky(UPLO::Lower)
203-
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(
204-
format!("Cholesky factorization failed (matrix may not be positive-definite): {}", e)
205-
))?;
206-
207-
// Solve A * A^{-1} = I by solving for each column of the identity
208196
let mut result = Array2::<f64>::zeros((n, n));
197+
198+
// Solve A * x_i = e_i for each column of the identity matrix
209199
for i in 0..n {
210200
let mut e_i = Array1::<f64>::zeros(n);
211201
e_i[i] = 1.0;
212202

213-
let col = factorized.solve(&e_i)
214-
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(
215-
format!("Matrix inversion failed: {}", e)
216-
))?;
203+
let col = a.solve(&e_i)
204+
.map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("Matrix inversion failed: {}", e)))?;
217205

218206
result.column_mut(i).assign(&col);
219207
}

0 commit comments

Comments
 (0)