|
/// Performance benchmarks for the `Vec::with_capacity()` optimizations.
///
/// Run with: `cargo test --test capacity_optimization_benches --release -- --nocapture`
///
/// These are performance tests that measure actual execution time
/// to demonstrate the improvements from the `Vec::with_capacity()` optimizations.
| 7 | +
|
| 8 | +#[cfg(test)] |
| 9 | +mod tests { |
| 10 | + use helix_db::{ |
| 11 | + helix_engine::{ |
| 12 | + bm25::bm25::BM25, |
| 13 | + storage_core::HelixGraphStorage, |
| 14 | + traversal_core::{ |
| 15 | + config::Config, |
| 16 | + ops::{ |
| 17 | + g::G, |
| 18 | + source::{add_n::AddNAdapter, n_from_type::NFromTypeAdapter}, |
| 19 | + util::{ |
| 20 | + aggregate::AggregateAdapter, group_by::GroupByAdapter, |
| 21 | + update::UpdateAdapter, |
| 22 | + }, |
| 23 | + }, |
| 24 | + }, |
| 25 | + }, |
| 26 | + props, |
| 27 | + utils::id::v6_uuid, |
| 28 | + }; |
| 29 | + use std::sync::Arc; |
| 30 | + use std::time::Instant; |
| 31 | + use tempfile::TempDir; |
| 32 | + |
| 33 | + fn setup_test_db() -> (Arc<HelixGraphStorage>, TempDir) { |
| 34 | + let temp_dir = TempDir::new().unwrap(); |
| 35 | + let db_path = temp_dir.path().to_str().unwrap(); |
| 36 | + |
| 37 | + let mut config = Config::default(); |
| 38 | + config.bm25 = Some(true); |
| 39 | + |
| 40 | + let storage = HelixGraphStorage::new(db_path, config, Default::default()).unwrap(); |
| 41 | + (Arc::new(storage), temp_dir) |
| 42 | + } |
| 43 | + |
| 44 | + fn setup_db_with_nodes(count: usize) -> (Arc<HelixGraphStorage>, TempDir) { |
| 45 | + let (storage, temp_dir) = setup_test_db(); |
| 46 | + let mut txn = storage.graph_env.write_txn().unwrap(); |
| 47 | + |
| 48 | + for i in 0..count { |
| 49 | + let _ = G::new_mut(Arc::clone(&storage), &mut txn) |
| 50 | + .add_n( |
| 51 | + "User", |
| 52 | + Some(props! { |
| 53 | + "name" => format!("User{}", i), |
| 54 | + "age" => (20 + (i % 50)) as i64, |
| 55 | + "department" => format!("Dept{}", i % 5), |
| 56 | + "city" => format!("City{}", i % 10), |
| 57 | + "role" => format!("Role{}", i % 3), |
| 58 | + "score" => (i % 100) as i64, |
| 59 | + }), |
| 60 | + None, |
| 61 | + ) |
| 62 | + .collect_to_obj(); |
| 63 | + } |
| 64 | + |
| 65 | + txn.commit().unwrap(); |
| 66 | + (storage, temp_dir) |
| 67 | + } |
| 68 | + |
/// Times `aggregate_by` over 100 `User` nodes when grouping by 1, 3 and 5 properties.
///
/// The elapsed time printed for each run covers opening the read transaction and
/// running the traversal; building the database is not part of the measurement.
#[test]
fn bench_aggregate_small() {
    // Each run groups by the first `prop_count` names of this list.
    const GROUPING_PROPS: [&str; 5] = ["department", "age", "city", "role", "score"];

    println!("\n=== Aggregate Performance (100 rows) ===");

    // The data set is the same for every property count, so it is built once instead of
    // being recreated on every iteration (this matches `bench_aggregate_large`).
    let (storage, _temp_dir) = setup_db_with_nodes(100);

    for prop_count in [1usize, 3, 5] {
        // Taking a prefix of one fixed list removes the silent "empty list" fallback
        // that an unexpected count would otherwise have benchmarked.
        let properties: Vec<String> = GROUPING_PROPS
            .iter()
            .take(prop_count)
            .map(|name| name.to_string())
            .collect();

        let start = Instant::now();
        let txn = storage.graph_env.read_txn().unwrap();
        // NOTE(review): the outcome is not inspected; if `aggregate_by` can fail, a
        // failing run would still print a timing. Confirm against its signature.
        let _result = G::new(Arc::clone(&storage), &txn)
            .n_from_type("User")
            .aggregate_by(&properties, false);
        let elapsed = start.elapsed();

        println!(" {} properties: {:?}", prop_count, elapsed);
    }
}
| 103 | + |
/// Times `aggregate_by` over 1,000 `User` nodes when grouping by 1, 3 and 5 properties.
///
/// The elapsed time printed for each run covers opening the read transaction and
/// running the traversal; building the database is not part of the measurement.
#[test]
fn bench_aggregate_medium() {
    // Each run groups by the first `prop_count` names of this list.
    const GROUPING_PROPS: [&str; 5] = ["department", "age", "city", "role", "score"];

    println!("\n=== Aggregate Performance (1,000 rows) ===");

    // The 1,000-row data set does not depend on the property count, so it is created
    // once up front rather than three times inside the loop.
    let (storage, _temp_dir) = setup_db_with_nodes(1000);

    for prop_count in [1usize, 3, 5] {
        // A prefix of the fixed list replaces the per-count `match`, whose catch-all
        // arm would have quietly produced an empty property list.
        let properties: Vec<String> = GROUPING_PROPS
            .iter()
            .take(prop_count)
            .map(|name| name.to_string())
            .collect();

        let start = Instant::now();
        let txn = storage.graph_env.read_txn().unwrap();
        // NOTE(review): the outcome is not inspected; if `aggregate_by` can fail, a
        // failing run would still print a timing. Confirm against its signature.
        let _result = G::new(Arc::clone(&storage), &txn)
            .n_from_type("User")
            .aggregate_by(&properties, false);
        let elapsed = start.elapsed();

        println!(" {} properties: {:?}", prop_count, elapsed);
    }
}
| 138 | + |
/// Times `aggregate_by` over 10,000 `User` nodes when grouping by 1, 3 and 5 properties.
///
/// The database is built once and shared by all three runs. Each printed duration
/// covers opening the read transaction and running the traversal.
#[test]
fn bench_aggregate_large() {
    println!("\n=== Aggregate Performance (10,000 rows) ===");

    let (storage, _temp_dir) = setup_db_with_nodes(10000);

    // The 1-, 3- and 5-property lists are successive prefixes of this array.
    let all_props = ["department", "age", "city", "role", "score"];

    for prop_count in [1, 3, 5] {
        let properties: Vec<String> = all_props[..prop_count]
            .iter()
            .map(|prop| prop.to_string())
            .collect();

        let timer = Instant::now();
        let rtxn = storage.graph_env.read_txn().unwrap();
        let _aggregated = G::new(Arc::clone(&storage), &rtxn)
            .n_from_type("User")
            .aggregate_by(&properties, false);
        let took = timer.elapsed();

        println!(" {} properties: {:?}", prop_count, took);
    }
}
| 173 | + |
/// Times `group_by` on the `department` and `city` properties for 100 and 1,000
/// `User` nodes.
///
/// A new database is built for each size. Each printed duration covers opening the
/// read transaction and running the traversal.
#[test]
fn bench_group_by() {
    println!("\n=== Group By Performance ===");

    // The grouping keys are the same for every data-set size.
    let group_keys: Vec<String> = ["department", "city"]
        .iter()
        .map(|key| key.to_string())
        .collect();

    for row_count in [100, 1000] {
        let (storage, _temp_dir) = setup_db_with_nodes(row_count);

        let timer = Instant::now();
        let rtxn = storage.graph_env.read_txn().unwrap();
        let _grouped = G::new(Arc::clone(&storage), &rtxn)
            .n_from_type("User")
            .group_by(&group_keys, false);
        let took = timer.elapsed();

        println!(" {} rows: {:?}", row_count, took);
    }
}
| 192 | + |
/// Times updating the `score` property to `999` on every `User` node, for databases
/// of 10, 100 and 1,000 nodes.
///
/// Only the `update` traversal itself is timed; collecting the nodes beforehand and
/// committing afterwards are outside the measured section.
#[test]
fn bench_update_operations() {
    println!("\n=== Update Performance ===");

    for node_count in [10, 100, 1000] {
        let (storage, _temp_dir) = setup_db_with_nodes(node_count);
        let mut wtxn = storage.graph_env.write_txn().unwrap();

        // Collect every `User` node through a read transaction that is closed again
        // as soon as the inner block ends.
        let targets = {
            let rtxn = storage.graph_env.read_txn().unwrap();
            G::new(Arc::clone(&storage), &rtxn)
                .n_from_type("User")
                .collect_to::<Vec<_>>()
        };

        let timer = Instant::now();
        let _updated = G::new_mut_from(Arc::clone(&storage), &mut wtxn, targets)
            .update(Some(vec![("score".to_string(), 999.into())]))
            .collect_to::<Vec<_>>();
        let took = timer.elapsed();

        wtxn.commit().unwrap();
        println!(" {} nodes: {:?}", node_count, took);
    }
}
| 219 | + |
/// Times BM25 `search` with result limits of 10, 100 and 1,000 over an index of
/// 10,000 inserted documents.
///
/// All documents are inserted and committed before any search is timed, and the
/// three searches share one read transaction.
///
/// # Panics
///
/// Panics if the storage has no BM25 index, or if a transaction or an insertion fails.
#[test]
fn bench_bm25_search() {
    // The same query is issued for every limit.
    const QUERY: &str = "database optimization performance";

    println!("\n=== BM25 Search Performance ===");

    let (storage, _temp_dir) = setup_test_db();
    let mut wtxn = storage.graph_env.write_txn().unwrap();

    let index = storage.bm25.as_ref().expect("BM25 should be enabled");

    // Populate the index with 10,000 documents that differ only in their number.
    for doc_no in 0..10000 {
        let text = format!(
            "Document {} contains various search terms keywords database performance optimization testing benchmark",
            doc_no
        );
        index.insert_doc(&mut wtxn, v6_uuid(), &text).unwrap();
    }

    wtxn.commit().unwrap();

    let rtxn = storage.graph_env.read_txn().unwrap();

    for limit in [10, 100, 1000] {
        let timer = Instant::now();
        // The search outcome is not inspected; only the elapsed time is reported.
        let _hits = index.search(&rtxn, QUERY, limit);
        let took = timer.elapsed();

        println!(" limit={}: {:?}", limit, took);
    }
}
| 250 | + |
/// Pushes `count` indices and their `"value_{i}"` labels into vectors created with
/// `Vec::new()`, so both vectors grow on demand while being filled.
fn fill_with_new(count: usize) -> (Vec<usize>, Vec<String>) {
    let mut indices = Vec::new();
    let mut labels = Vec::new();
    for i in 0..count {
        indices.push(i);
        labels.push(format!("value_{}", i));
    }
    (indices, labels)
}

/// Pushes `count` indices and their `"value_{i}"` labels into vectors created with
/// `Vec::with_capacity(count)`, so no reallocation is needed while filling.
fn fill_with_capacity(count: usize) -> (Vec<usize>, Vec<String>) {
    let mut indices = Vec::with_capacity(count);
    let mut labels = Vec::with_capacity(count);
    for i in 0..count {
        indices.push(i);
        labels.push(format!("value_{}", i));
    }
    (indices, labels)
}

/// Runs `fill` 1000 x 100 times with five elements and returns the total elapsed time.
fn time_fill_rounds(fill: impl Fn(usize) -> (Vec<usize>, Vec<String>)) -> std::time::Duration {
    // One `usize` count shared by both variants, so they fill the same element type.
    const PROPERTIES_COUNT: usize = 5;

    let start = Instant::now();
    for _ in 0..1000 {
        for _ in 0..100 {
            // `black_box` on the input and the output keeps the optimizer from
            // specializing on the count or discarding the filled vectors.
            std::hint::black_box(fill(std::hint::black_box(PROPERTIES_COUNT)));
        }
    }
    start.elapsed()
}

/// Returns by how many percent `candidate` is faster than `baseline` (negative when it
/// is slower), or `None` when `baseline` is zero and the ratio would be undefined.
fn improvement_percent(
    baseline: std::time::Duration,
    candidate: std::time::Duration,
) -> Option<f64> {
    let base_secs = baseline.as_secs_f64();
    if base_secs > 0.0 {
        Some((1.0 - candidate.as_secs_f64() / base_secs) * 100.0)
    } else {
        None
    }
}

/// Compares filling small vectors created with `Vec::new()` against vectors created
/// with `Vec::with_capacity()` and prints both timings plus the relative improvement.
#[test]
fn bench_vector_allocation_patterns() {
    println!("\n=== Vector Allocation Patterns ===");

    // Pattern 1: vectors start empty and grow while being filled.
    let vec_new_time = time_fill_rounds(fill_with_new);
    println!(" Vec::new() in loop: {:?}", vec_new_time);

    // Pattern 2: vectors are preallocated for the known element count.
    let vec_capacity_time = time_fill_rounds(fill_with_capacity);
    println!(" Vec::with_capacity() in loop: {:?}", vec_capacity_time);

    match improvement_percent(vec_new_time, vec_capacity_time) {
        Some(improvement) => println!(" Improvement: {:.1}% faster", improvement),
        None => println!(" Improvement: n/a (baseline measured as zero)"),
    }
}
| 291 | +} |
0 commit comments