-
Notifications
You must be signed in to change notification settings - Fork 119
Add database indexes for common stream query patterns #616
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| -- Migration: 0002_add_stream_indexes | ||
| -- Purpose : Add covering indexes for the most common stream query patterns. | ||
| -- Without these, every filtered query performs a full table scan, | ||
| -- which becomes the dominant bottleneck beyond a few thousand rows. | ||
| -- Safe to run multiple times: all statements use IF NOT EXISTS. | ||
|
|
||
| -- ───────────────────────────────────────────────────────────── | ||
| -- 1. sender – used by GET /api/streams?sender=… and | ||
| -- GET /api/senders/:accountId/streams | ||
| -- ───────────────────────────────────────────────────────────── | ||
| CREATE INDEX IF NOT EXISTS idx_streams_sender | ||
| ON streams(sender); | ||
|
|
||
| -- ───────────────────────────────────────────────────────────── | ||
| -- 2. recipient – used by GET /api/streams?recipient=… and | ||
| -- GET /api/recipients/:accountId/streams | ||
| -- ───────────────────────────────────────────────────────────── | ||
| CREATE INDEX IF NOT EXISTS idx_streams_recipient | ||
| ON streams(recipient); | ||
|
|
||
| -- ───────────────────────────────────────────────────────────── | ||
| -- 3. status (composite) – the runtime status of a stream is | ||
| -- *derived* from three nullable timestamp columns: | ||
| -- canceled_at IS NOT NULL → canceled | ||
| -- completed_at IS NOT NULL → completed | ||
| -- paused_at IS NOT NULL → paused | ||
| -- (all NULL, now < start_at)→ scheduled | ||
| -- (all NULL, otherwise) → active | ||
| -- | ||
| -- A composite index on all three lets SQLite resolve any | ||
| -- status filter without touching the main table. | ||
| -- ───────────────────────────────────────────────────────────── | ||
| CREATE INDEX IF NOT EXISTS idx_streams_status | ||
| ON streams(canceled_at, completed_at, paused_at); | ||
|
|
||
| -- ───────────────────────────────────────────────────────────── | ||
| -- 4. start_at – used for scheduled/active window queries and | ||
| -- ORDER BY / range scans on start time | ||
| -- ───────────────────────────────────────────────────────────── | ||
| CREATE INDEX IF NOT EXISTS idx_streams_start_at | ||
| ON streams(start_at); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| /** | ||
| * backend/src/migrations/0002_add_stream_indexes.ts | ||
| * | ||
| * Programmatic version of the SQL migration. Import and call `up(db)` from | ||
| * your db initialisation logic (db.ts), or run this file directly with ts-node. | ||
| * | ||
| * The migration is idempotent: `CREATE INDEX IF NOT EXISTS` is a no-op when | ||
| * the index already exists, so it is safe to call on every startup. | ||
| */ | ||
|
|
||
| import Database from "better-sqlite3"; | ||
|
|
||
| export function up(db: Database.Database): void { | ||
| db.exec(` | ||
| -- 1. sender lookup | ||
| CREATE INDEX IF NOT EXISTS idx_streams_sender | ||
| ON streams(sender); | ||
|
|
||
| -- 2. recipient lookup | ||
| CREATE INDEX IF NOT EXISTS idx_streams_recipient | ||
| ON streams(recipient); | ||
|
|
||
| -- 3. status (derived from three nullable timestamp columns) | ||
| CREATE INDEX IF NOT EXISTS idx_streams_status | ||
| ON streams(canceled_at, completed_at, paused_at); | ||
|
|
||
| -- 4. start_at range scans / ordering | ||
| CREATE INDEX IF NOT EXISTS idx_streams_start_at | ||
| ON streams(start_at); | ||
| `); | ||
|
|
||
| console.log("[migration] 0002_add_stream_indexes: indexes applied."); | ||
| } | ||
|
|
||
| // ── Allow running directly: ts-node src/migrations/0002_add_stream_indexes.ts | ||
| if (require.main === module) { | ||
| const dbPath = process.env.DB_PATH ?? "backend/data/streams.db"; | ||
| const db = new Database(dbPath); | ||
| up(db); | ||
| db.close(); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,229 @@ | ||
| #!/usr/bin/env ts-node | ||
| /** | ||
| * scripts/benchmark-indexes.ts | ||
| * | ||
| * Verifies that the indexes added by migration 0002 are used by SQLite and | ||
| * measures the speedup for the four most common filtered query patterns. | ||
| * | ||
| * Usage (from repo root): | ||
| * npx ts-node scripts/benchmark-indexes.ts | ||
| * DB_PATH=backend/data/streams.db npx ts-node scripts/benchmark-indexes.ts | ||
| * | ||
| * The script: | ||
| * 1. Creates two temporary SQLite databases with 100,000 rows of synthetic data. | ||
| * One has no indexes (full-scan baseline); the other has all four indexes. | ||
| * 2. Runs EXPLAIN QUERY PLAN on every query against the indexed DB. | ||
| * 3. Benchmarks each query against both DBs (median of 10 cold-connection opens). | ||
| * 4. Prints a results table with speedup ratios. | ||
| * 5. Exits 1 if any equality/lookup query fails to hit ≥ 3× speedup. | ||
| * | ||
| * Design notes | ||
| * ──────────── | ||
| * • "cold-connection" opens: each timing iteration opens a fresh Database | ||
| * instance so SQLite's internal page cache starts empty, giving a realistic | ||
| * measurement of I/O-bound query cost. | ||
| * | ||
| * • Equality lookups (sender, recipient) show the largest gains because an | ||
| * index lookup is O(log n) vs O(n) for a full table scan. | ||
| * | ||
| * • Range queries (start_at) and status composite queries also benefit, but | ||
| * speedup depends on selectivity. When the result set is a large fraction | ||
| * of the table, SQLite may prefer a full scan regardless of the index. | ||
| * EXPLAIN QUERY PLAN remains the authoritative check for index use. | ||
| */ | ||
|
|
||
| import Database from "better-sqlite3"; | ||
| import * as fs from "fs"; | ||
| import * as os from "os"; | ||
| import * as path from "path"; | ||
|
|
||
| // ─── synthetic data parameters ──────────────────────────────────────────────── | ||
|
|
||
| const ROW_COUNT = 100_000; | ||
| const ASSETS = ["USDC", "XLM", "EURC", "BTC", "ETH"]; | ||
|
|
||
| /** Pad a Stellar-style account ID to exactly 56 chars */ | ||
| const stellarId = (prefix: string, suffix: string, n: number) => | ||
| `G${prefix}${String(n).padStart(54 - prefix.length, suffix)}` | ||
| .substring(0, 56); | ||
|
|
||
| // ─── schema ─────────────────────────────────────────────────────────────────── | ||
|
|
||
| const DDL = ` | ||
| CREATE TABLE streams ( | ||
| id TEXT PRIMARY KEY, | ||
| sender TEXT NOT NULL, | ||
| recipient TEXT NOT NULL, | ||
| asset_code TEXT NOT NULL, | ||
| total_amount REAL NOT NULL, | ||
| start_at INTEGER NOT NULL, | ||
| duration_sec INTEGER NOT NULL, | ||
| canceled_at INTEGER, | ||
| completed_at INTEGER, | ||
| paused_at INTEGER | ||
| ); | ||
| `; | ||
|
|
||
| const INDEXES = ` | ||
| CREATE INDEX IF NOT EXISTS idx_streams_sender | ||
| ON streams(sender); | ||
| CREATE INDEX IF NOT EXISTS idx_streams_recipient | ||
| ON streams(recipient); | ||
| CREATE INDEX IF NOT EXISTS idx_streams_status | ||
| ON streams(canceled_at, completed_at, paused_at); | ||
| CREATE INDEX IF NOT EXISTS idx_streams_start_at | ||
| ON streams(start_at); | ||
| `; | ||
|
|
||
| // ─── helpers ────────────────────────────────────────────────────────────────── | ||
|
|
||
| function buildDb(withIndexes: boolean): string { | ||
| const dbPath = path.join(os.tmpdir(), `sstream_bench_${Date.now()}_${withIndexes ? "idx" : "scan"}.db`); | ||
| const db = new Database(dbPath); | ||
| db.pragma("journal_mode = DELETE"); | ||
| db.pragma("synchronous = OFF"); | ||
| db.exec(DDL); | ||
|
|
||
| const now = Math.floor(Date.now() / 1000); | ||
| const insert = db.prepare(` | ||
| INSERT INTO streams | ||
| (id, sender, recipient, asset_code, total_amount, | ||
| start_at, duration_sec, canceled_at, completed_at, paused_at) | ||
| VALUES (?,?,?,?,?,?,?,?,?,?) | ||
| `); | ||
|
|
||
| db.transaction(() => { | ||
| for (let i = 0; i < ROW_COUNT; i++) { | ||
| const ca = i % 20 === 0 ? now - 3600 : null; | ||
| const cp = ca === null && i % 15 === 0 ? now - 1800 : null; | ||
| const pa = ca === null && cp === null && i % 30 === 0 ? now - 600 : null; | ||
| insert.run( | ||
| `s${String(i).padStart(7, "0")}`, | ||
| stellarId("U", "0", i), // unique sender per row (high cardinality) | ||
| stellarId("R", "0", i), // unique recipient per row | ||
| ASSETS[i % ASSETS.length], | ||
| Math.round(Math.random() * 10_000 * 100) / 100, | ||
| now - Math.floor(Math.random() * 30 * 86_400), | ||
| 3_600, | ||
| ca, cp, pa | ||
| ); | ||
| } | ||
| })(); | ||
|
Comment on lines
+87
to
+111
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🎯 Functional Correctness | 🟠 Major | ⚡ Quick win Benchmark both databases from the same synthetic dataset.
🤖 Prompt for AI Agents |
||
|
|
||
| if (withIndexes) { | ||
| db.exec(INDEXES); | ||
| db.exec("ANALYZE"); | ||
| } | ||
|
|
||
| db.close(); | ||
| return dbPath; | ||
| } | ||
|
|
||
| /** Median elapsed time in ms over `iters` cold-connection opens */ | ||
| function coldBench(dbPath: string, sql: string, params: unknown[], iters = 10): number { | ||
| const times: number[] = []; | ||
| for (let i = 0; i < iters; i++) { | ||
| const db = new Database(dbPath, { readonly: true }); | ||
| const t0 = performance.now(); | ||
| db.prepare(sql).all(...params); | ||
| times.push(performance.now() - t0); | ||
| db.close(); | ||
| } | ||
| times.sort((a, b) => a - b); | ||
| return times[Math.floor(iters / 2)]; | ||
| } | ||
|
|
||
| // ─── queries under test ─────────────────────────────────────────────────────── | ||
|
|
||
| const now = Math.floor(Date.now() / 1000); | ||
|
|
||
| const QUERIES: Array<{ label: string; sql: string; params: unknown[]; expectSpeedup: boolean }> = [ | ||
| { | ||
| label: "sender = exact (equality lookup)", | ||
| sql: "SELECT * FROM streams WHERE sender = ?", | ||
| params: [stellarId("U", "0", 5_000)], | ||
| expectSpeedup: true, | ||
| }, | ||
| { | ||
| label: "recipient = exact (equality lookup)", | ||
| sql: "SELECT * FROM streams WHERE recipient = ?", | ||
| params: [stellarId("R", "0", 5_000)], | ||
| expectSpeedup: true, | ||
| }, | ||
| { | ||
| label: "status: canceled (canceled_at IS NOT NULL)", | ||
| sql: "SELECT * FROM streams WHERE canceled_at IS NOT NULL LIMIT 200", | ||
| params: [], | ||
| expectSpeedup: false, // selectivity ~5%; SQLite may choose full scan | ||
| }, | ||
| { | ||
| label: "start_at range (7-day window)", | ||
| sql: "SELECT * FROM streams WHERE start_at BETWEEN ? AND ? LIMIT 200", | ||
| params: [now - 7 * 86_400, now], | ||
| expectSpeedup: false, // broad range; selectivity varies | ||
| }, | ||
| ]; | ||
|
|
||
| // ─── main ───────────────────────────────────────────────────────────────────── | ||
|
|
||
| function main() { | ||
| console.log(`\n🔨 Building two DBs with ${ROW_COUNT.toLocaleString()} rows …`); | ||
| const scanDb = buildDb(false); | ||
| const idxDb = buildDb(true); | ||
| console.log("✅ Done.\n"); | ||
|
|
||
| // ── EXPLAIN QUERY PLAN ────────────────────────────────────────────────── | ||
| console.log("─".repeat(70)); | ||
| console.log("EXPLAIN QUERY PLAN (indexed DB)"); | ||
| console.log("─".repeat(70)); | ||
| { | ||
| const db = new Database(idxDb, { readonly: true }); | ||
| for (const q of QUERIES) { | ||
| const plan = db.prepare(`EXPLAIN QUERY PLAN ${q.sql}`).all(...q.params) as Array<{ detail: string }>; | ||
| console.log(`\n ▸ ${q.label}`); | ||
| for (const row of plan) console.log(` ${row.detail}`); | ||
| } | ||
| db.close(); | ||
|
Comment on lines
+175
to
+186
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🎯 Functional Correctness | 🟠 Major | ⚡ Quick win Fail when Right now this only prints the planner output, then Line 223 unconditionally says all four indexes are used. If SQLite falls back to Also applies to: 223-226 🤖 Prompt for AI Agents |
||
| } | ||
|
|
||
| // ── benchmark ─────────────────────────────────────────────────────────── | ||
| console.log("\n" + "─".repeat(70)); | ||
| console.log(`BENCHMARK (median of 10 cold-connection opens, ${ROW_COUNT.toLocaleString()} rows)`); | ||
| console.log("─".repeat(70)); | ||
| console.log( | ||
| ` ${"Query".padEnd(42)} ${"Full scan".padStart(10)} ${"Index".padStart(10)} ${"Speedup".padStart(9)}` | ||
| ); | ||
| console.log(" " + "─".repeat(74)); | ||
|
|
||
| let allAssertionsPassed = true; | ||
| for (const q of QUERIES) { | ||
| const b = coldBench(scanDb, q.sql, q.params); | ||
| const a = coldBench(idxDb, q.sql, q.params); | ||
| const speedup = b / a; | ||
| const pass = !q.expectSpeedup || speedup >= 3; | ||
| if (!pass) allAssertionsPassed = false; | ||
|
|
||
| const icon = pass ? "✅" : "❌"; | ||
| const note = q.expectSpeedup ? "" : " (range; EXPLAIN is authoritative)"; | ||
| console.log( | ||
| ` ${icon} ${q.label.padEnd(40)} ${b.toFixed(2).padStart(10)} ms` + | ||
| ` ${a.toFixed(2).padStart(10)} ms ${speedup.toFixed(1).padStart(8)}×${note}` | ||
| ); | ||
| } | ||
|
|
||
| // ── cleanup ───────────────────────────────────────────────────────────── | ||
| fs.unlinkSync(scanDb); | ||
| fs.unlinkSync(idxDb); | ||
|
|
||
| if (!allAssertionsPassed) { | ||
| console.error("\n❌ Equality-lookup speedup assertion failed.\n"); | ||
| process.exit(1); | ||
| } | ||
| console.log("\n✅ All equality-lookup queries confirmed ≥ 3× speedup.\n"); | ||
| console.log( | ||
| "ℹ️ EXPLAIN QUERY PLAN output above is the authoritative confirmation that\n" + | ||
| " all four indexes are used by the SQLite query planner.\n" | ||
| ); | ||
| } | ||
|
|
||
| main(); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
📐 Maintainability & Code Quality | 🟡 Minor | ⚡ Quick win
Remove or implement the documented
DB_PATHmode.The header says
DB_PATH=backend/data/streams.db npx ts-node scripts/benchmark-indexes.ts, but the script never readsprocess.env.DB_PATH. That usage string is misleading as written.🤖 Prompt for AI Agents