# Benchmarks workflow.
#
# Runs the Playwright benchmark suite on all three operating systems. On pull
# requests the results are compared against the committed per-OS baseline and a
# summary comment is posted. On manual dispatch with `update-baseline: true` the
# baseline file for each OS is regenerated and committed back to the branch.
name: Benchmarks
on:
  workflow_dispatch:
    inputs:
      update-baseline:
        description: 'Update baseline with current results instead of comparing'
        type: boolean
        default: false
  pull_request:
    branches: [main, 'release/v*']

jobs:
  benchmark:
    name: Performance Benchmarks (${{ matrix.os }})
    timeout-minutes: 60
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-24.04, macos-latest, windows-latest]
        include:
          - os: ubuntu-24.04
            os-name: ubuntu
          - os: macos-latest
            os-name: macos
          - os: windows-latest
            os-name: windows
    permissions:
      # contents: write is needed by the "Commit Updated Baseline" step,
      # pull-requests: write by the PR comment step.
      contents: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v6

      - name: Install System Dependencies (Ubuntu)
        if: matrix.os-name == 'ubuntu'
        run: |
          sudo apt-get update
          sudo apt-get --no-install-recommends install -y \
            libglib2.0-0 libnss3 libdbus-1-3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libgtk-3-0 libasound2t64 \
            xvfb

      - name: Setup Node Dependencies
        uses: ./.github/actions/common/setup-node-deps

      - name: Configure Chrome Sandbox
        if: matrix.os-name == 'ubuntu'
        run: |
          sudo chown root node_modules/electron/dist/chrome-sandbox
          sudo chmod 4755 node_modules/electron/dist/chrome-sandbox

      - name: Run Benchmark Tests
        uses: ./.github/actions/tests/run-benchmark-tests
        with:
          os: ${{ matrix.os-name }}
          update-baseline: ${{ github.event.inputs.update-baseline || 'false' }}

      - name: Upload Benchmark Results
        uses: actions/upload-artifact@v6
        if: ${{ !cancelled() }}
        with:
          name: benchmark-results-${{ matrix.os-name }}
          path: |
            tests/benchmarks/results/
            benchmark-report/
          retention-days: 30

      - name: Commit Updated Baseline
        if: github.event.inputs.update-baseline == 'true'
        # Explicit bash: the default shell on windows runners is pwsh, and the
        # script below relies on POSIX shell syntax.
        shell: bash
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add tests/benchmarks/mounting/baseline.${{ matrix.os-name }}.json

          # Nothing staged means the baseline is unchanged: skip commit AND push.
          if git diff --staged --quiet; then
            echo "Baseline unchanged; nothing to commit."
            exit 0
          fi

          git commit -m "chore: update ${{ matrix.os-name }} benchmark baseline"

          # All matrix jobs push to the same branch. Whichever job pushes second
          # is behind the remote, so rebase onto the latest tip and retry instead
          # of failing with a non-fast-forward rejection.
          for attempt in 1 2 3 4 5; do
            if git pull --rebase --autostash origin "${GITHUB_REF_NAME}" \
              && git push origin "HEAD:${GITHUB_REF_NAME}"; then
              exit 0
            fi
            sleep $((attempt * 5))
          done

          echo "Failed to push the updated baseline after 5 attempts." >&2
          exit 1

      - name: Comment Benchmark Results on PR
        if: github.event_name == 'pull_request' && !cancelled()
        # Best effort: a failed comment must not fail the benchmark job.
        continue-on-error: true
        uses: actions/github-script@v7
        with:
          script: |
            const run = require('./tests/benchmarks/utils/pr-comment.js');
            await run({
              github,
              context,
              resultsPath: 'tests/benchmarks/results/mounting.json',
              baselinePath: 'tests/benchmarks/mounting/baseline.${{ matrix.os-name }}.json',
              title: 'Benchmark Results — Collection Mount (${{ matrix.os-name }})'
            });
testIgnore: [ 'ssl/**', // custom CA certificate tests require separate server setup and certificate generation - 'auth/**' // auth tests have their own project + 'auth/**', // auth tests have their own project + 'benchmarks/**' ] }, { diff --git a/tests/benchmarks/mounting/baseline.macos.json b/tests/benchmarks/mounting/baseline.macos.json new file mode 100644 index 00000000000..d7de548c4bc --- /dev/null +++ b/tests/benchmarks/mounting/baseline.macos.json @@ -0,0 +1,45 @@ +{ + "thresholdPercent": 20, + "entries": { + "bru-50": { + "mean": 2200, + "p50": 1000 + }, + "bru-200": { + "mean": 1300, + "p50": 1100 + }, + "bru-500": { + "mean": 3600, + "p50": 3500 + }, + "bru-1000": { + "mean": 9100, + "p50": 9000 + }, + "bru-3000": { + "mean": 185000, + "p50": 183000 + }, + "yml-50": { + "mean": 700, + "p50": 650 + }, + "yml-200": { + "mean": 1400, + "p50": 1250 + }, + "yml-500": { + "mean": 3900, + "p50": 3700 + }, + "yml-1000": { + "mean": 11700, + "p50": 11900 + }, + "yml-3000": { + "mean": 85000, + "p50": 80000 + } + } +} diff --git a/tests/benchmarks/mounting/baseline.ubuntu.json b/tests/benchmarks/mounting/baseline.ubuntu.json new file mode 100644 index 00000000000..0d4ff8c6806 --- /dev/null +++ b/tests/benchmarks/mounting/baseline.ubuntu.json @@ -0,0 +1,45 @@ +{ + "thresholdPercent": 20, + "entries": { + "bru-50": { + "mean": 1500, + "p50": 700 + }, + "bru-200": { + "mean": 1200, + "p50": 1150 + }, + "bru-500": { + "mean": 2900, + "p50": 2900 + }, + "bru-1000": { + "mean": 8000, + "p50": 8000 + }, + "bru-3000": { + "mean": 175000, + "p50": 170000 + }, + "yml-50": { + "mean": 600, + "p50": 560 + }, + "yml-200": { + "mean": 1200, + "p50": 1200 + }, + "yml-500": { + "mean": 3500, + "p50": 3400 + }, + "yml-1000": { + "mean": 10700, + "p50": 10650 + }, + "yml-3000": { + "mean": 85000, + "p50": 80000 + } + } +} diff --git a/tests/benchmarks/mounting/baseline.windows.json b/tests/benchmarks/mounting/baseline.windows.json new file mode 100644 index 
const COLLECTION_SIZES = [50, 200, 500, 1000, 3000];
const COLLECTION_FORMATS: CollectionFormat[] = ['bru', 'yml'];
const ITERATIONS_PER_SIZE = 3;

/**
 * Opens the collection at `collectionDir` through the UI once and returns the
 * elapsed wall-clock time in milliseconds.
 *
 * The timer starts right before the "Open collection" menu interaction and
 * stops once the renderer has received a
 * `main:collection-loading-state-updated` event with `isLoading` falsy, so the
 * measurement includes the menu clicks and the sidebar wait.
 *
 * @param page - Renderer page used to drive the UI.
 * @param electronApp - Electron application handle, used to stub the native open dialog.
 * @param collectionDir - Directory the stubbed dialog reports as the user's selection.
 * @param collectionName - Name expected to appear in the sidebar.
 * @returns Elapsed time in milliseconds.
 */
async function measureCollectionMount(
  page: Page,
  electronApp: ElectronApplication,
  collectionDir: string,
  collectionName: string
): Promise<number> {
  // Replace the native dialog with a stub that immediately "selects" the
  // generated directory. The original is stashed only once (??=) so repeated
  // iterations never stash the stub itself.
  await electronApp.evaluate(
    ({ dialog }, { dir }) => {
      (dialog as any).__originalShowOpenDialog ??= dialog.showOpenDialog;
      dialog.showOpenDialog = async () => ({ canceled: false, filePaths: [dir] });
    },
    { dir: collectionDir }
  );

  const restoreDialog = () =>
    electronApp.evaluate(({ dialog }) => {
      if ((dialog as any).__originalShowOpenDialog) {
        dialog.showOpenDialog = (dialog as any).__originalShowOpenDialog;
      }
    });

  let elapsed: number;
  try {
    // Register the completion listener BEFORE triggering the mount so the
    // loading-finished event cannot be missed.
    await page.evaluate(() => {
      (window as any).__benchMountDone = new Promise<void>((resolve) => {
        const off = (window as any).ipcRenderer.on('main:collection-loading-state-updated', (val: any) => {
          if (!val.isLoading) {
            off();
            resolve();
          }
        });
      });
    });

    const timer = startTimer();

    await page.getByTestId('collections-header-add-menu').click();
    await page.locator('.tippy-box .dropdown-item').filter({ hasText: 'Open collection' }).click();
    await page.locator('#sidebar-collection-name').filter({ hasText: collectionName }).waitFor({ state: 'visible' });

    await openCollection(page, collectionName);
    await page.evaluate(() => (window as any).__benchMountDone);

    elapsed = timer.elapsed();
  } catch (error) {
    // Do not leave the stub installed when a mount fails or times out; the
    // original failure is what gets reported, so restore errors are ignored here.
    await restoreDialog().catch(() => undefined);
    throw error;
  }

  await restoreDialog();
  await closeAllCollections(page);

  return elapsed;
}

/** Builds the key under which timings for a format/size pair are stored, e.g. `bru-500`. */
function resultKey(format: CollectionFormat, size: number): string {
  return `${format}-${size}`;
}

test.describe('Benchmark: Collection Mount', () => {
  // Raw timings per result key, collected across the tests of this file and
  // flushed to disk in afterAll.
  const results: Record<string, number[]> = {};

  for (const format of COLLECTION_FORMATS) {
    test.describe(`format: ${format}`, () => {
      for (const size of COLLECTION_SIZES) {
        test(`mount ${format} collection with ${size} requests`, async ({ page, electronApp, createTmpDir }) => {
          // Scale the timeout with collection size: 2 minutes base plus 2 minutes per 100 requests.
          test.setTimeout((2 + Math.ceil(size / 100) * 2) * 60_000);
          const timings: number[] = [];

          const collectionName = `bench-${format}-${size}`;
          const collectionDir = await createTmpDir(`bench-${format}-${size}`);
          generateCollection({ dir: collectionDir, name: collectionName, requestCount: size, format });

          for (let i = 0; i < ITERATIONS_PER_SIZE; i++) {
            const elapsed = await measureCollectionMount(page, electronApp, collectionDir, collectionName);
            timings.push(elapsed);
          }

          const key = resultKey(format, size);
          results[key] = timings;

          const stats = summarize(timings);
          const r = (v: number) => Math.round(v);
          console.log(`[BENCHMARK] ${format} ${size} requests — mean: ${r(stats.mean)}ms, median: ${r(stats.median)}ms, p90: ${r(stats.p90)}ms, stdDev: ${r(stats.stdDev)}ms, raw: [${timings.join(', ')}]`);

          test.info().annotations.push({
            type: 'benchmark',
            description: JSON.stringify({ format, size, ...stats, timings })
          });
        });
      }
    });
  }

  test.afterAll(async () => {
    const resultsDir = path.join(process.cwd(), 'tests', 'benchmarks', 'results');
    fs.mkdirSync(resultsDir, { recursive: true });
    const outputPath = path.join(resultsDir, 'mounting.json');
    const entries: Record<string, ResultEntry> = {};

    for (const [key, timings] of Object.entries(results)) {
      if (timings.length === 0) continue;
      // Keys are produced by resultKey(), i.e. "<format>-<size>".
      const [format, sizeStr] = key.split('-');
      entries[key] = buildResultEntry(timings, { format, size: Number(sizeStr) });
    }

    writeResults(outputPath, { name: 'Collection Mount', unit: 'ms', direction: 'smaller' }, entries);
    console.log(`[BENCHMARK] Results written to ${outputPath}`);
  });
});
/** Options accepted by {@link generateCollection}. */
export interface GenerateCollectionOptions {
  /** Directory the collection is written into (created if missing). */
  dir: string;
  /** Collection name written into the collection root file. */
  name: string;
  /** Total number of request files to generate. */
  requestCount: number;
  /** On-disk format of the generated files. */
  format: CollectionFormat;
  /** Maximum number of requests per generated folder (default 10). */
  requestsPerFolder?: number;
}

/**
 * Writes a synthetic collection to disk: a collection root file plus
 * `ceil(requestCount / requestsPerFolder)` folders named `folder-<n>`, each
 * holding a folder file and up to `requestsPerFolder` request files named
 * `request-<seq>.<ext>` with `seq` counting from 1 across all folders.
 *
 * @throws RangeError when `requestCount` is not a non-negative integer or
 *   `requestsPerFolder` is not a positive integer.
 */
export function generateCollection({
  dir,
  name,
  requestCount,
  format,
  requestsPerFolder = 10
}: GenerateCollectionOptions): void {
  // Fail with a clear message instead of an "Invalid array length" error
  // (requestsPerFolder = 0) or a silently empty collection (negative values).
  if (!Number.isInteger(requestCount) || requestCount < 0) {
    throw new RangeError(`requestCount must be a non-negative integer, got ${requestCount}`);
  }
  if (!Number.isInteger(requestsPerFolder) || requestsPerFolder < 1) {
    throw new RangeError(`requestsPerFolder must be a positive integer, got ${requestsPerFolder}`);
  }

  // Harmless when the directory already exists.
  fs.mkdirSync(dir, { recursive: true });

  if (format === 'bru') {
    fs.writeFileSync(path.join(dir, 'bruno.json'), JSON.stringify({ version: '1', name, type: 'collection' }, null, 2));
    fs.writeFileSync(path.join(dir, 'collection.bru'), stringifyCollection({ name } as any, {}, { format: 'bru' }) || `meta {\n name: ${name}\n}\n`);
  } else {
    const ymlContent = stringifyCollection({ name } as any, { name, type: 'collection', opencollection: '1.0.0' }, { format: 'yml' });
    fs.writeFileSync(path.join(dir, 'opencollection.yml'), ymlContent);
  }

  const ext = format === 'bru' ? 'bru' : 'yml';
  const folderFile = format === 'bru' ? 'folder.bru' : 'folder.yml';
  const folderCount = Math.ceil(requestCount / requestsPerFolder);

  for (let f = 0; f < folderCount; f++) {
    const folderPath = path.join(dir, `folder-${f}`);
    fs.mkdirSync(folderPath, { recursive: true });

    const folderContent = stringifyFolder({ name: `folder-${f}` }, { format });
    // NOTE(review): this fallback is bru syntax and is also used for folder.yml
    // when stringifyFolder returns an empty value — confirm that is intended.
    fs.writeFileSync(path.join(folderPath, folderFile), folderContent || `meta {\n name: folder-${f}\n}\n`);

    // The last folder may be only partially filled.
    const count = Math.min(requestsPerFolder, requestCount - f * requestsPerFolder);
    for (let r = 0; r < count; r++) {
      const seq = f * requestsPerFolder + r + 1;
      fs.writeFileSync(path.join(folderPath, `request-${seq}.${ext}`), stringifyRequest(buildRequestItem(seq), { format }));
    }
  }
}
/**
 * Parses the command line into `{ results?, baseline?, updateBaseline? }`.
 *
 * Recognised flags: `--results <path>`, `--baseline <path>` and
 * `--update-baseline`. Unknown tokens are ignored. A value-taking flag only
 * consumes the next token when that token is not itself a flag, so
 * `--results --baseline b.json` leaves `results` unset instead of swallowing
 * `--baseline` as its value.
 *
 * @param argv Full `process.argv`-style array; the first two entries are skipped.
 */
function parseArgs(argv) {
  const parsed = new Map();
  for (let i = 2; i < argv.length; i++) {
    const flag = argv[i];
    if (flag === '--update-baseline') {
      parsed.set('updateBaseline', true);
    } else if (flag === '--results' || flag === '--baseline') {
      const value = argv[i + 1];
      if (value !== undefined && !String(value).startsWith('--')) {
        parsed.set(flag.slice(2), value);
        i++;
      }
    }
  }
  return Object.fromEntries(parsed);
}

/**
 * Reads and parses a JSON file. Prints an error and exits the process with
 * code 1 when the file does not exist or does not contain valid JSON.
 */
function loadJSON(filepath) {
  if (!existsSync(filepath)) {
    console.error(`File not found: ${filepath}`);
    process.exit(1);
  }
  const raw = readFileSync(filepath, 'utf-8');
  try {
    return JSON.parse(raw);
  } catch (error) {
    // Name the offending file; a bare SyntaxError stack trace does not.
    console.error(`Invalid JSON in ${filepath}: ${error instanceof Error ? error.message : error}`);
    process.exit(1);
  }
}

/**
 * Relative change of `current` versus `baseline`, in percent.
 * A zero baseline yields 0 when `current` is also 0 and Infinity otherwise.
 */
function percentChange(baseline, current) {
  if (baseline === 0) return current === 0 ? 0 : Infinity;
  return ((current - baseline) / baseline) * 100;
}
/** Formats a percentage change with one decimal and an explicit `+` for increases. */
function formatChange(change) {
  const sign = change > 0 ? '+' : '';
  return `${sign}${change.toFixed(1)}%`;
}

/**
 * Classifies a percentage change against the regression threshold.
 * A NaN change (missing or non-numeric metric) is reported as INVALID instead
 * of silently passing as OK.
 */
function classifyChange(change, threshold) {
  if (Number.isNaN(change)) return 'INVALID';
  if (change > threshold) return 'FAIL';
  if (change < -threshold) return 'IMPROVED';
  return 'OK';
}

const args = parseArgs(process.argv);

if (!args.results || !args.baseline) {
  console.error('Usage: compare.js --results <results.json> --baseline <baseline.json> [--update-baseline]');
  process.exit(1);
}

const results = loadJSON(args.results);
// When updating, a missing baseline file is fine: this is how the first
// baseline of a new suite gets created.
const baseline = args.updateBaseline && !existsSync(args.baseline) ? {} : loadJSON(args.baseline);
// `??` rather than `||` so an explicit threshold of 0 is honoured.
const threshold = baseline.thresholdPercent ?? 20;
const resultEntries = results.entries || results;
const baselineEntries = baseline.entries || {};

if (args.updateBaseline) {
  // Keep the configured threshold and replace every entry with the current
  // mean/p50 values.
  const newBaseline = {
    thresholdPercent: threshold,
    entries: {}
  };
  for (const [key, data] of Object.entries(resultEntries)) {
    newBaseline.entries[key] = {
      mean: data.mean,
      p50: data.p50
    };
  }
  writeFileSync(args.baseline, JSON.stringify(newBaseline, null, 2) + '\n');
  console.log(`Baseline updated at ${args.baseline}`);
  process.exit(0);
}

let hasRegression = false;
let hasInvalid = false;
const rows = [];

console.log('');
console.log('='.repeat(72));
console.log(' BENCHMARK COMPARISON');
console.log('='.repeat(72));
console.log(` Regression threshold: ${threshold}%`);
console.log('');

for (const [key, data] of Object.entries(resultEntries)) {
  const base = baselineEntries[key];
  if (!base) {
    console.log(` [SKIP] No baseline for ${key}`);
    continue;
  }

  const meanChange = percentChange(base.mean, data.mean);
  const p50Change = percentChange(base.p50, data.p50);

  const meanStatus = classifyChange(meanChange, threshold);
  const p50Status = classifyChange(p50Change, threshold);

  if (meanStatus === 'FAIL' || p50Status === 'FAIL') {
    hasRegression = true;
  }
  if (meanStatus === 'INVALID' || p50Status === 'INVALID') {
    hasInvalid = true;
  }

  rows.push({
    key,
    'mean (ms)': `${Math.round(data.mean)} (baseline: ${base.mean})`,
    'mean change': formatChange(meanChange),
    'mean status': meanStatus,
    'p50 (ms)': `${Math.round(data.p50)} (baseline: ${base.p50})`,
    'p50 change': formatChange(p50Change),
    'p50 status': p50Status
  });
}

// Surface baseline entries that produced no result in this run; they are not
// compared, so without this line they would vanish unnoticed.
for (const key of Object.keys(baselineEntries)) {
  if (!(key in resultEntries)) {
    console.log(` [MISSING] No result for baseline entry ${key}`);
  }
}

console.table(rows);
console.log('');

if (hasInvalid) {
  console.error('FAILED: One or more benchmarks reported a missing or non-numeric metric.');
}

if (hasRegression) {
  console.error(`FAILED: One or more benchmarks regressed beyond the ${threshold}% threshold.`);
  console.error('If this regression is expected, update the baseline:');
  console.error(` node tests/benchmarks/utils/compare.js --results ${args.results} --baseline ${args.baseline} --update-baseline`);
}

if (hasRegression || hasInvalid) {
  process.exit(1);
} else {
  console.log('PASSED: All benchmarks are within the acceptable threshold.');
  process.exit(0);
}
+ * + * Called by CI via actions/github-script: + * const run = require('./tests/benchmarks/utils/pr-comment.js'); + * await run({ github, context, resultsPath, baselinePath, title }); + */ + +const fs = require('fs'); + +function buildCommentBody(results, baseline, title) { + const threshold = baseline.thresholdPercent || 20; + const resultEntries = results.entries || results; + const baselineEntries = baseline.entries || {}; + const marker = `## ${title}`; + + let body = `${marker}\n\n`; + body += `| Key | Mean (ms) | Baseline Mean | Change | Status |\n`; + body += `|---|---|---|---|---|\n`; + + let hasRegression = false; + + for (const [key, data] of Object.entries(resultEntries)) { + const base = baselineEntries[key]; + if (!base) continue; + + const changePercent = (data.mean - base.mean) / base.mean * 100; + const changeStr = changePercent.toFixed(1); + const status = changePercent > threshold ? 'šŸ”“ REGRESSION' : changePercent < -threshold ? '🟢 IMPROVED' : 'āœ… OK'; + if (changePercent > threshold) hasRegression = true; + + body += `| ${key} | ${Math.round(data.mean)} | ${base.mean} | ${changePercent > 0 ? 
'+' : ''}${changeStr}% | ${status} |\n`; + } + + body += `\n> Threshold: ${threshold}% regression allowed\n`; + + if (hasRegression) { + body += '\nāš ļø **Performance regression detected.** If expected, update the baseline.\n'; + } + + return { body, marker }; +} + +async function postOrUpdateComment(github, context, body, marker) { + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number + }); + + const existing = comments.find((c) => c.body.startsWith(marker)); + + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body + }); + } +} + +module.exports = async function run({ github, context, resultsPath, baselinePath, title }) { + if (!fs.existsSync(resultsPath)) { + console.log(`No benchmark results found at ${resultsPath}, skipping comment.`); + return; + } + + const results = JSON.parse(fs.readFileSync(resultsPath, 'utf-8')); + const baseline = JSON.parse(fs.readFileSync(baselinePath, 'utf-8')); + const { body, marker } = buildCommentBody(results, baseline, title); + + await postOrUpdateComment(github, context, body, marker); +}; diff --git a/tests/benchmarks/utils/results.ts b/tests/benchmarks/utils/results.ts new file mode 100644 index 00000000000..8bf018a0187 --- /dev/null +++ b/tests/benchmarks/utils/results.ts @@ -0,0 +1,92 @@ +/** + * Standard read/write helpers for benchmark results and baselines. 
/** Whether a smaller or a bigger value is the better outcome for a suite. */
export type Direction = 'smaller' | 'bigger';
/** Unit the values of a suite are expressed in. */
export type Unit = 'ms' | 's' | 'ops/s' | 'bytes' | '%' | 'count';

/** Descriptive header of a results file. */
export interface SuiteMeta {
  name: string;
  unit: Unit;
  direction: Direction;
}

/** Summary statistics for one benchmark key, plus raw timings and free-form metadata. */
export interface ResultEntry {
  mean: number;
  median: number;
  p50: number;
  p90: number;
  p99: number;
  stdDev: number;
  min: number;
  max: number;
  count: number;
  timings: number[];
  // Additional metadata merged in by buildResultEntry.
  [key: string]: any;
}

/** Results file written by benchmark tests: `{ suite, entries: { "<key>": ResultEntry } }`. */
export interface ResultsFile {
  suite: SuiteMeta;
  entries: Record<string, ResultEntry>;
}

/** The two metrics kept per key in a committed baseline. */
export interface BaselineEntry {
  mean: number;
  p50: number;
}

/** Baseline file committed per suite: `{ thresholdPercent, entries: { "<key>": BaselineEntry } }`. */
export interface BaselineFile {
  thresholdPercent: number;
  entries: Record<string, BaselineEntry>;
}

/**
 * Reads and parses a JSON file shared by the results and baseline readers.
 *
 * @param label Capitalised kind of file ("Results" / "Baseline") used in error messages.
 * @throws Error when the file is missing or its content is not valid JSON.
 */
function readJsonFile(filePath: string, label: string): unknown {
  if (!existsSync(filePath)) {
    throw new Error(`${label} file not found: ${filePath}`);
  }
  const raw = readFileSync(filePath, 'utf-8');
  try {
    return JSON.parse(raw);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`${label} file is not valid JSON: ${filePath} (${reason})`);
  }
}

/**
 * Loads a results file. The parsed content is returned as-is; its shape is
 * not validated.
 *
 * @throws Error when the file is missing or not valid JSON.
 */
export function readResults(filePath: string): ResultsFile {
  return readJsonFile(filePath, 'Results') as ResultsFile;
}

/** Writes `{ suite, entries }` to `filePath` as 2-space indented JSON. */
export function writeResults(filePath: string, suite: SuiteMeta, entries: Record<string, ResultEntry>): void {
  const data: ResultsFile = { suite, entries };
  writeFileSync(filePath, JSON.stringify(data, null, 2));
}

/**
 * Builds a result entry from raw timings: the summary statistics, the timings
 * themselves, then `meta` (which is spread last and therefore wins on key clashes).
 */
export function buildResultEntry(timings: number[], meta: Record<string, unknown> = {}): ResultEntry {
  return { ...summarize(timings), timings, ...meta };
}

/**
 * Loads a baseline file. The parsed content is returned as-is; its shape is
 * not validated.
 *
 * @throws Error when the file is missing or not valid JSON.
 */
export function readBaseline(filePath: string): BaselineFile {
  return readJsonFile(filePath, 'Baseline') as BaselineFile;
}
/**
 * Statistical utility functions for benchmark analysis.
 */

/** Throws unless `values` is non-empty and contains only finite numbers. */
function assertValid(values: number[]): void {
  if (values.length === 0) {
    throw new Error('Values array must not be empty');
  }
  if (!values.every(Number.isFinite)) {
    throw new TypeError('All values must be finite numbers');
  }
}

/** Ascending numeric copy; the input array is left untouched. */
function sorted(values: number[]): number[] {
  return [...values].sort((a, b) => a - b);
}

// Unchecked helpers: callers have already validated (and, where named so,
// sorted) the input. They let summarize() validate and sort only once.

function meanOf(values: readonly number[]): number {
  return values.reduce((sum, v) => sum + v, 0) / values.length;
}

function medianOfSorted(s: readonly number[]): number {
  const mid = Math.floor(s.length / 2);
  return s.length % 2 === 0 ? (s[mid - 1] + s[mid]) / 2 : s[mid];
}

/** Percentile by linear interpolation between the two closest ranks. */
function percentileOfSorted(s: readonly number[], p: number): number {
  const index = (p / 100) * (s.length - 1);
  const lower = Math.floor(index);
  const upper = Math.ceil(index);

  if (lower === upper) return s[lower];

  const weight = index - lower;
  return s[lower] + weight * (s[upper] - s[lower]);
}

function populationStdDevOf(values: readonly number[], avg: number): number {
  const variance = values.reduce((sum, v) => sum + (v - avg) ** 2, 0) / values.length;
  return Math.sqrt(variance);
}

function minOf(values: readonly number[]): number {
  return values.reduce((a, b) => (a < b ? a : b), Infinity);
}

function maxOf(values: readonly number[]): number {
  return values.reduce((a, b) => (a > b ? a : b), -Infinity);
}

/** Arithmetic mean. Throws on empty or non-finite input. */
export function mean(values: number[]): number {
  assertValid(values);
  return meanOf(values);
}

/** Median; the average of the two middle values for even-length input. */
export function median(values: number[]): number {
  assertValid(values);
  return medianOfSorted(sorted(values));
}

/**
 * `p`-th percentile (0-100) using linear interpolation between closest ranks.
 *
 * @throws RangeError when `p` is NaN or outside [0, 100].
 */
export function percentile(values: number[], p: number): number {
  assertValid(values);

  // Written as a negated range check so NaN is rejected too; `p < 0 || p > 100`
  // lets NaN through and the lookup then yields undefined.
  if (!(p >= 0 && p <= 100)) {
    throw new RangeError(`Percentile must be between 0 and 100, got ${p}`);
  }

  return percentileOfSorted(sorted(values), p);
}

/**
 * Population standard deviation (divide by N)
 */
export function populationStdDev(values: number[]): number {
  assertValid(values);
  return populationStdDevOf(values, meanOf(values));
}

/**
 * Sample standard deviation (divide by N - 1)
 *
 * @throws Error when fewer than 2 values are given.
 */
export function sampleStdDev(values: number[]): number {
  assertValid(values);

  if (values.length < 2) {
    throw new Error('Sample standard deviation requires at least 2 values');
  }

  const avg = meanOf(values);
  const variance = values.reduce((sum, v) => sum + (v - avg) ** 2, 0) / (values.length - 1);

  return Math.sqrt(variance);
}

/** Smallest value. Throws on empty or non-finite input. */
export function min(values: number[]): number {
  assertValid(values);
  return minOf(values);
}

/** Largest value. Throws on empty or non-finite input. */
export function max(values: number[]): number {
  assertValid(values);
  return maxOf(values);
}

/** Shape returned by {@link summarize}. */
export interface Summary {
  mean: number;
  median: number;
  p50: number;
  p90: number;
  p99: number;
  min: number;
  max: number;
  stdDev: number;
  count: number;
}

/**
 * Summary for benchmarking (no rounding, keep precision).
 *
 * The input is validated and sorted once and shared by all order statistics.
 * `stdDev` is the population standard deviation.
 */
export function summarize(values: number[]): Summary {
  assertValid(values);

  const s = sorted(values);
  const avg = meanOf(values);

  return {
    mean: avg,
    median: medianOfSorted(s),
    p50: percentileOfSorted(s, 50),
    p90: percentileOfSorted(s, 90),
    p99: percentileOfSorted(s, 99),
    min: minOf(values),
    max: maxOf(values),
    stdDev: populationStdDevOf(values, avg),
    count: values.length
  };
}