From 572c43138050049e92d391ad050fb660a838795d Mon Sep 17 00:00:00 2001
From: Adrian Wedd
Date: Wed, 18 Jun 2025 21:04:05 +1000
Subject: [PATCH] feat: add benchmark job and star-delta timing

---
Reviewer notes (text between the "---" line and the first "diff --git" line
is not part of the commit):
  * Line structure was restored from a whitespace-collapsed copy of this
    patch; the indentation of context lines is inferred and should be
    checked against the target tree before applying.
  * The "Run benchmarks" step now redirects stderr into the pipe (2>&1).
    bench_star_delta() prints its WARNING with file=sys.stderr, so a bare
    "| tee benchmark.log" would leave regressions out of the uploaded log.
    The ci.yml post-image index hash below predates this tweak.
  * TODO confirm: bench_star_delta() calls load_repos(..., stream=False)
    while docs/PERFORMANCE.md documents load_repos(..., use_stream=True).
  * The From: header carries no e-mail address in this copy.

 .github/workflows/ci.yml | 36 ++++++++++++++++++++++++++++++++++++
 docs/PERFORMANCE.md      | 13 ++++++++++---
 scripts/benchmark_ops.py | 39 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 84 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 301cf11..6be7411 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,6 +4,12 @@ on:
   push:
     branches: [ main ]
   pull_request:
+  workflow_dispatch:
+    inputs:
+      run-benchmarks:
+        description: "Run benchmarking job"
+        default: 'false'
+        required: false
 
 defaults:
   run:
@@ -200,6 +206,36 @@ jobs:
           name: bandit
           path: bandit.json
 
+  benchmarks:
+    needs: tests
+    if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.run-benchmarks == 'true' }}
+    runs-on: ubuntu-latest
+    env:
+      PYTHONFAULTHANDLER: '1'
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Cache pip
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
+          restore-keys: ${{ runner.os }}-pip-
+      - name: Install deps
+        run: |
+          pip install -r requirements.txt
+          pip install -r dev-requirements.txt
+          pip install -e '.[dev]'
+      - name: Run benchmarks
+        run: python scripts/benchmark_ops.py 2>&1 | tee benchmark.log
+      - name: Upload benchmark log
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmarks
+          path: benchmark.log
+
   badge-update:
     runs-on: ubuntu-latest
     permissions:
diff --git a/docs/PERFORMANCE.md b/docs/PERFORMANCE.md
index df2581a..f818747 100644
--- a/docs/PERFORMANCE.md
+++ b/docs/PERFORMANCE.md
@@ -15,6 +15,13 @@ Large `repos.json` files can slow down table generation and diff checks.
   ```bash
   pip install ijson
   ```
-  Then call `load_repos(..., use_stream=True)`.
-- Run `scripts/benchmark_ops.py` to measure sort and diff times. The script
-  prints a warning when operations exceed built-in baselines.
+- Then call `load_repos(..., use_stream=True)`.
+- Run `scripts/benchmark_ops.py` to benchmark sort, diff and star-delta
+  calculations. The script prints a warning when any operation exceeds its
+  baseline.
+
+### CI Benchmarks
+
+The `benchmarks` job in `ci.yml` runs only when triggered via
+`workflow_dispatch` with `run-benchmarks: true`. It executes the benchmarking
+script and uploads the results as an artifact.
diff --git a/scripts/benchmark_ops.py b/scripts/benchmark_ops.py
index ac394a1..0a19868 100755
--- a/scripts/benchmark_ops.py
+++ b/scripts/benchmark_ops.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-"""Benchmark sort and diff operations."""
+"""Benchmark sort, diff and star-delta operations."""
 from __future__ import annotations
 
 import sys
@@ -9,6 +9,7 @@ REPOS_PATH = Path("data/repos.json")
 
 BASELINE_SORT = 0.5
 BASELINE_DIFF = 0.2
+BASELINE_STAR_DELTA = 0.3
 THRESHOLD = 1.5
 
 
@@ -46,6 +47,42 @@ def bench_diff() -> float:
     return dur
 
 
+def bench_star_delta() -> float:
+    """Benchmark star-delta calculations."""
+    from agentic_index_cli.validate import load_repos
+
+    history_file = Path("data/last_snapshot.txt")
+    last_path = (
+        Path(history_file.read_text().strip()) if history_file.exists() else None
+    )
+    prev_map = {}
+    if last_path and last_path.exists():
+        prev_repos = load_repos(last_path)
+        prev_map = {r.get("full_name", r.get("name")): r for r in prev_repos}
+
+    repos = load_repos(REPOS_PATH, cache=True, stream=False)
+    start = time.perf_counter()
+    for repo in repos:
+        prev = prev_map.get(repo.get("full_name", repo.get("name")))
+        if prev:
+            _ = repo.get("stars", repo.get("stargazers_count", 0)) - prev.get(
+                "stars",
+                prev.get("stargazers_count", 0),
+            )
+        else:
+            _ = 0
+    dur = time.perf_counter() - start
+    if dur > BASELINE_STAR_DELTA * THRESHOLD:
+        print(
+            f"WARNING: star-delta took {dur:.3f}s, baseline {BASELINE_STAR_DELTA:.3f}s",
+            file=sys.stderr,
+        )
+    else:
+        print(f"star-delta {dur:.3f}s")
+    return dur
+
+
 if __name__ == "__main__":
     bench_sort()
     bench_diff()
+    bench_star_delta()