adrianwedd · adrianwedd · Jun 18, 2025
@@ -4,6 +4,12 @@ on:
   push:
     branches: [ main ]
   pull_request:
+  workflow_dispatch:
+    inputs:
+      run-benchmarks:
+        description: "Run benchmarking job"
+        # A boolean input renders as a checkbox in the "Run workflow" form
+        # instead of a free-text field. github.event.inputs still exposes the
+        # value as the string 'true' or 'false'.
+        type: boolean
+        default: false
+        required: false
 
 defaults:
   run:
@@ -200,6 +206,36 @@ jobs:
           name: bandit
           path: bandit.json
 
+  benchmarks:
+    # Opt-in job: runs only for manual dispatches that set run-benchmarks to
+    # true, and only after the tests job has succeeded.
+    needs: tests
+    if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.run-benchmarks == 'true' }}
+    runs-on: ubuntu-latest
+    env:
+      PYTHONFAULTHANDLER: '1'
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Cache pip
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
+          restore-keys: ${{ runner.os }}-pip-
+      - name: Install deps
+        run: |
+          pip install -r requirements.txt
+          pip install -r dev-requirements.txt
+          pip install -e '.[dev]'
+      - name: Run benchmarks
+        # The script reports slow operations on stderr, so stderr is merged
+        # into stdout to get those warnings into benchmark.log. An explicit
+        # `shell: bash` runs with `-eo pipefail`, so a failing script is not
+        # masked by tee's exit status.
+        shell: bash
+        run: python scripts/benchmark_ops.py 2>&1 | tee benchmark.log
+      - name: Upload benchmark log
+        # Keep the log available for debugging even when the benchmark step
+        # fails.
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmarks
+          path: benchmark.log
+
   badge-update:
     runs-on: ubuntu-latest
     permissions:

@@ -15,6 +15,13 @@ Large `repos.json` files can slow down table generation and diff checks.
   ```bash
   pip install ijson
   ```
-  Then call `load_repos(..., use_stream=True)`.
-- Run `scripts/benchmark_ops.py` to measure sort and diff times. The script
-  prints a warning when operations exceed built-in baselines.
+- After installing `ijson`, call `load_repos(..., use_stream=True)`.
+- Run `scripts/benchmark_ops.py` to benchmark sort, diff and star-delta
+  calculations. The script prints a warning when any operation exceeds its
+  baseline.
+
+### CI Benchmarks
+
+The `benchmarks` job in `ci.yml` runs only when triggered via
+`workflow_dispatch` with `run-benchmarks: true`. It executes the benchmarking
+script and uploads the results as an artifact.
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-"""Benchmark sort and diff operations."""
+"""Benchmark sort, diff and star-delta operations."""
 from __future__ import annotations
 
 import sys
@@ -9,6 +9,7 @@
 REPOS_PATH = Path("data/repos.json")  # current repository data loaded by the benchmarks
 BASELINE_SORT = 0.5  # presumably seconds, like BASELINE_STAR_DELTA; its use is outside this view
 BASELINE_DIFF = 0.2  # presumably seconds, like BASELINE_STAR_DELTA; its use is outside this view
+BASELINE_STAR_DELTA = 0.3  # seconds; baseline for the timed star-delta loop
 THRESHOLD = 1.5  # multiplier: star-delta warns when its duration exceeds baseline * THRESHOLD
 
 
@@ -46,6 +47,42 @@ def bench_diff() -> float:
     return dur
 
 
+def bench_star_delta() -> float:
+    """Benchmark star-delta calculations."""
+    from agentic_index_cli.validate import load_repos
+
+    history_file = Path("data/last_snapshot.txt")
+    last_path = (
+        Path(history_file.read_text().strip()) if history_file.exists() else None
+    )
+    prev_map = {}
+    if last_path and last_path.exists():
+        prev_repos = load_repos(last_path)
+        prev_map = {r.get("full_name", r.get("name")): r for r in prev_repos}
+
+    repos = load_repos(REPOS_PATH, cache=True, stream=False)
+    start = time.perf_counter()
+    for repo in repos:
+        prev = prev_map.get(repo.get("full_name", repo.get("name")))
+        if prev:
+            _ = repo.get("stars", repo.get("stargazers_count", 0)) - prev.get(
+                "stars",
+                prev.get("stargazers_count", 0),
+            )
+        else:
+            _ = 0
+    dur = time.perf_counter() - start
+    if dur > BASELINE_STAR_DELTA * THRESHOLD:
+        print(
+            f"WARNING: star-delta took {dur:.3f}s, baseline {BASELINE_STAR_DELTA:.3f}s",
+            file=sys.stderr,
+        )
+    else:
+        print(f"star-delta {dur:.3f}s")
+    return dur
+
+
 if __name__ == "__main__":  # script entry point: run each benchmark in order
     bench_sort()
     bench_diff()
+    bench_star_delta()