diff --git a/.github/workflows/benchmark-throughput.yml b/.github/workflows/benchmark-throughput.yml index 0c67f28..1a65d2a 100644 --- a/.github/workflows/benchmark-throughput.yml +++ b/.github/workflows/benchmark-throughput.yml @@ -104,6 +104,7 @@ jobs: - name: Build matrix id: filter run: | + # Per-broker unreliable sizes are dropped later in the workflow, per scripts/throughput_skip_sizes.json. FULL_MATRIX='[ {"broker":"t4g.micro", "arch":"arm64","load_generator":"c8g.large"}, {"broker":"t4g.small", "arch":"arm64","load_generator":"c8g.large"}, @@ -152,6 +153,14 @@ jobs: echo "broker_safe=${BROKER_SAFE}" >> "$GITHUB_ENV" echo "ssh_key_name=${SSH_KEY_NAME}" >> "$GITHUB_ENV" echo "state_file=${STATE_FILE}" >> "$GITHUB_ENV" + # Drop this broker's unreliable sizes (same config the website redaction uses, issue #90). + SKIP_MAP=$(cat scripts/throughput_skip_sizes.json) + MSG_SIZES=$(printf '%s' "$DEFAULT_MSG_SIZES" | jq -c --argjson skip "$SKIP_MAP" --arg b "$BROKER" \ + '($skip[$b] // []) as $s | map(select(. as $x | $s | index($x) | not))') + echo "msg_sizes=${MSG_SIZES}" >> "$GITHUB_ENV" + env: + DEFAULT_MSG_SIZES: ${{ env.TF_VAR_message_sizes }} + BROKER: ${{ matrix.broker }} - name: Write SSH public key env: @@ -175,6 +184,7 @@ jobs: terraform apply -auto-approve \ -state="${{ env.state_file }}" \ -var "ami_arch=${{ matrix.arch }}" \ + -var "message_sizes=${{ env.msg_sizes }}" \ -var "broker_instance_type=${{ matrix.broker }}" \ -var "broker_name=benchmark-broker-${{ env.broker_safe }}" \ -var "load_generator_instance_type=${{ matrix.load_generator }}" \ diff --git a/README.md b/README.md index 2c762b5..0d7eb2f 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,9 @@ infrastructure, runs benchmarks across all supported instance types in parallel, results into markdown summary files, and opens a pull request against `main` with the results committed to a `results/v{version}` branch. 
+See [docs/pipeline.md](docs/pipeline.md) for diagrams of the AWS infrastructure and the CI +workflow, from release trigger to published results. + ### Triggering a run **Via the GitHub UI:** go to **Actions → Benchmark → Run workflow**, fill in the version and pick a scenario. @@ -146,6 +149,20 @@ gh workflow run benchmark.yml \ The individual `benchmark-latency.yml`, `benchmark-throughput.yml`, and `benchmark-mqtt-throughput.yml` workflows can also be triggered directly in the same way if you want to skip the aggregate/PR step. +### Network limits + +At larger message sizes the throughput test can outrun an instance's sustained AWS network +bandwidth once its burst allowance is spent, giving unreliable numbers. Two config files keep those results out +of the published charts (raw CSVs stay in git): + +- `scripts/throughput_skip_sizes.json` — sizes skipped per instance (`t4g.micro`, `r7g.medium` at + 65536 B). The `benchmark-throughput.yml` workflow skips them on new runs; `build_data.py` also redacts them from old data. +- `scripts/website_hidden_instances.json` — instances still benchmarked but hidden from all charts + (`z1d.large`, whose large-message throughput is bandwidth-bound, not LavinMQ-bound). + +Latency is unaffected: its per-size rate limits keep demand below every instance's baseline bandwidth. See +[issue #90](../../issues/90). + ## Publishing results Raw results are stored and available in this repository. 
These are also available at https://lavinmq.com/benchmark diff --git a/scripts/build_data.py b/scripts/build_data.py index 3429855..e636a43 100644 --- a/scripts/build_data.py +++ b/scripts/build_data.py @@ -24,6 +24,8 @@ ROOT = Path(__file__).resolve().parent.parent RESULTS = ROOT / "results" SCENARIOS = ("throughput", "latency", "mqtt_throughput") +SKIP_SIZES_CONFIG = Path(__file__).resolve().parent / "throughput_skip_sizes.json" +HIDDEN_INSTANCES_CONFIG = Path(__file__).resolve().parent / "website_hidden_instances.json" VERSION_RE = re.compile(r"^v\d+\.\d+\.\d+$") PRE_VERSION_RE = re.compile(r"^v\d+\.\d+\.\d+[-.].+$") @@ -37,6 +39,30 @@ def instance_from_filename(name: str) -> str: return name.split("_", 1)[0].replace("-", ".") +def load_throughput_skip_sizes() -> dict[str, set[int]]: + """Map each instance to the message sizes to omit from published throughput data. + + These sizes throttle to the instance's AWS network baseline rather than + measuring LavinMQ, so they are redacted from old and new results alike. + The benchmark-throughput.yml workflow reads the same JSON file to drop these sizes from new runs; a missing file yields an empty mapping. See issue #90. + """ + if not SKIP_SIZES_CONFIG.exists(): + return {} + raw = json.loads(SKIP_SIZES_CONFIG.read_text()) + return {inst: set(sizes) for inst, sizes in raw.items()} + + +def load_hidden_instances() -> set[str]: + """Return the instances that are benchmarked but omitted from all published website data. + + Raw CSVs stay in git; only the aggregated JSON the charts read drops them. + A missing config file yields an empty set. See issue #90. 
+ """ + if not HIDDEN_INSTANCES_CONFIG.exists(): + return set() + return set(json.loads(HIDDEN_INSTANCES_CONFIG.read_text())) + + def median(values: list) -> float | None: nums = [v for v in values if v is not None] return statistics.median(nums) if nums else None @@ -120,6 +146,8 @@ def main() -> None: entries = collect_entries() versions = sorted({v for v, _, _, _ in entries}, key=version_sort_key) generated_at = dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds") + throughput_skips = load_throughput_skip_sizes() + hidden_instances = load_hidden_instances() for scenario in SCENARIOS: aggregator = aggregate_latency if scenario == "latency" else aggregate_throughput @@ -127,6 +155,10 @@ def main() -> None: for v, s, inst, path in entries: if s == scenario: rows += aggregator(v, inst, path) + if hidden_instances: + rows = [r for r in rows if r["instance"] not in hidden_instances] + if scenario == "throughput": + rows = [r for r in rows if r["size"] not in throughput_skips.get(r["instance"], ())] target = RESULTS / f"{scenario}.json" target.write_text(json.dumps({ "generated_at": generated_at, diff --git a/scripts/throughput_skip_sizes.json b/scripts/throughput_skip_sizes.json new file mode 100644 index 0000000..9db4cd6 --- /dev/null +++ b/scripts/throughput_skip_sizes.json @@ -0,0 +1,4 @@ +{ + "t4g.micro": [65536], + "r7g.medium": [65536] +} diff --git a/scripts/website_hidden_instances.json b/scripts/website_hidden_instances.json new file mode 100644 index 0000000..6e918d0 --- /dev/null +++ b/scripts/website_hidden_instances.json @@ -0,0 +1 @@ +["z1d.large"]