cloudamqp · jage · Jun 25, 2026 · Jun 17, 2026
@@ -104,6 +104,7 @@ jobs:
       - name: Build matrix
         id: filter
         run: |
+          # Per-broker unreliable sizes are dropped later, using scripts/throughput_skip_sizes.json.
           FULL_MATRIX='[
             {"broker":"t4g.micro",  "arch":"arm64","load_generator":"c8g.large"},
             {"broker":"t4g.small",  "arch":"arm64","load_generator":"c8g.large"},
@@ -152,6 +153,14 @@ jobs:
           echo "broker_safe=${BROKER_SAFE}"    >> "$GITHUB_ENV"
           echo "ssh_key_name=${SSH_KEY_NAME}"  >> "$GITHUB_ENV"
           echo "state_file=${STATE_FILE}"      >> "$GITHUB_ENV"
+          # Drop this broker's unreliable sizes (same config the website redaction uses, issue #90).
+          SKIP_MAP=$(cat scripts/throughput_skip_sizes.json)
+          MSG_SIZES=$(printf '%s' "$DEFAULT_MSG_SIZES" | jq -c --argjson skip "$SKIP_MAP" --arg b "$BROKER" \
+            '($skip[$b] // []) as $s | map(select(. as $x | $s | index($x) | not))')
+          echo "msg_sizes=${MSG_SIZES}"        >> "$GITHUB_ENV"
+        env:
+          DEFAULT_MSG_SIZES: ${{ env.TF_VAR_message_sizes }}
+          BROKER: ${{ matrix.broker }}
 
       - name: Write SSH public key
         env:
@@ -175,6 +184,7 @@ jobs:
           terraform apply -auto-approve \
             -state="${{ env.state_file }}" \
             -var "ami_arch=${{ matrix.arch }}" \
+            -var "message_sizes=${{ env.msg_sizes }}" \
             -var "broker_instance_type=${{ matrix.broker }}" \
             -var "broker_name=benchmark-broker-${{ env.broker_safe }}" \
             -var "load_generator_instance_type=${{ matrix.load_generator }}" \

@@ -80,6 +80,9 @@ infrastructure, runs benchmarks across all supported instance types in parallel,
 results into markdown summary files, and opens a pull request against `main` with the results
 committed to a `results/v{version}` branch.
 
+See [docs/pipeline.md](docs/pipeline.md) for diagrams of the AWS infrastructure and the CI
+workflow, from release trigger to published results.
+
 ### Triggering a run
 
 **Via the GitHub UI:** go to **Actions → Benchmark → Run workflow**, fill in the version and pick a scenario.
@@ -146,6 +149,20 @@ gh workflow run benchmark.yml \
 The individual `benchmark-latency.yml`, `benchmark-throughput.yml`, and `benchmark-mqtt-throughput.yml`
 workflows can also be triggered directly in the same way if you want to skip the aggregate/PR step.
 
+### Network limits
+
+At larger message sizes the throughput test can outrun an instance's sustained AWS network
+bandwidth once its burst allowance is spent, giving unreliable numbers. Two config files keep
+those numbers out of the published charts (raw CSVs stay in git):
+
+- `scripts/throughput_skip_sizes.json` — sizes skipped per instance (65536 B on `t4g.micro` and
+  `r7g.medium`). The workflow skips them on new runs; `build_data.py` redacts them from old data.
+- `scripts/website_hidden_instances.json` — instances still benchmarked but hidden from all charts
+  (`z1d.large`, whose large-message throughput is bandwidth-bound, not LavinMQ-bound).
+
+Latency is unaffected: its per-size rate limits keep demand below every instance's network
+baseline. See [issue #90](../../issues/90).
+
 ## Publishing results
 
 Raw results are stored and available in this repository. These are also available at https://lavinmq.com/benchmark

@@ -24,6 +24,8 @@
 ROOT = Path(__file__).resolve().parent.parent
 RESULTS = ROOT / "results"
 SCENARIOS = ("throughput", "latency", "mqtt_throughput")
+SKIP_SIZES_CONFIG = Path(__file__).resolve().parent / "throughput_skip_sizes.json"
+HIDDEN_INSTANCES_CONFIG = Path(__file__).resolve().parent / "website_hidden_instances.json"
 VERSION_RE = re.compile(r"^v\d+\.\d+\.\d+$")
 PRE_VERSION_RE = re.compile(r"^v\d+\.\d+\.\d+[-.].+$")
 
@@ -37,6 +39,30 @@ def instance_from_filename(name: str) -> str:
     return name.split("_", 1)[0].replace("-", ".")
 
 
+def load_throughput_skip_sizes() -> dict[str, set[int]]:
+    """instance -> message sizes to omit from published throughput data.
+
+    These sizes throttle to the instance's AWS network baseline rather than
+    measuring LavinMQ, so they are redacted from old and new results alike.
+    benchmark-throughput.yml reads the same file. Missing file -> {}. See issue #90.
+    """
+    if not SKIP_SIZES_CONFIG.exists():
+        return {}
+    raw = json.loads(SKIP_SIZES_CONFIG.read_text())
+    return {inst: set(sizes) for inst, sizes in raw.items()}
+
+
+def load_hidden_instances() -> set[str]:
+    """Instances that are benchmarked but omitted from all published website data.
+
+    Raw CSVs stay in git; only the aggregated JSON the charts read drops them.
+    Returns an empty set if the config file is missing. See issue #90.
+    """
+    if not HIDDEN_INSTANCES_CONFIG.exists():
+        return set()
+    return set(json.loads(HIDDEN_INSTANCES_CONFIG.read_text()))
+
+
 def median(values: list) -> float | None:  # median of the non-None entries, else None
     nums = [v for v in values if v is not None]  # ignore None entries
     return statistics.median(nums) if nums else None
@@ -120,13 +146,19 @@ def main() -> None:
     entries = collect_entries()
     versions = sorted({v for v, _, _, _ in entries}, key=version_sort_key)
     generated_at = dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds")
+    throughput_skips = load_throughput_skip_sizes()
+    hidden_instances = load_hidden_instances()
 
     for scenario in SCENARIOS:
         aggregator = aggregate_latency if scenario == "latency" else aggregate_throughput
         rows = []
         for v, s, inst, path in entries:
             if s == scenario:
                 rows += aggregator(v, inst, path)
+        if hidden_instances:
+            rows = [r for r in rows if r["instance"] not in hidden_instances]
+        if scenario == "throughput":
+            rows = [r for r in rows if r["size"] not in throughput_skips.get(r["instance"], ())]
         target = RESULTS / f"{scenario}.json"
         target.write_text(json.dumps({
             "generated_at": generated_at,

@@ -0,0 +1,4 @@
+{
+  "t4g.micro": [65536],
+  "r7g.medium": [65536]
+}
@@ -0,0 +1 @@
+["z1d.large"]