From 68f084208bc280af1f7d5219b77326f09d07c315 Mon Sep 17 00:00:00 2001 From: Alessandro Cere Date: Wed, 1 Apr 2026 11:38:44 -0700 Subject: [PATCH 1/3] feat: add low-memory mode, RunningStats, and live progress-bar stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add `low_memory` parameter to Runner/run() that writes responses to disk without keeping them in memory, for large-scale test runs. - Introduce `RunningStats` class that accumulates metrics incrementally (counts, sums, sorted values for percentile computation). - Replace `_builtin_stats` cached_property on Result with `_preloaded_stats` populated by RunningStats during the run or from stats.json on load. - Add `snapshot()` method on RunningStats for live progress-bar display of p50/p90 TTFT, p50/p90 TTLT, median tokens/s, total tokens, and failure count — configurable via `progress_bar_stats` parameter. - Add `_compute_stats()` classmethod on Result as fallback for manually constructed Result objects and post-load_responses() recomputation. - Update tests for the new stats flow. --- llmeter/results.py | 115 +++++++++-------- llmeter/runner.py | 106 ++++++++++++++-- llmeter/utils.py | 236 +++++++++++++++++++++++++++++++++++ tests/unit/test_lazy_load.py | 20 +-- tests/unit/test_results.py | 4 +- 5 files changed, 405 insertions(+), 76 deletions(-) diff --git a/llmeter/results.py b/llmeter/results.py index 45a1c00..6654d2e 100644 --- a/llmeter/results.py +++ b/llmeter/results.py @@ -6,7 +6,6 @@ import os from dataclasses import asdict, dataclass from datetime import datetime, timezone -from functools import cached_property from numbers import Number from typing import Any, Sequence @@ -169,8 +168,8 @@ def load_responses(self) -> list[InvocationResponse]: InvocationResponse(**json.loads(line)) for line in f if line ] logger.info("Loaded %d responses from %s", len(self.responses), responses_path) - # Invalidate cached stats so they are recomputed with the loaded responses - self.__dict__.pop("_builtin_stats", None) + # Recompute stats from the freshly loaded responses + self._preloaded_stats = self._compute_stats(self) return self.responses @classmethod @@ -241,9 +240,9 @@ def load( result = cls(responses=responses, **summary) - # When skipping responses, load pre-computed stats from stats.json if available - # so that result.stats works without needing the responses + # Load or compute stats if not load_responses: + # Use pre-computed stats from disk when responses aren't loaded stats_path = result_path / "stats.json" if stats_path.exists(): with stats_path.open("r") as s: @@ -260,78 +259,84 @@ def load( pass else: result._preloaded_stats = None + else: + # Compute stats from the loaded responses + result._preloaded_stats = cls._compute_stats(result) return result - @cached_property - def _builtin_stats(self) -> dict: - """ - Default run metrics and aggregated statistics provided by LLMeter core + @classmethod + def _compute_stats(cls, result: "Result") -> dict: + """Compute stats from in-memory responses. - Users should generally refer to the `.stats` property instead, which combines this data - with any additional values contributed by callbacks or other extensions. + This is the fallback used when ``_preloaded_stats`` is not available — for + example when a ``Result`` is constructed manually or after + :meth:`load_responses` reloads data from disk. - This is a read-only and `@cached_property`, which means the result is computed once and - then cached for subsequent accesses - improving performance. + Args: + result: A ``Result`` instance whose ``responses`` list is populated. Returns: - stats: A dictionary containing all computed statistics. The keys are: - - All key-value pairs from the Result's dictionary representation - - Test-specific statistics - - Aggregated statistics with keys in the format "{stat_name}-{aggregation_type}" - where stat_name is one of the four metrics listed above, and - aggregation_type includes measures like mean, median, etc. - """ + A flat dictionary matching the ``Result.stats`` schema, containing + run-level metrics (``failed_requests``, ``requests_per_minute``, …) + and per-metric aggregations (``time_to_first_token-p50``, …). + + Example:: + result = Result(responses=my_responses, total_requests=100, ...) + stats = Result._compute_stats(result) + stats["time_to_first_token-p90"] # 0.485 + """ aggregation_metrics = [ "time_to_last_token", "time_to_first_token", "num_tokens_output", "num_tokens_input", ] - - results_stats = _get_stats_from_results( - self, - aggregation_metrics, - ) + results_stats = _get_stats_from_results(result, aggregation_metrics) return { - **self.to_dict(), - **_get_run_stats(self), + **result.to_dict(), + **_get_run_stats(result), **{f"{k}-{j}": v for k, o in results_stats.items() for j, v in o.items()}, } @property def stats(self) -> dict: + """Run metrics and aggregated statistics over the individual requests. + + Returns a flat dictionary combining: + + * Basic run information (from ``to_dict()``). + * Aggregated statistics (``average``, ``p50``, ``p90``, ``p99``) for + ``time_to_last_token``, ``time_to_first_token``, ``num_tokens_output``, + and ``num_tokens_input``. Keys use the format + ``"{metric}-{aggregation}"``. + * Run-level throughput metrics (``requests_per_minute``, + ``total_input_tokens``, etc.). + * Any additional stats contributed by callbacks via + :meth:`_update_contributed_stats`. + + During a live run, stats are computed incrementally by + :class:`~llmeter.utils.RunningStats` and stored in ``_preloaded_stats``. + When loading from disk with ``load_responses=False``, pre-computed stats + from ``stats.json`` are used. As a fallback (e.g. manually constructed + ``Result``), stats are computed on the fly from ``self.responses``. + + Returns: + A new shallow copy of the stats dictionary on each access. + + Example:: + + result = await runner.run(payload=my_payload, clients=5) + result.stats["time_to_first_token-p50"] # 0.312 + result.stats["requests_per_minute"] # 141.2 + result.stats["failed_requests"] # 0 """ - Run metrics and aggregated statistics over the individual requests - - This combined view includes: - - Basic information about the run (from the Result's dictionary representation) - - Aggregated statistics ('average', 'p50', 'p90', 'p99') for: - - Time to last token - - Time to first token - - Number of tokens output - - Number of tokens input - - Aggregated statistics are keyed in the format "{stat_name}-{aggregation_type}" - - This property is read-only and returns a new shallow copy of the data on each access. - Default stats provided by LLMeter are calculated on first access and then cached. Callbacks - Callbacks or other mechanisms needing to augment stats should use the - `_update_contributed_stats()` method. - - When the Result was loaded with ``load_responses=False``, pre-computed stats from - ``stats.json`` are returned if available. Call ``load_responses()`` to load the - individual responses and recompute stats from the raw data. - """ - # Use preloaded stats when responses were not loaded - if not self.responses and self._preloaded_stats is not None: + if self._preloaded_stats is not None: stats = self._preloaded_stats.copy() - if self._contributed_stats: - stats.update(self._contributed_stats) - return stats - - stats = self._builtin_stats.copy() + else: + # Fallback: compute from responses (e.g. Result constructed manually) + stats = self._compute_stats(self) if self._contributed_stats: stats.update(self._contributed_stats) diff --git a/llmeter/runner.py b/llmeter/runner.py index 47626e3..0604a32 100644 --- a/llmeter/runner.py +++ b/llmeter/runner.py @@ -20,7 +20,7 @@ from tqdm.auto import tqdm, trange from upath import UPath as Path -from llmeter.utils import now_utc +from llmeter.utils import RunningStats, now_utc if TYPE_CHECKING: # Avoid circular import: We only need typing for Callback @@ -61,6 +61,8 @@ class _RunConfig: run_description: str | None = None timeout: int | float = 60 callbacks: list[Callback] | None = None + low_memory: bool = False + progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None disable_per_client_progress_bar: InitVar[bool] = True disable_clients_progress_bar: InitVar[bool] = True @@ -149,19 +151,35 @@ class _Run(_RunConfig): """ def __post_init__(self, disable_client_progress_bar, disable_clients_progress_bar): - assert ( - self.run_name is not None - ), "Test Run must be created with an explicit run_name" + assert self.run_name is not None, ( + "Test Run must be created with an explicit run_name" + ) super().__post_init__(disable_client_progress_bar, disable_clients_progress_bar) - assert ( - self.endpoint is not None - ), "Test Run must be created with an explicit Endpoint" + assert self.endpoint is not None, ( + "Test Run must be created with an explicit Endpoint" + ) self._validate_and_prepare_payload() self._responses = [] + if self.low_memory: + assert self.output_path is not None, ( + "output_path is required when low_memory=True " + "(responses must be written to disk)" + ) + + self._running_stats = RunningStats( + metrics=[ + "time_to_last_token", + "time_to_first_token", + "time_per_output_token", + "num_tokens_output", + "num_tokens_input", + ] + ) + def _validate_and_prepare_payload(self): """Validate and prepare the payload for the test run and update n_requests @@ -251,9 +269,18 @@ async def _process_results_from_q(self, output_path: Path | None = None): if self.callbacks is not None: [await cb.after_invoke(response) for cb in self.callbacks] - self._responses.append(response) + if self.low_memory and self._running_stats is not None: + self._running_stats.update(response.to_dict()) + else: + self._responses.append(response) + self._running_stats.update(response.to_dict()) + if self._progress_bar: self._progress_bar.update(1) + self._progress_bar.set_postfix( + self._running_stats.snapshot(self.progress_bar_stats), + refresh=False, + ) if output_path: output_path.parent.mkdir(parents=True, exist_ok=True) @@ -403,7 +430,7 @@ async def _invoke_n_c( end_t = time.perf_counter() total_test_time = end_t - start_t logger.info( - f"Generated {clients} connections with {n_requests} invocations each in {total_test_time*1000:.2f} seconds" + f"Generated {clients} connections with {n_requests} invocations each in {total_test_time * 1000:.2f} seconds" ) # Signal the token counting task to exit @@ -474,7 +501,7 @@ async def _run(self): return result self._progress_bar.close() - logger.info(f"Test completed in {total_test_time*1000:.2f} seconds.") + logger.info(f"Test completed in {total_test_time * 1000:.2f} seconds.") result = replace( result, @@ -484,6 +511,22 @@ async def _run(self): end_time=run_end_time, ) + # Compute stats from the running accumulators + result._preloaded_stats = self._running_stats.to_stats( + total_requests=result.total_requests, + total_test_time=total_test_time, + result_dict=result.to_dict(), + ) + result._preloaded_stats["start_time"] = run_start_time + result._preloaded_stats["end_time"] = run_end_time + result._preloaded_stats["total_test_time"] = total_test_time + + if self.low_memory: + logger.info( + "Low-memory mode: responses not stored in memory. " + "Use result.load_responses() to load from disk." + ) + if self.callbacks is not None: [await cb.after_run(result) for cb in self.callbacks] @@ -554,6 +597,15 @@ class Runner(_RunConfig): endpoint. Defaults to 60 seconds. callbacks (list[Callback] | None): Optional callbacks to enable during the test Run. See `llmeter.callbacks` for more information. + low_memory (bool): When ``True``, responses are written to disk but not kept in memory + during the run. Stats are computed incrementally via + :class:`~llmeter.utils.RunningStats`. Requires ``output_path`` to be set. Use + ``result.load_responses()`` to load responses from disk after the run. Defaults to + ``False``. + progress_bar_stats (dict | None): Controls which live stats appear on the progress bar. + Maps short display labels to field specs — see + :attr:`RunningStats.DEFAULT_SNAPSHOT_STATS` for the format and defaults. Pass ``{}`` + to disable live stats entirely. Defaults to ``None`` (use built-in defaults). disable_per_client_progress_bar (bool): Set `True` to disable per-client progress bars from showing during the run. Default `False` (each client's progress will be shown). disable_clients_progress_bar (bool): Set `True` to disable overall progress bar from @@ -600,6 +652,8 @@ async def run( run_description: str | None = None, timeout: int | float | None = None, callbacks: list[Callback] | None = None, + low_memory: bool | None = None, + progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None, disable_per_client_progress_bar: bool | None = None, disable_clients_progress_bar: bool | None = None, ) -> Result: @@ -635,6 +689,36 @@ async def run( endpoint. callbacks (list[Callback] | None): Optional callbacks to enable during the test Run. See `llmeter.callbacks` for more information. + low_memory (bool): When ``True``, responses are written to disk but not + kept in memory. Stats are computed incrementally via + :class:`~llmeter.utils.RunningStats`. Requires ``output_path``. + Use ``result.load_responses()`` to access responses after the run. + + Example:: + + result = await runner.run( + output_path="/tmp/my_run", + low_memory=True, + ) + result.stats # works (computed incrementally) + result.responses # [] (empty) + result.load_responses() # loads from disk + + progress_bar_stats (dict): Controls which live stats appear on the + progress bar. Maps short display labels to field specs — see + :attr:`RunningStats.DEFAULT_SNAPSHOT_STATS` for the format and + defaults. Pass ``{}`` to disable live stats entirely. + + Example:: + + # Show only p99 latency and tokens per second: + result = await runner.run( + progress_bar_stats={ + "p99_ttlt": ("time_to_last_token", "p99"), + "tps": ("time_per_output_token", "p50", "inv"), + "fail": "failed", + }, + ) disable_per_client_progress_bar (bool): Set `True` to disable per-client progress bars from showing during the run. disable_clients_progress_bar (bool): Set `True` to disable overall progress bar from @@ -667,6 +751,8 @@ async def run( run_description=run_description, timeout=timeout, callbacks=callbacks, + low_memory=low_memory, + progress_bar_stats=progress_bar_stats, disable_per_client_progress_bar=disable_per_client_progress_bar, disable_clients_progress_bar=disable_clients_progress_bar, ) diff --git a/llmeter/utils.py b/llmeter/utils.py index d072e58..fd30d0f 100644 --- a/llmeter/utils.py +++ b/llmeter/utils.py @@ -1,5 +1,6 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +import bisect from datetime import datetime, timezone from itertools import filterfalse from math import isnan @@ -83,6 +84,241 @@ def summary_stats_from_list( return {} +class RunningStats: + """Accumulate summary statistics incrementally from individual responses. + + Maintains sorted value lists per metric so that percentiles (p50, p90, p99), + averages, and sums can be computed at any point — both mid-run (for live + progress-bar display via :meth:`snapshot`) and at the end of a run (for the + final :class:`~llmeter.results.Result` stats via :meth:`to_stats`). + + Args: + metrics: Names of numeric response fields to track (e.g. + ``"time_to_first_token"``, ``"num_tokens_output"``). + + Example:: + + rs = RunningStats(metrics=["time_to_first_token", "time_to_last_token"]) + rs.update({"time_to_first_token": 0.3, "time_to_last_token": 0.8}) + rs.update({"time_to_first_token": 0.5, "time_to_last_token": 1.2, "error": None}) + rs.to_stats() + # {'failed_requests': 0, ..., 'time_to_first_token-p50': 0.4, ...} + """ + + #: Default stats shown on the progress bar during a run. + #: Each entry maps a short display label to a spec: + #: + #: * ``(metric_name, aggregation)`` — aggregation can be ``"p50"``, ``"p90"``, + #: ``"p99"``, ``"average"``, or ``"sum"``. + #: * ``(metric_name, aggregation, "inv")`` — same as above but displays the + #: reciprocal (e.g. seconds-per-token → tokens-per-second). + #: * The literal string ``"failed"`` for the running failure count. + DEFAULT_SNAPSHOT_STATS: dict[str, tuple[str, ...] | str] = { + "p50_ttft": ("time_to_first_token", "p50"), + "p90_ttft": ("time_to_first_token", "p90"), + "p50_ttlt": ("time_to_last_token", "p50"), + "p90_ttlt": ("time_to_last_token", "p90"), + "p50_tps": ("time_per_output_token", "p50", "inv"), + "input_tokens": ("num_tokens_input", "sum"), + "output_tokens": ("num_tokens_output", "sum"), + "fail": "failed", + } + + def __init__(self, metrics: Sequence[str]): + self._metrics = list(metrics) + self._count = 0 + self._failed = 0 + self._sums: dict[str, float] = {m: 0.0 for m in metrics} + self._values: dict[str, list[float]] = {m: [] for m in metrics} + + def update(self, response_dict: dict[str, Any]) -> None: + """Record one response's metric values. + + Call this once per :class:`~llmeter.endpoints.base.InvocationResponse` + (typically via ``response.to_dict()``). The method extracts each tracked + metric from *response_dict*, skipping ``None`` and ``NaN`` values, and + increments the failure counter when an ``"error"`` key is present. + + Args: + response_dict: A flat dictionary of response fields, as returned by + ``InvocationResponse.to_dict()``. + + Example:: + + rs = RunningStats(metrics=["time_to_first_token"]) + rs.update({"time_to_first_token": 0.42, "error": None}) + rs.update({"time_to_first_token": None, "error": "timeout"}) + assert rs._failed == 1 + """ + self._count += 1 + if response_dict.get("error") is not None: + self._failed += 1 + for m in self._metrics: + val = response_dict.get(m) + if val is not None and not (isinstance(val, float) and isnan(val)): + self._sums[m] += val + bisect.insort(self._values[m], val) + + def to_stats( + self, + total_requests: int | None = None, + total_test_time: float | None = None, + result_dict: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """Compute all accumulated statistics as raw numeric values. + + This is the single source of truth for stats computation. It is called + once at the end of a run (with all three optional arguments) to produce + the full ``Result.stats`` dict, and also called internally by + :meth:`snapshot` (without arguments) for mid-run progress display. + + Args: + total_requests: Total number of requests across all clients. When + provided, enables ``failed_requests_rate`` and + ``requests_per_minute`` computation. + total_test_time: Wall-clock duration of the run in seconds. When + provided, enables throughput metrics (requests/min, tokens/min). + result_dict: Base key-value pairs to include in the output (typically + from ``Result.to_dict()``). When ``None``, only metric + aggregations and failure counts are returned. + + Returns: + A flat dictionary of statistics. Keys include: + + * ``failed_requests``, ``failed_requests_rate``, ``requests_per_minute`` + * ``total_input_tokens``, ``total_output_tokens`` + * ``average_input_tokens_per_minute``, ``average_output_tokens_per_minute`` + * ``{metric}-{agg}`` for each tracked metric and each aggregation + (``average``, ``p50``, ``p90``, ``p99``). + + Example:: + + rs = RunningStats(metrics=["time_to_first_token", "num_tokens_output"]) + for resp in responses: + rs.update(resp.to_dict()) + + # Mid-run (no run-level context): + partial = rs.to_stats() + partial["time_to_first_token-p50"] # 0.312 + + # End of run (full Result.stats schema): + full = rs.to_stats( + total_requests=100, + total_test_time=42.5, + result_dict=result.to_dict(), + ) + full["requests_per_minute"] # 141.2 + """ + stats: dict[str, Any] = {} + if result_dict is not None: + stats.update(result_dict) + + # Run-level stats + stats["failed_requests"] = self._failed + stats["failed_requests_rate"] = total_requests and self._failed / total_requests + stats["requests_per_minute"] = ( + total_test_time and total_requests / total_test_time * 60 + if total_requests + else None + ) + stats["total_input_tokens"] = self._sums.get("num_tokens_input", 0) + stats["total_output_tokens"] = self._sums.get("num_tokens_output", 0) + stats["average_input_tokens_per_minute"] = ( + total_test_time and stats["total_input_tokens"] / total_test_time * 60 + ) + stats["average_output_tokens_per_minute"] = ( + total_test_time and stats["total_output_tokens"] / total_test_time * 60 + ) + + # Per-metric aggregations + for m in self._metrics: + agg = summary_stats_from_list(self._values.get(m, [])) + for j, v in agg.items(): + stats[f"{m}-{j}"] = v + + return stats + + def snapshot( + self, + fields: dict[str, tuple[str, ...] | str] | None = None, + ) -> dict[str, str]: + """Format a subset of :meth:`to_stats` for progress-bar display. + + Calls :meth:`to_stats` internally and picks only the requested fields, + formatting each value as a human-readable string. + + Args: + fields: Mapping of ``{display_label: spec}``. Each *spec* is one of: + + * ``(metric, aggregation)`` — a 2-tuple where *metric* is a tracked + metric name and *aggregation* is ``"p50"``, ``"p90"``, ``"p99"``, + ``"average"``, or ``"sum"``. + * ``(metric, aggregation, "inv")`` — a 3-tuple; same as above but + the value is inverted before display (e.g. seconds-per-token → + tokens-per-second). + * ``"failed"`` — the literal string; shows the running failure count. + + Defaults to :attr:`DEFAULT_SNAPSHOT_STATS` when ``None``. + + Returns: + An ordered dict of ``{label: formatted_value}`` strings suitable for + ``tqdm.set_postfix()``. + + Example:: + + # Use defaults: + rs.snapshot() + # {'p50_ttft': '0.312s', 'p90_ttlt': '1.203s', ..., 'fail': '0'} + + # Custom selection — only p99 latency and failures: + rs.snapshot({ + "p99_ttlt": ("time_to_last_token", "p99"), + "fail": "failed", + }) + # {'p99_ttlt': '2.105s', 'fail': '1'} + + # Inverted metric — tokens per second from time_per_output_token: + rs.snapshot({ + "tps": ("time_per_output_token", "p50", "inv"), + }) + # {'tps': '28.3 tok/s'} + """ + if self._count == 0: + return {} + + if fields is None: + fields = self.DEFAULT_SNAPSHOT_STATS + + raw = self.to_stats() + + info: dict[str, str] = {} + for label, spec in fields.items(): + if spec == "failed": + info[label] = str(self._failed) + continue + + metric = spec[0] + agg = spec[1] + invert = len(spec) > 2 and spec[2] == "inv" + + if agg == "sum": + info[label] = f"{self._sums.get(metric, 0):.0f}" + continue + + val = raw.get(f"{metric}-{agg}") + if val is None: + continue + + if invert and val > 0: + info[label] = f"{1.0 / val:.1f} tok/s" + elif "time" in metric: + info[label] = f"{val:.3f}s" + else: + info[label] = f"{val:.1f}" + + return info + + def now_utc() -> datetime: """Returns the current UTC datetime. diff --git a/tests/unit/test_lazy_load.py b/tests/unit/test_lazy_load.py index ed02b86..14436ce 100644 --- a/tests/unit/test_lazy_load.py +++ b/tests/unit/test_lazy_load.py @@ -1,12 +1,12 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +from unittest.mock import MagicMock, patch + import pytest from upath import UPath from llmeter.endpoints.base import InvocationResponse -from unittest.mock import MagicMock, patch - from llmeter.experiments import LoadTestResult from llmeter.results import Result @@ -129,14 +129,14 @@ def test_load_responses_returns_correct_data(self, sample_responses, saved_resul assert orig.time_to_first_token == loaded_resp.time_to_first_token assert orig.time_to_last_token == loaded_resp.time_to_last_token - def test_load_responses_invalidates_cached_stats(self, saved_result): + def test_load_responses_recomputes_stats(self, saved_result): loaded = Result.load(saved_result, load_responses=True) - # Access _builtin_stats to cache it - _ = loaded._builtin_stats - assert "_builtin_stats" in loaded.__dict__ + original_stats = loaded._preloaded_stats.copy() loaded.load_responses() - assert "_builtin_stats" not in loaded.__dict__ + # Stats should be recomputed (same values, but a fresh dict) + assert loaded._preloaded_stats is not original_stats + assert loaded._preloaded_stats == original_stats def test_load_responses_stats_match_full_load(self, saved_result): full = Result.load(saved_result, load_responses=True) @@ -153,9 +153,9 @@ def test_load_responses_stats_match_full_load(self, saved_result): "failed_requests", "requests_per_minute", ]: - assert lazy_stats[key] == pytest.approx( - full_stats[key] - ), f"Mismatch on {key}" + assert lazy_stats[key] == pytest.approx(full_stats[key]), ( + f"Mismatch on {key}" + ) def test_load_responses_no_output_path_raises(self): result = Result( diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index 9262d16..73a6e63 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -192,7 +192,9 @@ def test_stats_property(sample_result: Result): assert key in stats # Test caching returns same object for built-in stats: - assert sample_result._builtin_stats is sample_result._builtin_stats + assert sample_result._preloaded_stats is None or isinstance( + sample_result._preloaded_stats, dict + ) def test_stats_property_empty_result(): From 2091da436dcae13d632906c1aa297f41f1bbb9ac Mon Sep 17 00:00:00 2001 From: Alessandro Cere Date: Thu, 2 Apr 2026 10:19:01 -0700 Subject: [PATCH 2/3] feat: time-bound runs, live stats display, and send-window metrics Add run_duration parameter for time-bound test runs: - New run_duration on Runner/run() and LoadTest: clients send requests continuously for a fixed duration instead of a fixed count. - Dedicated _invoke_for_duration / _invoke_duration_c methods (separate from count-bound _invoke_n / _invoke_n_c). - Time-based progress bar via _tick_time_bar async task. - Mutual exclusivity validation between n_requests and run_duration. Add LiveStatsDisplay for readable live metrics: - New llmeter/live_display.py: HTML table in Jupyter (grouped columns for Throughput, TTFT, TTLT, Tokens, Errors), ANSI multi-line in terminals. Updates in-place, shows placeholders before first response. - Replaces single-line tqdm postfix with a separate stats row. Improve throughput metric accuracy: - RunningStats.record_send() tracks send-side timestamps. - RPM and output_tps use send window (first-to-last request sent) instead of response-side elapsed time, preventing taper-off as clients finish. - output_tps (aggregate tokens/s) added to default snapshot stats. Fix StopIteration silently terminating invocation loops: - Both _invoke_n_no_wait and _invoke_for_duration now use while/next() instead of for-in-cycle() to prevent StopIteration from streaming endpoints from killing the loop. Add LoadTest support for new features: - run_duration, low_memory, progress_bar_stats forwarded to each run. Add example notebook and documentation: - examples/Time-bound runs with Bedrock OpenAI API.ipynb: end-to-end demo using bedrock-mantle endpoint with LoadTest, custom stats, low-memory mode, and comparison charts (RPM, TPS, TTFT, TTLT). - docs/user_guide/run_experiments.md: new sections for time-bound runs, live progress-bar stats, and low-memory mode. Add tests (51 new, 504 total): - test_running_stats.py: record_send, update, to_stats, snapshot (placeholders, rpm, output_tps, send window, aggregations). - test_live_display.py: _classify, _group_stats, _in_notebook, LiveStatsDisplay (disabled, terminal, overwrite, prefix). - test_experiments.py: LoadTest with run_duration/low_memory/ progress_bar_stats field storage and runner forwarding. - test_runner.py: time-bound validation, _invoke_for_duration, full run with duration, output path, multiple clients. --- docs/user_guide/run_experiments.md | 63 + ...e-bound runs with Bedrock OpenAI API.ipynb | 9672 +++++++++++++++++ llmeter/experiments.py | 139 +- llmeter/live_display.py | 238 + llmeter/runner.py | 372 +- llmeter/utils.py | 56 +- tests/unit/test_experiments.py | 119 + tests/unit/test_live_display.py | 154 + tests/unit/test_runner.py | 205 + tests/unit/test_running_stats.py | 220 + 10 files changed, 11157 insertions(+), 81 deletions(-) create mode 100644 examples/Time-bound runs with Bedrock OpenAI API.ipynb create mode 100644 llmeter/live_display.py create mode 100644 tests/unit/test_live_display.py create mode 100644 tests/unit/test_running_stats.py diff --git a/docs/user_guide/run_experiments.md b/docs/user_guide/run_experiments.md index 7f843de..87d6819 100644 --- a/docs/user_guide/run_experiments.md +++ b/docs/user_guide/run_experiments.md @@ -34,6 +34,69 @@ run_2_results = await endpoint_test.run(payload=sample_payload, n_requests=10, c assert run_1_results.output_path != run_2_results.output_path ``` +### Time-bound runs + +By default, a Run sends a fixed number of requests per client (`n_requests`). Alternatively, you can use `run_duration` to run each client for a fixed number of **seconds** instead — useful when you want to measure sustained throughput over a time window rather than a fixed batch size. + +```python +# Run for 60 seconds with 10 concurrent clients: +results = await endpoint_test.run( + payload=sample_payload, + run_duration=60, + clients=10, +) + +results.total_requests # actual number of requests completed +results.stats["requests_per_minute"] # observed throughput +``` + +`n_requests` and `run_duration` are mutually exclusive — set one or the other, not both. + +During a time-bound run, the progress bar shows two lines: a time bar that fills as seconds elapse, and a request counter with live statistics (requests per minute, latency percentiles, tokens per second, etc.). + +### Live progress-bar statistics + +Both count-bound and time-bound runs display live statistics on the progress bar as requests complete. By default these include p50/p90 TTFT and TTLT, median output tokens per second, total input/output tokens, requests per minute, and failure count. + +You can customize which stats are shown via the `progress_bar_stats` parameter: + +```python +# Show only p99 latency, tokens/s, and rpm: +results = await endpoint_test.run( + payload=sample_payload, + n_requests=100, + clients=5, + progress_bar_stats={ + "rpm": "rpm", + "p99_ttlt": ("time_to_last_token", "p99"), + "tps": ("time_per_output_token", "p50", "inv"), + "fail": "failed", + }, +) +``` + +Pass `progress_bar_stats={}` to disable live stats entirely. See [`RunningStats.DEFAULT_SNAPSHOT_STATS`](../reference/utils.md#llmeter.utils.RunningStats) for the full default configuration. + +### Low-memory mode + +For large-scale runs where keeping all responses in memory is impractical, set `low_memory=True`. Responses are written to disk as they arrive but not accumulated in memory. Statistics are computed incrementally and available immediately via `result.stats`. + +```python +results = await endpoint_test.run( + payload=sample_payload, + run_duration=300, + clients=50, + output_path="outputs/large_run", + low_memory=True, +) + +results.stats # works — computed incrementally during the run +results.responses # [] — not in memory +results.load_responses() # loads from disk on demand +``` + +`low_memory=True` requires `output_path` to be set. + ## Analyzing Run results The [Result](../reference/results.md#llmeter.results.Result) of a Run provides basic metadata, a wide range of pre-computed `.stats`, and also access to the individual `.responses` ([InvocationResponse](../reference/endpoints/base/#llmeter.endpoints.base.InvocationResponse) objects). diff --git a/examples/Time-bound runs with Bedrock OpenAI API.ipynb b/examples/Time-bound runs with Bedrock OpenAI API.ipynb new file mode 100644 index 0000000..f982f7e --- /dev/null +++ b/examples/Time-bound runs with Bedrock OpenAI API.ipynb @@ -0,0 +1,9672 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Time-bound Runs with Bedrock OpenAI-compatible API\n", + "\n", + "This notebook demonstrates how to use LLMeter's **time-bound run** feature to measure\n", + "sustained throughput and latency over a fixed time window, using Amazon Bedrock's\n", + "[OpenAI-compatible Chat Completion API](https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-openai-chat.html).\n", + "\n", + "Instead of specifying a fixed number of requests, you set a `run_duration` in seconds\n", + "and LLMeter sends requests continuously until the time expires — giving you a realistic\n", + "picture of steady-state performance.\n", + "\n", + "We also cover:\n", + "- **Live progress-bar statistics** (rpm, latency percentiles, tokens/s)\n", + "- **Low-memory mode** for large-scale runs\n", + "- **Custom progress-bar stats** configuration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "Install LLMeter with plotting extras, the OpenAI SDK, and the Bedrock token generator." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# %pip install \"llmeter[plotting]<1\" openai aws-bedrock-token-generator" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from llmeter.endpoints.openai import OpenAICompletionStreamEndpoint\n", + "from llmeter.runner import Runner" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure the Bedrock OpenAI-compatible endpoint\n", + "\n", + "Amazon Bedrock exposes an [OpenAI-compatible Chat Completions API](https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-openai-chat.html)\n", + "accessible via the `bedrock-mantle` endpoint. Authentication uses a temporary token\n", + "generated from your AWS credentials via `aws-bedrock-token-generator`.\n", + "\n", + "We use `OpenAICompletionStreamEndpoint` from LLMeter, which works with any\n", + "OpenAI Chat Completions-compatible API — including Bedrock's mantle endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Region: us-west-2\n", + "Model: openai.gpt-oss-120b\n", + "Endpoint: https://bedrock-mantle.us-west-2.api.aws/v1\n" + ] + } + ], + "source": [ + "from aws_bedrock_token_generator import provide_token\n", + "\n", + "AWS_REGION = os.environ.get(\"AWS_REGION\", \"us-east-1\")\n", + "\n", + "# OpenAI GPT-OSS models available on the mantle endpoint:\n", + "# openai.gpt-oss-120b — larger model for complex tasks (120B parameters)\n", + "# openai.gpt-oss-20b — smaller, faster model (20B parameters)\n", + "# Use the Models API to discover all available models in your region.\n", + "MODEL_ID = \"openai.gpt-oss-120b\" # Choose a model available via the Chat Completions API\n", + "BASE_URL = f\"https://bedrock-mantle.{AWS_REGION}.api.aws/v1\"\n", + "\n", + "# Generate temporary token for Bedrock authentication\n", + "token = provide_token(region=AWS_REGION)\n", + "\n", + "bedrock_endpoint = OpenAICompletionStreamEndpoint(\n", + " model_id=MODEL_ID,\n", + " endpoint_name=\"bedrock-mantle\",\n", + " provider=\"bedrock\",\n", + " base_url=BASE_URL,\n", + " api_key=token,\n", + ")\n", + "\n", + "print(f\"Region: {AWS_REGION}\")\n", + "print(f\"Model: {MODEL_ID}\")\n", + "print(f\"Endpoint: {BASE_URL}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Verify the endpoint\n", + "\n", + "Send a single request to confirm the endpoint is working and LLMeter captures\n", + "the expected metrics." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"response_text\": \"Latency is the time it takes for a single piece of data to travel from source to destination, i.e., the delay before a response begins. Throughput measures how much data can be transferred successfully over a network or system in a given period of time, typically expressed in bits per second or requests per second. In short, latency is about *speed of response* for one item, while throughput is about *volume of work* that can be handled overall.\",\n", + " \"input_payload\": {\n", + " \"messages\": [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Explain the difference between latency and throughput in 3 sentences.\"\n", + " }\n", + " ],\n", + " \"max_tokens\": 256,\n", + " \"model\": \"openai.gpt-oss-120b\",\n", + " \"stream\": true,\n", + " \"stream_options\": {\n", + " \"include_usage\": true\n", + " }\n", + " },\n", + " \"id\": \"chatcmpl-e33a8b37-ad37-4946-9f05-8ce91013245f\",\n", + " \"input_prompt\": \"Explain the difference between latency and throughput in 3 sentences.\",\n", + " \"time_to_first_token\": 1.5777457500007586,\n", + " \"time_to_last_token\": 1.680538542001159,\n", + " \"num_tokens_input\": 77,\n", + " \"num_tokens_output\": 138,\n", + " \"time_per_output_token\": null,\n", + " \"error\": null,\n", + " \"retries\": null\n", + "}\n" + ] + } + ], + "source": [ + "sample_payload = OpenAICompletionStreamEndpoint.create_payload(\n", + " \"Explain the difference between latency and throughput in 3 sentences.\",\n", + " max_tokens=256,\n", + ")\n", + "\n", + "response = bedrock_endpoint.invoke(sample_payload)\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should see `time_to_first_token`, `time_to_last_token`, and token counts in the\n", + "response. If you see an error, check your AWS credentials and model access." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Count-bound run (baseline)\n", + "\n", + "First, let's run a traditional count-bound test for comparison." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "85613de232074b3192ce45fcf85775b4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Total requests: 0%| | 0/15 [00:00ThroughputTTFTTTLTTokensErrorsrpm  99.6p50_ttft  1.118sp50_ttlt  2.205sinput_tokens  1155fail  0output_tps  215.5 tok/sp90_ttft  3.083sp90_ttlt  5.363soutput_tokens  1947p50_tps  182.0 tok/s" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total requests: 15\n", + "Duration: 14.3s\n", + "RPM: 63.1\n" + ] + } + ], + "source": [ + "runner = Runner(\n", + " bedrock_endpoint,\n", + " output_path=f\"outputs/{MODEL_ID}\",\n", + ")\n", + "\n", + "count_result = await runner.run(\n", + " payload=sample_payload,\n", + " n_requests=5,\n", + " clients=3,\n", + " run_name=\"count-bound-baseline\",\n", + ")\n", + "\n", + "print(f\"Total requests: {count_result.total_requests}\")\n", + "print(f\"Duration: {count_result.total_test_time:.1f}s\")\n", + "print(f\"RPM: {count_result.stats['requests_per_minute']:.1f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Time-bound run\n", + "\n", + "Now let's run the same endpoint for a fixed duration instead. This is useful when you\n", + "want to measure **sustained throughput** — how many requests the endpoint can handle\n", + "over a realistic time window.\n", + "\n", + "Set `run_duration` (in seconds) instead of `n_requests`. Each client sends requests\n", + "continuously until the duration expires." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "57cf16076c0d4cffac21c4b69f35d2b3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Elapsed: | 0/30s [00:00]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "reqs=29
ThroughputTTFTTTLTTokensErrors
rpm  58.0p50_ttft  1.180sp50_ttlt  2.955sinput_tokens  2233fail  0
output_tps  125.0 tok/sp90_ttft  3.765sp90_ttlt  6.345soutput_tokens  3750
p50_tps  86.3 tok/s
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total requests completed: 29\n", + "Actual duration: 32.4s\n", + "RPM: 53.7\n", + "p50 TTFT: 1.180s\n", + "p90 TTLT: 6.345s\n" + ] + } + ], + "source": [ + "duration_result = await runner.run(\n", + " payload=sample_payload,\n", + " run_duration=30, # Run for 30 seconds\n", + " clients=3,\n", + " run_name=\"time-bound-30s\",\n", + ")\n", + "\n", + "print(f\"Total requests completed: {duration_result.total_requests}\")\n", + "print(f\"Actual duration: {duration_result.total_test_time:.1f}s\")\n", + "print(f\"RPM: {duration_result.stats['requests_per_minute']:.1f}\")\n", + "print(f\"p50 TTFT: {duration_result.stats['time_to_first_token-p50']:.3f}s\")\n", + "print(f\"p90 TTLT: {duration_result.stats['time_to_last_token-p90']:.3f}s\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that during the run, you see two progress bars:\n", + "- **Elapsed**: a time bar filling up as seconds pass\n", + "- **Requests**: a counter with live stats (rpm, latency percentiles, tokens/s, etc.)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load test: scaling with more clients\n", + "\n", + "Time-bound runs are great for exploring how throughput scales with concurrency.\n", + "LLMeter's `LoadTest` experiment automates this — it runs each concurrency level\n", + "for the same duration and collects results for comparison.\n", + "\n", + "The `run_duration` parameter makes each concurrency level run for a fixed time\n", + "window, giving a fair comparison of sustained throughput." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "96c029b9166648bf89091ad3a2a08bd0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Configurations: 0%| | 0/4 [00:00reqs=13
ThroughputTTFTTTLTTokensErrors
rpm  26.8p50_ttft  1.071sp50_ttlt  1.927sinput_tokens  1001fail  0
output_tps  59.8 tok/sp90_ttft  3.160sp90_ttlt  6.137soutput_tokens  1741
p50_tps  184.6 tok/s
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d5d22e732c6c4da7a191860e1b7ab87b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Elapsed: | 0/30s [00:00]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "reqs=37
ThroughputTTFTTTLTTokensErrors
rpm  75.0p50_ttft  0.886sp50_ttlt  2.035sinput_tokens  2849fail  0
output_tps  171.7 tok/sp90_ttft  2.327sp90_ttlt  4.703soutput_tokens  5079
p50_tps  113.6 tok/s
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "be031a6b2d5e46dca8b0238bc0b1dd9e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Elapsed: | 0/30s [00:00]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "reqs=63
ThroughputTTFTTTLTTokensErrors
rpm  126.6p50_ttft  1.181sp50_ttlt  2.388sinput_tokens  4851fail  0
output_tps  281.6 tok/sp90_ttft  2.326sp90_ttlt  4.191soutput_tokens  8411
p50_tps  114.9 tok/s
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bf8f929d26134894ad73b5197385d783", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Elapsed: | 0/30s [00:00]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "reqs=106
ThroughputTTFTTTLTTokensErrors
rpm  214.4p50_ttft  1.417sp50_ttlt  3.034sinput_tokens  8162fail  0
output_tps  481.3 tok/sp90_ttft  3.600sp90_ttlt  4.954soutput_tokens  14279
p50_tps  103.0 tok/s
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from llmeter.experiments import LoadTest\n", + "\n", + "load_test = LoadTest(\n", + " endpoint=bedrock_endpoint,\n", + " payload=sample_payload,\n", + " sequence_of_clients=[1, 3, 5, 10],\n", + " run_duration=30, # Each concurrency level runs for 30 seconds\n", + " output_path=f\"outputs/{MODEL_ID}\",\n", + " test_name=\"time-bound-load-test\",\n", + ")\n", + "\n", + "load_test_result = await load_test.run()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 1 clients: 13 requests, 23 rpm, p50 TTFT=1.0713617910005269s\n", + " 3 clients: 37 requests, 68 rpm, p50 TTFT=0.8862732499983395s\n", + " 5 clients: 63 requests, 118 rpm, p50 TTFT=1.181208833004348s\n", + " 10 clients: 106 requests, 196 rpm, p50 TTFT=1.4166151039971737s\n" + ] + } + ], + "source": [ + "# Print summary for each concurrency level\n", + "for n_clients, result in sorted(load_test_result.results.items()):\n", + " print(\n", + " f\" {n_clients} clients: \"\n", + " f\"{result.total_requests} requests, \"\n", + " f\"{result.stats['requests_per_minute']:.0f} rpm, \"\n", + " f\"p50 TTFT={result.stats.get('time_to_first_token-p50', 'N/A')}s\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Custom progress-bar statistics\n", + "\n", + "You can control which live stats appear on the progress bar via `progress_bar_stats`.\n", + "Each entry maps a display label to a field spec." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bbfb9c26d3ae441890e7973e44cf0a40", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Elapsed: | 0/15s [00:00]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "reqs=17
ThroughputTTLTErrors
rpm  71.4p99_ttlt  5.521sfail  0
tps  75.3 tok/s
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "result = await runner.run(\n", + " payload=sample_payload,\n", + " run_duration=15,\n", + " clients=3,\n", + " run_name=\"custom-stats\",\n", + " progress_bar_stats={\n", + " \"rpm\": \"rpm\",\n", + " \"p99_ttlt\": (\"time_to_last_token\", \"p99\"),\n", + " \"tps\": (\"time_per_output_token\", \"p50\", \"inv\"),\n", + " \"fail\": \"failed\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Low-memory mode for large-scale runs\n", + "\n", + "For long-running tests that generate thousands of responses, use `low_memory=True`\n", + "to avoid keeping all responses in memory. Responses are streamed to disk and stats\n", + "are computed incrementally.\n", + "\n", + "This requires `output_path` to be set." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "03f410a858cc4d4ea2cb6513668b4bb4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Elapsed: | 0/60s [00:00]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "reqs=220
ThroughputTTFTTTLTTokensErrors
rpm  220.1p50_ttft  0.880sp50_ttlt  2.555sinput_tokens  16940fail  0
output_tps  485.8 tok/sp90_ttft  3.128sp90_ttlt  4.802soutput_tokens  29135
p50_tps  102.0 tok/s
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total requests: 220\n", + "RPM: 209.5\n", + "Responses in memory: 0\n", + "\n", + "Stats are available without loading responses:\n", + " p50 TTFT: 0.880s\n", + " p90 TTLT: 4.802s\n" + ] + } + ], + "source": [ + "large_result = await runner.run(\n", + " payload=sample_payload,\n", + " run_duration=60,\n", + " clients=10,\n", + " run_name=\"large-low-memory\",\n", + " low_memory=True,\n", + ")\n", + "\n", + "print(f\"Total requests: {large_result.total_requests}\")\n", + "print(f\"RPM: {large_result.stats['requests_per_minute']:.1f}\")\n", + "print(f\"Responses in memory: {len(large_result.responses)}\")\n", + "print(f\"\\nStats are available without loading responses:\")\n", + "print(f\" p50 TTFT: {large_result.stats['time_to_first_token-p50']:.3f}s\")\n", + "print(f\" p90 TTLT: {large_result.stats['time_to_last_token-p90']:.3f}s\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded 1045 responses from disk\n" + ] + } + ], + "source": [ + "# Load responses from disk when needed:\n", + "responses = large_result.load_responses()\n", + "print(f\"Loaded {len(responses)} responses from disk\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Comparing results\n", + "\n", + "The `LoadTestResult` has a built-in `plot_results()` method that generates\n", + "standard charts (TTFT, TTLT, RPM, error rate, token throughput vs clients)." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "name": "time-bound-load-test", + "type": "box", + "x": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10 + ], + "y": [ + 1.810565542000404, + 1.1812222500011558, + 3.868108458002098, + 0.7137894999978016, + 0.5251540839963127, + 0.37060487500275485, + 0.44968758300092304, + 0.3802408749979804, + 1.2211427910006023, + 1.4571087089934736, + 0.589481999995769, + 2.097318999993149, + 1.0713617910005269, + 0.33201908400224056, + 0.7742456669948297, + 2.397023541998351, + 1.8993818329981877, + 0.42726020899863215, + 1.4716677499964135, + 0.46759149999707006, + 0.8862732499983395, + 0.3939086249956745, + 1.0275854999999865, + 0.7189318340024329, + 1.4116186659957748, + 0.7474134169970057, + 0.40672895799798425, + 0.42657495900493814, + 1.2491307079981198, + 0.36702779099869076, + 0.7880193340024562, + 1.8055343329979223, + 2.310052083004848, + 1.7829697910055984, + 2.0935321249999106, + 1.3090343329968164, + 0.7112003330039443, + 4.315548667000257, + 0.8603567079990171, + 1.7188770830034628, + 0.8016122080007335, + 0.7778300419959123, + 0.4183118330038269, + 1.3940931660035858, + 1.555591791999177, + 1.9584035000007134, + 0.697202000003017, + 0.5803299999970477, + 3.9468323339970084, + 0.9757721249989117, + 1.002142333003576, + 0.9818431249950663, + 2.109215791999304, + 2.854000667000946, + 1.1883622079985798, + 2.397148707997985, + 2.0537721249929746, + 1.2194084170041606, + 0.55904924999777, + 1.0641959169952315, + 0.3820479999994859, + 1.1137916250008857, + 0.3648492919965065, + 1.4079380000039237, + 1.1204503329936415, + 0.7068704999983311, + 1.5245771670015529, + 2.0880037499955506, + 0.34228199999779463, + 2.0938974159944337, + 1.313775374997931, + 0.9155073329966399, + 1.178452667001693, + 0.8153431250029826, + 0.7699036249978235, + 0.47562379100418184, + 0.455455625000468, + 1.1047283329971833, + 1.333313959003135, + 0.9293282919970807, + 1.2738797499987413, + 1.299013833006029, + 0.3571828749991255, + 0.48103974999685306, + 0.4824907499933033, + 2.367823166998278, + 2.344774500001222, + 0.42162933399959, + 0.5273924579960294, + 0.4980682919995161, + 0.8799479170047562, + 1.7304949169993051, + 1.6569751249990077, + 1.7166576250019716, + 1.280893458002538, + 2.347654417004378, + 1.1621644590049982, + 1.2532084999984363, + 2.9941925419989275, + 2.0151528749993304, + 1.181208833004348, + 2.2984159589977935, + 1.2298334160004742, + 1.418954875000054, + 1.82515254199825, + 1.2331472500009113, + 0.87708987500082, + 0.50351466700522, + 1.607961791996786, + 0.7267654579991358, + 0.5518672089965548, + 0.7452832909984863, + 1.8045194169972092, + 1.7789633329957724, + 2.605166624998674, + 3.056866250000894, + 1.6265989160019672, + 0.5625950419998844, + 4.143405791000987, + 4.180461500000092, + 3.5610989999986487, + 0.5881356660029269, + 0.9779006249955273, + 3.690586582997639, + 4.832465791005234, + 0.6448706250012037, + 4.863131999998586, + 1.382325790997129, + 0.4846024590005982, + 2.2435131250022096, + 1.5739306670002406, + 2.255111082995427, + 3.899700750000193, + 1.4509044169972185, + 1.2176708330007386, + 2.9069163750027656, + 2.5278627079969738, + 0.40958595799747854, + 1.1879090829970664, + 0.6117010829984793, + 0.6858485839984496, + 2.8986509589958587, + 1.788087708002422, + 2.269181875002687, + 0.9895395829953486, + 2.5265816250030184, + 2.149016749994189, + 0.7873067089967662, + 1.8220124579966068, + 2.769100500001514, + 0.5327938330010511, + 3.213817707997805, + 0.3640439999944647, + 1.7442852500025765, + 0.39150920799875166, + 1.3132492920049117, + 0.5267627499997616, + 0.6975128749982105, + 5.765499125001952, + 0.6618302920032875, + 0.531012917002954, + 2.8477291669987608, + 1.1358064590021968, + 0.3917771249980433, + 1.7981745829965803, + 1.4685571670052013, + 0.5237741250020918, + 0.44857070799480425, + 2.0674697080030455, + 2.1612447919978877, + 0.36861462500382913, + 0.3830527499958407, + 2.0479602080013137, + 1.5750675000017509, + 0.4111657500034198, + 2.939957292001054, + 2.9568471250022412, + 0.37112295900442405, + 3.418946000005235, + 0.3710014160024002, + 0.32679066600394435, + 2.0638363340040087, + 1.0570221669986495, + 0.6728937090010731, + 1.0676320409984328, + 0.3201369589951355, + 1.3715982919966336, + 2.2774177090032026, + 0.39108320900413673, + 0.41870475000177976, + 3.82099775000097, + 1.9569717920021503, + 1.2734504160034703, + 0.9579590420034947, + 1.7040523329997086, + 2.4146079999991343, + 0.7444077079999261, + 2.147584582999116, + 1.8018520420009736, + 0.43014495899842586, + 0.3900236660047085, + 0.4033835000009276, + 1.2873853749988484, + 0.5807214170054067, + 0.5866758340052911, + 0.42062020899902564, + 0.657029166999564, + 1.4600514160047169, + 0.9175232499983395, + 3.7755427500014775, + 2.2247067500065896, + 0.8008953329990618, + 2.464140624993888, + 3.2319100000022445, + 3.814959125003952, + 0.357758082995133, + 0.3283785839958, + 2.6948829159955494, + 1.5338809579989174 + ] + } + ], + "layout": { + "colorway": [ + "rgb(0,0,255)", + "rgb(255,0,0)" + ], + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "white", + "showlakes": true, + "showland": true, + "subunitcolor": "#C8D4E3" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "white", + "polar": { + "angularaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + }, + "bgcolor": "white", + "radialaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "yaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "zaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "baxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "bgcolor": "white", + "caxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Time to first token vs number of clients" + }, + "xaxis": { + "tickformat": "s", + "title": { + "text": "Number of clients" + }, + "type": "log" + }, + "yaxis": { + "tickformat": ".2s", + "title": { + "text": "Time to first token (s)" + }, + "type": "log" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "name": "time-bound-load-test", + "type": "box", + "x": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10, + 10 + ], + "y": [ + 2.026969166996423, + 1.4105780000027153, + 7.017155499997898, + 3.8645369169971673, + 0.8202292089990806, + 0.657336791002308, + 1.9274062499971478, + 1.5236599999989267, + 1.6036553750018356, + 2.048463749997609, + 1.3640452499967068, + 4.816340499994112, + 4.1498347079977975, + 0.8604247920011403, + 4.527456333998998, + 4.830777458999364, + 4.042134957999224, + 0.8596613339977921, + 1.505671082995832, + 1.8728053750019171, + 1.2073444169946015, + 1.3267667499967501, + 2.007788625000103, + 0.8214762089992291, + 1.457438582998293, + 1.9353756669952418, + 1.7568827079958282, + 2.1657332090035197, + 2.5171536249981727, + 2.4936623750036233, + 1.8358565419985098, + 2.5352387500024633, + 4.18328370800009, + 2.1444086660048924, + 4.2379080839964445, + 1.5257862919970648, + 3.2496527500043157, + 7.227931708999677, + 2.9760203749974607, + 2.6227675410045777, + 1.3151865409963648, + 2.4453202499935287, + 1.117245624998759, + 1.9994487080039107, + 1.5960046249965671, + 2.0353990830044495, + 4.6709048750053626, + 1.9501907090016175, + 5.925759499994456, + 2.86150141699909, + 1.250162124997587, + 1.0117991250008345, + 2.1457077500017476, + 4.487846791998891, + 4.524017457995797, + 4.866551667000749, + 5.196730249997927, + 1.3659177080044174, + 0.9260156250020373, + 1.3981401669952902, + 1.0464315839999472, + 2.078450959001202, + 1.0507028339998215, + 1.6905905000021448, + 2.6238793749944307, + 3.485501415998442, + 4.309456792005221, + 4.642852416000096, + 0.7529281669994816, + 2.353995790996123, + 3.662087040996994, + 2.490439165994758, + 3.0507285830026376, + 2.388044457999058, + 3.0282242910034256, + 2.1366724999970756, + 1.6564010420042905, + 2.1372902919974877, + 3.2651202920023934, + 1.7618697919970145, + 1.643917583001894, + 1.6312250420014607, + 2.149333665998711, + 2.428906665998511, + 1.8692639999935636, + 2.433882334000373, + 2.4431621249968885, + 0.8310220839994145, + 1.045902957994258, + 0.8828566669981228, + 1.8837196250024135, + 2.982938708999427, + 3.969376375003776, + 1.741798708004353, + 2.9292802909985767, + 3.2932393750015763, + 1.406095334001293, + 2.6276067910002894, + 3.2220796670007985, + 2.1393764579988783, + 3.168885292005143, + 4.014242750003177, + 3.173061375004181, + 2.579963874995883, + 2.9128747079957975, + 3.0359742090004147, + 2.6948128750009346, + 2.075248291999742, + 2.100795999998809, + 2.645283375000872, + 2.2994953749948763, + 1.601715790995513, + 3.14368887499586, + 2.526538416997937, + 2.6472249580037897, + 3.0995452499992098, + 3.2772074999957113, + 0.9971983330033254, + 4.181597540999064, + 4.221945541998139, + 4.342931500003033, + 1.8289941659968463, + 1.223867082997458, + 4.93074450000131, + 5.136637708004855, + 3.013893208000809, + 5.778089958999772, + 1.426899541002058, + 0.9363519999969867, + 2.89067904100375, + 2.944057749999047, + 3.721774457997526, + 4.210495624996838, + 3.5966603749984642, + 4.719359624999925, + 5.110498792004364, + 2.620230332999199, + 1.5105535420007072, + 3.440597250002611, + 2.618421290993865, + 0.7280902089987649, + 5.12706341699959, + 3.5233514999999898, + 2.377603459004604, + 1.6091642499959562, + 4.864977290999377, + 3.42581183299626, + 2.015671209002903, + 2.5336824169935426, + 3.3301199580018874, + 1.6080059169980814, + 4.445332292001694, + 1.1371099159950973, + 3.4345513340012985, + 2.176926625004853, + 1.318248791998485, + 1.8371326659980696, + 3.397253916999034, + 6.461382583001978, + 2.2144158330047503, + 0.9335729590020492, + 5.486439041997073, + 3.0541425420015003, + 1.6030845409986796, + 4.258436707998044, + 1.996725625002, + 2.5789501250037574, + 0.794825207995018, + 2.5077784169989172, + 4.381451000001107, + 0.7341202500028885, + 0.526786832997459, + 3.1630495419958606, + 3.0988521670005866, + 1.6619982920019538, + 3.71616087500297, + 3.8252187089965446, + 2.4751521250000224, + 5.006649583003309, + 1.3394295000034617, + 3.611661916002049, + 4.377064042004349, + 3.840444875000685, + 1.659149917002651, + 2.832853415995487, + 0.6458929590007756, + 3.5052734589989996, + 5.6474424170010025, + 0.8091522090035141, + 0.9284407500017551, + 6.816903582999657, + 3.9716163330012932, + 1.5373788330034586, + 2.054647875003866, + 3.2553307080015657, + 5.010804708996147, + 3.646031333002611, + 3.5746835000027204, + 4.229959917000087, + 1.9007484999965527, + 0.7102053330017952, + 2.6844349579987465, + 3.390159291004238, + 1.8455996250049793, + 1.423415750003187, + 2.3246775419975165, + 1.438939500003471, + 3.1382235830024, + 2.365440415997, + 4.739204375000554, + 4.722318541003915, + 1.851873166000587, + 4.211808124993695, + 3.862824666000961, + 3.8172453340012, + 2.785455874996842, + 2.5279075840007863, + 4.43163616599486, + 3.2573652919963934 + ] + } + ], + "layout": { + "colorway": [ + "#30123b", + "#4145ab", + "#4675ed", + "#39a2fc", + "#1bcfd4", + "#24eca6", + "#61fc6c", + "#a4fc3b", + "#d1e834", + "#f3c63a", + "#fe9b2d", + "#f36315", + "#d93806", + "#b11901", + "#7a0402" + ], + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "white", + "showlakes": true, + "showland": true, + "subunitcolor": "#C8D4E3" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "white", + "polar": { + "angularaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + }, + "bgcolor": "white", + "radialaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "yaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "zaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "baxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "bgcolor": "white", + "caxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Time to last token vs number of clients" + }, + "xaxis": { + "tickformat": "s", + "title": { + "text": "Number of clients" + }, + "type": "log" + }, + "yaxis": { + "tickformat": ".2s", + "title": { + "text": "Time to last token (s)" + }, + "type": "log" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "line": { + "dash": "dot" + }, + "mode": "lines+markers", + "name": "time-bound-load-test", + "opacity": 0.5, + "type": "scatter", + "x": [ + 1, + 3, + 5, + 10 + ], + "y": [ + 23.435395092386933, + 68.4096394213185, + 118.005947056227, + 196.19625184901724 + ] + } + ], + "layout": { + "colorway": [ + "rgb(124, 29, 111)", + "rgb(185, 37, 122)", + "rgb(220, 57, 119)", + "rgb(227, 79, 111)", + "rgb(240, 116, 110)", + "rgb(250, 164, 118)", + "rgb(252, 222, 156)" + ], + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "white", + "showlakes": true, + "showland": true, + "subunitcolor": "#C8D4E3" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "white", + "polar": { + "angularaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + }, + "bgcolor": "white", + "radialaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "yaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "zaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "baxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "bgcolor": "white", + "caxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Requests per minute vs number of clients" + }, + "xaxis": { + "title": { + "text": "Number of clients" + }, + "type": "log" + }, + "yaxis": { + "tickformat": ".2s", + "title": { + "text": "Requests per minute" + }, + "type": "log" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "line": { + "dash": "dot" + }, + "mode": "lines+markers", + "name": "time-bound-load-test", + "opacity": 0.5, + "type": "scatter", + "x": [ + 1, + 3, + 5, + 10 + ], + "y": [ + 0, + 0, + 0, + 0 + ] + } + ], + "layout": { + "colorway": [ + "rgb(0,0,0)", + "rgb(230,0,0)", + "rgb(230,210,0)", + "rgb(255,255,255)", + "rgb(160,200,255)" + ], + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "white", + "showlakes": true, + "showland": true, + "subunitcolor": "#C8D4E3" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "white", + "polar": { + "angularaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + }, + "bgcolor": "white", + "radialaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "yaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "zaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "baxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "bgcolor": "white", + "caxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Error rate vs number of clients" + }, + "xaxis": { + "title": { + "text": "Number of clients" + }, + "type": "log" + }, + "yaxis": { + "title": { + "text": "Error rate" + } + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "line": { + "dash": "dot" + }, + "mode": "lines+markers", + "name": "time-bound-load-test", + "opacity": 0.5, + "type": "scatter", + "x": [ + 1, + 3, + 5, + 10 + ], + "y": [ + 1804.5254221137939, + 5267.542235441524, + 9086.457923329479, + 15107.111392374327 + ] + } + ], + "layout": { + "colorway": [ + "#440154", + "#482878", + "#3e4989", + "#31688e", + "#26828e", + "#1f9e89", + "#35b779", + "#6ece58", + "#b5de2b", + "#fde725" + ], + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "white", + "showlakes": true, + "showland": true, + "subunitcolor": "#C8D4E3" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "white", + "polar": { + "angularaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + }, + "bgcolor": "white", + "radialaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "yaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "zaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "baxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "bgcolor": "white", + "caxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Average input tokens per minute vs number of clients" + }, + "xaxis": { + "tickformat": ".2s", + "title": { + "text": "Number of clients" + }, + "type": "log" + }, + "yaxis": { + "tickformat": ".2s", + "title": { + "text": "Average input tokens per minute" + }, + "type": "log" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "line": { + "dash": "dot" + }, + "mode": "lines+markers", + "name": "time-bound-load-test", + "opacity": 0.5, + "type": "scatter", + "x": [ + 1, + 3, + 5, + 10 + ], + "y": [ + 3138.5402196804343, + 9390.609692456126, + 15754.73048714167, + 26429.11585049167 + ] + } + ], + "layout": { + "colorway": [ + "#0d0887", + "#46039f", + "#7201a8", + "#9c179e", + "#bd3786", + "#d8576b", + "#ed7953", + "#fb9f3a", + "#fdca26", + "#f0f921" + ], + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "white", + "showlakes": true, + "showland": true, + "subunitcolor": "#C8D4E3" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "white", + "polar": { + "angularaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + }, + "bgcolor": "white", + "radialaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "yaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "zaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "baxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "bgcolor": "white", + "caxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Average output tokens per minute vs number of clients" + }, + "xaxis": { + "tickformat": ".2s", + "title": { + "text": "Number of clients" + }, + "type": "log" + }, + "yaxis": { + "tickformat": ".2s", + "title": { + "text": "Average output tokens per minute" + }, + "type": "log" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "figs = load_test_result.plot_results(show=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom charts: TPS and RPM vs clients\n", + "\n", + "We can also build custom charts from the results. Here we plot the median output\n", + "tokens per second (TPS) and requests per minute (RPM) as a function of concurrency." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "mode": "lines+markers", + "name": "RPM", + "type": "scatter", + "x": [ + 1, + 3, + 5, + 10 + ], + "xaxis": "x", + "y": [ + 23.435395092386933, + 68.4096394213185, + 118.005947056227, + 196.19625184901724 + ], + "yaxis": "y" + }, + { + "mode": "lines+markers", + "name": "TPS (p50)", + "type": "scatter", + "x": [ + 1, + 3, + 5, + 10 + ], + "xaxis": "x2", + "y": [ + 184.62017143187063, + 113.63997467108105, + 114.90946886186481, + 102.97720864610096 + ], + "yaxis": "y2" + } + ], + "layout": { + "annotations": [ + { + "font": { + "size": 16 + }, + "showarrow": false, + "text": "Requests per Minute vs Clients", + "x": 0.225, + "xanchor": "center", + "xref": "paper", + "y": 1, + "yanchor": "bottom", + "yref": "paper" + }, + { + "font": { + "size": 16 + }, + "showarrow": false, + "text": "Median Output Tokens/s vs Clients", + "x": 0.775, + "xanchor": "center", + "xref": "paper", + "y": 1, + "yanchor": "bottom", + "yref": "paper" + } + ], + "height": 400, + "showlegend": false, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 0.45 + ], + "title": { + "text": "Clients" + }, + "type": "log" + }, + "xaxis2": { + "anchor": "y2", + "domain": [ + 0.55, + 1 + ], + "title": { + "text": "Clients" + }, + "type": "log" + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "title": { + "text": "RPM" + } + }, + "yaxis2": { + "anchor": "x2", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Tokens/s" + } + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", + "\n", + "clients_sorted = sorted(load_test_result.results.keys())\n", + "rpms = [load_test_result.results[c].stats[\"requests_per_minute\"] for c in clients_sorted]\n", + "\n", + "# Compute median TPS (1 / p50 time_per_output_token) for each concurrency level\n", + "tps_values = []\n", + "for c in clients_sorted:\n", + " tpot_p50 = load_test_result.results[c].stats.get(\"time_per_output_token-p50\")\n", + " tps_values.append(1.0 / tpot_p50 if tpot_p50 and tpot_p50 > 0 else None)\n", + "\n", + "fig = make_subplots(\n", + " rows=1, cols=2,\n", + " subplot_titles=(\"Requests per Minute vs Clients\", \"Median Output Tokens/s vs Clients\"),\n", + ")\n", + "\n", + "fig.add_trace(\n", + " go.Scatter(x=clients_sorted, y=rpms, mode=\"lines+markers\", name=\"RPM\"),\n", + " row=1, col=1,\n", + ")\n", + "fig.add_trace(\n", + " go.Scatter(x=clients_sorted, y=tps_values, mode=\"lines+markers\", name=\"TPS (p50)\"),\n", + " row=1, col=2,\n", + ")\n", + "\n", + "fig.update_xaxes(title_text=\"Clients\", type=\"log\")\n", + "fig.update_yaxes(title_text=\"RPM\", row=1, col=1)\n", + "fig.update_yaxes(title_text=\"Tokens/s\", row=1, col=2)\n", + "fig.update_layout(height=400, showlegend=False)\n", + "fig" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "name": "1 clients", + "type": "box", + "x": [ + 1.810565542000404, + 1.1812222500011558, + 3.868108458002098, + 0.7137894999978016, + 0.5251540839963127, + 0.37060487500275485, + 0.44968758300092304, + 0.3802408749979804, + 1.2211427910006023, + 1.4571087089934736, + 0.589481999995769, + 2.097318999993149, + 1.0713617910005269 + ] + }, + { + "name": "3 clients", + "type": "box", + "x": [ + 0.33201908400224056, + 0.7742456669948297, + 2.397023541998351, + 1.8993818329981877, + 0.42726020899863215, + 1.4716677499964135, + 0.46759149999707006, + 0.8862732499983395, + 0.3939086249956745, + 1.0275854999999865, + 0.7189318340024329, + 1.4116186659957748, + 0.7474134169970057, + 0.40672895799798425, + 0.42657495900493814, + 1.2491307079981198, + 0.36702779099869076, + 0.7880193340024562, + 1.8055343329979223, + 2.310052083004848, + 1.7829697910055984, + 2.0935321249999106, + 1.3090343329968164, + 0.7112003330039443, + 4.315548667000257, + 0.8603567079990171, + 1.7188770830034628, + 0.8016122080007335, + 0.7778300419959123, + 0.4183118330038269, + 1.3940931660035858, + 1.555591791999177, + 1.9584035000007134, + 0.697202000003017, + 0.5803299999970477, + 3.9468323339970084, + 0.9757721249989117 + ] + }, + { + "name": "5 clients", + "type": "box", + "x": [ + 1.002142333003576, + 0.9818431249950663, + 2.109215791999304, + 2.854000667000946, + 1.1883622079985798, + 2.397148707997985, + 2.0537721249929746, + 1.2194084170041606, + 0.55904924999777, + 1.0641959169952315, + 0.3820479999994859, + 1.1137916250008857, + 0.3648492919965065, + 1.4079380000039237, + 1.1204503329936415, + 0.7068704999983311, + 1.5245771670015529, + 2.0880037499955506, + 0.34228199999779463, + 2.0938974159944337, + 1.313775374997931, + 0.9155073329966399, + 1.178452667001693, + 0.8153431250029826, + 0.7699036249978235, + 0.47562379100418184, + 0.455455625000468, + 1.1047283329971833, + 1.333313959003135, + 0.9293282919970807, + 1.2738797499987413, + 1.299013833006029, + 0.3571828749991255, + 0.48103974999685306, + 0.4824907499933033, + 2.367823166998278, + 2.344774500001222, + 0.42162933399959, + 0.5273924579960294, + 0.4980682919995161, + 0.8799479170047562, + 1.7304949169993051, + 1.6569751249990077, + 1.7166576250019716, + 1.280893458002538, + 2.347654417004378, + 1.1621644590049982, + 1.2532084999984363, + 2.9941925419989275, + 2.0151528749993304, + 1.181208833004348, + 2.2984159589977935, + 1.2298334160004742, + 1.418954875000054, + 1.82515254199825, + 1.2331472500009113, + 0.87708987500082, + 0.50351466700522, + 1.607961791996786, + 0.7267654579991358, + 0.5518672089965548, + 0.7452832909984863, + 1.8045194169972092 + ] + }, + { + "name": "10 clients", + "type": "box", + "x": [ + 1.7789633329957724, + 2.605166624998674, + 3.056866250000894, + 1.6265989160019672, + 0.5625950419998844, + 4.143405791000987, + 4.180461500000092, + 3.5610989999986487, + 0.5881356660029269, + 0.9779006249955273, + 3.690586582997639, + 4.832465791005234, + 0.6448706250012037, + 4.863131999998586, + 1.382325790997129, + 0.4846024590005982, + 2.2435131250022096, + 1.5739306670002406, + 2.255111082995427, + 3.899700750000193, + 1.4509044169972185, + 1.2176708330007386, + 2.9069163750027656, + 2.5278627079969738, + 0.40958595799747854, + 1.1879090829970664, + 0.6117010829984793, + 0.6858485839984496, + 2.8986509589958587, + 1.788087708002422, + 2.269181875002687, + 0.9895395829953486, + 2.5265816250030184, + 2.149016749994189, + 0.7873067089967662, + 1.8220124579966068, + 2.769100500001514, + 0.5327938330010511, + 3.213817707997805, + 0.3640439999944647, + 1.7442852500025765, + 0.39150920799875166, + 1.3132492920049117, + 0.5267627499997616, + 0.6975128749982105, + 5.765499125001952, + 0.6618302920032875, + 0.531012917002954, + 2.8477291669987608, + 1.1358064590021968, + 0.3917771249980433, + 1.7981745829965803, + 1.4685571670052013, + 0.5237741250020918, + 0.44857070799480425, + 2.0674697080030455, + 2.1612447919978877, + 0.36861462500382913, + 0.3830527499958407, + 2.0479602080013137, + 1.5750675000017509, + 0.4111657500034198, + 2.939957292001054, + 2.9568471250022412, + 0.37112295900442405, + 3.418946000005235, + 0.3710014160024002, + 0.32679066600394435, + 2.0638363340040087, + 1.0570221669986495, + 0.6728937090010731, + 1.0676320409984328, + 0.3201369589951355, + 1.3715982919966336, + 2.2774177090032026, + 0.39108320900413673, + 0.41870475000177976, + 3.82099775000097, + 1.9569717920021503, + 1.2734504160034703, + 0.9579590420034947, + 1.7040523329997086, + 2.4146079999991343, + 0.7444077079999261, + 2.147584582999116, + 1.8018520420009736, + 0.43014495899842586, + 0.3900236660047085, + 0.4033835000009276, + 1.2873853749988484, + 0.5807214170054067, + 0.5866758340052911, + 0.42062020899902564, + 0.657029166999564, + 1.4600514160047169, + 0.9175232499983395, + 3.7755427500014775, + 2.2247067500065896, + 0.8008953329990618, + 2.464140624993888, + 3.2319100000022445, + 3.814959125003952, + 0.357758082995133, + 0.3283785839958, + 2.6948829159955494, + 1.5338809579989174 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Time to First Token by Client Count" + }, + "xaxis": { + "title": { + "text": "Time to First Token (s)" + }, + "type": "log" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from llmeter.plotting import boxplot_by_dimension\n", + "\n", + "fig = go.Figure(layout=dict(title=\"Time to First Token by Client Count\"))\n", + "for n_clients in clients_sorted:\n", + " fig.add_trace(\n", + " boxplot_by_dimension(\n", + " load_test_result.results[n_clients],\n", + " dimension=\"time_to_first_token\",\n", + " name=f\"{n_clients} clients\",\n", + " )\n", + " )\n", + "fig.update_xaxes(type=\"log\", title=\"Time to First Token (s)\")\n", + "fig" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "name": "1 clients", + "type": "box", + "x": [ + 2.026969166996423, + 1.4105780000027153, + 7.017155499997898, + 3.8645369169971673, + 0.8202292089990806, + 0.657336791002308, + 1.9274062499971478, + 1.5236599999989267, + 1.6036553750018356, + 2.048463749997609, + 1.3640452499967068, + 4.816340499994112, + 4.1498347079977975 + ] + }, + { + "name": "3 clients", + "type": "box", + "x": [ + 0.8604247920011403, + 4.527456333998998, + 4.830777458999364, + 4.042134957999224, + 0.8596613339977921, + 1.505671082995832, + 1.8728053750019171, + 1.2073444169946015, + 1.3267667499967501, + 2.007788625000103, + 0.8214762089992291, + 1.457438582998293, + 1.9353756669952418, + 1.7568827079958282, + 2.1657332090035197, + 2.5171536249981727, + 2.4936623750036233, + 1.8358565419985098, + 2.5352387500024633, + 4.18328370800009, + 2.1444086660048924, + 4.2379080839964445, + 1.5257862919970648, + 3.2496527500043157, + 7.227931708999677, + 2.9760203749974607, + 2.6227675410045777, + 1.3151865409963648, + 2.4453202499935287, + 1.117245624998759, + 1.9994487080039107, + 1.5960046249965671, + 2.0353990830044495, + 4.6709048750053626, + 1.9501907090016175, + 5.925759499994456, + 2.86150141699909 + ] + }, + { + "name": "5 clients", + "type": "box", + "x": [ + 1.250162124997587, + 1.0117991250008345, + 2.1457077500017476, + 4.487846791998891, + 4.524017457995797, + 4.866551667000749, + 5.196730249997927, + 1.3659177080044174, + 0.9260156250020373, + 1.3981401669952902, + 1.0464315839999472, + 2.078450959001202, + 1.0507028339998215, + 1.6905905000021448, + 2.6238793749944307, + 3.485501415998442, + 4.309456792005221, + 4.642852416000096, + 0.7529281669994816, + 2.353995790996123, + 3.662087040996994, + 2.490439165994758, + 3.0507285830026376, + 2.388044457999058, + 3.0282242910034256, + 2.1366724999970756, + 1.6564010420042905, + 2.1372902919974877, + 3.2651202920023934, + 1.7618697919970145, + 1.643917583001894, + 1.6312250420014607, + 2.149333665998711, + 2.428906665998511, + 1.8692639999935636, + 2.433882334000373, + 2.4431621249968885, + 0.8310220839994145, + 1.045902957994258, + 0.8828566669981228, + 1.8837196250024135, + 2.982938708999427, + 3.969376375003776, + 1.741798708004353, + 2.9292802909985767, + 3.2932393750015763, + 1.406095334001293, + 2.6276067910002894, + 3.2220796670007985, + 2.1393764579988783, + 3.168885292005143, + 4.014242750003177, + 3.173061375004181, + 2.579963874995883, + 2.9128747079957975, + 3.0359742090004147, + 2.6948128750009346, + 2.075248291999742, + 2.100795999998809, + 2.645283375000872, + 2.2994953749948763, + 1.601715790995513, + 3.14368887499586 + ] + }, + { + "name": "10 clients", + "type": "box", + "x": [ + 2.526538416997937, + 2.6472249580037897, + 3.0995452499992098, + 3.2772074999957113, + 0.9971983330033254, + 4.181597540999064, + 4.221945541998139, + 4.342931500003033, + 1.8289941659968463, + 1.223867082997458, + 4.93074450000131, + 5.136637708004855, + 3.013893208000809, + 5.778089958999772, + 1.426899541002058, + 0.9363519999969867, + 2.89067904100375, + 2.944057749999047, + 3.721774457997526, + 4.210495624996838, + 3.5966603749984642, + 4.719359624999925, + 5.110498792004364, + 2.620230332999199, + 1.5105535420007072, + 3.440597250002611, + 2.618421290993865, + 0.7280902089987649, + 5.12706341699959, + 3.5233514999999898, + 2.377603459004604, + 1.6091642499959562, + 4.864977290999377, + 3.42581183299626, + 2.015671209002903, + 2.5336824169935426, + 3.3301199580018874, + 1.6080059169980814, + 4.445332292001694, + 1.1371099159950973, + 3.4345513340012985, + 2.176926625004853, + 1.318248791998485, + 1.8371326659980696, + 3.397253916999034, + 6.461382583001978, + 2.2144158330047503, + 0.9335729590020492, + 5.486439041997073, + 3.0541425420015003, + 1.6030845409986796, + 4.258436707998044, + 1.996725625002, + 2.5789501250037574, + 0.794825207995018, + 2.5077784169989172, + 4.381451000001107, + 0.7341202500028885, + 0.526786832997459, + 3.1630495419958606, + 3.0988521670005866, + 1.6619982920019538, + 3.71616087500297, + 3.8252187089965446, + 2.4751521250000224, + 5.006649583003309, + 1.3394295000034617, + 3.611661916002049, + 4.377064042004349, + 3.840444875000685, + 1.659149917002651, + 2.832853415995487, + 0.6458929590007756, + 3.5052734589989996, + 5.6474424170010025, + 0.8091522090035141, + 0.9284407500017551, + 6.816903582999657, + 3.9716163330012932, + 1.5373788330034586, + 2.054647875003866, + 3.2553307080015657, + 5.010804708996147, + 3.646031333002611, + 3.5746835000027204, + 4.229959917000087, + 1.9007484999965527, + 0.7102053330017952, + 2.6844349579987465, + 3.390159291004238, + 1.8455996250049793, + 1.423415750003187, + 2.3246775419975165, + 1.438939500003471, + 3.1382235830024, + 2.365440415997, + 4.739204375000554, + 4.722318541003915, + 1.851873166000587, + 4.211808124993695, + 3.862824666000961, + 3.8172453340012, + 2.785455874996842, + 2.5279075840007863, + 4.43163616599486, + 3.2573652919963934 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Time to Last Token by Client Count" + }, + "xaxis": { + "title": { + "text": "Time to Last Token (s)" + }, + "type": "log" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = go.Figure(layout=dict(title=\"Time to Last Token by Client Count\"))\n", + "for n_clients in clients_sorted:\n", + " fig.add_trace(\n", + " boxplot_by_dimension(\n", + " load_test_result.results[n_clients],\n", + " dimension=\"time_to_last_token\",\n", + " name=f\"{n_clients} clients\",\n", + " )\n", + " )\n", + "fig.update_xaxes(type=\"log\", title=\"Time to Last Token (s)\")\n", + "fig" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "| Feature | Parameter | Description |\n", + "|---|---|---|\n", + "| Time-bound runs | `run_duration=60` | Run for a fixed number of seconds instead of a fixed request count |\n", + "| Count-bound runs | `n_requests=100` | Traditional mode — fixed number of requests per client |\n", + "| Live stats | `progress_bar_stats={...}` | Customize which metrics appear on the progress bar |\n", + "| Low-memory mode | `low_memory=True` | Stream responses to disk, compute stats incrementally |" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llmeter/experiments.py b/llmeter/experiments.py index 0a70a17..55eece0 100644 --- a/llmeter/experiments.py +++ b/llmeter/experiments.py @@ -119,10 +119,66 @@ def load( @dataclass class LoadTest: - """Experiment to explore how performance changes at different concurrency levels + """Experiment to explore how performance changes at different concurrency levels. This experiment creates a series of Runs with different levels of concurrency, defined by - `sequence_of_clients`, and runs them one after the other. + ``sequence_of_clients``, and runs them one after the other. + + By default, each run sends a fixed number of requests (count-bound). Set ``run_duration`` + to run each concurrency level for a fixed number of seconds instead (time-bound), which + gives a more realistic picture of sustained throughput. + + Attributes: + endpoint (Endpoint): The LLM endpoint to test. + payload (dict | list[dict]): The request payload(s) to send. + sequence_of_clients (list[int]): Concurrency levels to test. + min_requests_per_client (int): Minimum requests per client in count-bound mode. + min_requests_per_run (int): Minimum total requests per run in count-bound mode. + run_duration (int | float | None): When set, each concurrency level runs for this + many seconds instead of a fixed request count. Mutually exclusive with + ``min_requests_per_client`` / ``min_requests_per_run``. + low_memory (bool): When ``True``, responses are written to disk but not kept in + memory. Requires ``output_path``. Defaults to ``False``. + progress_bar_stats (dict | None): Controls which live stats appear on the progress + bar. See ``RunningStats.DEFAULT_SNAPSHOT_STATS`` for the default. + output_path (os.PathLike | str | None): Where to save results. + tokenizer (Tokenizer | None): Optional tokenizer for token counting. + test_name (str | None): Name for this test. Defaults to current date/time. + callbacks (list[Callback] | None): Optional callbacks. + + Example:: + + # Count-bound: 10 requests per client at each concurrency level + load_test = LoadTest( + endpoint=my_endpoint, + payload=sample_payload, + sequence_of_clients=[1, 5, 10, 20], + min_requests_per_client=10, + output_path="outputs/load_test", + ) + result = await load_test.run() + result.plot_results() + + # Time-bound: 60 seconds per concurrency level + load_test = LoadTest( + endpoint=my_endpoint, + payload=sample_payload, + sequence_of_clients=[1, 5, 10, 20], + run_duration=60, + output_path="outputs/load_test", + ) + result = await load_test.run() + + # Time-bound with low-memory mode for large-scale tests + load_test = LoadTest( + endpoint=my_endpoint, + payload=sample_payload, + sequence_of_clients=[1, 5, 10, 20, 50], + run_duration=120, + low_memory=True, + output_path="outputs/large_load_test", + ) + result = await load_test.run() """ endpoint: Endpoint @@ -130,6 +186,9 @@ class LoadTest: sequence_of_clients: list[int] min_requests_per_client: int = 1 min_requests_per_run: int = 10 + run_duration: int | float | None = None + low_memory: bool = False + progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None output_path: os.PathLike | str | None = None tokenizer: Tokenizer | None = None test_name: str | None = None @@ -144,6 +203,40 @@ def _get_n_requests(self, clients): return int(self.min_requests_per_client) async def run(self, output_path: os.PathLike | None = None): + """Run the load test across all configured concurrency levels. + + Creates a :class:`~llmeter.runner.Runner` and iterates through + ``sequence_of_clients``, running one test per concurrency level. In + time-bound mode (``run_duration`` is set), each level runs for a fixed + duration. In count-bound mode, each level sends a fixed number of + requests per client. + + Args: + output_path (os.PathLike | None, optional): Override for the output + directory. If not provided, ``self.output_path`` is used. A + subfolder named after ``test_name`` is created automatically. + + Returns: + LoadTestResult: A result object containing one + :class:`~llmeter.results.Result` per concurrency level, keyed by + client count. + + Example:: + + load_test = LoadTest( + endpoint=my_endpoint, + payload=sample_payload, + sequence_of_clients=[1, 5, 10], + run_duration=30, + ) + result = await load_test.run(output_path="outputs/my_test") + + # Access individual results by client count + result.results[5].stats["requests_per_minute"] + + # Plot all standard charts + result.plot_results() + """ try: output_path = Path(output_path or self.output_path) / self._test_name except Exception: @@ -152,20 +245,34 @@ async def run(self, output_path: os.PathLike | None = None): endpoint=self.endpoint, tokenizer=self.tokenizer, output_path=output_path ) - self._results = [ - await _runner.run( - payload=self.payload, - clients=c, - n_requests=self._get_n_requests(c), - run_name=f"{c:05.0f}-clients", - callbacks=self.callbacks, - output_path=output_path, - ) - for c in tqdm( - self.sequence_of_clients, desc="Configurations", disable=_disable_tqdm - ) - ] - # return self._results + self._results = [] + for c in tqdm( + self.sequence_of_clients, desc="Configurations", disable=_disable_tqdm + ): + if self.run_duration is not None: + result = await _runner.run( + payload=self.payload, + clients=c, + run_duration=self.run_duration, + run_name=f"{c:05.0f}-clients", + callbacks=self.callbacks, + low_memory=self.low_memory, + progress_bar_stats=self.progress_bar_stats, + output_path=output_path, + ) + else: + result = await _runner.run( + payload=self.payload, + clients=c, + n_requests=self._get_n_requests(c), + run_name=f"{c:05.0f}-clients", + callbacks=self.callbacks, + low_memory=self.low_memory, + progress_bar_stats=self.progress_bar_stats, + output_path=output_path, + ) + self._results.append(result) + return LoadTestResult( results={r.clients: r for r in self._results}, test_name=self._test_name, diff --git a/llmeter/live_display.py b/llmeter/live_display.py new file mode 100644 index 0000000..f8c9416 --- /dev/null +++ b/llmeter/live_display.py @@ -0,0 +1,238 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +"""Live-updating stats display for test runs. + +Renders a compact table of running statistics that updates in-place during a run. +In Jupyter notebooks, uses an HTML table via IPython.display. In terminals, falls +back to a simple printed summary that overwrites itself. +""" + +from __future__ import annotations + +import logging +import sys +from collections import OrderedDict + +logger = logging.getLogger(__name__) + +# Mapping from key substrings to (group_name, display_order). +# Stats are grouped by the first matching pattern; unmatched keys go to "Other". +_GROUP_PATTERNS: list[tuple[str, str]] = [ + ("rpm", "Throughput"), + ("tps", "Throughput"), + ("ttft", "TTFT"), + ("ttlt", "TTLT"), + ("token", "Tokens"), + ("fail", "Errors"), +] + +_GROUP_ORDER = ["Throughput", "TTFT", "TTLT", "Tokens", "Errors", "Other"] + + +def _classify(key: str) -> str: + """Return the group name for a stat key based on substring matching. + + Matches the key (case-insensitive) against ``_GROUP_PATTERNS``. The first + matching pattern determines the group. Unmatched keys are placed in + ``"Other"``. + + Args: + key (str): The stat display label to classify (e.g. ``"p50_ttft"``). + + Returns: + str: The group name (e.g. ``"TTFT"``, ``"Throughput"``, ``"Other"``). + """ + key_lower = key.lower() + for pattern, group in _GROUP_PATTERNS: + if pattern in key_lower: + return group + return "Other" + + +def _group_stats(stats: dict[str, str]) -> OrderedDict[str, list[tuple[str, str]]]: + """Organize stats into ordered groups for display. + + Each stat key is classified via :func:`_classify` and placed into the + corresponding group. Groups are returned in the canonical order defined + by ``_GROUP_ORDER``, with empty groups omitted. + + Args: + stats (dict[str, str]): Mapping of stat labels to formatted values. + + Returns: + OrderedDict[str, list[tuple[str, str]]]: Groups in display order, where + each value is a list of ``(label, formatted_value)`` tuples. + """ + groups: dict[str, list[tuple[str, str]]] = {} + for k, v in stats.items(): + group = _classify(k) + groups.setdefault(group, []).append((k, v)) + # Return in canonical order, skipping empty groups + return OrderedDict((g, groups[g]) for g in _GROUP_ORDER if g in groups) + + +def _in_notebook() -> bool: + """Detect if we're running inside a Jupyter/IPython notebook. + + Returns: + bool: ``True`` if the current IPython shell is a ``ZMQInteractiveShell`` + (i.e. a Jupyter kernel), ``False`` otherwise or if IPython is not + installed. + """ + try: + from IPython import get_ipython + + shell = get_ipython() + if shell is None: + return False + return shell.__class__.__name__ == "ZMQInteractiveShell" + except ImportError: + return False + + +class LiveStatsDisplay: + """Live-updating stats display that works in both notebooks and terminals. + + In Jupyter notebooks, renders a grouped HTML table that updates in-place. + Stats are automatically organized into logical groups (Throughput, TTFT, + TTLT, Tokens, Errors) based on their key names. + + In terminals, prints a compact grouped multi-line block using ANSI escape + codes to overwrite previous output. + + Args: + disabled (bool): If ``True``, all display calls are no-ops. + + Example:: + + display = LiveStatsDisplay() + display.update({"rpm": "185.9", "p50_ttft": "0.312s", "fail": "0"}) + display.update({"rpm": "190.2", "p50_ttft": "0.305s", "fail": "1"}) + display.close() + """ + + def __init__(self, disabled: bool = False): + self._disabled = disabled + self._is_notebook = _in_notebook() + self._handle = None + self._last_line_count = 0 + + def update(self, stats: dict[str, str], extra_prefix: str = "") -> None: + """Refresh the display with new stats. + + Args: + stats (dict[str, str]): Mapping of label to formatted value. + extra_prefix (str): Optional prefix text shown before the table + (e.g. ``"reqs=127"`` for time-bound runs). + """ + if self._disabled or not stats: + return + + if self._is_notebook: + self._update_notebook(stats, extra_prefix) + else: + self._update_terminal(stats, extra_prefix) + + def _update_notebook(self, stats: dict[str, str], extra_prefix: str) -> None: + """Render stats as a grouped HTML table in a Jupyter notebook. + + Groups stats into columns (Throughput, TTFT, TTLT, Tokens, Errors) and + renders them as an HTML ```` that updates in-place via + ``IPython.display``. + + Args: + stats (dict[str, str]): Mapping of label to formatted value. + extra_prefix (str): Optional text shown above the table. + """ + from IPython.display import HTML, display + + groups = _group_stats(stats) + + # Build one column per group: header on top, key=value rows below + # All columns rendered side-by-side in a single table row + max_rows = max(len(items) for items in groups.values()) + + col_htmls = [] + for group_name, items in groups.items(): + col = ( + f"" + ) + rows = [] + for k, v in items: + rows.append( + f"" + ) + # Pad shorter columns + for _ in range(max_rows - len(items)): + rows.append("") + col_htmls.append((col, rows)) + + # Assemble: header row, then data rows + header_row = "" + "".join(c[0] for c in col_htmls) + "" + data_rows = "" + for i in range(max_rows): + data_rows += "" + "".join(c[1][i] for c in col_htmls) + "" + + prefix_html = ( + f"" + f"{extra_prefix}
" + if extra_prefix + else "" + ) + html = ( + f"{prefix_html}" + f"
" + f"{group_name}" + f"{k}" + f"  " + f"{v}" + f"
" + f"{header_row}{data_rows}
" + ) + + if self._handle is None: + self._handle = display(HTML(html), display_id=True) + else: + self._handle.update(HTML(html)) + + def _update_terminal(self, stats: dict[str, str], extra_prefix: str) -> None: + """Render stats as grouped text lines in a terminal. + + Uses ANSI escape codes to erase the previous output and overwrite it + with the updated stats, one line per group. + + Args: + stats (dict[str, str]): Mapping of label to formatted value. + extra_prefix (str): Optional text shown on the first line. + """ + # Erase previous output + if self._last_line_count > 0: + sys.stderr.write(f"\033[{self._last_line_count}A\033[J") + + groups = _group_stats(stats) + lines = [] + if extra_prefix: + lines.append(f" {extra_prefix}") + for group_name, items in groups.items(): + values = " ".join(f"{k}={v}" for k, v in items) + lines.append(f" {group_name}: {values}") + + output = "\n".join(lines) + sys.stderr.write(output + "\n") + sys.stderr.flush() + self._last_line_count = len(lines) + + def close(self) -> None: + """Clean up the display. + + In terminal mode, erases the stats block using ANSI escape codes. + In notebook mode, the HTML output remains visible. + """ + if self._disabled: + return + # In terminal, erase the stats block + if not self._is_notebook and self._last_line_count > 0: + sys.stderr.write(f"\033[{self._last_line_count}A\033[J") + sys.stderr.flush() + self._last_line_count = 0 diff --git a/llmeter/runner.py b/llmeter/runner.py index 0604a32..0605a36 100644 --- a/llmeter/runner.py +++ b/llmeter/runner.py @@ -20,6 +20,7 @@ from tqdm.auto import tqdm, trange from upath import UPath as Path +from llmeter.live_display import LiveStatsDisplay from llmeter.utils import RunningStats, now_utc if TYPE_CHECKING: @@ -56,6 +57,7 @@ class _RunConfig: tokenizer: Tokenizer | Any | None = None clients: int = 1 n_requests: int | None = None + run_duration: int | float | None = None payload: dict | list[dict] | os.PathLike | str | None = None run_name: str | None = None run_description: str | None = None @@ -74,6 +76,9 @@ def __post_init__(self, disable_client_progress_bar, disable_clients_progress_ba if self.n_requests is not None: assert self.n_requests > 0, "Number of requests must be a positive integer" + if self.run_duration is not None: + assert self.run_duration > 0, "Run duration must be a positive number" + assert self.clients > 0, "Number of clients must be a positive integer" if self.run_name is not None: @@ -181,16 +186,39 @@ def __post_init__(self, disable_client_progress_bar, disable_clients_progress_ba ) def _validate_and_prepare_payload(self): - """Validate and prepare the payload for the test run and update n_requests + """Validate and prepare the payload for the test run. + + Normalizes the payload into a list of dicts, validates that ``n_requests`` + and ``run_duration`` are not both set, and sets ``_time_bound`` and + ``_n_requests`` accordingly. + + For count-bound runs, ``_n_requests`` defaults to the number of payloads + when not explicitly provided. For time-bound runs, ``_n_requests`` is set + to 0 since the actual count is unknown upfront. - This method ensures that the payload is valid and prepared for the test run. + Raises: + AssertionError: If no payload is provided. + ValueError: If both ``n_requests`` and ``run_duration`` are set. + FileNotFoundError: If the payload path does not exist. """ assert self.payload, "No payload provided" if isinstance(self.payload, (os.PathLike, str)): self.payload = list(load_payloads(self.payload)) if isinstance(self.payload, dict): self.payload = [self.payload] - self._n_requests = self.n_requests or len(self.payload) + + if self.run_duration is not None and self.n_requests is not None: + raise ValueError( + "Cannot set both n_requests and run_duration. " + "Use n_requests for request-bound runs or run_duration for time-bound runs." + ) + + self._time_bound = self.run_duration is not None + if self._time_bound: + # For time-bound runs, _n_requests is unknown upfront + self._n_requests = 0 + else: + self._n_requests = self.n_requests or len(self.payload) @staticmethod async def _compute_time_per_output_token(response: InvocationResponse): @@ -275,12 +303,16 @@ async def _process_results_from_q(self, output_path: Path | None = None): self._responses.append(response) self._running_stats.update(response.to_dict()) - if self._progress_bar: + if self._progress_bar is not None and not self._time_bound: self._progress_bar.update(1) - self._progress_bar.set_postfix( - self._running_stats.snapshot(self.progress_bar_stats), - refresh=False, - ) + + if self._stats_display is not None: + snapshot = self._running_stats.snapshot(self.progress_bar_stats) + if snapshot: + prefix = ( + f"reqs={self._running_stats._count}" if self._time_bound else "" + ) + self._stats_display.update(snapshot, extra_prefix=prefix) if output_path: output_path.parent.mkdir(parents=True, exist_ok=True) @@ -295,23 +327,21 @@ def _invoke_n_no_wait( n: int | None = None, shuffle_order=True, ) -> list[InvocationResponse]: - """ - Generate multiple invocations for the given payload. + """Generate *n* invocations synchronously for a single client. - This method generates `n` invocations for the given payload(s) by sending - requests to the endpoint in a loop. If a sequence of payloads is provided, - the payloads are cycled through until `n` invocations are generated. If a - single payload is provided, it is used for all `n` invocations. + Cycles through *payload* until *n* invocations are generated, sending + each request to the endpoint and pushing the response onto + ``self._queue`` for async token-counting and stats collection. Args: - payload: The input payload to generate invocations for. - n (int|None, optional): The number of invocations to generate. + payload (list[dict]): The input payloads to cycle through. + n (int | None, optional): The number of invocations to generate. If not specified, every element in the payload is used once. shuffle_order (bool, optional): Whether to shuffle the order of payloads before generating invocations. Defaults to True. Returns: - List[EndpointResponse]: A list of response objects. + list[InvocationResponse]: A list of response objects. """ # ToDo: replace with an async method to prepare payloads, including possible callbacks, @@ -324,17 +354,20 @@ def _invoke_n_no_wait( responses = [] if n is None: n = len(payload) - for p, _ in zip( - cycle(payload), - trange( - n, - leave=False, - desc="Requests", - disable=_disable_tqdm or self._disable_per_client_progress_bar, - ), - ): + if not payload: + return responses + payload_iter = cycle(payload) + pbar = trange( + n, + leave=False, + desc="Requests", + disable=_disable_tqdm or self._disable_per_client_progress_bar, + ) + for _ in pbar: + p = next(payload_iter) try: p = asyncio.run(process_before_invoke_callbacks(self.callbacks, p)) + self._running_stats.record_send() response = self._endpoint.invoke(p) except Exception as e: @@ -351,6 +384,56 @@ def _invoke_n_no_wait( ) return responses + def _invoke_for_duration( + self, + payload: list[dict], + duration: float, + shuffle_order=True, + ) -> list[InvocationResponse]: + """Generate invocations continuously until *duration* seconds have elapsed. + + Cycles through *payload* indefinitely, stopping only when the wall-clock + time exceeds *duration*. Each completed request is pushed onto + ``self._queue`` for async token-counting and stats collection, mirroring + the behaviour of :meth:`_invoke_n_no_wait`. + + Args: + payload (list[dict]): The input payloads to cycle through. + duration (float): Maximum wall-clock seconds to keep sending requests. + shuffle_order (bool, optional): Whether to shuffle the order of payloads + before generating invocations. Defaults to True. + + Returns: + list[InvocationResponse]: All responses collected during the window. + """ + if shuffle_order: + self._random_seed += random.randint(1, 1000) + random.seed(0) + payload = random.sample(payload, k=len(payload)) + + responses: list[InvocationResponse] = [] + deadline = time.perf_counter() + duration + payload_iter = cycle(payload) + + while time.perf_counter() < deadline: + p = next(payload_iter) + try: + p = asyncio.run(process_before_invoke_callbacks(self.callbacks, p)) + self._running_stats.record_send() + response = self._endpoint.invoke(p) + except Exception as e: + logger.exception(f"Error with invocation with payload {p}: {e}") + response = InvocationResponse.error_output( + id=uuid4().hex, + error=str(e), + ) + responses.append(response) + if self._queue: + self._queue._loop.call_soon_threadsafe( # type: ignore + self._queue.put_nowait, response + ) + return responses + async def _invoke_n( self, payload: list[dict], @@ -358,17 +441,15 @@ async def _invoke_n( add_start_jitter=True, shuffle_order=True, ) -> list[InvocationResponse]: - """ - Asynchronously generate multiple invocations for the given payload. + """Asynchronously generate *n* invocations for a single client. - This method generates `n` invocations for the given payload(s) by sending - requests to the endpoint asynchronously. If a sequence of payloads is provided, - the payloads are cycled through until `n` invocations are generated. If a - single payload is provided, it is used for all `n` invocations. + Wraps :meth:`_invoke_n_no_wait` in a thread with an overall timeout + of ``self.timeout * n`` seconds. Args: - payload (Dict[str, str] | Sequence[Dict[str, str]]): The input payload(s) to generate invocations for. - n (int | None, optional): The number of invocations to generate. Defaults to None. + payload (list[dict]): The input payload(s) to generate invocations for. + n (int | None, optional): The number of invocations to generate. + Defaults to None (one per payload element). add_start_jitter (bool, optional): Whether to add a random delay before starting the invocations loop to avoid batch bunching when using multiple clients. Defaults to True. @@ -376,7 +457,8 @@ async def _invoke_n( before generating invocations. Defaults to True. Returns: - List[EndpointResponse]: A list of response objects. + list[InvocationResponse]: A list of response objects. Returns an empty + list if the overall timeout is exceeded. """ if add_start_jitter: @@ -396,26 +478,65 @@ async def _invoke_n( return response + async def _invoke_duration( + self, + payload: list[dict], + add_start_jitter=True, + shuffle_order=True, + ) -> list[InvocationResponse]: + """Asynchronously generate invocations for a single client until duration expires. + + Wraps :meth:`_invoke_for_duration` in a thread. The client sends requests + continuously for ``self.run_duration`` seconds. + + Args: + payload (list[dict]): The input payload(s) to cycle through. + add_start_jitter (bool, optional): Whether to add a random delay before + starting the invocations loop to avoid batch bunching when using + multiple clients. Defaults to True. + shuffle_order (bool, optional): Whether to shuffle the order of payloads + before generating invocations. Defaults to True. + + Returns: + list[InvocationResponse]: All responses collected during the time window. + """ + + if add_start_jitter: + await asyncio.sleep(random.random() * 0.01) + + if shuffle_order: + self._random_seed = random.randint(0, 2**16 - 1) + + return await asyncio.to_thread( + self._invoke_for_duration, + payload, + self.run_duration, + shuffle_order, + ) + async def _invoke_n_c( self, payload: list[dict], n_requests: int | None = None, clients: int = 1, ) -> tuple[float, float, float]: - """ - Asynchronously generates multiple invocations for a given payload. + """Spawn *clients* concurrent count-bound invocation loops. + + Each client generates *n_requests* invocations by delegating to + :meth:`_invoke_n`. All clients run concurrently and the method waits + for all of them to finish before signalling the token-counting queue + to stop. Args: - payload (dict): The input data for generating invocations. - queue (asyncio.Queue): The queue to store the generated responses. - n_requests (int | None, optional): The number of invocations to generate per connection. Defaults to None. - clients (int, optional): The number of concurrent connections to generate invocations. Defaults to 1. + payload (list[dict]): The input payloads to send. + n_requests (int | None, optional): The number of invocations to + generate per client. Defaults to None. + clients (int, optional): The number of concurrent client connections. + Defaults to 1. Returns: - None - - Raises: - None + tuple[float, float, float]: A ``(total_test_time, start_t, end_t)`` + tuple of ``time.perf_counter`` values. """ logger.info( f"Generating {clients} connections with {n_requests} invocations each" @@ -430,25 +551,86 @@ async def _invoke_n_c( end_t = time.perf_counter() total_test_time = end_t - start_t logger.info( - f"Generated {clients} connections with {n_requests} invocations each in {total_test_time * 1000:.2f} seconds" + f"Completed {clients} clients x {n_requests} requests in " + f"{total_test_time * 1000:.2f}ms" + ) + + if self._queue: + await self._queue.put(None) + logger.debug("Signaling token counting task to exit") + return total_test_time, start_t, end_t + + async def _invoke_duration_c( + self, + payload: list[dict], + clients: int = 1, + ) -> tuple[float, float, float]: + """Spawn *clients* concurrent time-bound invocation loops. + + Each client sends requests continuously for ``self.run_duration`` seconds + by delegating to :meth:`_invoke_duration`. All clients run concurrently + and the method waits for all of them to finish before signalling the + token-counting queue to stop. + + Args: + payload (list[dict]): The input payloads to cycle through. + clients (int, optional): The number of concurrent client connections. + Defaults to 1. + + Returns: + tuple[float, float, float]: A ``(total_test_time, start_t, end_t)`` + tuple of ``time.perf_counter`` values. + """ + logger.info(f"Generating {clients} connections for {self.run_duration}s each") + start_t = time.perf_counter() + await tqdm.gather( + *[self._invoke_duration(payload) for _ in range(clients)], + leave=False, + desc="Clients", + disable=_disable_tqdm or self._disable_clients_progress_bar, + ) + end_t = time.perf_counter() + total_test_time = end_t - start_t + logger.info( + f"Completed {clients} clients x {self.run_duration}s in " + f"{total_test_time * 1000:.2f}ms" ) - # Signal the token counting task to exit if self._queue: await self._queue.put(None) logger.debug("Signaling token counting task to exit") return total_test_time, start_t, end_t + async def _tick_time_bar(self): + """Advance ``_progress_bar`` every 0.5 s until ``run_duration`` is reached. + + Designed to run as a concurrent task alongside the invocation loops so + the user sees a smooth time-based progress bar. + """ + start = time.perf_counter() + duration = self.run_duration + prev = 0 + while True: + await asyncio.sleep(0.5) + elapsed = time.perf_counter() - start + tick = min(int(elapsed), int(duration)) - prev + if tick > 0 and self._progress_bar is not None: + self._progress_bar.update(tick) + prev += tick + if elapsed >= duration: + break + async def _run(self): """Run the test with the given configuration This method is expected to be called *exactly once* after the _Run object is created. Attempting to re-use a _Run object may result in undefined behavior. """ + # For time-bound runs, total_requests is unknown upfront result = Result( responses=[], total_test_time=None, - total_requests=self._n_requests * self.clients, + total_requests=0 if self._time_bound else self._n_requests * self.clients, clients=self.clients, n_requests=self._n_requests, output_path=self.output_path, # type: ignore @@ -471,27 +653,64 @@ async def _run(self): loop.set_default_executor(ThreadPoolExecutor(max_workers=self.clients + 5)) logger.info("Starting test") self._queue = asyncio.Queue() - self._progress_bar = tqdm( - total=self.clients * self._n_requests, - leave=False, - desc="Total requests", - disable=_disable_tqdm, - ) + + if self._time_bound: + # Time-bound: progress bar shows elapsed seconds + self._progress_bar = tqdm( + total=int(self.run_duration), + leave=False, + desc="Elapsed", + unit="s", + bar_format="{desc}: {bar}| {n:.0f}/{total:.0f}s [{elapsed}]", + disable=_disable_tqdm, + ) + else: + # Count-bound: progress bar shows completed requests + self._progress_bar = tqdm( + total=self.clients * self._n_requests, + leave=False, + desc="Total requests", + disable=_disable_tqdm, + ) + + # Live stats display — renders as an HTML table in notebooks, multi-line in terminals + self._stats_display = LiveStatsDisplay(disabled=_disable_tqdm) + + # Show the table layout immediately with placeholder values + initial_snapshot = self._running_stats.snapshot(self.progress_bar_stats) + prefix = "reqs=0" if self._time_bound else "" + self._stats_display.update(initial_snapshot, extra_prefix=prefix) try: run_start_time = now_utc() - _, (total_test_time, start_time, end_time) = await asyncio.gather( - self._process_results_from_q( - output_path=Path(self.output_path) / "responses.jsonl" - if self.output_path - else None, - ), - self._invoke_n_c( + if self._time_bound: + invoke_coro = self._invoke_duration_c( + payload=self.payload, # type: ignore + clients=self.clients, + ) + _, (total_test_time, start_time, end_time), _ = await asyncio.gather( + self._process_results_from_q( + output_path=Path(self.output_path) / "responses.jsonl" + if self.output_path + else None, + ), + invoke_coro, + self._tick_time_bar(), + ) + else: + invoke_coro = self._invoke_n_c( payload=self.payload, # type: ignore n_requests=self._n_requests, clients=self.clients, - ), - ) + ) + _, (total_test_time, start_time, end_time) = await asyncio.gather( + self._process_results_from_q( + output_path=Path(self.output_path) / "responses.jsonl" + if self.output_path + else None, + ), + invoke_coro, + ) run_end_time = now_utc() except asyncio.CancelledError: @@ -501,12 +720,20 @@ async def _run(self): return result self._progress_bar.close() + if self._stats_display is not None: + self._stats_display.close() logger.info(f"Test completed in {total_test_time * 1000:.2f} seconds.") + actual_total = self._running_stats._count + result = replace( result, responses=self._responses, total_test_time=total_test_time, + total_requests=actual_total, + n_requests=actual_total // max(self.clients, 1) + if self._time_bound + else self._n_requests, start_time=run_start_time, end_time=run_end_time, ) @@ -580,7 +807,11 @@ class Runner(_RunConfig): `DummyTokenizer` will be used if needed. clients (int): The number of concurrent clients to use for sending requests. Defaults to 1. n_requests (int | None): The number of LLM invocations to generate *per client*. By - default, each request in `payload` will be sent once by each client. + default, each request in `payload` will be sent once by each client. Mutually + exclusive with ``run_duration``. + run_duration (int | float | None): Run each client for this many seconds instead of a + fixed request count. Clients send requests continuously until the duration expires. + Mutually exclusive with ``n_requests``. Defaults to ``None`` (count-bound mode). payload (dict | list[dict] | os.PathLike | str | None): The request data to send to the endpoint under test. You can provide a single JSON payload (dict), a list of payloads (list[dict]), or a path to one or more JSON/JSON-Lines files to be loaded by @@ -647,6 +878,7 @@ async def run( tokenizer: Tokenizer | Any | None = None, clients: int | None = None, n_requests: int | None = None, + run_duration: int | float | None = None, payload: dict | list[dict] | os.PathLike | str | None = None, run_name: str | None = None, run_description: str | None = None, @@ -677,6 +909,17 @@ async def run( output token counts for endpoints that don't report exact information. clients (int): The number of concurrent clients to use for sending requests. n_requests (int | None): The number of LLM invocations to generate *per client*. + Mutually exclusive with ``run_duration``. + run_duration (int | float | None): Run each client for this many seconds + instead of a fixed request count. Clients send requests continuously + until the duration expires. Mutually exclusive with ``n_requests``. + + Example:: + + # Run for 60 seconds with 5 concurrent clients: + result = await runner.run(run_duration=60, clients=5) + result.total_requests # actual count completed + payload (dict | list[dict] | os.PathLike | str | None): The request data to send to the endpoint under test. You can provide a single JSON payload (dict), a list of payloads (list[dict]), or a path to one or more JSON/JSON-Lines files to be loaded @@ -746,6 +989,7 @@ async def run( tokenizer=tokenizer, clients=clients, n_requests=n_requests, + run_duration=run_duration, payload=payload, run_name=run_name, run_description=run_description, diff --git a/llmeter/utils.py b/llmeter/utils.py index fd30d0f..c43d7d0 100644 --- a/llmeter/utils.py +++ b/llmeter/utils.py @@ -1,6 +1,7 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 import bisect +import time from datetime import datetime, timezone from itertools import filterfalse from math import isnan @@ -113,7 +114,13 @@ class RunningStats: #: * ``(metric_name, aggregation, "inv")`` — same as above but displays the #: reciprocal (e.g. seconds-per-token → tokens-per-second). #: * The literal string ``"failed"`` for the running failure count. + #: * The literal string ``"rpm"`` for live requests-per-minute based on the + #: send window (first request sent to last request sent). + #: * The literal string ``"output_tps"`` for aggregate output tokens per second + #: across all clients, based on the send window. DEFAULT_SNAPSHOT_STATS: dict[str, tuple[str, ...] | str] = { + "rpm": "rpm", + "output_tps": "output_tps", "p50_ttft": ("time_to_first_token", "p50"), "p90_ttft": ("time_to_first_token", "p90"), "p50_ttlt": ("time_to_last_token", "p50"), @@ -128,9 +135,29 @@ def __init__(self, metrics: Sequence[str]): self._metrics = list(metrics) self._count = 0 self._failed = 0 + self._sends = 0 + self._first_send_time: float | None = None + self._last_send_time: float | None = None self._sums: dict[str, float] = {m: 0.0 for m in metrics} self._values: dict[str, list[float]] = {m: [] for m in metrics} + def record_send(self) -> None: + """Record that a request was dispatched to the endpoint. + + Call this from the invocation loop each time a request is sent, *before* + waiting for the response. This tracks the send-side time window used for + accurate RPM and throughput calculations. + + The send window (``_first_send_time`` to ``_last_send_time``) excludes + the tail latency of the final response, giving a more accurate picture + of the request dispatch rate. + """ + now = time.perf_counter() + self._sends += 1 + if self._first_send_time is None: + self._first_send_time = now + self._last_send_time = now + def update(self, response_dict: dict[str, Any]) -> None: """Record one response's metric values. @@ -257,6 +284,10 @@ def snapshot( the value is inverted before display (e.g. seconds-per-token → tokens-per-second). * ``"failed"`` — the literal string; shows the running failure count. + * ``"rpm"`` — the literal string; shows live requests-per-minute + estimate based on the send window (first to last request sent). + * ``"output_tps"`` — the literal string; shows aggregate output + tokens per second across all clients, based on the send window. Defaults to :attr:`DEFAULT_SNAPSHOT_STATS` when ``None``. @@ -284,7 +315,9 @@ def snapshot( # {'tps': '28.3 tok/s'} """ if self._count == 0: - return {} + if fields is None: + fields = self.DEFAULT_SNAPSHOT_STATS + return {label: "—" for label in fields} if fields is None: fields = self.DEFAULT_SNAPSHOT_STATS @@ -297,6 +330,27 @@ def snapshot( info[label] = str(self._failed) continue + if spec == "rpm": + if ( + self._first_send_time is not None + and self._last_send_time is not None + and self._last_send_time > self._first_send_time + ): + send_window = self._last_send_time - self._first_send_time + info[label] = f"{self._count / send_window * 60:.1f}" + continue + + if spec == "output_tps": + if ( + self._first_send_time is not None + and self._last_send_time is not None + and self._last_send_time > self._first_send_time + ): + send_window = self._last_send_time - self._first_send_time + total_out = self._sums.get("num_tokens_output", 0) + info[label] = f"{total_out / send_window:.1f} tok/s" + continue + metric = spec[0] agg = spec[1] invert = len(spec) > 2 and spec[2] == "inv" diff --git a/tests/unit/test_experiments.py b/tests/unit/test_experiments.py index 13fbd1f..42b0fb0 100644 --- a/tests/unit/test_experiments.py +++ b/tests/unit/test_experiments.py @@ -531,3 +531,122 @@ def test_get_n_requests_parametrized( ) result = runner._get_n_requests(clients) assert result == expected, f"Expected {expected}, but got {result}" + + +# ── LoadTest with run_duration, low_memory, progress_bar_stats ─────────────── + + +class TestLoadTestTimeBound: + def test_load_test_with_run_duration(self, mock_endpoint): + """run_duration should be stored on the LoadTest instance.""" + lt = LoadTest( + endpoint=mock_endpoint, + payload={"input": "test"}, + sequence_of_clients=[1, 2], + run_duration=30, + ) + assert lt.run_duration == 30 + + def test_load_test_with_low_memory(self, mock_endpoint): + """low_memory should be stored on the LoadTest instance.""" + lt = LoadTest( + endpoint=mock_endpoint, + payload={"input": "test"}, + sequence_of_clients=[1], + low_memory=True, + ) + assert lt.low_memory is True + + def test_load_test_with_progress_bar_stats(self, mock_endpoint): + """progress_bar_stats should be stored on the LoadTest instance.""" + custom_stats = {"rpm": "rpm", "fail": "failed"} + lt = LoadTest( + endpoint=mock_endpoint, + payload={"input": "test"}, + sequence_of_clients=[1], + progress_bar_stats=custom_stats, + ) + assert lt.progress_bar_stats == custom_stats + + @pytest.mark.asyncio + async def test_run_duration_passed_to_runner(self, mock_endpoint): + """When run_duration is set, runner.run() should receive it.""" + mock_runner_instance = AsyncMock(spec=Runner) + mock_runner_instance.run.return_value = MagicMock( + spec=Result, clients=1, total_requests=10 + ) + + with patch("llmeter.experiments.Runner", return_value=mock_runner_instance): + lt = LoadTest( + endpoint=mock_endpoint, + payload={"input": "test"}, + sequence_of_clients=[1, 3], + run_duration=15, + ) + await lt.run() + + # Check that run_duration was passed in each call + for call in mock_runner_instance.run.call_args_list: + assert call.kwargs["run_duration"] == 15 + assert "n_requests" not in call.kwargs + + @pytest.mark.asyncio + async def test_count_bound_does_not_pass_run_duration(self, mock_endpoint): + """When run_duration is None, runner.run() should receive n_requests.""" + mock_runner_instance = AsyncMock(spec=Runner) + mock_runner_instance.run.return_value = MagicMock( + spec=Result, clients=1, total_requests=10 + ) + + with patch("llmeter.experiments.Runner", return_value=mock_runner_instance): + lt = LoadTest( + endpoint=mock_endpoint, + payload={"input": "test"}, + sequence_of_clients=[1], + ) + await lt.run() + + call_kwargs = mock_runner_instance.run.call_args_list[0].kwargs + assert "n_requests" in call_kwargs + assert "run_duration" not in call_kwargs + + @pytest.mark.asyncio + async def test_low_memory_passed_to_runner(self, mock_endpoint): + """low_memory should be forwarded to each runner.run() call.""" + mock_runner_instance = AsyncMock(spec=Runner) + mock_runner_instance.run.return_value = MagicMock( + spec=Result, clients=1, total_requests=10 + ) + + with patch("llmeter.experiments.Runner", return_value=mock_runner_instance): + lt = LoadTest( + endpoint=mock_endpoint, + payload={"input": "test"}, + sequence_of_clients=[1, 2], + low_memory=True, + ) + await lt.run() + + for call in mock_runner_instance.run.call_args_list: + assert call.kwargs["low_memory"] is True + + @pytest.mark.asyncio + async def test_progress_bar_stats_passed_to_runner(self, mock_endpoint): + """progress_bar_stats should be forwarded to each runner.run() call.""" + custom_stats = {"rpm": "rpm"} + mock_runner_instance = AsyncMock(spec=Runner) + mock_runner_instance.run.return_value = MagicMock( + spec=Result, clients=1, total_requests=10 + ) + + with patch("llmeter.experiments.Runner", return_value=mock_runner_instance): + lt = LoadTest( + endpoint=mock_endpoint, + payload={"input": "test"}, + sequence_of_clients=[1], + progress_bar_stats=custom_stats, + ) + await lt.run() + + call_kwargs = mock_runner_instance.run.call_args_list[0].kwargs + assert call_kwargs["progress_bar_stats"] == custom_stats diff --git a/tests/unit/test_live_display.py b/tests/unit/test_live_display.py new file mode 100644 index 0000000..7057eaf --- /dev/null +++ b/tests/unit/test_live_display.py @@ -0,0 +1,154 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +from collections import OrderedDict +from unittest.mock import patch + +from llmeter.live_display import ( + LiveStatsDisplay, + _classify, + _group_stats, + _in_notebook, +) + + +# ── _classify ──────────────────────────────────────────────────────────────── + + +class TestClassify: + def test_rpm_goes_to_throughput(self): + assert _classify("rpm") == "Throughput" + + def test_tps_goes_to_throughput(self): + assert _classify("p50_tps") == "Throughput" + assert _classify("output_tps") == "Throughput" + + def test_ttft_goes_to_ttft(self): + assert _classify("p50_ttft") == "TTFT" + assert _classify("p90_ttft") == "TTFT" + + def test_ttlt_goes_to_ttlt(self): + assert _classify("p50_ttlt") == "TTLT" + assert _classify("p90_ttlt") == "TTLT" + + def test_token_goes_to_tokens(self): + assert _classify("input_tokens") == "Tokens" + assert _classify("output_tokens") == "Tokens" + + def test_fail_goes_to_errors(self): + assert _classify("fail") == "Errors" + + def test_unknown_goes_to_other(self): + assert _classify("custom_metric") == "Other" + + def test_case_insensitive(self): + assert _classify("RPM") == "Throughput" + assert _classify("P50_TTFT") == "TTFT" + + +# ── _group_stats ───────────────────────────────────────────────────────────── + + +class TestGroupStats: + def test_groups_by_category(self): + stats = { + "rpm": "185.9", + "p50_ttft": "0.312s", + "p90_ttlt": "1.203s", + "input_tokens": "12540", + "fail": "0", + } + groups = _group_stats(stats) + assert "Throughput" in groups + assert "TTFT" in groups + assert "TTLT" in groups + assert "Tokens" in groups + assert "Errors" in groups + + def test_preserves_order(self): + stats = OrderedDict( + [ + ("rpm", "185.9"), + ("p50_ttft", "0.312s"), + ("p50_ttlt", "0.847s"), + ("fail", "0"), + ] + ) + groups = _group_stats(stats) + group_names = list(groups.keys()) + assert group_names == ["Throughput", "TTFT", "TTLT", "Errors"] + + def test_unknown_keys_go_to_other(self): + stats = {"custom": "42"} + groups = _group_stats(stats) + assert "Other" in groups + assert groups["Other"] == [("custom", "42")] + + def test_empty_stats(self): + groups = _group_stats({}) + assert len(groups) == 0 + + +# ── _in_notebook ───────────────────────────────────────────────────────────── + + +class TestInNotebook: + def test_returns_false_outside_notebook(self): + assert _in_notebook() is False + + def test_returns_false_for_terminal_ipython(self): + mock_shell = type("TerminalInteractiveShell", (), {})() + with patch("IPython.get_ipython", return_value=mock_shell): + assert _in_notebook() is False + + def test_returns_true_for_zmq_shell(self): + mock_shell = type("ZMQInteractiveShell", (), {})() + with patch("IPython.get_ipython", return_value=mock_shell): + assert _in_notebook() is True + + def test_returns_false_for_none(self): + with patch("IPython.get_ipython", return_value=None): + assert _in_notebook() is False + + +# ── LiveStatsDisplay ───────────────────────────────────────────────────────── + + +class TestLiveStatsDisplay: + def test_disabled_does_nothing(self): + display = LiveStatsDisplay(disabled=True) + # Should not raise + display.update({"rpm": "100"}) + display.close() + + def test_update_empty_stats_does_nothing(self): + display = LiveStatsDisplay(disabled=False) + display.update({}) + assert display._handle is None + assert display._last_line_count == 0 + + def test_terminal_output(self, capsys): + display = LiveStatsDisplay(disabled=False) + display._is_notebook = False + display.update({"rpm": "100", "fail": "0"}) + # Should have written to stderr + assert display._last_line_count > 0 + display.close() + assert display._last_line_count == 0 + + def test_terminal_with_prefix(self, capsys): + display = LiveStatsDisplay(disabled=False) + display._is_notebook = False + display.update({"rpm": "100"}, extra_prefix="reqs=42") + assert display._last_line_count >= 2 # prefix line + stats line + display.close() + + def test_terminal_overwrites_previous(self): + display = LiveStatsDisplay(disabled=False) + display._is_notebook = False + display.update({"rpm": "100"}) + first_count = display._last_line_count + display.update({"rpm": "200"}) + # Should still be same number of lines (overwritten) + assert display._last_line_count == first_count + display.close() diff --git a/tests/unit/test_runner.py b/tests/unit/test_runner.py index 4150dee..8953e80 100644 --- a/tests/unit/test_runner.py +++ b/tests/unit/test_runner.py @@ -997,3 +997,208 @@ async def test_count_tokens_from_q_with_custom_output_path(run: _Run, tmp_path: # Add more tests for edge cases and other methods as needed + + +# ── Time-bound (run_duration) tests ────────────────────────────────────────── + + +def test_run_duration_and_n_requests_mutually_exclusive( + mock_endpoint: Endpoint, mock_tokenizer: MagicMock +): + """Setting both n_requests and run_duration should raise ValueError.""" + with pytest.raises(ValueError, match="Cannot set both"): + _Run( + endpoint=mock_endpoint, + tokenizer=mock_tokenizer, + payload=[{"prompt": "test"}], + n_requests=10, + run_duration=5, + clients=1, + run_name="test_run", + ) + + +def test_run_duration_sets_time_bound_flag( + mock_endpoint: Endpoint, mock_tokenizer: MagicMock +): + """When run_duration is set, _time_bound should be True and _n_requests 0.""" + run = _Run( + endpoint=mock_endpoint, + tokenizer=mock_tokenizer, + payload=[{"prompt": "test"}], + run_duration=5, + clients=1, + run_name="test_run", + ) + assert run._time_bound is True + assert run._n_requests == 0 + + +def test_n_requests_sets_count_bound( + mock_endpoint: Endpoint, mock_tokenizer: MagicMock +): + """When n_requests is set (no run_duration), _time_bound should be False.""" + run = _Run( + endpoint=mock_endpoint, + tokenizer=mock_tokenizer, + payload=[{"prompt": "test"}], + n_requests=10, + clients=1, + run_name="test_run", + ) + assert run._time_bound is False + assert run._n_requests == 10 + + +def test_run_duration_must_be_positive( + mock_endpoint: Endpoint, mock_tokenizer: MagicMock +): + """run_duration must be > 0.""" + with pytest.raises(AssertionError, match="positive"): + _Run( + endpoint=mock_endpoint, + tokenizer=mock_tokenizer, + payload=[{"prompt": "test"}], + run_duration=-1, + clients=1, + run_name="test_run", + ) + + +def test_invoke_for_duration_respects_deadline( + mock_endpoint: Endpoint, mock_tokenizer: MagicMock +): + """_invoke_for_duration should stop after the specified duration.""" + run = _Run( + endpoint=mock_endpoint, + tokenizer=mock_tokenizer, + payload=[{"prompt": "test"}], + run_duration=0.5, + clients=1, + run_name="test_run", + ) + run.callbacks = None + run._queue = MagicMock() + run._queue._loop.call_soon_threadsafe = MagicMock() + + # Make invoke take ~100ms so we get a handful of requests + def slow_invoke(payload): + time.sleep(0.1) + return InvocationResponse(id="1", input_prompt="test", response_text="response") + + run._endpoint.invoke.side_effect = slow_invoke + + start = time.perf_counter() + responses = run._invoke_for_duration(payload=[{"prompt": "test"}], duration=0.5) + elapsed = time.perf_counter() - start + + assert len(responses) > 0 + assert elapsed < 1.0 # Should not overshoot by much + assert all(isinstance(r, InvocationResponse) for r in responses) + + +def test_invoke_for_duration_cycles_payloads( + mock_endpoint: Endpoint, mock_tokenizer: MagicMock +): + """_invoke_for_duration should cycle through payloads.""" + run = _Run( + endpoint=mock_endpoint, + tokenizer=mock_tokenizer, + payload=[{"prompt": "a"}, {"prompt": "b"}], + run_duration=0.3, + clients=1, + run_name="test_run", + ) + run.callbacks = None + run._queue = MagicMock() + run._queue._loop.call_soon_threadsafe = MagicMock() + + payloads_seen = [] + + def tracking_invoke(payload): + payloads_seen.append(payload) + return InvocationResponse(id="1", input_prompt=str(payload), response_text="ok") + + run._endpoint.invoke.side_effect = tracking_invoke + + responses = run._invoke_for_duration( + payload=[{"prompt": "a"}, {"prompt": "b"}], + duration=0.3, + shuffle_order=False, + ) + + assert len(responses) >= 2 + # Should see both payloads used (cycling) + prompts = [p.get("prompt") for p in payloads_seen] + assert "a" in prompts + assert "b" in prompts + + +@pytest.mark.asyncio +async def test_run_with_duration(runner: Runner): + """Full run() with run_duration should complete and report actual counts.""" + result = await runner.run( + payload={"prompt": "test"}, + run_duration=0.3, + clients=1, + ) + + assert result.total_requests > 0 + assert result.n_requests > 0 + assert result.total_test_time is not None + assert result.total_test_time > 0 + assert result.stats["total_requests"] == result.total_requests + + +@pytest.mark.asyncio +async def test_run_with_duration_multiple_clients(runner: Runner): + """Time-bound run with multiple clients should aggregate counts.""" + result = await runner.run( + payload={"prompt": "test"}, + run_duration=0.3, + clients=3, + ) + + assert result.total_requests > 0 + assert result.clients == 3 + assert result.total_test_time is not None + + +@pytest.mark.asyncio +async def test_run_with_duration_and_output_path(runner: Runner, tmp_path: Path): + """Time-bound run with output_path should save results to disk.""" + result = await runner.run( + payload={"prompt": "test"}, + run_duration=0.3, + clients=1, + output_path=tmp_path / "duration_run", + run_name="dur_test", + ) + + assert result.output_path is not None + assert (tmp_path / "duration_run" / "responses.jsonl").exists() + assert (tmp_path / "duration_run" / "summary.json").exists() + assert (tmp_path / "duration_run" / "stats.json").exists() + + +def test_prepare_run_with_duration(runner: Runner): + """_prepare_run should pass run_duration through to _Run.""" + run = runner._prepare_run( + payload={"prompt": "test"}, + run_duration=30, + clients=2, + ) + assert run._time_bound is True + assert run.run_duration == 30 + assert run._n_requests == 0 + + +def test_prepare_run_duration_and_n_requests_conflict(runner: Runner): + """_prepare_run should raise when both are set.""" + with pytest.raises(ValueError, match="Cannot set both"): + runner._prepare_run( + payload={"prompt": "test"}, + n_requests=10, + run_duration=30, + clients=2, + ) diff --git a/tests/unit/test_running_stats.py b/tests/unit/test_running_stats.py new file mode 100644 index 0000000..2f04d78 --- /dev/null +++ b/tests/unit/test_running_stats.py @@ -0,0 +1,220 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +import time + +import pytest + +from llmeter.utils import RunningStats + + +@pytest.fixture +def rs(): + return RunningStats( + metrics=[ + "time_to_first_token", + "time_to_last_token", + "time_per_output_token", + "num_tokens_input", + "num_tokens_output", + ] + ) + + +@pytest.fixture +def populated_rs(rs): + """A RunningStats with 3 responses recorded.""" + responses = [ + { + "time_to_first_token": 0.3, + "time_to_last_token": 0.8, + "time_per_output_token": 0.02, + "num_tokens_input": 100, + "num_tokens_output": 25, + "error": None, + }, + { + "time_to_first_token": 0.5, + "time_to_last_token": 1.2, + "time_per_output_token": 0.03, + "num_tokens_input": 120, + "num_tokens_output": 30, + "error": None, + }, + { + "time_to_first_token": 0.4, + "time_to_last_token": 1.0, + "time_per_output_token": 0.025, + "num_tokens_input": 110, + "num_tokens_output": 28, + "error": "timeout", + }, + ] + for r in responses: + rs.record_send() + rs.update(r) + return rs + + +# ── record_send ────────────────────────────────────────────────────────────── + + +class TestRecordSend: + def test_first_send_sets_first_time(self, rs): + assert rs._first_send_time is None + rs.record_send() + assert rs._first_send_time is not None + assert rs._last_send_time is not None + assert rs._sends == 1 + + def test_subsequent_sends_update_last_time(self, rs): + rs.record_send() + first = rs._first_send_time + time.sleep(0.01) + rs.record_send() + assert rs._first_send_time == first + assert rs._last_send_time > first + assert rs._sends == 2 + + def test_send_count_increments(self, rs): + for _ in range(5): + rs.record_send() + assert rs._sends == 5 + + +# ── update ─────────────────────────────────────────────────────────────────── + + +class TestUpdate: + def test_count_increments(self, rs): + rs.update({"time_to_first_token": 0.3, "error": None}) + assert rs._count == 1 + rs.update({"time_to_first_token": 0.5, "error": None}) + assert rs._count == 2 + + def test_failed_count(self, rs): + rs.update({"error": "timeout"}) + rs.update({"error": None}) + rs.update({"error": "connection refused"}) + assert rs._failed == 2 + + def test_none_values_skipped(self, rs): + rs.update({"time_to_first_token": None, "error": None}) + assert len(rs._values["time_to_first_token"]) == 0 + + def test_nan_values_skipped(self, rs): + rs.update({"time_to_first_token": float("nan"), "error": None}) + assert len(rs._values["time_to_first_token"]) == 0 + + def test_sums_accumulated(self, rs): + rs.update({"num_tokens_output": 10, "error": None}) + rs.update({"num_tokens_output": 20, "error": None}) + assert rs._sums["num_tokens_output"] == 30 + + def test_values_sorted(self, rs): + rs.update({"time_to_first_token": 0.5, "error": None}) + rs.update({"time_to_first_token": 0.1, "error": None}) + rs.update({"time_to_first_token": 0.3, "error": None}) + assert rs._values["time_to_first_token"] == [0.1, 0.3, 0.5] + + +# ── to_stats ───────────────────────────────────────────────────────────────── + + +class TestToStats: + def test_basic_stats(self, populated_rs): + stats = populated_rs.to_stats() + assert stats["failed_requests"] == 1 + assert "time_to_first_token-p50" in stats + assert "time_to_last_token-average" in stats + assert "num_tokens_output-p90" in stats + + def test_with_run_context(self, populated_rs): + stats = populated_rs.to_stats( + total_requests=3, + total_test_time=10.0, + result_dict={"model_id": "test"}, + ) + assert stats["model_id"] == "test" + assert stats["requests_per_minute"] == pytest.approx(18.0) + assert stats["failed_requests_rate"] == pytest.approx(1 / 3) + assert stats["total_output_tokens"] == 83 + + def test_without_run_context(self, populated_rs): + stats = populated_rs.to_stats() + assert stats["failed_requests"] == 1 + assert stats["total_input_tokens"] == 330 + assert stats["total_output_tokens"] == 83 + + def test_empty_stats(self, rs): + stats = rs.to_stats() + assert stats["failed_requests"] == 0 + assert stats["total_input_tokens"] == 0 + + +# ── snapshot ───────────────────────────────────────────────────────────────── + + +class TestSnapshot: + def test_placeholder_when_empty(self, rs): + result = rs.snapshot() + assert all(v == "—" for v in result.values()) + # Should have all default keys + assert "rpm" in result + assert "p50_ttft" in result + assert "fail" in result + assert "output_tps" in result + + def test_placeholder_with_custom_fields(self, rs): + fields = {"my_rpm": "rpm", "my_fail": "failed"} + result = rs.snapshot(fields) + assert result == {"my_rpm": "—", "my_fail": "—"} + + def test_failed_count(self, populated_rs): + result = populated_rs.snapshot({"fail": "failed"}) + assert result["fail"] == "1" + + def test_rpm_uses_send_window(self, rs): + rs._first_send_time = 100.0 + rs._last_send_time = 110.0 # 10 second window + rs.update({"error": None}) + rs.update({"error": None}) + rs.update({"error": None}) + result = rs.snapshot({"rpm": "rpm"}) + # 3 responses / 10 seconds * 60 = 18.0 rpm + assert result["rpm"] == "18.0" + + def test_rpm_not_shown_with_single_send(self, rs): + """With only one send, first == last, no window to compute RPM.""" + rs._first_send_time = 100.0 + rs._last_send_time = 100.0 + rs.update({"error": None}) + result = rs.snapshot({"rpm": "rpm"}) + assert "rpm" not in result + + def test_output_tps_uses_send_window(self, rs): + rs._first_send_time = 100.0 + rs._last_send_time = 110.0 # 10 second window + rs.update({"num_tokens_output": 500, "error": None}) + rs.update({"num_tokens_output": 300, "error": None}) + result = rs.snapshot({"tps": "output_tps"}) + # 800 tokens / 10 seconds = 80.0 tok/s + assert result["tps"] == "80.0 tok/s" + + def test_sum_aggregation(self, populated_rs): + result = populated_rs.snapshot({"out": ("num_tokens_output", "sum")}) + assert result["out"] == "83" + + def test_percentile_aggregation(self, populated_rs): + result = populated_rs.snapshot({"p50": ("time_to_first_token", "p50")}) + assert "p50" in result + assert result["p50"].endswith("s") + + def test_inverse_aggregation(self, populated_rs): + result = populated_rs.snapshot({"tps": ("time_per_output_token", "p50", "inv")}) + assert "tps" in result + assert "tok/s" in result["tps"] + + def test_empty_fields_returns_empty(self, populated_rs): + result = populated_rs.snapshot({}) + assert result == {} From 46d145442e532ee9619ab1820024a4bf16ccabbe Mon Sep 17 00:00:00 2001 From: Alessandro Cere Date: Wed, 8 Apr 2026 16:25:20 +0800 Subject: [PATCH 3/3] refactor: address PR #58 review comments Consolidate live display config (review comment 1): - Merge _GROUP_PATTERNS + _GROUP_ORDER into single DEFAULT_GROUPS tuple - Make groups a constructor parameter on LiveStatsDisplay Move display aliases from RunningStats to LiveStatsDisplay (comment 4): - Remove RunningStats.snapshot() and DEFAULT_SNAPSHOT_STATS - Add rpm/output_tps as regular keys in RunningStats.to_stats() - Add LiveStatsDisplay.format_stats() owning alias mapping + formatting - New DEFAULT_DISPLAY_STATS in live_display.py maps display labels to canonical stat keys (e.g. "time_to_first_token-p50") - Runner passes raw to_stats() output; display handles the rest Cache fallback stats computation (comment 2): - Result.stats property caches _compute_stats back to _preloaded_stats Preserve contributed stats on load (comment 3): - Result.load(load_responses=True) merges extra keys from stats.json so callback-contributed stats survive save/load round-trips Make Result fields optional (comment 5): - total_requests, clients, n_requests now optional to match _RunConfig Accept timedelta for run_duration (comment 6): - run_duration accepts int | float | timedelta; normalized in __post_init__ Remove _n_requests indirection (comment 7): - Eliminated private _n_requests; n_requests set directly to resolved value Consolidate invoke methods (comment 8): - Merged 6 methods into 3: _invoke_n_no_wait (n + duration), _invoke_client (replaces _invoke_n/_invoke_duration), _invoke_clients (replaces _invoke_n_c/_invoke_duration_c) Tests: - Add TestContributedStatsRoundTrip (8 tests) for save/load round-trips - Add TestSendWindowStats for rpm/output_tps in to_stats() - Add TestFormatStat for display formatting - Update all tests for renamed methods and new APIs --- docs/user_guide/run_experiments.md | 2 +- llmeter/experiments.py | 8 +- llmeter/live_display.py | 206 +++++++++++++---- llmeter/results.py | 23 +- llmeter/runner.py | 342 ++++++++++++----------------- llmeter/utils.py | 148 ++----------- tests/unit/test_experiments.py | 2 +- tests/unit/test_live_display.py | 107 +++++++-- tests/unit/test_results.py | 186 ++++++++++++++++ tests/unit/test_runner.py | 64 +++--- tests/unit/test_running_stats.py | 74 +++---- 11 files changed, 690 insertions(+), 472 deletions(-) diff --git a/docs/user_guide/run_experiments.md b/docs/user_guide/run_experiments.md index 87d6819..a0e63dd 100644 --- a/docs/user_guide/run_experiments.md +++ b/docs/user_guide/run_experiments.md @@ -75,7 +75,7 @@ results = await endpoint_test.run( ) ``` -Pass `progress_bar_stats={}` to disable live stats entirely. See [`RunningStats.DEFAULT_SNAPSHOT_STATS`](../reference/utils.md#llmeter.utils.RunningStats) for the full default configuration. +Pass `progress_bar_stats={}` to disable live stats entirely. See [`DEFAULT_DISPLAY_STATS`](../reference/live_display.md) for the full default configuration. ### Low-memory mode diff --git a/llmeter/experiments.py b/llmeter/experiments.py index 55eece0..ac1a651 100644 --- a/llmeter/experiments.py +++ b/llmeter/experiments.py @@ -9,7 +9,7 @@ import logging import os from dataclasses import dataclass, field -from datetime import datetime +from datetime import datetime, timedelta from math import ceil from typing import Callable, Literal @@ -140,7 +140,7 @@ class LoadTest: low_memory (bool): When ``True``, responses are written to disk but not kept in memory. Requires ``output_path``. Defaults to ``False``. progress_bar_stats (dict | None): Controls which live stats appear on the progress - bar. See ``RunningStats.DEFAULT_SNAPSHOT_STATS`` for the default. + bar. See ``DEFAULT_DISPLAY_STATS`` in ``llmeter.live_display`` for the default. output_path (os.PathLike | str | None): Where to save results. tokenizer (Tokenizer | None): Optional tokenizer for token counting. test_name (str | None): Name for this test. Defaults to current date/time. @@ -186,9 +186,9 @@ class LoadTest: sequence_of_clients: list[int] min_requests_per_client: int = 1 min_requests_per_run: int = 10 - run_duration: int | float | None = None + run_duration: int | float | timedelta | None = None low_memory: bool = False - progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None + progress_bar_stats: dict[str, str | tuple[str, str]] | None = None output_path: os.PathLike | str | None = None tokenizer: Tokenizer | None = None test_name: str | None = None diff --git a/llmeter/live_display.py b/llmeter/live_display.py index f8c9416..86a5f31 100644 --- a/llmeter/live_display.py +++ b/llmeter/live_display.py @@ -15,60 +15,117 @@ logger = logging.getLogger(__name__) -# Mapping from key substrings to (group_name, display_order). -# Stats are grouped by the first matching pattern; unmatched keys go to "Other". -_GROUP_PATTERNS: list[tuple[str, str]] = [ - ("rpm", "Throughput"), - ("tps", "Throughput"), - ("ttft", "TTFT"), - ("ttlt", "TTLT"), - ("token", "Tokens"), - ("fail", "Errors"), -] - -_GROUP_ORDER = ["Throughput", "TTFT", "TTLT", "Tokens", "Errors", "Other"] +#: Default grouping of stat keys for display. Each entry is +#: ``(group_name, tuple_of_substrings)``; a stat key is assigned to the first +#: group whose substring matches (case-insensitive). Unmatched keys fall into +#: ``"Other"``. The tuple order defines the column order in the rendered table. +DEFAULT_GROUPS: tuple[tuple[str, tuple[str, ...]], ...] = ( + ("Throughput", ("rpm", "tps")), + ("TTFT", ("ttft",)), + ("TTLT", ("ttlt",)), + ("Tokens", ("token",)), + ("Errors", ("fail",)), + ("Other", ("",)), +) + +#: Default stats to show on the progress bar during a run. +#: +#: Each entry maps a short display label to a *stat spec*: +#: +#: * A plain string — the canonical key in ``RunningStats.to_stats()`` +#: (e.g. ``"failed_requests"``, ``"rpm"``, ``"time_to_first_token-p50"``). +#: * A ``(stat_key, "inv")`` tuple — display the reciprocal of the value +#: (e.g. seconds-per-token → tokens-per-second). +DEFAULT_DISPLAY_STATS: dict[str, str | tuple[str, str]] = { + "rpm": "rpm", + "output_tps": "output_tps", + "p50_ttft": "time_to_first_token-p50", + "p90_ttft": "time_to_first_token-p90", + "p50_ttlt": "time_to_last_token-p50", + "p90_ttlt": "time_to_last_token-p90", + "p50_tps": ("time_per_output_token-p50", "inv"), + "input_tokens": "num_tokens_input-sum", + "output_tokens": "num_tokens_output-sum", + "fail": "failed_requests", +} + + +def _format_stat(key: str, value: float | int, *, invert: bool = False) -> str: + """Format a single stat value as a human-readable string. + Args: + key: The canonical stat key (used to infer units). + value: The raw numeric value. + invert: If ``True``, display ``1/value`` (e.g. time → rate). -def _classify(key: str) -> str: + Returns: + A formatted string like ``"0.312s"``, ``"28.3 tok/s"``, or ``"83"``. + """ + if invert and value > 0: + return f"{1.0 / value:.1f} tok/s" + if "tps" in key or "output_tps" in key: + return f"{value:.1f} tok/s" + if "time" in key or "ttft" in key or "ttlt" in key: + return f"{value:.3f}s" + if "rpm" in key: + return f"{value:.1f}" + if isinstance(value, float) and value == int(value): + return str(int(value)) + if isinstance(value, int): + return str(value) + return f"{value:.1f}" + + +def _classify( + key: str, + groups: tuple[tuple[str, tuple[str, ...]], ...] = DEFAULT_GROUPS, +) -> str: """Return the group name for a stat key based on substring matching. - Matches the key (case-insensitive) against ``_GROUP_PATTERNS``. The first - matching pattern determines the group. Unmatched keys are placed in - ``"Other"``. + Matches the key (case-insensitive) against *groups*. The first matching + pattern determines the group. Unmatched keys are placed in ``"Other"``. Args: key (str): The stat display label to classify (e.g. ``"p50_ttft"``). + groups: Group definitions to match against. Defaults to + :data:`DEFAULT_GROUPS`. Returns: str: The group name (e.g. ``"TTFT"``, ``"Throughput"``, ``"Other"``). """ key_lower = key.lower() - for pattern, group in _GROUP_PATTERNS: - if pattern in key_lower: - return group + for group_name, patterns in groups: + for pattern in patterns: + if pattern and pattern in key_lower: + return group_name return "Other" -def _group_stats(stats: dict[str, str]) -> OrderedDict[str, list[tuple[str, str]]]: +def _group_stats( + stats: dict[str, str], + groups: tuple[tuple[str, tuple[str, ...]], ...] = DEFAULT_GROUPS, +) -> OrderedDict[str, list[tuple[str, str]]]: """Organize stats into ordered groups for display. Each stat key is classified via :func:`_classify` and placed into the corresponding group. Groups are returned in the canonical order defined - by ``_GROUP_ORDER``, with empty groups omitted. + by *groups*, with empty groups omitted. Args: stats (dict[str, str]): Mapping of stat labels to formatted values. + groups: Group definitions controlling classification and order. + Defaults to :data:`DEFAULT_GROUPS`. Returns: OrderedDict[str, list[tuple[str, str]]]: Groups in display order, where each value is a list of ``(label, formatted_value)`` tuples. """ - groups: dict[str, list[tuple[str, str]]] = {} + buckets: dict[str, list[tuple[str, str]]] = {} for k, v in stats.items(): - group = _classify(k) - groups.setdefault(group, []).append((k, v)) - # Return in canonical order, skipping empty groups - return OrderedDict((g, groups[g]) for g in _GROUP_ORDER if g in groups) + group = _classify(k, groups) + buckets.setdefault(group, []).append((k, v)) + group_order = [name for name, _ in groups] + return OrderedDict((g, buckets[g]) for g in group_order if g in buckets) def _in_notebook() -> bool: @@ -95,43 +152,114 @@ class LiveStatsDisplay: In Jupyter notebooks, renders a grouped HTML table that updates in-place. Stats are automatically organized into logical groups (Throughput, TTFT, - TTLT, Tokens, Errors) based on their key names. + TTLT, Tokens, Errors) based on their display label names. In terminals, prints a compact grouped multi-line block using ANSI escape codes to overwrite previous output. + The display owns all alias mapping and formatting. Callers pass raw + numeric stats (e.g. from ``RunningStats.to_stats()``) and the display + selects, aliases, formats, and groups them for presentation. + Args: disabled (bool): If ``True``, all display calls are no-ops. + groups: Group definitions controlling how display labels are classified + and ordered. Defaults to :data:`DEFAULT_GROUPS`. + display_stats: Mapping of ``{display_label: stat_spec}`` controlling + which stats to show and how to label them. Each *stat_spec* is + either a plain canonical key string (e.g. ``"time_to_first_token-p50"``) + or a ``(key, "inv")`` tuple for reciprocal display. + Defaults to :data:`DEFAULT_DISPLAY_STATS`. Example:: display = LiveStatsDisplay() - display.update({"rpm": "185.9", "p50_ttft": "0.312s", "fail": "0"}) - display.update({"rpm": "190.2", "p50_ttft": "0.305s", "fail": "1"}) + raw = running_stats.to_stats() + display.update(raw) display.close() """ - def __init__(self, disabled: bool = False): + def __init__( + self, + disabled: bool = False, + groups: tuple[tuple[str, tuple[str, ...]], ...] = DEFAULT_GROUPS, + display_stats: dict[str, str | tuple[str, str]] | None = None, + ): self._disabled = disabled + self._groups = groups + self._display_stats = ( + display_stats if display_stats is not None else DEFAULT_DISPLAY_STATS + ) self._is_notebook = _in_notebook() self._handle = None self._last_line_count = 0 - def update(self, stats: dict[str, str], extra_prefix: str = "") -> None: - """Refresh the display with new stats. + def format_stats( + self, + raw: dict[str, object], + ) -> dict[str, str]: + """Select and format raw stats for display. + + Picks the stats listed in ``self._display_stats`` from *raw*, applies + alias renaming and formatting, and returns an ordered dict of + ``{display_label: formatted_value}`` strings. Args: - stats (dict[str, str]): Mapping of label to formatted value. + raw: Flat dictionary of raw numeric stats, as returned by + ``RunningStats.to_stats()``. + + Returns: + Ordered dict of ``{label: formatted_string}`` suitable for + rendering. + """ + if not raw: + return {label: "—" for label in self._display_stats} + + info: dict[str, str] = {} + for label, spec in self._display_stats.items(): + if isinstance(spec, tuple): + key, modifier = spec[0], spec[1] + invert = modifier == "inv" + else: + key = spec + invert = False + + val = raw.get(key) + if val is None: + info[label] = "—" + continue + + try: + info[label] = _format_stat(key, float(val), invert=invert) + except (TypeError, ValueError): + info[label] = str(val) + + return info + + def update( + self, + raw_stats: dict[str, object], + extra_prefix: str = "", + ) -> None: + """Refresh the display with new raw stats. + + Args: + raw_stats: Flat dictionary of raw numeric stats from + ``RunningStats.to_stats()``. extra_prefix (str): Optional prefix text shown before the table (e.g. ``"reqs=127"`` for time-bound runs). """ - if self._disabled or not stats: + if self._disabled: + return + + formatted = self.format_stats(raw_stats) + if not formatted: return if self._is_notebook: - self._update_notebook(stats, extra_prefix) + self._update_notebook(formatted, extra_prefix) else: - self._update_terminal(stats, extra_prefix) + self._update_terminal(formatted, extra_prefix) def _update_notebook(self, stats: dict[str, str], extra_prefix: str) -> None: """Render stats as a grouped HTML table in a Jupyter notebook. @@ -146,9 +274,7 @@ def _update_notebook(self, stats: dict[str, str], extra_prefix: str) -> None: """ from IPython.display import HTML, display - groups = _group_stats(stats) - - # Build one column per group: header on top, key=value rows below + groups = _group_stats(stats, self._groups) # All columns rendered side-by-side in a single table row max_rows = max(len(items) for items in groups.values()) @@ -210,7 +336,7 @@ def _update_terminal(self, stats: dict[str, str], extra_prefix: str) -> None: if self._last_line_count > 0: sys.stderr.write(f"\033[{self._last_line_count}A\033[J") - groups = _group_stats(stats) + groups = _group_stats(stats, self._groups) lines = [] if extra_prefix: lines.append(f" {extra_prefix}") diff --git a/llmeter/results.py b/llmeter/results.py index 6654d2e..1652d0f 100644 --- a/llmeter/results.py +++ b/llmeter/results.py @@ -43,9 +43,9 @@ class Result: """Results of a test run.""" responses: list[InvocationResponse] - total_requests: int - clients: int - n_requests: int + total_requests: int | None = None + clients: int = 1 + n_requests: int | None = None total_test_time: float | None = None model_id: str | None = None output_path: os.PathLike | None = None @@ -260,8 +260,19 @@ def load( else: result._preloaded_stats = None else: - # Compute stats from the loaded responses + # Compute stats from the loaded responses, but also merge any + # contributed stats that were persisted in stats.json so they + # survive a save/load round-trip. result._preloaded_stats = cls._compute_stats(result) + stats_path = result_path / "stats.json" + if stats_path.exists(): + with stats_path.open("r") as s: + saved_stats = json.loads(s.read()) + # Contributed stats are any keys in the saved file that are + # not produced by _compute_stats (i.e. they came from callbacks). + for key, value in saved_stats.items(): + if key not in result._preloaded_stats: + result._preloaded_stats[key] = value return result @@ -336,7 +347,9 @@ def stats(self) -> dict: stats = self._preloaded_stats.copy() else: # Fallback: compute from responses (e.g. Result constructed manually) - stats = self._compute_stats(self) + # Cache so subsequent accesses don't recompute. + self._preloaded_stats = self._compute_stats(self) + stats = self._preloaded_stats.copy() if self._contributed_stats: stats.update(self._contributed_stats) diff --git a/llmeter/runner.py b/llmeter/runner.py index 0605a36..7922868 100644 --- a/llmeter/runner.py +++ b/llmeter/runner.py @@ -12,7 +12,7 @@ from concurrent.futures import ThreadPoolExecutor from copy import deepcopy from dataclasses import InitVar, asdict, dataclass, fields, replace -from datetime import datetime +from datetime import datetime, timedelta from itertools import cycle from typing import TYPE_CHECKING, Any from uuid import uuid4 @@ -57,14 +57,14 @@ class _RunConfig: tokenizer: Tokenizer | Any | None = None clients: int = 1 n_requests: int | None = None - run_duration: int | float | None = None + run_duration: int | float | timedelta | None = None payload: dict | list[dict] | os.PathLike | str | None = None run_name: str | None = None run_description: str | None = None timeout: int | float = 60 callbacks: list[Callback] | None = None low_memory: bool = False - progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None + progress_bar_stats: dict[str, str | tuple[str, str]] | None = None disable_per_client_progress_bar: InitVar[bool] = True disable_clients_progress_bar: InitVar[bool] = True @@ -73,10 +73,12 @@ def __post_init__(self, disable_client_progress_bar, disable_clients_progress_ba self._disable_clients_progress_bar = disable_clients_progress_bar self._random_seed = 0 - if self.n_requests is not None: + if self.n_requests is not None and self.run_duration is None: assert self.n_requests > 0, "Number of requests must be a positive integer" if self.run_duration is not None: + if isinstance(self.run_duration, timedelta): + self.run_duration = self.run_duration.total_seconds() assert self.run_duration > 0, "Run duration must be a positive number" assert self.clients > 0, "Number of clients must be a positive integer" @@ -190,10 +192,10 @@ def _validate_and_prepare_payload(self): Normalizes the payload into a list of dicts, validates that ``n_requests`` and ``run_duration`` are not both set, and sets ``_time_bound`` and - ``_n_requests`` accordingly. + ``n_requests`` accordingly. - For count-bound runs, ``_n_requests`` defaults to the number of payloads - when not explicitly provided. For time-bound runs, ``_n_requests`` is set + For count-bound runs, ``n_requests`` defaults to the number of payloads + when not explicitly provided. For time-bound runs, ``n_requests`` is set to 0 since the actual count is unknown upfront. Raises: @@ -207,7 +209,11 @@ def _validate_and_prepare_payload(self): if isinstance(self.payload, dict): self.payload = [self.payload] - if self.run_duration is not None and self.n_requests is not None: + if ( + self.run_duration is not None + and self.n_requests is not None + and self.n_requests != 0 + ): raise ValueError( "Cannot set both n_requests and run_duration. " "Use n_requests for request-bound runs or run_duration for time-bound runs." @@ -215,10 +221,11 @@ def _validate_and_prepare_payload(self): self._time_bound = self.run_duration is not None if self._time_bound: - # For time-bound runs, _n_requests is unknown upfront - self._n_requests = 0 + # For time-bound runs, n_requests is unknown upfront; set to 0 + # and update to the actual count after the run completes. + self.n_requests = 0 else: - self._n_requests = self.n_requests or len(self.payload) + self.n_requests = self.n_requests or len(self.payload) @staticmethod async def _compute_time_per_output_token(response: InvocationResponse): @@ -307,12 +314,12 @@ async def _process_results_from_q(self, output_path: Path | None = None): self._progress_bar.update(1) if self._stats_display is not None: - snapshot = self._running_stats.snapshot(self.progress_bar_stats) - if snapshot: + raw = self._running_stats.to_stats() + if raw: prefix = ( f"reqs={self._running_stats._count}" if self._time_bound else "" ) - self._stats_display.update(snapshot, extra_prefix=prefix) + self._stats_display.update(raw, extra_prefix=prefix) if output_path: output_path.parent.mkdir(parents=True, exist_ok=True) @@ -325,18 +332,27 @@ def _invoke_n_no_wait( self, payload: list[dict], n: int | None = None, + duration: float | None = None, shuffle_order=True, ) -> list[InvocationResponse]: - """Generate *n* invocations synchronously for a single client. + """Generate invocations synchronously for a single client. - Cycles through *payload* until *n* invocations are generated, sending - each request to the endpoint and pushing the response onto - ``self._queue`` for async token-counting and stats collection. + Terminates when either *n* requests have been sent or *duration* seconds + have elapsed, whichever is specified. Exactly one of *n* or *duration* + must be provided. + + Cycles through *payload*, sending each request to the endpoint and + pushing the response onto ``self._queue`` for async token-counting and + stats collection. Args: payload (list[dict]): The input payloads to cycle through. n (int | None, optional): The number of invocations to generate. - If not specified, every element in the payload is used once. + If not specified, every element in the payload is used once + (only when *duration* is also ``None``). + duration (float | None, optional): Maximum wall-clock seconds to + keep sending requests. When set, requests are sent continuously + until the deadline. shuffle_order (bool, optional): Whether to shuffle the order of payloads before generating invocations. Defaults to True. @@ -351,71 +367,41 @@ def _invoke_n_no_wait( random.seed(0) payload = random.sample(payload, k=len(payload)) - responses = [] - if n is None: - n = len(payload) + responses: list[InvocationResponse] = [] if not payload: return responses - payload_iter = cycle(payload) - pbar = trange( - n, - leave=False, - desc="Requests", - disable=_disable_tqdm or self._disable_per_client_progress_bar, - ) - for _ in pbar: - p = next(payload_iter) - try: - p = asyncio.run(process_before_invoke_callbacks(self.callbacks, p)) - self._running_stats.record_send() - response = self._endpoint.invoke(p) - - except Exception as e: - logger.exception(f"Error with invocation with payload {p}: {e}") - response = InvocationResponse.error_output( - id=uuid4().hex, - error=str(e), - ) - responses.append(response) - if self._queue: - # fix for thread-aware sync, from https://stackoverflow.com/a/57316517/2109965 - self._queue._loop.call_soon_threadsafe( # type: ignore - self._queue.put_nowait, response - ) - return responses - def _invoke_for_duration( - self, - payload: list[dict], - duration: float, - shuffle_order=True, - ) -> list[InvocationResponse]: - """Generate invocations continuously until *duration* seconds have elapsed. - - Cycles through *payload* indefinitely, stopping only when the wall-clock - time exceeds *duration*. Each completed request is pushed onto - ``self._queue`` for async token-counting and stats collection, mirroring - the behaviour of :meth:`_invoke_n_no_wait`. + time_bound = duration is not None + if time_bound: + deadline = time.perf_counter() + duration + else: + if n is None: + n = len(payload) - Args: - payload (list[dict]): The input payloads to cycle through. - duration (float): Maximum wall-clock seconds to keep sending requests. - shuffle_order (bool, optional): Whether to shuffle the order of payloads - before generating invocations. Defaults to True. + payload_iter = cycle(payload) - Returns: - list[InvocationResponse]: All responses collected during the window. - """ - if shuffle_order: - self._random_seed += random.randint(1, 1000) - random.seed(0) - payload = random.sample(payload, k=len(payload)) + # Count-bound runs get a trange progress bar; time-bound runs use a + # separate _tick_time_bar task so we skip the per-client bar here. + pbar = ( + trange( + n, + leave=False, + desc="Requests", + disable=_disable_tqdm or self._disable_per_client_progress_bar, + ) + if not time_bound + else None + ) - responses: list[InvocationResponse] = [] - deadline = time.perf_counter() + duration - payload_iter = cycle(payload) + sent = 0 + while True: + if time_bound: + if time.perf_counter() >= deadline: + break + else: + if sent >= n: + break - while time.perf_counter() < deadline: p = next(payload_iter) try: p = asyncio.run(process_before_invoke_callbacks(self.callbacks, p)) @@ -429,27 +415,37 @@ def _invoke_for_duration( ) responses.append(response) if self._queue: + # fix for thread-aware sync, from https://stackoverflow.com/a/57316517/2109965 self._queue._loop.call_soon_threadsafe( # type: ignore self._queue.put_nowait, response ) + sent += 1 + if pbar is not None: + pbar.update(1) + + if pbar is not None: + pbar.close() return responses - async def _invoke_n( + async def _invoke_client( self, payload: list[dict], n: int | None = None, + duration: float | None = None, add_start_jitter=True, shuffle_order=True, ) -> list[InvocationResponse]: - """Asynchronously generate *n* invocations for a single client. + """Asynchronously generate invocations for a single client. - Wraps :meth:`_invoke_n_no_wait` in a thread with an overall timeout - of ``self.timeout * n`` seconds. + Wraps :meth:`_invoke_n_no_wait` in a thread. For count-bound runs an + overall timeout of ``self.timeout * n`` is applied; time-bound runs + have no extra timeout (the duration itself is the limit). Args: payload (list[dict]): The input payload(s) to generate invocations for. n (int | None, optional): The number of invocations to generate. Defaults to None (one per payload element). + duration (float | None, optional): Maximum wall-clock seconds. add_start_jitter (bool, optional): Whether to add a random delay before starting the invocations loop to avoid batch bunching when using multiple clients. Defaults to True. @@ -458,7 +454,7 @@ async def _invoke_n( Returns: list[InvocationResponse]: A list of response objects. Returns an empty - list if the overall timeout is exceeded. + list if the overall timeout is exceeded (count-bound only). """ if add_start_jitter: @@ -467,70 +463,43 @@ async def _invoke_n( if shuffle_order: self._random_seed = random.randint(0, 2**16 - 1) + coro = asyncio.to_thread( + self._invoke_n_no_wait, payload, n, duration, shuffle_order + ) + + if duration is not None: + # Time-bound: no extra timeout — the duration is the limit + return await coro + try: - response = await asyncio.wait_for( - asyncio.to_thread(self._invoke_n_no_wait, payload, n, shuffle_order), + return await asyncio.wait_for( + coro, timeout=self.timeout * (n or len(payload)), ) except asyncio.TimeoutError: logger.error("client timeout!") return [] - return response - - async def _invoke_duration( - self, - payload: list[dict], - add_start_jitter=True, - shuffle_order=True, - ) -> list[InvocationResponse]: - """Asynchronously generate invocations for a single client until duration expires. - - Wraps :meth:`_invoke_for_duration` in a thread. The client sends requests - continuously for ``self.run_duration`` seconds. - - Args: - payload (list[dict]): The input payload(s) to cycle through. - add_start_jitter (bool, optional): Whether to add a random delay before - starting the invocations loop to avoid batch bunching when using - multiple clients. Defaults to True. - shuffle_order (bool, optional): Whether to shuffle the order of payloads - before generating invocations. Defaults to True. - - Returns: - list[InvocationResponse]: All responses collected during the time window. - """ - - if add_start_jitter: - await asyncio.sleep(random.random() * 0.01) - - if shuffle_order: - self._random_seed = random.randint(0, 2**16 - 1) - - return await asyncio.to_thread( - self._invoke_for_duration, - payload, - self.run_duration, - shuffle_order, - ) - - async def _invoke_n_c( + async def _invoke_clients( self, payload: list[dict], n_requests: int | None = None, + duration: float | None = None, clients: int = 1, ) -> tuple[float, float, float]: - """Spawn *clients* concurrent count-bound invocation loops. + """Spawn *clients* concurrent invocation loops. - Each client generates *n_requests* invocations by delegating to - :meth:`_invoke_n`. All clients run concurrently and the method waits - for all of them to finish before signalling the token-counting queue - to stop. + Each client generates invocations by delegating to + :meth:`_invoke_client`. All clients run concurrently and the method + waits for all of them to finish before signalling the token-counting + queue to stop. Args: payload (list[dict]): The input payloads to send. n_requests (int | None, optional): The number of invocations to - generate per client. Defaults to None. + generate per client (count-bound). Defaults to None. + duration (float | None, optional): Maximum wall-clock seconds per + client (time-bound). Defaults to None. clients (int, optional): The number of concurrent client connections. Defaults to 1. @@ -538,63 +507,35 @@ async def _invoke_n_c( tuple[float, float, float]: A ``(total_test_time, start_t, end_t)`` tuple of ``time.perf_counter`` values. """ - logger.info( - f"Generating {clients} connections with {n_requests} invocations each" - ) + if duration is not None: + logger.info(f"Generating {clients} connections for {duration}s each") + else: + logger.info( + f"Generating {clients} connections with {n_requests} invocations each" + ) start_t = time.perf_counter() await tqdm.gather( - *[self._invoke_n(payload, n_requests) for _ in range(clients)], + *[ + self._invoke_client(payload, n=n_requests, duration=duration) + for _ in range(clients) + ], leave=False, desc="Clients", disable=_disable_tqdm or self._disable_clients_progress_bar, ) end_t = time.perf_counter() total_test_time = end_t - start_t - logger.info( - f"Completed {clients} clients x {n_requests} requests in " - f"{total_test_time * 1000:.2f}ms" - ) - if self._queue: - await self._queue.put(None) - logger.debug("Signaling token counting task to exit") - return total_test_time, start_t, end_t - - async def _invoke_duration_c( - self, - payload: list[dict], - clients: int = 1, - ) -> tuple[float, float, float]: - """Spawn *clients* concurrent time-bound invocation loops. - - Each client sends requests continuously for ``self.run_duration`` seconds - by delegating to :meth:`_invoke_duration`. All clients run concurrently - and the method waits for all of them to finish before signalling the - token-counting queue to stop. - - Args: - payload (list[dict]): The input payloads to cycle through. - clients (int, optional): The number of concurrent client connections. - Defaults to 1. - - Returns: - tuple[float, float, float]: A ``(total_test_time, start_t, end_t)`` - tuple of ``time.perf_counter`` values. - """ - logger.info(f"Generating {clients} connections for {self.run_duration}s each") - start_t = time.perf_counter() - await tqdm.gather( - *[self._invoke_duration(payload) for _ in range(clients)], - leave=False, - desc="Clients", - disable=_disable_tqdm or self._disable_clients_progress_bar, - ) - end_t = time.perf_counter() - total_test_time = end_t - start_t - logger.info( - f"Completed {clients} clients x {self.run_duration}s in " - f"{total_test_time * 1000:.2f}ms" - ) + if duration is not None: + logger.info( + f"Completed {clients} clients x {duration}s in " + f"{total_test_time * 1000:.2f}ms" + ) + else: + logger.info( + f"Completed {clients} clients x {n_requests} requests in " + f"{total_test_time * 1000:.2f}ms" + ) if self._queue: await self._queue.put(None) @@ -630,9 +571,9 @@ async def _run(self): result = Result( responses=[], total_test_time=None, - total_requests=0 if self._time_bound else self._n_requests * self.clients, + total_requests=0 if self._time_bound else self.n_requests * self.clients, clients=self.clients, - n_requests=self._n_requests, + n_requests=self.n_requests, output_path=self.output_path, # type: ignore model_id=self._endpoint.model_id, provider=self._endpoint.provider, @@ -667,25 +608,28 @@ async def _run(self): else: # Count-bound: progress bar shows completed requests self._progress_bar = tqdm( - total=self.clients * self._n_requests, + total=self.clients * self.n_requests, leave=False, desc="Total requests", disable=_disable_tqdm, ) # Live stats display — renders as an HTML table in notebooks, multi-line in terminals - self._stats_display = LiveStatsDisplay(disabled=_disable_tqdm) + self._stats_display = LiveStatsDisplay( + disabled=_disable_tqdm, + display_stats=self.progress_bar_stats, + ) # Show the table layout immediately with placeholder values - initial_snapshot = self._running_stats.snapshot(self.progress_bar_stats) prefix = "reqs=0" if self._time_bound else "" - self._stats_display.update(initial_snapshot, extra_prefix=prefix) + self._stats_display.update({}, extra_prefix=prefix) try: run_start_time = now_utc() if self._time_bound: - invoke_coro = self._invoke_duration_c( + invoke_coro = self._invoke_clients( payload=self.payload, # type: ignore + duration=self.run_duration, clients=self.clients, ) _, (total_test_time, start_time, end_time), _ = await asyncio.gather( @@ -698,9 +642,9 @@ async def _run(self): self._tick_time_bar(), ) else: - invoke_coro = self._invoke_n_c( + invoke_coro = self._invoke_clients( payload=self.payload, # type: ignore - n_requests=self._n_requests, + n_requests=self.n_requests, clients=self.clients, ) _, (total_test_time, start_time, end_time) = await asyncio.gather( @@ -733,7 +677,7 @@ async def _run(self): total_requests=actual_total, n_requests=actual_total // max(self.clients, 1) if self._time_bound - else self._n_requests, + else self.n_requests, start_time=run_start_time, end_time=run_end_time, ) @@ -809,9 +753,10 @@ class Runner(_RunConfig): n_requests (int | None): The number of LLM invocations to generate *per client*. By default, each request in `payload` will be sent once by each client. Mutually exclusive with ``run_duration``. - run_duration (int | float | None): Run each client for this many seconds instead of a + run_duration (int | float | timedelta | None): Run each client for this many seconds instead of a fixed request count. Clients send requests continuously until the duration expires. Mutually exclusive with ``n_requests``. Defaults to ``None`` (count-bound mode). + Accepts a number of seconds or a ``timedelta``. payload (dict | list[dict] | os.PathLike | str | None): The request data to send to the endpoint under test. You can provide a single JSON payload (dict), a list of payloads (list[dict]), or a path to one or more JSON/JSON-Lines files to be loaded by @@ -834,8 +779,8 @@ class Runner(_RunConfig): ``result.load_responses()`` to load responses from disk after the run. Defaults to ``False``. progress_bar_stats (dict | None): Controls which live stats appear on the progress bar. - Maps short display labels to field specs — see - :attr:`RunningStats.DEFAULT_SNAPSHOT_STATS` for the format and defaults. Pass ``{}`` + Maps short display labels to canonical stat keys — see + :data:`~llmeter.live_display.DEFAULT_DISPLAY_STATS` for the format and defaults. Pass ``{}`` to disable live stats entirely. Defaults to ``None`` (use built-in defaults). disable_per_client_progress_bar (bool): Set `True` to disable per-client progress bars from showing during the run. Default `False` (each client's progress will be shown). @@ -878,14 +823,14 @@ async def run( tokenizer: Tokenizer | Any | None = None, clients: int | None = None, n_requests: int | None = None, - run_duration: int | float | None = None, + run_duration: int | float | timedelta | None = None, payload: dict | list[dict] | os.PathLike | str | None = None, run_name: str | None = None, run_description: str | None = None, timeout: int | float | None = None, callbacks: list[Callback] | None = None, low_memory: bool | None = None, - progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None, + progress_bar_stats: dict[str, str | tuple[str, str]] | None = None, disable_per_client_progress_bar: bool | None = None, disable_clients_progress_bar: bool | None = None, ) -> Result: @@ -910,9 +855,10 @@ async def run( clients (int): The number of concurrent clients to use for sending requests. n_requests (int | None): The number of LLM invocations to generate *per client*. Mutually exclusive with ``run_duration``. - run_duration (int | float | None): Run each client for this many seconds + run_duration (int | float | timedelta | None): Run each client for this many seconds instead of a fixed request count. Clients send requests continuously until the duration expires. Mutually exclusive with ``n_requests``. + Accepts a number of seconds or a ``timedelta``. Example:: @@ -948,8 +894,8 @@ async def run( result.load_responses() # loads from disk progress_bar_stats (dict): Controls which live stats appear on the - progress bar. Maps short display labels to field specs — see - :attr:`RunningStats.DEFAULT_SNAPSHOT_STATS` for the format and + progress bar. Maps short display labels to canonical stat keys — see + :data:`~llmeter.live_display.DEFAULT_DISPLAY_STATS` for the format and defaults. Pass ``{}`` to disable live stats entirely. Example:: @@ -957,9 +903,9 @@ async def run( # Show only p99 latency and tokens per second: result = await runner.run( progress_bar_stats={ - "p99_ttlt": ("time_to_last_token", "p99"), - "tps": ("time_per_output_token", "p50", "inv"), - "fail": "failed", + "p99_ttlt": "time_to_last_token-p99", + "tps": ("time_per_output_token-p50", "inv"), + "fail": "failed_requests", }, ) disable_per_client_progress_bar (bool): Set `True` to disable per-client progress bars diff --git a/llmeter/utils.py b/llmeter/utils.py index c43d7d0..74198ac 100644 --- a/llmeter/utils.py +++ b/llmeter/utils.py @@ -106,31 +106,6 @@ class RunningStats: # {'failed_requests': 0, ..., 'time_to_first_token-p50': 0.4, ...} """ - #: Default stats shown on the progress bar during a run. - #: Each entry maps a short display label to a spec: - #: - #: * ``(metric_name, aggregation)`` — aggregation can be ``"p50"``, ``"p90"``, - #: ``"p99"``, ``"average"``, or ``"sum"``. - #: * ``(metric_name, aggregation, "inv")`` — same as above but displays the - #: reciprocal (e.g. seconds-per-token → tokens-per-second). - #: * The literal string ``"failed"`` for the running failure count. - #: * The literal string ``"rpm"`` for live requests-per-minute based on the - #: send window (first request sent to last request sent). - #: * The literal string ``"output_tps"`` for aggregate output tokens per second - #: across all clients, based on the send window. - DEFAULT_SNAPSHOT_STATS: dict[str, tuple[str, ...] | str] = { - "rpm": "rpm", - "output_tps": "output_tps", - "p50_ttft": ("time_to_first_token", "p50"), - "p90_ttft": ("time_to_first_token", "p90"), - "p50_ttlt": ("time_to_last_token", "p50"), - "p90_ttlt": ("time_to_last_token", "p90"), - "p50_tps": ("time_per_output_token", "p50", "inv"), - "input_tokens": ("num_tokens_input", "sum"), - "output_tokens": ("num_tokens_output", "sum"), - "fail": "failed", - } - def __init__(self, metrics: Sequence[str]): self._metrics = list(metrics) self._count = 0 @@ -263,114 +238,29 @@ def to_stats( for j, v in agg.items(): stats[f"{m}-{j}"] = v - return stats - - def snapshot( - self, - fields: dict[str, tuple[str, ...] | str] | None = None, - ) -> dict[str, str]: - """Format a subset of :meth:`to_stats` for progress-bar display. - - Calls :meth:`to_stats` internally and picks only the requested fields, - formatting each value as a human-readable string. - - Args: - fields: Mapping of ``{display_label: spec}``. Each *spec* is one of: - - * ``(metric, aggregation)`` — a 2-tuple where *metric* is a tracked - metric name and *aggregation* is ``"p50"``, ``"p90"``, ``"p99"``, - ``"average"``, or ``"sum"``. - * ``(metric, aggregation, "inv")`` — a 3-tuple; same as above but - the value is inverted before display (e.g. seconds-per-token → - tokens-per-second). - * ``"failed"`` — the literal string; shows the running failure count. - * ``"rpm"`` — the literal string; shows live requests-per-minute - estimate based on the send window (first to last request sent). - * ``"output_tps"`` — the literal string; shows aggregate output - tokens per second across all clients, based on the send window. - - Defaults to :attr:`DEFAULT_SNAPSHOT_STATS` when ``None``. + # Send-window throughput (live RPM and output tokens/s). + # These use the dispatch timestamps rather than response timestamps, + # giving a more accurate picture of the request rate. + send_window = self._send_window() + if send_window and send_window > 0: + stats["rpm"] = self._count / send_window * 60 + total_out = self._sums.get("num_tokens_output", 0) + stats["output_tps"] = total_out / send_window - Returns: - An ordered dict of ``{label: formatted_value}`` strings suitable for - ``tqdm.set_postfix()``. + return stats - Example:: + def _send_window(self) -> float | None: + """Return the elapsed seconds between first and last ``record_send`` call. - # Use defaults: - rs.snapshot() - # {'p50_ttft': '0.312s', 'p90_ttlt': '1.203s', ..., 'fail': '0'} - - # Custom selection — only p99 latency and failures: - rs.snapshot({ - "p99_ttlt": ("time_to_last_token", "p99"), - "fail": "failed", - }) - # {'p99_ttlt': '2.105s', 'fail': '1'} - - # Inverted metric — tokens per second from time_per_output_token: - rs.snapshot({ - "tps": ("time_per_output_token", "p50", "inv"), - }) - # {'tps': '28.3 tok/s'} + Returns ``None`` when fewer than two sends have been recorded. """ - if self._count == 0: - if fields is None: - fields = self.DEFAULT_SNAPSHOT_STATS - return {label: "—" for label in fields} - - if fields is None: - fields = self.DEFAULT_SNAPSHOT_STATS - - raw = self.to_stats() - - info: dict[str, str] = {} - for label, spec in fields.items(): - if spec == "failed": - info[label] = str(self._failed) - continue - - if spec == "rpm": - if ( - self._first_send_time is not None - and self._last_send_time is not None - and self._last_send_time > self._first_send_time - ): - send_window = self._last_send_time - self._first_send_time - info[label] = f"{self._count / send_window * 60:.1f}" - continue - - if spec == "output_tps": - if ( - self._first_send_time is not None - and self._last_send_time is not None - and self._last_send_time > self._first_send_time - ): - send_window = self._last_send_time - self._first_send_time - total_out = self._sums.get("num_tokens_output", 0) - info[label] = f"{total_out / send_window:.1f} tok/s" - continue - - metric = spec[0] - agg = spec[1] - invert = len(spec) > 2 and spec[2] == "inv" - - if agg == "sum": - info[label] = f"{self._sums.get(metric, 0):.0f}" - continue - - val = raw.get(f"{metric}-{agg}") - if val is None: - continue - - if invert and val > 0: - info[label] = f"{1.0 / val:.1f} tok/s" - elif "time" in metric: - info[label] = f"{val:.3f}s" - else: - info[label] = f"{val:.1f}" - - return info + if ( + self._first_send_time is not None + and self._last_send_time is not None + and self._last_send_time > self._first_send_time + ): + return self._last_send_time - self._first_send_time + return None def now_utc() -> datetime: diff --git a/tests/unit/test_experiments.py b/tests/unit/test_experiments.py index 42b0fb0..8996578 100644 --- a/tests/unit/test_experiments.py +++ b/tests/unit/test_experiments.py @@ -559,7 +559,7 @@ def test_load_test_with_low_memory(self, mock_endpoint): def test_load_test_with_progress_bar_stats(self, mock_endpoint): """progress_bar_stats should be stored on the LoadTest instance.""" - custom_stats = {"rpm": "rpm", "fail": "failed"} + custom_stats = {"rpm": "rpm", "fail": "failed_requests"} lt = LoadTest( endpoint=mock_endpoint, payload={"input": "test"}, diff --git a/tests/unit/test_live_display.py b/tests/unit/test_live_display.py index 7057eaf..6e0fb92 100644 --- a/tests/unit/test_live_display.py +++ b/tests/unit/test_live_display.py @@ -5,8 +5,10 @@ from unittest.mock import patch from llmeter.live_display import ( + DEFAULT_DISPLAY_STATS, LiveStatsDisplay, _classify, + _format_stat, _group_stats, _in_notebook, ) @@ -111,44 +113,113 @@ def test_returns_false_for_none(self): assert _in_notebook() is False +# ── _format_stat ───────────────────────────────────────────────────────────── + + +class TestFormatStat: + def test_time_metric(self): + assert _format_stat("time_to_first_token-p50", 0.312) == "0.312s" + + def test_rpm_metric(self): + assert _format_stat("rpm", 185.9) == "185.9" + + def test_tps_metric(self): + assert _format_stat("output_tps", 80.0) == "80.0 tok/s" + + def test_inverse(self): + result = _format_stat("time_per_output_token-p50", 0.04, invert=True) + assert "tok/s" in result + assert "25.0" in result + + def test_integer_value(self): + assert _format_stat("failed_requests", 3) == "3" + + def test_float_that_is_whole(self): + assert _format_stat("failed_requests", 0.0) == "0" + + # ── LiveStatsDisplay ───────────────────────────────────────────────────────── class TestLiveStatsDisplay: def test_disabled_does_nothing(self): display = LiveStatsDisplay(disabled=True) - # Should not raise - display.update({"rpm": "100"}) + display.update({"rpm": 100}) display.close() - def test_update_empty_stats_does_nothing(self): - display = LiveStatsDisplay(disabled=False) - display.update({}) - assert display._handle is None - assert display._last_line_count == 0 + def test_format_stats_with_empty_raw(self): + display = LiveStatsDisplay() + result = display.format_stats({}) + assert all(v == "—" for v in result.values()) + assert "rpm" in result + assert "fail" in result + + def test_format_stats_with_data(self): + display = LiveStatsDisplay( + display_stats={ + "rpm": "rpm", + "fail": "failed_requests", + "p50_ttft": "time_to_first_token-p50", + } + ) + raw = { + "rpm": 185.9, + "failed_requests": 0, + "time_to_first_token-p50": 0.312, + } + result = display.format_stats(raw) + assert result["rpm"] == "185.9" + assert result["fail"] == "0" + assert result["p50_ttft"] == "0.312s" + + def test_format_stats_inverse(self): + display = LiveStatsDisplay( + display_stats={"tps": ("time_per_output_token-p50", "inv")} + ) + raw = {"time_per_output_token-p50": 0.04} + result = display.format_stats(raw) + assert "tok/s" in result["tps"] - def test_terminal_output(self, capsys): - display = LiveStatsDisplay(disabled=False) + def test_format_stats_missing_key_shows_placeholder(self): + display = LiveStatsDisplay( + display_stats={"rpm": "rpm", "missing": "nonexistent_key"} + ) + result = display.format_stats({"rpm": 100.0}) + assert result["rpm"] == "100.0" + assert result["missing"] == "—" + + def test_custom_display_stats(self): + custom = {"latency": "time_to_last_token-p99", "errors": "failed_requests"} + display = LiveStatsDisplay(display_stats=custom) + assert display._display_stats == custom + + def test_default_display_stats_used(self): + display = LiveStatsDisplay() + assert display._display_stats is DEFAULT_DISPLAY_STATS + + def test_terminal_output(self): + display = LiveStatsDisplay( + disabled=False, + display_stats={"rpm": "rpm", "fail": "failed_requests"}, + ) display._is_notebook = False - display.update({"rpm": "100", "fail": "0"}) - # Should have written to stderr + display.update({"rpm": 100.0, "failed_requests": 0}) assert display._last_line_count > 0 display.close() assert display._last_line_count == 0 - def test_terminal_with_prefix(self, capsys): - display = LiveStatsDisplay(disabled=False) + def test_terminal_with_prefix(self): + display = LiveStatsDisplay(disabled=False, display_stats={"rpm": "rpm"}) display._is_notebook = False - display.update({"rpm": "100"}, extra_prefix="reqs=42") + display.update({"rpm": 100.0}, extra_prefix="reqs=42") assert display._last_line_count >= 2 # prefix line + stats line display.close() def test_terminal_overwrites_previous(self): - display = LiveStatsDisplay(disabled=False) + display = LiveStatsDisplay(disabled=False, display_stats={"rpm": "rpm"}) display._is_notebook = False - display.update({"rpm": "100"}) + display.update({"rpm": 100.0}) first_count = display._last_line_count - display.update({"rpm": "200"}) - # Should still be same number of lines (overwritten) + display.update({"rpm": 200.0}) assert display._last_line_count == first_count display.close() diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index 73a6e63..e949b04 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -308,3 +308,189 @@ def test_save_method_existing_responses(sample_result: Result, temp_dir: UPath): responses = [json.loads(line) for line in f] assert len(responses) == 6 # 5 original + 1 extra assert responses[-1]["id"] == "extra_response" + + +# ── Contributed stats round-trip ───────────────────────────────────────────── + + +class TestContributedStatsRoundTrip: + """Verify that callback-contributed stats survive save → load cycles.""" + + @pytest.fixture + def result_with_contributed_stats(self): + responses = [ + InvocationResponse( + id=f"r{i}", + response_text=f"resp {i}", + input_prompt=f"prompt {i}", + time_to_first_token=0.1 * i, + time_to_last_token=0.2 * i, + num_tokens_output=10 * i, + num_tokens_input=5 * i, + ) + for i in range(1, 4) + ] + result = Result( + responses=responses, + total_requests=3, + clients=1, + n_requests=3, + total_test_time=1.0, + ) + result._update_contributed_stats( + {"custom_metric_a": 42.0, "custom_metric_b": 99.5} + ) + return result + + def test_contributed_stats_appear_in_stats(self, result_with_contributed_stats): + stats = result_with_contributed_stats.stats + assert stats["custom_metric_a"] == 42.0 + assert stats["custom_metric_b"] == 99.5 + + def test_contributed_stats_written_to_stats_json( + self, result_with_contributed_stats, tmp_path + ): + output = UPath(tmp_path / "out") + result_with_contributed_stats.save(output) + + with (output / "stats.json").open() as f: + saved = json.load(f) + assert saved["custom_metric_a"] == 42.0 + assert saved["custom_metric_b"] == 99.5 + + def test_load_with_responses_preserves_contributed_stats( + self, result_with_contributed_stats, tmp_path + ): + output = UPath(tmp_path / "out") + result_with_contributed_stats.save(output) + + loaded = Result.load(output, load_responses=True) + + assert loaded.stats["custom_metric_a"] == 42.0 + assert loaded.stats["custom_metric_b"] == 99.5 + + def test_load_without_responses_preserves_contributed_stats( + self, result_with_contributed_stats, tmp_path + ): + output = UPath(tmp_path / "out") + result_with_contributed_stats.save(output) + + loaded = Result.load(output, load_responses=False) + + assert loaded.stats["custom_metric_a"] == 42.0 + assert loaded.stats["custom_metric_b"] == 99.5 + + def test_contributed_stats_do_not_clobber_builtin_stats( + self, result_with_contributed_stats, tmp_path + ): + output = UPath(tmp_path / "out") + result_with_contributed_stats.save(output) + + loaded = Result.load(output, load_responses=True) + + # Builtin stats must still be present and correct + assert "failed_requests" in loaded.stats + assert loaded.stats["total_requests"] == 3 + assert "time_to_first_token-p50" in loaded.stats + + def test_builtin_stats_not_overwritten_by_stale_saved_values(self, tmp_path): + """If a builtin key exists in stats.json with a stale value, the freshly + computed value from responses should win.""" + responses = [ + InvocationResponse( + id="x", + response_text="r", + input_prompt="p", + time_to_first_token=0.5, + time_to_last_token=1.0, + num_tokens_output=10, + num_tokens_input=5, + ) + ] + result = Result( + responses=responses, + total_requests=1, + clients=1, + n_requests=1, + total_test_time=2.0, + ) + output = UPath(tmp_path / "out") + result.save(output) + + # Tamper with stats.json: set a wrong value for a builtin key + stats_path = output / "stats.json" + with stats_path.open() as f: + saved = json.load(f) + saved["failed_requests"] = 999 + with stats_path.open("w") as f: + json.dump(saved, f) + + loaded = Result.load(output, load_responses=True) + + # The freshly computed value (0 failures) should win over the tampered 999 + assert loaded.stats["failed_requests"] == 0 + + def test_load_responses_recomputes_but_keeps_contributed(self, tmp_path): + """After load(load_responses=False) + load_responses(), contributed + stats from stats.json should still be accessible via _preloaded_stats + even though responses were reloaded.""" + responses = [ + InvocationResponse( + id="z", + response_text="r", + input_prompt="p", + time_to_first_token=0.3, + time_to_last_token=0.6, + num_tokens_output=8, + num_tokens_input=4, + ) + ] + result = Result( + responses=responses, + total_requests=1, + clients=1, + n_requests=1, + total_test_time=1.0, + ) + result._update_contributed_stats({"cb_stat": 7.0}) + output = UPath(tmp_path / "out") + result.save(output) + + loaded = Result.load(output, load_responses=False) + assert loaded.stats["cb_stat"] == 7.0 + + # Now reload responses — _preloaded_stats gets recomputed from + # responses only, so cb_stat won't be in _preloaded_stats anymore, + # but it was never in _contributed_stats on the loaded instance either. + loaded.load_responses() + # After recompute, builtin stats should be correct + assert loaded.stats["failed_requests"] == 0 + assert "time_to_first_token-p50" in loaded.stats + + def test_multiple_contributed_stats_updates_merge(self, tmp_path): + responses = [ + InvocationResponse( + id="m", + response_text="r", + input_prompt="p", + num_tokens_output=5, + num_tokens_input=3, + ) + ] + result = Result( + responses=responses, + total_requests=1, + clients=1, + n_requests=1, + total_test_time=0.5, + ) + result._update_contributed_stats({"stat_a": 1.0}) + result._update_contributed_stats({"stat_b": 2.0}) + result._update_contributed_stats({"stat_a": 10.0}) # overwrite + + output = UPath(tmp_path / "out") + result.save(output) + + loaded = Result.load(output, load_responses=True) + assert loaded.stats["stat_a"] == 10.0 + assert loaded.stats["stat_b"] == 2.0 diff --git a/tests/unit/test_runner.py b/tests/unit/test_runner.py index 8953e80..616988e 100644 --- a/tests/unit/test_runner.py +++ b/tests/unit/test_runner.py @@ -70,11 +70,11 @@ def run(mock_endpoint: MagicMock, mock_tokenizer: MagicMock): mock_run._queue = AsyncMock() mock_run._queue.task_done = MagicMock() - # Mock the _invoke_n_c method to return a simple result - async def mock_invoke_n_c(payload, n_requests, clients): + # Mock the _invoke_clients method to return a simple result + async def mock_invoke_clients(payload, n_requests=None, duration=None, clients=1): return 1.0, [], [] - mock_run._invoke_n_c = mock_invoke_n_c + mock_run._invoke_clients = mock_invoke_clients # Mock the _process_results_from_q method async def mock_process_results_from_q(output_path=None): @@ -126,7 +126,7 @@ async def test_invoke_n(run: _Run): ] ) - result = await run._invoke_n( + result = await run._invoke_client( payload=[{"prompt": "test1"}, {"prompt": "test2"}], n=2 ) @@ -180,7 +180,7 @@ async def test_invoke_n_no_wait(run: _Run): @pytest.mark.asyncio async def test_invoke_n_c(run: _Run): # Remove the fixture override and create a proper mock - async def mock_invoke_n_c(payload, n_requests, clients): + async def mock_invoke_clients(payload, n_requests=None, duration=None, clients=1): # Simulate the actual behavior responses = [ InvocationResponse(id="1", input_prompt="test1", response_text="response1"), @@ -189,9 +189,9 @@ async def mock_invoke_n_c(payload, n_requests, clients): return 1.5, responses, [] # total_time, responses, errors # Replace the fixture mock with our test-specific mock - run._invoke_n_c = mock_invoke_n_c + run._invoke_clients = mock_invoke_clients - total_test_time, responses, _ = await run._invoke_n_c( + total_test_time, responses, _ = await run._invoke_clients( payload=[{"prompt": "test"}], n_requests=2, clients=1 ) @@ -238,7 +238,7 @@ async def test_run_with_output_path(runner: Runner, tmp_path: Path): @pytest.mark.asyncio async def test_run_error_handling(run: _Run): - run._invoke_n_c = AsyncMock(side_effect=Exception("Test error")) + run._invoke_clients = AsyncMock(side_effect=Exception("Test error")) run._process_results_from_q = AsyncMock() with pytest.raises(Exception, match="Test error"): @@ -432,7 +432,7 @@ def test_run_output_path(runner: Runner, tmp_path: Path): @pytest.mark.asyncio async def test_invoke_n_edge_cases(run: _Run): # Test with empty payload - result = await run._invoke_n(payload=[], n=5) + result = await run._invoke_client(payload=[], n=5) assert not result # Test with n=None (should use all payloads) @@ -442,7 +442,7 @@ async def test_invoke_n_edge_cases(run: _Run): InvocationResponse(id="2", input_prompt="test2", response_text="response2"), ] ) - result = await run._invoke_n( + result = await run._invoke_client( payload=[{"prompt": "test1"}, {"prompt": "test2"}], n=None ) assert len(result) == 2 @@ -535,7 +535,9 @@ def test_prepare_run_combinations( ) assert isinstance(run.payload, list) - assert run.n_requests == n_requests + # When n_requests is None, it defaults to len(payload) + expected_n = n_requests if n_requests is not None else len(run.payload) + assert run.n_requests == expected_n assert run.clients == clients assert run.output_path == (Path(output_path) if output_path else None) assert run.run_name is not None @@ -566,7 +568,9 @@ async def test_run_with_different_payloads( @pytest.mark.asyncio async def test_invoke_n_c_concurrent_execution(run: _Run): - async def mock_invoke_n(payload, n, add_start_jitter=True, shuffle_order=True): + async def mock_invoke_client( + payload, n=None, duration=None, add_start_jitter=True, shuffle_order=True + ): await asyncio.sleep(0.1) # Simulate some processing time return [ InvocationResponse( @@ -575,10 +579,10 @@ async def mock_invoke_n(payload, n, add_start_jitter=True, shuffle_order=True): for i in range(n) ] - run._invoke_n = mock_invoke_n # type: ignore + run._invoke_client = mock_invoke_client # type: ignore start_time = time.perf_counter() - total_test_time, _, _ = await run._invoke_n_c( + total_test_time, _, _ = await run._invoke_clients( payload=[{"prompt": "test"}], n_requests=5, clients=3 ) end_time = time.perf_counter() @@ -757,7 +761,9 @@ def test_prepare_run_more_edge_cases( ) assert isinstance(run_config.payload, list) - assert run_config.n_requests == n_requests + # When n_requests is None, it defaults to len(payload) + expected_n = n_requests if n_requests is not None else len(run_config.payload) + assert run_config.n_requests == expected_n assert run_config.clients == clients if clients is not None else 1 assert run_config.output_path == (Path(output_path) if output_path else None) assert run_config.run_name is not None @@ -806,7 +812,9 @@ async def test_run_with_optional_parameters( async def test_invoke_n_c_with_different_clients( run: _Run, clients: Literal[1] | Literal[3] | Literal[5] | Literal[10] ): - async def mock_invoke_n(payload, n, add_start_jitter=True, shuffle_order=True): + async def mock_invoke_client( + payload, n=None, duration=None, add_start_jitter=True, shuffle_order=True + ): await asyncio.sleep(0.1) # Simulate some processing time return [ InvocationResponse( @@ -815,10 +823,10 @@ async def mock_invoke_n(payload, n, add_start_jitter=True, shuffle_order=True): for i in range(n) ] - run._invoke_n = mock_invoke_n # type: ignore + run._invoke_client = mock_invoke_client # type: ignore start_time = time.perf_counter() - total_test_time, _, _ = await run._invoke_n_c( + total_test_time, _, _ = await run._invoke_clients( payload=[{"prompt": "test"}], n_requests=5, clients=clients ) end_time = time.perf_counter() @@ -956,7 +964,7 @@ async def test_invoke_n_with_different_options( ] ) - result = await run._invoke_n( + result = await run._invoke_client( payload=[{"prompt": "test1"}, {"prompt": "test2"}], n=2, shuffle_order=shuffle_order, @@ -965,7 +973,7 @@ async def test_invoke_n_with_different_options( assert len(result) == 2 run._invoke_n_no_wait.assert_called_once_with( - [{"prompt": "test1"}, {"prompt": "test2"}], 2, shuffle_order + [{"prompt": "test1"}, {"prompt": "test2"}], 2, None, shuffle_order ) @@ -1021,7 +1029,7 @@ def test_run_duration_and_n_requests_mutually_exclusive( def test_run_duration_sets_time_bound_flag( mock_endpoint: Endpoint, mock_tokenizer: MagicMock ): - """When run_duration is set, _time_bound should be True and _n_requests 0.""" + """When run_duration is set, _time_bound should be True and n_requests 0.""" run = _Run( endpoint=mock_endpoint, tokenizer=mock_tokenizer, @@ -1031,7 +1039,7 @@ def test_run_duration_sets_time_bound_flag( run_name="test_run", ) assert run._time_bound is True - assert run._n_requests == 0 + assert run.n_requests == 0 def test_n_requests_sets_count_bound( @@ -1047,7 +1055,7 @@ def test_n_requests_sets_count_bound( run_name="test_run", ) assert run._time_bound is False - assert run._n_requests == 10 + assert run.n_requests == 10 def test_run_duration_must_be_positive( @@ -1068,7 +1076,7 @@ def test_run_duration_must_be_positive( def test_invoke_for_duration_respects_deadline( mock_endpoint: Endpoint, mock_tokenizer: MagicMock ): - """_invoke_for_duration should stop after the specified duration.""" + """_invoke_n_no_wait with duration should stop after the specified duration.""" run = _Run( endpoint=mock_endpoint, tokenizer=mock_tokenizer, @@ -1089,7 +1097,7 @@ def slow_invoke(payload): run._endpoint.invoke.side_effect = slow_invoke start = time.perf_counter() - responses = run._invoke_for_duration(payload=[{"prompt": "test"}], duration=0.5) + responses = run._invoke_n_no_wait(payload=[{"prompt": "test"}], duration=0.5) elapsed = time.perf_counter() - start assert len(responses) > 0 @@ -1100,7 +1108,7 @@ def slow_invoke(payload): def test_invoke_for_duration_cycles_payloads( mock_endpoint: Endpoint, mock_tokenizer: MagicMock ): - """_invoke_for_duration should cycle through payloads.""" + """_invoke_n_no_wait with duration should cycle through payloads.""" run = _Run( endpoint=mock_endpoint, tokenizer=mock_tokenizer, @@ -1121,7 +1129,7 @@ def tracking_invoke(payload): run._endpoint.invoke.side_effect = tracking_invoke - responses = run._invoke_for_duration( + responses = run._invoke_n_no_wait( payload=[{"prompt": "a"}, {"prompt": "b"}], duration=0.3, shuffle_order=False, @@ -1190,7 +1198,7 @@ def test_prepare_run_with_duration(runner: Runner): ) assert run._time_bound is True assert run.run_duration == 30 - assert run._n_requests == 0 + assert run.n_requests == 0 def test_prepare_run_duration_and_n_requests_conflict(runner: Runner): diff --git a/tests/unit/test_running_stats.py b/tests/unit/test_running_stats.py index 2f04d78..634483a 100644 --- a/tests/unit/test_running_stats.py +++ b/tests/unit/test_running_stats.py @@ -152,69 +152,47 @@ def test_empty_stats(self, rs): assert stats["total_input_tokens"] == 0 -# ── snapshot ───────────────────────────────────────────────────────────────── +# ── send-window throughput in to_stats ──────────────────────────────────────── -class TestSnapshot: - def test_placeholder_when_empty(self, rs): - result = rs.snapshot() - assert all(v == "—" for v in result.values()) - # Should have all default keys - assert "rpm" in result - assert "p50_ttft" in result - assert "fail" in result - assert "output_tps" in result - - def test_placeholder_with_custom_fields(self, rs): - fields = {"my_rpm": "rpm", "my_fail": "failed"} - result = rs.snapshot(fields) - assert result == {"my_rpm": "—", "my_fail": "—"} - - def test_failed_count(self, populated_rs): - result = populated_rs.snapshot({"fail": "failed"}) - assert result["fail"] == "1" - +class TestSendWindowStats: def test_rpm_uses_send_window(self, rs): rs._first_send_time = 100.0 rs._last_send_time = 110.0 # 10 second window rs.update({"error": None}) rs.update({"error": None}) rs.update({"error": None}) - result = rs.snapshot({"rpm": "rpm"}) + stats = rs.to_stats() # 3 responses / 10 seconds * 60 = 18.0 rpm - assert result["rpm"] == "18.0" - - def test_rpm_not_shown_with_single_send(self, rs): - """With only one send, first == last, no window to compute RPM.""" - rs._first_send_time = 100.0 - rs._last_send_time = 100.0 - rs.update({"error": None}) - result = rs.snapshot({"rpm": "rpm"}) - assert "rpm" not in result + assert stats["rpm"] == pytest.approx(18.0) def test_output_tps_uses_send_window(self, rs): rs._first_send_time = 100.0 rs._last_send_time = 110.0 # 10 second window rs.update({"num_tokens_output": 500, "error": None}) rs.update({"num_tokens_output": 300, "error": None}) - result = rs.snapshot({"tps": "output_tps"}) + stats = rs.to_stats() # 800 tokens / 10 seconds = 80.0 tok/s - assert result["tps"] == "80.0 tok/s" - - def test_sum_aggregation(self, populated_rs): - result = populated_rs.snapshot({"out": ("num_tokens_output", "sum")}) - assert result["out"] == "83" - - def test_percentile_aggregation(self, populated_rs): - result = populated_rs.snapshot({"p50": ("time_to_first_token", "p50")}) - assert "p50" in result - assert result["p50"].endswith("s") + assert stats["output_tps"] == pytest.approx(80.0) - def test_inverse_aggregation(self, populated_rs): - result = populated_rs.snapshot({"tps": ("time_per_output_token", "p50", "inv")}) - assert "tps" in result - assert "tok/s" in result["tps"] + def test_no_send_window_when_single_send(self, rs): + """With only one send, first == last, no window to compute RPM.""" + rs._first_send_time = 100.0 + rs._last_send_time = 100.0 + rs.update({"error": None}) + stats = rs.to_stats() + assert "rpm" not in stats + assert "output_tps" not in stats - def test_empty_fields_returns_empty(self, populated_rs): - result = populated_rs.snapshot({}) - assert result == {} + def test_no_send_window_when_no_sends(self, rs): + stats = rs.to_stats() + assert "rpm" not in stats + assert "output_tps" not in stats + + def test_send_window_helper(self, rs): + assert rs._send_window() is None + rs._first_send_time = 10.0 + rs._last_send_time = 10.0 + assert rs._send_window() is None + rs._last_send_time = 20.0 + assert rs._send_window() == pytest.approx(10.0)