From 68f084208bc280af1f7d5219b77326f09d07c315 Mon Sep 17 00:00:00 2001
From: Alessandro Cere <alecere@amazon.com>
Date: Wed, 1 Apr 2026 11:38:44 -0700
Subject: [PATCH 1/3] feat: add low-memory mode, RunningStats, and live
 progress-bar stats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add `low_memory` parameter to Runner/run() that writes responses to
  disk without keeping them in memory, for large-scale test runs.
- Introduce `RunningStats` class that accumulates metrics incrementally
  (counts, sums, sorted values for percentile computation).
- Replace `_builtin_stats` cached_property on Result with `_preloaded_stats`
  populated by RunningStats during the run or from stats.json on load.
- Add `snapshot()` method on RunningStats for live progress-bar display
  of p50/p90 TTFT, p50/p90 TTLT, median tokens/s, total tokens, and
  failure count — configurable via `progress_bar_stats` parameter.
- Add `_compute_stats()` classmethod on Result as fallback for manually
  constructed Result objects and post-load_responses() recomputation.
- Update tests for the new stats flow.
---
 llmeter/results.py           | 115 +++++++++--------
 llmeter/runner.py            | 106 ++++++++++++++--
 llmeter/utils.py             | 236 +++++++++++++++++++++++++++++++++++
 tests/unit/test_lazy_load.py |  20 +--
 tests/unit/test_results.py   |   4 +-
 5 files changed, 405 insertions(+), 76 deletions(-)

diff --git a/llmeter/results.py b/llmeter/results.py
index 45a1c00..6654d2e 100644
--- a/llmeter/results.py
+++ b/llmeter/results.py
@@ -6,7 +6,6 @@
 import os
 from dataclasses import asdict, dataclass
 from datetime import datetime, timezone
-from functools import cached_property
 from numbers import Number
 from typing import Any, Sequence
 
@@ -169,8 +168,8 @@ def load_responses(self) -> list[InvocationResponse]:
                 InvocationResponse(**json.loads(line)) for line in f if line
             ]
         logger.info("Loaded %d responses from %s", len(self.responses), responses_path)
-        # Invalidate cached stats so they are recomputed with the loaded responses
-        self.__dict__.pop("_builtin_stats", None)
+        # Recompute stats from the freshly loaded responses
+        self._preloaded_stats = self._compute_stats(self)
         return self.responses
 
     @classmethod
@@ -241,9 +240,9 @@ def load(
 
         result = cls(responses=responses, **summary)
 
-        # When skipping responses, load pre-computed stats from stats.json if available
-        # so that result.stats works without needing the responses
+        # Load or compute stats
         if not load_responses:
+            # Use pre-computed stats from disk when responses aren't loaded
             stats_path = result_path / "stats.json"
             if stats_path.exists():
                 with stats_path.open("r") as s:
@@ -260,78 +259,84 @@ def load(
                                 pass
             else:
                 result._preloaded_stats = None
+        else:
+            # Compute stats from the loaded responses
+            result._preloaded_stats = cls._compute_stats(result)
 
         return result
 
-    @cached_property
-    def _builtin_stats(self) -> dict:
-        """
-        Default run metrics and aggregated statistics provided by LLMeter core
+    @classmethod
+    def _compute_stats(cls, result: "Result") -> dict:
+        """Compute stats from in-memory responses.
 
-        Users should generally refer to the `.stats` property instead, which combines this data
-        with any additional values contributed by callbacks or other extensions.
+        This is the fallback used when ``_preloaded_stats`` is not available — for
+        example when a ``Result`` is constructed manually or after
+        :meth:`load_responses` reloads data from disk.
 
-        This is a read-only and `@cached_property`, which means the result is computed once and
-        then cached for subsequent accesses - improving performance.
+        Args:
+            result: A ``Result`` instance whose ``responses`` list is populated.
 
         Returns:
-            stats: A dictionary containing all computed statistics. The keys are:
-                - All key-value pairs from the Result's dictionary representation
-                - Test-specific statistics
-                - Aggregated statistics with keys in the format "{stat_name}-{aggregation_type}"
-                  where stat_name is one of the four metrics listed above, and
-                  aggregation_type includes measures like mean, median, etc.
-        """
+            A flat dictionary matching the ``Result.stats`` schema, containing
+            run-level metrics (``failed_requests``, ``requests_per_minute``, …)
+            and per-metric aggregations (``time_to_first_token-p50``, …).
+
+        Example::
 
+            result = Result(responses=my_responses, total_requests=100, ...)
+            stats = Result._compute_stats(result)
+            stats["time_to_first_token-p90"]  # 0.485
+        """
         aggregation_metrics = [
             "time_to_last_token",
             "time_to_first_token",
             "num_tokens_output",
             "num_tokens_input",
         ]
-
-        results_stats = _get_stats_from_results(
-            self,
-            aggregation_metrics,
-        )
+        results_stats = _get_stats_from_results(result, aggregation_metrics)
         return {
-            **self.to_dict(),
-            **_get_run_stats(self),
+            **result.to_dict(),
+            **_get_run_stats(result),
             **{f"{k}-{j}": v for k, o in results_stats.items() for j, v in o.items()},
         }
 
     @property
     def stats(self) -> dict:
+        """Run metrics and aggregated statistics over the individual requests.
+
+        Returns a flat dictionary combining:
+
+        * Basic run information (from ``to_dict()``).
+        * Aggregated statistics (``average``, ``p50``, ``p90``, ``p99``) for
+          ``time_to_last_token``, ``time_to_first_token``, ``num_tokens_output``,
+          and ``num_tokens_input``.  Keys use the format
+          ``"{metric}-{aggregation}"``.
+        * Run-level throughput metrics (``requests_per_minute``,
+          ``total_input_tokens``, etc.).
+        * Any additional stats contributed by callbacks via
+          :meth:`_update_contributed_stats`.
+
+        During a live run, stats are computed incrementally by
+        :class:`~llmeter.utils.RunningStats` and stored in ``_preloaded_stats``.
+        When loading from disk with ``load_responses=False``, pre-computed stats
+        from ``stats.json`` are used.  As a fallback (e.g. manually constructed
+        ``Result``), stats are computed on the fly from ``self.responses``.
+
+        Returns:
+            A new shallow copy of the stats dictionary on each access.
+
+        Example::
+
+            result = await runner.run(payload=my_payload, clients=5)
+            result.stats["time_to_first_token-p50"]   # 0.312
+            result.stats["requests_per_minute"]        # 141.2
+            result.stats["failed_requests"]            # 0
         """
-        Run metrics and aggregated statistics over the individual requests
-
-        This combined view includes:
-        - Basic information about the run (from the Result's dictionary representation)
-        - Aggregated statistics ('average', 'p50', 'p90', 'p99') for:
-            - Time to last token
-            - Time to first token
-            - Number of tokens output
-            - Number of tokens input
-
-        Aggregated statistics are keyed in the format "{stat_name}-{aggregation_type}"
-
-        This property is read-only and returns a new shallow copy of the data on each access.
-        Default stats provided by LLMeter are calculated on first access and then cached. Callbacks
-        Callbacks or other mechanisms needing to augment stats should use the
-        `_update_contributed_stats()` method.
-
-        When the Result was loaded with ``load_responses=False``, pre-computed stats from
-        ``stats.json`` are returned if available. Call ``load_responses()`` to load the
-        individual responses and recompute stats from the raw data.
-        """
-        # Use preloaded stats when responses were not loaded
-        if not self.responses and self._preloaded_stats is not None:
+        if self._preloaded_stats is not None:
             stats = self._preloaded_stats.copy()
-            if self._contributed_stats:
-                stats.update(self._contributed_stats)
-            return stats
-
-        stats = self._builtin_stats.copy()
+        else:
+            # Fallback: compute from responses (e.g. Result constructed manually)
+            stats = self._compute_stats(self)
 
         if self._contributed_stats:
             stats.update(self._contributed_stats)
diff --git a/llmeter/runner.py b/llmeter/runner.py
index 47626e3..0604a32 100644
--- a/llmeter/runner.py
+++ b/llmeter/runner.py
@@ -20,7 +20,7 @@
 from tqdm.auto import tqdm, trange
 from upath import UPath as Path
 
-from llmeter.utils import now_utc
+from llmeter.utils import RunningStats, now_utc
 
 if TYPE_CHECKING:
     # Avoid circular import: We only need typing for Callback
@@ -61,6 +61,8 @@ class _RunConfig:
     run_description: str | None = None
     timeout: int | float = 60
     callbacks: list[Callback] | None = None
+    low_memory: bool = False
+    progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None
     disable_per_client_progress_bar: InitVar[bool] = True
     disable_clients_progress_bar: InitVar[bool] = True
 
@@ -149,19 +151,35 @@ class _Run(_RunConfig):
     """
 
     def __post_init__(self, disable_client_progress_bar, disable_clients_progress_bar):
-        assert (
-            self.run_name is not None
-        ), "Test Run must be created with an explicit run_name"
+        assert self.run_name is not None, (
+            "Test Run must be created with an explicit run_name"
+        )
 
         super().__post_init__(disable_client_progress_bar, disable_clients_progress_bar)
 
-        assert (
-            self.endpoint is not None
-        ), "Test Run must be created with an explicit Endpoint"
+        assert self.endpoint is not None, (
+            "Test Run must be created with an explicit Endpoint"
+        )
 
         self._validate_and_prepare_payload()
         self._responses = []
 
+        if self.low_memory:
+            assert self.output_path is not None, (
+                "output_path is required when low_memory=True "
+                "(responses must be written to disk)"
+            )
+
+        self._running_stats = RunningStats(
+            metrics=[
+                "time_to_last_token",
+                "time_to_first_token",
+                "time_per_output_token",
+                "num_tokens_output",
+                "num_tokens_input",
+            ]
+        )
+
     def _validate_and_prepare_payload(self):
         """Validate and prepare the payload for the test run and update n_requests
 
@@ -251,9 +269,18 @@ async def _process_results_from_q(self, output_path: Path | None = None):
             if self.callbacks is not None:
                 [await cb.after_invoke(response) for cb in self.callbacks]
 
-            self._responses.append(response)
+            if self.low_memory and self._running_stats is not None:
+                self._running_stats.update(response.to_dict())
+            else:
+                self._responses.append(response)
+                self._running_stats.update(response.to_dict())
+
             if self._progress_bar:
                 self._progress_bar.update(1)
+                self._progress_bar.set_postfix(
+                    self._running_stats.snapshot(self.progress_bar_stats),
+                    refresh=False,
+                )
 
             if output_path:
                 output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -403,7 +430,7 @@ async def _invoke_n_c(
         end_t = time.perf_counter()
         total_test_time = end_t - start_t
         logger.info(
-            f"Generated {clients} connections with {n_requests} invocations each in {total_test_time*1000:.2f} seconds"
+            f"Generated {clients} connections with {n_requests} invocations each in {total_test_time * 1000:.2f} seconds"
         )
 
         # Signal the token counting task to exit
@@ -474,7 +501,7 @@ async def _run(self):
             return result
 
         self._progress_bar.close()
-        logger.info(f"Test completed in {total_test_time*1000:.2f} seconds.")
+        logger.info(f"Test completed in {total_test_time * 1000:.2f} seconds.")
 
         result = replace(
             result,
@@ -484,6 +511,22 @@ async def _run(self):
             end_time=run_end_time,
         )
 
+        # Compute stats from the running accumulators
+        result._preloaded_stats = self._running_stats.to_stats(
+            total_requests=result.total_requests,
+            total_test_time=total_test_time,
+            result_dict=result.to_dict(),
+        )
+        result._preloaded_stats["start_time"] = run_start_time
+        result._preloaded_stats["end_time"] = run_end_time
+        result._preloaded_stats["total_test_time"] = total_test_time
+
+        if self.low_memory:
+            logger.info(
+                "Low-memory mode: responses not stored in memory. "
+                "Use result.load_responses() to load from disk."
+            )
+
         if self.callbacks is not None:
             [await cb.after_run(result) for cb in self.callbacks]
 
@@ -554,6 +597,15 @@ class Runner(_RunConfig):
             endpoint. Defaults to 60 seconds.
         callbacks (list[Callback] | None): Optional callbacks to enable during the test Run. See
             `llmeter.callbacks` for more information.
+        low_memory (bool): When ``True``, responses are written to disk but not kept in memory
+            during the run.  Stats are computed incrementally via
+            :class:`~llmeter.utils.RunningStats`.  Requires ``output_path`` to be set.  Use
+            ``result.load_responses()`` to load responses from disk after the run.  Defaults to
+            ``False``.
+        progress_bar_stats (dict | None): Controls which live stats appear on the progress bar.
+            Maps short display labels to field specs — see
+            :attr:`RunningStats.DEFAULT_SNAPSHOT_STATS` for the format and defaults.  Pass ``{}``
+            to disable live stats entirely.  Defaults to ``None`` (use built-in defaults).
         disable_per_client_progress_bar (bool): Set `True` to disable per-client progress bars
             from showing during the run. Default `False` (each client's progress will be shown).
         disable_clients_progress_bar (bool): Set `True` to disable overall progress bar from
@@ -600,6 +652,8 @@ async def run(
         run_description: str | None = None,
         timeout: int | float | None = None,
         callbacks: list[Callback] | None = None,
+        low_memory: bool | None = None,
+        progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None,
         disable_per_client_progress_bar: bool | None = None,
         disable_clients_progress_bar: bool | None = None,
     ) -> Result:
@@ -635,6 +689,36 @@ async def run(
                 endpoint.
             callbacks (list[Callback] | None): Optional callbacks to enable during the test Run. See
                 `llmeter.callbacks` for more information.
+            low_memory (bool): When ``True``, responses are written to disk but not
+                kept in memory.  Stats are computed incrementally via
+                :class:`~llmeter.utils.RunningStats`.  Requires ``output_path``.
+                Use ``result.load_responses()`` to access responses after the run.
+
+                Example::
+
+                    result = await runner.run(
+                        output_path="/tmp/my_run",
+                        low_memory=True,
+                    )
+                    result.stats          # works (computed incrementally)
+                    result.responses      # [] (empty)
+                    result.load_responses()  # loads from disk
+
+            progress_bar_stats (dict): Controls which live stats appear on the
+                progress bar.  Maps short display labels to field specs — see
+                :attr:`RunningStats.DEFAULT_SNAPSHOT_STATS` for the format and
+                defaults.  Pass ``{}`` to disable live stats entirely.
+
+                Example::
+
+                    # Show only p99 latency and tokens per second:
+                    result = await runner.run(
+                        progress_bar_stats={
+                            "p99_ttlt": ("time_to_last_token", "p99"),
+                            "tps": ("time_per_output_token", "p50", "inv"),
+                            "fail": "failed",
+                        },
+                    )
             disable_per_client_progress_bar (bool): Set `True` to disable per-client progress bars
                 from showing during the run.
             disable_clients_progress_bar (bool): Set `True` to disable overall progress bar from
@@ -667,6 +751,8 @@ async def run(
             run_description=run_description,
             timeout=timeout,
             callbacks=callbacks,
+            low_memory=low_memory,
+            progress_bar_stats=progress_bar_stats,
             disable_per_client_progress_bar=disable_per_client_progress_bar,
             disable_clients_progress_bar=disable_clients_progress_bar,
         )
diff --git a/llmeter/utils.py b/llmeter/utils.py
index d072e58..fd30d0f 100644
--- a/llmeter/utils.py
+++ b/llmeter/utils.py
@@ -1,5 +1,6 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
+import bisect
 from datetime import datetime, timezone
 from itertools import filterfalse
 from math import isnan
@@ -83,6 +84,241 @@ def summary_stats_from_list(
         return {}
 
 
+class RunningStats:
+    """Accumulate summary statistics incrementally from individual responses.
+
+    Maintains sorted value lists per metric so that percentiles (p50, p90, p99),
+    averages, and sums can be computed at any point — both mid-run (for live
+    progress-bar display via :meth:`snapshot`) and at the end of a run (for the
+    final :class:`~llmeter.results.Result` stats via :meth:`to_stats`).
+
+    Args:
+        metrics: Names of numeric response fields to track (e.g.
+            ``"time_to_first_token"``, ``"num_tokens_output"``).
+
+    Example::
+
+        rs = RunningStats(metrics=["time_to_first_token", "time_to_last_token"])
+        rs.update({"time_to_first_token": 0.3, "time_to_last_token": 0.8})
+        rs.update({"time_to_first_token": 0.5, "time_to_last_token": 1.2, "error": None})
+        rs.to_stats()
+        # {'failed_requests': 0, ..., 'time_to_first_token-p50': 0.4, ...}
+    """
+
+    #: Default stats shown on the progress bar during a run.
+    #: Each entry maps a short display label to a spec:
+    #:
+    #: * ``(metric_name, aggregation)`` — aggregation can be ``"p50"``, ``"p90"``,
+    #:   ``"p99"``, ``"average"``, or ``"sum"``.
+    #: * ``(metric_name, aggregation, "inv")`` — same as above but displays the
+    #:   reciprocal (e.g. seconds-per-token → tokens-per-second).
+    #: * The literal string ``"failed"`` for the running failure count.
+    DEFAULT_SNAPSHOT_STATS: dict[str, tuple[str, ...] | str] = {
+        "p50_ttft": ("time_to_first_token", "p50"),
+        "p90_ttft": ("time_to_first_token", "p90"),
+        "p50_ttlt": ("time_to_last_token", "p50"),
+        "p90_ttlt": ("time_to_last_token", "p90"),
+        "p50_tps": ("time_per_output_token", "p50", "inv"),
+        "input_tokens": ("num_tokens_input", "sum"),
+        "output_tokens": ("num_tokens_output", "sum"),
+        "fail": "failed",
+    }
+
+    def __init__(self, metrics: Sequence[str]):
+        self._metrics = list(metrics)
+        self._count = 0
+        self._failed = 0
+        self._sums: dict[str, float] = {m: 0.0 for m in metrics}
+        self._values: dict[str, list[float]] = {m: [] for m in metrics}
+
+    def update(self, response_dict: dict[str, Any]) -> None:
+        """Record one response's metric values.
+
+        Call this once per :class:`~llmeter.endpoints.base.InvocationResponse`
+        (typically via ``response.to_dict()``).  The method extracts each tracked
+        metric from *response_dict*, skipping ``None`` and ``NaN`` values, and
+        increments the failure counter when the ``"error"`` value is not ``None``.
+
+        Args:
+            response_dict: A flat dictionary of response fields, as returned by
+                ``InvocationResponse.to_dict()``.
+
+        Example::
+
+            rs = RunningStats(metrics=["time_to_first_token"])
+            rs.update({"time_to_first_token": 0.42, "error": None})
+            rs.update({"time_to_first_token": None, "error": "timeout"})
+            assert rs._failed == 1
+        """
+        self._count += 1
+        if response_dict.get("error") is not None:
+            self._failed += 1
+        for m in self._metrics:
+            val = response_dict.get(m)
+            if val is not None and not (isinstance(val, float) and isnan(val)):
+                self._sums[m] += val
+                bisect.insort(self._values[m], val)
+
+    def to_stats(
+        self,
+        total_requests: int | None = None,
+        total_test_time: float | None = None,
+        result_dict: dict[str, Any] | None = None,
+    ) -> dict[str, Any]:
+        """Compute all accumulated statistics as raw numeric values.
+
+        This is where stats are computed during a live run.  It is called
+        once at the end of a run (with all three optional arguments) to produce
+        the full ``Result.stats`` dict, and also called internally by
+        :meth:`snapshot` (without arguments) for mid-run progress display.
+
+        Args:
+            total_requests: Total number of requests across all clients.  When
+                provided, enables ``failed_requests_rate`` and
+                ``requests_per_minute`` computation.
+            total_test_time: Wall-clock duration of the run in seconds.  When
+                provided, enables throughput metrics (requests/min, tokens/min).
+            result_dict: Base key-value pairs to include in the output (typically
+                from ``Result.to_dict()``).  When ``None``, only metric
+                aggregations and failure counts are returned.
+
+        Returns:
+            A flat dictionary of statistics.  Keys include:
+
+            * ``failed_requests``, ``failed_requests_rate``, ``requests_per_minute``
+            * ``total_input_tokens``, ``total_output_tokens``
+            * ``average_input_tokens_per_minute``, ``average_output_tokens_per_minute``
+            * ``{metric}-{agg}`` for each tracked metric and each aggregation
+              (``average``, ``p50``, ``p90``, ``p99``).
+
+        Example::
+
+            rs = RunningStats(metrics=["time_to_first_token", "num_tokens_output"])
+            for resp in responses:
+                rs.update(resp.to_dict())
+
+            # Mid-run (no run-level context):
+            partial = rs.to_stats()
+            partial["time_to_first_token-p50"]  # 0.312
+
+            # End of run (full Result.stats schema):
+            full = rs.to_stats(
+                total_requests=100,
+                total_test_time=42.5,
+                result_dict=result.to_dict(),
+            )
+            full["requests_per_minute"]  # 141.2
+        """
+        stats: dict[str, Any] = {}
+        if result_dict is not None:
+            stats.update(result_dict)
+
+        # Run-level stats
+        stats["failed_requests"] = self._failed
+        stats["failed_requests_rate"] = total_requests and self._failed / total_requests
+        stats["requests_per_minute"] = (
+            total_test_time and total_requests / total_test_time * 60
+            if total_requests
+            else None
+        )
+        stats["total_input_tokens"] = self._sums.get("num_tokens_input", 0)
+        stats["total_output_tokens"] = self._sums.get("num_tokens_output", 0)
+        stats["average_input_tokens_per_minute"] = (
+            total_test_time and stats["total_input_tokens"] / total_test_time * 60
+        )
+        stats["average_output_tokens_per_minute"] = (
+            total_test_time and stats["total_output_tokens"] / total_test_time * 60
+        )
+
+        # Per-metric aggregations
+        for m in self._metrics:
+            agg = summary_stats_from_list(self._values.get(m, []))
+            for j, v in agg.items():
+                stats[f"{m}-{j}"] = v
+
+        return stats
+
+    def snapshot(
+        self,
+        fields: dict[str, tuple[str, ...] | str] | None = None,
+    ) -> dict[str, str]:
+        """Format a subset of :meth:`to_stats` for progress-bar display.
+
+        Calls :meth:`to_stats` internally and picks only the requested fields,
+        formatting each value as a human-readable string.
+
+        Args:
+            fields: Mapping of ``{display_label: spec}``.  Each *spec* is one of:
+
+                * ``(metric, aggregation)`` — a 2-tuple where *metric* is a tracked
+                  metric name and *aggregation* is ``"p50"``, ``"p90"``, ``"p99"``,
+                  ``"average"``, or ``"sum"``.
+                * ``(metric, aggregation, "inv")`` — a 3-tuple; same as above but
+                  the value is inverted before display (e.g. seconds-per-token →
+                  tokens-per-second).
+                * ``"failed"`` — the literal string; shows the running failure count.
+
+                Defaults to :attr:`DEFAULT_SNAPSHOT_STATS` when ``None``.
+
+        Returns:
+            An ordered dict of ``{label: formatted_value}`` strings suitable for
+            ``tqdm.set_postfix()``.
+
+        Example::
+
+            # Use defaults:
+            rs.snapshot()
+            # {'p50_ttft': '0.312s', 'p90_ttlt': '1.203s', ..., 'fail': '0'}
+
+            # Custom selection — only p99 latency and failures:
+            rs.snapshot({
+                "p99_ttlt": ("time_to_last_token", "p99"),
+                "fail": "failed",
+            })
+            # {'p99_ttlt': '2.105s', 'fail': '1'}
+
+            # Inverted metric — tokens per second from time_per_output_token:
+            rs.snapshot({
+                "tps": ("time_per_output_token", "p50", "inv"),
+            })
+            # {'tps': '28.3 tok/s'}
+        """
+        if self._count == 0:
+            return {}
+
+        if fields is None:
+            fields = self.DEFAULT_SNAPSHOT_STATS
+
+        raw = self.to_stats()
+
+        info: dict[str, str] = {}
+        for label, spec in fields.items():
+            if spec == "failed":
+                info[label] = str(self._failed)
+                continue
+
+            metric = spec[0]
+            agg = spec[1]
+            invert = len(spec) > 2 and spec[2] == "inv"
+
+            if agg == "sum":
+                info[label] = f"{self._sums.get(metric, 0):.0f}"
+                continue
+
+            val = raw.get(f"{metric}-{agg}")
+            if val is None:
+                continue
+
+            if invert and val > 0:
+                info[label] = f"{1.0 / val:.1f} tok/s"
+            elif "time" in metric:
+                info[label] = f"{val:.3f}s"
+            else:
+                info[label] = f"{val:.1f}"
+
+        return info
+
+
 def now_utc() -> datetime:
     """Returns the current UTC datetime.
 
diff --git a/tests/unit/test_lazy_load.py b/tests/unit/test_lazy_load.py
index ed02b86..14436ce 100644
--- a/tests/unit/test_lazy_load.py
+++ b/tests/unit/test_lazy_load.py
@@ -1,12 +1,12 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
 
+from unittest.mock import MagicMock, patch
+
 import pytest
 from upath import UPath
 
 from llmeter.endpoints.base import InvocationResponse
-from unittest.mock import MagicMock, patch
-
 from llmeter.experiments import LoadTestResult
 from llmeter.results import Result
 
@@ -129,14 +129,14 @@ def test_load_responses_returns_correct_data(self, sample_responses, saved_resul
             assert orig.time_to_first_token == loaded_resp.time_to_first_token
             assert orig.time_to_last_token == loaded_resp.time_to_last_token
 
-    def test_load_responses_invalidates_cached_stats(self, saved_result):
+    def test_load_responses_recomputes_stats(self, saved_result):
         loaded = Result.load(saved_result, load_responses=True)
-        # Access _builtin_stats to cache it
-        _ = loaded._builtin_stats
-        assert "_builtin_stats" in loaded.__dict__
+        original_stats = loaded._preloaded_stats.copy()
 
         loaded.load_responses()
-        assert "_builtin_stats" not in loaded.__dict__
+        # Stats should be recomputed (same values, but a fresh dict)
+        assert loaded._preloaded_stats is not original_stats
+        assert loaded._preloaded_stats == original_stats
 
     def test_load_responses_stats_match_full_load(self, saved_result):
         full = Result.load(saved_result, load_responses=True)
@@ -153,9 +153,9 @@ def test_load_responses_stats_match_full_load(self, saved_result):
             "failed_requests",
             "requests_per_minute",
         ]:
-            assert lazy_stats[key] == pytest.approx(
-                full_stats[key]
-            ), f"Mismatch on {key}"
+            assert lazy_stats[key] == pytest.approx(full_stats[key]), (
+                f"Mismatch on {key}"
+            )
 
     def test_load_responses_no_output_path_raises(self):
         result = Result(
diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py
index 9262d16..73a6e63 100644
--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@@ -192,7 +192,9 @@ def test_stats_property(sample_result: Result):
         assert key in stats
 
     # Test caching returns same object for built-in stats:
-    assert sample_result._builtin_stats is sample_result._builtin_stats
+    assert sample_result._preloaded_stats is None or isinstance(
+        sample_result._preloaded_stats, dict
+    )
 
 
 def test_stats_property_empty_result():

From 2091da436dcae13d632906c1aa297f41f1bbb9ac Mon Sep 17 00:00:00 2001
From: Alessandro Cere <alecere@amazon.com>
Date: Thu, 2 Apr 2026 10:19:01 -0700
Subject: [PATCH 2/3] feat: time-bound runs, live stats display, and
 send-window metrics

Add run_duration parameter for time-bound test runs:
- New run_duration on Runner/run() and LoadTest: clients send requests
  continuously for a fixed duration instead of a fixed count.
- Dedicated _invoke_for_duration / _invoke_duration_c methods (separate
  from count-bound _invoke_n / _invoke_n_c).
- Time-based progress bar via _tick_time_bar async task.
- Mutual exclusivity validation between n_requests and run_duration.

Add LiveStatsDisplay for readable live metrics:
- New llmeter/live_display.py: HTML table in Jupyter (grouped columns
  for Throughput, TTFT, TTLT, Tokens, Errors), ANSI multi-line in
  terminals. Updates in-place, shows placeholders before first response.
- Replaces single-line tqdm postfix with a separate stats row.

Improve throughput metric accuracy:
- RunningStats.record_send() tracks send-side timestamps.
- RPM and output_tps use send window (first-to-last request sent)
  instead of response-side elapsed time, preventing taper-off as
  clients finish.
- output_tps (aggregate tokens/s) added to default snapshot stats.

Fix StopIteration silently terminating invocation loops:
- Both _invoke_n_no_wait and _invoke_for_duration now use while/next()
  instead of for-in-cycle() to prevent StopIteration from streaming
  endpoints from killing the loop.

Add LoadTest support for new features:
- run_duration, low_memory, progress_bar_stats forwarded to each run.

Add example notebook and documentation:
- examples/Time-bound runs with Bedrock OpenAI API.ipynb: end-to-end
  demo using bedrock-mantle endpoint with LoadTest, custom stats,
  low-memory mode, and comparison charts (RPM, TPS, TTFT, TTLT).
- docs/user_guide/run_experiments.md: new sections for time-bound runs,
  live progress-bar stats, and low-memory mode.

Add tests (51 new, 504 total):
- test_running_stats.py: record_send, update, to_stats, snapshot
  (placeholders, rpm, output_tps, send window, aggregations).
- test_live_display.py: _classify, _group_stats, _in_notebook,
  LiveStatsDisplay (disabled, terminal, overwrite, prefix).
- test_experiments.py: LoadTest with run_duration/low_memory/
  progress_bar_stats field storage and runner forwarding.
- test_runner.py: time-bound validation, _invoke_for_duration,
  full run with duration, output path, multiple clients.
---
 docs/user_guide/run_experiments.md            |   63 +
 ...e-bound runs with Bedrock OpenAI API.ipynb | 9672 +++++++++++++++++
 llmeter/experiments.py                        |  139 +-
 llmeter/live_display.py                       |  238 +
 llmeter/runner.py                             |  372 +-
 llmeter/utils.py                              |   56 +-
 tests/unit/test_experiments.py                |  119 +
 tests/unit/test_live_display.py               |  154 +
 tests/unit/test_runner.py                     |  205 +
 tests/unit/test_running_stats.py              |  220 +
 10 files changed, 11157 insertions(+), 81 deletions(-)
 create mode 100644 examples/Time-bound runs with Bedrock OpenAI API.ipynb
 create mode 100644 llmeter/live_display.py
 create mode 100644 tests/unit/test_live_display.py
 create mode 100644 tests/unit/test_running_stats.py

diff --git a/docs/user_guide/run_experiments.md b/docs/user_guide/run_experiments.md
index 7f843de..87d6819 100644
--- a/docs/user_guide/run_experiments.md
+++ b/docs/user_guide/run_experiments.md
@@ -34,6 +34,69 @@ run_2_results = await endpoint_test.run(payload=sample_payload, n_requests=10, c
 assert run_1_results.output_path != run_2_results.output_path
 ```
 
+### Time-bound runs
+
+By default, a Run sends a fixed number of requests per client (`n_requests`). Alternatively, you can use `run_duration` to run each client for a fixed number of **seconds** instead — useful when you want to measure sustained throughput over a time window rather than a fixed batch size.
+
+```python
+# Run for 60 seconds with 10 concurrent clients:
+results = await endpoint_test.run(
+    payload=sample_payload,
+    run_duration=60,
+    clients=10,
+)
+
+results.total_requests  # actual number of requests completed
+results.stats["requests_per_minute"]  # observed throughput
+```
+
+`n_requests` and `run_duration` are mutually exclusive — set one or the other, not both.
+
+During a time-bound run, the progress bar shows two lines: a time bar that fills as seconds elapse, and a request counter with live statistics (requests per minute, latency percentiles, tokens per second, etc.).
+
+### Live progress-bar statistics
+
+Both count-bound and time-bound runs display live statistics alongside the progress bar as requests complete. By default these include requests per minute, aggregate and median output tokens per second, p50/p90 time to first token (TTFT) and time to last token (TTLT), total input/output tokens, and failure count.
+
+You can customize which stats are shown via the `progress_bar_stats` parameter:
+
+```python
+# Show only rpm, p99 TTLT, median tokens/s, and failure count:
+results = await endpoint_test.run(
+    payload=sample_payload,
+    n_requests=100,
+    clients=5,
+    progress_bar_stats={
+        "rpm": "rpm",
+        "p99_ttlt": ("time_to_last_token", "p99"),
+        "tps": ("time_per_output_token", "p50", "inv"),
+        "fail": "failed",
+    },
+)
+```
+
+Pass `progress_bar_stats={}` to disable live stats entirely. See [`RunningStats.DEFAULT_SNAPSHOT_STATS`](../reference/utils.md#llmeter.utils.RunningStats) for the full default configuration.
+
+### Low-memory mode
+
+For large-scale runs where keeping all responses in memory is impractical, set `low_memory=True`. Responses are written to disk as they arrive but not accumulated in memory. Statistics are computed incrementally and available immediately via `result.stats`.
+
+```python
+results = await endpoint_test.run(
+    payload=sample_payload,
+    run_duration=300,
+    clients=50,
+    output_path="outputs/large_run",
+    low_memory=True,
+)
+
+results.stats          # works — computed incrementally during the run
+results.responses      # [] — not in memory
+results.load_responses()  # loads from disk on demand
+```
+
+`low_memory=True` requires `output_path` to be set.
+
 ## Analyzing Run results
 
 The [Result](../reference/results.md#llmeter.results.Result) of a Run provides basic metadata, a wide range of pre-computed `.stats`, and also access to the individual `.responses` ([InvocationResponse](../reference/endpoints/base/#llmeter.endpoints.base.InvocationResponse) objects).
diff --git a/examples/Time-bound runs with Bedrock OpenAI API.ipynb b/examples/Time-bound runs with Bedrock OpenAI API.ipynb
new file mode 100644
index 0000000..f982f7e
--- /dev/null
+++ b/examples/Time-bound runs with Bedrock OpenAI API.ipynb	
@@ -0,0 +1,9672 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Time-bound Runs with Bedrock OpenAI-compatible API\n",
+    "\n",
+    "This notebook demonstrates how to use LLMeter's **time-bound run** feature to measure\n",
+    "sustained throughput and latency over a fixed time window, using Amazon Bedrock's\n",
+    "[OpenAI-compatible Chat Completion API](https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-openai-chat.html).\n",
+    "\n",
+    "Instead of specifying a fixed number of requests, you set a `run_duration` in seconds\n",
+    "and LLMeter sends requests continuously until the time expires — giving you a realistic\n",
+    "picture of steady-state performance.\n",
+    "\n",
+    "We also cover:\n",
+    "- **Live progress-bar statistics** (rpm, latency percentiles, tokens/s)\n",
+    "- **Low-memory mode** for large-scale runs\n",
+    "- **Custom progress-bar stats** configuration"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup\n",
+    "\n",
+    "Install LLMeter with plotting extras, the OpenAI SDK, and the Bedrock token generator."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %pip install \"llmeter[plotting]<1\" openai aws-bedrock-token-generator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "from llmeter.endpoints.openai import OpenAICompletionStreamEndpoint\n",
+    "from llmeter.runner import Runner"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Configure the Bedrock OpenAI-compatible endpoint\n",
+    "\n",
+    "Amazon Bedrock exposes an [OpenAI-compatible Chat Completions API](https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-openai-chat.html)\n",
+    "accessible via the `bedrock-mantle` endpoint. Authentication uses a temporary token\n",
+    "generated from your AWS credentials via `aws-bedrock-token-generator`.\n",
+    "\n",
+    "We use `OpenAICompletionStreamEndpoint` from LLMeter, which works with any\n",
+    "OpenAI Chat Completions-compatible API — including Bedrock's mantle endpoint."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Region: us-west-2\n",
+      "Model: openai.gpt-oss-120b\n",
+      "Endpoint: https://bedrock-mantle.us-west-2.api.aws/v1\n"
+     ]
+    }
+   ],
+   "source": [
+    "from aws_bedrock_token_generator import provide_token\n",
+    "\n",
+    "AWS_REGION = os.environ.get(\"AWS_REGION\", \"us-east-1\")\n",
+    "\n",
+    "# OpenAI GPT-OSS models available on the mantle endpoint:\n",
+    "#   openai.gpt-oss-120b  — larger model for complex tasks (120B parameters)\n",
+    "#   openai.gpt-oss-20b   — smaller, faster model (20B parameters)\n",
+    "# Use the Models API to discover all available models in your region.\n",
+    "MODEL_ID = \"openai.gpt-oss-120b\"  # Choose a model available via the Chat Completions API\n",
+    "BASE_URL = f\"https://bedrock-mantle.{AWS_REGION}.api.aws/v1\"\n",
+    "\n",
+    "# Generate temporary token for Bedrock authentication\n",
+    "token = provide_token(region=AWS_REGION)\n",
+    "\n",
+    "bedrock_endpoint = OpenAICompletionStreamEndpoint(\n",
+    "    model_id=MODEL_ID,\n",
+    "    endpoint_name=\"bedrock-mantle\",\n",
+    "    provider=\"bedrock\",\n",
+    "    base_url=BASE_URL,\n",
+    "    api_key=token,\n",
+    ")\n",
+    "\n",
+    "print(f\"Region: {AWS_REGION}\")\n",
+    "print(f\"Model: {MODEL_ID}\")\n",
+    "print(f\"Endpoint: {BASE_URL}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Verify the endpoint\n",
+    "\n",
+    "Send a single request to confirm the endpoint is working and LLMeter captures\n",
+    "the expected metrics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{\n",
+      "    \"response_text\": \"Latency is the time it takes for a single piece of data to travel from source to destination, i.e., the delay before a response begins. Throughput measures how much data can be transferred successfully over a network or system in a given period of time, typically expressed in bits per second or requests per second. In short, latency is about *speed of response* for one item, while throughput is about *volume of work* that can be handled overall.\",\n",
+      "    \"input_payload\": {\n",
+      "        \"messages\": [\n",
+      "            {\n",
+      "                \"role\": \"user\",\n",
+      "                \"content\": \"Explain the difference between latency and throughput in 3 sentences.\"\n",
+      "            }\n",
+      "        ],\n",
+      "        \"max_tokens\": 256,\n",
+      "        \"model\": \"openai.gpt-oss-120b\",\n",
+      "        \"stream\": true,\n",
+      "        \"stream_options\": {\n",
+      "            \"include_usage\": true\n",
+      "        }\n",
+      "    },\n",
+      "    \"id\": \"chatcmpl-e33a8b37-ad37-4946-9f05-8ce91013245f\",\n",
+      "    \"input_prompt\": \"Explain the difference between latency and throughput in 3 sentences.\",\n",
+      "    \"time_to_first_token\": 1.5777457500007586,\n",
+      "    \"time_to_last_token\": 1.680538542001159,\n",
+      "    \"num_tokens_input\": 77,\n",
+      "    \"num_tokens_output\": 138,\n",
+      "    \"time_per_output_token\": null,\n",
+      "    \"error\": null,\n",
+      "    \"retries\": null\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "sample_payload = OpenAICompletionStreamEndpoint.create_payload(\n",
+    "    \"Explain the difference between latency and throughput in 3 sentences.\",\n",
+    "    max_tokens=256,\n",
+    ")\n",
+    "\n",
+    "response = bedrock_endpoint.invoke(sample_payload)\n",
+    "print(response)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You should see `time_to_first_token`, `time_to_last_token`, and token counts in the\n",
+    "response. If you see an error, check your AWS credentials and model access."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Count-bound run (baseline)\n",
+    "\n",
+    "First, let's run a traditional count-bound test for comparison."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "85613de232074b3192ce45fcf85775b4",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Total requests:   0%|          | 0/15 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table style='border-collapse:collapse;margin:4px 0'><tr><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Throughput</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTFT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTLT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Tokens</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Errors</th></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>rpm</span>&nbsp;&nbsp;<span style='font-family:monospace'>99.6</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>1.118s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>2.205s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>input_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>1155</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>fail</span>&nbsp;&nbsp;<span style='font-family:monospace'>0</span></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>output_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>215.5 tok/s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>3.083s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>5.363s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>output_tokens</span>&nbsp;&nbsp;<span 
style='font-family:monospace'>1947</span></td><td></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>182.0 tok/s</span></td><td></td><td></td><td></td><td></td></tr></table>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total requests: 15\n",
+      "Duration: 14.3s\n",
+      "RPM: 63.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "runner = Runner(\n",
+    "    bedrock_endpoint,\n",
+    "    output_path=f\"outputs/{MODEL_ID}\",\n",
+    ")\n",
+    "\n",
+    "count_result = await runner.run(\n",
+    "    payload=sample_payload,\n",
+    "    n_requests=5,\n",
+    "    clients=3,\n",
+    "    run_name=\"count-bound-baseline\",\n",
+    ")\n",
+    "\n",
+    "print(f\"Total requests: {count_result.total_requests}\")\n",
+    "print(f\"Duration: {count_result.total_test_time:.1f}s\")\n",
+    "print(f\"RPM: {count_result.stats['requests_per_minute']:.1f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Time-bound run\n",
+    "\n",
+    "Now let's run the same endpoint for a fixed duration instead. This is useful when you\n",
+    "want to measure **sustained throughput** — how many requests the endpoint can handle\n",
+    "over a realistic time window.\n",
+    "\n",
+    "Set `run_duration` (in seconds) instead of `n_requests`. Each client sends requests\n",
+    "continuously until the duration expires."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "57cf16076c0d4cffac21c4b69f35d2b3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Elapsed:           | 0/30s [00:00]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<span style='font-size:12px;font-family:monospace;color:#555'>reqs=29</span><br><table style='border-collapse:collapse;margin:4px 0'><tr><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Throughput</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTFT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTLT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Tokens</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Errors</th></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>rpm</span>&nbsp;&nbsp;<span style='font-family:monospace'>58.0</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>1.180s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>2.955s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>input_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>2233</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>fail</span>&nbsp;&nbsp;<span style='font-family:monospace'>0</span></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>output_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>125.0 tok/s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>3.765s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>6.345s</span></td><td style='padding:1px 10px;font-size:12px'><span 
style='color:#666'>output_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>3750</span></td><td></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>86.3 tok/s</span></td><td></td><td></td><td></td><td></td></tr></table>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total requests completed: 29\n",
+      "Actual duration: 32.4s\n",
+      "RPM: 53.7\n",
+      "p50 TTFT: 1.180s\n",
+      "p90 TTLT: 6.345s\n"
+     ]
+    }
+   ],
+   "source": [
+    "duration_result = await runner.run(\n",
+    "    payload=sample_payload,\n",
+    "    run_duration=30,  # Run for 30 seconds\n",
+    "    clients=3,\n",
+    "    run_name=\"time-bound-30s\",\n",
+    ")\n",
+    "\n",
+    "print(f\"Total requests completed: {duration_result.total_requests}\")\n",
+    "print(f\"Actual duration: {duration_result.total_test_time:.1f}s\")\n",
+    "print(f\"RPM: {duration_result.stats['requests_per_minute']:.1f}\")\n",
+    "print(f\"p50 TTFT: {duration_result.stats['time_to_first_token-p50']:.3f}s\")\n",
+    "print(f\"p90 TTLT: {duration_result.stats['time_to_last_token-p90']:.3f}s\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Notice that during the run, you see two live displays:\n",
+    "- **Elapsed**: a time bar filling up as seconds pass\n",
+    "- **Live stats**: a `reqs=N` request counter above a table of live stats (rpm, latency percentiles, tokens/s, etc.)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load test: scaling with more clients\n",
+    "\n",
+    "Time-bound runs are great for exploring how throughput scales with concurrency.\n",
+    "LLMeter's `LoadTest` experiment automates this — it runs each concurrency level\n",
+    "for the same duration and collects results for comparison.\n",
+    "\n",
+    "The `run_duration` parameter makes each concurrency level run for a fixed time\n",
+    "window, giving a fair comparison of sustained throughput."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "96c029b9166648bf89091ad3a2a08bd0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Configurations:   0%|          | 0/4 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7060990ec16749bd93f16fe4a9514899",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Elapsed:           | 0/30s [00:00]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<span style='font-size:12px;font-family:monospace;color:#555'>reqs=13</span><br><table style='border-collapse:collapse;margin:4px 0'><tr><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Throughput</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTFT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTLT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Tokens</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Errors</th></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>rpm</span>&nbsp;&nbsp;<span style='font-family:monospace'>26.8</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>1.071s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>1.927s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>input_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>1001</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>fail</span>&nbsp;&nbsp;<span style='font-family:monospace'>0</span></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>output_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>59.8 tok/s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>3.160s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>6.137s</span></td><td style='padding:1px 10px;font-size:12px'><span 
style='color:#666'>output_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>1741</span></td><td></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>184.6 tok/s</span></td><td></td><td></td><td></td><td></td></tr></table>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d5d22e732c6c4da7a191860e1b7ab87b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Elapsed:           | 0/30s [00:00]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<span style='font-size:12px;font-family:monospace;color:#555'>reqs=37</span><br><table style='border-collapse:collapse;margin:4px 0'><tr><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Throughput</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTFT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTLT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Tokens</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Errors</th></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>rpm</span>&nbsp;&nbsp;<span style='font-family:monospace'>75.0</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>0.886s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>2.035s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>input_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>2849</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>fail</span>&nbsp;&nbsp;<span style='font-family:monospace'>0</span></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>output_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>171.7 tok/s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>2.327s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>4.703s</span></td><td style='padding:1px 10px;font-size:12px'><span 
style='color:#666'>output_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>5079</span></td><td></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>113.6 tok/s</span></td><td></td><td></td><td></td><td></td></tr></table>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "be031a6b2d5e46dca8b0238bc0b1dd9e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Elapsed:           | 0/30s [00:00]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<span style='font-size:12px;font-family:monospace;color:#555'>reqs=63</span><br><table style='border-collapse:collapse;margin:4px 0'><tr><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Throughput</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTFT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTLT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Tokens</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Errors</th></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>rpm</span>&nbsp;&nbsp;<span style='font-family:monospace'>126.6</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>1.181s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>2.388s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>input_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>4851</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>fail</span>&nbsp;&nbsp;<span style='font-family:monospace'>0</span></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>output_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>281.6 tok/s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>2.326s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>4.191s</span></td><td style='padding:1px 10px;font-size:12px'><span 
style='color:#666'>output_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>8411</span></td><td></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>114.9 tok/s</span></td><td></td><td></td><td></td><td></td></tr></table>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "bf8f929d26134894ad73b5197385d783",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Elapsed:           | 0/30s [00:00]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<span style='font-size:12px;font-family:monospace;color:#555'>reqs=106</span><br><table style='border-collapse:collapse;margin:4px 0'><tr><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Throughput</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTFT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTLT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Tokens</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Errors</th></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>rpm</span>&nbsp;&nbsp;<span style='font-family:monospace'>214.4</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>1.417s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>3.034s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>input_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>8162</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>fail</span>&nbsp;&nbsp;<span style='font-family:monospace'>0</span></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>output_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>481.3 tok/s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>3.600s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>4.954s</span></td><td style='padding:1px 10px;font-size:12px'><span 
style='color:#666'>output_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>14279</span></td><td></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>103.0 tok/s</span></td><td></td><td></td><td></td><td></td></tr></table>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from llmeter.experiments import LoadTest\n",
+    "\n",
+    "load_test = LoadTest(\n",
+    "    endpoint=bedrock_endpoint,\n",
+    "    payload=sample_payload,\n",
+    "    sequence_of_clients=[1, 3, 5, 10],\n",
+    "    run_duration=30,  # Each concurrency level runs for 30 seconds\n",
+    "    output_path=f\"outputs/{MODEL_ID}\",\n",
+    "    test_name=\"time-bound-load-test\",\n",
+    ")\n",
+    "\n",
+    "load_test_result = await load_test.run()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  1 clients: 13 requests, 23 rpm, p50 TTFT=1.0713617910005269s\n",
+      "  3 clients: 37 requests, 68 rpm, p50 TTFT=0.8862732499983395s\n",
+      "  5 clients: 63 requests, 118 rpm, p50 TTFT=1.181208833004348s\n",
+      "  10 clients: 106 requests, 196 rpm, p50 TTFT=1.4166151039971737s\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Print summary for each concurrency level\n",
+    "for n_clients, result in sorted(load_test_result.results.items()):\n",
+    "    print(\n",
+    "        f\"  {n_clients} clients: \"\n",
+    "        f\"{result.total_requests} requests, \"\n",
+    "        f\"{result.stats['requests_per_minute']:.0f} rpm, \"\n",
+    "        f\"p50 TTFT={result.stats.get('time_to_first_token-p50', 'N/A')}s\"\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Custom progress-bar statistics\n",
+    "\n",
+    "You can control which live stats appear on the progress bar via `progress_bar_stats`.\n",
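+    "Each entry maps a display label to a field spec: either a plain string such as `\"rpm\"` or `\"failed\"`, or a tuple `(field, aggregation)` such as `(\"time_to_last_token\", \"p99\")`; adding `\"inv\"` as a third element displays the reciprocal of the value (e.g. time per output token becomes tokens per second)."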
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "bbfb9c26d3ae441890e7973e44cf0a40",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Elapsed:           | 0/15s [00:00]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<span style='font-size:12px;font-family:monospace;color:#555'>reqs=17</span><br><table style='border-collapse:collapse;margin:4px 0'><tr><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Throughput</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTLT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Errors</th></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>rpm</span>&nbsp;&nbsp;<span style='font-family:monospace'>71.4</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p99_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>5.521s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>fail</span>&nbsp;&nbsp;<span style='font-family:monospace'>0</span></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>75.3 tok/s</span></td><td></td><td></td></tr></table>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "result = await runner.run(\n",
+    "    payload=sample_payload,\n",
+    "    run_duration=15,\n",
+    "    clients=3,\n",
+    "    run_name=\"custom-stats\",\n",
+    "    progress_bar_stats={\n",
+    "        \"rpm\": \"rpm\",\n",
+    "        \"p99_ttlt\": (\"time_to_last_token\", \"p99\"),\n",
+    "        \"tps\": (\"time_per_output_token\", \"p50\", \"inv\"),\n",
+    "        \"fail\": \"failed\",\n",
+    "    },\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Low-memory mode for large-scale runs\n",
+    "\n",
+    "For long-running tests that generate thousands of responses, use `low_memory=True`\n",
+    "to avoid keeping all responses in memory. Responses are streamed to disk and stats\n",
+    "are computed incrementally.\n",
+    "\n",
+    "This requires `output_path` to be set."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "03f410a858cc4d4ea2cb6513668b4bb4",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Elapsed:           | 0/60s [00:00]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<span style='font-size:12px;font-family:monospace;color:#555'>reqs=220</span><br><table style='border-collapse:collapse;margin:4px 0'><tr><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Throughput</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTFT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>TTLT</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Tokens</th><th style='padding:2px 10px;font-size:11px;color:#888;border-bottom:1px solid #ddd;text-align:left'>Errors</th></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>rpm</span>&nbsp;&nbsp;<span style='font-family:monospace'>220.1</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>0.880s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>2.555s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>input_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>16940</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>fail</span>&nbsp;&nbsp;<span style='font-family:monospace'>0</span></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>output_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>485.8 tok/s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttft</span>&nbsp;&nbsp;<span style='font-family:monospace'>3.128s</span></td><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p90_ttlt</span>&nbsp;&nbsp;<span style='font-family:monospace'>4.802s</span></td><td style='padding:1px 10px;font-size:12px'><span 
style='color:#666'>output_tokens</span>&nbsp;&nbsp;<span style='font-family:monospace'>29135</span></td><td></td></tr><tr><td style='padding:1px 10px;font-size:12px'><span style='color:#666'>p50_tps</span>&nbsp;&nbsp;<span style='font-family:monospace'>102.0 tok/s</span></td><td></td><td></td><td></td><td></td></tr></table>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total requests: 220\n",
+      "RPM: 209.5\n",
+      "Responses in memory: 0\n",
+      "\n",
+      "Stats are available without loading responses:\n",
+      "  p50 TTFT: 0.880s\n",
+      "  p90 TTLT: 4.802s\n"
+     ]
+    }
+   ],
+   "source": [
+    "large_result = await runner.run(\n",
+    "    payload=sample_payload,\n",
+    "    run_duration=60,\n",
+    "    clients=10,\n",
+    "    run_name=\"large-low-memory\",\n",
+    "    low_memory=True,\n",
+    ")\n",
+    "\n",
+    "print(f\"Total requests: {large_result.total_requests}\")\n",
+    "print(f\"RPM: {large_result.stats['requests_per_minute']:.1f}\")\n",
+    "print(f\"Responses in memory: {len(large_result.responses)}\")\n",
+    "print(f\"\\nStats are available without loading responses:\")\n",
+    "print(f\"  p50 TTFT: {large_result.stats['time_to_first_token-p50']:.3f}s\")\n",
+    "print(f\"  p90 TTLT: {large_result.stats['time_to_last_token-p90']:.3f}s\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loaded 1045 responses from disk\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load responses from disk when needed:\n",
+    "responses = large_result.load_responses()\n",
+    "print(f\"Loaded {len(responses)} responses from disk\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Comparing results\n",
+    "\n",
+    "The `LoadTestResult` has a built-in `plot_results()` method that generates\n",
+    "standard charts (TTFT, TTLT, RPM, error rate, token throughput vs clients)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.plotly.v1+json": {
+       "config": {
+        "plotlyServerURL": "https://plot.ly"
+       },
+       "data": [
+        {
+         "name": "time-bound-load-test",
+         "type": "box",
+         "x": [
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10
+         ],
+         "y": [
+          1.810565542000404,
+          1.1812222500011558,
+          3.868108458002098,
+          0.7137894999978016,
+          0.5251540839963127,
+          0.37060487500275485,
+          0.44968758300092304,
+          0.3802408749979804,
+          1.2211427910006023,
+          1.4571087089934736,
+          0.589481999995769,
+          2.097318999993149,
+          1.0713617910005269,
+          0.33201908400224056,
+          0.7742456669948297,
+          2.397023541998351,
+          1.8993818329981877,
+          0.42726020899863215,
+          1.4716677499964135,
+          0.46759149999707006,
+          0.8862732499983395,
+          0.3939086249956745,
+          1.0275854999999865,
+          0.7189318340024329,
+          1.4116186659957748,
+          0.7474134169970057,
+          0.40672895799798425,
+          0.42657495900493814,
+          1.2491307079981198,
+          0.36702779099869076,
+          0.7880193340024562,
+          1.8055343329979223,
+          2.310052083004848,
+          1.7829697910055984,
+          2.0935321249999106,
+          1.3090343329968164,
+          0.7112003330039443,
+          4.315548667000257,
+          0.8603567079990171,
+          1.7188770830034628,
+          0.8016122080007335,
+          0.7778300419959123,
+          0.4183118330038269,
+          1.3940931660035858,
+          1.555591791999177,
+          1.9584035000007134,
+          0.697202000003017,
+          0.5803299999970477,
+          3.9468323339970084,
+          0.9757721249989117,
+          1.002142333003576,
+          0.9818431249950663,
+          2.109215791999304,
+          2.854000667000946,
+          1.1883622079985798,
+          2.397148707997985,
+          2.0537721249929746,
+          1.2194084170041606,
+          0.55904924999777,
+          1.0641959169952315,
+          0.3820479999994859,
+          1.1137916250008857,
+          0.3648492919965065,
+          1.4079380000039237,
+          1.1204503329936415,
+          0.7068704999983311,
+          1.5245771670015529,
+          2.0880037499955506,
+          0.34228199999779463,
+          2.0938974159944337,
+          1.313775374997931,
+          0.9155073329966399,
+          1.178452667001693,
+          0.8153431250029826,
+          0.7699036249978235,
+          0.47562379100418184,
+          0.455455625000468,
+          1.1047283329971833,
+          1.333313959003135,
+          0.9293282919970807,
+          1.2738797499987413,
+          1.299013833006029,
+          0.3571828749991255,
+          0.48103974999685306,
+          0.4824907499933033,
+          2.367823166998278,
+          2.344774500001222,
+          0.42162933399959,
+          0.5273924579960294,
+          0.4980682919995161,
+          0.8799479170047562,
+          1.7304949169993051,
+          1.6569751249990077,
+          1.7166576250019716,
+          1.280893458002538,
+          2.347654417004378,
+          1.1621644590049982,
+          1.2532084999984363,
+          2.9941925419989275,
+          2.0151528749993304,
+          1.181208833004348,
+          2.2984159589977935,
+          1.2298334160004742,
+          1.418954875000054,
+          1.82515254199825,
+          1.2331472500009113,
+          0.87708987500082,
+          0.50351466700522,
+          1.607961791996786,
+          0.7267654579991358,
+          0.5518672089965548,
+          0.7452832909984863,
+          1.8045194169972092,
+          1.7789633329957724,
+          2.605166624998674,
+          3.056866250000894,
+          1.6265989160019672,
+          0.5625950419998844,
+          4.143405791000987,
+          4.180461500000092,
+          3.5610989999986487,
+          0.5881356660029269,
+          0.9779006249955273,
+          3.690586582997639,
+          4.832465791005234,
+          0.6448706250012037,
+          4.863131999998586,
+          1.382325790997129,
+          0.4846024590005982,
+          2.2435131250022096,
+          1.5739306670002406,
+          2.255111082995427,
+          3.899700750000193,
+          1.4509044169972185,
+          1.2176708330007386,
+          2.9069163750027656,
+          2.5278627079969738,
+          0.40958595799747854,
+          1.1879090829970664,
+          0.6117010829984793,
+          0.6858485839984496,
+          2.8986509589958587,
+          1.788087708002422,
+          2.269181875002687,
+          0.9895395829953486,
+          2.5265816250030184,
+          2.149016749994189,
+          0.7873067089967662,
+          1.8220124579966068,
+          2.769100500001514,
+          0.5327938330010511,
+          3.213817707997805,
+          0.3640439999944647,
+          1.7442852500025765,
+          0.39150920799875166,
+          1.3132492920049117,
+          0.5267627499997616,
+          0.6975128749982105,
+          5.765499125001952,
+          0.6618302920032875,
+          0.531012917002954,
+          2.8477291669987608,
+          1.1358064590021968,
+          0.3917771249980433,
+          1.7981745829965803,
+          1.4685571670052013,
+          0.5237741250020918,
+          0.44857070799480425,
+          2.0674697080030455,
+          2.1612447919978877,
+          0.36861462500382913,
+          0.3830527499958407,
+          2.0479602080013137,
+          1.5750675000017509,
+          0.4111657500034198,
+          2.939957292001054,
+          2.9568471250022412,
+          0.37112295900442405,
+          3.418946000005235,
+          0.3710014160024002,
+          0.32679066600394435,
+          2.0638363340040087,
+          1.0570221669986495,
+          0.6728937090010731,
+          1.0676320409984328,
+          0.3201369589951355,
+          1.3715982919966336,
+          2.2774177090032026,
+          0.39108320900413673,
+          0.41870475000177976,
+          3.82099775000097,
+          1.9569717920021503,
+          1.2734504160034703,
+          0.9579590420034947,
+          1.7040523329997086,
+          2.4146079999991343,
+          0.7444077079999261,
+          2.147584582999116,
+          1.8018520420009736,
+          0.43014495899842586,
+          0.3900236660047085,
+          0.4033835000009276,
+          1.2873853749988484,
+          0.5807214170054067,
+          0.5866758340052911,
+          0.42062020899902564,
+          0.657029166999564,
+          1.4600514160047169,
+          0.9175232499983395,
+          3.7755427500014775,
+          2.2247067500065896,
+          0.8008953329990618,
+          2.464140624993888,
+          3.2319100000022445,
+          3.814959125003952,
+          0.357758082995133,
+          0.3283785839958,
+          2.6948829159955494,
+          1.5338809579989174
+         ]
+        }
+       ],
+       "layout": {
+        "colorway": [
+         "rgb(0,0,255)",
+         "rgb(255,0,0)"
+        ],
+        "template": {
+         "data": {
+          "bar": [
+           {
+            "error_x": {
+             "color": "#2a3f5f"
+            },
+            "error_y": {
+             "color": "#2a3f5f"
+            },
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "bar"
+           }
+          ],
+          "barpolar": [
+           {
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "barpolar"
+           }
+          ],
+          "carpet": [
+           {
+            "aaxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "baxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "type": "carpet"
+           }
+          ],
+          "choropleth": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "choropleth"
+           }
+          ],
+          "contour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "contour"
+           }
+          ],
+          "contourcarpet": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "contourcarpet"
+           }
+          ],
+          "heatmap": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "heatmap"
+           }
+          ],
+          "histogram": [
+           {
+            "marker": {
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "histogram"
+           }
+          ],
+          "histogram2d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2d"
+           }
+          ],
+          "histogram2dcontour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2dcontour"
+           }
+          ],
+          "mesh3d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "mesh3d"
+           }
+          ],
+          "parcoords": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "parcoords"
+           }
+          ],
+          "pie": [
+           {
+            "automargin": true,
+            "type": "pie"
+           }
+          ],
+          "scatter": [
+           {
+            "fillpattern": {
+             "fillmode": "overlay",
+             "size": 10,
+             "solidity": 0.2
+            },
+            "type": "scatter"
+           }
+          ],
+          "scatter3d": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatter3d"
+           }
+          ],
+          "scattercarpet": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattercarpet"
+           }
+          ],
+          "scattergeo": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergeo"
+           }
+          ],
+          "scattergl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergl"
+           }
+          ],
+          "scattermap": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermap"
+           }
+          ],
+          "scattermapbox": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermapbox"
+           }
+          ],
+          "scatterpolar": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolar"
+           }
+          ],
+          "scatterpolargl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolargl"
+           }
+          ],
+          "scatterternary": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterternary"
+           }
+          ],
+          "surface": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "surface"
+           }
+          ],
+          "table": [
+           {
+            "cells": {
+             "fill": {
+              "color": "#EBF0F8"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "header": {
+             "fill": {
+              "color": "#C8D4E3"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "type": "table"
+           }
+          ]
+         },
+         "layout": {
+          "annotationdefaults": {
+           "arrowcolor": "#2a3f5f",
+           "arrowhead": 0,
+           "arrowwidth": 1
+          },
+          "autotypenumbers": "strict",
+          "coloraxis": {
+           "colorbar": {
+            "outlinewidth": 0,
+            "ticks": ""
+           }
+          },
+          "colorscale": {
+           "diverging": [
+            [
+             0,
+             "#8e0152"
+            ],
+            [
+             0.1,
+             "#c51b7d"
+            ],
+            [
+             0.2,
+             "#de77ae"
+            ],
+            [
+             0.3,
+             "#f1b6da"
+            ],
+            [
+             0.4,
+             "#fde0ef"
+            ],
+            [
+             0.5,
+             "#f7f7f7"
+            ],
+            [
+             0.6,
+             "#e6f5d0"
+            ],
+            [
+             0.7,
+             "#b8e186"
+            ],
+            [
+             0.8,
+             "#7fbc41"
+            ],
+            [
+             0.9,
+             "#4d9221"
+            ],
+            [
+             1,
+             "#276419"
+            ]
+           ],
+           "sequential": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ],
+           "sequentialminus": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ]
+          },
+          "colorway": [
+           "#636efa",
+           "#EF553B",
+           "#00cc96",
+           "#ab63fa",
+           "#FFA15A",
+           "#19d3f3",
+           "#FF6692",
+           "#B6E880",
+           "#FF97FF",
+           "#FECB52"
+          ],
+          "font": {
+           "color": "#2a3f5f"
+          },
+          "geo": {
+           "bgcolor": "white",
+           "lakecolor": "white",
+           "landcolor": "white",
+           "showlakes": true,
+           "showland": true,
+           "subunitcolor": "#C8D4E3"
+          },
+          "hoverlabel": {
+           "align": "left"
+          },
+          "hovermode": "closest",
+          "mapbox": {
+           "style": "light"
+          },
+          "paper_bgcolor": "white",
+          "plot_bgcolor": "white",
+          "polar": {
+           "angularaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "radialaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           }
+          },
+          "scene": {
+           "xaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "yaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "zaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           }
+          },
+          "shapedefaults": {
+           "line": {
+            "color": "#2a3f5f"
+           }
+          },
+          "ternary": {
+           "aaxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "baxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "caxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           }
+          },
+          "title": {
+           "x": 0.05
+          },
+          "xaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          },
+          "yaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          }
+         }
+        },
+        "title": {
+         "text": "Time to first token vs number of clients"
+        },
+        "xaxis": {
+         "tickformat": "s",
+         "title": {
+          "text": "Number of clients"
+         },
+         "type": "log"
+        },
+        "yaxis": {
+         "tickformat": ".2s",
+         "title": {
+          "text": "Time to first token (s)"
+         },
+         "type": "log"
+        }
+       }
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.plotly.v1+json": {
+       "config": {
+        "plotlyServerURL": "https://plot.ly"
+       },
+       "data": [
+        {
+         "name": "time-bound-load-test",
+         "type": "box",
+         "x": [
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          1,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          3,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          5,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10,
+          10
+         ],
+         "y": [
+          2.026969166996423,
+          1.4105780000027153,
+          7.017155499997898,
+          3.8645369169971673,
+          0.8202292089990806,
+          0.657336791002308,
+          1.9274062499971478,
+          1.5236599999989267,
+          1.6036553750018356,
+          2.048463749997609,
+          1.3640452499967068,
+          4.816340499994112,
+          4.1498347079977975,
+          0.8604247920011403,
+          4.527456333998998,
+          4.830777458999364,
+          4.042134957999224,
+          0.8596613339977921,
+          1.505671082995832,
+          1.8728053750019171,
+          1.2073444169946015,
+          1.3267667499967501,
+          2.007788625000103,
+          0.8214762089992291,
+          1.457438582998293,
+          1.9353756669952418,
+          1.7568827079958282,
+          2.1657332090035197,
+          2.5171536249981727,
+          2.4936623750036233,
+          1.8358565419985098,
+          2.5352387500024633,
+          4.18328370800009,
+          2.1444086660048924,
+          4.2379080839964445,
+          1.5257862919970648,
+          3.2496527500043157,
+          7.227931708999677,
+          2.9760203749974607,
+          2.6227675410045777,
+          1.3151865409963648,
+          2.4453202499935287,
+          1.117245624998759,
+          1.9994487080039107,
+          1.5960046249965671,
+          2.0353990830044495,
+          4.6709048750053626,
+          1.9501907090016175,
+          5.925759499994456,
+          2.86150141699909,
+          1.250162124997587,
+          1.0117991250008345,
+          2.1457077500017476,
+          4.487846791998891,
+          4.524017457995797,
+          4.866551667000749,
+          5.196730249997927,
+          1.3659177080044174,
+          0.9260156250020373,
+          1.3981401669952902,
+          1.0464315839999472,
+          2.078450959001202,
+          1.0507028339998215,
+          1.6905905000021448,
+          2.6238793749944307,
+          3.485501415998442,
+          4.309456792005221,
+          4.642852416000096,
+          0.7529281669994816,
+          2.353995790996123,
+          3.662087040996994,
+          2.490439165994758,
+          3.0507285830026376,
+          2.388044457999058,
+          3.0282242910034256,
+          2.1366724999970756,
+          1.6564010420042905,
+          2.1372902919974877,
+          3.2651202920023934,
+          1.7618697919970145,
+          1.643917583001894,
+          1.6312250420014607,
+          2.149333665998711,
+          2.428906665998511,
+          1.8692639999935636,
+          2.433882334000373,
+          2.4431621249968885,
+          0.8310220839994145,
+          1.045902957994258,
+          0.8828566669981228,
+          1.8837196250024135,
+          2.982938708999427,
+          3.969376375003776,
+          1.741798708004353,
+          2.9292802909985767,
+          3.2932393750015763,
+          1.406095334001293,
+          2.6276067910002894,
+          3.2220796670007985,
+          2.1393764579988783,
+          3.168885292005143,
+          4.014242750003177,
+          3.173061375004181,
+          2.579963874995883,
+          2.9128747079957975,
+          3.0359742090004147,
+          2.6948128750009346,
+          2.075248291999742,
+          2.100795999998809,
+          2.645283375000872,
+          2.2994953749948763,
+          1.601715790995513,
+          3.14368887499586,
+          2.526538416997937,
+          2.6472249580037897,
+          3.0995452499992098,
+          3.2772074999957113,
+          0.9971983330033254,
+          4.181597540999064,
+          4.221945541998139,
+          4.342931500003033,
+          1.8289941659968463,
+          1.223867082997458,
+          4.93074450000131,
+          5.136637708004855,
+          3.013893208000809,
+          5.778089958999772,
+          1.426899541002058,
+          0.9363519999969867,
+          2.89067904100375,
+          2.944057749999047,
+          3.721774457997526,
+          4.210495624996838,
+          3.5966603749984642,
+          4.719359624999925,
+          5.110498792004364,
+          2.620230332999199,
+          1.5105535420007072,
+          3.440597250002611,
+          2.618421290993865,
+          0.7280902089987649,
+          5.12706341699959,
+          3.5233514999999898,
+          2.377603459004604,
+          1.6091642499959562,
+          4.864977290999377,
+          3.42581183299626,
+          2.015671209002903,
+          2.5336824169935426,
+          3.3301199580018874,
+          1.6080059169980814,
+          4.445332292001694,
+          1.1371099159950973,
+          3.4345513340012985,
+          2.176926625004853,
+          1.318248791998485,
+          1.8371326659980696,
+          3.397253916999034,
+          6.461382583001978,
+          2.2144158330047503,
+          0.9335729590020492,
+          5.486439041997073,
+          3.0541425420015003,
+          1.6030845409986796,
+          4.258436707998044,
+          1.996725625002,
+          2.5789501250037574,
+          0.794825207995018,
+          2.5077784169989172,
+          4.381451000001107,
+          0.7341202500028885,
+          0.526786832997459,
+          3.1630495419958606,
+          3.0988521670005866,
+          1.6619982920019538,
+          3.71616087500297,
+          3.8252187089965446,
+          2.4751521250000224,
+          5.006649583003309,
+          1.3394295000034617,
+          3.611661916002049,
+          4.377064042004349,
+          3.840444875000685,
+          1.659149917002651,
+          2.832853415995487,
+          0.6458929590007756,
+          3.5052734589989996,
+          5.6474424170010025,
+          0.8091522090035141,
+          0.9284407500017551,
+          6.816903582999657,
+          3.9716163330012932,
+          1.5373788330034586,
+          2.054647875003866,
+          3.2553307080015657,
+          5.010804708996147,
+          3.646031333002611,
+          3.5746835000027204,
+          4.229959917000087,
+          1.9007484999965527,
+          0.7102053330017952,
+          2.6844349579987465,
+          3.390159291004238,
+          1.8455996250049793,
+          1.423415750003187,
+          2.3246775419975165,
+          1.438939500003471,
+          3.1382235830024,
+          2.365440415997,
+          4.739204375000554,
+          4.722318541003915,
+          1.851873166000587,
+          4.211808124993695,
+          3.862824666000961,
+          3.8172453340012,
+          2.785455874996842,
+          2.5279075840007863,
+          4.43163616599486,
+          3.2573652919963934
+         ]
+        }
+       ],
+       "layout": {
+        "colorway": [
+         "#30123b",
+         "#4145ab",
+         "#4675ed",
+         "#39a2fc",
+         "#1bcfd4",
+         "#24eca6",
+         "#61fc6c",
+         "#a4fc3b",
+         "#d1e834",
+         "#f3c63a",
+         "#fe9b2d",
+         "#f36315",
+         "#d93806",
+         "#b11901",
+         "#7a0402"
+        ],
+        "template": {
+         "data": {
+          "bar": [
+           {
+            "error_x": {
+             "color": "#2a3f5f"
+            },
+            "error_y": {
+             "color": "#2a3f5f"
+            },
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "bar"
+           }
+          ],
+          "barpolar": [
+           {
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "barpolar"
+           }
+          ],
+          "carpet": [
+           {
+            "aaxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "baxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "type": "carpet"
+           }
+          ],
+          "choropleth": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "choropleth"
+           }
+          ],
+          "contour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "contour"
+           }
+          ],
+          "contourcarpet": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "contourcarpet"
+           }
+          ],
+          "heatmap": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "heatmap"
+           }
+          ],
+          "histogram": [
+           {
+            "marker": {
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "histogram"
+           }
+          ],
+          "histogram2d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2d"
+           }
+          ],
+          "histogram2dcontour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2dcontour"
+           }
+          ],
+          "mesh3d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "mesh3d"
+           }
+          ],
+          "parcoords": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "parcoords"
+           }
+          ],
+          "pie": [
+           {
+            "automargin": true,
+            "type": "pie"
+           }
+          ],
+          "scatter": [
+           {
+            "fillpattern": {
+             "fillmode": "overlay",
+             "size": 10,
+             "solidity": 0.2
+            },
+            "type": "scatter"
+           }
+          ],
+          "scatter3d": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatter3d"
+           }
+          ],
+          "scattercarpet": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattercarpet"
+           }
+          ],
+          "scattergeo": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergeo"
+           }
+          ],
+          "scattergl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergl"
+           }
+          ],
+          "scattermap": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermap"
+           }
+          ],
+          "scattermapbox": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermapbox"
+           }
+          ],
+          "scatterpolar": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolar"
+           }
+          ],
+          "scatterpolargl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolargl"
+           }
+          ],
+          "scatterternary": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterternary"
+           }
+          ],
+          "surface": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "surface"
+           }
+          ],
+          "table": [
+           {
+            "cells": {
+             "fill": {
+              "color": "#EBF0F8"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "header": {
+             "fill": {
+              "color": "#C8D4E3"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "type": "table"
+           }
+          ]
+         },
+         "layout": {
+          "annotationdefaults": {
+           "arrowcolor": "#2a3f5f",
+           "arrowhead": 0,
+           "arrowwidth": 1
+          },
+          "autotypenumbers": "strict",
+          "coloraxis": {
+           "colorbar": {
+            "outlinewidth": 0,
+            "ticks": ""
+           }
+          },
+          "colorscale": {
+           "diverging": [
+            [
+             0,
+             "#8e0152"
+            ],
+            [
+             0.1,
+             "#c51b7d"
+            ],
+            [
+             0.2,
+             "#de77ae"
+            ],
+            [
+             0.3,
+             "#f1b6da"
+            ],
+            [
+             0.4,
+             "#fde0ef"
+            ],
+            [
+             0.5,
+             "#f7f7f7"
+            ],
+            [
+             0.6,
+             "#e6f5d0"
+            ],
+            [
+             0.7,
+             "#b8e186"
+            ],
+            [
+             0.8,
+             "#7fbc41"
+            ],
+            [
+             0.9,
+             "#4d9221"
+            ],
+            [
+             1,
+             "#276419"
+            ]
+           ],
+           "sequential": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ],
+           "sequentialminus": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ]
+          },
+          "colorway": [
+           "#636efa",
+           "#EF553B",
+           "#00cc96",
+           "#ab63fa",
+           "#FFA15A",
+           "#19d3f3",
+           "#FF6692",
+           "#B6E880",
+           "#FF97FF",
+           "#FECB52"
+          ],
+          "font": {
+           "color": "#2a3f5f"
+          },
+          "geo": {
+           "bgcolor": "white",
+           "lakecolor": "white",
+           "landcolor": "white",
+           "showlakes": true,
+           "showland": true,
+           "subunitcolor": "#C8D4E3"
+          },
+          "hoverlabel": {
+           "align": "left"
+          },
+          "hovermode": "closest",
+          "mapbox": {
+           "style": "light"
+          },
+          "paper_bgcolor": "white",
+          "plot_bgcolor": "white",
+          "polar": {
+           "angularaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "radialaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           }
+          },
+          "scene": {
+           "xaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "yaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "zaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           }
+          },
+          "shapedefaults": {
+           "line": {
+            "color": "#2a3f5f"
+           }
+          },
+          "ternary": {
+           "aaxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "baxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "caxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           }
+          },
+          "title": {
+           "x": 0.05
+          },
+          "xaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          },
+          "yaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          }
+         }
+        },
+        "title": {
+         "text": "Time to last token vs number of clients"
+        },
+        "xaxis": {
+         "tickformat": "s",
+         "title": {
+          "text": "Number of clients"
+         },
+         "type": "log"
+        },
+        "yaxis": {
+         "tickformat": ".2s",
+         "title": {
+          "text": "Time to last token (s)"
+         },
+         "type": "log"
+        }
+       }
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.plotly.v1+json": {
+       "config": {
+        "plotlyServerURL": "https://plot.ly"
+       },
+       "data": [
+        {
+         "line": {
+          "dash": "dot"
+         },
+         "mode": "lines+markers",
+         "name": "time-bound-load-test",
+         "opacity": 0.5,
+         "type": "scatter",
+         "x": [
+          1,
+          3,
+          5,
+          10
+         ],
+         "y": [
+          23.435395092386933,
+          68.4096394213185,
+          118.005947056227,
+          196.19625184901724
+         ]
+        }
+       ],
+       "layout": {
+        "colorway": [
+         "rgb(124, 29, 111)",
+         "rgb(185, 37, 122)",
+         "rgb(220, 57, 119)",
+         "rgb(227, 79, 111)",
+         "rgb(240, 116, 110)",
+         "rgb(250, 164, 118)",
+         "rgb(252, 222, 156)"
+        ],
+        "template": {
+         "data": {
+          "bar": [
+           {
+            "error_x": {
+             "color": "#2a3f5f"
+            },
+            "error_y": {
+             "color": "#2a3f5f"
+            },
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "bar"
+           }
+          ],
+          "barpolar": [
+           {
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "barpolar"
+           }
+          ],
+          "carpet": [
+           {
+            "aaxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "baxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "type": "carpet"
+           }
+          ],
+          "choropleth": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "choropleth"
+           }
+          ],
+          "contour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "contour"
+           }
+          ],
+          "contourcarpet": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "contourcarpet"
+           }
+          ],
+          "heatmap": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "heatmap"
+           }
+          ],
+          "histogram": [
+           {
+            "marker": {
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "histogram"
+           }
+          ],
+          "histogram2d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2d"
+           }
+          ],
+          "histogram2dcontour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2dcontour"
+           }
+          ],
+          "mesh3d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "mesh3d"
+           }
+          ],
+          "parcoords": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "parcoords"
+           }
+          ],
+          "pie": [
+           {
+            "automargin": true,
+            "type": "pie"
+           }
+          ],
+          "scatter": [
+           {
+            "fillpattern": {
+             "fillmode": "overlay",
+             "size": 10,
+             "solidity": 0.2
+            },
+            "type": "scatter"
+           }
+          ],
+          "scatter3d": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatter3d"
+           }
+          ],
+          "scattercarpet": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattercarpet"
+           }
+          ],
+          "scattergeo": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergeo"
+           }
+          ],
+          "scattergl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergl"
+           }
+          ],
+          "scattermap": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermap"
+           }
+          ],
+          "scattermapbox": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermapbox"
+           }
+          ],
+          "scatterpolar": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolar"
+           }
+          ],
+          "scatterpolargl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolargl"
+           }
+          ],
+          "scatterternary": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterternary"
+           }
+          ],
+          "surface": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "surface"
+           }
+          ],
+          "table": [
+           {
+            "cells": {
+             "fill": {
+              "color": "#EBF0F8"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "header": {
+             "fill": {
+              "color": "#C8D4E3"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "type": "table"
+           }
+          ]
+         },
+         "layout": {
+          "annotationdefaults": {
+           "arrowcolor": "#2a3f5f",
+           "arrowhead": 0,
+           "arrowwidth": 1
+          },
+          "autotypenumbers": "strict",
+          "coloraxis": {
+           "colorbar": {
+            "outlinewidth": 0,
+            "ticks": ""
+           }
+          },
+          "colorscale": {
+           "diverging": [
+            [
+             0,
+             "#8e0152"
+            ],
+            [
+             0.1,
+             "#c51b7d"
+            ],
+            [
+             0.2,
+             "#de77ae"
+            ],
+            [
+             0.3,
+             "#f1b6da"
+            ],
+            [
+             0.4,
+             "#fde0ef"
+            ],
+            [
+             0.5,
+             "#f7f7f7"
+            ],
+            [
+             0.6,
+             "#e6f5d0"
+            ],
+            [
+             0.7,
+             "#b8e186"
+            ],
+            [
+             0.8,
+             "#7fbc41"
+            ],
+            [
+             0.9,
+             "#4d9221"
+            ],
+            [
+             1,
+             "#276419"
+            ]
+           ],
+           "sequential": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ],
+           "sequentialminus": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ]
+          },
+          "colorway": [
+           "#636efa",
+           "#EF553B",
+           "#00cc96",
+           "#ab63fa",
+           "#FFA15A",
+           "#19d3f3",
+           "#FF6692",
+           "#B6E880",
+           "#FF97FF",
+           "#FECB52"
+          ],
+          "font": {
+           "color": "#2a3f5f"
+          },
+          "geo": {
+           "bgcolor": "white",
+           "lakecolor": "white",
+           "landcolor": "white",
+           "showlakes": true,
+           "showland": true,
+           "subunitcolor": "#C8D4E3"
+          },
+          "hoverlabel": {
+           "align": "left"
+          },
+          "hovermode": "closest",
+          "mapbox": {
+           "style": "light"
+          },
+          "paper_bgcolor": "white",
+          "plot_bgcolor": "white",
+          "polar": {
+           "angularaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "radialaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           }
+          },
+          "scene": {
+           "xaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "yaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "zaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           }
+          },
+          "shapedefaults": {
+           "line": {
+            "color": "#2a3f5f"
+           }
+          },
+          "ternary": {
+           "aaxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "baxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "caxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           }
+          },
+          "title": {
+           "x": 0.05
+          },
+          "xaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          },
+          "yaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          }
+         }
+        },
+        "title": {
+         "text": "Requests per minute vs number of clients"
+        },
+        "xaxis": {
+         "title": {
+          "text": "Number of clients"
+         },
+         "type": "log"
+        },
+        "yaxis": {
+         "tickformat": ".2s",
+         "title": {
+          "text": "Requests per minute"
+         },
+         "type": "log"
+        }
+       }
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.plotly.v1+json": {
+       "config": {
+        "plotlyServerURL": "https://plot.ly"
+       },
+       "data": [
+        {
+         "line": {
+          "dash": "dot"
+         },
+         "mode": "lines+markers",
+         "name": "time-bound-load-test",
+         "opacity": 0.5,
+         "type": "scatter",
+         "x": [
+          1,
+          3,
+          5,
+          10
+         ],
+         "y": [
+          0,
+          0,
+          0,
+          0
+         ]
+        }
+       ],
+       "layout": {
+        "colorway": [
+         "rgb(0,0,0)",
+         "rgb(230,0,0)",
+         "rgb(230,210,0)",
+         "rgb(255,255,255)",
+         "rgb(160,200,255)"
+        ],
+        "template": {
+         "data": {
+          "bar": [
+           {
+            "error_x": {
+             "color": "#2a3f5f"
+            },
+            "error_y": {
+             "color": "#2a3f5f"
+            },
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "bar"
+           }
+          ],
+          "barpolar": [
+           {
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "barpolar"
+           }
+          ],
+          "carpet": [
+           {
+            "aaxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "baxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "type": "carpet"
+           }
+          ],
+          "choropleth": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "choropleth"
+           }
+          ],
+          "contour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "contour"
+           }
+          ],
+          "contourcarpet": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "contourcarpet"
+           }
+          ],
+          "heatmap": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "heatmap"
+           }
+          ],
+          "histogram": [
+           {
+            "marker": {
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "histogram"
+           }
+          ],
+          "histogram2d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2d"
+           }
+          ],
+          "histogram2dcontour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2dcontour"
+           }
+          ],
+          "mesh3d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "mesh3d"
+           }
+          ],
+          "parcoords": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "parcoords"
+           }
+          ],
+          "pie": [
+           {
+            "automargin": true,
+            "type": "pie"
+           }
+          ],
+          "scatter": [
+           {
+            "fillpattern": {
+             "fillmode": "overlay",
+             "size": 10,
+             "solidity": 0.2
+            },
+            "type": "scatter"
+           }
+          ],
+          "scatter3d": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatter3d"
+           }
+          ],
+          "scattercarpet": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattercarpet"
+           }
+          ],
+          "scattergeo": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergeo"
+           }
+          ],
+          "scattergl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergl"
+           }
+          ],
+          "scattermap": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermap"
+           }
+          ],
+          "scattermapbox": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermapbox"
+           }
+          ],
+          "scatterpolar": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolar"
+           }
+          ],
+          "scatterpolargl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolargl"
+           }
+          ],
+          "scatterternary": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterternary"
+           }
+          ],
+          "surface": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "surface"
+           }
+          ],
+          "table": [
+           {
+            "cells": {
+             "fill": {
+              "color": "#EBF0F8"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "header": {
+             "fill": {
+              "color": "#C8D4E3"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "type": "table"
+           }
+          ]
+         },
+         "layout": {
+          "annotationdefaults": {
+           "arrowcolor": "#2a3f5f",
+           "arrowhead": 0,
+           "arrowwidth": 1
+          },
+          "autotypenumbers": "strict",
+          "coloraxis": {
+           "colorbar": {
+            "outlinewidth": 0,
+            "ticks": ""
+           }
+          },
+          "colorscale": {
+           "diverging": [
+            [
+             0,
+             "#8e0152"
+            ],
+            [
+             0.1,
+             "#c51b7d"
+            ],
+            [
+             0.2,
+             "#de77ae"
+            ],
+            [
+             0.3,
+             "#f1b6da"
+            ],
+            [
+             0.4,
+             "#fde0ef"
+            ],
+            [
+             0.5,
+             "#f7f7f7"
+            ],
+            [
+             0.6,
+             "#e6f5d0"
+            ],
+            [
+             0.7,
+             "#b8e186"
+            ],
+            [
+             0.8,
+             "#7fbc41"
+            ],
+            [
+             0.9,
+             "#4d9221"
+            ],
+            [
+             1,
+             "#276419"
+            ]
+           ],
+           "sequential": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ],
+           "sequentialminus": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ]
+          },
+          "colorway": [
+           "#636efa",
+           "#EF553B",
+           "#00cc96",
+           "#ab63fa",
+           "#FFA15A",
+           "#19d3f3",
+           "#FF6692",
+           "#B6E880",
+           "#FF97FF",
+           "#FECB52"
+          ],
+          "font": {
+           "color": "#2a3f5f"
+          },
+          "geo": {
+           "bgcolor": "white",
+           "lakecolor": "white",
+           "landcolor": "white",
+           "showlakes": true,
+           "showland": true,
+           "subunitcolor": "#C8D4E3"
+          },
+          "hoverlabel": {
+           "align": "left"
+          },
+          "hovermode": "closest",
+          "mapbox": {
+           "style": "light"
+          },
+          "paper_bgcolor": "white",
+          "plot_bgcolor": "white",
+          "polar": {
+           "angularaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "radialaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           }
+          },
+          "scene": {
+           "xaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "yaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "zaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           }
+          },
+          "shapedefaults": {
+           "line": {
+            "color": "#2a3f5f"
+           }
+          },
+          "ternary": {
+           "aaxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "baxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "caxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           }
+          },
+          "title": {
+           "x": 0.05
+          },
+          "xaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          },
+          "yaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          }
+         }
+        },
+        "title": {
+         "text": "Error rate vs number of clients"
+        },
+        "xaxis": {
+         "title": {
+          "text": "Number of clients"
+         },
+         "type": "log"
+        },
+        "yaxis": {
+         "title": {
+          "text": "Error rate"
+         }
+        }
+       }
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.plotly.v1+json": {
+       "config": {
+        "plotlyServerURL": "https://plot.ly"
+       },
+       "data": [
+        {
+         "line": {
+          "dash": "dot"
+         },
+         "mode": "lines+markers",
+         "name": "time-bound-load-test",
+         "opacity": 0.5,
+         "type": "scatter",
+         "x": [
+          1,
+          3,
+          5,
+          10
+         ],
+         "y": [
+          1804.5254221137939,
+          5267.542235441524,
+          9086.457923329479,
+          15107.111392374327
+         ]
+        }
+       ],
+       "layout": {
+        "colorway": [
+         "#440154",
+         "#482878",
+         "#3e4989",
+         "#31688e",
+         "#26828e",
+         "#1f9e89",
+         "#35b779",
+         "#6ece58",
+         "#b5de2b",
+         "#fde725"
+        ],
+        "template": {
+         "data": {
+          "bar": [
+           {
+            "error_x": {
+             "color": "#2a3f5f"
+            },
+            "error_y": {
+             "color": "#2a3f5f"
+            },
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "bar"
+           }
+          ],
+          "barpolar": [
+           {
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "barpolar"
+           }
+          ],
+          "carpet": [
+           {
+            "aaxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "baxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "type": "carpet"
+           }
+          ],
+          "choropleth": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "choropleth"
+           }
+          ],
+          "contour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "contour"
+           }
+          ],
+          "contourcarpet": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "contourcarpet"
+           }
+          ],
+          "heatmap": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "heatmap"
+           }
+          ],
+          "histogram": [
+           {
+            "marker": {
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "histogram"
+           }
+          ],
+          "histogram2d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2d"
+           }
+          ],
+          "histogram2dcontour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2dcontour"
+           }
+          ],
+          "mesh3d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "mesh3d"
+           }
+          ],
+          "parcoords": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "parcoords"
+           }
+          ],
+          "pie": [
+           {
+            "automargin": true,
+            "type": "pie"
+           }
+          ],
+          "scatter": [
+           {
+            "fillpattern": {
+             "fillmode": "overlay",
+             "size": 10,
+             "solidity": 0.2
+            },
+            "type": "scatter"
+           }
+          ],
+          "scatter3d": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatter3d"
+           }
+          ],
+          "scattercarpet": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattercarpet"
+           }
+          ],
+          "scattergeo": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergeo"
+           }
+          ],
+          "scattergl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergl"
+           }
+          ],
+          "scattermap": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermap"
+           }
+          ],
+          "scattermapbox": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermapbox"
+           }
+          ],
+          "scatterpolar": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolar"
+           }
+          ],
+          "scatterpolargl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolargl"
+           }
+          ],
+          "scatterternary": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterternary"
+           }
+          ],
+          "surface": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "surface"
+           }
+          ],
+          "table": [
+           {
+            "cells": {
+             "fill": {
+              "color": "#EBF0F8"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "header": {
+             "fill": {
+              "color": "#C8D4E3"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "type": "table"
+           }
+          ]
+         },
+         "layout": {
+          "annotationdefaults": {
+           "arrowcolor": "#2a3f5f",
+           "arrowhead": 0,
+           "arrowwidth": 1
+          },
+          "autotypenumbers": "strict",
+          "coloraxis": {
+           "colorbar": {
+            "outlinewidth": 0,
+            "ticks": ""
+           }
+          },
+          "colorscale": {
+           "diverging": [
+            [
+             0,
+             "#8e0152"
+            ],
+            [
+             0.1,
+             "#c51b7d"
+            ],
+            [
+             0.2,
+             "#de77ae"
+            ],
+            [
+             0.3,
+             "#f1b6da"
+            ],
+            [
+             0.4,
+             "#fde0ef"
+            ],
+            [
+             0.5,
+             "#f7f7f7"
+            ],
+            [
+             0.6,
+             "#e6f5d0"
+            ],
+            [
+             0.7,
+             "#b8e186"
+            ],
+            [
+             0.8,
+             "#7fbc41"
+            ],
+            [
+             0.9,
+             "#4d9221"
+            ],
+            [
+             1,
+             "#276419"
+            ]
+           ],
+           "sequential": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ],
+           "sequentialminus": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ]
+          },
+          "colorway": [
+           "#636efa",
+           "#EF553B",
+           "#00cc96",
+           "#ab63fa",
+           "#FFA15A",
+           "#19d3f3",
+           "#FF6692",
+           "#B6E880",
+           "#FF97FF",
+           "#FECB52"
+          ],
+          "font": {
+           "color": "#2a3f5f"
+          },
+          "geo": {
+           "bgcolor": "white",
+           "lakecolor": "white",
+           "landcolor": "white",
+           "showlakes": true,
+           "showland": true,
+           "subunitcolor": "#C8D4E3"
+          },
+          "hoverlabel": {
+           "align": "left"
+          },
+          "hovermode": "closest",
+          "mapbox": {
+           "style": "light"
+          },
+          "paper_bgcolor": "white",
+          "plot_bgcolor": "white",
+          "polar": {
+           "angularaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "radialaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           }
+          },
+          "scene": {
+           "xaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "yaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "zaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           }
+          },
+          "shapedefaults": {
+           "line": {
+            "color": "#2a3f5f"
+           }
+          },
+          "ternary": {
+           "aaxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "baxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "caxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           }
+          },
+          "title": {
+           "x": 0.05
+          },
+          "xaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          },
+          "yaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          }
+         }
+        },
+        "title": {
+         "text": "Average input tokens per minute vs number of clients"
+        },
+        "xaxis": {
+         "tickformat": ".2s",
+         "title": {
+          "text": "Number of clients"
+         },
+         "type": "log"
+        },
+        "yaxis": {
+         "tickformat": ".2s",
+         "title": {
+          "text": "Average input tokens per minute"
+         },
+         "type": "log"
+        }
+       }
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.plotly.v1+json": {
+       "config": {
+        "plotlyServerURL": "https://plot.ly"
+       },
+       "data": [
+        {
+         "line": {
+          "dash": "dot"
+         },
+         "mode": "lines+markers",
+         "name": "time-bound-load-test",
+         "opacity": 0.5,
+         "type": "scatter",
+         "x": [
+          1,
+          3,
+          5,
+          10
+         ],
+         "y": [
+          3138.5402196804343,
+          9390.609692456126,
+          15754.73048714167,
+          26429.11585049167
+         ]
+        }
+       ],
+       "layout": {
+        "colorway": [
+         "#0d0887",
+         "#46039f",
+         "#7201a8",
+         "#9c179e",
+         "#bd3786",
+         "#d8576b",
+         "#ed7953",
+         "#fb9f3a",
+         "#fdca26",
+         "#f0f921"
+        ],
+        "template": {
+         "data": {
+          "bar": [
+           {
+            "error_x": {
+             "color": "#2a3f5f"
+            },
+            "error_y": {
+             "color": "#2a3f5f"
+            },
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "bar"
+           }
+          ],
+          "barpolar": [
+           {
+            "marker": {
+             "line": {
+              "color": "white",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "barpolar"
+           }
+          ],
+          "carpet": [
+           {
+            "aaxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "baxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "#C8D4E3",
+             "linecolor": "#C8D4E3",
+             "minorgridcolor": "#C8D4E3",
+             "startlinecolor": "#2a3f5f"
+            },
+            "type": "carpet"
+           }
+          ],
+          "choropleth": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "choropleth"
+           }
+          ],
+          "contour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "contour"
+           }
+          ],
+          "contourcarpet": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "contourcarpet"
+           }
+          ],
+          "heatmap": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "heatmap"
+           }
+          ],
+          "histogram": [
+           {
+            "marker": {
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "histogram"
+           }
+          ],
+          "histogram2d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2d"
+           }
+          ],
+          "histogram2dcontour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2dcontour"
+           }
+          ],
+          "mesh3d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "mesh3d"
+           }
+          ],
+          "parcoords": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "parcoords"
+           }
+          ],
+          "pie": [
+           {
+            "automargin": true,
+            "type": "pie"
+           }
+          ],
+          "scatter": [
+           {
+            "fillpattern": {
+             "fillmode": "overlay",
+             "size": 10,
+             "solidity": 0.2
+            },
+            "type": "scatter"
+           }
+          ],
+          "scatter3d": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatter3d"
+           }
+          ],
+          "scattercarpet": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattercarpet"
+           }
+          ],
+          "scattergeo": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergeo"
+           }
+          ],
+          "scattergl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergl"
+           }
+          ],
+          "scattermap": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermap"
+           }
+          ],
+          "scattermapbox": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermapbox"
+           }
+          ],
+          "scatterpolar": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolar"
+           }
+          ],
+          "scatterpolargl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolargl"
+           }
+          ],
+          "scatterternary": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterternary"
+           }
+          ],
+          "surface": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "surface"
+           }
+          ],
+          "table": [
+           {
+            "cells": {
+             "fill": {
+              "color": "#EBF0F8"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "header": {
+             "fill": {
+              "color": "#C8D4E3"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "type": "table"
+           }
+          ]
+         },
+         "layout": {
+          "annotationdefaults": {
+           "arrowcolor": "#2a3f5f",
+           "arrowhead": 0,
+           "arrowwidth": 1
+          },
+          "autotypenumbers": "strict",
+          "coloraxis": {
+           "colorbar": {
+            "outlinewidth": 0,
+            "ticks": ""
+           }
+          },
+          "colorscale": {
+           "diverging": [
+            [
+             0,
+             "#8e0152"
+            ],
+            [
+             0.1,
+             "#c51b7d"
+            ],
+            [
+             0.2,
+             "#de77ae"
+            ],
+            [
+             0.3,
+             "#f1b6da"
+            ],
+            [
+             0.4,
+             "#fde0ef"
+            ],
+            [
+             0.5,
+             "#f7f7f7"
+            ],
+            [
+             0.6,
+             "#e6f5d0"
+            ],
+            [
+             0.7,
+             "#b8e186"
+            ],
+            [
+             0.8,
+             "#7fbc41"
+            ],
+            [
+             0.9,
+             "#4d9221"
+            ],
+            [
+             1,
+             "#276419"
+            ]
+           ],
+           "sequential": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ],
+           "sequentialminus": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ]
+          },
+          "colorway": [
+           "#636efa",
+           "#EF553B",
+           "#00cc96",
+           "#ab63fa",
+           "#FFA15A",
+           "#19d3f3",
+           "#FF6692",
+           "#B6E880",
+           "#FF97FF",
+           "#FECB52"
+          ],
+          "font": {
+           "color": "#2a3f5f"
+          },
+          "geo": {
+           "bgcolor": "white",
+           "lakecolor": "white",
+           "landcolor": "white",
+           "showlakes": true,
+           "showland": true,
+           "subunitcolor": "#C8D4E3"
+          },
+          "hoverlabel": {
+           "align": "left"
+          },
+          "hovermode": "closest",
+          "mapbox": {
+           "style": "light"
+          },
+          "paper_bgcolor": "white",
+          "plot_bgcolor": "white",
+          "polar": {
+           "angularaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "radialaxis": {
+            "gridcolor": "#EBF0F8",
+            "linecolor": "#EBF0F8",
+            "ticks": ""
+           }
+          },
+          "scene": {
+           "xaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "yaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           },
+           "zaxis": {
+            "backgroundcolor": "white",
+            "gridcolor": "#DFE8F3",
+            "gridwidth": 2,
+            "linecolor": "#EBF0F8",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "#EBF0F8"
+           }
+          },
+          "shapedefaults": {
+           "line": {
+            "color": "#2a3f5f"
+           }
+          },
+          "ternary": {
+           "aaxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "baxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           },
+           "bgcolor": "white",
+           "caxis": {
+            "gridcolor": "#DFE8F3",
+            "linecolor": "#A2B1C6",
+            "ticks": ""
+           }
+          },
+          "title": {
+           "x": 0.05
+          },
+          "xaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          },
+          "yaxis": {
+           "automargin": true,
+           "gridcolor": "#EBF0F8",
+           "linecolor": "#EBF0F8",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "#EBF0F8",
+           "zerolinewidth": 2
+          }
+         }
+        },
+        "title": {
+         "text": "Average output tokens per minute vs number of clients"
+        },
+        "xaxis": {
+         "tickformat": ".2s",
+         "title": {
+          "text": "Number of clients"
+         },
+         "type": "log"
+        },
+        "yaxis": {
+         "tickformat": ".2s",
+         "title": {
+          "text": "Average output tokens per minute"
+         },
+         "type": "log"
+        }
+       }
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "figs = load_test_result.plot_results(show=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Custom charts: TPS and RPM vs clients\n",
+    "\n",
+    "We can also build custom charts from the results. Here we plot the median output\n",
+    "tokens per second (TPS) and requests per minute (RPM) as a function of concurrency."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.plotly.v1+json": {
+       "config": {
+        "plotlyServerURL": "https://plot.ly"
+       },
+       "data": [
+        {
+         "mode": "lines+markers",
+         "name": "RPM",
+         "type": "scatter",
+         "x": [
+          1,
+          3,
+          5,
+          10
+         ],
+         "xaxis": "x",
+         "y": [
+          23.435395092386933,
+          68.4096394213185,
+          118.005947056227,
+          196.19625184901724
+         ],
+         "yaxis": "y"
+        },
+        {
+         "mode": "lines+markers",
+         "name": "TPS (p50)",
+         "type": "scatter",
+         "x": [
+          1,
+          3,
+          5,
+          10
+         ],
+         "xaxis": "x2",
+         "y": [
+          184.62017143187063,
+          113.63997467108105,
+          114.90946886186481,
+          102.97720864610096
+         ],
+         "yaxis": "y2"
+        }
+       ],
+       "layout": {
+        "annotations": [
+         {
+          "font": {
+           "size": 16
+          },
+          "showarrow": false,
+          "text": "Requests per Minute vs Clients",
+          "x": 0.225,
+          "xanchor": "center",
+          "xref": "paper",
+          "y": 1,
+          "yanchor": "bottom",
+          "yref": "paper"
+         },
+         {
+          "font": {
+           "size": 16
+          },
+          "showarrow": false,
+          "text": "Median Output Tokens/s vs Clients",
+          "x": 0.775,
+          "xanchor": "center",
+          "xref": "paper",
+          "y": 1,
+          "yanchor": "bottom",
+          "yref": "paper"
+         }
+        ],
+        "height": 400,
+        "showlegend": false,
+        "template": {
+         "data": {
+          "bar": [
+           {
+            "error_x": {
+             "color": "#2a3f5f"
+            },
+            "error_y": {
+             "color": "#2a3f5f"
+            },
+            "marker": {
+             "line": {
+              "color": "#E5ECF6",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "bar"
+           }
+          ],
+          "barpolar": [
+           {
+            "marker": {
+             "line": {
+              "color": "#E5ECF6",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "barpolar"
+           }
+          ],
+          "carpet": [
+           {
+            "aaxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "white",
+             "linecolor": "white",
+             "minorgridcolor": "white",
+             "startlinecolor": "#2a3f5f"
+            },
+            "baxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "white",
+             "linecolor": "white",
+             "minorgridcolor": "white",
+             "startlinecolor": "#2a3f5f"
+            },
+            "type": "carpet"
+           }
+          ],
+          "choropleth": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "choropleth"
+           }
+          ],
+          "contour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "contour"
+           }
+          ],
+          "contourcarpet": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "contourcarpet"
+           }
+          ],
+          "heatmap": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "heatmap"
+           }
+          ],
+          "histogram": [
+           {
+            "marker": {
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "histogram"
+           }
+          ],
+          "histogram2d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2d"
+           }
+          ],
+          "histogram2dcontour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2dcontour"
+           }
+          ],
+          "mesh3d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "mesh3d"
+           }
+          ],
+          "parcoords": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "parcoords"
+           }
+          ],
+          "pie": [
+           {
+            "automargin": true,
+            "type": "pie"
+           }
+          ],
+          "scatter": [
+           {
+            "fillpattern": {
+             "fillmode": "overlay",
+             "size": 10,
+             "solidity": 0.2
+            },
+            "type": "scatter"
+           }
+          ],
+          "scatter3d": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatter3d"
+           }
+          ],
+          "scattercarpet": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattercarpet"
+           }
+          ],
+          "scattergeo": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergeo"
+           }
+          ],
+          "scattergl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergl"
+           }
+          ],
+          "scattermap": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermap"
+           }
+          ],
+          "scattermapbox": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermapbox"
+           }
+          ],
+          "scatterpolar": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolar"
+           }
+          ],
+          "scatterpolargl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolargl"
+           }
+          ],
+          "scatterternary": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterternary"
+           }
+          ],
+          "surface": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "surface"
+           }
+          ],
+          "table": [
+           {
+            "cells": {
+             "fill": {
+              "color": "#EBF0F8"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "header": {
+             "fill": {
+              "color": "#C8D4E3"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "type": "table"
+           }
+          ]
+         },
+         "layout": {
+          "annotationdefaults": {
+           "arrowcolor": "#2a3f5f",
+           "arrowhead": 0,
+           "arrowwidth": 1
+          },
+          "autotypenumbers": "strict",
+          "coloraxis": {
+           "colorbar": {
+            "outlinewidth": 0,
+            "ticks": ""
+           }
+          },
+          "colorscale": {
+           "diverging": [
+            [
+             0,
+             "#8e0152"
+            ],
+            [
+             0.1,
+             "#c51b7d"
+            ],
+            [
+             0.2,
+             "#de77ae"
+            ],
+            [
+             0.3,
+             "#f1b6da"
+            ],
+            [
+             0.4,
+             "#fde0ef"
+            ],
+            [
+             0.5,
+             "#f7f7f7"
+            ],
+            [
+             0.6,
+             "#e6f5d0"
+            ],
+            [
+             0.7,
+             "#b8e186"
+            ],
+            [
+             0.8,
+             "#7fbc41"
+            ],
+            [
+             0.9,
+             "#4d9221"
+            ],
+            [
+             1,
+             "#276419"
+            ]
+           ],
+           "sequential": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ],
+           "sequentialminus": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ]
+          },
+          "colorway": [
+           "#636efa",
+           "#EF553B",
+           "#00cc96",
+           "#ab63fa",
+           "#FFA15A",
+           "#19d3f3",
+           "#FF6692",
+           "#B6E880",
+           "#FF97FF",
+           "#FECB52"
+          ],
+          "font": {
+           "color": "#2a3f5f"
+          },
+          "geo": {
+           "bgcolor": "white",
+           "lakecolor": "white",
+           "landcolor": "#E5ECF6",
+           "showlakes": true,
+           "showland": true,
+           "subunitcolor": "white"
+          },
+          "hoverlabel": {
+           "align": "left"
+          },
+          "hovermode": "closest",
+          "mapbox": {
+           "style": "light"
+          },
+          "paper_bgcolor": "white",
+          "plot_bgcolor": "#E5ECF6",
+          "polar": {
+           "angularaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "bgcolor": "#E5ECF6",
+           "radialaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           }
+          },
+          "scene": {
+           "xaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           },
+           "yaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           },
+           "zaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           }
+          },
+          "shapedefaults": {
+           "line": {
+            "color": "#2a3f5f"
+           }
+          },
+          "ternary": {
+           "aaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "baxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "bgcolor": "#E5ECF6",
+           "caxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           }
+          },
+          "title": {
+           "x": 0.05
+          },
+          "xaxis": {
+           "automargin": true,
+           "gridcolor": "white",
+           "linecolor": "white",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "white",
+           "zerolinewidth": 2
+          },
+          "yaxis": {
+           "automargin": true,
+           "gridcolor": "white",
+           "linecolor": "white",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "white",
+           "zerolinewidth": 2
+          }
+         }
+        },
+        "xaxis": {
+         "anchor": "y",
+         "domain": [
+          0,
+          0.45
+         ],
+         "title": {
+          "text": "Clients"
+         },
+         "type": "log"
+        },
+        "xaxis2": {
+         "anchor": "y2",
+         "domain": [
+          0.55,
+          1
+         ],
+         "title": {
+          "text": "Clients"
+         },
+         "type": "log"
+        },
+        "yaxis": {
+         "anchor": "x",
+         "domain": [
+          0,
+          1
+         ],
+         "title": {
+          "text": "RPM"
+         }
+        },
+        "yaxis2": {
+         "anchor": "x2",
+         "domain": [
+          0,
+          1
+         ],
+         "title": {
+          "text": "Tokens/s"
+         }
+        }
+       }
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import plotly.graph_objects as go\n",
+    "from plotly.subplots import make_subplots\n",
+    "\n",
+    "clients_sorted = sorted(load_test_result.results.keys())\n",
+    "rpms = [load_test_result.results[c].stats[\"requests_per_minute\"] for c in clients_sorted]\n",
+    "\n",
+    "# Compute median TPS (1 / p50 time_per_output_token) for each concurrency level\n",
+    "tps_values = []\n",
+    "for c in clients_sorted:\n",
+    "    tpot_p50 = load_test_result.results[c].stats.get(\"time_per_output_token-p50\")\n",
+    "    tps_values.append(1.0 / tpot_p50 if tpot_p50 and tpot_p50 > 0 else None)\n",
+    "\n",
+    "fig = make_subplots(\n",
+    "    rows=1, cols=2,\n",
+    "    subplot_titles=(\"Requests per Minute vs Clients\", \"Median Output Tokens/s vs Clients\"),\n",
+    ")\n",
+    "\n",
+    "fig.add_trace(\n",
+    "    go.Scatter(x=clients_sorted, y=rpms, mode=\"lines+markers\", name=\"RPM\"),\n",
+    "    row=1, col=1,\n",
+    ")\n",
+    "fig.add_trace(\n",
+    "    go.Scatter(x=clients_sorted, y=tps_values, mode=\"lines+markers\", name=\"TPS (p50)\"),\n",
+    "    row=1, col=2,\n",
+    ")\n",
+    "\n",
+    "fig.update_xaxes(title_text=\"Clients\", type=\"log\")\n",
+    "fig.update_yaxes(title_text=\"RPM\", row=1, col=1)\n",
+    "fig.update_yaxes(title_text=\"Tokens/s\", row=1, col=2)\n",
+    "fig.update_layout(height=400, showlegend=False)\n",
+    "fig"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.plotly.v1+json": {
+       "config": {
+        "plotlyServerURL": "https://plot.ly"
+       },
+       "data": [
+        {
+         "name": "1 clients",
+         "type": "box",
+         "x": [
+          1.810565542000404,
+          1.1812222500011558,
+          3.868108458002098,
+          0.7137894999978016,
+          0.5251540839963127,
+          0.37060487500275485,
+          0.44968758300092304,
+          0.3802408749979804,
+          1.2211427910006023,
+          1.4571087089934736,
+          0.589481999995769,
+          2.097318999993149,
+          1.0713617910005269
+         ]
+        },
+        {
+         "name": "3 clients",
+         "type": "box",
+         "x": [
+          0.33201908400224056,
+          0.7742456669948297,
+          2.397023541998351,
+          1.8993818329981877,
+          0.42726020899863215,
+          1.4716677499964135,
+          0.46759149999707006,
+          0.8862732499983395,
+          0.3939086249956745,
+          1.0275854999999865,
+          0.7189318340024329,
+          1.4116186659957748,
+          0.7474134169970057,
+          0.40672895799798425,
+          0.42657495900493814,
+          1.2491307079981198,
+          0.36702779099869076,
+          0.7880193340024562,
+          1.8055343329979223,
+          2.310052083004848,
+          1.7829697910055984,
+          2.0935321249999106,
+          1.3090343329968164,
+          0.7112003330039443,
+          4.315548667000257,
+          0.8603567079990171,
+          1.7188770830034628,
+          0.8016122080007335,
+          0.7778300419959123,
+          0.4183118330038269,
+          1.3940931660035858,
+          1.555591791999177,
+          1.9584035000007134,
+          0.697202000003017,
+          0.5803299999970477,
+          3.9468323339970084,
+          0.9757721249989117
+         ]
+        },
+        {
+         "name": "5 clients",
+         "type": "box",
+         "x": [
+          1.002142333003576,
+          0.9818431249950663,
+          2.109215791999304,
+          2.854000667000946,
+          1.1883622079985798,
+          2.397148707997985,
+          2.0537721249929746,
+          1.2194084170041606,
+          0.55904924999777,
+          1.0641959169952315,
+          0.3820479999994859,
+          1.1137916250008857,
+          0.3648492919965065,
+          1.4079380000039237,
+          1.1204503329936415,
+          0.7068704999983311,
+          1.5245771670015529,
+          2.0880037499955506,
+          0.34228199999779463,
+          2.0938974159944337,
+          1.313775374997931,
+          0.9155073329966399,
+          1.178452667001693,
+          0.8153431250029826,
+          0.7699036249978235,
+          0.47562379100418184,
+          0.455455625000468,
+          1.1047283329971833,
+          1.333313959003135,
+          0.9293282919970807,
+          1.2738797499987413,
+          1.299013833006029,
+          0.3571828749991255,
+          0.48103974999685306,
+          0.4824907499933033,
+          2.367823166998278,
+          2.344774500001222,
+          0.42162933399959,
+          0.5273924579960294,
+          0.4980682919995161,
+          0.8799479170047562,
+          1.7304949169993051,
+          1.6569751249990077,
+          1.7166576250019716,
+          1.280893458002538,
+          2.347654417004378,
+          1.1621644590049982,
+          1.2532084999984363,
+          2.9941925419989275,
+          2.0151528749993304,
+          1.181208833004348,
+          2.2984159589977935,
+          1.2298334160004742,
+          1.418954875000054,
+          1.82515254199825,
+          1.2331472500009113,
+          0.87708987500082,
+          0.50351466700522,
+          1.607961791996786,
+          0.7267654579991358,
+          0.5518672089965548,
+          0.7452832909984863,
+          1.8045194169972092
+         ]
+        },
+        {
+         "name": "10 clients",
+         "type": "box",
+         "x": [
+          1.7789633329957724,
+          2.605166624998674,
+          3.056866250000894,
+          1.6265989160019672,
+          0.5625950419998844,
+          4.143405791000987,
+          4.180461500000092,
+          3.5610989999986487,
+          0.5881356660029269,
+          0.9779006249955273,
+          3.690586582997639,
+          4.832465791005234,
+          0.6448706250012037,
+          4.863131999998586,
+          1.382325790997129,
+          0.4846024590005982,
+          2.2435131250022096,
+          1.5739306670002406,
+          2.255111082995427,
+          3.899700750000193,
+          1.4509044169972185,
+          1.2176708330007386,
+          2.9069163750027656,
+          2.5278627079969738,
+          0.40958595799747854,
+          1.1879090829970664,
+          0.6117010829984793,
+          0.6858485839984496,
+          2.8986509589958587,
+          1.788087708002422,
+          2.269181875002687,
+          0.9895395829953486,
+          2.5265816250030184,
+          2.149016749994189,
+          0.7873067089967662,
+          1.8220124579966068,
+          2.769100500001514,
+          0.5327938330010511,
+          3.213817707997805,
+          0.3640439999944647,
+          1.7442852500025765,
+          0.39150920799875166,
+          1.3132492920049117,
+          0.5267627499997616,
+          0.6975128749982105,
+          5.765499125001952,
+          0.6618302920032875,
+          0.531012917002954,
+          2.8477291669987608,
+          1.1358064590021968,
+          0.3917771249980433,
+          1.7981745829965803,
+          1.4685571670052013,
+          0.5237741250020918,
+          0.44857070799480425,
+          2.0674697080030455,
+          2.1612447919978877,
+          0.36861462500382913,
+          0.3830527499958407,
+          2.0479602080013137,
+          1.5750675000017509,
+          0.4111657500034198,
+          2.939957292001054,
+          2.9568471250022412,
+          0.37112295900442405,
+          3.418946000005235,
+          0.3710014160024002,
+          0.32679066600394435,
+          2.0638363340040087,
+          1.0570221669986495,
+          0.6728937090010731,
+          1.0676320409984328,
+          0.3201369589951355,
+          1.3715982919966336,
+          2.2774177090032026,
+          0.39108320900413673,
+          0.41870475000177976,
+          3.82099775000097,
+          1.9569717920021503,
+          1.2734504160034703,
+          0.9579590420034947,
+          1.7040523329997086,
+          2.4146079999991343,
+          0.7444077079999261,
+          2.147584582999116,
+          1.8018520420009736,
+          0.43014495899842586,
+          0.3900236660047085,
+          0.4033835000009276,
+          1.2873853749988484,
+          0.5807214170054067,
+          0.5866758340052911,
+          0.42062020899902564,
+          0.657029166999564,
+          1.4600514160047169,
+          0.9175232499983395,
+          3.7755427500014775,
+          2.2247067500065896,
+          0.8008953329990618,
+          2.464140624993888,
+          3.2319100000022445,
+          3.814959125003952,
+          0.357758082995133,
+          0.3283785839958,
+          2.6948829159955494,
+          1.5338809579989174
+         ]
+        }
+       ],
+       "layout": {
+        "template": {
+         "data": {
+          "bar": [
+           {
+            "error_x": {
+             "color": "#2a3f5f"
+            },
+            "error_y": {
+             "color": "#2a3f5f"
+            },
+            "marker": {
+             "line": {
+              "color": "#E5ECF6",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "bar"
+           }
+          ],
+          "barpolar": [
+           {
+            "marker": {
+             "line": {
+              "color": "#E5ECF6",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "barpolar"
+           }
+          ],
+          "carpet": [
+           {
+            "aaxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "white",
+             "linecolor": "white",
+             "minorgridcolor": "white",
+             "startlinecolor": "#2a3f5f"
+            },
+            "baxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "white",
+             "linecolor": "white",
+             "minorgridcolor": "white",
+             "startlinecolor": "#2a3f5f"
+            },
+            "type": "carpet"
+           }
+          ],
+          "choropleth": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "choropleth"
+           }
+          ],
+          "contour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "contour"
+           }
+          ],
+          "contourcarpet": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "contourcarpet"
+           }
+          ],
+          "heatmap": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "heatmap"
+           }
+          ],
+          "histogram": [
+           {
+            "marker": {
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "histogram"
+           }
+          ],
+          "histogram2d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2d"
+           }
+          ],
+          "histogram2dcontour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2dcontour"
+           }
+          ],
+          "mesh3d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "mesh3d"
+           }
+          ],
+          "parcoords": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "parcoords"
+           }
+          ],
+          "pie": [
+           {
+            "automargin": true,
+            "type": "pie"
+           }
+          ],
+          "scatter": [
+           {
+            "fillpattern": {
+             "fillmode": "overlay",
+             "size": 10,
+             "solidity": 0.2
+            },
+            "type": "scatter"
+           }
+          ],
+          "scatter3d": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatter3d"
+           }
+          ],
+          "scattercarpet": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattercarpet"
+           }
+          ],
+          "scattergeo": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergeo"
+           }
+          ],
+          "scattergl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergl"
+           }
+          ],
+          "scattermap": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermap"
+           }
+          ],
+          "scattermapbox": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermapbox"
+           }
+          ],
+          "scatterpolar": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolar"
+           }
+          ],
+          "scatterpolargl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolargl"
+           }
+          ],
+          "scatterternary": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterternary"
+           }
+          ],
+          "surface": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "surface"
+           }
+          ],
+          "table": [
+           {
+            "cells": {
+             "fill": {
+              "color": "#EBF0F8"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "header": {
+             "fill": {
+              "color": "#C8D4E3"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "type": "table"
+           }
+          ]
+         },
+         "layout": {
+          "annotationdefaults": {
+           "arrowcolor": "#2a3f5f",
+           "arrowhead": 0,
+           "arrowwidth": 1
+          },
+          "autotypenumbers": "strict",
+          "coloraxis": {
+           "colorbar": {
+            "outlinewidth": 0,
+            "ticks": ""
+           }
+          },
+          "colorscale": {
+           "diverging": [
+            [
+             0,
+             "#8e0152"
+            ],
+            [
+             0.1,
+             "#c51b7d"
+            ],
+            [
+             0.2,
+             "#de77ae"
+            ],
+            [
+             0.3,
+             "#f1b6da"
+            ],
+            [
+             0.4,
+             "#fde0ef"
+            ],
+            [
+             0.5,
+             "#f7f7f7"
+            ],
+            [
+             0.6,
+             "#e6f5d0"
+            ],
+            [
+             0.7,
+             "#b8e186"
+            ],
+            [
+             0.8,
+             "#7fbc41"
+            ],
+            [
+             0.9,
+             "#4d9221"
+            ],
+            [
+             1,
+             "#276419"
+            ]
+           ],
+           "sequential": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ],
+           "sequentialminus": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ]
+          },
+          "colorway": [
+           "#636efa",
+           "#EF553B",
+           "#00cc96",
+           "#ab63fa",
+           "#FFA15A",
+           "#19d3f3",
+           "#FF6692",
+           "#B6E880",
+           "#FF97FF",
+           "#FECB52"
+          ],
+          "font": {
+           "color": "#2a3f5f"
+          },
+          "geo": {
+           "bgcolor": "white",
+           "lakecolor": "white",
+           "landcolor": "#E5ECF6",
+           "showlakes": true,
+           "showland": true,
+           "subunitcolor": "white"
+          },
+          "hoverlabel": {
+           "align": "left"
+          },
+          "hovermode": "closest",
+          "mapbox": {
+           "style": "light"
+          },
+          "paper_bgcolor": "white",
+          "plot_bgcolor": "#E5ECF6",
+          "polar": {
+           "angularaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "bgcolor": "#E5ECF6",
+           "radialaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           }
+          },
+          "scene": {
+           "xaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           },
+           "yaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           },
+           "zaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           }
+          },
+          "shapedefaults": {
+           "line": {
+            "color": "#2a3f5f"
+           }
+          },
+          "ternary": {
+           "aaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "baxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "bgcolor": "#E5ECF6",
+           "caxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           }
+          },
+          "title": {
+           "x": 0.05
+          },
+          "xaxis": {
+           "automargin": true,
+           "gridcolor": "white",
+           "linecolor": "white",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "white",
+           "zerolinewidth": 2
+          },
+          "yaxis": {
+           "automargin": true,
+           "gridcolor": "white",
+           "linecolor": "white",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "white",
+           "zerolinewidth": 2
+          }
+         }
+        },
+        "title": {
+         "text": "Time to First Token by Client Count"
+        },
+        "xaxis": {
+         "title": {
+          "text": "Time to First Token (s)"
+         },
+         "type": "log"
+        }
+       }
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from llmeter.plotting import boxplot_by_dimension\n",
+    "\n",
+    "fig = go.Figure(layout=dict(title=\"Time to First Token by Client Count\"))\n",
+    "for n_clients in clients_sorted:\n",
+    "    fig.add_trace(\n",
+    "        boxplot_by_dimension(\n",
+    "            load_test_result.results[n_clients],\n",
+    "            dimension=\"time_to_first_token\",\n",
+    "            name=f\"{n_clients} clients\",\n",
+    "        )\n",
+    "    )\n",
+    "fig.update_xaxes(type=\"log\", title=\"Time to First Token (s)\")\n",
+    "fig"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.plotly.v1+json": {
+       "config": {
+        "plotlyServerURL": "https://plot.ly"
+       },
+       "data": [
+        {
+         "name": "1 clients",
+         "type": "box",
+         "x": [
+          2.026969166996423,
+          1.4105780000027153,
+          7.017155499997898,
+          3.8645369169971673,
+          0.8202292089990806,
+          0.657336791002308,
+          1.9274062499971478,
+          1.5236599999989267,
+          1.6036553750018356,
+          2.048463749997609,
+          1.3640452499967068,
+          4.816340499994112,
+          4.1498347079977975
+         ]
+        },
+        {
+         "name": "3 clients",
+         "type": "box",
+         "x": [
+          0.8604247920011403,
+          4.527456333998998,
+          4.830777458999364,
+          4.042134957999224,
+          0.8596613339977921,
+          1.505671082995832,
+          1.8728053750019171,
+          1.2073444169946015,
+          1.3267667499967501,
+          2.007788625000103,
+          0.8214762089992291,
+          1.457438582998293,
+          1.9353756669952418,
+          1.7568827079958282,
+          2.1657332090035197,
+          2.5171536249981727,
+          2.4936623750036233,
+          1.8358565419985098,
+          2.5352387500024633,
+          4.18328370800009,
+          2.1444086660048924,
+          4.2379080839964445,
+          1.5257862919970648,
+          3.2496527500043157,
+          7.227931708999677,
+          2.9760203749974607,
+          2.6227675410045777,
+          1.3151865409963648,
+          2.4453202499935287,
+          1.117245624998759,
+          1.9994487080039107,
+          1.5960046249965671,
+          2.0353990830044495,
+          4.6709048750053626,
+          1.9501907090016175,
+          5.925759499994456,
+          2.86150141699909
+         ]
+        },
+        {
+         "name": "5 clients",
+         "type": "box",
+         "x": [
+          1.250162124997587,
+          1.0117991250008345,
+          2.1457077500017476,
+          4.487846791998891,
+          4.524017457995797,
+          4.866551667000749,
+          5.196730249997927,
+          1.3659177080044174,
+          0.9260156250020373,
+          1.3981401669952902,
+          1.0464315839999472,
+          2.078450959001202,
+          1.0507028339998215,
+          1.6905905000021448,
+          2.6238793749944307,
+          3.485501415998442,
+          4.309456792005221,
+          4.642852416000096,
+          0.7529281669994816,
+          2.353995790996123,
+          3.662087040996994,
+          2.490439165994758,
+          3.0507285830026376,
+          2.388044457999058,
+          3.0282242910034256,
+          2.1366724999970756,
+          1.6564010420042905,
+          2.1372902919974877,
+          3.2651202920023934,
+          1.7618697919970145,
+          1.643917583001894,
+          1.6312250420014607,
+          2.149333665998711,
+          2.428906665998511,
+          1.8692639999935636,
+          2.433882334000373,
+          2.4431621249968885,
+          0.8310220839994145,
+          1.045902957994258,
+          0.8828566669981228,
+          1.8837196250024135,
+          2.982938708999427,
+          3.969376375003776,
+          1.741798708004353,
+          2.9292802909985767,
+          3.2932393750015763,
+          1.406095334001293,
+          2.6276067910002894,
+          3.2220796670007985,
+          2.1393764579988783,
+          3.168885292005143,
+          4.014242750003177,
+          3.173061375004181,
+          2.579963874995883,
+          2.9128747079957975,
+          3.0359742090004147,
+          2.6948128750009346,
+          2.075248291999742,
+          2.100795999998809,
+          2.645283375000872,
+          2.2994953749948763,
+          1.601715790995513,
+          3.14368887499586
+         ]
+        },
+        {
+         "name": "10 clients",
+         "type": "box",
+         "x": [
+          2.526538416997937,
+          2.6472249580037897,
+          3.0995452499992098,
+          3.2772074999957113,
+          0.9971983330033254,
+          4.181597540999064,
+          4.221945541998139,
+          4.342931500003033,
+          1.8289941659968463,
+          1.223867082997458,
+          4.93074450000131,
+          5.136637708004855,
+          3.013893208000809,
+          5.778089958999772,
+          1.426899541002058,
+          0.9363519999969867,
+          2.89067904100375,
+          2.944057749999047,
+          3.721774457997526,
+          4.210495624996838,
+          3.5966603749984642,
+          4.719359624999925,
+          5.110498792004364,
+          2.620230332999199,
+          1.5105535420007072,
+          3.440597250002611,
+          2.618421290993865,
+          0.7280902089987649,
+          5.12706341699959,
+          3.5233514999999898,
+          2.377603459004604,
+          1.6091642499959562,
+          4.864977290999377,
+          3.42581183299626,
+          2.015671209002903,
+          2.5336824169935426,
+          3.3301199580018874,
+          1.6080059169980814,
+          4.445332292001694,
+          1.1371099159950973,
+          3.4345513340012985,
+          2.176926625004853,
+          1.318248791998485,
+          1.8371326659980696,
+          3.397253916999034,
+          6.461382583001978,
+          2.2144158330047503,
+          0.9335729590020492,
+          5.486439041997073,
+          3.0541425420015003,
+          1.6030845409986796,
+          4.258436707998044,
+          1.996725625002,
+          2.5789501250037574,
+          0.794825207995018,
+          2.5077784169989172,
+          4.381451000001107,
+          0.7341202500028885,
+          0.526786832997459,
+          3.1630495419958606,
+          3.0988521670005866,
+          1.6619982920019538,
+          3.71616087500297,
+          3.8252187089965446,
+          2.4751521250000224,
+          5.006649583003309,
+          1.3394295000034617,
+          3.611661916002049,
+          4.377064042004349,
+          3.840444875000685,
+          1.659149917002651,
+          2.832853415995487,
+          0.6458929590007756,
+          3.5052734589989996,
+          5.6474424170010025,
+          0.8091522090035141,
+          0.9284407500017551,
+          6.816903582999657,
+          3.9716163330012932,
+          1.5373788330034586,
+          2.054647875003866,
+          3.2553307080015657,
+          5.010804708996147,
+          3.646031333002611,
+          3.5746835000027204,
+          4.229959917000087,
+          1.9007484999965527,
+          0.7102053330017952,
+          2.6844349579987465,
+          3.390159291004238,
+          1.8455996250049793,
+          1.423415750003187,
+          2.3246775419975165,
+          1.438939500003471,
+          3.1382235830024,
+          2.365440415997,
+          4.739204375000554,
+          4.722318541003915,
+          1.851873166000587,
+          4.211808124993695,
+          3.862824666000961,
+          3.8172453340012,
+          2.785455874996842,
+          2.5279075840007863,
+          4.43163616599486,
+          3.2573652919963934
+         ]
+        }
+       ],
+       "layout": {
+        "template": {
+         "data": {
+          "bar": [
+           {
+            "error_x": {
+             "color": "#2a3f5f"
+            },
+            "error_y": {
+             "color": "#2a3f5f"
+            },
+            "marker": {
+             "line": {
+              "color": "#E5ECF6",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "bar"
+           }
+          ],
+          "barpolar": [
+           {
+            "marker": {
+             "line": {
+              "color": "#E5ECF6",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "barpolar"
+           }
+          ],
+          "carpet": [
+           {
+            "aaxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "white",
+             "linecolor": "white",
+             "minorgridcolor": "white",
+             "startlinecolor": "#2a3f5f"
+            },
+            "baxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "white",
+             "linecolor": "white",
+             "minorgridcolor": "white",
+             "startlinecolor": "#2a3f5f"
+            },
+            "type": "carpet"
+           }
+          ],
+          "choropleth": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "choropleth"
+           }
+          ],
+          "contour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "contour"
+           }
+          ],
+          "contourcarpet": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "contourcarpet"
+           }
+          ],
+          "heatmap": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "heatmap"
+           }
+          ],
+          "histogram": [
+           {
+            "marker": {
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "histogram"
+           }
+          ],
+          "histogram2d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2d"
+           }
+          ],
+          "histogram2dcontour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2dcontour"
+           }
+          ],
+          "mesh3d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "mesh3d"
+           }
+          ],
+          "parcoords": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "parcoords"
+           }
+          ],
+          "pie": [
+           {
+            "automargin": true,
+            "type": "pie"
+           }
+          ],
+          "scatter": [
+           {
+            "fillpattern": {
+             "fillmode": "overlay",
+             "size": 10,
+             "solidity": 0.2
+            },
+            "type": "scatter"
+           }
+          ],
+          "scatter3d": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatter3d"
+           }
+          ],
+          "scattercarpet": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattercarpet"
+           }
+          ],
+          "scattergeo": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergeo"
+           }
+          ],
+          "scattergl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergl"
+           }
+          ],
+          "scattermap": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermap"
+           }
+          ],
+          "scattermapbox": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermapbox"
+           }
+          ],
+          "scatterpolar": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolar"
+           }
+          ],
+          "scatterpolargl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolargl"
+           }
+          ],
+          "scatterternary": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterternary"
+           }
+          ],
+          "surface": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "surface"
+           }
+          ],
+          "table": [
+           {
+            "cells": {
+             "fill": {
+              "color": "#EBF0F8"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "header": {
+             "fill": {
+              "color": "#C8D4E3"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "type": "table"
+           }
+          ]
+         },
+         "layout": {
+          "annotationdefaults": {
+           "arrowcolor": "#2a3f5f",
+           "arrowhead": 0,
+           "arrowwidth": 1
+          },
+          "autotypenumbers": "strict",
+          "coloraxis": {
+           "colorbar": {
+            "outlinewidth": 0,
+            "ticks": ""
+           }
+          },
+          "colorscale": {
+           "diverging": [
+            [
+             0,
+             "#8e0152"
+            ],
+            [
+             0.1,
+             "#c51b7d"
+            ],
+            [
+             0.2,
+             "#de77ae"
+            ],
+            [
+             0.3,
+             "#f1b6da"
+            ],
+            [
+             0.4,
+             "#fde0ef"
+            ],
+            [
+             0.5,
+             "#f7f7f7"
+            ],
+            [
+             0.6,
+             "#e6f5d0"
+            ],
+            [
+             0.7,
+             "#b8e186"
+            ],
+            [
+             0.8,
+             "#7fbc41"
+            ],
+            [
+             0.9,
+             "#4d9221"
+            ],
+            [
+             1,
+             "#276419"
+            ]
+           ],
+           "sequential": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ],
+           "sequentialminus": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ]
+          },
+          "colorway": [
+           "#636efa",
+           "#EF553B",
+           "#00cc96",
+           "#ab63fa",
+           "#FFA15A",
+           "#19d3f3",
+           "#FF6692",
+           "#B6E880",
+           "#FF97FF",
+           "#FECB52"
+          ],
+          "font": {
+           "color": "#2a3f5f"
+          },
+          "geo": {
+           "bgcolor": "white",
+           "lakecolor": "white",
+           "landcolor": "#E5ECF6",
+           "showlakes": true,
+           "showland": true,
+           "subunitcolor": "white"
+          },
+          "hoverlabel": {
+           "align": "left"
+          },
+          "hovermode": "closest",
+          "mapbox": {
+           "style": "light"
+          },
+          "paper_bgcolor": "white",
+          "plot_bgcolor": "#E5ECF6",
+          "polar": {
+           "angularaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "bgcolor": "#E5ECF6",
+           "radialaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           }
+          },
+          "scene": {
+           "xaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           },
+           "yaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           },
+           "zaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           }
+          },
+          "shapedefaults": {
+           "line": {
+            "color": "#2a3f5f"
+           }
+          },
+          "ternary": {
+           "aaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "baxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "bgcolor": "#E5ECF6",
+           "caxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           }
+          },
+          "title": {
+           "x": 0.05
+          },
+          "xaxis": {
+           "automargin": true,
+           "gridcolor": "white",
+           "linecolor": "white",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "white",
+           "zerolinewidth": 2
+          },
+          "yaxis": {
+           "automargin": true,
+           "gridcolor": "white",
+           "linecolor": "white",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "white",
+           "zerolinewidth": 2
+          }
+         }
+        },
+        "title": {
+         "text": "Time to Last Token by Client Count"
+        },
+        "xaxis": {
+         "title": {
+          "text": "Time to Last Token (s)"
+         },
+         "type": "log"
+        }
+       }
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "fig = go.Figure(layout=dict(title=\"Time to Last Token by Client Count\"))\n",
+    "for n_clients in clients_sorted:\n",
+    "    fig.add_trace(\n",
+    "        boxplot_by_dimension(\n",
+    "            load_test_result.results[n_clients],\n",
+    "            dimension=\"time_to_last_token\",\n",
+    "            name=f\"{n_clients} clients\",\n",
+    "        )\n",
+    "    )\n",
+    "fig.update_xaxes(type=\"log\", title=\"Time to Last Token (s)\")\n",
+    "fig"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summary\n",
+    "\n",
+    "| Feature | Parameter | Description |\n",
+    "|---|---|---|\n",
+    "| Time-bound runs | `run_duration=60` | Run for a fixed number of seconds instead of a fixed request count |\n",
+    "| Count-bound runs | `n_requests=100` | Traditional mode — fixed number of requests per client |\n",
+    "| Live stats | `progress_bar_stats={...}` | Customize which metrics appear on the progress bar |\n",
+    "| Low-memory mode | `low_memory=True` | Stream responses to disk, compute stats incrementally |"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/llmeter/experiments.py b/llmeter/experiments.py
index 0a70a17..55eece0 100644
--- a/llmeter/experiments.py
+++ b/llmeter/experiments.py
@@ -119,10 +119,66 @@ def load(
 
 @dataclass
 class LoadTest:
-    """Experiment to explore how performance changes at different concurrency levels
+    """Experiment to explore how performance changes at different concurrency levels.
 
     This experiment creates a series of Runs with different levels of concurrency, defined by
-    `sequence_of_clients`, and runs them one after the other.
+    ``sequence_of_clients``, and runs them one after the other.
+
+    By default, each run sends a fixed number of requests (count-bound). Set ``run_duration``
+    to run each concurrency level for a fixed number of seconds instead (time-bound), which
+    gives a more realistic picture of sustained throughput.
+
+    Attributes:
+        endpoint (Endpoint): The LLM endpoint to test.
+        payload (dict | list[dict]): The request payload(s) to send.
+        sequence_of_clients (list[int]): Concurrency levels to test.
+        min_requests_per_client (int): Minimum requests per client in count-bound mode.
+        min_requests_per_run (int): Minimum total requests per run in count-bound mode.
+        run_duration (int | float | None): When set, each concurrency level runs for this
+            many seconds instead of a fixed request count, and ``run()`` does not use
+            ``min_requests_per_client`` / ``min_requests_per_run``.
+        low_memory (bool): When ``True``, responses are written to disk but not kept in
+            memory. Requires ``output_path``. Defaults to ``False``.
+        progress_bar_stats (dict | None): Controls which live stats appear on the progress
+            bar. See ``RunningStats.DEFAULT_SNAPSHOT_STATS`` for the default.
+        output_path (os.PathLike | str | None): Where to save results.
+        tokenizer (Tokenizer | None): Optional tokenizer for token counting.
+        test_name (str | None): Name for this test. Defaults to current date/time.
+        callbacks (list[Callback] | None): Optional callbacks.
+
+    Example::
+
+        # Count-bound: 10 requests per client at each concurrency level
+        load_test = LoadTest(
+            endpoint=my_endpoint,
+            payload=sample_payload,
+            sequence_of_clients=[1, 5, 10, 20],
+            min_requests_per_client=10,
+            output_path="outputs/load_test",
+        )
+        result = await load_test.run()
+        result.plot_results()
+
+        # Time-bound: 60 seconds per concurrency level
+        load_test = LoadTest(
+            endpoint=my_endpoint,
+            payload=sample_payload,
+            sequence_of_clients=[1, 5, 10, 20],
+            run_duration=60,
+            output_path="outputs/load_test",
+        )
+        result = await load_test.run()
+
+        # Time-bound with low-memory mode for large-scale tests
+        load_test = LoadTest(
+            endpoint=my_endpoint,
+            payload=sample_payload,
+            sequence_of_clients=[1, 5, 10, 20, 50],
+            run_duration=120,
+            low_memory=True,
+            output_path="outputs/large_load_test",
+        )
+        result = await load_test.run()
     """
 
     endpoint: Endpoint
@@ -130,6 +186,9 @@ class LoadTest:
     sequence_of_clients: list[int]
     min_requests_per_client: int = 1
     min_requests_per_run: int = 10
+    run_duration: int | float | None = None
+    low_memory: bool = False
+    progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None
     output_path: os.PathLike | str | None = None
     tokenizer: Tokenizer | None = None
     test_name: str | None = None
@@ -144,6 +203,40 @@ def _get_n_requests(self, clients):
         return int(self.min_requests_per_client)
 
     async def run(self, output_path: os.PathLike | None = None):
+        """Run the load test across all configured concurrency levels.
+
+        Creates a :class:`~llmeter.runner.Runner` and iterates through
+        ``sequence_of_clients``, running one test per concurrency level. In
+        time-bound mode (``run_duration`` is set), each level runs for a fixed
+        duration. In count-bound mode, each level sends a fixed number of
+        requests per client.
+
+        Args:
+            output_path (os.PathLike | None, optional): Override for the output
+                directory. If not provided, ``self.output_path`` is used. A
+                subfolder named after ``test_name`` is created automatically.
+
+        Returns:
+            LoadTestResult: A result object containing one
+            :class:`~llmeter.results.Result` per concurrency level, keyed by
+            client count.
+
+        Example::
+
+            load_test = LoadTest(
+                endpoint=my_endpoint,
+                payload=sample_payload,
+                sequence_of_clients=[1, 5, 10],
+                run_duration=30,
+            )
+            result = await load_test.run(output_path="outputs/my_test")
+
+            # Access individual results by client count
+            result.results[5].stats["requests_per_minute"]
+
+            # Plot all standard charts
+            result.plot_results()
+        """
         try:
             output_path = Path(output_path or self.output_path) / self._test_name
         except Exception:
@@ -152,20 +245,34 @@ async def run(self, output_path: os.PathLike | None = None):
             endpoint=self.endpoint, tokenizer=self.tokenizer, output_path=output_path
         )
 
-        self._results = [
-            await _runner.run(
-                payload=self.payload,
-                clients=c,
-                n_requests=self._get_n_requests(c),
-                run_name=f"{c:05.0f}-clients",
-                callbacks=self.callbacks,
-                output_path=output_path,
-            )
-            for c in tqdm(
-                self.sequence_of_clients, desc="Configurations", disable=_disable_tqdm
-            )
-        ]
-        # return self._results
+        self._results = []
+        for c in tqdm(
+            self.sequence_of_clients, desc="Configurations", disable=_disable_tqdm
+        ):
+            if self.run_duration is not None:
+                result = await _runner.run(
+                    payload=self.payload,
+                    clients=c,
+                    run_duration=self.run_duration,
+                    run_name=f"{c:05.0f}-clients",
+                    callbacks=self.callbacks,
+                    low_memory=self.low_memory,
+                    progress_bar_stats=self.progress_bar_stats,
+                    output_path=output_path,
+                )
+            else:
+                result = await _runner.run(
+                    payload=self.payload,
+                    clients=c,
+                    n_requests=self._get_n_requests(c),
+                    run_name=f"{c:05.0f}-clients",
+                    callbacks=self.callbacks,
+                    low_memory=self.low_memory,
+                    progress_bar_stats=self.progress_bar_stats,
+                    output_path=output_path,
+                )
+            self._results.append(result)
+
         return LoadTestResult(
             results={r.clients: r for r in self._results},
             test_name=self._test_name,
diff --git a/llmeter/live_display.py b/llmeter/live_display.py
new file mode 100644
index 0000000..f8c9416
--- /dev/null
+++ b/llmeter/live_display.py
@@ -0,0 +1,238 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Live-updating stats display for test runs.
+
+Renders a compact table of running statistics that updates in-place during a run.
+In Jupyter notebooks, uses an HTML table via IPython.display. In terminals, falls
+back to a simple printed summary that overwrites itself.
+"""
+
+from __future__ import annotations
+
+import logging
+import sys
+from collections import OrderedDict
+
+logger = logging.getLogger(__name__)
+
+# Ordered (key_substring, group_name) pairs; display order comes from _GROUP_ORDER.
+# Stats are grouped by the first matching pattern; unmatched keys go to "Other".
+_GROUP_PATTERNS: list[tuple[str, str]] = [
+    ("rpm", "Throughput"),
+    ("tps", "Throughput"),
+    ("ttft", "TTFT"),
+    ("ttlt", "TTLT"),
+    ("token", "Tokens"),
+    ("fail", "Errors"),
+]
+
+_GROUP_ORDER = ["Throughput", "TTFT", "TTLT", "Tokens", "Errors", "Other"]
+
+
+def _classify(key: str) -> str:
+    """Return the group name for a stat key based on substring matching.
+
+    Matches the key (case-insensitive) against ``_GROUP_PATTERNS``. The first
+    matching pattern determines the group. Unmatched keys are placed in
+    ``"Other"``.
+
+    Args:
+        key (str): The stat display label to classify (e.g. ``"p50_ttft"``).
+
+    Returns:
+        str: The group name (e.g. ``"TTFT"``, ``"Throughput"``, ``"Other"``).
+    """
+    key_lower = key.lower()
+    for pattern, group in _GROUP_PATTERNS:
+        if pattern in key_lower:
+            return group
+    return "Other"
+
+
+def _group_stats(stats: dict[str, str]) -> OrderedDict[str, list[tuple[str, str]]]:
+    """Organize stats into ordered groups for display.
+
+    Each stat key is classified via :func:`_classify` and placed into the
+    corresponding group. Groups are returned in the canonical order defined
+    by ``_GROUP_ORDER``, with empty groups omitted.
+
+    Args:
+        stats (dict[str, str]): Mapping of stat labels to formatted values.
+
+    Returns:
+        OrderedDict[str, list[tuple[str, str]]]: Groups in display order, where
+        each value is a list of ``(label, formatted_value)`` tuples.
+    """
+    groups: dict[str, list[tuple[str, str]]] = {}
+    for k, v in stats.items():
+        group = _classify(k)
+        groups.setdefault(group, []).append((k, v))
+    # Return in canonical order, skipping empty groups
+    return OrderedDict((g, groups[g]) for g in _GROUP_ORDER if g in groups)
+
+
+def _in_notebook() -> bool:
+    """Detect if we're running inside a Jupyter/IPython notebook.
+
+    Returns:
+        bool: ``True`` if the current IPython shell is a ``ZMQInteractiveShell``
+        (i.e. a Jupyter kernel), ``False`` otherwise or if IPython is not
+        installed.
+    """
+    try:
+        from IPython import get_ipython
+
+        shell = get_ipython()
+        if shell is None:
+            return False
+        return shell.__class__.__name__ == "ZMQInteractiveShell"
+    except ImportError:
+        return False
+
+
+class LiveStatsDisplay:
+    """Live-updating stats display that works in both notebooks and terminals.
+
+    In Jupyter notebooks, renders a grouped HTML table that updates in-place.
+    Stats are automatically organized into logical groups (Throughput, TTFT,
+    TTLT, Tokens, Errors) based on their key names.
+
+    In terminals, prints a compact grouped multi-line block using ANSI escape
+    codes to overwrite previous output.
+
+    Args:
+        disabled (bool): If ``True``, all display calls are no-ops.
+
+    Example::
+
+        display = LiveStatsDisplay()
+        display.update({"rpm": "185.9", "p50_ttft": "0.312s", "fail": "0"})
+        display.update({"rpm": "190.2", "p50_ttft": "0.305s", "fail": "1"})
+        display.close()
+    """
+
+    def __init__(self, disabled: bool = False):
+        self._disabled = disabled
+        self._is_notebook = _in_notebook()
+        self._handle = None
+        self._last_line_count = 0
+
+    def update(self, stats: dict[str, str], extra_prefix: str = "") -> None:
+        """Refresh the display with new stats.
+
+        Args:
+            stats (dict[str, str]): Mapping of label to formatted value.
+            extra_prefix (str): Optional prefix text shown before the table
+                (e.g. ``"reqs=127"`` for time-bound runs).
+        """
+        if self._disabled or not stats:
+            return
+
+        if self._is_notebook:
+            self._update_notebook(stats, extra_prefix)
+        else:
+            self._update_terminal(stats, extra_prefix)
+
+    def _update_notebook(self, stats: dict[str, str], extra_prefix: str) -> None:
+        """Render stats as a grouped HTML table in a Jupyter notebook.
+
+        Groups stats into columns (Throughput, TTFT, TTLT, Tokens, Errors) and
+        renders them as an HTML ``<table>`` that updates in-place via
+        ``IPython.display``. Labels, values and the prefix are HTML-escaped.
+
+        Args:
+            stats (dict[str, str]): Mapping of label to formatted value.
+            extra_prefix (str): Optional text shown above the table.
+        """
+        from html import escape
+
+        from IPython.display import HTML, display
+
+        groups = _group_stats(stats)
+        # One column per group, side by side: header on top, label/value cells below
+        max_rows = max((len(items) for items in groups.values()), default=0)
+
+        col_htmls = []
+        for group_name, items in groups.items():
+            col = (
+                f"<th style='padding:2px 10px;font-size:11px;color:#888;"
+                f"border-bottom:1px solid #ddd;text-align:left'>"
+                f"{group_name}</th>"
+            )
+            rows = []
+            for k, v in items:
+                rows.append(
+                    f"<td style='padding:1px 10px;font-size:12px'>"
+                    f"<span style='color:#666'>{escape(str(k))}</span>"
+                    f"&nbsp;&nbsp;"
+                    f"<span style='font-family:monospace'>{escape(str(v))}</span>"
+                    f"</td>"
+                )
+            # Pad shorter columns
+            for _ in range(max_rows - len(items)):
+                rows.append("<td></td>")
+            col_htmls.append((col, rows))
+
+        # Assemble: header row, then data rows
+        header_row = "<tr>" + "".join(c[0] for c in col_htmls) + "</tr>"
+        data_rows = ""
+        for i in range(max_rows):
+            data_rows += "<tr>" + "".join(c[1][i] for c in col_htmls) + "</tr>"
+
+        prefix_html = (
+            f"<span style='font-size:12px;font-family:monospace;color:#555'>"
+            f"{escape(extra_prefix)}</span><br>"
+            if extra_prefix
+            else ""
+        )
+        html = (
+            f"{prefix_html}"
+            f"<table style='border-collapse:collapse;margin:4px 0'>"
+            f"{header_row}{data_rows}</table>"
+        )
+
+        if self._handle is None:
+            self._handle = display(HTML(html), display_id=True)
+        else:
+            self._handle.update(HTML(html))
+
+    def _update_terminal(self, stats: dict[str, str], extra_prefix: str) -> None:
+        """Render stats as grouped text lines in a terminal.
+
+        Writes to ``sys.stderr``, using ANSI escape codes to erase the previously
+        printed block before writing the updated stats, one line per group.
+
+        Args:
+            stats (dict[str, str]): Mapping of label to formatted value.
+            extra_prefix (str): Optional text shown on the first line.
+        """
+        # Cursor up over the previous block, then clear from there to end of screen
+        if self._last_line_count > 0:
+            sys.stderr.write(f"\033[{self._last_line_count}A\033[J")
+
+        groups = _group_stats(stats)
+        lines = []
+        if extra_prefix:
+            lines.append(f"  {extra_prefix}")
+        for group_name, items in groups.items():
+            values = "  ".join(f"{k}={v}" for k, v in items)
+            lines.append(f"  {group_name}: {values}")
+
+        output = "\n".join(lines)
+        sys.stderr.write(output + "\n")
+        sys.stderr.flush()
+        self._last_line_count = len(lines)
+
+    def close(self) -> None:
+        """Clean up the display.
+
+        In terminal mode, erases the stats block using ANSI escape codes.
+        In notebook mode, the HTML output remains visible.
+        """
+        if self._disabled:
+            return
+        # In terminal, erase the stats block
+        if not self._is_notebook and self._last_line_count > 0:
+            sys.stderr.write(f"\033[{self._last_line_count}A\033[J")
+            sys.stderr.flush()
+            self._last_line_count = 0
diff --git a/llmeter/runner.py b/llmeter/runner.py
index 0604a32..0605a36 100644
--- a/llmeter/runner.py
+++ b/llmeter/runner.py
@@ -20,6 +20,7 @@
 from tqdm.auto import tqdm, trange
 from upath import UPath as Path
 
+from llmeter.live_display import LiveStatsDisplay
 from llmeter.utils import RunningStats, now_utc
 
 if TYPE_CHECKING:
@@ -56,6 +57,7 @@ class _RunConfig:
     tokenizer: Tokenizer | Any | None = None
     clients: int = 1
     n_requests: int | None = None
+    run_duration: int | float | None = None
     payload: dict | list[dict] | os.PathLike | str | None = None
     run_name: str | None = None
     run_description: str | None = None
@@ -74,6 +76,9 @@ def __post_init__(self, disable_client_progress_bar, disable_clients_progress_ba
         if self.n_requests is not None:
             assert self.n_requests > 0, "Number of requests must be a positive integer"
 
+        if self.run_duration is not None:
+            assert self.run_duration > 0, "Run duration must be a positive number"
+
         assert self.clients > 0, "Number of clients must be a positive integer"
 
         if self.run_name is not None:
@@ -181,16 +186,39 @@ def __post_init__(self, disable_client_progress_bar, disable_clients_progress_ba
         )
 
     def _validate_and_prepare_payload(self):
-        """Validate and prepare the payload for the test run and update n_requests
+        """Validate and prepare the payload for the test run.
+
+        Normalizes the payload into a list of dicts, validates that ``n_requests``
+        and ``run_duration`` are not both set, and sets ``_time_bound`` and
+        ``_n_requests`` accordingly.
+
+        For count-bound runs, ``_n_requests`` defaults to the number of payloads
+        when not explicitly provided. For time-bound runs, ``_n_requests`` is set
+        to 0 since the actual count is unknown upfront.
 
-        This method ensures that the payload is valid and prepared for the test run.
+        Raises:
+            AssertionError: If no payload is provided.
+            ValueError: If both ``n_requests`` and ``run_duration`` are set.
+            FileNotFoundError: If the payload path does not exist.
         """
         assert self.payload, "No payload provided"
         if isinstance(self.payload, (os.PathLike, str)):
             self.payload = list(load_payloads(self.payload))
         if isinstance(self.payload, dict):
             self.payload = [self.payload]
-        self._n_requests = self.n_requests or len(self.payload)
+
+        if self.run_duration is not None and self.n_requests is not None:
+            raise ValueError(
+                "Cannot set both n_requests and run_duration. "
+                "Use n_requests for request-bound runs or run_duration for time-bound runs."
+            )
+
+        self._time_bound = self.run_duration is not None
+        if self._time_bound:
+            # For time-bound runs, _n_requests is unknown upfront
+            self._n_requests = 0
+        else:
+            self._n_requests = self.n_requests or len(self.payload)
 
     @staticmethod
     async def _compute_time_per_output_token(response: InvocationResponse):
@@ -275,12 +303,16 @@ async def _process_results_from_q(self, output_path: Path | None = None):
                 self._responses.append(response)
                 self._running_stats.update(response.to_dict())
 
-            if self._progress_bar:
+            if self._progress_bar is not None and not self._time_bound:
                 self._progress_bar.update(1)
-                self._progress_bar.set_postfix(
-                    self._running_stats.snapshot(self.progress_bar_stats),
-                    refresh=False,
-                )
+
+            if self._stats_display is not None:
+                snapshot = self._running_stats.snapshot(self.progress_bar_stats)
+                if snapshot:
+                    prefix = (
+                        f"reqs={self._running_stats._count}" if self._time_bound else ""
+                    )
+                    self._stats_display.update(snapshot, extra_prefix=prefix)
 
             if output_path:
                 output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -295,23 +327,21 @@ def _invoke_n_no_wait(
         n: int | None = None,
         shuffle_order=True,
     ) -> list[InvocationResponse]:
-        """
-        Generate multiple invocations for the given payload.
+        """Generate *n* invocations synchronously for a single client.
 
-        This method generates `n` invocations for the given payload(s) by sending
-        requests to the endpoint in a loop. If a sequence of payloads is provided,
-        the payloads are cycled through until `n` invocations are generated. If a
-        single payload is provided, it is used for all `n` invocations.
+        Cycles through *payload* until *n* invocations are generated, sending
+        each request to the endpoint and pushing the response onto
+        ``self._queue`` for async token-counting and stats collection.
 
         Args:
-            payload: The input payload to generate invocations for.
-            n (int|None, optional): The number of invocations to generate.
+            payload (list[dict]): The input payloads to cycle through.
+            n (int | None, optional): The number of invocations to generate.
                 If not specified, every element in the payload is used once.
             shuffle_order (bool, optional): Whether to shuffle the order of payloads
                 before generating invocations. Defaults to True.
 
         Returns:
-            List[EndpointResponse]: A list of response objects.
+            list[InvocationResponse]: A list of response objects.
         """
 
         # ToDo: replace with an async method to prepare payloads, including possible callbacks,
@@ -324,17 +354,20 @@ def _invoke_n_no_wait(
         responses = []
         if n is None:
             n = len(payload)
-        for p, _ in zip(
-            cycle(payload),
-            trange(
-                n,
-                leave=False,
-                desc="Requests",
-                disable=_disable_tqdm or self._disable_per_client_progress_bar,
-            ),
-        ):
+        if not payload:
+            return responses
+        payload_iter = cycle(payload)
+        pbar = trange(
+            n,
+            leave=False,
+            desc="Requests",
+            disable=_disable_tqdm or self._disable_per_client_progress_bar,
+        )
+        for _ in pbar:
+            p = next(payload_iter)
             try:
                 p = asyncio.run(process_before_invoke_callbacks(self.callbacks, p))
+                self._running_stats.record_send()
                 response = self._endpoint.invoke(p)
 
             except Exception as e:
@@ -351,6 +384,56 @@ def _invoke_n_no_wait(
                 )
         return responses
 
+    def _invoke_for_duration(
+        self,
+        payload: list[dict],
+        duration: float,
+        shuffle_order=True,
+    ) -> list[InvocationResponse]:
+        """Generate invocations continuously until *duration* seconds have elapsed.
+
+        Cycles through *payload* indefinitely, stopping only when the wall-clock
+        time exceeds *duration*. Each completed request is pushed onto
+        ``self._queue`` for async token-counting and stats collection, mirroring
+        the behaviour of :meth:`_invoke_n_no_wait`.
+
+        Args:
+            payload (list[dict]): The input payloads to cycle through.
+            duration (float): Maximum wall-clock seconds to keep sending requests.
+            shuffle_order (bool, optional): Whether to shuffle the order of payloads
+                before generating invocations. Defaults to True.
+
+        Returns:
+            list[InvocationResponse]: All responses collected during the window.
+        """
+        if shuffle_order:
+            self._random_seed += random.randint(1, 1000)
+            random.seed(0)  # NOTE(review): fixed seed, ignores _random_seed - confirm
+            payload = random.sample(payload, k=len(payload))
+
+        responses: list[InvocationResponse] = []
+        deadline = time.perf_counter() + duration
+        payload_iter = cycle(payload)
+
+        while time.perf_counter() < deadline:
+            p = next(payload_iter)
+            try:
+                p = asyncio.run(process_before_invoke_callbacks(self.callbacks, p))
+                self._running_stats.record_send()
+                response = self._endpoint.invoke(p)
+            except Exception as e:
+                logger.exception(f"Error with invocation with payload {p}: {e}")
+                response = InvocationResponse.error_output(
+                    id=uuid4().hex,
+                    error=str(e),
+                )
+            responses.append(response)
+            if self._queue:
+                self._queue._loop.call_soon_threadsafe(  # type: ignore
+                    self._queue.put_nowait, response
+                )
+        return responses
+
     async def _invoke_n(
         self,
         payload: list[dict],
@@ -358,17 +441,15 @@ async def _invoke_n(
         add_start_jitter=True,
         shuffle_order=True,
     ) -> list[InvocationResponse]:
-        """
-        Asynchronously generate multiple invocations for the given payload.
+        """Asynchronously generate *n* invocations for a single client.
 
-        This method generates `n` invocations for the given payload(s) by sending
-        requests to the endpoint asynchronously. If a sequence of payloads is provided,
-        the payloads are cycled through until `n` invocations are generated. If a
-        single payload is provided, it is used for all `n` invocations.
+        Wraps :meth:`_invoke_n_no_wait` in a thread with an overall timeout
+        of ``self.timeout * n`` seconds.
 
         Args:
-            payload (Dict[str, str] | Sequence[Dict[str, str]]): The input payload(s) to generate invocations for.
-            n (int | None, optional): The number of invocations to generate. Defaults to None.
+            payload (list[dict]): The input payload(s) to generate invocations for.
+            n (int | None, optional): The number of invocations to generate.
+                Defaults to None (one per payload element).
             add_start_jitter (bool, optional): Whether to add a random delay before
                 starting the invocations loop to avoid batch bunching when using
                 multiple clients. Defaults to True.
@@ -376,7 +457,8 @@ async def _invoke_n(
                 before generating invocations. Defaults to True.
 
         Returns:
-            List[EndpointResponse]: A list of response objects.
+            list[InvocationResponse]: A list of response objects. Returns an empty
+            list if the overall timeout is exceeded.
         """
 
         if add_start_jitter:
@@ -396,26 +478,65 @@ async def _invoke_n(
 
         return response
 
+    async def _invoke_duration(
+        self,
+        payload: list[dict],
+        add_start_jitter=True,
+        shuffle_order=True,
+    ) -> list[InvocationResponse]:
+        """Asynchronously generate invocations for a single client until duration expires.
+
+        Wraps :meth:`_invoke_for_duration` in a thread. The client sends requests
+        continuously for ``self.run_duration`` seconds.
+
+        Args:
+            payload (list[dict]): The input payload(s) to cycle through.
+            add_start_jitter (bool, optional): Whether to add a random delay before
+                starting the invocations loop to avoid batch bunching when using
+                multiple clients. Defaults to True.
+            shuffle_order (bool, optional): Whether to shuffle the order of payloads
+                before generating invocations. Defaults to True.
+
+        Returns:
+            list[InvocationResponse]: All responses collected during the time window.
+        """
+
+        if add_start_jitter:
+            await asyncio.sleep(random.random() * 0.01)
+
+        if shuffle_order:
+            self._random_seed = random.randint(0, 2**16 - 1)
+
+        return await asyncio.to_thread(
+            self._invoke_for_duration,
+            payload,
+            self.run_duration,
+            shuffle_order,
+        )
+
     async def _invoke_n_c(
         self,
         payload: list[dict],
         n_requests: int | None = None,
         clients: int = 1,
     ) -> tuple[float, float, float]:
-        """
-        Asynchronously generates multiple invocations for a given payload.
+        """Spawn *clients* concurrent count-bound invocation loops.
+
+        Each client generates *n_requests* invocations by delegating to
+        :meth:`_invoke_n`. All clients run concurrently and the method waits
+        for all of them to finish before signalling the token-counting queue
+        to stop.
 
         Args:
-            payload (dict): The input data for generating invocations.
-            queue (asyncio.Queue): The queue to store the generated responses.
-            n_requests (int | None, optional): The number of invocations to generate per connection. Defaults to None.
-            clients (int, optional): The number of concurrent connections to generate invocations. Defaults to 1.
+            payload (list[dict]): The input payloads to send.
+            n_requests (int | None, optional): The number of invocations to
+                generate per client. Defaults to None.
+            clients (int, optional): The number of concurrent client connections.
+                Defaults to 1.
 
         Returns:
-            None
-
-        Raises:
-            None
+            tuple[float, float, float]: A ``(total_test_time, start_t, end_t)``
+            tuple of ``time.perf_counter`` values.
         """
         logger.info(
             f"Generating {clients} connections with {n_requests} invocations each"
@@ -430,25 +551,86 @@ async def _invoke_n_c(
         end_t = time.perf_counter()
         total_test_time = end_t - start_t
         logger.info(
-            f"Generated {clients} connections with {n_requests} invocations each in {total_test_time * 1000:.2f} seconds"
+            f"Completed {clients} clients x {n_requests} requests in "
+            f"{total_test_time * 1000:.2f}ms"
+        )
+
+        if self._queue:
+            await self._queue.put(None)
+            logger.debug("Signaling token counting task to exit")
+        return total_test_time, start_t, end_t
+
+    async def _invoke_duration_c(
+        self,
+        payload: list[dict],
+        clients: int = 1,
+    ) -> tuple[float, float, float]:
+        """Spawn *clients* concurrent time-bound invocation loops.
+
+        Each client sends requests continuously for ``self.run_duration`` seconds
+        by delegating to :meth:`_invoke_duration`. All clients run concurrently
+        and the method waits for all of them to finish before signalling the
+        token-counting queue to stop.
+
+        Args:
+            payload (list[dict]): The input payloads to cycle through.
+            clients (int, optional): The number of concurrent client connections.
+                Defaults to 1.
+
+        Returns:
+            tuple[float, float, float]: A ``(total_test_time, start_t, end_t)``
+            tuple of ``time.perf_counter`` values.
+        """
+        logger.info(f"Generating {clients} connections for {self.run_duration}s each")
+        start_t = time.perf_counter()
+        await tqdm.gather(
+            *[self._invoke_duration(payload) for _ in range(clients)],
+            leave=False,
+            desc="Clients",
+            disable=_disable_tqdm or self._disable_clients_progress_bar,
+        )
+        end_t = time.perf_counter()
+        total_test_time = end_t - start_t
+        logger.info(
+            f"Completed {clients} clients x {self.run_duration}s in "
+            f"{total_test_time * 1000:.2f}ms"
         )
 
-        # Signal the token counting task to exit
         if self._queue:
             await self._queue.put(None)
             logger.debug("Signaling token counting task to exit")
         return total_test_time, start_t, end_t
 
+    async def _tick_time_bar(self):
+        """Advance ``_progress_bar`` in whole-second steps until ``run_duration`` elapses.
+
+        Wakes every 0.5 s. Designed to run as a concurrent task alongside the
+        invocation loops so the user sees a smooth time-based progress bar.
+        """
+        start = time.perf_counter()
+        duration = self.run_duration
+        prev = 0  # whole seconds already reported to the bar
+        while True:
+            await asyncio.sleep(0.5)
+            elapsed = time.perf_counter() - start
+            tick = min(int(elapsed), int(duration)) - prev  # capped at the bar total
+            if tick > 0 and self._progress_bar is not None:
+                self._progress_bar.update(tick)
+                prev += tick
+            if elapsed >= duration:
+                break
+
     async def _run(self):
         """Run the test with the given configuration
 
         This method is expected to be called *exactly once* after the _Run object is created.
         Attempting to re-use a _Run object may result in undefined behavior.
         """
+        # For time-bound runs, total_requests is unknown upfront
         result = Result(
             responses=[],
             total_test_time=None,
-            total_requests=self._n_requests * self.clients,
+            total_requests=0 if self._time_bound else self._n_requests * self.clients,
             clients=self.clients,
             n_requests=self._n_requests,
             output_path=self.output_path,  # type: ignore
@@ -471,27 +653,64 @@ async def _run(self):
         loop.set_default_executor(ThreadPoolExecutor(max_workers=self.clients + 5))
         logger.info("Starting test")
         self._queue = asyncio.Queue()
-        self._progress_bar = tqdm(
-            total=self.clients * self._n_requests,
-            leave=False,
-            desc="Total requests",
-            disable=_disable_tqdm,
-        )
+
+        if self._time_bound:
+            # Time-bound: progress bar shows elapsed seconds
+            self._progress_bar = tqdm(
+                total=int(self.run_duration),
+                leave=False,
+                desc="Elapsed",
+                unit="s",
+                bar_format="{desc}: {bar}| {n:.0f}/{total:.0f}s [{elapsed}]",
+                disable=_disable_tqdm,
+            )
+        else:
+            # Count-bound: progress bar shows completed requests
+            self._progress_bar = tqdm(
+                total=self.clients * self._n_requests,
+                leave=False,
+                desc="Total requests",
+                disable=_disable_tqdm,
+            )
+
+        # Live stats display — renders as an HTML table in notebooks, multi-line in terminals
+        self._stats_display = LiveStatsDisplay(disabled=_disable_tqdm)
+
+        # Show the table layout immediately with placeholder values
+        initial_snapshot = self._running_stats.snapshot(self.progress_bar_stats)
+        prefix = "reqs=0" if self._time_bound else ""
+        self._stats_display.update(initial_snapshot, extra_prefix=prefix)
 
         try:
             run_start_time = now_utc()
-            _, (total_test_time, start_time, end_time) = await asyncio.gather(
-                self._process_results_from_q(
-                    output_path=Path(self.output_path) / "responses.jsonl"
-                    if self.output_path
-                    else None,
-                ),
-                self._invoke_n_c(
+            if self._time_bound:
+                invoke_coro = self._invoke_duration_c(
+                    payload=self.payload,  # type: ignore
+                    clients=self.clients,
+                )
+                _, (total_test_time, start_time, end_time), _ = await asyncio.gather(
+                    self._process_results_from_q(
+                        output_path=Path(self.output_path) / "responses.jsonl"
+                        if self.output_path
+                        else None,
+                    ),
+                    invoke_coro,
+                    self._tick_time_bar(),
+                )
+            else:
+                invoke_coro = self._invoke_n_c(
                     payload=self.payload,  # type: ignore
                     n_requests=self._n_requests,
                     clients=self.clients,
-                ),
-            )
+                )
+                _, (total_test_time, start_time, end_time) = await asyncio.gather(
+                    self._process_results_from_q(
+                        output_path=Path(self.output_path) / "responses.jsonl"
+                        if self.output_path
+                        else None,
+                    ),
+                    invoke_coro,
+                )
             run_end_time = now_utc()
 
         except asyncio.CancelledError:
@@ -501,12 +720,20 @@ async def _run(self):
             return result
 
         self._progress_bar.close()
+        if self._stats_display is not None:
+            self._stats_display.close()
         logger.info(f"Test completed in {total_test_time * 1000:.2f} seconds.")
 
+        actual_total = self._running_stats._count
+
         result = replace(
             result,
             responses=self._responses,
             total_test_time=total_test_time,
+            total_requests=actual_total,
+            n_requests=actual_total // max(self.clients, 1)
+            if self._time_bound
+            else self._n_requests,
             start_time=run_start_time,
             end_time=run_end_time,
         )
@@ -580,7 +807,11 @@ class Runner(_RunConfig):
             `DummyTokenizer` will be used if needed.
         clients (int): The number of concurrent clients to use for sending requests. Defaults to 1.
         n_requests (int | None): The number of LLM invocations to generate *per client*. By
-            default, each request in `payload` will be sent once by each client.
+            default, each request in `payload` will be sent once by each client.  Mutually
+            exclusive with ``run_duration``.
+        run_duration (int | float | None): Run each client for this many seconds instead of a
+            fixed request count.  Clients send requests continuously until the duration expires.
+            Mutually exclusive with ``n_requests``.  Defaults to ``None`` (count-bound mode).
         payload (dict | list[dict] | os.PathLike | str | None): The request data to send to the
             endpoint under test. You can provide a single JSON payload (dict), a list of payloads
             (list[dict]), or a path to one or more JSON/JSON-Lines files to be loaded by
@@ -647,6 +878,7 @@ async def run(
         tokenizer: Tokenizer | Any | None = None,
         clients: int | None = None,
         n_requests: int | None = None,
+        run_duration: int | float | None = None,
         payload: dict | list[dict] | os.PathLike | str | None = None,
         run_name: str | None = None,
         run_description: str | None = None,
@@ -677,6 +909,17 @@ async def run(
                 output token counts for endpoints that don't report exact information.
             clients (int): The number of concurrent clients to use for sending requests.
             n_requests (int | None): The number of LLM invocations to generate *per client*.
+                Mutually exclusive with ``run_duration``.
+            run_duration (int | float | None): Run each client for this many seconds
+                instead of a fixed request count.  Clients send requests continuously
+                until the duration expires.  Mutually exclusive with ``n_requests``.
+
+                Example::
+
+                    # Run for 60 seconds with 5 concurrent clients:
+                    result = await runner.run(run_duration=60, clients=5)
+                    result.total_requests  # actual count completed
+
             payload (dict | list[dict] | os.PathLike | str | None): The request data to send to the
                 endpoint under test. You can provide a single JSON payload (dict), a list of
                 payloads (list[dict]), or a path to one or more JSON/JSON-Lines files to be loaded
@@ -746,6 +989,7 @@ async def run(
             tokenizer=tokenizer,
             clients=clients,
             n_requests=n_requests,
+            run_duration=run_duration,
             payload=payload,
             run_name=run_name,
             run_description=run_description,
diff --git a/llmeter/utils.py b/llmeter/utils.py
index fd30d0f..c43d7d0 100644
--- a/llmeter/utils.py
+++ b/llmeter/utils.py
@@ -1,6 +1,7 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
 import bisect
+import time
 from datetime import datetime, timezone
 from itertools import filterfalse
 from math import isnan
@@ -113,7 +114,13 @@ class RunningStats:
     #: * ``(metric_name, aggregation, "inv")`` — same as above but displays the
     #:   reciprocal (e.g. seconds-per-token → tokens-per-second).
     #: * The literal string ``"failed"`` for the running failure count.
+    #: * The literal string ``"rpm"`` for live requests-per-minute based on the
+    #:   send window (first request sent to last request sent).
+    #: * The literal string ``"output_tps"`` for aggregate output tokens per second
+    #:   across all clients, based on the send window.
     DEFAULT_SNAPSHOT_STATS: dict[str, tuple[str, ...] | str] = {
+        "rpm": "rpm",
+        "output_tps": "output_tps",
         "p50_ttft": ("time_to_first_token", "p50"),
         "p90_ttft": ("time_to_first_token", "p90"),
         "p50_ttlt": ("time_to_last_token", "p50"),
@@ -128,9 +135,29 @@ def __init__(self, metrics: Sequence[str]):
         self._metrics = list(metrics)
         self._count = 0
         self._failed = 0
+        self._sends = 0
+        self._first_send_time: float | None = None
+        self._last_send_time: float | None = None
         self._sums: dict[str, float] = {m: 0.0 for m in metrics}
         self._values: dict[str, list[float]] = {m: [] for m in metrics}
 
+    def record_send(self) -> None:
+        """Note the dispatch of one request and stamp the send window.
+
+        Intended to be invoked right as a request goes out, ahead of awaiting
+        its response.  Every call bumps ``_sends`` and moves ``_last_send_time``
+        to the current ``time.perf_counter()`` reading; only the very first call
+        also pins ``_first_send_time``.
+
+        ``snapshot()`` derives its ``"rpm"`` and ``"output_tps"`` figures from
+        the span between those two timestamps.
+        """
+        sent_at = time.perf_counter()
+        if self._first_send_time is None:
+            self._first_send_time = sent_at
+        self._last_send_time = sent_at
+        self._sends += 1
+
     def update(self, response_dict: dict[str, Any]) -> None:
         """Record one response's metric values.
 
@@ -257,6 +284,10 @@ def snapshot(
                   the value is inverted before display (e.g. seconds-per-token →
                   tokens-per-second).
                 * ``"failed"`` — the literal string; shows the running failure count.
+                * ``"rpm"`` — the literal string; shows live requests-per-minute
+                  estimate based on the send window (first to last request sent).
+                * ``"output_tps"`` — the literal string; shows aggregate output
+                  tokens per second across all clients, based on the send window.
 
                 Defaults to :attr:`DEFAULT_SNAPSHOT_STATS` when ``None``.
 
@@ -284,7 +315,9 @@ def snapshot(
             # {'tps': '28.3 tok/s'}
         """
         if self._count == 0:
-            return {}
+            if fields is None:
+                fields = self.DEFAULT_SNAPSHOT_STATS
+            return {label: "—" for label in fields}
 
         if fields is None:
             fields = self.DEFAULT_SNAPSHOT_STATS
@@ -297,6 +330,27 @@ def snapshot(
                 info[label] = str(self._failed)
                 continue
 
+            if spec == "rpm":
+                if (
+                    self._first_send_time is not None
+                    and self._last_send_time is not None
+                    and self._last_send_time > self._first_send_time
+                ):
+                    send_window = self._last_send_time - self._first_send_time
+                    info[label] = f"{self._count / send_window * 60:.1f}"
+                continue
+
+            if spec == "output_tps":
+                if (
+                    self._first_send_time is not None
+                    and self._last_send_time is not None
+                    and self._last_send_time > self._first_send_time
+                ):
+                    send_window = self._last_send_time - self._first_send_time
+                    total_out = self._sums.get("num_tokens_output", 0)
+                    info[label] = f"{total_out / send_window:.1f} tok/s"
+                continue
+
             metric = spec[0]
             agg = spec[1]
             invert = len(spec) > 2 and spec[2] == "inv"
diff --git a/tests/unit/test_experiments.py b/tests/unit/test_experiments.py
index 13fbd1f..42b0fb0 100644
--- a/tests/unit/test_experiments.py
+++ b/tests/unit/test_experiments.py
@@ -531,3 +531,122 @@ def test_get_n_requests_parametrized(
     )
     result = runner._get_n_requests(clients)
     assert result == expected, f"Expected {expected}, but got {result}"
+
+
+# ── LoadTest with run_duration, low_memory, progress_bar_stats ───────────────
+
+
+class TestLoadTestTimeBound:
+    def test_load_test_with_run_duration(self, mock_endpoint):
+        """The constructor keeps the ``run_duration`` it was given."""
+        load_test = LoadTest(
+            endpoint=mock_endpoint,
+            payload={"input": "test"},
+            sequence_of_clients=[1, 2],
+            run_duration=30,
+        )
+        assert load_test.run_duration == 30
+
+    def test_load_test_with_low_memory(self, mock_endpoint):
+        """The constructor keeps the ``low_memory`` flag it was given."""
+        load_test = LoadTest(
+            endpoint=mock_endpoint,
+            payload={"input": "test"},
+            sequence_of_clients=[1],
+            low_memory=True,
+        )
+        assert load_test.low_memory is True
+
+    def test_load_test_with_progress_bar_stats(self, mock_endpoint):
+        """The constructor keeps the ``progress_bar_stats`` mapping it was given."""
+        wanted_stats = {"rpm": "rpm", "fail": "failed"}
+        load_test = LoadTest(
+            endpoint=mock_endpoint,
+            payload={"input": "test"},
+            sequence_of_clients=[1],
+            progress_bar_stats=wanted_stats,
+        )
+        assert load_test.progress_bar_stats == wanted_stats
+
+    @pytest.mark.asyncio
+    async def test_run_duration_passed_to_runner(self, mock_endpoint):
+        """A time-bound LoadTest gives runner.run() run_duration, not n_requests."""
+        fake_runner = AsyncMock(spec=Runner)
+        fake_runner.run.return_value = MagicMock(
+            spec=Result, clients=1, total_requests=10
+        )
+
+        with patch("llmeter.experiments.Runner", return_value=fake_runner):
+            load_test = LoadTest(
+                endpoint=mock_endpoint,
+                payload={"input": "test"},
+                sequence_of_clients=[1, 3],
+                run_duration=15,
+            )
+            await load_test.run()
+
+        # Every recorded call carries run_duration and omits n_requests
+        for recorded in fake_runner.run.call_args_list:
+            assert recorded.kwargs["run_duration"] == 15
+            assert "n_requests" not in recorded.kwargs
+
+    @pytest.mark.asyncio
+    async def test_count_bound_does_not_pass_run_duration(self, mock_endpoint):
+        """With no run_duration, runner.run() gets n_requests and no run_duration."""
+        fake_runner = AsyncMock(spec=Runner)
+        fake_runner.run.return_value = MagicMock(
+            spec=Result, clients=1, total_requests=10
+        )
+
+        with patch("llmeter.experiments.Runner", return_value=fake_runner):
+            load_test = LoadTest(
+                endpoint=mock_endpoint,
+                payload={"input": "test"},
+                sequence_of_clients=[1],
+            )
+            await load_test.run()
+
+        first_call_kwargs = fake_runner.run.call_args_list[0].kwargs
+        assert "run_duration" not in first_call_kwargs
+        assert "n_requests" in first_call_kwargs
+
+    @pytest.mark.asyncio
+    async def test_low_memory_passed_to_runner(self, mock_endpoint):
+        """The ``low_memory`` flag is present on every runner.run() call."""
+        fake_runner = AsyncMock(spec=Runner)
+        fake_runner.run.return_value = MagicMock(
+            spec=Result, clients=1, total_requests=10
+        )
+
+        with patch("llmeter.experiments.Runner", return_value=fake_runner):
+            load_test = LoadTest(
+                endpoint=mock_endpoint,
+                payload={"input": "test"},
+                sequence_of_clients=[1, 2],
+                low_memory=True,
+            )
+            await load_test.run()
+
+        for recorded in fake_runner.run.call_args_list:
+            assert recorded.kwargs["low_memory"] is True
+
+    @pytest.mark.asyncio
+    async def test_progress_bar_stats_passed_to_runner(self, mock_endpoint):
+        """The ``progress_bar_stats`` mapping reaches runner.run() unchanged."""
+        wanted_stats = {"rpm": "rpm"}
+        fake_runner = AsyncMock(spec=Runner)
+        fake_runner.run.return_value = MagicMock(
+            spec=Result, clients=1, total_requests=10
+        )
+
+        with patch("llmeter.experiments.Runner", return_value=fake_runner):
+            load_test = LoadTest(
+                endpoint=mock_endpoint,
+                payload={"input": "test"},
+                sequence_of_clients=[1],
+                progress_bar_stats=wanted_stats,
+            )
+            await load_test.run()
+
+        first_call_kwargs = fake_runner.run.call_args_list[0].kwargs
+        assert first_call_kwargs["progress_bar_stats"] == wanted_stats
diff --git a/tests/unit/test_live_display.py b/tests/unit/test_live_display.py
new file mode 100644
index 0000000..7057eaf
--- /dev/null
+++ b/tests/unit/test_live_display.py
@@ -0,0 +1,154 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from collections import OrderedDict
+from unittest.mock import patch
+
+from llmeter.live_display import (
+    LiveStatsDisplay,
+    _classify,
+    _group_stats,
+    _in_notebook,
+)
+
+
+# ── _classify ────────────────────────────────────────────────────────────────
+
+
+class TestClassify:
+    def test_rpm_goes_to_throughput(self):
+        assert _classify("rpm") == "Throughput"
+
+    def test_tps_goes_to_throughput(self):
+        for label in ("p50_tps", "output_tps"):
+            assert _classify(label) == "Throughput"
+
+    def test_ttft_goes_to_ttft(self):
+        for label in ("p50_ttft", "p90_ttft"):
+            assert _classify(label) == "TTFT"
+
+    def test_ttlt_goes_to_ttlt(self):
+        for label in ("p50_ttlt", "p90_ttlt"):
+            assert _classify(label) == "TTLT"
+
+    def test_token_goes_to_tokens(self):
+        for label in ("input_tokens", "output_tokens"):
+            assert _classify(label) == "Tokens"
+
+    def test_fail_goes_to_errors(self):
+        assert _classify("fail") == "Errors"
+
+    def test_unknown_goes_to_other(self):
+        assert _classify("custom_metric") == "Other"
+
+    def test_case_insensitive(self):
+        expected = {"RPM": "Throughput", "P50_TTFT": "TTFT"}
+        assert {label: _classify(label) for label in expected} == expected
+
+
+# ── _group_stats ─────────────────────────────────────────────────────────────
+
+
+class TestGroupStats:
+    def test_groups_by_category(self):
+        snapshot = {
+            "rpm": "185.9",
+            "p50_ttft": "0.312s",
+            "p90_ttlt": "1.203s",
+            "input_tokens": "12540",
+            "fail": "0",
+        }
+        grouped = _group_stats(snapshot)
+
+        # Every key above belongs to a different category.
+        expected = ("Throughput", "TTFT", "TTLT", "Tokens", "Errors")
+        for category in expected:
+            assert category in grouped
+
+    def test_preserves_order(self):
+        ordered = OrderedDict()
+        ordered["rpm"] = "185.9"
+        ordered["p50_ttft"] = "0.312s"
+        ordered["p50_ttlt"] = "0.847s"
+        ordered["fail"] = "0"
+
+        grouped = _group_stats(ordered)
+
+        # Categories must come out in first-seen key order.
+        seen_order = list(grouped.keys())
+        assert seen_order == ["Throughput", "TTFT", "TTLT", "Errors"]
+
+    def test_unknown_keys_go_to_other(self):
+        grouped = _group_stats({"custom": "42"})
+        # The lone entry lands, untouched, in the catch-all bucket.
+        assert "Other" in grouped
+        assert grouped["Other"] == [("custom", "42")]
+
+    def test_empty_stats(self):
+        nothing = {}
+        assert len(_group_stats(nothing)) == 0
+
+
+# ── _in_notebook ─────────────────────────────────────────────────────────────
+
+
+class TestInNotebook:
+    def test_returns_false_outside_notebook(self):
+        assert _in_notebook() is False
+
+    def test_returns_false_for_terminal_ipython(self):
+        shell_cls = type("TerminalInteractiveShell", (), {})
+        with patch("IPython.get_ipython", return_value=shell_cls()):
+            assert _in_notebook() is False
+
+    def test_returns_true_for_zmq_shell(self):
+        shell_cls = type("ZMQInteractiveShell", (), {})
+        with patch("IPython.get_ipython", return_value=shell_cls()):
+            assert _in_notebook() is True
+
+    def test_returns_false_for_none(self):
+        with patch("IPython.get_ipython", return_value=None):
+            assert _in_notebook() is False
+
+
+# ── LiveStatsDisplay ─────────────────────────────────────────────────────────
+
+
+class TestLiveStatsDisplay:
+    def test_disabled_does_nothing(self):
+        panel = LiveStatsDisplay(disabled=True)
+        # Passes as long as neither call raises
+        panel.update({"rpm": "100"})
+        panel.close()
+
+    def test_update_empty_stats_does_nothing(self):
+        panel = LiveStatsDisplay(disabled=False)
+        panel.update({})
+        assert panel._last_line_count == 0
+        assert panel._handle is None
+
+    def test_terminal_output(self, capsys):
+        panel = LiveStatsDisplay(disabled=False)
+        panel._is_notebook = False
+        panel.update({"rpm": "100", "fail": "0"})
+        # Rendering must have recorded at least one emitted line
+        assert panel._last_line_count > 0
+        panel.close()
+        assert panel._last_line_count == 0
+
+    def test_terminal_with_prefix(self, capsys):
+        panel = LiveStatsDisplay(disabled=False)
+        panel._is_notebook = False
+        panel.update({"rpm": "100"}, extra_prefix="reqs=42")
+        assert panel._last_line_count >= 2  # prefix line + stats line
+        panel.close()
+
+    def test_terminal_overwrites_previous(self):
+        panel = LiveStatsDisplay(disabled=False)
+        panel._is_notebook = False
+        panel.update({"rpm": "100"})
+        lines_after_first = panel._last_line_count
+        panel.update({"rpm": "200"})
+        # A redraw replaces the old lines, so the count must not grow
+        assert panel._last_line_count == lines_after_first
+        panel.close()
diff --git a/tests/unit/test_runner.py b/tests/unit/test_runner.py
index 4150dee..8953e80 100644
--- a/tests/unit/test_runner.py
+++ b/tests/unit/test_runner.py
@@ -997,3 +997,208 @@ async def test_count_tokens_from_q_with_custom_output_path(run: _Run, tmp_path:
 
 
 # Add more tests for edge cases and other methods as needed
+
+
+# ── Time-bound (run_duration) tests ──────────────────────────────────────────
+
+
+def test_run_duration_and_n_requests_mutually_exclusive(
+    mock_endpoint: Endpoint, mock_tokenizer: MagicMock
+):
+    """A ``_Run`` given n_requests *and* run_duration is rejected with ValueError."""
+    conflicting = {"n_requests": 10, "run_duration": 5}
+    with pytest.raises(ValueError, match="Cannot set both"):
+        _Run(
+            endpoint=mock_endpoint,
+            tokenizer=mock_tokenizer,
+            payload=[{"prompt": "test"}],
+            clients=1,
+            run_name="test_run",
+            **conflicting,
+        )
+
+
+def test_run_duration_sets_time_bound_flag(
+    mock_endpoint: Endpoint, mock_tokenizer: MagicMock
+):
+    """A ``_Run`` built with run_duration is time-bound and has ``_n_requests`` 0."""
+    timed_run = _Run(
+        run_name="test_run",
+        clients=1,
+        run_duration=5,
+        payload=[{"prompt": "test"}],
+        tokenizer=mock_tokenizer,
+        endpoint=mock_endpoint,
+    )
+    assert timed_run._n_requests == 0
+    assert timed_run._time_bound is True
+
+
+def test_n_requests_sets_count_bound(
+    mock_endpoint: Endpoint, mock_tokenizer: MagicMock
+):
+    """Without run_duration, n_requests alone yields a count-bound ``_Run``."""
+    counted_run = _Run(
+        run_name="test_run",
+        clients=1,
+        n_requests=10,
+        payload=[{"prompt": "test"}],
+        tokenizer=mock_tokenizer,
+        endpoint=mock_endpoint,
+    )
+    assert counted_run._n_requests == 10
+    assert counted_run._time_bound is False
+
+
+def test_run_duration_must_be_positive(
+    mock_endpoint: Endpoint, mock_tokenizer: MagicMock
+):
+    """A negative run_duration raises AssertionError mentioning "positive"."""
+    with pytest.raises(AssertionError, match="positive"):
+        _Run(
+            run_name="test_run",
+            clients=1,
+            run_duration=-1,
+            payload=[{"prompt": "test"}],
+            tokenizer=mock_tokenizer,
+            endpoint=mock_endpoint,
+        )
+
+
+def test_invoke_for_duration_respects_deadline(
+    mock_endpoint: Endpoint, mock_tokenizer: MagicMock
+):
+    """``_invoke_for_duration`` returns soon after its 0.5 s budget is spent."""
+    job = _Run(
+        endpoint=mock_endpoint,
+        tokenizer=mock_tokenizer,
+        payload=[{"prompt": "test"}],
+        run_duration=0.5,
+        clients=1,
+        run_name="test_run",
+    )
+    job.callbacks = None
+    job._queue = MagicMock()
+    job._queue._loop.call_soon_threadsafe = MagicMock()
+
+    # Each call blocks ~100 ms, so only a few fit inside the budget
+    def delayed_invoke(payload):
+        time.sleep(0.1)
+        return InvocationResponse(id="1", input_prompt="test", response_text="response")
+
+    job._endpoint.invoke.side_effect = delayed_invoke
+
+    began = time.perf_counter()
+    got = job._invoke_for_duration(payload=[{"prompt": "test"}], duration=0.5)
+    took = time.perf_counter() - began
+
+    assert len(got) > 0
+    assert took < 1.0  # Should not overshoot by much
+    assert all(isinstance(item, InvocationResponse) for item in got)
+
+
+def test_invoke_for_duration_cycles_payloads(
+    mock_endpoint: Endpoint, mock_tokenizer: MagicMock
+):
+    """With two payloads and no shuffling, both get sent before time runs out."""
+    job = _Run(
+        endpoint=mock_endpoint,
+        tokenizer=mock_tokenizer,
+        payload=[{"prompt": "a"}, {"prompt": "b"}],
+        run_duration=0.3,
+        clients=1,
+        run_name="test_run",
+    )
+    job.callbacks = None
+    job._queue = MagicMock()
+    job._queue._loop.call_soon_threadsafe = MagicMock()
+
+    sent = []
+
+    def recording_invoke(payload):
+        sent.append(payload)
+        return InvocationResponse(id="1", input_prompt=str(payload), response_text="ok")
+
+    job._endpoint.invoke.side_effect = recording_invoke
+
+    got = job._invoke_for_duration(
+        payload=[{"prompt": "a"}, {"prompt": "b"}],
+        duration=0.3,
+        shuffle_order=False,
+    )
+
+    assert len(got) >= 2
+    # Both prompts must show up among the payloads the endpoint received
+    prompts_sent = {item.get("prompt") for item in sent}
+    assert "a" in prompts_sent
+    assert "b" in prompts_sent
+
+
+@pytest.mark.asyncio
+async def test_run_with_duration(runner: Runner):
+    """A time-bound ``run()`` finishes and reports the counts it actually achieved."""
+    outcome = await runner.run(
+        clients=1,
+        run_duration=0.3,
+        payload={"prompt": "test"},
+    )
+
+    assert outcome.total_test_time is not None
+    assert outcome.total_test_time > 0
+    assert outcome.total_requests > 0
+    assert outcome.n_requests > 0
+    assert outcome.stats["total_requests"] == outcome.total_requests
+
+
+@pytest.mark.asyncio
+async def test_run_with_duration_multiple_clients(runner: Runner):
+    """A time-bound run with three clients completes and records that count."""
+    outcome = await runner.run(
+        clients=3,
+        run_duration=0.3,
+        payload={"prompt": "test"},
+    )
+
+    assert outcome.clients == 3
+    assert outcome.total_requests > 0
+    assert outcome.total_test_time is not None
+
+
+@pytest.mark.asyncio
+async def test_run_with_duration_and_output_path(runner: Runner, tmp_path: Path):
+    """A time-bound run given an output_path writes its three result files."""
+    out_dir = tmp_path / "duration_run"
+    outcome = await runner.run(
+        payload={"prompt": "test"},
+        run_duration=0.3,
+        clients=1,
+        output_path=out_dir,
+        run_name="dur_test",
+    )
+
+    assert outcome.output_path is not None
+    for filename in ("responses.jsonl", "summary.json", "stats.json"):
+        assert (out_dir / filename).exists()
+
+
+def test_prepare_run_with_duration(runner: Runner):
+    """``_prepare_run`` forwards run_duration, producing a time-bound ``_Run``."""
+    prepared = runner._prepare_run(
+        clients=2,
+        run_duration=30,
+        payload={"prompt": "test"},
+    )
+    assert prepared.run_duration == 30
+    assert prepared._time_bound is True
+    assert prepared._n_requests == 0
+
+
+def test_prepare_run_duration_and_n_requests_conflict(runner: Runner):
+    """``_prepare_run`` rejects n_requests combined with run_duration."""
+    with pytest.raises(ValueError, match="Cannot set both"):
+        runner._prepare_run(
+            clients=2,
+            run_duration=30,
+            n_requests=10,
+            payload={"prompt": "test"},
+        )
diff --git a/tests/unit/test_running_stats.py b/tests/unit/test_running_stats.py
new file mode 100644
index 0000000..2f04d78
--- /dev/null
+++ b/tests/unit/test_running_stats.py
@@ -0,0 +1,220 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import time
+
+import pytest
+
+from llmeter.utils import RunningStats
+
+
+@pytest.fixture
+def rs():
+    return RunningStats(
+        metrics=[
+            "time_to_first_token",
+            "time_to_last_token",
+            "time_per_output_token",
+            "num_tokens_input",
+            "num_tokens_output",
+        ]
+    )
+
+
+@pytest.fixture
+def populated_rs(rs):
+    """A RunningStats with 3 responses recorded."""
+    responses = [
+        {
+            "time_to_first_token": 0.3,
+            "time_to_last_token": 0.8,
+            "time_per_output_token": 0.02,
+            "num_tokens_input": 100,
+            "num_tokens_output": 25,
+            "error": None,
+        },
+        {
+            "time_to_first_token": 0.5,
+            "time_to_last_token": 1.2,
+            "time_per_output_token": 0.03,
+            "num_tokens_input": 120,
+            "num_tokens_output": 30,
+            "error": None,
+        },
+        {
+            "time_to_first_token": 0.4,
+            "time_to_last_token": 1.0,
+            "time_per_output_token": 0.025,
+            "num_tokens_input": 110,
+            "num_tokens_output": 28,
+            "error": "timeout",
+        },
+    ]
+    for r in responses:
+        rs.record_send()
+        rs.update(r)
+    return rs
+
+
+# ── record_send ──────────────────────────────────────────────────────────────
+
+
+class TestRecordSend:
+    def test_first_send_sets_first_time(self, rs):
+        assert rs._first_send_time is None
+        rs.record_send()
+        assert rs._first_send_time is not None
+        assert rs._last_send_time is not None
+        assert rs._sends == 1
+
+    def test_subsequent_sends_update_last_time(self, rs):
+        rs.record_send()
+        first = rs._first_send_time
+        time.sleep(0.01)
+        rs.record_send()
+        assert rs._first_send_time == first
+        assert rs._last_send_time > first
+        assert rs._sends == 2
+
+    def test_send_count_increments(self, rs):
+        for _ in range(5):
+            rs.record_send()
+        assert rs._sends == 5
+
+
+# ── update ───────────────────────────────────────────────────────────────────
+
+
+class TestUpdate:
+    def test_count_increments(self, rs):
+        rs.update({"time_to_first_token": 0.3, "error": None})
+        assert rs._count == 1
+        rs.update({"time_to_first_token": 0.5, "error": None})
+        assert rs._count == 2
+
+    def test_failed_count(self, rs):
+        rs.update({"error": "timeout"})
+        rs.update({"error": None})
+        rs.update({"error": "connection refused"})
+        assert rs._failed == 2
+
+    def test_none_values_skipped(self, rs):
+        rs.update({"time_to_first_token": None, "error": None})
+        assert len(rs._values["time_to_first_token"]) == 0
+
+    def test_nan_values_skipped(self, rs):
+        rs.update({"time_to_first_token": float("nan"), "error": None})
+        assert len(rs._values["time_to_first_token"]) == 0
+
+    def test_sums_accumulated(self, rs):
+        rs.update({"num_tokens_output": 10, "error": None})
+        rs.update({"num_tokens_output": 20, "error": None})
+        assert rs._sums["num_tokens_output"] == 30
+
+    def test_values_sorted(self, rs):
+        rs.update({"time_to_first_token": 0.5, "error": None})
+        rs.update({"time_to_first_token": 0.1, "error": None})
+        rs.update({"time_to_first_token": 0.3, "error": None})
+        assert rs._values["time_to_first_token"] == [0.1, 0.3, 0.5]
+
+
+# ── to_stats ─────────────────────────────────────────────────────────────────
+
+
+class TestToStats:
+    def test_basic_stats(self, populated_rs):
+        stats = populated_rs.to_stats()
+        assert stats["failed_requests"] == 1
+        assert "time_to_first_token-p50" in stats
+        assert "time_to_last_token-average" in stats
+        assert "num_tokens_output-p90" in stats
+
+    def test_with_run_context(self, populated_rs):
+        stats = populated_rs.to_stats(
+            total_requests=3,
+            total_test_time=10.0,
+            result_dict={"model_id": "test"},
+        )
+        assert stats["model_id"] == "test"
+        assert stats["requests_per_minute"] == pytest.approx(18.0)
+        assert stats["failed_requests_rate"] == pytest.approx(1 / 3)
+        assert stats["total_output_tokens"] == 83
+
+    def test_without_run_context(self, populated_rs):
+        stats = populated_rs.to_stats()
+        assert stats["failed_requests"] == 1
+        assert stats["total_input_tokens"] == 330
+        assert stats["total_output_tokens"] == 83
+
+    def test_empty_stats(self, rs):
+        stats = rs.to_stats()
+        assert stats["failed_requests"] == 0
+        assert stats["total_input_tokens"] == 0
+
+
+# ── snapshot ─────────────────────────────────────────────────────────────────
+
+
+class TestSnapshot:
+    def test_placeholder_when_empty(self, rs):
+        result = rs.snapshot()
+        assert all(v == "—" for v in result.values())
+        # Should have all default keys
+        assert "rpm" in result
+        assert "p50_ttft" in result
+        assert "fail" in result
+        assert "output_tps" in result
+
+    def test_placeholder_with_custom_fields(self, rs):
+        fields = {"my_rpm": "rpm", "my_fail": "failed"}
+        result = rs.snapshot(fields)
+        assert result == {"my_rpm": "—", "my_fail": "—"}
+
+    def test_failed_count(self, populated_rs):
+        result = populated_rs.snapshot({"fail": "failed"})
+        assert result["fail"] == "1"
+
+    def test_rpm_uses_send_window(self, rs):
+        rs._first_send_time = 100.0
+        rs._last_send_time = 110.0  # 10 second window
+        rs.update({"error": None})
+        rs.update({"error": None})
+        rs.update({"error": None})
+        result = rs.snapshot({"rpm": "rpm"})
+        # 3 responses / 10 seconds * 60 = 18.0 rpm
+        assert result["rpm"] == "18.0"
+
+    def test_rpm_not_shown_with_single_send(self, rs):
+        """With only one send, first == last, no window to compute RPM."""
+        rs._first_send_time = 100.0
+        rs._last_send_time = 100.0
+        rs.update({"error": None})
+        result = rs.snapshot({"rpm": "rpm"})
+        assert "rpm" not in result
+
+    def test_output_tps_uses_send_window(self, rs):
+        rs._first_send_time = 100.0
+        rs._last_send_time = 110.0  # 10 second window
+        rs.update({"num_tokens_output": 500, "error": None})
+        rs.update({"num_tokens_output": 300, "error": None})
+        result = rs.snapshot({"tps": "output_tps"})
+        # 800 tokens / 10 seconds = 80.0 tok/s
+        assert result["tps"] == "80.0 tok/s"
+
+    def test_sum_aggregation(self, populated_rs):
+        result = populated_rs.snapshot({"out": ("num_tokens_output", "sum")})
+        assert result["out"] == "83"
+
+    def test_percentile_aggregation(self, populated_rs):
+        result = populated_rs.snapshot({"p50": ("time_to_first_token", "p50")})
+        assert "p50" in result
+        assert result["p50"].endswith("s")
+
+    def test_inverse_aggregation(self, populated_rs):
+        result = populated_rs.snapshot({"tps": ("time_per_output_token", "p50", "inv")})
+        assert "tps" in result
+        assert "tok/s" in result["tps"]
+
+    def test_empty_fields_returns_empty(self, populated_rs):
+        result = populated_rs.snapshot({})
+        assert result == {}

From 46d145442e532ee9619ab1820024a4bf16ccabbe Mon Sep 17 00:00:00 2001
From: Alessandro Cere <alecere@amazon.com>
Date: Wed, 8 Apr 2026 16:25:20 +0800
Subject: [PATCH 3/3] refactor: address PR #58 review comments

Consolidate live display config (review comment 1):
- Merge _GROUP_PATTERNS + _GROUP_ORDER into single DEFAULT_GROUPS tuple
- Make groups a constructor parameter on LiveStatsDisplay

Move display aliases from RunningStats to LiveStatsDisplay (comment 4):
- Remove RunningStats.snapshot() and DEFAULT_SNAPSHOT_STATS
- Add rpm/output_tps as regular keys in RunningStats.to_stats()
- Add LiveStatsDisplay.format_stats() owning alias mapping + formatting
- New DEFAULT_DISPLAY_STATS in live_display.py maps display labels to
  canonical stat keys (e.g. "time_to_first_token-p50")
- Runner passes raw to_stats() output; display handles the rest

Cache fallback stats computation (comment 2):
- Result.stats property stores the _compute_stats() fallback result in _preloaded_stats so later accesses reuse it

Preserve contributed stats on load (comment 3):
- Result.load(load_responses=True) merges extra keys from stats.json
  so callback-contributed stats survive save/load round-trips

Make Result fields optional (comment 5):
- total_requests, clients, n_requests now have defaults (None, 1, None) so they are optional, in line with _RunConfig

Accept timedelta for run_duration (comment 6):
- run_duration accepts int | float | timedelta; normalized in __post_init__

Remove _n_requests indirection (comment 7):
- Eliminated private _n_requests; n_requests set directly to resolved value

Consolidate invoke methods (comment 8):
- Merged 6 methods into 3: _invoke_n_no_wait (n + duration),
  _invoke_client (replaces _invoke_n/_invoke_duration),
  _invoke_clients (replaces _invoke_n_c/_invoke_duration_c)

Tests:
- Add TestContributedStatsRoundTrip (8 tests) for save/load round-trips
- Add TestSendWindowStats for rpm/output_tps in to_stats()
- Add TestFormatStat for display formatting
- Update all tests for renamed methods and new APIs
---
 docs/user_guide/run_experiments.md |   2 +-
 llmeter/experiments.py             |   8 +-
 llmeter/live_display.py            | 206 +++++++++++++----
 llmeter/results.py                 |  23 +-
 llmeter/runner.py                  | 342 ++++++++++++-----------------
 llmeter/utils.py                   | 148 ++-----------
 tests/unit/test_experiments.py     |   2 +-
 tests/unit/test_live_display.py    | 107 +++++++--
 tests/unit/test_results.py         | 186 ++++++++++++++++
 tests/unit/test_runner.py          |  64 +++---
 tests/unit/test_running_stats.py   |  74 +++----
 11 files changed, 690 insertions(+), 472 deletions(-)

diff --git a/docs/user_guide/run_experiments.md b/docs/user_guide/run_experiments.md
index 87d6819..a0e63dd 100644
--- a/docs/user_guide/run_experiments.md
+++ b/docs/user_guide/run_experiments.md
@@ -75,7 +75,7 @@ results = await endpoint_test.run(
 )
 ```
 
-Pass `progress_bar_stats={}` to disable live stats entirely. See [`RunningStats.DEFAULT_SNAPSHOT_STATS`](../reference/utils.md#llmeter.utils.RunningStats) for the full default configuration.
+Pass `progress_bar_stats={}` to disable live stats entirely. See [`DEFAULT_DISPLAY_STATS`](../reference/live_display.md) for the full default configuration.
 
 ### Low-memory mode
 
diff --git a/llmeter/experiments.py b/llmeter/experiments.py
index 55eece0..ac1a651 100644
--- a/llmeter/experiments.py
+++ b/llmeter/experiments.py
@@ -9,7 +9,7 @@
 import logging
 import os
 from dataclasses import dataclass, field
-from datetime import datetime
+from datetime import datetime, timedelta
 from math import ceil
 from typing import Callable, Literal
 
@@ -140,7 +140,7 @@ class LoadTest:
         low_memory (bool): When ``True``, responses are written to disk but not kept in
             memory. Requires ``output_path``. Defaults to ``False``.
         progress_bar_stats (dict | None): Controls which live stats appear on the progress
-            bar. See ``RunningStats.DEFAULT_SNAPSHOT_STATS`` for the default.
+            bar. See ``DEFAULT_DISPLAY_STATS`` in ``llmeter.live_display`` for the default.
         output_path (os.PathLike | str | None): Where to save results.
         tokenizer (Tokenizer | None): Optional tokenizer for token counting.
         test_name (str | None): Name for this test. Defaults to current date/time.
@@ -186,9 +186,9 @@ class LoadTest:
     sequence_of_clients: list[int]
     min_requests_per_client: int = 1
     min_requests_per_run: int = 10
-    run_duration: int | float | None = None
+    run_duration: int | float | timedelta | None = None
     low_memory: bool = False
-    progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None
+    progress_bar_stats: dict[str, str | tuple[str, str]] | None = None
     output_path: os.PathLike | str | None = None
     tokenizer: Tokenizer | None = None
     test_name: str | None = None
diff --git a/llmeter/live_display.py b/llmeter/live_display.py
index f8c9416..86a5f31 100644
--- a/llmeter/live_display.py
+++ b/llmeter/live_display.py
@@ -15,60 +15,117 @@
 
 logger = logging.getLogger(__name__)
 
-# Mapping from key substrings to (group_name, display_order).
-# Stats are grouped by the first matching pattern; unmatched keys go to "Other".
-_GROUP_PATTERNS: list[tuple[str, str]] = [
-    ("rpm", "Throughput"),
-    ("tps", "Throughput"),
-    ("ttft", "TTFT"),
-    ("ttlt", "TTLT"),
-    ("token", "Tokens"),
-    ("fail", "Errors"),
-]
-
-_GROUP_ORDER = ["Throughput", "TTFT", "TTLT", "Tokens", "Errors", "Other"]
+#: Default grouping of stat keys for display.  Each entry is
+#: ``(group_name, tuple_of_substrings)``; a stat key is assigned to the first
+#: group whose substring matches (case-insensitive).  Unmatched keys fall into
+#: ``"Other"``.  The tuple order defines the column order in the rendered table.
+DEFAULT_GROUPS: tuple[tuple[str, tuple[str, ...]], ...] = (
+    ("Throughput", ("rpm", "tps")),
+    ("TTFT", ("ttft",)),
+    ("TTLT", ("ttlt",)),
+    ("Tokens", ("token",)),
+    ("Errors", ("fail",)),
+    ("Other", ("",)),
+)
+
+#: Default stats to show on the progress bar during a run.
+#:
+#: Each entry maps a short display label to a *stat spec*:
+#:
+#: * A plain string — the canonical key in ``RunningStats.to_stats()``
+#:   (e.g. ``"failed_requests"``, ``"rpm"``, ``"time_to_first_token-p50"``).
+#: * A ``(stat_key, "inv")`` tuple — display the reciprocal of the value
+#:   (e.g. seconds-per-token → tokens-per-second).
+DEFAULT_DISPLAY_STATS: dict[str, str | tuple[str, str]] = {
+    "rpm": "rpm",
+    "output_tps": "output_tps",
+    "p50_ttft": "time_to_first_token-p50",
+    "p90_ttft": "time_to_first_token-p90",
+    "p50_ttlt": "time_to_last_token-p50",
+    "p90_ttlt": "time_to_last_token-p90",
+    "p50_tps": ("time_per_output_token-p50", "inv"),
+    "input_tokens": "num_tokens_input-sum",
+    "output_tokens": "num_tokens_output-sum",
+    "fail": "failed_requests",
+}
+
+
+def _format_stat(key: str, value: float | int, *, invert: bool = False) -> str:
+    """Format a single stat value as a human-readable string.
 
+    Args:
+        key: The canonical stat key (used to infer units).
+        value: The raw numeric value.
+        invert: If ``True``, display ``1/value`` (e.g. time → rate).
 
-def _classify(key: str) -> str:
+    Returns:
+        A formatted string like ``"0.312s"``, ``"28.3 tok/s"``, or ``"83"``.
+    """
+    if invert and value > 0:
+        return f"{1.0 / value:.1f} tok/s"
+    if "tps" in key or "output_tps" in key:
+        return f"{value:.1f} tok/s"
+    if "time" in key or "ttft" in key or "ttlt" in key:
+        return f"{value:.3f}s"
+    if "rpm" in key:
+        return f"{value:.1f}"
+    if isinstance(value, float) and value == int(value):
+        return str(int(value))
+    if isinstance(value, int):
+        return str(value)
+    return f"{value:.1f}"
+
+
+def _classify(
+    key: str,
+    groups: tuple[tuple[str, tuple[str, ...]], ...] = DEFAULT_GROUPS,
+) -> str:
     """Return the group name for a stat key based on substring matching.
 
-    Matches the key (case-insensitive) against ``_GROUP_PATTERNS``. The first
-    matching pattern determines the group. Unmatched keys are placed in
-    ``"Other"``.
+    Matches the key (case-insensitive) against *groups*. The first matching
+    pattern determines the group. Unmatched keys are placed in ``"Other"``.
 
     Args:
         key (str): The stat display label to classify (e.g. ``"p50_ttft"``).
+        groups: Group definitions to match against. Defaults to
+            :data:`DEFAULT_GROUPS`.
 
     Returns:
         str: The group name (e.g. ``"TTFT"``, ``"Throughput"``, ``"Other"``).
     """
     key_lower = key.lower()
-    for pattern, group in _GROUP_PATTERNS:
-        if pattern in key_lower:
-            return group
+    for group_name, patterns in groups:
+        for pattern in patterns:
+            if pattern and pattern in key_lower:
+                return group_name
     return "Other"
 
 
-def _group_stats(stats: dict[str, str]) -> OrderedDict[str, list[tuple[str, str]]]:
+def _group_stats(
+    stats: dict[str, str],
+    groups: tuple[tuple[str, tuple[str, ...]], ...] = DEFAULT_GROUPS,
+) -> OrderedDict[str, list[tuple[str, str]]]:
     """Organize stats into ordered groups for display.
 
     Each stat key is classified via :func:`_classify` and placed into the
     corresponding group. Groups are returned in the canonical order defined
-    by ``_GROUP_ORDER``, with empty groups omitted.
+    by *groups*, with empty groups omitted.
 
     Args:
         stats (dict[str, str]): Mapping of stat labels to formatted values.
+        groups: Group definitions controlling classification and order.
+            Defaults to :data:`DEFAULT_GROUPS`.
 
     Returns:
         OrderedDict[str, list[tuple[str, str]]]: Groups in display order, where
         each value is a list of ``(label, formatted_value)`` tuples.
     """
-    groups: dict[str, list[tuple[str, str]]] = {}
+    buckets: dict[str, list[tuple[str, str]]] = {}
     for k, v in stats.items():
-        group = _classify(k)
-        groups.setdefault(group, []).append((k, v))
-    # Return in canonical order, skipping empty groups
-    return OrderedDict((g, groups[g]) for g in _GROUP_ORDER if g in groups)
+        group = _classify(k, groups)
+        buckets.setdefault(group, []).append((k, v))
+    group_order = [name for name, _ in groups]
+    return OrderedDict((g, buckets[g]) for g in group_order if g in buckets)
 
 
 def _in_notebook() -> bool:
@@ -95,43 +152,114 @@ class LiveStatsDisplay:
 
     In Jupyter notebooks, renders a grouped HTML table that updates in-place.
     Stats are automatically organized into logical groups (Throughput, TTFT,
-    TTLT, Tokens, Errors) based on their key names.
+    TTLT, Tokens, Errors) based on their display label names.
 
     In terminals, prints a compact grouped multi-line block using ANSI escape
     codes to overwrite previous output.
 
+    The display owns all alias mapping and formatting.  Callers pass raw
+    numeric stats (e.g. from ``RunningStats.to_stats()``) and the display
+    selects, aliases, formats, and groups them for presentation.
+
     Args:
         disabled (bool): If ``True``, all display calls are no-ops.
+        groups: Group definitions controlling how display labels are classified
+            and ordered.  Defaults to :data:`DEFAULT_GROUPS`.
+        display_stats: Mapping of ``{display_label: stat_spec}`` controlling
+            which stats to show and how to label them.  Each *stat_spec* is
+            either a plain canonical key string (e.g. ``"time_to_first_token-p50"``)
+            or a ``(key, "inv")`` tuple for reciprocal display.
+            Defaults to :data:`DEFAULT_DISPLAY_STATS`.
 
     Example::
 
         display = LiveStatsDisplay()
-        display.update({"rpm": "185.9", "p50_ttft": "0.312s", "fail": "0"})
-        display.update({"rpm": "190.2", "p50_ttft": "0.305s", "fail": "1"})
+        raw = running_stats.to_stats()
+        display.update(raw)
         display.close()
     """
 
-    def __init__(self, disabled: bool = False):
+    def __init__(
+        self,
+        disabled: bool = False,
+        groups: tuple[tuple[str, tuple[str, ...]], ...] = DEFAULT_GROUPS,
+        display_stats: dict[str, str | tuple[str, str]] | None = None,
+    ):
         self._disabled = disabled
+        self._groups = groups
+        self._display_stats = (
+            display_stats if display_stats is not None else DEFAULT_DISPLAY_STATS
+        )
         self._is_notebook = _in_notebook()
         self._handle = None
         self._last_line_count = 0
 
-    def update(self, stats: dict[str, str], extra_prefix: str = "") -> None:
-        """Refresh the display with new stats.
+    def format_stats(
+        self,
+        raw: dict[str, object],
+    ) -> dict[str, str]:
+        """Select and format raw stats for display.
+
+        Picks the stats listed in ``self._display_stats`` from *raw*, applies
+        alias renaming and formatting, and returns an ordered dict of
+        ``{display_label: formatted_value}`` strings.
 
         Args:
-            stats (dict[str, str]): Mapping of label to formatted value.
+            raw: Flat dictionary of raw numeric stats, as returned by
+                ``RunningStats.to_stats()``.
+
+        Returns:
+            Ordered dict of ``{label: formatted_string}`` suitable for
+            rendering.
+        """
+        if not raw:
+            return {label: "—" for label in self._display_stats}
+
+        info: dict[str, str] = {}
+        for label, spec in self._display_stats.items():
+            if isinstance(spec, tuple):
+                key, modifier = spec[0], spec[1]
+                invert = modifier == "inv"
+            else:
+                key = spec
+                invert = False
+
+            val = raw.get(key)
+            if val is None:
+                info[label] = "—"
+                continue
+
+            try:
+                info[label] = _format_stat(key, float(val), invert=invert)
+            except (TypeError, ValueError):
+                info[label] = str(val)
+
+        return info
+
+    def update(
+        self,
+        raw_stats: dict[str, object],
+        extra_prefix: str = "",
+    ) -> None:
+        """Refresh the display with new raw stats.
+
+        Args:
+            raw_stats: Flat dictionary of raw numeric stats from
+                ``RunningStats.to_stats()``.
             extra_prefix (str): Optional prefix text shown before the table
                 (e.g. ``"reqs=127"`` for time-bound runs).
         """
-        if self._disabled or not stats:
+        if self._disabled:
+            return
+
+        formatted = self.format_stats(raw_stats)
+        if not formatted:
             return
 
         if self._is_notebook:
-            self._update_notebook(stats, extra_prefix)
+            self._update_notebook(formatted, extra_prefix)
         else:
-            self._update_terminal(stats, extra_prefix)
+            self._update_terminal(formatted, extra_prefix)
 
     def _update_notebook(self, stats: dict[str, str], extra_prefix: str) -> None:
         """Render stats as a grouped HTML table in a Jupyter notebook.
@@ -146,9 +274,7 @@ def _update_notebook(self, stats: dict[str, str], extra_prefix: str) -> None:
         """
         from IPython.display import HTML, display
 
-        groups = _group_stats(stats)
-
-        # Build one column per group: header on top, key=value rows below
+        groups = _group_stats(stats, self._groups)
         # All columns rendered side-by-side in a single table row
         max_rows = max(len(items) for items in groups.values())
 
@@ -210,7 +336,7 @@ def _update_terminal(self, stats: dict[str, str], extra_prefix: str) -> None:
         if self._last_line_count > 0:
             sys.stderr.write(f"\033[{self._last_line_count}A\033[J")
 
-        groups = _group_stats(stats)
+        groups = _group_stats(stats, self._groups)
         lines = []
         if extra_prefix:
             lines.append(f"  {extra_prefix}")
diff --git a/llmeter/results.py b/llmeter/results.py
index 6654d2e..1652d0f 100644
--- a/llmeter/results.py
+++ b/llmeter/results.py
@@ -43,9 +43,9 @@ class Result:
     """Results of a test run."""
 
     responses: list[InvocationResponse]
-    total_requests: int
-    clients: int
-    n_requests: int
+    total_requests: int | None = None
+    clients: int = 1
+    n_requests: int | None = None
     total_test_time: float | None = None
     model_id: str | None = None
     output_path: os.PathLike | None = None
@@ -260,8 +260,19 @@ def load(
             else:
                 result._preloaded_stats = None
         else:
-            # Compute stats from the loaded responses
+            # Compute stats from the loaded responses, but also merge any
+            # contributed stats that were persisted in stats.json so they
+            # survive a save/load round-trip.
             result._preloaded_stats = cls._compute_stats(result)
+            stats_path = result_path / "stats.json"
+            if stats_path.exists():
+                with stats_path.open("r") as s:
+                    saved_stats = json.loads(s.read())
+                # Contributed stats are any keys in the saved file that are
+                # not produced by _compute_stats (i.e. they came from callbacks).
+                for key, value in saved_stats.items():
+                    if key not in result._preloaded_stats:
+                        result._preloaded_stats[key] = value
 
         return result
 
@@ -336,7 +347,9 @@ def stats(self) -> dict:
             stats = self._preloaded_stats.copy()
         else:
             # Fallback: compute from responses (e.g. Result constructed manually)
-            stats = self._compute_stats(self)
+            # Cache so subsequent accesses don't recompute.
+            self._preloaded_stats = self._compute_stats(self)
+            stats = self._preloaded_stats.copy()
 
         if self._contributed_stats:
             stats.update(self._contributed_stats)
diff --git a/llmeter/runner.py b/llmeter/runner.py
index 0605a36..7922868 100644
--- a/llmeter/runner.py
+++ b/llmeter/runner.py
@@ -12,7 +12,7 @@
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from dataclasses import InitVar, asdict, dataclass, fields, replace
-from datetime import datetime
+from datetime import datetime, timedelta
 from itertools import cycle
 from typing import TYPE_CHECKING, Any
 from uuid import uuid4
@@ -57,14 +57,14 @@ class _RunConfig:
     tokenizer: Tokenizer | Any | None = None
     clients: int = 1
     n_requests: int | None = None
-    run_duration: int | float | None = None
+    run_duration: int | float | timedelta | None = None
     payload: dict | list[dict] | os.PathLike | str | None = None
     run_name: str | None = None
     run_description: str | None = None
     timeout: int | float = 60
     callbacks: list[Callback] | None = None
     low_memory: bool = False
-    progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None
+    progress_bar_stats: dict[str, str | tuple[str, str]] | None = None
     disable_per_client_progress_bar: InitVar[bool] = True
     disable_clients_progress_bar: InitVar[bool] = True
 
@@ -73,10 +73,12 @@ def __post_init__(self, disable_client_progress_bar, disable_clients_progress_ba
         self._disable_clients_progress_bar = disable_clients_progress_bar
         self._random_seed = 0
 
-        if self.n_requests is not None:
+        if self.n_requests is not None and self.run_duration is None:
             assert self.n_requests > 0, "Number of requests must be a positive integer"
 
         if self.run_duration is not None:
+            if isinstance(self.run_duration, timedelta):
+                self.run_duration = self.run_duration.total_seconds()
             assert self.run_duration > 0, "Run duration must be a positive number"
 
         assert self.clients > 0, "Number of clients must be a positive integer"
@@ -190,10 +192,10 @@ def _validate_and_prepare_payload(self):
 
         Normalizes the payload into a list of dicts, validates that ``n_requests``
         and ``run_duration`` are not both set, and sets ``_time_bound`` and
-        ``_n_requests`` accordingly.
+        ``n_requests`` accordingly.
 
-        For count-bound runs, ``_n_requests`` defaults to the number of payloads
-        when not explicitly provided. For time-bound runs, ``_n_requests`` is set
+        For count-bound runs, ``n_requests`` defaults to the number of payloads
+        when not explicitly provided. For time-bound runs, ``n_requests`` is set
         to 0 since the actual count is unknown upfront.
 
         Raises:
@@ -207,7 +209,11 @@ def _validate_and_prepare_payload(self):
         if isinstance(self.payload, dict):
             self.payload = [self.payload]
 
-        if self.run_duration is not None and self.n_requests is not None:
+        if (
+            self.run_duration is not None
+            and self.n_requests is not None
+            and self.n_requests != 0
+        ):
             raise ValueError(
                 "Cannot set both n_requests and run_duration. "
                 "Use n_requests for request-bound runs or run_duration for time-bound runs."
@@ -215,10 +221,11 @@ def _validate_and_prepare_payload(self):
 
         self._time_bound = self.run_duration is not None
         if self._time_bound:
-            # For time-bound runs, _n_requests is unknown upfront
-            self._n_requests = 0
+            # For time-bound runs, n_requests is unknown upfront; set to 0
+            # and update to the actual count after the run completes.
+            self.n_requests = 0
         else:
-            self._n_requests = self.n_requests or len(self.payload)
+            self.n_requests = self.n_requests or len(self.payload)
 
     @staticmethod
     async def _compute_time_per_output_token(response: InvocationResponse):
@@ -307,12 +314,12 @@ async def _process_results_from_q(self, output_path: Path | None = None):
                 self._progress_bar.update(1)
 
             if self._stats_display is not None:
-                snapshot = self._running_stats.snapshot(self.progress_bar_stats)
-                if snapshot:
+                raw = self._running_stats.to_stats()
+                if raw:
                     prefix = (
                         f"reqs={self._running_stats._count}" if self._time_bound else ""
                     )
-                    self._stats_display.update(snapshot, extra_prefix=prefix)
+                    self._stats_display.update(raw, extra_prefix=prefix)
 
             if output_path:
                 output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -325,18 +332,27 @@ def _invoke_n_no_wait(
         self,
         payload: list[dict],
         n: int | None = None,
+        duration: float | None = None,
         shuffle_order=True,
     ) -> list[InvocationResponse]:
-        """Generate *n* invocations synchronously for a single client.
+        """Generate invocations synchronously for a single client.
 
-        Cycles through *payload* until *n* invocations are generated, sending
-        each request to the endpoint and pushing the response onto
-        ``self._queue`` for async token-counting and stats collection.
+        Terminates when either *n* requests have been sent or *duration* seconds
+        have elapsed, whichever is specified.  Pass *n* or *duration*, not both;
+        if neither is given, every element in the payload is sent exactly once.
+
+        Cycles through *payload*, sending each request to the endpoint and
+        pushing the response onto ``self._queue`` for async token-counting and
+        stats collection.
 
         Args:
             payload (list[dict]): The input payloads to cycle through.
             n (int | None, optional): The number of invocations to generate.
-                If not specified, every element in the payload is used once.
+                If not specified, every element in the payload is used once
+                (only when *duration* is also ``None``).
+            duration (float | None, optional): Maximum wall-clock seconds to
+                keep sending requests.  When set, requests are sent continuously
+                until the deadline.
             shuffle_order (bool, optional): Whether to shuffle the order of payloads
                 before generating invocations. Defaults to True.
 
@@ -351,71 +367,41 @@ def _invoke_n_no_wait(
             random.seed(0)
             payload = random.sample(payload, k=len(payload))
 
-        responses = []
-        if n is None:
-            n = len(payload)
+        responses: list[InvocationResponse] = []
         if not payload:
             return responses
-        payload_iter = cycle(payload)
-        pbar = trange(
-            n,
-            leave=False,
-            desc="Requests",
-            disable=_disable_tqdm or self._disable_per_client_progress_bar,
-        )
-        for _ in pbar:
-            p = next(payload_iter)
-            try:
-                p = asyncio.run(process_before_invoke_callbacks(self.callbacks, p))
-                self._running_stats.record_send()
-                response = self._endpoint.invoke(p)
-
-            except Exception as e:
-                logger.exception(f"Error with invocation with payload {p}: {e}")
-                response = InvocationResponse.error_output(
-                    id=uuid4().hex,
-                    error=str(e),
-                )
-            responses.append(response)
-            if self._queue:
-                # fix for thread-aware sync, from https://stackoverflow.com/a/57316517/2109965
-                self._queue._loop.call_soon_threadsafe(  # type: ignore
-                    self._queue.put_nowait, response
-                )
-        return responses
 
-    def _invoke_for_duration(
-        self,
-        payload: list[dict],
-        duration: float,
-        shuffle_order=True,
-    ) -> list[InvocationResponse]:
-        """Generate invocations continuously until *duration* seconds have elapsed.
-
-        Cycles through *payload* indefinitely, stopping only when the wall-clock
-        time exceeds *duration*. Each completed request is pushed onto
-        ``self._queue`` for async token-counting and stats collection, mirroring
-        the behaviour of :meth:`_invoke_n_no_wait`.
+        time_bound = duration is not None
+        if time_bound:
+            deadline = time.perf_counter() + duration
+        else:
+            if n is None:
+                n = len(payload)
 
-        Args:
-            payload (list[dict]): The input payloads to cycle through.
-            duration (float): Maximum wall-clock seconds to keep sending requests.
-            shuffle_order (bool, optional): Whether to shuffle the order of payloads
-                before generating invocations. Defaults to True.
+        payload_iter = cycle(payload)
 
-        Returns:
-            list[InvocationResponse]: All responses collected during the window.
-        """
-        if shuffle_order:
-            self._random_seed += random.randint(1, 1000)
-            random.seed(0)
-            payload = random.sample(payload, k=len(payload))
+        # Count-bound runs get a trange progress bar; time-bound runs use a
+        # separate _tick_time_bar task so we skip the per-client bar here.
+        pbar = (
+            trange(
+                n,
+                leave=False,
+                desc="Requests",
+                disable=_disable_tqdm or self._disable_per_client_progress_bar,
+            )
+            if not time_bound
+            else None
+        )
 
-        responses: list[InvocationResponse] = []
-        deadline = time.perf_counter() + duration
-        payload_iter = cycle(payload)
+        sent = 0
+        while True:
+            if time_bound:
+                if time.perf_counter() >= deadline:
+                    break
+            else:
+                if sent >= n:
+                    break
 
-        while time.perf_counter() < deadline:
             p = next(payload_iter)
             try:
                 p = asyncio.run(process_before_invoke_callbacks(self.callbacks, p))
@@ -429,27 +415,37 @@ def _invoke_for_duration(
                 )
             responses.append(response)
             if self._queue:
+                # fix for thread-aware sync, from https://stackoverflow.com/a/57316517/2109965
                 self._queue._loop.call_soon_threadsafe(  # type: ignore
                     self._queue.put_nowait, response
                 )
+            sent += 1
+            if pbar is not None:
+                pbar.update(1)
+
+        if pbar is not None:
+            pbar.close()
         return responses
 
-    async def _invoke_n(
+    async def _invoke_client(
         self,
         payload: list[dict],
         n: int | None = None,
+        duration: float | None = None,
         add_start_jitter=True,
         shuffle_order=True,
     ) -> list[InvocationResponse]:
-        """Asynchronously generate *n* invocations for a single client.
+        """Asynchronously generate invocations for a single client.
 
-        Wraps :meth:`_invoke_n_no_wait` in a thread with an overall timeout
-        of ``self.timeout * n`` seconds.
+        Wraps :meth:`_invoke_n_no_wait` in a thread.  Count-bound runs get an
+        overall timeout of ``self.timeout * (n or len(payload))``; time-bound
+        runs have no extra timeout (the duration itself is the limit).
 
         Args:
             payload (list[dict]): The input payload(s) to generate invocations for.
             n (int | None, optional): The number of invocations to generate.
                 Defaults to None (one per payload element).
+            duration (float | None, optional): Maximum wall-clock seconds.
             add_start_jitter (bool, optional): Whether to add a random delay before
                 starting the invocations loop to avoid batch bunching when using
                 multiple clients. Defaults to True.
@@ -458,7 +454,7 @@ async def _invoke_n(
 
         Returns:
             list[InvocationResponse]: A list of response objects. Returns an empty
-            list if the overall timeout is exceeded.
+            list if the overall timeout is exceeded (count-bound only).
         """
 
         if add_start_jitter:
@@ -467,70 +463,43 @@ async def _invoke_n(
         if shuffle_order:
             self._random_seed = random.randint(0, 2**16 - 1)
 
+        coro = asyncio.to_thread(
+            self._invoke_n_no_wait, payload, n, duration, shuffle_order
+        )
+
+        if duration is not None:
+            # Time-bound: no extra timeout — the duration is the limit
+            return await coro
+
         try:
-            response = await asyncio.wait_for(
-                asyncio.to_thread(self._invoke_n_no_wait, payload, n, shuffle_order),
+            return await asyncio.wait_for(
+                coro,
                 timeout=self.timeout * (n or len(payload)),
             )
         except asyncio.TimeoutError:
             logger.error("client timeout!")
             return []
 
-        return response
-
-    async def _invoke_duration(
-        self,
-        payload: list[dict],
-        add_start_jitter=True,
-        shuffle_order=True,
-    ) -> list[InvocationResponse]:
-        """Asynchronously generate invocations for a single client until duration expires.
-
-        Wraps :meth:`_invoke_for_duration` in a thread. The client sends requests
-        continuously for ``self.run_duration`` seconds.
-
-        Args:
-            payload (list[dict]): The input payload(s) to cycle through.
-            add_start_jitter (bool, optional): Whether to add a random delay before
-                starting the invocations loop to avoid batch bunching when using
-                multiple clients. Defaults to True.
-            shuffle_order (bool, optional): Whether to shuffle the order of payloads
-                before generating invocations. Defaults to True.
-
-        Returns:
-            list[InvocationResponse]: All responses collected during the time window.
-        """
-
-        if add_start_jitter:
-            await asyncio.sleep(random.random() * 0.01)
-
-        if shuffle_order:
-            self._random_seed = random.randint(0, 2**16 - 1)
-
-        return await asyncio.to_thread(
-            self._invoke_for_duration,
-            payload,
-            self.run_duration,
-            shuffle_order,
-        )
-
-    async def _invoke_n_c(
+    async def _invoke_clients(
         self,
         payload: list[dict],
         n_requests: int | None = None,
+        duration: float | None = None,
         clients: int = 1,
     ) -> tuple[float, float, float]:
-        """Spawn *clients* concurrent count-bound invocation loops.
+        """Spawn *clients* concurrent invocation loops.
 
-        Each client generates *n_requests* invocations by delegating to
-        :meth:`_invoke_n`. All clients run concurrently and the method waits
-        for all of them to finish before signalling the token-counting queue
-        to stop.
+        Each client generates invocations by delegating to
+        :meth:`_invoke_client`.  All clients run concurrently and the method
+        waits for all of them to finish before signalling the token-counting
+        queue to stop.
 
         Args:
             payload (list[dict]): The input payloads to send.
             n_requests (int | None, optional): The number of invocations to
-                generate per client. Defaults to None.
+                generate per client (count-bound). Defaults to None.
+            duration (float | None, optional): Maximum wall-clock seconds per
+                client (time-bound). Defaults to None.
             clients (int, optional): The number of concurrent client connections.
                 Defaults to 1.
 
@@ -538,63 +507,35 @@ async def _invoke_n_c(
             tuple[float, float, float]: A ``(total_test_time, start_t, end_t)``
             tuple of ``time.perf_counter`` values.
         """
-        logger.info(
-            f"Generating {clients} connections with {n_requests} invocations each"
-        )
+        if duration is not None:
+            logger.info(f"Generating {clients} connections for {duration}s each")
+        else:
+            logger.info(
+                f"Generating {clients} connections with {n_requests} invocations each"
+            )
         start_t = time.perf_counter()
         await tqdm.gather(
-            *[self._invoke_n(payload, n_requests) for _ in range(clients)],
+            *[
+                self._invoke_client(payload, n=n_requests, duration=duration)
+                for _ in range(clients)
+            ],
             leave=False,
             desc="Clients",
             disable=_disable_tqdm or self._disable_clients_progress_bar,
         )
         end_t = time.perf_counter()
         total_test_time = end_t - start_t
-        logger.info(
-            f"Completed {clients} clients x {n_requests} requests in "
-            f"{total_test_time * 1000:.2f}ms"
-        )
 
-        if self._queue:
-            await self._queue.put(None)
-            logger.debug("Signaling token counting task to exit")
-        return total_test_time, start_t, end_t
-
-    async def _invoke_duration_c(
-        self,
-        payload: list[dict],
-        clients: int = 1,
-    ) -> tuple[float, float, float]:
-        """Spawn *clients* concurrent time-bound invocation loops.
-
-        Each client sends requests continuously for ``self.run_duration`` seconds
-        by delegating to :meth:`_invoke_duration`. All clients run concurrently
-        and the method waits for all of them to finish before signalling the
-        token-counting queue to stop.
-
-        Args:
-            payload (list[dict]): The input payloads to cycle through.
-            clients (int, optional): The number of concurrent client connections.
-                Defaults to 1.
-
-        Returns:
-            tuple[float, float, float]: A ``(total_test_time, start_t, end_t)``
-            tuple of ``time.perf_counter`` values.
-        """
-        logger.info(f"Generating {clients} connections for {self.run_duration}s each")
-        start_t = time.perf_counter()
-        await tqdm.gather(
-            *[self._invoke_duration(payload) for _ in range(clients)],
-            leave=False,
-            desc="Clients",
-            disable=_disable_tqdm or self._disable_clients_progress_bar,
-        )
-        end_t = time.perf_counter()
-        total_test_time = end_t - start_t
-        logger.info(
-            f"Completed {clients} clients x {self.run_duration}s in "
-            f"{total_test_time * 1000:.2f}ms"
-        )
+        if duration is not None:
+            logger.info(
+                f"Completed {clients} clients x {duration}s in "
+                f"{total_test_time * 1000:.2f}ms"
+            )
+        else:
+            logger.info(
+                f"Completed {clients} clients x {n_requests} requests in "
+                f"{total_test_time * 1000:.2f}ms"
+            )
 
         if self._queue:
             await self._queue.put(None)
@@ -630,9 +571,9 @@ async def _run(self):
         result = Result(
             responses=[],
             total_test_time=None,
-            total_requests=0 if self._time_bound else self._n_requests * self.clients,
+            total_requests=0 if self._time_bound else self.n_requests * self.clients,
             clients=self.clients,
-            n_requests=self._n_requests,
+            n_requests=self.n_requests,
             output_path=self.output_path,  # type: ignore
             model_id=self._endpoint.model_id,
             provider=self._endpoint.provider,
@@ -667,25 +608,28 @@ async def _run(self):
         else:
             # Count-bound: progress bar shows completed requests
             self._progress_bar = tqdm(
-                total=self.clients * self._n_requests,
+                total=self.clients * self.n_requests,
                 leave=False,
                 desc="Total requests",
                 disable=_disable_tqdm,
             )
 
         # Live stats display — renders as an HTML table in notebooks, multi-line in terminals
-        self._stats_display = LiveStatsDisplay(disabled=_disable_tqdm)
+        self._stats_display = LiveStatsDisplay(
+            disabled=_disable_tqdm,
+            display_stats=self.progress_bar_stats,
+        )
 
         # Show the table layout immediately with placeholder values
-        initial_snapshot = self._running_stats.snapshot(self.progress_bar_stats)
         prefix = "reqs=0" if self._time_bound else ""
-        self._stats_display.update(initial_snapshot, extra_prefix=prefix)
+        self._stats_display.update({}, extra_prefix=prefix)
 
         try:
             run_start_time = now_utc()
             if self._time_bound:
-                invoke_coro = self._invoke_duration_c(
+                invoke_coro = self._invoke_clients(
                     payload=self.payload,  # type: ignore
+                    duration=self.run_duration,
                     clients=self.clients,
                 )
                 _, (total_test_time, start_time, end_time), _ = await asyncio.gather(
@@ -698,9 +642,9 @@ async def _run(self):
                     self._tick_time_bar(),
                 )
             else:
-                invoke_coro = self._invoke_n_c(
+                invoke_coro = self._invoke_clients(
                     payload=self.payload,  # type: ignore
-                    n_requests=self._n_requests,
+                    n_requests=self.n_requests,
                     clients=self.clients,
                 )
                 _, (total_test_time, start_time, end_time) = await asyncio.gather(
@@ -733,7 +677,7 @@ async def _run(self):
             total_requests=actual_total,
             n_requests=actual_total // max(self.clients, 1)
             if self._time_bound
-            else self._n_requests,
+            else self.n_requests,
             start_time=run_start_time,
             end_time=run_end_time,
         )
@@ -809,9 +753,10 @@ class Runner(_RunConfig):
         n_requests (int | None): The number of LLM invocations to generate *per client*. By
             default, each request in `payload` will be sent once by each client.  Mutually
             exclusive with ``run_duration``.
-        run_duration (int | float | None): Run each client for this many seconds instead of a
+        run_duration (int | float | timedelta | None): Run each client for this many seconds instead of a
             fixed request count.  Clients send requests continuously until the duration expires.
             Mutually exclusive with ``n_requests``.  Defaults to ``None`` (count-bound mode).
+            Accepts a number of seconds or a ``timedelta``.
         payload (dict | list[dict] | os.PathLike | str | None): The request data to send to the
             endpoint under test. You can provide a single JSON payload (dict), a list of payloads
             (list[dict]), or a path to one or more JSON/JSON-Lines files to be loaded by
@@ -834,8 +779,8 @@ class Runner(_RunConfig):
             ``result.load_responses()`` to load responses from disk after the run.  Defaults to
             ``False``.
         progress_bar_stats (dict | None): Controls which live stats appear on the progress bar.
-            Maps short display labels to field specs — see
-            :attr:`RunningStats.DEFAULT_SNAPSHOT_STATS` for the format and defaults.  Pass ``{}``
+            Maps short display labels to canonical stat keys — see
+            :data:`~llmeter.live_display.DEFAULT_DISPLAY_STATS` for the format and defaults.  Pass ``{}``
             to disable live stats entirely.  Defaults to ``None`` (use built-in defaults).
         disable_per_client_progress_bar (bool): Set `True` to disable per-client progress bars
             from showing during the run. Default `False` (each client's progress will be shown).
@@ -878,14 +823,14 @@ async def run(
         tokenizer: Tokenizer | Any | None = None,
         clients: int | None = None,
         n_requests: int | None = None,
-        run_duration: int | float | None = None,
+        run_duration: int | float | timedelta | None = None,
         payload: dict | list[dict] | os.PathLike | str | None = None,
         run_name: str | None = None,
         run_description: str | None = None,
         timeout: int | float | None = None,
         callbacks: list[Callback] | None = None,
         low_memory: bool | None = None,
-        progress_bar_stats: dict[str, tuple[str, ...] | str] | None = None,
+        progress_bar_stats: dict[str, str | tuple[str, str]] | None = None,
         disable_per_client_progress_bar: bool | None = None,
         disable_clients_progress_bar: bool | None = None,
     ) -> Result:
@@ -910,9 +855,10 @@ async def run(
             clients (int): The number of concurrent clients to use for sending requests.
             n_requests (int | None): The number of LLM invocations to generate *per client*.
                 Mutually exclusive with ``run_duration``.
-            run_duration (int | float | None): Run each client for this many seconds
+            run_duration (int | float | timedelta | None): Run each client for this many seconds
                 instead of a fixed request count.  Clients send requests continuously
                 until the duration expires.  Mutually exclusive with ``n_requests``.
+                Accepts a number of seconds or a ``timedelta``.
 
                 Example::
 
@@ -948,8 +894,8 @@ async def run(
                     result.load_responses()  # loads from disk
 
             progress_bar_stats (dict): Controls which live stats appear on the
-                progress bar.  Maps short display labels to field specs — see
-                :attr:`RunningStats.DEFAULT_SNAPSHOT_STATS` for the format and
+                progress bar.  Maps short display labels to canonical stat keys — see
+                :data:`~llmeter.live_display.DEFAULT_DISPLAY_STATS` for the format and
                 defaults.  Pass ``{}`` to disable live stats entirely.
 
                 Example::
@@ -957,9 +903,9 @@ async def run(
                     # Show only p99 latency and tokens per second:
                     result = await runner.run(
                         progress_bar_stats={
-                            "p99_ttlt": ("time_to_last_token", "p99"),
-                            "tps": ("time_per_output_token", "p50", "inv"),
-                            "fail": "failed",
+                            "p99_ttlt": "time_to_last_token-p99",
+                            "tps": ("time_per_output_token-p50", "inv"),
+                            "fail": "failed_requests",
                         },
                     )
             disable_per_client_progress_bar (bool): Set `True` to disable per-client progress bars
diff --git a/llmeter/utils.py b/llmeter/utils.py
index c43d7d0..74198ac 100644
--- a/llmeter/utils.py
+++ b/llmeter/utils.py
@@ -106,31 +106,6 @@ class RunningStats:
         # {'failed_requests': 0, ..., 'time_to_first_token-p50': 0.4, ...}
     """
 
-    #: Default stats shown on the progress bar during a run.
-    #: Each entry maps a short display label to a spec:
-    #:
-    #: * ``(metric_name, aggregation)`` — aggregation can be ``"p50"``, ``"p90"``,
-    #:   ``"p99"``, ``"average"``, or ``"sum"``.
-    #: * ``(metric_name, aggregation, "inv")`` — same as above but displays the
-    #:   reciprocal (e.g. seconds-per-token → tokens-per-second).
-    #: * The literal string ``"failed"`` for the running failure count.
-    #: * The literal string ``"rpm"`` for live requests-per-minute based on the
-    #:   send window (first request sent to last request sent).
-    #: * The literal string ``"output_tps"`` for aggregate output tokens per second
-    #:   across all clients, based on the send window.
-    DEFAULT_SNAPSHOT_STATS: dict[str, tuple[str, ...] | str] = {
-        "rpm": "rpm",
-        "output_tps": "output_tps",
-        "p50_ttft": ("time_to_first_token", "p50"),
-        "p90_ttft": ("time_to_first_token", "p90"),
-        "p50_ttlt": ("time_to_last_token", "p50"),
-        "p90_ttlt": ("time_to_last_token", "p90"),
-        "p50_tps": ("time_per_output_token", "p50", "inv"),
-        "input_tokens": ("num_tokens_input", "sum"),
-        "output_tokens": ("num_tokens_output", "sum"),
-        "fail": "failed",
-    }
-
     def __init__(self, metrics: Sequence[str]):
         self._metrics = list(metrics)
         self._count = 0
@@ -263,114 +238,29 @@ def to_stats(
             for j, v in agg.items():
                 stats[f"{m}-{j}"] = v
 
-        return stats
-
-    def snapshot(
-        self,
-        fields: dict[str, tuple[str, ...] | str] | None = None,
-    ) -> dict[str, str]:
-        """Format a subset of :meth:`to_stats` for progress-bar display.
-
-        Calls :meth:`to_stats` internally and picks only the requested fields,
-        formatting each value as a human-readable string.
-
-        Args:
-            fields: Mapping of ``{display_label: spec}``.  Each *spec* is one of:
-
-                * ``(metric, aggregation)`` — a 2-tuple where *metric* is a tracked
-                  metric name and *aggregation* is ``"p50"``, ``"p90"``, ``"p99"``,
-                  ``"average"``, or ``"sum"``.
-                * ``(metric, aggregation, "inv")`` — a 3-tuple; same as above but
-                  the value is inverted before display (e.g. seconds-per-token →
-                  tokens-per-second).
-                * ``"failed"`` — the literal string; shows the running failure count.
-                * ``"rpm"`` — the literal string; shows live requests-per-minute
-                  estimate based on the send window (first to last request sent).
-                * ``"output_tps"`` — the literal string; shows aggregate output
-                  tokens per second across all clients, based on the send window.
-
-                Defaults to :attr:`DEFAULT_SNAPSHOT_STATS` when ``None``.
+        # Send-window throughput (live RPM and output tokens/s).
+        # These use the dispatch timestamps rather than response timestamps,
+        # giving a more accurate picture of the request rate.
+        send_window = self._send_window()
+        if send_window and send_window > 0:
+            stats["rpm"] = self._count / send_window * 60
+            total_out = self._sums.get("num_tokens_output", 0)
+            stats["output_tps"] = total_out / send_window
 
-        Returns:
-            An ordered dict of ``{label: formatted_value}`` strings suitable for
-            ``tqdm.set_postfix()``.
+        return stats
 
-        Example::
+    def _send_window(self) -> float | None:
+        """Return the elapsed seconds between first and last ``record_send`` call.
 
-            # Use defaults:
-            rs.snapshot()
-            # {'p50_ttft': '0.312s', 'p90_ttlt': '1.203s', ..., 'fail': '0'}
-
-            # Custom selection — only p99 latency and failures:
-            rs.snapshot({
-                "p99_ttlt": ("time_to_last_token", "p99"),
-                "fail": "failed",
-            })
-            # {'p99_ttlt': '2.105s', 'fail': '1'}
-
-            # Inverted metric — tokens per second from time_per_output_token:
-            rs.snapshot({
-                "tps": ("time_per_output_token", "p50", "inv"),
-            })
-            # {'tps': '28.3 tok/s'}
+        Returns ``None`` until the last send time is strictly after the first.
         """
-        if self._count == 0:
-            if fields is None:
-                fields = self.DEFAULT_SNAPSHOT_STATS
-            return {label: "—" for label in fields}
-
-        if fields is None:
-            fields = self.DEFAULT_SNAPSHOT_STATS
-
-        raw = self.to_stats()
-
-        info: dict[str, str] = {}
-        for label, spec in fields.items():
-            if spec == "failed":
-                info[label] = str(self._failed)
-                continue
-
-            if spec == "rpm":
-                if (
-                    self._first_send_time is not None
-                    and self._last_send_time is not None
-                    and self._last_send_time > self._first_send_time
-                ):
-                    send_window = self._last_send_time - self._first_send_time
-                    info[label] = f"{self._count / send_window * 60:.1f}"
-                continue
-
-            if spec == "output_tps":
-                if (
-                    self._first_send_time is not None
-                    and self._last_send_time is not None
-                    and self._last_send_time > self._first_send_time
-                ):
-                    send_window = self._last_send_time - self._first_send_time
-                    total_out = self._sums.get("num_tokens_output", 0)
-                    info[label] = f"{total_out / send_window:.1f} tok/s"
-                continue
-
-            metric = spec[0]
-            agg = spec[1]
-            invert = len(spec) > 2 and spec[2] == "inv"
-
-            if agg == "sum":
-                info[label] = f"{self._sums.get(metric, 0):.0f}"
-                continue
-
-            val = raw.get(f"{metric}-{agg}")
-            if val is None:
-                continue
-
-            if invert and val > 0:
-                info[label] = f"{1.0 / val:.1f} tok/s"
-            elif "time" in metric:
-                info[label] = f"{val:.3f}s"
-            else:
-                info[label] = f"{val:.1f}"
-
-        return info
+        if (
+            self._first_send_time is not None
+            and self._last_send_time is not None
+            and self._last_send_time > self._first_send_time
+        ):
+            return self._last_send_time - self._first_send_time
+        return None
 
 
 def now_utc() -> datetime:
diff --git a/tests/unit/test_experiments.py b/tests/unit/test_experiments.py
index 42b0fb0..8996578 100644
--- a/tests/unit/test_experiments.py
+++ b/tests/unit/test_experiments.py
@@ -559,7 +559,7 @@ def test_load_test_with_low_memory(self, mock_endpoint):
 
     def test_load_test_with_progress_bar_stats(self, mock_endpoint):
         """progress_bar_stats should be stored on the LoadTest instance."""
-        custom_stats = {"rpm": "rpm", "fail": "failed"}
+        custom_stats = {"rpm": "rpm", "fail": "failed_requests"}
         lt = LoadTest(
             endpoint=mock_endpoint,
             payload={"input": "test"},
diff --git a/tests/unit/test_live_display.py b/tests/unit/test_live_display.py
index 7057eaf..6e0fb92 100644
--- a/tests/unit/test_live_display.py
+++ b/tests/unit/test_live_display.py
@@ -5,8 +5,10 @@
 from unittest.mock import patch
 
 from llmeter.live_display import (
+    DEFAULT_DISPLAY_STATS,
     LiveStatsDisplay,
     _classify,
+    _format_stat,
     _group_stats,
     _in_notebook,
 )
@@ -111,44 +113,113 @@ def test_returns_false_for_none(self):
             assert _in_notebook() is False
 
 
+# ── _format_stat ─────────────────────────────────────────────────────────────
+
+
+class TestFormatStat:
+    def test_time_metric(self):
+        assert _format_stat("time_to_first_token-p50", 0.312) == "0.312s"
+
+    def test_rpm_metric(self):
+        assert _format_stat("rpm", 185.9) == "185.9"
+
+    def test_tps_metric(self):
+        assert _format_stat("output_tps", 80.0) == "80.0 tok/s"
+
+    def test_inverse(self):
+        result = _format_stat("time_per_output_token-p50", 0.04, invert=True)
+        assert "tok/s" in result
+        assert "25.0" in result
+
+    def test_integer_value(self):
+        assert _format_stat("failed_requests", 3) == "3"
+
+    def test_float_that_is_whole(self):
+        assert _format_stat("failed_requests", 0.0) == "0"
+
+
 # ── LiveStatsDisplay ─────────────────────────────────────────────────────────
 
 
 class TestLiveStatsDisplay:
     def test_disabled_does_nothing(self):
         display = LiveStatsDisplay(disabled=True)
-        # Should not raise
-        display.update({"rpm": "100"})
+        display.update({"rpm": 100})
         display.close()
 
-    def test_update_empty_stats_does_nothing(self):
-        display = LiveStatsDisplay(disabled=False)
-        display.update({})
-        assert display._handle is None
-        assert display._last_line_count == 0
+    def test_format_stats_with_empty_raw(self):
+        display = LiveStatsDisplay()
+        result = display.format_stats({})
+        assert all(v == "—" for v in result.values())
+        assert "rpm" in result
+        assert "fail" in result
+
+    def test_format_stats_with_data(self):
+        display = LiveStatsDisplay(
+            display_stats={
+                "rpm": "rpm",
+                "fail": "failed_requests",
+                "p50_ttft": "time_to_first_token-p50",
+            }
+        )
+        raw = {
+            "rpm": 185.9,
+            "failed_requests": 0,
+            "time_to_first_token-p50": 0.312,
+        }
+        result = display.format_stats(raw)
+        assert result["rpm"] == "185.9"
+        assert result["fail"] == "0"
+        assert result["p50_ttft"] == "0.312s"
+
+    def test_format_stats_inverse(self):
+        display = LiveStatsDisplay(
+            display_stats={"tps": ("time_per_output_token-p50", "inv")}
+        )
+        raw = {"time_per_output_token-p50": 0.04}
+        result = display.format_stats(raw)
+        assert "tok/s" in result["tps"]
 
-    def test_terminal_output(self, capsys):
-        display = LiveStatsDisplay(disabled=False)
+    def test_format_stats_missing_key_shows_placeholder(self):
+        display = LiveStatsDisplay(
+            display_stats={"rpm": "rpm", "missing": "nonexistent_key"}
+        )
+        result = display.format_stats({"rpm": 100.0})
+        assert result["rpm"] == "100.0"
+        assert result["missing"] == "—"
+
+    def test_custom_display_stats(self):
+        custom = {"latency": "time_to_last_token-p99", "errors": "failed_requests"}
+        display = LiveStatsDisplay(display_stats=custom)
+        assert display._display_stats == custom
+
+    def test_default_display_stats_used(self):
+        display = LiveStatsDisplay()
+        assert display._display_stats is DEFAULT_DISPLAY_STATS
+
+    def test_terminal_output(self):
+        display = LiveStatsDisplay(
+            disabled=False,
+            display_stats={"rpm": "rpm", "fail": "failed_requests"},
+        )
         display._is_notebook = False
-        display.update({"rpm": "100", "fail": "0"})
-        # Should have written to stderr
+        display.update({"rpm": 100.0, "failed_requests": 0})
         assert display._last_line_count > 0
         display.close()
         assert display._last_line_count == 0
 
-    def test_terminal_with_prefix(self, capsys):
-        display = LiveStatsDisplay(disabled=False)
+    def test_terminal_with_prefix(self):
+        display = LiveStatsDisplay(disabled=False, display_stats={"rpm": "rpm"})
         display._is_notebook = False
-        display.update({"rpm": "100"}, extra_prefix="reqs=42")
+        display.update({"rpm": 100.0}, extra_prefix="reqs=42")
         assert display._last_line_count >= 2  # prefix line + stats line
         display.close()
 
     def test_terminal_overwrites_previous(self):
-        display = LiveStatsDisplay(disabled=False)
+        display = LiveStatsDisplay(disabled=False, display_stats={"rpm": "rpm"})
         display._is_notebook = False
-        display.update({"rpm": "100"})
+        display.update({"rpm": 100.0})
         first_count = display._last_line_count
-        display.update({"rpm": "200"})
-        # Should still be same number of lines (overwritten)
+        display.update({"rpm": 200.0})
         assert display._last_line_count == first_count
         display.close()
diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py
index 73a6e63..e949b04 100644
--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@@ -308,3 +308,189 @@ def test_save_method_existing_responses(sample_result: Result, temp_dir: UPath):
         responses = [json.loads(line) for line in f]
         assert len(responses) == 6  # 5 original + 1 extra
         assert responses[-1]["id"] == "extra_response"
+
+
+# ── Contributed stats round-trip ─────────────────────────────────────────────
+
+
+class TestContributedStatsRoundTrip:
+    """Verify that callback-contributed stats survive save → load cycles."""
+
+    @pytest.fixture
+    def result_with_contributed_stats(self):
+        responses = [
+            InvocationResponse(
+                id=f"r{i}",
+                response_text=f"resp {i}",
+                input_prompt=f"prompt {i}",
+                time_to_first_token=0.1 * i,
+                time_to_last_token=0.2 * i,
+                num_tokens_output=10 * i,
+                num_tokens_input=5 * i,
+            )
+            for i in range(1, 4)
+        ]
+        result = Result(
+            responses=responses,
+            total_requests=3,
+            clients=1,
+            n_requests=3,
+            total_test_time=1.0,
+        )
+        result._update_contributed_stats(
+            {"custom_metric_a": 42.0, "custom_metric_b": 99.5}
+        )
+        return result
+
+    def test_contributed_stats_appear_in_stats(self, result_with_contributed_stats):
+        stats = result_with_contributed_stats.stats
+        assert stats["custom_metric_a"] == 42.0
+        assert stats["custom_metric_b"] == 99.5
+
+    def test_contributed_stats_written_to_stats_json(
+        self, result_with_contributed_stats, tmp_path
+    ):
+        output = UPath(tmp_path / "out")
+        result_with_contributed_stats.save(output)
+
+        with (output / "stats.json").open() as f:
+            saved = json.load(f)
+        assert saved["custom_metric_a"] == 42.0
+        assert saved["custom_metric_b"] == 99.5
+
+    def test_load_with_responses_preserves_contributed_stats(
+        self, result_with_contributed_stats, tmp_path
+    ):
+        output = UPath(tmp_path / "out")
+        result_with_contributed_stats.save(output)
+
+        loaded = Result.load(output, load_responses=True)
+
+        assert loaded.stats["custom_metric_a"] == 42.0
+        assert loaded.stats["custom_metric_b"] == 99.5
+
+    def test_load_without_responses_preserves_contributed_stats(
+        self, result_with_contributed_stats, tmp_path
+    ):
+        output = UPath(tmp_path / "out")
+        result_with_contributed_stats.save(output)
+
+        loaded = Result.load(output, load_responses=False)
+
+        assert loaded.stats["custom_metric_a"] == 42.0
+        assert loaded.stats["custom_metric_b"] == 99.5
+
+    def test_contributed_stats_do_not_clobber_builtin_stats(
+        self, result_with_contributed_stats, tmp_path
+    ):
+        output = UPath(tmp_path / "out")
+        result_with_contributed_stats.save(output)
+
+        loaded = Result.load(output, load_responses=True)
+
+        # Builtin stats must still be present and correct
+        assert "failed_requests" in loaded.stats
+        assert loaded.stats["total_requests"] == 3
+        assert "time_to_first_token-p50" in loaded.stats
+
+    def test_builtin_stats_not_overwritten_by_stale_saved_values(self, tmp_path):
+        """If a builtin key exists in stats.json with a stale value, the freshly
+        computed value from responses should win."""
+        responses = [
+            InvocationResponse(
+                id="x",
+                response_text="r",
+                input_prompt="p",
+                time_to_first_token=0.5,
+                time_to_last_token=1.0,
+                num_tokens_output=10,
+                num_tokens_input=5,
+            )
+        ]
+        result = Result(
+            responses=responses,
+            total_requests=1,
+            clients=1,
+            n_requests=1,
+            total_test_time=2.0,
+        )
+        output = UPath(tmp_path / "out")
+        result.save(output)
+
+        # Tamper with stats.json: set a wrong value for a builtin key
+        stats_path = output / "stats.json"
+        with stats_path.open() as f:
+            saved = json.load(f)
+        saved["failed_requests"] = 999
+        with stats_path.open("w") as f:
+            json.dump(saved, f)
+
+        loaded = Result.load(output, load_responses=True)
+
+        # The freshly computed value (0 failures) should win over the tampered 999
+        assert loaded.stats["failed_requests"] == 0
+
+    def test_load_responses_recomputes_but_keeps_contributed(self, tmp_path):
+        """After load(load_responses=False), contributed stats from stats.json
+        are readable via .stats. After a subsequent load_responses(), this test
+        only asserts that the builtin stats are present and correct."""
+        responses = [
+            InvocationResponse(
+                id="z",
+                response_text="r",
+                input_prompt="p",
+                time_to_first_token=0.3,
+                time_to_last_token=0.6,
+                num_tokens_output=8,
+                num_tokens_input=4,
+            )
+        ]
+        result = Result(
+            responses=responses,
+            total_requests=1,
+            clients=1,
+            n_requests=1,
+            total_test_time=1.0,
+        )
+        result._update_contributed_stats({"cb_stat": 7.0})
+        output = UPath(tmp_path / "out")
+        result.save(output)
+
+        loaded = Result.load(output, load_responses=False)
+        assert loaded.stats["cb_stat"] == 7.0
+
+        # Now reload responses — _preloaded_stats gets recomputed from
+        # responses only, so cb_stat won't be in _preloaded_stats anymore,
+        # but it was never in _contributed_stats on the loaded instance either.
+        loaded.load_responses()
+        # After recompute, builtin stats should be correct
+        assert loaded.stats["failed_requests"] == 0
+        assert "time_to_first_token-p50" in loaded.stats
+
+    def test_multiple_contributed_stats_updates_merge(self, tmp_path):
+        responses = [
+            InvocationResponse(
+                id="m",
+                response_text="r",
+                input_prompt="p",
+                num_tokens_output=5,
+                num_tokens_input=3,
+            )
+        ]
+        result = Result(
+            responses=responses,
+            total_requests=1,
+            clients=1,
+            n_requests=1,
+            total_test_time=0.5,
+        )
+        result._update_contributed_stats({"stat_a": 1.0})
+        result._update_contributed_stats({"stat_b": 2.0})
+        result._update_contributed_stats({"stat_a": 10.0})  # overwrite
+
+        output = UPath(tmp_path / "out")
+        result.save(output)
+
+        loaded = Result.load(output, load_responses=True)
+        assert loaded.stats["stat_a"] == 10.0
+        assert loaded.stats["stat_b"] == 2.0
diff --git a/tests/unit/test_runner.py b/tests/unit/test_runner.py
index 8953e80..616988e 100644
--- a/tests/unit/test_runner.py
+++ b/tests/unit/test_runner.py
@@ -70,11 +70,11 @@ def run(mock_endpoint: MagicMock, mock_tokenizer: MagicMock):
     mock_run._queue = AsyncMock()
     mock_run._queue.task_done = MagicMock()
 
-    # Mock the _invoke_n_c method to return a simple result
-    async def mock_invoke_n_c(payload, n_requests, clients):
+    # Mock the _invoke_clients method to return a simple result
+    async def mock_invoke_clients(payload, n_requests=None, duration=None, clients=1):
         return 1.0, [], []
 
-    mock_run._invoke_n_c = mock_invoke_n_c
+    mock_run._invoke_clients = mock_invoke_clients
 
     # Mock the _process_results_from_q method
     async def mock_process_results_from_q(output_path=None):
@@ -126,7 +126,7 @@ async def test_invoke_n(run: _Run):
         ]
     )
 
-    result = await run._invoke_n(
+    result = await run._invoke_client(
         payload=[{"prompt": "test1"}, {"prompt": "test2"}], n=2
     )
 
@@ -180,7 +180,7 @@ async def test_invoke_n_no_wait(run: _Run):
 @pytest.mark.asyncio
 async def test_invoke_n_c(run: _Run):
     # Remove the fixture override and create a proper mock
-    async def mock_invoke_n_c(payload, n_requests, clients):
+    async def mock_invoke_clients(payload, n_requests=None, duration=None, clients=1):
         # Simulate the actual behavior
         responses = [
             InvocationResponse(id="1", input_prompt="test1", response_text="response1"),
@@ -189,9 +189,9 @@ async def mock_invoke_n_c(payload, n_requests, clients):
         return 1.5, responses, []  # total_time, responses, errors
 
     # Replace the fixture mock with our test-specific mock
-    run._invoke_n_c = mock_invoke_n_c
+    run._invoke_clients = mock_invoke_clients
 
-    total_test_time, responses, _ = await run._invoke_n_c(
+    total_test_time, responses, _ = await run._invoke_clients(
         payload=[{"prompt": "test"}], n_requests=2, clients=1
     )
 
@@ -238,7 +238,7 @@ async def test_run_with_output_path(runner: Runner, tmp_path: Path):
 
 @pytest.mark.asyncio
 async def test_run_error_handling(run: _Run):
-    run._invoke_n_c = AsyncMock(side_effect=Exception("Test error"))
+    run._invoke_clients = AsyncMock(side_effect=Exception("Test error"))
     run._process_results_from_q = AsyncMock()
 
     with pytest.raises(Exception, match="Test error"):
@@ -432,7 +432,7 @@ def test_run_output_path(runner: Runner, tmp_path: Path):
 @pytest.mark.asyncio
 async def test_invoke_n_edge_cases(run: _Run):
     # Test with empty payload
-    result = await run._invoke_n(payload=[], n=5)
+    result = await run._invoke_client(payload=[], n=5)
     assert not result
 
     # Test with n=None (should use all payloads)
@@ -442,7 +442,7 @@ async def test_invoke_n_edge_cases(run: _Run):
             InvocationResponse(id="2", input_prompt="test2", response_text="response2"),
         ]
     )
-    result = await run._invoke_n(
+    result = await run._invoke_client(
         payload=[{"prompt": "test1"}, {"prompt": "test2"}], n=None
     )
     assert len(result) == 2
@@ -535,7 +535,9 @@ def test_prepare_run_combinations(
     )
 
     assert isinstance(run.payload, list)
-    assert run.n_requests == n_requests
+    # When n_requests is None, it defaults to len(payload)
+    expected_n = n_requests if n_requests is not None else len(run.payload)
+    assert run.n_requests == expected_n
     assert run.clients == clients
     assert run.output_path == (Path(output_path) if output_path else None)
     assert run.run_name is not None
@@ -566,7 +568,9 @@ async def test_run_with_different_payloads(
 
 @pytest.mark.asyncio
 async def test_invoke_n_c_concurrent_execution(run: _Run):
-    async def mock_invoke_n(payload, n, add_start_jitter=True, shuffle_order=True):
+    async def mock_invoke_client(
+        payload, n=None, duration=None, add_start_jitter=True, shuffle_order=True
+    ):
         await asyncio.sleep(0.1)  # Simulate some processing time
         return [
             InvocationResponse(
@@ -575,10 +579,10 @@ async def mock_invoke_n(payload, n, add_start_jitter=True, shuffle_order=True):
             for i in range(n)
         ]
 
-    run._invoke_n = mock_invoke_n  # type: ignore
+    run._invoke_client = mock_invoke_client  # type: ignore
 
     start_time = time.perf_counter()
-    total_test_time, _, _ = await run._invoke_n_c(
+    total_test_time, _, _ = await run._invoke_clients(
         payload=[{"prompt": "test"}], n_requests=5, clients=3
     )
     end_time = time.perf_counter()
@@ -757,7 +761,9 @@ def test_prepare_run_more_edge_cases(
     )
 
     assert isinstance(run_config.payload, list)
-    assert run_config.n_requests == n_requests
+    # When n_requests is None, it defaults to len(payload)
+    expected_n = n_requests if n_requests is not None else len(run_config.payload)
+    assert run_config.n_requests == expected_n
     assert run_config.clients == clients if clients is not None else 1
     assert run_config.output_path == (Path(output_path) if output_path else None)
     assert run_config.run_name is not None
@@ -806,7 +812,9 @@ async def test_run_with_optional_parameters(
 async def test_invoke_n_c_with_different_clients(
     run: _Run, clients: Literal[1] | Literal[3] | Literal[5] | Literal[10]
 ):
-    async def mock_invoke_n(payload, n, add_start_jitter=True, shuffle_order=True):
+    async def mock_invoke_client(
+        payload, n=None, duration=None, add_start_jitter=True, shuffle_order=True
+    ):
         await asyncio.sleep(0.1)  # Simulate some processing time
         return [
             InvocationResponse(
@@ -815,10 +823,10 @@ async def mock_invoke_n(payload, n, add_start_jitter=True, shuffle_order=True):
             for i in range(n)
         ]
 
-    run._invoke_n = mock_invoke_n  # type: ignore
+    run._invoke_client = mock_invoke_client  # type: ignore
 
     start_time = time.perf_counter()
-    total_test_time, _, _ = await run._invoke_n_c(
+    total_test_time, _, _ = await run._invoke_clients(
         payload=[{"prompt": "test"}], n_requests=5, clients=clients
     )
     end_time = time.perf_counter()
@@ -956,7 +964,7 @@ async def test_invoke_n_with_different_options(
         ]
     )
 
-    result = await run._invoke_n(
+    result = await run._invoke_client(
         payload=[{"prompt": "test1"}, {"prompt": "test2"}],
         n=2,
         shuffle_order=shuffle_order,
@@ -965,7 +973,7 @@ async def test_invoke_n_with_different_options(
 
     assert len(result) == 2
     run._invoke_n_no_wait.assert_called_once_with(
-        [{"prompt": "test1"}, {"prompt": "test2"}], 2, shuffle_order
+        [{"prompt": "test1"}, {"prompt": "test2"}], 2, None, shuffle_order
     )
 
 
@@ -1021,7 +1029,7 @@ def test_run_duration_and_n_requests_mutually_exclusive(
 def test_run_duration_sets_time_bound_flag(
     mock_endpoint: Endpoint, mock_tokenizer: MagicMock
 ):
-    """When run_duration is set, _time_bound should be True and _n_requests 0."""
+    """When run_duration is set, _time_bound should be True and n_requests 0."""
     run = _Run(
         endpoint=mock_endpoint,
         tokenizer=mock_tokenizer,
@@ -1031,7 +1039,7 @@ def test_run_duration_sets_time_bound_flag(
         run_name="test_run",
     )
     assert run._time_bound is True
-    assert run._n_requests == 0
+    assert run.n_requests == 0
 
 
 def test_n_requests_sets_count_bound(
@@ -1047,7 +1055,7 @@ def test_n_requests_sets_count_bound(
         run_name="test_run",
     )
     assert run._time_bound is False
-    assert run._n_requests == 10
+    assert run.n_requests == 10
 
 
 def test_run_duration_must_be_positive(
@@ -1068,7 +1076,7 @@ def test_run_duration_must_be_positive(
 def test_invoke_for_duration_respects_deadline(
     mock_endpoint: Endpoint, mock_tokenizer: MagicMock
 ):
-    """_invoke_for_duration should stop after the specified duration."""
+    """_invoke_n_no_wait with duration should stop after the specified duration."""
     run = _Run(
         endpoint=mock_endpoint,
         tokenizer=mock_tokenizer,
@@ -1089,7 +1097,7 @@ def slow_invoke(payload):
     run._endpoint.invoke.side_effect = slow_invoke
 
     start = time.perf_counter()
-    responses = run._invoke_for_duration(payload=[{"prompt": "test"}], duration=0.5)
+    responses = run._invoke_n_no_wait(payload=[{"prompt": "test"}], duration=0.5)
     elapsed = time.perf_counter() - start
 
     assert len(responses) > 0
@@ -1100,7 +1108,7 @@ def slow_invoke(payload):
 def test_invoke_for_duration_cycles_payloads(
     mock_endpoint: Endpoint, mock_tokenizer: MagicMock
 ):
-    """_invoke_for_duration should cycle through payloads."""
+    """_invoke_n_no_wait with duration should cycle through payloads."""
     run = _Run(
         endpoint=mock_endpoint,
         tokenizer=mock_tokenizer,
@@ -1121,7 +1129,7 @@ def tracking_invoke(payload):
 
     run._endpoint.invoke.side_effect = tracking_invoke
 
-    responses = run._invoke_for_duration(
+    responses = run._invoke_n_no_wait(
         payload=[{"prompt": "a"}, {"prompt": "b"}],
         duration=0.3,
         shuffle_order=False,
@@ -1190,7 +1198,7 @@ def test_prepare_run_with_duration(runner: Runner):
     )
     assert run._time_bound is True
     assert run.run_duration == 30
-    assert run._n_requests == 0
+    assert run.n_requests == 0
 
 
 def test_prepare_run_duration_and_n_requests_conflict(runner: Runner):
diff --git a/tests/unit/test_running_stats.py b/tests/unit/test_running_stats.py
index 2f04d78..634483a 100644
--- a/tests/unit/test_running_stats.py
+++ b/tests/unit/test_running_stats.py
@@ -152,69 +152,47 @@ def test_empty_stats(self, rs):
         assert stats["total_input_tokens"] == 0
 
 
-# ── snapshot ─────────────────────────────────────────────────────────────────
+# ── send-window throughput in to_stats ────────────────────────────────────────
 
 
-class TestSnapshot:
-    def test_placeholder_when_empty(self, rs):
-        result = rs.snapshot()
-        assert all(v == "—" for v in result.values())
-        # Should have all default keys
-        assert "rpm" in result
-        assert "p50_ttft" in result
-        assert "fail" in result
-        assert "output_tps" in result
-
-    def test_placeholder_with_custom_fields(self, rs):
-        fields = {"my_rpm": "rpm", "my_fail": "failed"}
-        result = rs.snapshot(fields)
-        assert result == {"my_rpm": "—", "my_fail": "—"}
-
-    def test_failed_count(self, populated_rs):
-        result = populated_rs.snapshot({"fail": "failed"})
-        assert result["fail"] == "1"
-
+class TestSendWindowStats:
     def test_rpm_uses_send_window(self, rs):
         rs._first_send_time = 100.0
         rs._last_send_time = 110.0  # 10 second window
         rs.update({"error": None})
         rs.update({"error": None})
         rs.update({"error": None})
-        result = rs.snapshot({"rpm": "rpm"})
+        stats = rs.to_stats()
         # 3 responses / 10 seconds * 60 = 18.0 rpm
-        assert result["rpm"] == "18.0"
-
-    def test_rpm_not_shown_with_single_send(self, rs):
-        """With only one send, first == last, no window to compute RPM."""
-        rs._first_send_time = 100.0
-        rs._last_send_time = 100.0
-        rs.update({"error": None})
-        result = rs.snapshot({"rpm": "rpm"})
-        assert "rpm" not in result
+        assert stats["rpm"] == pytest.approx(18.0)
 
     def test_output_tps_uses_send_window(self, rs):
         rs._first_send_time = 100.0
         rs._last_send_time = 110.0  # 10 second window
         rs.update({"num_tokens_output": 500, "error": None})
         rs.update({"num_tokens_output": 300, "error": None})
-        result = rs.snapshot({"tps": "output_tps"})
+        stats = rs.to_stats()
         # 800 tokens / 10 seconds = 80.0 tok/s
-        assert result["tps"] == "80.0 tok/s"
-
-    def test_sum_aggregation(self, populated_rs):
-        result = populated_rs.snapshot({"out": ("num_tokens_output", "sum")})
-        assert result["out"] == "83"
-
-    def test_percentile_aggregation(self, populated_rs):
-        result = populated_rs.snapshot({"p50": ("time_to_first_token", "p50")})
-        assert "p50" in result
-        assert result["p50"].endswith("s")
+        assert stats["output_tps"] == pytest.approx(80.0)
 
-    def test_inverse_aggregation(self, populated_rs):
-        result = populated_rs.snapshot({"tps": ("time_per_output_token", "p50", "inv")})
-        assert "tps" in result
-        assert "tok/s" in result["tps"]
+    def test_no_send_window_when_single_send(self, rs):
+        """With only one send, first == last: no window for rpm or output_tps."""
+        rs._first_send_time = 100.0
+        rs._last_send_time = 100.0
+        rs.update({"error": None})
+        stats = rs.to_stats()
+        assert "rpm" not in stats
+        assert "output_tps" not in stats
 
-    def test_empty_fields_returns_empty(self, populated_rs):
-        result = populated_rs.snapshot({})
-        assert result == {}
+    def test_no_send_window_when_no_sends(self, rs):
+        stats = rs.to_stats()
+        assert "rpm" not in stats
+        assert "output_tps" not in stats
+
+    def test_send_window_helper(self, rs):
+        assert rs._send_window() is None
+        rs._first_send_time = 10.0
+        rs._last_send_time = 10.0
+        assert rs._send_window() is None
+        rs._last_send_time = 20.0
+        assert rs._send_window() == pytest.approx(10.0)