From 284d13e8a1850f049bffe62f8634218482e08130 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Tue, 28 Apr 2026 15:19:17 -0400 Subject: [PATCH 1/5] Revert "Clean up all references to HTML" This reverts commit 80cea4a2023d5bfbe5853488abd3ec5aba200425. Signed-off-by: Samuel Monson --- src/guidellm/__main__.py | 6 +- src/guidellm/benchmark/__init__.py | 2 + src/guidellm/benchmark/entrypoints.py | 2 +- src/guidellm/benchmark/outputs/__init__.py | 4 +- src/guidellm/benchmark/outputs/html.py | 477 +++++++++++++++++++++ src/guidellm/settings.py | 11 + 6 files changed, 497 insertions(+), 5 deletions(-) create mode 100644 src/guidellm/benchmark/outputs/html.py diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 4cdc88090..fc95031e0 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -272,7 +272,7 @@ def benchmark(): default=BenchmarkGenerativeTextArgs.get_default("outputs"), help=( "The filename.ext for each of the outputs to create or the " - "alises (json, csv) for the output files to create with " + "alises (json, csv, html) for the output files to create with " "their default file names (benchmark.[EXT])" ), ) @@ -512,8 +512,8 @@ def run(**kwargs): # noqa: C901 "--output-formats", multiple=True, type=str, - default=("console", "json"), - help="Output formats for benchmark results (e.g., console, json, csv).", + default=("console", "json"), # ("console", "json", "html", "csv") + help="Output formats for benchmark results (e.g., console, json, html, csv).", ) def from_file(path, output_path, output_formats): asyncio.run(reimport_benchmarks_report(path, output_path, output_formats)) diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py index 20327d502..c8f10459b 100644 --- a/src/guidellm/benchmark/__init__.py +++ b/src/guidellm/benchmark/__init__.py @@ -15,6 +15,7 @@ from .outputs import ( GenerativeBenchmarkerConsole, GenerativeBenchmarkerCSV, + GenerativeBenchmarkerHTML, GenerativeBenchmarkerOutput, 
) from .profiles import ( @@ -71,6 +72,7 @@ "GenerativeBenchmarkTimings", "GenerativeBenchmarkerCSV", "GenerativeBenchmarkerConsole", + "GenerativeBenchmarkerHTML", "GenerativeBenchmarkerOutput", "GenerativeBenchmarksReport", "GenerativeConsoleBenchmarkerProgress", diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index ab431bffd..1f0ed3043 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -591,7 +591,7 @@ async def benchmark_generative_text( async def reimport_benchmarks_report( file: Path, output_path: Path | None, - output_formats: OutputFormatT = ("console", "json", "csv"), + output_formats: OutputFormatT = ("console", "json", "html", "csv"), ) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]: """ Load and re-export an existing benchmarks report in specified output formats. diff --git a/src/guidellm/benchmark/outputs/__init__.py b/src/guidellm/benchmark/outputs/__init__.py index bb2e8855d..2e321605d 100644 --- a/src/guidellm/benchmark/outputs/__init__.py +++ b/src/guidellm/benchmark/outputs/__init__.py @@ -2,7 +2,7 @@ Output formatters for benchmark results. Provides output formatter implementations that transform benchmark reports into -various file formats including JSON, CSV, and console display. All formatters +various file formats including JSON, CSV, HTML, and console display. All formatters extend the base GenerativeBenchmarkerOutput interface, enabling dynamic resolution and flexible output configuration for benchmark result persistence and analysis. 
""" @@ -11,12 +11,14 @@ from .console import GenerativeBenchmarkerConsole from .csv import GenerativeBenchmarkerCSV +from .html import GenerativeBenchmarkerHTML from .output import GenerativeBenchmarkerOutput from .serialized import GenerativeBenchmarkerSerialized __all__ = [ "GenerativeBenchmarkerCSV", "GenerativeBenchmarkerConsole", + "GenerativeBenchmarkerHTML", "GenerativeBenchmarkerOutput", "GenerativeBenchmarkerSerialized", ] diff --git a/src/guidellm/benchmark/outputs/html.py b/src/guidellm/benchmark/outputs/html.py new file mode 100644 index 000000000..464c730c4 --- /dev/null +++ b/src/guidellm/benchmark/outputs/html.py @@ -0,0 +1,477 @@ +""" +HTML output formatter for benchmark results. + +Transforms benchmark data into interactive web-based reports by building UI data +structures, converting keys to camelCase for JavaScript compatibility, and injecting +formatted data into HTML templates. The formatter processes GenerativeBenchmark +instances and their associated metrics, creating histogram buckets for distributions, +formatting percentile statistics for tabular display, and embedding all data as +JavaScript objects within an HTML template for client-side rendering and visualization. 
+""" + +from __future__ import annotations + +import json +import random +import re +from collections import defaultdict +from copy import deepcopy +from math import ceil +from pathlib import Path +from typing import Any, ClassVar + +from loguru import logger +from pydantic import BaseModel, Field, computed_field + +from guidellm.benchmark.outputs.output import GenerativeBenchmarkerOutput +from guidellm.benchmark.schemas import ( + BenchmarkGenerativeTextArgs, + GenerativeBenchmark, + GenerativeBenchmarksReport, +) +from guidellm.schemas import DistributionSummary, Percentiles +from guidellm.settings import settings +from guidellm.utils.dict import recursive_key_update +from guidellm.utils.text import camelize_str, load_text + +__all__ = ["GenerativeBenchmarkerHTML"] + + +@GenerativeBenchmarkerOutput.register("html") +class GenerativeBenchmarkerHTML(GenerativeBenchmarkerOutput): + """ + HTML output formatter for benchmark results. + + Generates interactive HTML reports from benchmark data by transforming results + into camelCase JSON structures and injecting them into HTML templates. The + formatter processes benchmark metrics, creates histogram distributions, and + embeds all data into a pre-built HTML template for browser-based visualization. + Reports are saved to the specified output path or current working directory. + + :cvar DEFAULT_FILE: Default filename for HTML output when a directory is provided + """ + + DEFAULT_FILE: ClassVar[str] = "benchmarks.html" + + output_path: Path = Field( + default_factory=lambda: Path.cwd(), + description=( + "Directory or file path for saving the HTML report, " + "defaults to current working directory" + ), + ) + + @classmethod + def validated_kwargs( + cls, output_path: str | Path | None, **_kwargs + ) -> dict[str, Any]: + """ + Validate and normalize output path argument. 
+ + :param output_path: Output file or directory path for the HTML report + :return: Dictionary containing validated output_path if provided + """ + validated: dict[str, Any] = {} + if output_path is not None: + validated["output_path"] = ( + Path(output_path) if not isinstance(output_path, Path) else output_path + ) + return validated + + async def finalize(self, report: GenerativeBenchmarksReport) -> Path: + """ + Generate and save the HTML benchmark report. + + Transforms benchmark data into camelCase JSON format, injects it into the + HTML template, and writes the resulting report to the output path. Creates + parent directories if they don't exist. + + :param report: Completed benchmark report containing all results + :return: Path to the saved HTML report file + """ + output_path = self.output_path + if output_path.is_dir(): + output_path = output_path / self.DEFAULT_FILE + output_path.parent.mkdir(parents=True, exist_ok=True) + + data = _build_ui_data(report.benchmarks, report.args) + camel_data = recursive_key_update(deepcopy(data), camelize_str) + + ui_api_data = { + f"window.{key} = {{}};": f"window.{key} = {json.dumps(value, indent=2)};\n" + for key, value in camel_data.items() + } + + _create_html_report(ui_api_data, output_path) + + return output_path + + +class _Bucket(BaseModel): + """ + Histogram bucket for data distribution visualization. + + Represents a single bucket in a histogram with its starting value and count + of data points falling within the bucket range. Used to create distribution + histograms for metrics like token counts and request timings. 
+ """ + + value: float | int = Field(description="Starting value of the bucket range") + count: int = Field(description="Number of data points falling within this bucket") + + @staticmethod + def from_data( + data: list[float] | list[int], + bucket_width: float | None = None, + n_buckets: int | None = None, + ) -> tuple[list[_Bucket], float]: + """ + Create histogram buckets from numeric data values. + + Divides the data range into equal-width buckets and counts values within + each bucket. Either bucket_width or n_buckets can be specified; if neither + is provided, defaults to 10 buckets. + + :param data: Numeric values to bucket + :param bucket_width: Width of each bucket, computed if None + :param n_buckets: Number of buckets, defaults to 10 if width not specified + :return: Tuple of bucket list and computed bucket width + """ + if not data: + return [], 1.0 + + min_v = min(data) + max_v = max(data) + range_v = (1 + max_v) - min_v + + if bucket_width is None: + if n_buckets is None: + n_buckets = 10 + bucket_width = range_v / n_buckets + else: + n_buckets = ceil(range_v / bucket_width) + + bucket_counts: defaultdict[float | int, int] = defaultdict(int) + for val in data: + idx = int((val - min_v) // bucket_width) + if idx >= n_buckets: + idx = n_buckets - 1 + bucket_start = min_v + idx * bucket_width + bucket_counts[bucket_start] += 1 + + buckets = [ + _Bucket(value=start, count=count) + for start, count in sorted(bucket_counts.items()) + ] + return buckets, bucket_width + + +class _TabularDistributionSummary(DistributionSummary): + """ + Distribution summary with tabular percentile representation. + + Extends DistributionSummary to provide percentile data formatted for table + display in the HTML report. Filters to show only key percentiles (p50, p90, + p95, p99) for concise presentation. + """ + + @computed_field + def percentile_rows(self) -> list[dict[str, str | float]]: + """ + Format percentiles as table rows for UI display. 
+ + :return: List of dictionaries with percentile names and values + """ + rows = [ + {"percentile": name, "value": value} + for name, value in self.percentiles.model_dump().items() + ] + return list( + filter(lambda row: row["percentile"] in ["p50", "p90", "p95", "p99"], rows) + ) + + def model_dump(self, **kwargs) -> dict: + """ + Override model_dump to filter duplicate consecutive percentile values. + + This prevents visualization errors when distributions have limited data + points causing multiple percentiles to collapse to the same value. + + :param kwargs: Arguments to pass to parent model_dump + :return: Dictionary with filtered percentiles + """ + data = super().model_dump(**kwargs) + + if "percentiles" in data and data["percentiles"]: + filtered_percentiles = _filter_duplicate_percentiles(data["percentiles"]) + data["percentiles"] = filtered_percentiles + + return data + + @classmethod + def from_distribution_summary( + cls, distribution: DistributionSummary + ) -> _TabularDistributionSummary: + """ + Convert standard DistributionSummary to tabular format. + + :param distribution: Source distribution summary to convert + :return: Tabular distribution summary with formatted percentile rows + """ + return cls(**distribution.model_dump()) + + +def _create_html_report(js_data: dict[str, str], output_path: Path) -> Path: + """ + Create HTML report by injecting JavaScript data into template. + + Loads the HTML template, injects JavaScript data into the head section, and + writes the final report to the specified output path. 
+ + :param js_data: Dictionary mapping placeholder strings to JavaScript code + :param output_path: Path where HTML report will be saved + :return: Path to the saved report file + """ + html_content = load_text(settings.report_generation.source) + report_content = _inject_data(js_data, html_content) + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(report_content) + return output_path + + +def _filter_duplicate_percentiles(percentiles: dict[str, float]) -> dict[str, float]: + """ + Filter out consecutive duplicate percentile values. + + When distributions have very few data points, multiple percentiles can have + the same value, which causes visualization libraries to fail. This function + keeps only the largest percentile for consecutive duplicate values, which is + more mathematically accurate as higher percentiles have greater statistical + significance. + + :param percentiles: Dictionary of percentile names to values + :return: Filtered percentiles dictionary with no consecutive duplicates + """ + if not percentiles: + return percentiles + + percentile_order = list(Percentiles.model_fields.keys()) + + # Iterate in reverse to keep the largest percentile for each value + filtered = {} + previous_value = None + + for key in reversed(percentile_order): + if key in percentiles: + current_value = percentiles[key] + if previous_value is None or current_value != previous_value: + filtered[key] = current_value + previous_value = current_value + + # Restore original order + return {key: filtered[key] for key in percentile_order if key in filtered} + + +def _inject_data(js_data: dict[str, str], html: str) -> str: + """ + Inject JavaScript data into HTML head section. + + Replaces placeholder strings in the HTML head section with actual JavaScript + code containing benchmark data. Returns original HTML if no head section found. 
+ + :param js_data: Dictionary mapping placeholder strings to JavaScript code + :param html: HTML template content + :return: HTML with injected JavaScript data + """ + head_match = re.search(r"<head[^>]*>(.*?)</head>", html, re.DOTALL | re.IGNORECASE) + if not head_match: + logger.warning("<head> section missing, returning original HTML.") + return html + + head_content = head_match.group(1) + + for placeholder, script in js_data.items(): + head_content = head_content.replace(placeholder, script) + + new_head = f"<head>{head_content}</head>" + return html[: head_match.start()] + new_head + html[head_match.end() :] + + +def _build_ui_data( + benchmarks: list[GenerativeBenchmark], args: BenchmarkGenerativeTextArgs +) -> dict[str, Any]: + """ + Build complete UI data structure from benchmarks. + + Aggregates benchmark results into a structured format for the HTML UI, + including run metadata, workload details, and per-benchmark metrics. + + :param benchmarks: List of completed benchmark results + :param args: Benchmark configuration arguments + :return: Dictionary with run_info, workload_details, and benchmarks sections + """ + return { + "run_info": _build_run_info(benchmarks, args), + "workload_details": _build_workload_details(benchmarks, args), + "benchmarks": _build_benchmarks(benchmarks), + } + + +def _build_run_info( + benchmarks: list[GenerativeBenchmark], args: BenchmarkGenerativeTextArgs +) -> dict[str, Any]: + """ + Build run metadata from benchmarks. + + Extracts model name, timestamp, and dataset information from the benchmark + configuration and results. 
+ + :param benchmarks: List of completed benchmark results + :param args: Benchmark configuration arguments + :return: Dictionary with model, task, timestamp, and dataset information + """ + model = ( + args.backend_kwargs.model if hasattr(args.backend_kwargs, "model") else "N/A" + ) + timestamp = max(bm.start_time for bm in benchmarks if bm.start_time is not None) + return { + "model": {"name": model, "size": 0}, + "task": "N/A", + "timestamp": timestamp, + "dataset": {"name": "N/A"}, + } + + +def _build_workload_details( + benchmarks: list[GenerativeBenchmark], args: BenchmarkGenerativeTextArgs +) -> dict[str, Any]: + """ + Build workload details from benchmarks. + + Aggregates prompt and generation samples, token distribution statistics, + request timing histograms, and server configuration. Samples up to 5 random + prompts and outputs for display. + + :param benchmarks: List of completed benchmark results + :param args: Benchmark configuration arguments + :return: Dictionary with prompts, generations, request timing, and server info + """ + target = ( + args.backend_kwargs.target if hasattr(args.backend_kwargs, "target") else None + ) + rate_type = benchmarks[0].config.strategy.type_ + successful_requests = [req for bm in benchmarks for req in bm.requests.successful] + + sample_indices = random.sample( + range(len(successful_requests)), min(5, len(successful_requests)) + ) + sample_prompts = [ + req.request_args.replace("\n", " ").replace('"', "'") + if (req := successful_requests[i]).request_args + else "" + for i in sample_indices + ] + sample_outputs = [ + req.output.replace("\n", " ").replace('"', "'") + if (req := successful_requests[i]).output + else "" + for i in sample_indices + ] + + prompt_tokens = [ + float(req.prompt_tokens) if req.prompt_tokens is not None else -1 + for bm in benchmarks + for req in bm.requests.successful + ] + output_tokens = [ + float(req.output_tokens) if req.output_tokens is not None else -1 + for bm in benchmarks + for req in 
bm.requests.successful + ] + + prompt_token_buckets, _prompt_bucket_width = _Bucket.from_data(prompt_tokens, 1) + output_token_buckets, _output_bucket_width = _Bucket.from_data(output_tokens, 1) + + prompt_token_stats = DistributionSummary.from_values(prompt_tokens) + output_token_stats = DistributionSummary.from_values(output_tokens) + + min_start_time = benchmarks[0].start_time + all_req_times = [ + req.info.timings.request_start - min_start_time + for bm in benchmarks + for req in bm.requests.successful + if req.info.timings.request_start is not None + ] + + number_of_buckets = len(benchmarks) + request_buckets, bucket_width = _Bucket.from_data( + all_req_times, None, number_of_buckets + ) + + return { + "prompts": { + "samples": sample_prompts, + "token_distributions": { + "statistics": prompt_token_stats.model_dump() + if prompt_token_stats + else None, + "buckets": [b.model_dump() for b in prompt_token_buckets], + "bucket_width": 1, + }, + }, + "generations": { + "samples": sample_outputs, + "token_distributions": { + "statistics": output_token_stats.model_dump() + if output_token_stats + else None, + "buckets": [b.model_dump() for b in output_token_buckets], + "bucket_width": 1, + }, + }, + "requests_over_time": { + "requests_over_time": { + "buckets": [b.model_dump() for b in request_buckets], + "bucket_width": bucket_width, + }, + "num_benchmarks": number_of_buckets, + }, + "rate_type": rate_type, + "server": {"target": target}, + } + + +def _build_benchmarks(benchmarks: list[GenerativeBenchmark]) -> list[dict[str, Any]]: + """ + Build benchmark metrics data for UI display. + + Extracts key performance metrics from each benchmark including requests per + second, inter-token latency, time to first token, throughput, and request + latency. Formats distribution summaries for tabular display. 
+ + :param benchmarks: List of completed benchmark results + :return: List of dictionaries with formatted benchmark metrics + """ + result = [] + for bm in benchmarks: + result.append( + { + "requests_per_second": bm.metrics.requests_per_second.successful.mean, + "itl": _TabularDistributionSummary.from_distribution_summary( + bm.metrics.inter_token_latency_ms.successful + ).model_dump(), + "ttft": _TabularDistributionSummary.from_distribution_summary( + bm.metrics.time_to_first_token_ms.successful + ).model_dump(), + "throughput": _TabularDistributionSummary.from_distribution_summary( + bm.metrics.output_tokens_per_second.successful + ).model_dump(), + "time_per_request": ( + _TabularDistributionSummary.from_distribution_summary( + bm.metrics.request_latency.successful + ).model_dump() + ), + } + ) + return result diff --git a/src/guidellm/settings.py b/src/guidellm/settings.py index a7a6aaf02..f16ac5b6c 100644 --- a/src/guidellm/settings.py +++ b/src/guidellm/settings.py @@ -53,6 +53,14 @@ class DatasetSettings(BaseModel): ) +class ReportGenerationSettings(BaseModel): + """ + Report generation settings for the application + """ + + source: str = "https://raw.githubusercontent.com/vllm-project/guidellm/refs/heads/gh-pages/ui/v0.5.4/index.html" + + class Settings(BaseSettings): """ All the settings are powered by pydantic_settings and could be @@ -100,6 +108,9 @@ class Settings(BaseSettings): # Data settings dataset: DatasetSettings = DatasetSettings() + # Report settings + report_generation: ReportGenerationSettings = ReportGenerationSettings() + # Output settings table_border_char: str = "=" table_headers_border_char: str = "-" From 5fe48419d367e836a3ee017389bc857fe01dae38 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Tue, 20 Jan 2026 15:21:24 -0500 Subject: [PATCH 2/5] Add new UI output that is python native Signed-off-by: Samuel Monson --- pyproject.toml | 8 +- src/guidellm/benchmark/outputs/html.py | 477 ------------------ 
.../benchmark/outputs/html/__init__.py | 25 + .../outputs/html/components/__init__.py | 5 + .../benchmark/outputs/html/components/base.py | 60 +++ .../outputs/html/components/footer.py | 32 ++ .../outputs/html/components/header.py | 59 +++ .../html/components/metrics_summary.py | 251 +++++++++ .../html/components/workload_details.py | 311 ++++++++++++ .../html/components/workload_metrics.py | 169 +++++++ .../benchmark/outputs/html/data_builder.py | 324 ++++++++++++ .../benchmark/outputs/html/plotly_output.py | 134 +++++ .../outputs/html/templates/base.html | 25 + src/guidellm/benchmark/outputs/html/theme.py | 265 ++++++++++ uv.lock | 145 +++--- 15 files changed, 1753 insertions(+), 537 deletions(-) delete mode 100644 src/guidellm/benchmark/outputs/html.py create mode 100644 src/guidellm/benchmark/outputs/html/__init__.py create mode 100644 src/guidellm/benchmark/outputs/html/components/__init__.py create mode 100644 src/guidellm/benchmark/outputs/html/components/base.py create mode 100644 src/guidellm/benchmark/outputs/html/components/footer.py create mode 100644 src/guidellm/benchmark/outputs/html/components/header.py create mode 100644 src/guidellm/benchmark/outputs/html/components/metrics_summary.py create mode 100644 src/guidellm/benchmark/outputs/html/components/workload_details.py create mode 100644 src/guidellm/benchmark/outputs/html/components/workload_metrics.py create mode 100644 src/guidellm/benchmark/outputs/html/data_builder.py create mode 100644 src/guidellm/benchmark/outputs/html/plotly_output.py create mode 100644 src/guidellm/benchmark/outputs/html/templates/base.html create mode 100644 src/guidellm/benchmark/outputs/html/theme.py diff --git a/pyproject.toml b/pyproject.toml index 085f49489..50175022a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ explicit = true [tool.uv.sources] torch = { index = "pytorch-cpu" } +guidellm = { workspace = true } # ************************************************ # ********** Project Metadata 
********** @@ -75,8 +76,8 @@ dependencies = [ [project.optional-dependencies] # Meta Extras -all = ["guidellm[perf,tokenizers,audio,vision]"] -recommended = ["guidellm[perf,tokenizers]"] +all = ["guidellm[perf,tokenizers,audio,vision,ui]"] +recommended = ["guidellm[perf,tokenizers,ui]"] # Feature Extras perf = ["orjson", "msgpack", "msgspec", "uvloop"] tokenizers = ["tiktoken", "blobfile", "mistral-common"] @@ -137,6 +138,9 @@ dev = [ # link checking "mkdocs-linkcheck~=1.0.6", ] +ui = [ + "plotly>=5.24.0", +] [dependency-groups] dev = ["guidellm[dev]"] diff --git a/src/guidellm/benchmark/outputs/html.py b/src/guidellm/benchmark/outputs/html.py deleted file mode 100644 index 464c730c4..000000000 --- a/src/guidellm/benchmark/outputs/html.py +++ /dev/null @@ -1,477 +0,0 @@ -""" -HTML output formatter for benchmark results. - -Transforms benchmark data into interactive web-based reports by building UI data -structures, converting keys to camelCase for JavaScript compatibility, and injecting -formatted data into HTML templates. The formatter processes GenerativeBenchmark -instances and their associated metrics, creating histogram buckets for distributions, -formatting percentile statistics for tabular display, and embedding all data as -JavaScript objects within an HTML template for client-side rendering and visualization. 
-""" - -from __future__ import annotations - -import json -import random -import re -from collections import defaultdict -from copy import deepcopy -from math import ceil -from pathlib import Path -from typing import Any, ClassVar - -from loguru import logger -from pydantic import BaseModel, Field, computed_field - -from guidellm.benchmark.outputs.output import GenerativeBenchmarkerOutput -from guidellm.benchmark.schemas import ( - BenchmarkGenerativeTextArgs, - GenerativeBenchmark, - GenerativeBenchmarksReport, -) -from guidellm.schemas import DistributionSummary, Percentiles -from guidellm.settings import settings -from guidellm.utils.dict import recursive_key_update -from guidellm.utils.text import camelize_str, load_text - -__all__ = ["GenerativeBenchmarkerHTML"] - - -@GenerativeBenchmarkerOutput.register("html") -class GenerativeBenchmarkerHTML(GenerativeBenchmarkerOutput): - """ - HTML output formatter for benchmark results. - - Generates interactive HTML reports from benchmark data by transforming results - into camelCase JSON structures and injecting them into HTML templates. The - formatter processes benchmark metrics, creates histogram distributions, and - embeds all data into a pre-built HTML template for browser-based visualization. - Reports are saved to the specified output path or current working directory. - - :cvar DEFAULT_FILE: Default filename for HTML output when a directory is provided - """ - - DEFAULT_FILE: ClassVar[str] = "benchmarks.html" - - output_path: Path = Field( - default_factory=lambda: Path.cwd(), - description=( - "Directory or file path for saving the HTML report, " - "defaults to current working directory" - ), - ) - - @classmethod - def validated_kwargs( - cls, output_path: str | Path | None, **_kwargs - ) -> dict[str, Any]: - """ - Validate and normalize output path argument. 
- - :param output_path: Output file or directory path for the HTML report - :return: Dictionary containing validated output_path if provided - """ - validated: dict[str, Any] = {} - if output_path is not None: - validated["output_path"] = ( - Path(output_path) if not isinstance(output_path, Path) else output_path - ) - return validated - - async def finalize(self, report: GenerativeBenchmarksReport) -> Path: - """ - Generate and save the HTML benchmark report. - - Transforms benchmark data into camelCase JSON format, injects it into the - HTML template, and writes the resulting report to the output path. Creates - parent directories if they don't exist. - - :param report: Completed benchmark report containing all results - :return: Path to the saved HTML report file - """ - output_path = self.output_path - if output_path.is_dir(): - output_path = output_path / self.DEFAULT_FILE - output_path.parent.mkdir(parents=True, exist_ok=True) - - data = _build_ui_data(report.benchmarks, report.args) - camel_data = recursive_key_update(deepcopy(data), camelize_str) - - ui_api_data = { - f"window.{key} = {{}};": f"window.{key} = {json.dumps(value, indent=2)};\n" - for key, value in camel_data.items() - } - - _create_html_report(ui_api_data, output_path) - - return output_path - - -class _Bucket(BaseModel): - """ - Histogram bucket for data distribution visualization. - - Represents a single bucket in a histogram with its starting value and count - of data points falling within the bucket range. Used to create distribution - histograms for metrics like token counts and request timings. 
- """ - - value: float | int = Field(description="Starting value of the bucket range") - count: int = Field(description="Number of data points falling within this bucket") - - @staticmethod - def from_data( - data: list[float] | list[int], - bucket_width: float | None = None, - n_buckets: int | None = None, - ) -> tuple[list[_Bucket], float]: - """ - Create histogram buckets from numeric data values. - - Divides the data range into equal-width buckets and counts values within - each bucket. Either bucket_width or n_buckets can be specified; if neither - is provided, defaults to 10 buckets. - - :param data: Numeric values to bucket - :param bucket_width: Width of each bucket, computed if None - :param n_buckets: Number of buckets, defaults to 10 if width not specified - :return: Tuple of bucket list and computed bucket width - """ - if not data: - return [], 1.0 - - min_v = min(data) - max_v = max(data) - range_v = (1 + max_v) - min_v - - if bucket_width is None: - if n_buckets is None: - n_buckets = 10 - bucket_width = range_v / n_buckets - else: - n_buckets = ceil(range_v / bucket_width) - - bucket_counts: defaultdict[float | int, int] = defaultdict(int) - for val in data: - idx = int((val - min_v) // bucket_width) - if idx >= n_buckets: - idx = n_buckets - 1 - bucket_start = min_v + idx * bucket_width - bucket_counts[bucket_start] += 1 - - buckets = [ - _Bucket(value=start, count=count) - for start, count in sorted(bucket_counts.items()) - ] - return buckets, bucket_width - - -class _TabularDistributionSummary(DistributionSummary): - """ - Distribution summary with tabular percentile representation. - - Extends DistributionSummary to provide percentile data formatted for table - display in the HTML report. Filters to show only key percentiles (p50, p90, - p95, p99) for concise presentation. - """ - - @computed_field - def percentile_rows(self) -> list[dict[str, str | float]]: - """ - Format percentiles as table rows for UI display. 
- - :return: List of dictionaries with percentile names and values - """ - rows = [ - {"percentile": name, "value": value} - for name, value in self.percentiles.model_dump().items() - ] - return list( - filter(lambda row: row["percentile"] in ["p50", "p90", "p95", "p99"], rows) - ) - - def model_dump(self, **kwargs) -> dict: - """ - Override model_dump to filter duplicate consecutive percentile values. - - This prevents visualization errors when distributions have limited data - points causing multiple percentiles to collapse to the same value. - - :param kwargs: Arguments to pass to parent model_dump - :return: Dictionary with filtered percentiles - """ - data = super().model_dump(**kwargs) - - if "percentiles" in data and data["percentiles"]: - filtered_percentiles = _filter_duplicate_percentiles(data["percentiles"]) - data["percentiles"] = filtered_percentiles - - return data - - @classmethod - def from_distribution_summary( - cls, distribution: DistributionSummary - ) -> _TabularDistributionSummary: - """ - Convert standard DistributionSummary to tabular format. - - :param distribution: Source distribution summary to convert - :return: Tabular distribution summary with formatted percentile rows - """ - return cls(**distribution.model_dump()) - - -def _create_html_report(js_data: dict[str, str], output_path: Path) -> Path: - """ - Create HTML report by injecting JavaScript data into template. - - Loads the HTML template, injects JavaScript data into the head section, and - writes the final report to the specified output path. 
- - :param js_data: Dictionary mapping placeholder strings to JavaScript code - :param output_path: Path where HTML report will be saved - :return: Path to the saved report file - """ - html_content = load_text(settings.report_generation.source) - report_content = _inject_data(js_data, html_content) - - output_path.parent.mkdir(parents=True, exist_ok=True) - output_path.write_text(report_content) - return output_path - - -def _filter_duplicate_percentiles(percentiles: dict[str, float]) -> dict[str, float]: - """ - Filter out consecutive duplicate percentile values. - - When distributions have very few data points, multiple percentiles can have - the same value, which causes visualization libraries to fail. This function - keeps only the largest percentile for consecutive duplicate values, which is - more mathematically accurate as higher percentiles have greater statistical - significance. - - :param percentiles: Dictionary of percentile names to values - :return: Filtered percentiles dictionary with no consecutive duplicates - """ - if not percentiles: - return percentiles - - percentile_order = list(Percentiles.model_fields.keys()) - - # Iterate in reverse to keep the largest percentile for each value - filtered = {} - previous_value = None - - for key in reversed(percentile_order): - if key in percentiles: - current_value = percentiles[key] - if previous_value is None or current_value != previous_value: - filtered[key] = current_value - previous_value = current_value - - # Restore original order - return {key: filtered[key] for key in percentile_order if key in filtered} - - -def _inject_data(js_data: dict[str, str], html: str) -> str: - """ - Inject JavaScript data into HTML head section. - - Replaces placeholder strings in the HTML head section with actual JavaScript - code containing benchmark data. Returns original HTML if no head section found. 
- - :param js_data: Dictionary mapping placeholder strings to JavaScript code - :param html: HTML template content - :return: HTML with injected JavaScript data - """ - head_match = re.search(r"]*>(.*?)", html, re.DOTALL | re.IGNORECASE) - if not head_match: - logger.warning(" section missing, returning original HTML.") - return html - - head_content = head_match.group(1) - - for placeholder, script in js_data.items(): - head_content = head_content.replace(placeholder, script) - - new_head = f"{head_content}" - return html[: head_match.start()] + new_head + html[head_match.end() :] - - -def _build_ui_data( - benchmarks: list[GenerativeBenchmark], args: BenchmarkGenerativeTextArgs -) -> dict[str, Any]: - """ - Build complete UI data structure from benchmarks. - - Aggregates benchmark results into a structured format for the HTML UI, - including run metadata, workload details, and per-benchmark metrics. - - :param benchmarks: List of completed benchmark results - :param args: Benchmark configuration arguments - :return: Dictionary with run_info, workload_details, and benchmarks sections - """ - return { - "run_info": _build_run_info(benchmarks, args), - "workload_details": _build_workload_details(benchmarks, args), - "benchmarks": _build_benchmarks(benchmarks), - } - - -def _build_run_info( - benchmarks: list[GenerativeBenchmark], args: BenchmarkGenerativeTextArgs -) -> dict[str, Any]: - """ - Build run metadata from benchmarks. - - Extracts model name, timestamp, and dataset information from the benchmark - configuration and results. 
- - :param benchmarks: List of completed benchmark results - :param args: Benchmark configuration arguments - :return: Dictionary with model, task, timestamp, and dataset information - """ - model = ( - args.backend_kwargs.model if hasattr(args.backend_kwargs, "model") else "N/A" - ) - timestamp = max(bm.start_time for bm in benchmarks if bm.start_time is not None) - return { - "model": {"name": model, "size": 0}, - "task": "N/A", - "timestamp": timestamp, - "dataset": {"name": "N/A"}, - } - - -def _build_workload_details( - benchmarks: list[GenerativeBenchmark], args: BenchmarkGenerativeTextArgs -) -> dict[str, Any]: - """ - Build workload details from benchmarks. - - Aggregates prompt and generation samples, token distribution statistics, - request timing histograms, and server configuration. Samples up to 5 random - prompts and outputs for display. - - :param benchmarks: List of completed benchmark results - :param args: Benchmark configuration arguments - :return: Dictionary with prompts, generations, request timing, and server info - """ - target = ( - args.backend_kwargs.target if hasattr(args.backend_kwargs, "target") else None - ) - rate_type = benchmarks[0].config.strategy.type_ - successful_requests = [req for bm in benchmarks for req in bm.requests.successful] - - sample_indices = random.sample( - range(len(successful_requests)), min(5, len(successful_requests)) - ) - sample_prompts = [ - req.request_args.replace("\n", " ").replace('"', "'") - if (req := successful_requests[i]).request_args - else "" - for i in sample_indices - ] - sample_outputs = [ - req.output.replace("\n", " ").replace('"', "'") - if (req := successful_requests[i]).output - else "" - for i in sample_indices - ] - - prompt_tokens = [ - float(req.prompt_tokens) if req.prompt_tokens is not None else -1 - for bm in benchmarks - for req in bm.requests.successful - ] - output_tokens = [ - float(req.output_tokens) if req.output_tokens is not None else -1 - for bm in benchmarks - for req in 
bm.requests.successful - ] - - prompt_token_buckets, _prompt_bucket_width = _Bucket.from_data(prompt_tokens, 1) - output_token_buckets, _output_bucket_width = _Bucket.from_data(output_tokens, 1) - - prompt_token_stats = DistributionSummary.from_values(prompt_tokens) - output_token_stats = DistributionSummary.from_values(output_tokens) - - min_start_time = benchmarks[0].start_time - all_req_times = [ - req.info.timings.request_start - min_start_time - for bm in benchmarks - for req in bm.requests.successful - if req.info.timings.request_start is not None - ] - - number_of_buckets = len(benchmarks) - request_buckets, bucket_width = _Bucket.from_data( - all_req_times, None, number_of_buckets - ) - - return { - "prompts": { - "samples": sample_prompts, - "token_distributions": { - "statistics": prompt_token_stats.model_dump() - if prompt_token_stats - else None, - "buckets": [b.model_dump() for b in prompt_token_buckets], - "bucket_width": 1, - }, - }, - "generations": { - "samples": sample_outputs, - "token_distributions": { - "statistics": output_token_stats.model_dump() - if output_token_stats - else None, - "buckets": [b.model_dump() for b in output_token_buckets], - "bucket_width": 1, - }, - }, - "requests_over_time": { - "requests_over_time": { - "buckets": [b.model_dump() for b in request_buckets], - "bucket_width": bucket_width, - }, - "num_benchmarks": number_of_buckets, - }, - "rate_type": rate_type, - "server": {"target": target}, - } - - -def _build_benchmarks(benchmarks: list[GenerativeBenchmark]) -> list[dict[str, Any]]: - """ - Build benchmark metrics data for UI display. - - Extracts key performance metrics from each benchmark including requests per - second, inter-token latency, time to first token, throughput, and request - latency. Formats distribution summaries for tabular display. 
- - :param benchmarks: List of completed benchmark results - :return: List of dictionaries with formatted benchmark metrics - """ - result = [] - for bm in benchmarks: - result.append( - { - "requests_per_second": bm.metrics.requests_per_second.successful.mean, - "itl": _TabularDistributionSummary.from_distribution_summary( - bm.metrics.inter_token_latency_ms.successful - ).model_dump(), - "ttft": _TabularDistributionSummary.from_distribution_summary( - bm.metrics.time_to_first_token_ms.successful - ).model_dump(), - "throughput": _TabularDistributionSummary.from_distribution_summary( - bm.metrics.output_tokens_per_second.successful - ).model_dump(), - "time_per_request": ( - _TabularDistributionSummary.from_distribution_summary( - bm.metrics.request_latency.successful - ).model_dump() - ), - } - ) - return result diff --git a/src/guidellm/benchmark/outputs/html/__init__.py b/src/guidellm/benchmark/outputs/html/__init__.py new file mode 100644 index 000000000..bdbcf8060 --- /dev/null +++ b/src/guidellm/benchmark/outputs/html/__init__.py @@ -0,0 +1,25 @@ +"""Plotly-based HTML output generation for GuideLLM benchmarks.""" + +from guidellm.benchmark.outputs.html.data_builder import ( + Bucket, + TabularDistributionSummary, + build_benchmarks, + build_run_info, + build_ui_data, + build_workload_details, +) +from guidellm.benchmark.outputs.html.plotly_output import ( + GenerativeBenchmarkerHTML, +) +from guidellm.benchmark.outputs.html.theme import PlotlyTheme + +__all__ = [ + "Bucket", + "GenerativeBenchmarkerHTML", + "PlotlyTheme", + "TabularDistributionSummary", + "build_benchmarks", + "build_run_info", + "build_ui_data", + "build_workload_details", +] diff --git a/src/guidellm/benchmark/outputs/html/components/__init__.py b/src/guidellm/benchmark/outputs/html/components/__init__.py new file mode 100644 index 000000000..d99829953 --- /dev/null +++ b/src/guidellm/benchmark/outputs/html/components/__init__.py @@ -0,0 +1,5 @@ +"""HTML component generators for 
Plotly-based reports.""" + +from guidellm.benchmark.outputs.html.components.base import PlotlyComponentBase + +__all__ = ["PlotlyComponentBase"] diff --git a/src/guidellm/benchmark/outputs/html/components/base.py b/src/guidellm/benchmark/outputs/html/components/base.py new file mode 100644 index 000000000..7f632d28c --- /dev/null +++ b/src/guidellm/benchmark/outputs/html/components/base.py @@ -0,0 +1,60 @@ +"""Base classes for HTML component generation.""" + +from abc import ABC, abstractmethod +from typing import Any + +import plotly.graph_objects as go + +from guidellm.benchmark.outputs.html.theme import PlotlyTheme + + +class PlotlyComponentBase(ABC): + """Abstract base class for Plotly-based HTML components.""" + + def __init__(self, theme: PlotlyTheme | None = None): + """Initialize the component. + + Args: + theme: Optional PlotlyTheme instance. If None, uses default theme. + """ + self.theme = theme or PlotlyTheme() + + @abstractmethod + def generate(self, data: dict[str, Any]) -> str | go.Figure: + """Generate the component output. + + Args: + data: Data dictionary containing component-specific data. + + Returns: + Either an HTML string or a Plotly Figure object. + """ + ... + + def _apply_theme_to_figure(self, fig: go.Figure) -> go.Figure: + """Apply theme to a Plotly figure. + + Args: + fig: Plotly figure to style. + + Returns: + Styled figure. + """ + layout_updates = self.theme.get_base_layout() + fig.update_layout(**layout_updates) + return fig + + def _create_figure(self, **layout_kwargs: Any) -> go.Figure: + """Create a new figure with theme applied. + + Args: + **layout_kwargs: Additional layout parameters to merge with theme. + + Returns: + New Plotly figure with theme applied. 
+ """ + fig = go.Figure() + base_layout = self.theme.get_base_layout() + base_layout.update(layout_kwargs) + fig.update_layout(**base_layout) + return fig diff --git a/src/guidellm/benchmark/outputs/html/components/footer.py b/src/guidellm/benchmark/outputs/html/components/footer.py new file mode 100644 index 000000000..db191fe6c --- /dev/null +++ b/src/guidellm/benchmark/outputs/html/components/footer.py @@ -0,0 +1,32 @@ +"""Footer component for HTML reports.""" + +from typing import Any + +from guidellm.benchmark.outputs.html.components.base import PlotlyComponentBase + + +class FooterComponent(PlotlyComponentBase): + """Generates the page footer.""" + + def generate(self, _data: dict[str, Any] | None = None) -> str: + """Generate footer HTML. + + Args: + _data: Optional data dictionary (not used for footer). + + Returns: + HTML string for the footer section. + """ + return """ + + """ diff --git a/src/guidellm/benchmark/outputs/html/components/header.py b/src/guidellm/benchmark/outputs/html/components/header.py new file mode 100644 index 000000000..e6033fe41 --- /dev/null +++ b/src/guidellm/benchmark/outputs/html/components/header.py @@ -0,0 +1,59 @@ +"""Header component for HTML reports.""" + +from typing import Any + +from guidellm.benchmark.outputs.html.components.base import PlotlyComponentBase +from guidellm.utils.functions import safe_format_timestamp + + +class HeaderComponent(PlotlyComponentBase): + """Generates the page header with run metadata.""" + + def generate(self, data: dict[str, Any]) -> str: + """Generate header HTML. + + Args: + data: Dictionary containing: + - model: Dict with 'name' key + - timestamp: ISO format timestamp or datetime + - dataset: Dict with 'name' key (optional) + - task: Task name (optional) + + Returns: + HTML string for the header section. 
+ """ + model_name = data.get("model", {}).get("name", "N/A") + timestamp = safe_format_timestamp( + data.get("timestamp"), + format_="%B %d %Y at %H:%M:%S", + ) + dataset_name = data.get("dataset", {}).get("name", "N/A") + task = data.get("task", "N/A") + + return f""" +
+

GuideLLM Benchmark Report

+
+
+
Model
+
{model_name}
+
+
+
Timestamp
+
{timestamp}
+
+
+
Dataset
+
{dataset_name}
+
+ { + f'''
+
Task
+
{task}
+
''' + if task != "N/A" + else "" + } +
+
+ """ diff --git a/src/guidellm/benchmark/outputs/html/components/metrics_summary.py b/src/guidellm/benchmark/outputs/html/components/metrics_summary.py new file mode 100644 index 000000000..63f43b3a9 --- /dev/null +++ b/src/guidellm/benchmark/outputs/html/components/metrics_summary.py @@ -0,0 +1,251 @@ +"""Metrics summary component for HTML reports.""" + +from typing import Any + +import plotly.graph_objects as go +from plotly.subplots import make_subplots + +from guidellm.benchmark.outputs.html.components.base import PlotlyComponentBase + + +class MetricsSummaryComponent(PlotlyComponentBase): + """Generates the SLO metrics summary dashboard with interactive RPS slider.""" + + def generate(self, data: dict[str, Any]) -> str: + """Generate metrics summary HTML with SLO dashboard. + + Args: + data: Dictionary containing: + - benchmarks: List of benchmark dicts with metrics + - thresholds: Optional dict with SLO thresholds + + Returns: + HTML string with metrics summary section. + """ + benchmarks = data.get("benchmarks", []) + thresholds = data.get("thresholds", {}) + + if not benchmarks: + return """ +
+

Metrics Summary

+

No benchmark data available

+
+ """ + + # Create the interactive dashboard figure + fig = self._create_dashboard_figure(benchmarks, thresholds) + + # Convert to HTML + chart_html = fig.to_html(include_plotlyjs=False, div_id="metrics-summary") + + return f""" +
+

SLO Metrics Summary

+

+ Use the slider below to explore metrics at different RPS rates. + Default percentile: P95 +

+ {chart_html} +
+ """ + + def _create_dashboard_figure( + self, benchmarks: list[dict[str, Any]], thresholds: dict[str, float] + ) -> go.Figure: + """Create interactive dashboard with RPS slider. + + Args: + benchmarks: List of benchmark data. + thresholds: Optional SLO thresholds. + + Returns: + Plotly figure with interactive dashboard. + """ + # Create 2x2 subplot + fig = make_subplots( + rows=2, + cols=2, + subplot_titles=( + "TTFT (ms)", + "ITL (ms)", + "Time Per Request (s)", + "Throughput (tokens/s)", + ), + vertical_spacing=0.15, + horizontal_spacing=0.12, + ) + + # Apply theme + fig = self._apply_theme_to_figure(fig) + + # Extract RPS values + rps_values = [bm["requests_per_second"] for bm in benchmarks] + + # For each RPS value, create traces (all hidden except default) + default_idx = len(benchmarks) // 2 # Default to middle RPS value + + # Create traces for each metric at each RPS + self._add_metric_bar_traces( + fig, benchmarks, "ttft", 1, 1, "TTFT", default_idx, thresholds.get("ttft") + ) + self._add_metric_bar_traces( + fig, benchmarks, "itl", 1, 2, "ITL", default_idx, thresholds.get("itl") + ) + self._add_metric_bar_traces( + fig, + benchmarks, + "time_per_request", + 2, + 1, + "Latency", + default_idx, + thresholds.get("time_per_request"), + ) + self._add_metric_bar_traces( + fig, + benchmarks, + "throughput", + 2, + 2, + "Throughput", + default_idx, + thresholds.get("throughput"), + ) + + # Create slider steps + steps = [] + for i, rps in enumerate(rps_values): + step = { + "method": "update", + "args": [ + {"visible": self._create_visibility_array(i, len(benchmarks))}, + { + "title": f"SLO Metrics at {rps:.2f} RPS" + }, # Update title with current RPS + ], + "label": f"{rps:.1f}", + } + steps.append(step) + + sliders = [ + { + "active": default_idx, + "yanchor": "top", + "y": -0.15, + "xanchor": "left", + "x": 0.1, + "currentvalue": { + "prefix": "RPS: ", + "visible": True, + "xanchor": "center", + "font": {"size": 16, "color": self.theme.PRIMARY}, + }, + "pad": 
{"b": 10, "t": 50}, + "len": 0.8, + "steps": steps, + } + ] + + # Update layout + fig.update_layout( + title=f"SLO Metrics at {rps_values[default_idx]:.2f} RPS", + title_font_size=24, + title_font_color=self.theme.SECONDARY, + sliders=sliders, + showlegend=False, + height=700, + margin={"b": 120}, # Extra margin for slider + ) + + return fig + + def _add_metric_bar_traces( + self, + fig: go.Figure, + benchmarks: list[dict[str, Any]], + metric_name: str, + row: int, + col: int, + label: str, + default_idx: int, + threshold: float | None = None, + ) -> None: + """Add bar chart traces for a metric across all RPS values. + + Args: + fig: Figure to add traces to. + benchmarks: List of benchmark data. + metric_name: Metric name (e.g., 'ttft'). + row: Subplot row. + col: Subplot column. + label: Label for the metric. + default_idx: Index of default visible trace. + threshold: Optional SLO threshold value. + """ + percentiles = ["p50", "p90", "p95", "p99"] + percentile_labels = ["P50", "P90", "P95 (default)", "P99"] + colors = [ + self.theme.TERTIARY, + self.theme.SECONDARY, + self.theme.PRIMARY, + self.theme.ERROR, + ] + + for i, bm in enumerate(benchmarks): + metric_data = bm[metric_name] + percentiles_data = metric_data.get("percentiles", {}) + + # Get values for each percentile + pct_values = [percentiles_data.get(p, 0) for p in percentiles] + + # Add bar trace + visible = i == default_idx + + fig.add_trace( + go.Bar( + x=percentile_labels, + y=pct_values, + marker_color=colors, + visible=visible, + hovertemplate=f"{label}: %{{y:.2f}}", + text=[f"{v:.2f}" for v in pct_values], + textposition="outside", + textfont={"color": self.theme.TEXT_PRIMARY}, + ), + row=row, + col=col, + ) + + # Add threshold line if provided + if threshold and visible: + fig.add_hline( + y=threshold, + line_dash="dash", + line_color=self.theme.SUCCESS, + annotation_text=f"Threshold: {threshold}", + annotation_position="right", + row=row, + col=col, + ) + + def _create_visibility_array( + self, 
active_idx: int, num_benchmarks: int + ) -> list[bool]: + """Create visibility array for slider step. + + Each metric has num_benchmarks traces, so visibility array needs + to show the active_idx trace for each of the 4 metrics. + + Args: + active_idx: Index of RPS value to show. + num_benchmarks: Total number of benchmarks. + + Returns: + List of boolean visibility values. + """ + visibility = [] + for _metric_idx in range(4): # 4 metrics (TTFT, ITL, TPR, Throughput) + for bm_idx in range(num_benchmarks): + visibility.append(bm_idx == active_idx) + return visibility diff --git a/src/guidellm/benchmark/outputs/html/components/workload_details.py b/src/guidellm/benchmark/outputs/html/components/workload_details.py new file mode 100644 index 000000000..eb0961580 --- /dev/null +++ b/src/guidellm/benchmark/outputs/html/components/workload_details.py @@ -0,0 +1,311 @@ +"""Workload details component for HTML reports.""" + +from typing import Any + +import plotly.graph_objects as go + +from guidellm.benchmark.outputs.html.components.base import PlotlyComponentBase + +# Maximum characters to display for sample text +_SAMPLE_MAX_LENGTH = 100 + + +class WorkloadDetailsComponent(PlotlyComponentBase): + """Generates the workload details 3-panel layout.""" + + def generate(self, data: dict[str, Any]) -> str: + """Generate workload details HTML with charts. + + Args: + data: Dictionary containing: + - prompts: Dict with 'samples' and 'token_distributions' + - generations: Dict with 'samples' and 'token_distributions' + - requests_over_time: Dict with request timing data + - server: Dict with 'target' URL + - rate_type: Benchmark rate type + + Returns: + HTML string with workload details section including charts. 
+ """ + prompts_data = data.get("prompts", {}) + generations_data = data.get("generations", {}) + requests_data = data.get("requests_over_time", {}) + server_data = data.get("server", {}) + rate_type = data.get("rate_type", "N/A") + num_benchmarks = requests_data.get("num_benchmarks", 0) + + # Build HTML sections + prompts_html = self._generate_prompts_section(prompts_data) + server_html = self._generate_server_section( + server_data, rate_type, num_benchmarks + ) + generations_html = self._generate_generations_section(generations_data) + + # Build charts + prompt_tokens_fig = self._create_histogram_chart( + prompts_data.get("token_distributions", {}), + "Prompt Token Distribution", + "length (tokens)", + ) + output_tokens_fig = self._create_histogram_chart( + generations_data.get("token_distributions", {}), + "Output Token Distribution", + "length (tokens)", + ) + requests_fig = self._create_requests_over_time_chart(requests_data) + + # Convert figures to HTML + prompt_chart_html = prompt_tokens_fig.to_html( + include_plotlyjs=False, div_id="prompt-tokens-chart" + ) + output_chart_html = output_tokens_fig.to_html( + include_plotlyjs=False, div_id="output-tokens-chart" + ) + requests_chart_html = requests_fig.to_html( + include_plotlyjs=False, div_id="requests-over-time-chart" + ) + + return f""" +
+

Workload Details

+
+
+

Prompts

+ {prompts_html} + {prompt_chart_html} +
+
+

Server Configuration

+ {server_html} + {requests_chart_html} +
+
+

Generations

+ {generations_html} + {output_chart_html} +
+
+
+ """ + + def _generate_prompts_section(self, prompts_data: dict[str, Any]) -> str: + """Generate HTML for prompts samples. + + Args: + prompts_data: Dict with 'samples' and 'token_distributions'. + + Returns: + HTML string for prompts section. + """ + samples = prompts_data.get("samples", []) + token_stats = prompts_data.get("token_distributions", {}).get("statistics", {}) + mean_tokens = token_stats.get("mean", 0) if token_stats else 0 + + # Sample prompt header + header_html = '
Sample Prompt
' + + if not samples: + samples_html = "

No prompt samples available

" + else: + samples_html = "".join( + f'
' + f"{sample[:_SAMPLE_MAX_LENGTH]}" + f"{'...' if len(sample) > _SAMPLE_MAX_LENGTH else ''}
" + for sample in samples[:5] + ) + + # Mean prompt length + mean_html = f""" +
+
Mean Prompt Length
+
{mean_tokens:.2f} tokens
+
+ """ + + return f"{header_html}{samples_html}{mean_html}" + + def _generate_server_section( + self, server_data: dict[str, Any], rate_type: str, num_benchmarks: int + ) -> str: + """Generate HTML for server configuration. + + Args: + server_data: Dict with 'target' URL. + rate_type: Rate type string. + num_benchmarks: Number of benchmarks. + + Returns: + HTML string for server section. + """ + target = server_data.get("target", "N/A") + + # Parse URL to extract protocol and port + protocol = "N/A" + port = "N/A" + if target != "N/A" and "://" in target: + protocol = target.split("://")[0] + rest = target.split("://")[1] + if ":" in rest: + port = rest.split(":")[1].split("/")[0] + else: + port = "80" if protocol == "http" else "443" + + return f""" +
+
+
Target
+
{target}
+
+
+
+
Type
+
{protocol}
+
+
+
Port
+
{port}
+
+
+
+
Number of Benchmarks
+
{num_benchmarks}
+
+
+
Rate Type
+
+ {rate_type} +
+
+
+ """ + + def _generate_generations_section(self, generations_data: dict[str, Any]) -> str: + """Generate HTML for generation samples. + + Args: + generations_data: Dict with 'samples' and 'token_distributions'. + + Returns: + HTML string for generations section. + """ + samples = generations_data.get("samples", []) + token_stats = generations_data.get("token_distributions", {}).get( + "statistics", {} + ) + mean_tokens = token_stats.get("mean", 0) if token_stats else 0 + + # Sample generated header + header_html = '
Sample Generated
' + + if not samples: + samples_html = "

No generation samples available

" + else: + samples_html = "".join( + f'
' + f"{sample[:_SAMPLE_MAX_LENGTH]}" + f"{'...' if len(sample) > _SAMPLE_MAX_LENGTH else ''}
" + for sample in samples[:5] + ) + + # Mean generated length + mean_html = f""" +
+
Mean Generated Length
+
{mean_tokens:.2f} tokens
+
+ """ + + return f"{header_html}{samples_html}{mean_html}" + + def _create_histogram_chart( + self, distribution_data: dict[str, Any], title: str, xaxis_title: str + ) -> go.Figure: + """Create a histogram chart with statistics overlay. + + Args: + distribution_data: Dict with 'buckets' and 'statistics'. + title: Chart title. + xaxis_title: X-axis title. + + Returns: + Plotly figure with histogram. + """ + buckets = distribution_data.get("buckets", []) + statistics = distribution_data.get("statistics", {}) + + fig = self._create_figure( + title=title, xaxis_title=xaxis_title, yaxis_title="Count" + ) + + if not buckets: + return fig + + # Extract bucket data + x_values = [b["value"] for b in buckets] + counts = [b["count"] for b in buckets] + + # Add bar chart + fig.add_trace( + go.Bar( + x=x_values, + y=counts, + name="Count", + marker_color=self.theme.PRIMARY, + hovertemplate="Tokens: %{x}
Count: %{y}", + ) + ) + + # Add mean line if available + if statistics and "mean" in statistics: + mean = statistics["mean"] + fig.add_vline( + x=mean, + line_dash="dash", + line_color=self.theme.SECONDARY, + annotation_text=f"Mean: {mean:.1f}", + annotation_position="top", + ) + + return fig + + def _create_requests_over_time_chart( + self, requests_data: dict[str, Any] + ) -> go.Figure: + """Create requests over time bar chart. + + Args: + requests_data: Dict with 'requests_over_time' containing 'buckets'. + + Returns: + Plotly figure with requests over time. + """ + requests_over_time = requests_data.get("requests_over_time", {}) + buckets = requests_over_time.get("buckets", []) + bucket_width = requests_over_time.get("bucket_width", 1.0) + + fig = self._create_figure( + title="Requests Over Time", + xaxis_title="Time (seconds)", + yaxis_title="Request Count", + ) + + if not buckets: + return fig + + # Extract bucket data + x_values = [b["value"] for b in buckets] + counts = [b["count"] for b in buckets] + + # Add bar chart + fig.add_trace( + go.Bar( + x=x_values, + y=counts, + name="Requests", + marker_color=self.theme.TERTIARY, + hovertemplate="Time: %{x:.1f}s
Requests: %{y}", + width=bucket_width * 0.8, # Make bars slightly narrower than buckets + ) + ) + + return fig diff --git a/src/guidellm/benchmark/outputs/html/components/workload_metrics.py b/src/guidellm/benchmark/outputs/html/components/workload_metrics.py new file mode 100644 index 000000000..5035fc285 --- /dev/null +++ b/src/guidellm/benchmark/outputs/html/components/workload_metrics.py @@ -0,0 +1,169 @@ +"""Workload metrics component for HTML reports.""" + +from typing import Any + +import plotly.graph_objects as go +from plotly.subplots import make_subplots + +from guidellm.benchmark.outputs.html.components.base import PlotlyComponentBase + + +class WorkloadMetricsComponent(PlotlyComponentBase): + """Generates the 2x2 workload metrics grid.""" + + def generate(self, data: dict[str, Any]) -> str: + """Generate workload metrics HTML with 2x2 grid of charts. + + Args: + data: Dictionary containing: + - benchmarks: List of benchmark dicts with metrics + + Returns: + HTML string with workload metrics section. + """ + benchmarks = data.get("benchmarks", []) + + if not benchmarks: + return """ +
+

Workload Metrics

+

No benchmark data available

+
+ """ + + # Create 2x2 subplot figure + fig = make_subplots( + rows=2, + cols=2, + subplot_titles=( + "Time to First Token (TTFT)", + "Inter-Token Latency (ITL)", + "Time Per Request", + "Throughput (tokens/sec)", + ), + vertical_spacing=0.12, + horizontal_spacing=0.1, + ) + + # Apply base theme + fig = self._apply_theme_to_figure(fig) + + # Extract RPS values for x-axis + [bm["requests_per_second"] for bm in benchmarks] + + # Add metric traces to each subplot + self._add_metric_traces(fig, benchmarks, "ttft", 1, 1, "TTFT (ms)") + self._add_metric_traces(fig, benchmarks, "itl", 1, 2, "ITL (ms)") + self._add_metric_traces( + fig, benchmarks, "time_per_request", 2, 1, "Latency (s)" + ) + self._add_metric_traces( + fig, benchmarks, "throughput", 2, 2, "Throughput (tokens/s)" + ) + + # Update axes + fig.update_xaxes(title_text="Requests per Second (RPS)", row=2, col=1) + fig.update_xaxes(title_text="Requests per Second (RPS)", row=2, col=2) + fig.update_yaxes(title_text="Milliseconds", row=1, col=1) + fig.update_yaxes(title_text="Milliseconds", row=1, col=2) + fig.update_yaxes(title_text="Seconds", row=2, col=1) + fig.update_yaxes(title_text="Tokens/Second", row=2, col=2) + + # Update layout + fig.update_layout( + title_text="Workload Metrics", + title_font_size=24, + title_font_color=self.theme.SECONDARY, + showlegend=True, + legend={ + "orientation": "h", + "yanchor": "bottom", + "y": 1.02, + "xanchor": "right", + "x": 1, + }, + height=800, + ) + + # Convert to HTML + chart_html = fig.to_html(include_plotlyjs=False, div_id="workload-metrics") + + return f""" +
+ {chart_html} +
+ """ + + def _add_metric_traces( + self, + fig: go.Figure, + benchmarks: list[dict[str, Any]], + metric_name: str, + row: int, + col: int, + trace_prefix: str, + ) -> None: + """Add metric traces to a subplot. + + Args: + fig: Plotly figure to add traces to. + benchmarks: List of benchmark data dicts. + metric_name: Name of the metric (e.g., 'ttft', 'itl'). + row: Subplot row number. + col: Subplot column number. + trace_prefix: Prefix for trace names. + """ + rps_values = [bm["requests_per_second"] for bm in benchmarks] + + # Extract metric data + mean_values = [bm[metric_name].get("mean", 0) for bm in benchmarks] + + # Add mean line (primary trace) + fig.add_trace( + go.Scatter( + x=rps_values, + y=mean_values, + mode="lines+markers", + name=f"{trace_prefix} Mean", + line={"width": 3, "color": self.theme.PRIMARY}, + marker={"size": 8}, + hovertemplate=( + f"RPS: %{{x:.2f}}
{trace_prefix}: %{{y:.2f}}" + ), + ), + row=row, + col=col, + ) + + # Add percentile lines + percentiles = ["p50", "p90", "p95", "p99"] + colors = [ + self.theme.TERTIARY, + self.theme.SECONDARY, + self.theme.QUATERNARY, + self.theme.ERROR, + ] + + for pct, color in zip(percentiles, colors, strict=False): + # Check if percentiles exist in the data + if benchmarks[0][metric_name].get("percentiles"): + pct_values = [ + bm[metric_name].get("percentiles", {}).get(pct, 0) + for bm in benchmarks + ] + + fig.add_trace( + go.Scatter( + x=rps_values, + y=pct_values, + mode="lines", + name=f"{trace_prefix} {pct.upper()}", + line={"width": 1.5, "dash": "dash", "color": color}, + hovertemplate=( + f"RPS: %{{x:.2f}}
class Bucket(BaseModel):
    """
    Histogram bucket for data distribution visualization.

    Represents a single bucket in a histogram with its starting value and count
    of data points falling within the bucket range.
    """

    value: float | int = Field(description="Starting value of the bucket range")
    count: int = Field(description="Number of data points falling within this bucket")

    @staticmethod
    def from_data(
        data: list[float] | list[int],
        bucket_width: float | None = None,
        n_buckets: int | None = None,
    ) -> tuple[list[Bucket], float]:
        """
        Create histogram buckets from numeric data values.

        Only buckets that contain at least one value are returned; empty
        buckets are omitted from the result.

        :param data: Numeric values to bucket
        :param bucket_width: Width of each bucket, computed if None
        :param n_buckets: Number of buckets, defaults to 10 if width not specified;
            ignored (recomputed from the range) when bucket_width is given
        :return: Tuple of bucket list and computed bucket width; ``([], 1.0)``
            when data is empty
        :raises ZeroDivisionError: If bucket_width or n_buckets is 0 while data
            is non-empty
        """
        if not data:
            return [], 1.0

        min_v = min(data)
        max_v = max(data)
        # The +1 makes the covered range inclusive of max_v, so a data set with
        # a single distinct value still has a non-zero range.
        range_v = (1 + max_v) - min_v

        if bucket_width is None:
            if n_buckets is None:
                n_buckets = 10
            bucket_width = range_v / n_buckets
        else:
            n_buckets = ceil(range_v / bucket_width)

        bucket_counts: defaultdict[float | int, int] = defaultdict(int)
        for val in data:
            # Clamp so floating point rounding can never index past the last bucket.
            idx = min(int((val - min_v) // bucket_width), n_buckets - 1)
            bucket_counts[min_v + idx * bucket_width] += 1

        buckets = [
            Bucket(value=start, count=count)
            for start, count in sorted(bucket_counts.items())
        ]
        return buckets, bucket_width


def _filter_duplicate_percentiles(percentiles: dict[str, float]) -> dict[str, float]:
    """
    Filter out duplicate consecutive percentile values.

    Keeps the last entry of each run of equal consecutive values. When the
    mapping is ordered from lowest to highest percentile this is the highest
    percentile of each duplicate group.

    :param percentiles: Dictionary of percentile names to values
    :return: Filtered percentiles dictionary (original order preserved) with the
        last entry from each duplicate group; the input object itself when empty
    """
    if not percentiles:
        return percentiles

    items = list(percentiles.items())
    last_index = len(items) - 1
    # An entry survives when it is the final one or the next value differs.
    return {
        name: value
        for idx, (name, value) in enumerate(items)
        if idx == last_index or items[idx + 1][1] != value
    }


class TabularDistributionSummary(DistributionSummary):
    """
    Distribution summary with tabular percentile representation.

    Extends DistributionSummary to provide percentile data formatted for table
    display in the HTML report.
    """

    @computed_field
    def percentile_rows(self) -> list[dict[str, str | float]]:
        """
        Format percentiles as table rows for UI display.

        :return: List of dictionaries with percentile names and values, limited
            to p50, p90, p95 and p99
        """
        displayed = {"p50", "p90", "p95", "p99"}
        return [
            {"percentile": name, "value": value}
            for name, value in self.percentiles.model_dump().items()
            if name in displayed
        ]

    def model_dump(self, **kwargs) -> dict:
        """
        Override model_dump to filter duplicate consecutive percentile values.

        :param kwargs: Arguments to pass to parent model_dump
        :return: Dictionary with filtered percentiles
        """
        # NOTE(review): only direct model_dump() calls go through this override;
        # presumably model_dump_json() and nested serialization do not -- confirm
        # against the pydantic version in use before relying on it elsewhere.
        data = super().model_dump(**kwargs)

        if data.get("percentiles"):
            data["percentiles"] = _filter_duplicate_percentiles(data["percentiles"])

        return data

    @classmethod
    def from_distribution_summary(
        cls, distribution: DistributionSummary
    ) -> TabularDistributionSummary:
        """
        Convert standard DistributionSummary to tabular format.

        :param distribution: Source distribution summary to convert
        :return: Tabular distribution summary with formatted percentile rows
        """
        return cls(**distribution.model_dump())


def build_ui_data(
    benchmarks: list[GenerativeBenchmark], args: BenchmarkGenerativeTextArgs
) -> dict[str, Any]:
    """
    Build complete UI data structure from benchmarks.

    :param benchmarks: List of completed benchmark results
    :param args: Benchmark configuration arguments
    :return: Dictionary with run_info, workload_details, and benchmarks sections
    """
    return {
        "run_info": build_run_info(benchmarks, args),
        "workload_details": build_workload_details(benchmarks, args),
        "benchmarks": build_benchmarks(benchmarks),
    }


def build_run_info(
    benchmarks: list[GenerativeBenchmark], args: BenchmarkGenerativeTextArgs
) -> dict[str, Any]:
    """
    Build run metadata from benchmarks.

    :param benchmarks: List of completed benchmark results
    :param args: Benchmark configuration arguments
    :return: Dictionary with model, task, timestamp, and dataset information;
        timestamp is the latest benchmark start time, or None when no benchmark
        has a start time
    """
    # default=None: max() raises ValueError on an empty sequence, which would
    # abort report generation when there are no benchmarks or no start times.
    timestamp = max(
        (bm.start_time for bm in benchmarks if bm.start_time is not None),
        default=None,
    )
    return {
        "model": {"name": args.model or "N/A", "size": 0},
        "task": "N/A",
        "timestamp": timestamp,
        "dataset": {"name": "N/A"},
    }


def _flatten_sample_text(text: str | None) -> str:
    """
    Collapse a sample onto one line and swap double quotes for single quotes.

    :param text: Raw prompt or output text, may be None or empty
    :return: Single-line text, or an empty string when text is falsy
    """
    return text.replace("\n", " ").replace('"', "'") if text else ""


def build_workload_details(
    benchmarks: list[GenerativeBenchmark], args: BenchmarkGenerativeTextArgs
) -> dict[str, Any]:
    """
    Build workload details from benchmarks.

    Samples up to five successful requests at random for display, builds token
    count histograms (bucket width 1) and a histogram of request start offsets
    with one bucket per benchmark.

    :param benchmarks: List of completed benchmark results
    :param args: Benchmark configuration arguments
    :return: Dictionary with prompts, generations, request timing, and server info
    """
    # Guarded so an empty benchmark list does not raise IndexError here.
    rate_type = benchmarks[0].config.strategy.type_ if benchmarks else None
    successful_requests = [req for bm in benchmarks for req in bm.requests.successful]

    # Prompts and outputs are taken from the same sampled requests so the two
    # sample lists line up by position.
    sampled_requests = random.sample(
        successful_requests, min(5, len(successful_requests))
    )
    sample_prompts = [
        _flatten_sample_text(req.request_args) for req in sampled_requests
    ]
    sample_outputs = [_flatten_sample_text(req.output) for req in sampled_requests]

    # Requests without a token count are recorded as -1.
    # NOTE(review): the -1 sentinel is included in both the histogram and the
    # statistics below -- confirm this is what the report components expect.
    prompt_tokens = [
        float(req.prompt_tokens) if req.prompt_tokens is not None else -1
        for req in successful_requests
    ]
    output_tokens = [
        float(req.output_tokens) if req.output_tokens is not None else -1
        for req in successful_requests
    ]

    token_bucket_width = 1
    prompt_token_buckets, _ = Bucket.from_data(prompt_tokens, token_bucket_width)
    output_token_buckets, _ = Bucket.from_data(output_tokens, token_bucket_width)

    # NOTE(review): behavior of from_values on an empty list is defined outside
    # this module -- verify before relying on empty benchmark lists.
    prompt_token_stats = DistributionSummary.from_values(prompt_tokens)
    output_token_stats = DistributionSummary.from_values(output_tokens)

    # Offsets are measured from the earliest benchmark start rather than from
    # whichever benchmark happens to be first in the list, so they cannot go
    # negative when benchmarks are not in chronological order.
    min_start_time = min(
        (bm.start_time for bm in benchmarks if bm.start_time is not None),
        default=None,
    )
    all_req_times = (
        []
        if min_start_time is None
        else [
            req.info.timings.request_start - min_start_time
            for req in successful_requests
            if req.info.timings.request_start is not None
        ]
    )

    number_of_buckets = len(benchmarks)
    request_buckets, bucket_width = Bucket.from_data(
        all_req_times, None, number_of_buckets
    )

    return {
        "prompts": {
            "samples": sample_prompts,
            "token_distributions": {
                "statistics": prompt_token_stats.model_dump()
                if prompt_token_stats
                else None,
                "buckets": [b.model_dump() for b in prompt_token_buckets],
                "bucket_width": token_bucket_width,
            },
        },
        "generations": {
            "samples": sample_outputs,
            "token_distributions": {
                "statistics": output_token_stats.model_dump()
                if output_token_stats
                else None,
                "buckets": [b.model_dump() for b in output_token_buckets],
                "bucket_width": token_bucket_width,
            },
        },
        "requests_over_time": {
            "requests_over_time": {
                "buckets": [b.model_dump() for b in request_buckets],
                "bucket_width": bucket_width,
            },
            "num_benchmarks": number_of_buckets,
        },
        "rate_type": rate_type,
        "server": {"target": args.target},
    }


def build_benchmarks(benchmarks: list[GenerativeBenchmark]) -> list[dict[str, Any]]:
    """
    Build benchmark metrics data for UI display.

    Only the ``successful`` request statistics of each metric are reported.

    :param benchmarks: List of completed benchmark results
    :return: List of dictionaries with formatted benchmark metrics
    """
    to_tabular = TabularDistributionSummary.from_distribution_summary
    return [
        {
            "requests_per_second": bm.metrics.requests_per_second.successful.mean,
            "itl": to_tabular(bm.metrics.inter_token_latency_ms.successful).model_dump(),
            "ttft": to_tabular(
                bm.metrics.time_to_first_token_ms.successful
            ).model_dump(),
            "throughput": to_tabular(
                bm.metrics.output_tokens_per_second.successful
            ).model_dump(),
            "time_per_request": to_tabular(
                bm.metrics.request_latency.successful
            ).model_dump(),
        }
        for bm in benchmarks
    ]
@GenerativeBenchmarkerOutput.register("html")
class GenerativeBenchmarkerHTML(GenerativeBenchmarkerOutput):
    """
    Plotly-based HTML output formatter for benchmark results.

    Generates interactive HTML reports using Plotly charts.
    This eliminates JavaScript dependencies and security vulnerabilities while
    maintaining visual appearance and interactivity.

    :cvar DEFAULT_FILE: Default filename for HTML output
    """

    DEFAULT_FILE: ClassVar[str] = "benchmarks.html"

    output_path: Path = Field(
        default_factory=lambda: Path.cwd(),
        description=(
            "Directory or file path for saving the HTML report, "
            "defaults to current working directory"
        ),
    )

    @classmethod
    def validated_kwargs(
        cls, output_path: str | Path | None, **_kwargs
    ) -> dict[str, Any]:
        """
        Validate and normalize output path argument.

        :param output_path: Output file or directory path for the HTML report
        :return: Dictionary containing validated output_path if provided,
            otherwise an empty dictionary so the field default applies
        """
        validated: dict[str, Any] = {}
        if output_path is not None:
            validated["output_path"] = Path(output_path)
        return validated

    async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
        """
        Generate and save the Plotly-based HTML benchmark report.

        Builds data structures, generates components, assembles HTML, and writes
        to the output path.

        :param report: Completed benchmark report containing all results
        :return: Path to the saved HTML report file
        """
        output_path = self.output_path
        # An existing directory gets the default file name appended; any other
        # path is treated as the target file itself.
        if output_path.is_dir():
            output_path = output_path / self.DEFAULT_FILE
        output_path.parent.mkdir(parents=True, exist_ok=True)

        data = build_ui_data(report.benchmarks, report.args)
        html_content = self._assemble_html(data)
        output_path.write_text(html_content, encoding="utf-8")

        return output_path

    def _assemble_html(self, data: dict[str, Any]) -> str:
        """
        Assemble complete HTML from components.

        Placeholders in the base template are substituted in a single pass, so
        text produced by one component (for example a model name or a sampled
        prompt that happens to contain ``{FOOTER_CONTENT}``) is never rescanned
        and replaced by another component's output.

        :param data: UI data dictionary with run_info, workload_details, benchmarks
        :return: Complete HTML string
        """
        # Function-scope import keeps this method self-contained.
        import re

        theme = PlotlyTheme()

        # Both metrics components read the same benchmarks payload.
        metrics_data = {"benchmarks": data["benchmarks"]}

        replacements = {
            "{CSS_CONTENT}": theme.get_css(),
            "{HEADER_CONTENT}": HeaderComponent(theme=theme).generate(
                data["run_info"]
            ),
            "{WORKLOAD_DETAILS_CONTENT}": WorkloadDetailsComponent(
                theme=theme
            ).generate(data["workload_details"]),
            "{METRICS_SUMMARY_CONTENT}": MetricsSummaryComponent(
                theme=theme
            ).generate(metrics_data),
            "{WORKLOAD_METRICS_CONTENT}": WorkloadMetricsComponent(
                theme=theme
            ).generate(metrics_data),
            "{FOOTER_CONTENT}": FooterComponent(theme=theme).generate(),
        }

        template_path = Path(__file__).parent / "templates" / "base.html"
        template = template_path.read_text(encoding="utf-8")

        placeholder_pattern = re.compile(
            "|".join(re.escape(token) for token in replacements)
        )
        # A callable replacement is used so backslashes in the generated HTML
        # are inserted literally instead of being read as regex escapes.
        return placeholder_pattern.sub(
            lambda match: replacements[match.group(0)], template
        )
+ {HEADER_CONTENT} + + {WORKLOAD_DETAILS_CONTENT} + + {METRICS_SUMMARY_CONTENT} + + {WORKLOAD_METRICS_CONTENT} + + {FOOTER_CONTENT} +
class PlotlyTheme:
    """
    Material-UI dark theme configuration for Plotly charts.

    Holds the colour and font constants of the report theme and renders them
    into a Plotly layout dictionary and a CSS stylesheet. Both renderers read
    the constants through ``cls``, so a subclass overriding a constant changes
    every place that constant is used.
    """

    # Material-UI dark theme colors
    BACKGROUND = "#121212"
    SURFACE = "#1e1e1e"
    PRIMARY = "#90caf9"  # Blue
    SECONDARY = "#ce93d8"  # Purple
    TERTIARY = "#80cbc4"  # Teal
    QUATERNARY = "#fff59d"  # Yellow
    SUCCESS = "#66bb6a"  # Green
    ERROR = "#f44336"  # Red
    TEXT_PRIMARY = "rgba(255, 255, 255, 0.87)"
    TEXT_SECONDARY = "rgba(255, 255, 255, 0.6)"
    TEXT_DISABLED = "rgba(255, 255, 255, 0.38)"

    # Font family
    FONT_FAMILY = "Spezia, Roboto, -apple-system, BlinkMacSystemFont, sans-serif"

    # Chart colors palette (for multi-line charts)
    CHART_COLORS = [
        PRIMARY,
        SECONDARY,
        TERTIARY,
        QUATERNARY,
        "#ef5350",  # Red
        "#ab47bc",  # Deep purple
        "#42a5f5",  # Light blue
        "#26a69a",  # Teal
    ]

    @classmethod
    def get_base_layout(cls) -> dict[str, Any]:
        """
        Get base Plotly layout configuration.

        :return: Newly built dictionary with Plotly layout settings for the
            dark theme
        """
        grid_color = "rgba(255, 255, 255, 0.1)"
        zero_line_color = "rgba(255, 255, 255, 0.2)"

        def axis() -> dict[str, str]:
            # Built per call so the x and y axis dictionaries are independent.
            return {
                "gridcolor": grid_color,
                "zerolinecolor": zero_line_color,
                "color": cls.TEXT_SECONDARY,
            }

        return {
            "paper_bgcolor": cls.BACKGROUND,
            "plot_bgcolor": cls.SURFACE,
            "font": {
                "family": cls.FONT_FAMILY,
                "size": 12,
                "color": cls.TEXT_PRIMARY,
            },
            "xaxis": axis(),
            "yaxis": axis(),
            "legend": {
                "font": {"color": cls.TEXT_PRIMARY},
                "bgcolor": "rgba(0, 0, 0, 0.5)",
            },
            "hovermode": "closest",
            "hoverlabel": {
                "bgcolor": cls.SURFACE,
                "font": {"family": cls.FONT_FAMILY, "color": cls.TEXT_PRIMARY},
            },
        }

    @classmethod
    def get_css(cls) -> str:
        """
        Get CSS stylesheet for HTML reports.

        Secondary text colours are taken from ``TEXT_SECONDARY`` everywhere,
        including the workload detail rules.

        :return: CSS string with Material-UI dark theme styles
        """
        # Literal CSS braces are doubled because this is an f-string.
        return f"""
        * {{
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }}

        body {{
            font-family: {cls.FONT_FAMILY};
            background: linear-gradient(105deg, black, {cls.SURFACE});
            color: {cls.TEXT_PRIMARY};
            line-height: 1.6;
            padding: 2rem;
        }}

        h1, h2, h3, h4, h5, h6 {{
            font-weight: 500;
            margin-bottom: 1rem;
        }}

        h1 {{
            font-size: 2.5rem;
            color: {cls.PRIMARY};
        }}

        h2 {{
            font-size: 2rem;
            color: {cls.SECONDARY};
        }}

        h3 {{
            font-size: 1.5rem;
        }}

        .container {{
            max-width: 1400px;
            margin: 0 auto;
        }}

        .header {{
            margin-bottom: 2rem;
            padding: 1.5rem;
            background-color: {cls.SURFACE};
            border-radius: 8px;
        }}

        .section {{
            margin-bottom: 2rem;
            padding: 1.5rem;
            background-color: {cls.SURFACE};
            border-radius: 8px;
        }}

        .footer {{
            margin-top: 2rem;
            padding: 1rem;
            text-align: center;
            color: {cls.TEXT_SECONDARY};
            font-size: 0.875rem;
        }}

        .badge {{
            display: inline-block;
            padding: 0.25rem 0.75rem;
            border-radius: 4px;
            font-size: 0.875rem;
            font-weight: 500;
            margin: 0.25rem;
        }}

        .badge-primary {{
            background-color: {cls.PRIMARY};
            color: #000;
        }}

        .badge-secondary {{
            background-color: {cls.SECONDARY};
            color: #000;
        }}

        .badge-success {{
            background-color: {cls.SUCCESS};
            color: #fff;
        }}

        .badge-error {{
            background-color: {cls.ERROR};
            color: #fff;
        }}

        .info-row {{
            display: flex;
            flex-wrap: wrap;
            gap: 1rem;
            margin-bottom: 0.5rem;
        }}

        .info-item {{
            flex: 1;
            min-width: 200px;
        }}

        .info-label {{
            color: {cls.TEXT_SECONDARY};
            font-size: 0.875rem;
            margin-bottom: 0.25rem;
        }}

        .info-value {{
            color: {cls.TEXT_PRIMARY};
            font-size: 1rem;
            font-weight: 500;
        }}

        a {{
            color: {cls.PRIMARY};
            text-decoration: none;
        }}

        a:hover {{
            text-decoration: underline;
        }}

        /* Workload details styles */
        .section-header {{
            margin-bottom: 0.5rem;
            color: {cls.TEXT_SECONDARY};
            font-size: 0.875rem;
            text-transform: uppercase;
        }}

        .sample-box {{
            padding: 0.5rem;
            margin: 0.25rem 0;
            background-color: rgba(255,255,255,0.05);
            border-radius: 4px;
            font-size: 0.875rem;
            color: rgba(255, 255, 255, 0.7);
        }}

        .mean-container {{
            margin-top: 1.5rem;
            margin-bottom: 0.5rem;
        }}

        .mean-label {{
            color: {cls.TEXT_SECONDARY};
            font-size: 0.875rem;
            text-transform: uppercase;
        }}

        .mean-value-primary {{
            color: {cls.PRIMARY};
            font-size: 2rem;
            font-weight: 500;
        }}

        .info-value-primary {{
            color: {cls.PRIMARY};
            word-break: break-all;
        }}

        .grid-3col {{
            display: grid;
            grid-template-columns: repeat(3, 1fr);
            gap: 1.5rem;
            margin-top: 1rem;
        }}

        .flex-col {{
            display: flex;
            flex-direction: column;
            gap: 0.75rem;
            margin-bottom: 1rem;
        }}

        .grid-2col {{
            display: grid;
            grid-template-columns: 1fr 1fr;
            gap: 0.5rem;
        }}
        """
{ registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, @@ -865,6 +866,7 @@ dev = [ { name = "orjson" }, { name = "pandas-stubs" }, { name = "pillow" }, + { name = "plotly" }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -902,6 +904,7 @@ recommended = [ { name = "msgpack" }, { name = "msgspec" }, { name = "orjson" }, + { name = "plotly" }, { name = "tiktoken" }, { name = "uvloop" }, ] @@ -910,6 +913,9 @@ tokenizers = [ { name = "mistral-common" }, { name = "tiktoken" }, ] +ui = [ + { name = "plotly" }, +] vision = [ { name = "datasets", extra = ["vision"] }, { name = "pillow" }, @@ -932,9 +938,9 @@ requires-dist = [ { name = "eval-type-backport" }, { name = "faker" }, { name = "ftfy", specifier = ">=6.0.0" }, - { name = "guidellm", extras = ["all"], marker = "extra == 'dev'" }, - { name = "guidellm", extras = ["audio", "perf", "tokenizers", "vision"], marker = "extra == 'all'" }, - { name = "guidellm", extras = ["perf", "tokenizers"], marker = "extra == 'recommended'" }, + { name = "guidellm", extras = ["all"], marker = "extra == 'dev'", editable = "." }, + { name = "guidellm", extras = ["audio", "perf", "tokenizers", "ui", "vision"], marker = "extra == 'all'", editable = "." }, + { name = "guidellm", extras = ["perf", "tokenizers", "ui"], marker = "extra == 'recommended'", editable = "." 
}, { name = "httpx", extras = ["http2"], specifier = "<1.0.0" }, { name = "loguru" }, { name = "lorem", marker = "extra == 'dev'", specifier = "~=0.1.1" }, @@ -953,6 +959,7 @@ requires-dist = [ { name = "orjson", marker = "extra == 'perf'" }, { name = "pandas-stubs", marker = "extra == 'dev'" }, { name = "pillow", marker = "extra == 'vision'" }, + { name = "plotly", marker = "extra == 'ui'", specifier = ">=5.24.0" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = "~=3.5.0" }, { name = "protobuf" }, { name = "pydantic", specifier = ">=2.11.7" }, @@ -986,10 +993,10 @@ requires-dist = [ { name = "uvloop", specifier = ">=0.18" }, { name = "uvloop", marker = "extra == 'perf'" }, ] -provides-extras = ["all", "recommended", "perf", "tokenizers", "audio", "vision", "dev"] +provides-extras = ["all", "recommended", "perf", "tokenizers", "audio", "vision", "dev", "ui"] [package.metadata.requires-dev] -dev = [{ name = "guidellm", extras = ["dev"] }] +dev = [{ name = "guidellm", extras = ["dev"], editable = "." 
}] [[package]] name = "h11" @@ -1943,6 +1950,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "narwhals" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e9/f3/257adc69a71011b4c8cda321b00f02c5bf1980ae38ffd05a58d9632d4de8/narwhals-2.20.0.tar.gz", hash = "sha256:c10994975fa7dc5a68c2cffcddbd5908fc8ebb2d463c5bab085309c0ee1f551e", size = 627848, upload-time = "2026-04-20T12:11:45.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/69/f24d3d1c38ad69e256138b4ec2452a8c7cf66be49dc214771ae99dd4f0a0/narwhals-2.20.0-py3-none-any.whl", hash = "sha256:16e750ea5507d4ba6e8d03455b5f93a535e0405976561baea235bca5dc9f475d", size = 449373, upload-time = "2026-04-20T12:11:43.596Z" }, +] + [[package]] name = "networkx" version = "3.4.2" @@ -2406,6 +2422,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/cb/ac7874b3e5d58441674fb70742e6c374b28b0c7cb988d37d991cde47166c/platformdirs-4.5.0-py3-none-any.whl", hash = "sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3", size = 18651, upload-time = "2025-10-08T17:44:47.223Z" }, ] +[[package]] +name = "plotly" +version = "6.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "narwhals" }, + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3a/7f/0f100df1172aadf88a929a9dbb902656b0880ba4b960fe5224867159d8f4/plotly-6.7.0.tar.gz", hash = "sha256:45eea0ff27e2a23ccd62776f77eb43aa1ca03df4192b76036e380bb479b892c6", size = 6911286, upload-time = "2026-04-09T20:36:45.738Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/90/ad/cba91b3bcf04073e4d1655a5c1710ef3f457f56f7d1b79dcc3d72f4dd912/plotly-6.7.0-py3-none-any.whl", hash = "sha256:ac8aca1c25c663a59b5b9140a549264a5badde2e057d79b8c772ae2920e32ff0", size = 9898444, upload-time = "2026-04-09T20:36:39.812Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -3932,21 +3961,21 @@ dependencies = [ { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, ] wheels = [ - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:4db72a4d257c45c3502f11764ee41460a87312fdc3dff47a8957812efe961725" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:0826ac8e409551e12b2360ac18b4161a838cbd111933e694752f351191331d09" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:7fbbf409143a4fe0812a40c0b46a436030a7e1d14fe8c5234dfbe44df47f617e" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:b39cafff7229699f9d6e172cac74d85fd71b568268e439e08d9c540e54732a3e" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:7417ef370d7c3969dd509dae8d5c7daeb945af335ab76dd38358ba30a91251c1" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:90821a3194b8806d9fa9fdaa9308c1bc73df0c26808274b14129a97c99f35794" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:358bd7125cbec6e692d60618a5eec7f55a51b29e3652a849fd42af021d818023" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:470de4176007c2700735e003a830828a88d27129032a3add07291da07e2a94e8" }, - { url = 
"https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2d16abfce6c92584ceeb00c3b2665d5798424dd9ed235ea69b72e045cd53ae97" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:4584ab167995c0479f6821e3dceaf199c8166c811d3adbba5d8eedbbfa6764fd" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:45a1c5057629444aeb1c452c18298fa7f30f2f7aeadd4dc41f9d340980294407" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:339e05502b6c839db40e88720cb700f5a3b50cda332284873e851772d41b2c1e" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:840351da59cedb7bcbc51981880050813c19ef6b898a7fecf73a3afc71aff3fe" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:c88b1129fd4e14f0f882963c6728315caae35d2f47374d17edeed1edc7697497" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f4bea7dc451267c028593751612ad559299589304e68df54ae7672427893ff2c" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:4db72a4d257c45c3502f11764ee41460a87312fdc3dff47a8957812efe961725", upload-time = "2026-02-06T16:27:14Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:0826ac8e409551e12b2360ac18b4161a838cbd111933e694752f351191331d09", upload-time = "2026-02-06T16:27:14Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:7fbbf409143a4fe0812a40c0b46a436030a7e1d14fe8c5234dfbe44df47f617e", upload-time = "2026-02-06T16:27:14Z" }, + { url = 
"https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:b39cafff7229699f9d6e172cac74d85fd71b568268e439e08d9c540e54732a3e", upload-time = "2026-02-06T16:27:17Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:7417ef370d7c3969dd509dae8d5c7daeb945af335ab76dd38358ba30a91251c1", upload-time = "2026-02-10T19:55:42Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:90821a3194b8806d9fa9fdaa9308c1bc73df0c26808274b14129a97c99f35794", upload-time = "2026-02-10T19:55:42Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:358bd7125cbec6e692d60618a5eec7f55a51b29e3652a849fd42af021d818023", upload-time = "2026-02-10T19:55:42Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:470de4176007c2700735e003a830828a88d27129032a3add07291da07e2a94e8", upload-time = "2026-02-10T19:55:43Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2d16abfce6c92584ceeb00c3b2665d5798424dd9ed235ea69b72e045cd53ae97", upload-time = "2026-01-23T15:09:55Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:4584ab167995c0479f6821e3dceaf199c8166c811d3adbba5d8eedbbfa6764fd", upload-time = "2026-01-23T15:09:55Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:45a1c5057629444aeb1c452c18298fa7f30f2f7aeadd4dc41f9d340980294407", upload-time = "2026-01-23T15:09:55Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:339e05502b6c839db40e88720cb700f5a3b50cda332284873e851772d41b2c1e", upload-time = "2026-01-23T15:09:57Z" }, + { url 
= "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:840351da59cedb7bcbc51981880050813c19ef6b898a7fecf73a3afc71aff3fe", upload-time = "2026-01-23T15:09:59Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:c88b1129fd4e14f0f882963c6728315caae35d2f47374d17edeed1edc7697497", upload-time = "2026-01-23T15:09:59Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f4bea7dc451267c028593751612ad559299589304e68df54ae7672427893ff2c", upload-time = "2026-01-23T15:10:01Z" }, ] [[package]] @@ -3969,44 +3998,44 @@ dependencies = [ { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, ] wheels = [ - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-linux_aarch64.whl", hash = "sha256:31ae44836c8b9bbd1a3943d29c7c7457709ddf7c6173aa34aefe9d2203e4c405" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-linux_s390x.whl", hash = "sha256:beadc2a6a1785b09a46daad378de91ef274b8d3eea7af0bc2d017d97f115afdf" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d63ee6a80982fd73fe44bb70d97d2976e010312ff6db81d7bfb9167b06dd45b9" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a280ffaea7b9c828e0c1b9b3bd502d9b6a649dc9416997b69b84544bd469f215" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-win_amd64.whl", hash = "sha256:6c6f0df770144907092a0d067048d96ed4f278a6c840376d2ff0e27e7579b925" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_aarch64.whl", hash = "sha256:ce5c113d1f55f8c1f5af05047a24e50d11d293e0cbbb5bf7a75c6c761edd6eaa" }, - { url = 
"https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_s390x.whl", hash = "sha256:0e286fcf6ce0cc7b204396c9b4ea0d375f1f0c3e752f68ce3d3aeb265511db8c" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:1cfcb9b1558c6e52dffd0d4effce83b13c5ae5d97338164c372048c21f9cfccb" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:b7cb1ec66cefb90fd7b676eac72cfda3b8d4e4d0cacd7a531963bc2e0a9710ab" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:17a09465bab2aab8f0f273410297133d8d8fb6dd84dccbd252ca4a4f3a111847" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-win_arm64.whl", hash = "sha256:c35c0de592941d4944698dbfa87271ab85d3370eca3b694943a2ab307ac34b3f" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-linux_aarch64.whl", hash = "sha256:8de5a36371b775e2d4881ed12cc7f2de400b1ad3d728aa74a281f649f87c9b8c" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-linux_s390x.whl", hash = "sha256:9accc30b56cb6756d4a9d04fcb8ebc0bb68c7d55c1ed31a8657397d316d31596" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:179451716487f8cb09b56459667fa1f5c4c0946c1e75fbeae77cfc40a5768d87" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ee40b8a4b4b2cf0670c6fd4f35a7ef23871af956fecb238fbf5da15a72650b1d" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:21cb5436978ef47c823b7a813ff0f8c2892e266cfe0f1d944879b5fba81bf4e1" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = 
"sha256:3eaa727e6a73affa61564d86b9d03191df45c8650d0666bd3d57c8597ef61e78" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-linux_aarch64.whl", hash = "sha256:fd215f3d0f681905c5b56b0630a3d666900a37fcc3ca5b937f95275c66f9fd9c" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-linux_s390x.whl", hash = "sha256:170a0623108055be5199370335cf9b41ba6875b3cb6f086db4aee583331a4899" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e51994492cdb76edce29da88de3672a3022f9ef0ffd90345436948d4992be2c7" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8d316e5bf121f1eab1147e49ad0511a9d92e4c45cc357d1ab0bee440da71a095" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:b719da5af01b59126ac13eefd6ba3dd12d002dc0e8e79b8b365e55267a8189d3" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:b67d91326e4ed9eccbd6b7d84ed7ffa43f93103aa3f0b24145f3001f3b11b714" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-linux_aarch64.whl", hash = "sha256:5af75e5f49de21b0bdf7672bc27139bd285f9e8dbcabe2d617a2eb656514ac36" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-linux_s390x.whl", hash = "sha256:ba51ef01a510baf8fff576174f702c47e1aa54389a9f1fba323bb1a5003ff0bf" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:0fedcb1a77e8f2aaf7bfd21591bf6d1e0b207473268c9be16b17cb7783253969" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:106dd1930cb30a4a337366ba3f9b25318ebf940f51fd46f789281dd9e736bdc4" }, - { url = 
"https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:eb1bde1ce198f05c8770017de27e001d404499cf552aaaa014569eff56ca25c0" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-linux_aarch64.whl", hash = "sha256:ea2bcc9d1fca66974a71d4bf9a502539283f35d61fcab5a799b4e120846f1e02" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-linux_s390x.whl", hash = "sha256:f8294fd2fc6dd8f4435a891a0122307a043b14b21f0dac1bca63c85bfb59e586" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:a28fdbcfa2fbacffec81300f24dd1bed2b0ccfdbed107a823cff12bc1db070f6" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:aada8afc068add586464b2a55adb7cc9091eec55caf5320447204741cb6a0604" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:2adc71fe471e98a608723bfc837f7e1929885ebb912c693597711e139c1cda41" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-linux_aarch64.whl", hash = "sha256:9412bd37b70f5ebd1205242c4ba4cabae35a605947f2b30806d5c9b467936db9" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-linux_s390x.whl", hash = "sha256:e71c476517c33e7db69825a9ff46c7f47a723ec4dac5b2481cff4246d1c632be" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:23882f8d882460aca809882fc42f5e343bf07585274f929ced00177d1be1eb67" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:4fcd8b4cc2ae20f2b7749fb275349c55432393868778c2d50a08e81d5ee5591e" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = 
"sha256:ffc8da9a1341092d6a90cb5b1c1a33cd61abf0fb43f0cd88443c27fa372c26ae" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-linux_aarch64.whl", hash = "sha256:31ae44836c8b9bbd1a3943d29c7c7457709ddf7c6173aa34aefe9d2203e4c405", upload-time = "2026-01-23T15:10:02Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-linux_s390x.whl", hash = "sha256:beadc2a6a1785b09a46daad378de91ef274b8d3eea7af0bc2d017d97f115afdf", upload-time = "2026-01-23T15:10:03Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d63ee6a80982fd73fe44bb70d97d2976e010312ff6db81d7bfb9167b06dd45b9", upload-time = "2026-01-23T15:10:05Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a280ffaea7b9c828e0c1b9b3bd502d9b6a649dc9416997b69b84544bd469f215", upload-time = "2026-01-23T15:10:07Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-win_amd64.whl", hash = "sha256:6c6f0df770144907092a0d067048d96ed4f278a6c840376d2ff0e27e7579b925", upload-time = "2026-01-23T15:10:09Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_aarch64.whl", hash = "sha256:ce5c113d1f55f8c1f5af05047a24e50d11d293e0cbbb5bf7a75c6c761edd6eaa", upload-time = "2026-01-23T15:10:11Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_s390x.whl", hash = "sha256:0e286fcf6ce0cc7b204396c9b4ea0d375f1f0c3e752f68ce3d3aeb265511db8c", upload-time = "2026-01-23T15:10:12Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:1cfcb9b1558c6e52dffd0d4effce83b13c5ae5d97338164c372048c21f9cfccb", upload-time = "2026-01-23T15:10:15Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", 
hash = "sha256:b7cb1ec66cefb90fd7b676eac72cfda3b8d4e4d0cacd7a531963bc2e0a9710ab", upload-time = "2026-01-23T15:10:15Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:17a09465bab2aab8f0f273410297133d8d8fb6dd84dccbd252ca4a4f3a111847", upload-time = "2026-01-23T15:10:19Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-win_arm64.whl", hash = "sha256:c35c0de592941d4944698dbfa87271ab85d3370eca3b694943a2ab307ac34b3f", upload-time = "2026-01-23T15:10:20Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-linux_aarch64.whl", hash = "sha256:8de5a36371b775e2d4881ed12cc7f2de400b1ad3d728aa74a281f649f87c9b8c", upload-time = "2026-01-23T15:10:22Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-linux_s390x.whl", hash = "sha256:9accc30b56cb6756d4a9d04fcb8ebc0bb68c7d55c1ed31a8657397d316d31596", upload-time = "2026-01-23T15:10:24Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:179451716487f8cb09b56459667fa1f5c4c0946c1e75fbeae77cfc40a5768d87", upload-time = "2026-01-23T15:10:25Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ee40b8a4b4b2cf0670c6fd4f35a7ef23871af956fecb238fbf5da15a72650b1d", upload-time = "2026-01-23T15:10:27Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:21cb5436978ef47c823b7a813ff0f8c2892e266cfe0f1d944879b5fba81bf4e1", upload-time = "2026-01-23T15:10:30Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:3eaa727e6a73affa61564d86b9d03191df45c8650d0666bd3d57c8597ef61e78", upload-time = "2026-01-23T15:10:31Z" }, + { url = 
"https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-linux_aarch64.whl", hash = "sha256:fd215f3d0f681905c5b56b0630a3d666900a37fcc3ca5b937f95275c66f9fd9c", upload-time = "2026-01-23T15:10:34Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-linux_s390x.whl", hash = "sha256:170a0623108055be5199370335cf9b41ba6875b3cb6f086db4aee583331a4899", upload-time = "2026-01-23T15:10:35Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e51994492cdb76edce29da88de3672a3022f9ef0ffd90345436948d4992be2c7", upload-time = "2026-01-23T15:10:37Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8d316e5bf121f1eab1147e49ad0511a9d92e4c45cc357d1ab0bee440da71a095", upload-time = "2026-01-23T15:10:38Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:b719da5af01b59126ac13eefd6ba3dd12d002dc0e8e79b8b365e55267a8189d3", upload-time = "2026-01-23T15:10:41Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:b67d91326e4ed9eccbd6b7d84ed7ffa43f93103aa3f0b24145f3001f3b11b714", upload-time = "2026-01-23T15:10:42Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-linux_aarch64.whl", hash = "sha256:5af75e5f49de21b0bdf7672bc27139bd285f9e8dbcabe2d617a2eb656514ac36", upload-time = "2026-01-23T15:10:44Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-linux_s390x.whl", hash = "sha256:ba51ef01a510baf8fff576174f702c47e1aa54389a9f1fba323bb1a5003ff0bf", upload-time = "2026-01-23T15:10:48Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:0fedcb1a77e8f2aaf7bfd21591bf6d1e0b207473268c9be16b17cb7783253969", upload-time = 
"2026-01-23T15:10:48Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:106dd1930cb30a4a337366ba3f9b25318ebf940f51fd46f789281dd9e736bdc4", upload-time = "2026-01-23T15:10:50Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:eb1bde1ce198f05c8770017de27e001d404499cf552aaaa014569eff56ca25c0", upload-time = "2026-01-23T15:10:50Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-linux_aarch64.whl", hash = "sha256:ea2bcc9d1fca66974a71d4bf9a502539283f35d61fcab5a799b4e120846f1e02", upload-time = "2026-01-23T15:10:53Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-linux_s390x.whl", hash = "sha256:f8294fd2fc6dd8f4435a891a0122307a043b14b21f0dac1bca63c85bfb59e586", upload-time = "2026-01-23T15:10:55Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:a28fdbcfa2fbacffec81300f24dd1bed2b0ccfdbed107a823cff12bc1db070f6", upload-time = "2026-01-23T15:10:56Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:aada8afc068add586464b2a55adb7cc9091eec55caf5320447204741cb6a0604", upload-time = "2026-01-23T15:10:58Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:2adc71fe471e98a608723bfc837f7e1929885ebb912c693597711e139c1cda41", upload-time = "2026-01-23T15:11:01Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-linux_aarch64.whl", hash = "sha256:9412bd37b70f5ebd1205242c4ba4cabae35a605947f2b30806d5c9b467936db9", upload-time = "2026-01-23T15:11:03Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-linux_s390x.whl", hash = 
"sha256:e71c476517c33e7db69825a9ff46c7f47a723ec4dac5b2481cff4246d1c632be", upload-time = "2026-01-23T15:11:04Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:23882f8d882460aca809882fc42f5e343bf07585274f929ced00177d1be1eb67", upload-time = "2026-01-23T15:11:07Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:4fcd8b4cc2ae20f2b7749fb275349c55432393868778c2d50a08e81d5ee5591e", upload-time = "2026-01-23T15:11:07Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:ffc8da9a1341092d6a90cb5b1c1a33cd61abf0fb43f0cd88443c27fa372c26ae", upload-time = "2026-01-23T15:11:10Z" }, ] [[package]] From 8507a04d5424207bf73bc3680ee86aac08255baf Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Tue, 20 Jan 2026 18:36:25 -0500 Subject: [PATCH 3/5] Improve Workload Details and Metrics Signed-off-by: Samuel Monson --- .../html/components/workload_details.py | 185 +++++++++++----- .../html/components/workload_metrics.py | 208 +++++++++--------- .../benchmark/outputs/html/data_builder.py | 15 +- src/guidellm/benchmark/outputs/html/theme.py | 68 ++++++ 4 files changed, 318 insertions(+), 158 deletions(-) diff --git a/src/guidellm/benchmark/outputs/html/components/workload_details.py b/src/guidellm/benchmark/outputs/html/components/workload_details.py index eb0961580..101425925 100644 --- a/src/guidellm/benchmark/outputs/html/components/workload_details.py +++ b/src/guidellm/benchmark/outputs/html/components/workload_details.py @@ -7,7 +7,7 @@ from guidellm.benchmark.outputs.html.components.base import PlotlyComponentBase # Maximum characters to display for sample text -_SAMPLE_MAX_LENGTH = 100 +_SAMPLE_MAX_LENGTH = 200 class WorkloadDetailsComponent(PlotlyComponentBase): @@ -31,13 +31,13 @@ def generate(self, data: dict[str, Any]) -> str: generations_data = 
data.get("generations", {}) requests_data = data.get("requests_over_time", {}) server_data = data.get("server", {}) - rate_type = data.get("rate_type", "N/A") + rate_types = data.get("rate_types", ["N/A"]) num_benchmarks = requests_data.get("num_benchmarks", 0) # Build HTML sections prompts_html = self._generate_prompts_section(prompts_data) server_html = self._generate_server_section( - server_data, rate_type, num_benchmarks + server_data, rate_types, num_benchmarks ) generations_html = self._generate_generations_section(generations_data) @@ -69,20 +69,26 @@ def generate(self, data: dict[str, Any]) -> str:

Workload Details

-
+

Prompts

{prompts_html} - {prompt_chart_html} +
+ {prompt_chart_html} +
-
-

Server Configuration

+
+

Run Configuration

{server_html} - {requests_chart_html} +
+ {requests_chart_html} +
-
+

Generations

{generations_html} - {output_chart_html} +
+ {output_chart_html} +
@@ -107,12 +113,44 @@ def _generate_prompts_section(self, prompts_data: dict[str, Any]) -> str: if not samples: samples_html = "

No prompt samples available

" else: - samples_html = "".join( - f'
' - f"{sample[:_SAMPLE_MAX_LENGTH]}" - f"{'...' if len(sample) > _SAMPLE_MAX_LENGTH else ''}
" - for sample in samples[:5] - ) + # Use only first 5 samples + samples_to_show = samples[:5] + # Prepare samples for JavaScript (escape quotes and truncate) + samples_js = [ + s[:_SAMPLE_MAX_LENGTH].replace("\\", "\\\\").replace('"', '\\"') + for s in samples_to_show + ] + + samples_html = f""" + + + """ # Mean prompt length mean_html = f""" @@ -125,55 +163,35 @@ def _generate_prompts_section(self, prompts_data: dict[str, Any]) -> str: return f"{header_html}{samples_html}{mean_html}" def _generate_server_section( - self, server_data: dict[str, Any], rate_type: str, num_benchmarks: int + self, _server_data: dict[str, Any], rate_types: list[str], num_benchmarks: int ) -> str: - """Generate HTML for server configuration. + """Generate HTML for benchmark configuration. Args: - server_data: Dict with 'target' URL. - rate_type: Rate type string. + _server_data: Dict with server data (unused but kept for compatibility). + rate_types: List of rate type strings in execution order. num_benchmarks: Number of benchmarks. Returns: - HTML string for server section. + HTML string for benchmark configuration section. """ - target = server_data.get("target", "N/A") - - # Parse URL to extract protocol and port - protocol = "N/A" - port = "N/A" - if target != "N/A" and "://" in target: - protocol = target.split("://")[0] - rest = target.split("://")[1] - if ":" in rest: - port = rest.split(":")[1].split("/")[0] - else: - port = "80" if protocol == "http" else "443" + # Generate multiple rate type badges + rate_badges = " ".join( + f'{rt}' for rt in rate_types + ) + + rate_label = "Profile" + ("s" if len(rate_types) > 1 else "") return f"""
-
-
Target
-
{target}
-
-
-
-
Type
-
{protocol}
-
-
-
Port
-
{port}
-
-
Number of Benchmarks
{num_benchmarks}
-
Rate Type
+
{rate_label}
- {rate_type} + {rate_badges}
@@ -200,12 +218,44 @@ def _generate_generations_section(self, generations_data: dict[str, Any]) -> str if not samples: samples_html = "

No generation samples available

" else: - samples_html = "".join( - f'
' - f"{sample[:_SAMPLE_MAX_LENGTH]}" - f"{'...' if len(sample) > _SAMPLE_MAX_LENGTH else ''}
" - for sample in samples[:5] - ) + # Use only first 5 samples + samples_to_show = samples[:5] + # Prepare samples for JavaScript (escape quotes and truncate) + samples_js = [ + s[:_SAMPLE_MAX_LENGTH].replace("\\", "\\\\").replace('"', '\\"') + for s in samples_to_show + ] + + samples_html = f""" + + + """ # Mean generated length mean_html = f""" @@ -234,7 +284,11 @@ def _create_histogram_chart( statistics = distribution_data.get("statistics", {}) fig = self._create_figure( - title=title, xaxis_title=xaxis_title, yaxis_title="Count" + title=title, + xaxis_title=xaxis_title, + yaxis_title="Count", + autosize=True, + width=None, ) if not buckets: @@ -244,6 +298,15 @@ def _create_histogram_chart( x_values = [b["value"] for b in buckets] counts = [b["count"] for b in buckets] + # Calculate appropriate bar width with max limit + if len(x_values) > 1: + data_range = max(x_values) - min(x_values) + calculated_width = data_range / len(x_values) * 0.8 + max_width = 20 # Maximum bar width in data units + bar_width = min(calculated_width, max_width) + else: + bar_width = 10 # Single bar default width + # Add bar chart fig.add_trace( go.Bar( @@ -252,6 +315,7 @@ def _create_histogram_chart( name="Count", marker_color=self.theme.PRIMARY, hovertemplate="Tokens: %{x}
Count: %{y}", + width=bar_width, ) ) @@ -287,6 +351,8 @@ def _create_requests_over_time_chart( title="Requests Over Time", xaxis_title="Time (seconds)", yaxis_title="Request Count", + autosize=True, + width=None, ) if not buckets: @@ -296,6 +362,11 @@ def _create_requests_over_time_chart( x_values = [b["value"] for b in buckets] counts = [b["count"] for b in buckets] + # Calculate bar width with max limit + calculated_width = bucket_width * 0.8 + max_width = 50 # Maximum width in seconds for requests chart + bar_width = min(calculated_width, max_width) + # Add bar chart fig.add_trace( go.Bar( @@ -304,7 +375,7 @@ def _create_requests_over_time_chart( name="Requests", marker_color=self.theme.TERTIARY, hovertemplate="Time: %{x:.1f}s
Requests: %{y}", - width=bucket_width * 0.8, # Make bars slightly narrower than buckets + width=bar_width, ) ) diff --git a/src/guidellm/benchmark/outputs/html/components/workload_metrics.py b/src/guidellm/benchmark/outputs/html/components/workload_metrics.py index 5035fc285..a84ba7c32 100644 --- a/src/guidellm/benchmark/outputs/html/components/workload_metrics.py +++ b/src/guidellm/benchmark/outputs/html/components/workload_metrics.py @@ -3,16 +3,15 @@ from typing import Any import plotly.graph_objects as go -from plotly.subplots import make_subplots from guidellm.benchmark.outputs.html.components.base import PlotlyComponentBase class WorkloadMetricsComponent(PlotlyComponentBase): - """Generates the 2x2 workload metrics grid.""" + """Generates the 2x2 workload metrics grid with separate charts.""" def generate(self, data: dict[str, Any]) -> str: - """Generate workload metrics HTML with 2x2 grid of charts. + """Generate workload metrics HTML with 4 separate charts. Args: data: Dictionary containing: @@ -31,139 +30,152 @@ def generate(self, data: dict[str, Any]) -> str:
""" - # Create 2x2 subplot figure - fig = make_subplots( - rows=2, - cols=2, - subplot_titles=( - "Time to First Token (TTFT)", - "Inter-Token Latency (ITL)", - "Time Per Request", - "Throughput (tokens/sec)", - ), - vertical_spacing=0.12, - horizontal_spacing=0.1, + # Sort benchmarks by requests_per_second to prevent line crossovers + benchmarks_sorted = sorted( + benchmarks, key=lambda bm: bm.get("requests_per_second", 0) ) - # Apply base theme - fig = self._apply_theme_to_figure(fig) - - # Extract RPS values for x-axis - [bm["requests_per_second"] for bm in benchmarks] - - # Add metric traces to each subplot - self._add_metric_traces(fig, benchmarks, "ttft", 1, 1, "TTFT (ms)") - self._add_metric_traces(fig, benchmarks, "itl", 1, 2, "ITL (ms)") - self._add_metric_traces( - fig, benchmarks, "time_per_request", 2, 1, "Latency (s)" + # Create 4 separate figures + ttft_fig = self._create_metric_chart( + benchmarks_sorted, + "ttft", + "Time to First Token (TTFT)", + "Milliseconds", ) - self._add_metric_traces( - fig, benchmarks, "throughput", 2, 2, "Throughput (tokens/s)" + itl_fig = self._create_metric_chart( + benchmarks_sorted, + "itl", + "Inter-Token Latency (ITL)", + "Milliseconds", ) - - # Update axes - fig.update_xaxes(title_text="Requests per Second (RPS)", row=2, col=1) - fig.update_xaxes(title_text="Requests per Second (RPS)", row=2, col=2) - fig.update_yaxes(title_text="Milliseconds", row=1, col=1) - fig.update_yaxes(title_text="Milliseconds", row=1, col=2) - fig.update_yaxes(title_text="Seconds", row=2, col=1) - fig.update_yaxes(title_text="Tokens/Second", row=2, col=2) - - # Update layout - fig.update_layout( - title_text="Workload Metrics", - title_font_size=24, - title_font_color=self.theme.SECONDARY, - showlegend=True, - legend={ - "orientation": "h", - "yanchor": "bottom", - "y": 1.02, - "xanchor": "right", - "x": 1, - }, - height=800, + tpr_fig = self._create_metric_chart( + benchmarks_sorted, + "time_per_request", + "Time Per Request", + "Seconds", 
+ ) + throughput_fig = self._create_metric_chart( + benchmarks_sorted, + "throughput", + "Throughput", + "Tokens/Second", ) - # Convert to HTML - chart_html = fig.to_html(include_plotlyjs=False, div_id="workload-metrics") + # Convert each figure to HTML + ttft_html = ttft_fig.to_html(include_plotlyjs=False, div_id="ttft-chart") + itl_html = itl_fig.to_html(include_plotlyjs=False, div_id="itl-chart") + tpr_html = tpr_fig.to_html(include_plotlyjs=False, div_id="tpr-chart") + throughput_html = throughput_fig.to_html( + include_plotlyjs=False, div_id="throughput-chart" + ) + # Return HTML with 2x2 grid layout return f"""
- {chart_html} +

Workload Metrics

+
+
+

TIME TO FIRST TOKEN

+ {ttft_html} +
+
+

INTER-TOKEN LATENCY

+ {itl_html} +
+
+

TIME PER REQUEST

+ {tpr_html} +
+
+

THROUGHPUT

+ {throughput_html} +
+
""" - def _add_metric_traces( + def _create_metric_chart( self, - fig: go.Figure, benchmarks: list[dict[str, Any]], - metric_name: str, - row: int, - col: int, - trace_prefix: str, - ) -> None: - """Add metric traces to a subplot. + metric_key: str, + title: str, + yaxis_title: str, + ) -> go.Figure: + """Create a single metric chart with mean + percentile lines. Args: - fig: Plotly figure to add traces to. - benchmarks: List of benchmark data dicts. - metric_name: Name of the metric (e.g., 'ttft', 'itl'). - row: Subplot row number. - col: Subplot column number. - trace_prefix: Prefix for trace names. + benchmarks: Sorted list of benchmark data dicts. + metric_key: Name of the metric (e.g., 'ttft', 'itl'). + title: Chart title. + yaxis_title: Y-axis title. + + Returns: + Plotly figure with metric chart. """ - rps_values = [bm["requests_per_second"] for bm in benchmarks] + fig = self._create_figure( + title=title, + xaxis_title="Requests per Second", + yaxis_title=yaxis_title, + ) - # Extract metric data - mean_values = [bm[metric_name].get("mean", 0) for bm in benchmarks] + # Extract data + rps_values = [bm["requests_per_second"] for bm in benchmarks] + metric_data = [bm[metric_key] for bm in benchmarks] - # Add mean line (primary trace) + # Add mean line (solid, primary color, thicker) + mean_values = [m["mean"] for m in metric_data] fig.add_trace( go.Scatter( x=rps_values, y=mean_values, mode="lines+markers", - name=f"{trace_prefix} Mean", - line={"width": 3, "color": self.theme.PRIMARY}, - marker={"size": 8}, - hovertemplate=( - f"RPS: %{{x:.2f}}
{trace_prefix}: %{{y:.2f}}" - ), - ), - row=row, - col=col, + name="mean", + line={"color": self.theme.PRIMARY, "width": 3}, + marker={"size": 6, "color": self.theme.PRIMARY}, + hovertemplate="RPS: %{x:.2f}
Mean: %{y:.2f}", + ) ) - # Add percentile lines + # Add percentile lines (dashed, thinner) percentiles = ["p50", "p90", "p95", "p99"] colors = [ - self.theme.TERTIARY, - self.theme.SECONDARY, - self.theme.QUATERNARY, - self.theme.ERROR, + self.theme.TERTIARY, # p50 - teal + self.theme.SECONDARY, # p90 - purple + self.theme.QUATERNARY, # p95 - yellow + self.theme.ERROR, # p99 - red ] for pct, color in zip(percentiles, colors, strict=False): # Check if percentiles exist in the data - if benchmarks[0][metric_name].get("percentiles"): - pct_values = [ - bm[metric_name].get("percentiles", {}).get(pct, 0) - for bm in benchmarks - ] - + if metric_data[0].get("percentiles"): + pct_values = [m.get("percentiles", {}).get(pct, 0) for m in metric_data] fig.add_trace( go.Scatter( x=rps_values, y=pct_values, - mode="lines", - name=f"{trace_prefix} {pct.upper()}", - line={"width": 1.5, "dash": "dash", "color": color}, + mode="lines+markers", + name=pct, + line={"color": color, "width": 2, "dash": "dot"}, + marker={"size": 4, "color": color}, hovertemplate=( f"RPS: %{{x:.2f}}
{pct.upper()}: " "%{y:.2f}" ), - ), - row=row, - col=col, + ) ) + + # Update layout with individual legend + fig.update_layout( + showlegend=True, + legend={ + "orientation": "h", + "yanchor": "top", + "y": -0.15, # Below the chart + "xanchor": "center", + "x": 0.5, + }, + height=400, # Fixed height for consistency + margin={"l": 60, "r": 20, "t": 60, "b": 100}, # Extra bottom for legend + ) + + return fig diff --git a/src/guidellm/benchmark/outputs/html/data_builder.py b/src/guidellm/benchmark/outputs/html/data_builder.py index 5639b74c3..86460821f 100644 --- a/src/guidellm/benchmark/outputs/html/data_builder.py +++ b/src/guidellm/benchmark/outputs/html/data_builder.py @@ -211,7 +211,16 @@ def build_workload_details( :return: Dictionary with prompts, generations, request timing, and server info """ target = args.target - rate_type = benchmarks[0].config.strategy.type_ + + # Collect all rate types in execution order, keeping first occurrence only + rate_types_raw = [bm.config.strategy.type_ for bm in benchmarks] + seen = set() + rate_types = [] + for rt in rate_types_raw: + if rt not in seen: + seen.add(rt) + rate_types.append(rt) + successful_requests = [req for bm in benchmarks for req in bm.requests.successful] sample_indices = random.sample( @@ -288,7 +297,7 @@ def build_workload_details( }, "num_benchmarks": number_of_buckets, }, - "rate_type": rate_type, + "rate_types": rate_types, "server": {"target": target}, } @@ -312,7 +321,7 @@ def build_benchmarks(benchmarks: list[GenerativeBenchmark]) -> list[dict[str, An bm.metrics.time_to_first_token_ms.successful ).model_dump(), "throughput": TabularDistributionSummary.from_distribution_summary( - bm.metrics.output_tokens_per_second.successful + bm.metrics.output_tokens_per_second.total ).model_dump(), "time_per_request": ( TabularDistributionSummary.from_distribution_summary( diff --git a/src/guidellm/benchmark/outputs/html/theme.py b/src/guidellm/benchmark/outputs/html/theme.py index b0a56f79f..ac0b0dac6 100644 
--- a/src/guidellm/benchmark/outputs/html/theme.py +++ b/src/guidellm/benchmark/outputs/html/theme.py @@ -221,6 +221,27 @@ def get_css(cls) -> str: color: rgba(255, 255, 255, 0.7); }} + .sample-carousel {{ + margin: 0.5rem 0; + }} + + .sample-display {{ + padding: 0.5rem; + background-color: rgba(255,255,255,0.05); + border-radius: 4px; + font-size: 0.875rem; + color: rgba(255, 255, 255, 0.7); + min-height: 4rem; + display: flex; + align-items: center; + opacity: 1; + transition: opacity 0.5s ease-in-out; + }} + + .sample-display.fade-out {{ + opacity: 0; + }} + .mean-container {{ margin-top: 1.5rem; margin-bottom: 0.5rem; @@ -248,6 +269,28 @@ def get_css(cls) -> str: grid-template-columns: repeat(3, 1fr); gap: 1.5rem; margin-top: 1rem; + align-items: start; + }} + + .chart-container {{ + display: flex; + flex-direction: column; + }} + + .chart-wrapper {{ + flex: 1; + display: flex; + align-items: flex-end; + width: 100%; + overflow: hidden; + }} + + .chart-wrapper > div {{ + width: 100% !important; + }} + + .chart-wrapper .plotly {{ + width: 100% !important; }} .flex-col {{ @@ -262,4 +305,29 @@ def get_css(cls) -> str: grid-template-columns: 1fr 1fr; gap: 0.5rem; }} + + /* Workload metrics grid */ + .metrics-grid {{ + display: grid; + grid-template-columns: repeat(2, 1fr); + grid-template-rows: repeat(2, 1fr); + gap: 2rem; + margin-top: 1.5rem; + }} + + .metric-card {{ + background-color: rgba(255, 255, 255, 0.02); + border-radius: 8px; + padding: 1.5rem; + border: 1px solid rgba(255, 255, 255, 0.1); + }} + + .metric-card h3 {{ + font-size: 0.875rem; + font-weight: 500; + letter-spacing: 0.05em; + color: rgba(255, 255, 255, 0.6); + margin-bottom: 1rem; + text-transform: uppercase; + }} """ From f8a4f9a4a42d6d9aaa4338e5fdcdf6ebc8324cb8 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Tue, 20 Jan 2026 19:52:56 -0500 Subject: [PATCH 4/5] More improvements Signed-off-by: Samuel Monson --- .../html/components/workload_details.py | 299 ++++++++++++------ 
.../benchmark/outputs/html/data_builder.py | 22 +- src/guidellm/benchmark/outputs/html/theme.py | 26 +- 3 files changed, 233 insertions(+), 114 deletions(-) diff --git a/src/guidellm/benchmark/outputs/html/components/workload_details.py b/src/guidellm/benchmark/outputs/html/components/workload_details.py index 101425925..28c174b94 100644 --- a/src/guidellm/benchmark/outputs/html/components/workload_details.py +++ b/src/guidellm/benchmark/outputs/html/components/workload_details.py @@ -33,13 +33,43 @@ def generate(self, data: dict[str, Any]) -> str: server_data = data.get("server", {}) rate_types = data.get("rate_types", ["N/A"]) num_benchmarks = requests_data.get("num_benchmarks", 0) + total_requests = requests_data.get("total_requests", 0) + requests_per_benchmark = requests_data.get( + "requests_per_benchmark", + {"successful": [], "incomplete": [], "errored": []}, + ) + + # Extract samples for unified JavaScript rotation + prompt_samples = prompts_data.get("samples", [])[:5] + generation_samples = generations_data.get("samples", [])[:5] + + # Prepare samples for JavaScript (escape quotes and truncate) + prompt_samples_js = ( + [ + s[:_SAMPLE_MAX_LENGTH].replace("\\", "\\\\").replace('"', '\\"') + for s in prompt_samples + ] + if prompt_samples + else [] + ) + + generation_samples_js = ( + [ + s[:_SAMPLE_MAX_LENGTH].replace("\\", "\\\\").replace('"', '\\"') + for s in generation_samples + ] + if generation_samples + else [] + ) # Build HTML sections - prompts_html = self._generate_prompts_section(prompts_data) + prompts_html = self._generate_prompts_section(prompts_data, prompt_samples_js) server_html = self._generate_server_section( - server_data, rate_types, num_benchmarks + server_data, rate_types, num_benchmarks, total_requests + ) + generations_html = self._generate_generations_section( + generations_data, generation_samples_js ) - generations_html = self._generate_generations_section(generations_data) # Build charts prompt_tokens_fig = 
self._create_histogram_chart( @@ -52,7 +82,7 @@ def generate(self, data: dict[str, Any]) -> str: "Output Token Distribution", "length (tokens)", ) - requests_fig = self._create_requests_over_time_chart(requests_data) + requests_fig = self._create_requests_per_benchmark_chart(requests_per_benchmark) # Convert figures to HTML prompt_chart_html = prompt_tokens_fig.to_html( @@ -62,94 +92,95 @@ def generate(self, data: dict[str, Any]) -> str: include_plotlyjs=False, div_id="output-tokens-chart" ) requests_chart_html = requests_fig.to_html( - include_plotlyjs=False, div_id="requests-over-time-chart" + include_plotlyjs=False, div_id="requests-per-benchmark-chart" ) + # Create unified JavaScript for synchronized sample rotation + unified_script = "" + if prompt_samples_js and generation_samples_js: + unified_script = f""" + + """ + return f"""

Workload Details

-
-

Prompts

- {prompts_html} -
- {prompt_chart_html} -
-
-
-

Run Configuration

- {server_html} -
- {requests_chart_html} -
-
-
-

Generations

- {generations_html} -
- {output_chart_html} -
-
+

Prompts

+

Run Configuration

+

Outputs

+ +
{prompts_html}
+
{server_html}
+
{generations_html}
+ +
{prompt_chart_html}
+
{requests_chart_html}
+
{output_chart_html}
+ {unified_script}
""" - def _generate_prompts_section(self, prompts_data: dict[str, Any]) -> str: + def _generate_prompts_section( + self, prompts_data: dict[str, Any], samples_js: list[str] + ) -> str: """Generate HTML for prompts samples. Args: prompts_data: Dict with 'samples' and 'token_distributions'. + samples_js: Prepared JavaScript-safe sample strings. Returns: HTML string for prompts section. """ - samples = prompts_data.get("samples", []) token_stats = prompts_data.get("token_distributions", {}).get("statistics", {}) mean_tokens = token_stats.get("mean", 0) if token_stats else 0 # Sample prompt header header_html = '
Sample Prompt
' - if not samples: + if not samples_js: samples_html = "

No prompt samples available

" else: - # Use only first 5 samples - samples_to_show = samples[:5] - # Prepare samples for JavaScript (escape quotes and truncate) - samples_js = [ - s[:_SAMPLE_MAX_LENGTH].replace("\\", "\\\\").replace('"', '\\"') - for s in samples_to_show - ] - samples_html = f""" - """ # Mean prompt length @@ -163,7 +194,11 @@ def _generate_prompts_section(self, prompts_data: dict[str, Any]) -> str: return f"{header_html}{samples_html}{mean_html}" def _generate_server_section( - self, _server_data: dict[str, Any], rate_types: list[str], num_benchmarks: int + self, + _server_data: dict[str, Any], + rate_types: list[str], + num_benchmarks: int, + total_requests: int, ) -> str: """Generate HTML for benchmark configuration. @@ -171,6 +206,7 @@ def _generate_server_section( _server_data: Dict with server data (unused but kept for compatibility). rate_types: List of rate type strings in execution order. num_benchmarks: Number of benchmarks. + total_requests: Total number of requests across all benchmarks. Returns: HTML string for benchmark configuration section. @@ -195,72 +231,47 @@ def _generate_server_section(
+
+
Total Request Count
+
{total_requests}
+
""" - def _generate_generations_section(self, generations_data: dict[str, Any]) -> str: + def _generate_generations_section( + self, generations_data: dict[str, Any], samples_js: list[str] + ) -> str: """Generate HTML for generation samples. Args: generations_data: Dict with 'samples' and 'token_distributions'. + samples_js: Prepared JavaScript-safe sample strings. Returns: HTML string for generations section. """ - samples = generations_data.get("samples", []) token_stats = generations_data.get("token_distributions", {}).get( "statistics", {} ) mean_tokens = token_stats.get("mean", 0) if token_stats else 0 - # Sample generated header - header_html = '
Sample Generated
' + # Sample output header + header_html = '
Sample Output
' - if not samples: - samples_html = "

No generation samples available

" + if not samples_js: + samples_html = "

No output samples available

" else: - # Use only first 5 samples - samples_to_show = samples[:5] - # Prepare samples for JavaScript (escape quotes and truncate) - samples_js = [ - s[:_SAMPLE_MAX_LENGTH].replace("\\", "\\\\").replace('"', '\\"') - for s in samples_to_show - ] - samples_html = f""" - """ - # Mean generated length + # Mean output length mean_html = f"""
-
Mean Generated Length
+
Mean Output Length
{mean_tokens:.2f} tokens
""" @@ -332,6 +343,94 @@ def _create_histogram_chart( return fig + def _create_requests_per_benchmark_chart( + self, requests_per_benchmark: dict[str, list[int]] + ) -> go.Figure: + """Create requests per benchmark stacked bar chart. + + Args: + requests_per_benchmark: Dict with 'successful', 'incomplete', + and 'errored' lists. + + Returns: + Plotly figure with stacked requests per benchmark. + """ + fig = self._create_figure( + title="Requests per Benchmark", + xaxis_title="Benchmark Index", + yaxis_title="Request Count", + autosize=True, + width=None, + ) + + successful = requests_per_benchmark.get("successful", []) + incomplete = requests_per_benchmark.get("incomplete", []) + errored = requests_per_benchmark.get("errored", []) + + if not successful and not incomplete and not errored: + return fig + + # Create x values as benchmark indices (1-based for display) + num_benchmarks = max(len(successful), len(incomplete), len(errored)) + x_values = list(range(1, num_benchmarks + 1)) + + # Add stacked bars - order matters for stacking + # Add successful requests bar using primary theme color + if successful: + fig.add_trace( + go.Bar( + x=x_values, + y=successful, + name="Successful", + marker_color=self.theme.PRIMARY, + hovertemplate=( + "Benchmark: %{x}
Successful: %{y}" + ), + ) + ) + + # Add incomplete requests bar using secondary theme color (lavender) + if incomplete: + fig.add_trace( + go.Bar( + x=x_values, + y=incomplete, + name="Incomplete", + marker_color=self.theme.SECONDARY, + hovertemplate=( + "Benchmark: %{x}
Incomplete: %{y}" + ), + ) + ) + + # Add errored requests bar using error theme color + if errored: + fig.add_trace( + go.Bar( + x=x_values, + y=errored, + name="Errored", + marker_color=self.theme.ERROR, + hovertemplate=("Benchmark: %{x}
Errored: %{y}"), + ) + ) + + # Enable stacked bar mode with legend at bottom + fig.update_layout( + barmode="stack", + showlegend=True, + legend={ + "orientation": "h", + "yanchor": "top", + "y": -0.2, + "xanchor": "center", + "x": 0.5, + }, + margin={"b": 80}, + ) + + return fig + def _create_requests_over_time_chart( self, requests_data: dict[str, Any] ) -> go.Figure: diff --git a/src/guidellm/benchmark/outputs/html/data_builder.py b/src/guidellm/benchmark/outputs/html/data_builder.py index 86460821f..e59ce2993 100644 --- a/src/guidellm/benchmark/outputs/html/data_builder.py +++ b/src/guidellm/benchmark/outputs/html/data_builder.py @@ -239,15 +239,18 @@ def build_workload_details( for i in sample_indices ] + # Token counts for successful requests only prompt_tokens = [ - float(req.prompt_tokens) if req.prompt_tokens is not None else -1 + float(req.prompt_tokens) for bm in benchmarks for req in bm.requests.successful + if req.prompt_tokens is not None ] output_tokens = [ - float(req.output_tokens) if req.output_tokens is not None else -1 + float(req.output_tokens) for bm in benchmarks for req in bm.requests.successful + if req.output_tokens is not None ] prompt_token_buckets, _prompt_bucket_width = Bucket.from_data(prompt_tokens, 1) @@ -269,6 +272,19 @@ def build_workload_details( all_req_times, None, number_of_buckets ) + # Calculate requests per benchmark (successful, incomplete, errored) + requests_per_benchmark = { + "successful": [len(bm.requests.successful) for bm in benchmarks], + "incomplete": [len(bm.requests.incomplete) for bm in benchmarks], + "errored": [len(bm.requests.errored) for bm in benchmarks], + } + total_requests = sum( + len(bm.requests.successful) + + len(bm.requests.incomplete) + + len(bm.requests.errored) + for bm in benchmarks + ) + return { "prompts": { "samples": sample_prompts, @@ -296,6 +312,8 @@ def build_workload_details( "bucket_width": bucket_width, }, "num_benchmarks": number_of_buckets, + "requests_per_benchmark": 
requests_per_benchmark, + "total_requests": total_requests, }, "rate_types": rate_types, "server": {"target": target}, diff --git a/src/guidellm/benchmark/outputs/html/theme.py b/src/guidellm/benchmark/outputs/html/theme.py index ac0b0dac6..d58339505 100644 --- a/src/guidellm/benchmark/outputs/html/theme.py +++ b/src/guidellm/benchmark/outputs/html/theme.py @@ -267,29 +267,31 @@ def get_css(cls) -> str: .grid-3col {{ display: grid; grid-template-columns: repeat(3, 1fr); + grid-template-rows: auto auto 1fr; gap: 1.5rem; + column-gap: 1.5rem; + row-gap: 1rem; margin-top: 1rem; align-items: start; }} - .chart-container {{ - display: flex; - flex-direction: column; + .grid-3col h3 {{ + align-self: start; }} - .chart-wrapper {{ - flex: 1; - display: flex; - align-items: flex-end; - width: 100%; - overflow: hidden; + .grid-3col .content-section {{ + align-self: start; }} - .chart-wrapper > div {{ - width: 100% !important; + .grid-3col .chart-section {{ + align-self: stretch; + display: flex; + flex-direction: column; + justify-content: flex-end; + min-height: 300px; }} - .chart-wrapper .plotly {{ + .grid-3col .chart-section > div {{ width: 100% !important; }} From f63db81a120ffdb17b9ad98f093b455812d13242 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Tue, 20 Jan 2026 19:59:32 -0500 Subject: [PATCH 5/5] Round all plot edges Signed-off-by: Samuel Monson --- src/guidellm/benchmark/outputs/html/theme.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/guidellm/benchmark/outputs/html/theme.py b/src/guidellm/benchmark/outputs/html/theme.py index d58339505..91d0910dc 100644 --- a/src/guidellm/benchmark/outputs/html/theme.py +++ b/src/guidellm/benchmark/outputs/html/theme.py @@ -72,6 +72,8 @@ def get_base_layout(cls) -> dict[str, Any]: "bgcolor": cls.SURFACE, "font": {"family": cls.FONT_FAMILY, "color": cls.TEXT_PRIMARY}, }, + "shapes": [], + "margin": {"pad": 0}, } @classmethod @@ -332,4 +334,19 @@ def get_css(cls) -> str: margin-bottom: 1rem; 
text-transform: uppercase; }} + + /* Plotly chart styling */ + .plotly {{ + border-radius: 8px; + overflow: hidden; + }} + + .js-plotly-plot {{ + border-radius: 8px; + overflow: hidden; + }} + + .plotly .main-svg {{ + border-radius: 8px; + }} """