Skip to content

Option to re-display a benchmark file #185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Jul 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
58bca2c
Allow result file to be re-displayed
jaredoconnell Jun 12, 2025
90328e6
Added test for JSON
jaredoconnell Jun 12, 2025
07d8150
Added yaml test
jaredoconnell Jun 13, 2025
1ed37cd
Fix warning
jaredoconnell Jun 13, 2025
379c64f
Add uncommitted file
jaredoconnell Jun 13, 2025
2a8db0d
Fix linter errors
jaredoconnell Jun 13, 2025
c43573a
Use fixed width for CLI tests
jaredoconnell Jun 13, 2025
18d5897
Fix linter error and exclude test assets from linting
jaredoconnell Jun 13, 2025
2063d43
Added option to regenerate test artifact
jaredoconnell Jun 13, 2025
27e8391
Fix linter errors
jaredoconnell Jun 13, 2025
554a182
Merge branch 'main' into redisplay-results
sjmonson Jun 20, 2025
7f2dd40
Address review comments
jaredoconnell Jun 23, 2025
fff5c87
Allow reexporting reimported benchmarks
jaredoconnell Jun 26, 2025
9f9ddc9
Add test for reexporting and fix other tests
jaredoconnell Jun 26, 2025
bf3d175
Merge branch 'main' into redisplay-results
jaredoconnell Jun 26, 2025
83b0c77
Switch to internal dependency, and fix linter errors
jaredoconnell Jun 26, 2025
c0baf33
Update documentation to reflect command change
jaredoconnell Jun 26, 2025
9d41aa6
Merge branch 'main' into redisplay-results
jaredoconnell Jun 27, 2025
686fcee
Merge branch 'main' into redisplay-results
jaredoconnell Jun 27, 2025
534fbe4
Fix linter errors
jaredoconnell Jun 27, 2025
bece157
Revert docs changes
jaredoconnell Jul 8, 2025
be1730d
Update command to use hyphen
jaredoconnell Jul 8, 2025
2bd7f5e
Merge branch 'main' into redisplay-results
markurtz Jul 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/


# MacOS files
Expand Down
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ repos:
rev: v4.6.0
hooks:
- id: trailing-whitespace
exclude: ^tests/?.*/assets/.+
- id: end-of-file-fixer
exclude: ^tests/?.*/assets/.+
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.7
hooks:
Expand Down
54 changes: 50 additions & 4 deletions src/guidellm/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@
from pydantic import ValidationError

from guidellm.backend import BackendType
from guidellm.benchmark import ProfileType
from guidellm.benchmark import (
ProfileType,
reimport_benchmarks_report,
)
from guidellm.benchmark.entrypoints import benchmark_with_scenario
from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
from guidellm.config import print_config
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
from guidellm.scheduler import StrategyType
from guidellm.utils import DefaultGroupHandler
from guidellm.utils import cli as cli_tools

STRATEGY_PROFILE_CHOICES = set(
Expand All @@ -25,7 +29,17 @@ def cli():
pass


@cli.command(
@cli.group(
    help="Commands to run a new benchmark or load a prior one.",
    cls=DefaultGroupHandler,
    # "run" is invoked when the sub-command is omitted, keeping the legacy
    # `guidellm benchmark ...` invocation working unchanged.
    default="run",
)
def benchmark():
    """Container group for benchmark sub-commands ("run", "from-file")."""
    pass


@benchmark.command(
"run",
help="Run a benchmark against a generative model using the specified arguments.",
context_settings={"auto_envvar_prefix": "GUIDELLM"},
)
Expand Down Expand Up @@ -230,7 +244,7 @@ def cli():
type=int,
help="The random seed to use for benchmarking to ensure reproducibility.",
)
def benchmark(
def run(
scenario,
target,
backend_type,
Expand Down Expand Up @@ -306,6 +320,37 @@ def benchmark(
)


@benchmark.command(
    "from-file",
    help="Load a saved benchmark report."
)
@click.argument(
    "path",
    # Must point at an existing report file; defaults to ./benchmarks.json.
    type=click.Path(file_okay=True, dir_okay=False, exists=True),
    default=Path.cwd() / "benchmarks.json",
)
@click.option(
    "--output-path",
    type=click.Path(file_okay=True, dir_okay=True, exists=False),
    default=None,
    # is_flag=False + flag_value lets the option be passed with no value,
    # in which case click substitutes flag_value as the output path.
    is_flag=False,
    flag_value=Path.cwd() / "benchmarks_reexported.json",
    help=(
        "Allows re-exporting the benchmarks to another format. "
        "The path to save the output to. If it is a directory, "
        "it will save benchmarks.json under it. "
        "Otherwise, json, yaml, or csv files are supported for output types "
        "which will be read from the extension for the file path. "
        "This input is optional. If the output path flag is not provided, "
        "the benchmarks will not be reexported. If the flag is present but "
        "no value is specified, it will default to the current directory "
        "with the file name `benchmarks_reexported.json`."
    ),
)
def from_file(path, output_path):
    """Display a previously saved benchmark report, optionally re-exporting it."""
    reimport_benchmarks_report(path, output_path)


def decode_escaped_str(_ctx, _param, value):
"""
Click auto adds characters. For example, when using --pad-char "\n",
Expand All @@ -321,10 +366,11 @@ def decode_escaped_str(_ctx, _param, value):


@cli.command(
short_help="Prints environment variable settings.",
help=(
"Print out the available configuration settings that can be set "
"through environment variables."
)
),
)
def config():
    """Print the configuration settings that can be set via environment variables."""
    print_config()
Expand Down
3 changes: 2 additions & 1 deletion src/guidellm/benchmark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
StatusBreakdown,
)
from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker
from .entrypoints import benchmark_generative_text
from .entrypoints import benchmark_generative_text, reimport_benchmarks_report
from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport
from .profile import (
AsyncProfile,
Expand Down Expand Up @@ -63,4 +63,5 @@
"ThroughputProfile",
"benchmark_generative_text",
"create_profile",
"reimport_benchmarks_report",
]
24 changes: 18 additions & 6 deletions src/guidellm/benchmark/entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,8 @@ async def benchmark_generative_text(
)

if output_console:
orig_enabled = console.enabled
console.enabled = True
console.benchmarks = report.benchmarks
console.print_benchmarks_metadata()
console.print_benchmarks_info()
console.print_benchmarks_stats()
console.enabled = orig_enabled
console.print_full_report()

if output_path:
console.print_line("\nSaving benchmarks report...")
Expand All @@ -151,3 +146,20 @@ async def benchmark_generative_text(
console.print_line("\nBenchmarking complete.")

return report, saved_path


def reimport_benchmarks_report(file: Path, output_path: Optional[Path]) -> None:
    """
    The command-line entry point for re-importing and displaying an
    existing benchmarks report. Can also re-export the report to
    ``output_path`` in another supported format (json, yaml, or csv,
    inferred from the file extension).
    Assumes the file provided exists.

    :param file: path to a previously saved benchmarks report.
    :param output_path: optional destination for re-export; when None the
        report is only displayed, not saved again.
    """
    # Force-enable the console so the report is always displayed regardless
    # of the global console configuration.
    console = GenerativeBenchmarksConsole(enabled=True)
    report = GenerativeBenchmarksReport.load_file(file)
    console.benchmarks = report.benchmarks
    console.print_full_report()

    if output_path:
        console.print_line("\nSaving benchmarks report...")
        saved_path = report.save_file(output_path)
        console.print_line(f"Benchmarks report saved to {saved_path}")
22 changes: 21 additions & 1 deletion src/guidellm/benchmark/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,10 @@ def _file_setup(
if path_suffix in [".csv"]:
return path, "csv"

raise ValueError(f"Unsupported file extension: {path_suffix} for {path}.")
raise ValueError(
f"Unsupported file extension: {path_suffix} for {path}; "
"expected json, yaml, or csv."
)

@staticmethod
def _benchmark_desc_headers_and_values(
Expand Down Expand Up @@ -944,3 +947,20 @@ def print_benchmarks_stats(self):
title="Benchmarks Stats",
sections=sections,
)

def print_full_report(self):
    """
    Print out the benchmark statistics to the console.
    Temporarily enables the console if it's disabled, and always restores
    the caller's enabled state afterwards -- even if printing raises.

    Format:
        - Metadata
        - Info
        - Stats
    """
    orig_enabled = self.enabled
    self.enabled = True
    try:
        self.print_benchmarks_metadata()
        self.print_benchmarks_info()
        self.print_benchmarks_stats()
    finally:
        # Restore inside finally: the original code left the console
        # force-enabled if any print method raised.
        self.enabled = orig_enabled
2 changes: 2 additions & 0 deletions src/guidellm/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .colors import Colors
from .default_group import DefaultGroupHandler
from .hf_datasets import (
SUPPORTED_TYPES,
save_dataset_to_file,
Expand All @@ -20,6 +21,7 @@
__all__ = [
"SUPPORTED_TYPES",
"Colors",
"DefaultGroupHandler",
"EndlessTextCreator",
"IntegerRangeSampler",
"check_load_processor",
Expand Down
104 changes: 104 additions & 0 deletions src/guidellm/utils/default_group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""
File uses code adapted from code with the following license:

Copyright (c) 2015-2023, Heungsub Lee
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.

Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

__all__ = ["DefaultGroupHandler"]

import collections.abc as cabc

import click


class DefaultGroupHandler(click.Group):
    """
    Allows the migration to a new sub-command by allowing the group to run
    one of its sub-commands as the no-args default command.
    """

    def __init__(self, *args, **kwargs):
        """
        :param default: name of the sub-command to run when the given
            command name does not match any registered sub-command.
        :param default_if_no_args: when True, invoke the default
            sub-command if the group is called with no arguments at all.
        :raises ValueError: if ``ignore_unknown_options`` is explicitly
            set to a falsy value (unknown options must be forwarded to
            the default command).
        """
        # To resolve as the default command.
        if not kwargs.get("ignore_unknown_options", True):
            raise ValueError("Default group accepts unknown options")
        self.ignore_unknown_options = True
        # Sub-command to fall back to when the first CLI token is unknown.
        self.default_cmd_name = kwargs.pop("default", None)
        # Whether a bare group invocation should also run the default.
        self.default_if_no_args = kwargs.pop("default_if_no_args", False)
        super().__init__(*args, **kwargs)

    def parse_args(self, ctx, args):
        # Inject the default command name when invoked with no args at all.
        if not args and self.default_if_no_args:
            args.insert(0, self.default_cmd_name)
        return super().parse_args(ctx, args)

    def get_command(self, ctx, cmd_name):
        if cmd_name not in self.commands:
            # If it doesn't match an existing command, use the default command name.
            # Stash the original token on the context so resolve_command can
            # re-insert it as the default command's first argument.
            ctx.arg0 = cmd_name
            cmd_name = self.default_cmd_name
        return super().get_command(ctx, cmd_name)

    def resolve_command(self, ctx, args):
        cmd_name, cmd, args = super().resolve_command(ctx, args)
        if hasattr(ctx, "arg0"):
            # The original first token was not a sub-command: hand it to the
            # default command as its first argument.
            args.insert(0, ctx.arg0)
            cmd_name = cmd.name
        return cmd_name, cmd, args

    def format_commands(self, ctx, formatter):
        """
        Used to wrap the default formatter to clarify which command is the default.
        """
        formatter = DefaultCommandFormatter(self, formatter, mark=" (default)")
        return super().format_commands(ctx, formatter)


class DefaultCommandFormatter:
    """
    Proxy around a click help formatter that annotates the owning group's
    default command with a marker string and lists it first.
    """

    def __init__(self, group, formatter, mark="*"):
        self.group = group
        self.formatter = formatter
        self.mark = mark
        super().__init__()

    def __getattr__(self, attr):
        # Everything not overridden here is delegated to the wrapped formatter.
        return getattr(self.formatter, attr)

    def write_dl(self, rows: cabc.Sequence[tuple[str, str]], *args, **kwargs):
        """Write a definition list, marking and front-loading the default command."""
        marked: list[tuple[str, str]] = []
        others: list[tuple[str, str]] = []
        for name, description in rows:
            if name == self.group.default_cmd_name:
                marked.append((name + self.mark, description))
            else:
                others.append((name, description))
        # Reversed to mirror the repeated insert-at-front of the original
        # implementation (a no-op for the normal unique-name case).
        return self.formatter.write_dl(marked[::-1] + others, *args, **kwargs)
Empty file.
1 change: 1 addition & 0 deletions tests/unit/entrypoints/assets/benchmarks_stripped.json

Large diffs are not rendered by default.

Loading