diff --git a/.gitignore b/.gitignore index ebbf9b09..6a6ee9bd 100644 --- a/.gitignore +++ b/.gitignore @@ -168,7 +168,7 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +.idea/ # MacOS files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 61b765a2..8d6bbf2e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,9 @@ repos: rev: v4.6.0 hooks: - id: trailing-whitespace + exclude: ^tests/?.*/assets/.+ - id: end-of-file-fixer + exclude: ^tests/?.*/assets/.+ - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.11.7 hooks: diff --git a/README.md b/README.md index 1e489bb5..93f09f41 100644 --- a/README.md +++ b/README.md @@ -68,12 +68,12 @@ For information on starting other supported inference servers or platforms, see #### 2. Run a GuideLLM Benchmark -To run a GuideLLM benchmark, use the `guidellm benchmark` command with the target set to an OpenAI-compatible server. For this example, the target is set to 'http://localhost:8000', assuming that vLLM is active and running on the same server. Otherwise, update it to the appropriate location. By default, GuideLLM automatically determines the model available on the server and uses it. To target a different model, pass the desired name with the `--model` argument. Additionally, the `--rate-type` is set to `sweep`, which automatically runs a range of benchmarks to determine the minimum and maximum rates that the server and model can support. Each benchmark run under the sweep will run for 30 seconds, as set by the `--max-seconds` argument. Finally, `--data` is set to a synthetic dataset with 256 prompt tokens and 128 output tokens per request. 
For more arguments, supported scenarios, and configurations, jump to the [Configurations Section](#configurations) or run `guidellm benchmark --help`. +To run a GuideLLM benchmark, use the `guidellm benchmark run` command with the target set to an OpenAI-compatible server. For this example, the target is set to 'http://localhost:8000', assuming that vLLM is active and running on the same server. Otherwise, update it to the appropriate location. By default, GuideLLM automatically determines the model available on the server and uses it. To target a different model, pass the desired name with the `--model` argument. Additionally, the `--rate-type` is set to `sweep`, which automatically runs a range of benchmarks to determine the minimum and maximum rates that the server and model can support. Each benchmark run under the sweep will run for 30 seconds, as set by the `--max-seconds` argument. Finally, `--data` is set to a synthetic dataset with 256 prompt tokens and 128 output tokens per request. For more arguments, supported scenarios, and configurations, jump to the [Configurations Section](#configurations) or run `guidellm benchmark run --help`. Now, to start benchmarking, run the following command: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -110,11 +110,11 @@ For further details on determining the optimal request rate and SLOs, refer to t ### Configurations -GuideLLM offers a range of configurations through both the benchmark CLI command and environment variables, which provide default values and more granular controls. The most common configurations are listed below. A complete list is easily accessible, though, by running `guidellm benchmark --help` or `guidellm config` respectively. +GuideLLM offers a range of configurations through both the benchmark CLI command and environment variables, which provide default values and more granular controls. 
The most common configurations are listed below. A complete list is easily accessible, though, by running `guidellm benchmark run --help` or `guidellm config` respectively. #### Benchmark CLI -The `guidellm benchmark` command is used to run benchmarks against a generative AI backend/server. The command accepts a variety of arguments to customize the benchmark run. The most common arguments include: +The `guidellm benchmark run` command is used to run benchmarks against a generative AI backend/server. The command accepts a variety of arguments to customize the benchmark run. The most common arguments include: - `--target`: Specifies the target path for the backend to run benchmarks against. For example, `http://localhost:8000`. This is required to define the server endpoint. diff --git a/docs/datasets.md b/docs/datasets.md index a5d0aa4e..a626b58c 100644 --- a/docs/datasets.md +++ b/docs/datasets.md @@ -20,7 +20,7 @@ The following arguments can be used to configure datasets and their processing: ### Example Usage ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -49,7 +49,7 @@ For different use cases, here are the recommended dataset profiles to pass as ar #### Example Commands ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -59,7 +59,7 @@ guidellm benchmark \ Or using a JSON string: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -90,7 +90,7 @@ GuideLLM supports datasets from the Hugging Face Hub or local directories that f #### Example Commands ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -100,7 +100,7 @@ guidellm benchmark \ Or using a local dataset: ```bash -guidellm benchmark \ +guidellm 
benchmark run \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -152,7 +152,7 @@ GuideLLM supports various file formats for datasets, including text, CSV, JSON, #### Example Commands ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ diff --git a/docs/outputs.md b/docs/outputs.md index ea3d9a6f..29a16ef5 100644 --- a/docs/outputs.md +++ b/docs/outputs.md @@ -5,7 +5,7 @@ GuideLLM provides flexible options for outputting benchmark results, catering to For all of the output formats, `--output-extras` can be used to include additional information. This could include tags, metadata, hardware details, and other relevant information that can be useful for analysis. This must be supplied as a JSON encoded string. For example: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -26,10 +26,10 @@ By default, GuideLLM displays benchmark results and progress directly in the con ### Disabling Console Output -To disable the progress outputs to the console, use the `disable-progress` flag when running the `guidellm benchmark` command. For example: +To disable the progress outputs to the console, use the `--disable-progress` flag when running the `guidellm benchmark run` command. For example: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -37,10 +37,10 @@ guidellm benchmark \ --disable-progress ``` -To disable console output, use the `--disable-console-outputs` flag when running the `guidellm benchmark` command. For example: +To disable console output, use the `--disable-console-outputs` flag when running the `guidellm benchmark run` command. 
For example: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -50,10 +50,10 @@ guidellm benchmark \ ### Enabling Extra Information -GuideLLM includes the option to display extra information during the benchmark runs to monitor the overheads and performance of the system. This can be enabled by using the `--display-scheduler-stats` flag when running the `guidellm benchmark` command. For example: +GuideLLM includes the option to display extra information during the benchmark runs to monitor the overheads and performance of the system. This can be enabled by using the `--display-scheduler-stats` flag when running the `guidellm benchmark run` command. For example: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -81,7 +81,7 @@ GuideLLM supports saving benchmark results to files in various formats, includin Example command to save results in YAML format: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 9e8a12fb..4deff3bf 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -7,12 +7,16 @@ from pydantic import ValidationError from guidellm.backend import BackendType -from guidellm.benchmark import ProfileType +from guidellm.benchmark import ( + ProfileType, + reimport_benchmarks_report, +) from guidellm.benchmark.entrypoints import benchmark_with_scenario from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios from guidellm.config import print_config from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset from guidellm.scheduler import StrategyType +from guidellm.utils import DefaultGroupHandler from guidellm.utils import cli as cli_tools STRATEGY_PROFILE_CHOICES = set( @@ -25,7 +29,17 @@ 
def cli(): pass -@cli.command( +@cli.group( + help="Commands to run a new benchmark or load a prior one.", + cls=DefaultGroupHandler, + default="run", +) +def benchmark(): + pass + + +@benchmark.command( + "run", help="Run a benchmark against a generative model using the specified arguments.", context_settings={"auto_envvar_prefix": "GUIDELLM"}, ) @@ -230,7 +244,7 @@ def cli(): type=int, help="The random seed to use for benchmarking to ensure reproducibility.", ) -def benchmark( +def run( scenario, target, backend_type, @@ -306,6 +320,34 @@ def benchmark( ) +@benchmark.command(help="Load a saved benchmark report.") +@click.argument( + "path", + type=click.Path(file_okay=True, dir_okay=False, exists=True), + default=Path.cwd() / "benchmarks.json", +) +@click.option( + "--output-path", + type=click.Path(file_okay=True, dir_okay=True, exists=False), + default=None, + is_flag=False, + flag_value=Path.cwd() / "benchmarks_reexported.json", + help=( + "Allows re-exporting the benchmarks to another format. " + "The path to save the output to. If it is a directory, " + "it will save benchmarks.json under it. " + "Otherwise, json, yaml, or csv files are supported for output types " + "which will be read from the extension for the file path. " + "This input is optional. If the output path flag is not provided, " + "the benchmarks will not be reexported. If the flag is present but " + "no value is specified, it will default to the current directory " + "with the file name `benchmarks_reexported.json`." + ), +) +def from_file(path, output_path): + reimport_benchmarks_report(path, output_path) + + def decode_escaped_str(_ctx, _param, value): """ Click auto adds characters. For example, when using --pad-char "\n", @@ -321,10 +363,11 @@ def decode_escaped_str(_ctx, _param, value): @cli.command( + short_help="Prints environment variable settings.", help=( "Print out the available configuration settings that can be set " "through environment variables." 
- ) + ), ) def config(): print_config() diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py index f5130711..a4676c7e 100644 --- a/src/guidellm/benchmark/__init__.py +++ b/src/guidellm/benchmark/__init__.py @@ -12,7 +12,7 @@ StatusBreakdown, ) from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker -from .entrypoints import benchmark_generative_text +from .entrypoints import benchmark_generative_text, reimport_benchmarks_report from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport from .profile import ( AsyncProfile, @@ -63,4 +63,5 @@ "ThroughputProfile", "benchmark_generative_text", "create_profile", + "reimport_benchmarks_report", ] diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index ce43fca3..2ef85c3e 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -133,13 +133,8 @@ async def benchmark_generative_text( ) if output_console: - orig_enabled = console.enabled - console.enabled = True console.benchmarks = report.benchmarks - console.print_benchmarks_metadata() - console.print_benchmarks_info() - console.print_benchmarks_stats() - console.enabled = orig_enabled + console.print_full_report() if output_path: console.print_line("\nSaving benchmarks report...") @@ -151,3 +146,20 @@ async def benchmark_generative_text( console.print_line("\nBenchmarking complete.") return report, saved_path + + +def reimport_benchmarks_report(file: Path, output_path: Optional[Path]) -> None: + """ + The command-line entry point for re-importing and displaying an + existing benchmarks report. Can also re-export it to `output_path`. + Assumes the file provided exists. 
+ """ + console = GenerativeBenchmarksConsole(enabled=True) + report = GenerativeBenchmarksReport.load_file(file) + console.benchmarks = report.benchmarks + console.print_full_report() + + if output_path: + console.print_line("\nSaving benchmarks report...") + saved_path = report.save_file(output_path) + console.print_line(f"Benchmarks report saved to {saved_path}") diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py index 4847160d..5e4c4c67 100644 --- a/src/guidellm/benchmark/output.py +++ b/src/guidellm/benchmark/output.py @@ -242,7 +242,10 @@ def _file_setup( if path_suffix in [".csv"]: return path, "csv" - raise ValueError(f"Unsupported file extension: {path_suffix} for {path}.") + raise ValueError( + f"Unsupported file extension: {path_suffix} for {path}; " + "expected json, yaml, or csv." + ) @staticmethod def _benchmark_desc_headers_and_values( @@ -944,3 +947,20 @@ def print_benchmarks_stats(self): title="Benchmarks Stats", sections=sections, ) + + def print_full_report(self): + """ + Print out the benchmark statistics to the console. + Temporarily enables the console if it's disabled. 
+ + Format: + - Metadata + - Info + - Stats + """ + orig_enabled = self.enabled + self.enabled = True + self.print_benchmarks_metadata() + self.print_benchmarks_info() + self.print_benchmarks_stats() + self.enabled = orig_enabled diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 399c021d..fb9262c3 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,4 +1,5 @@ from .colors import Colors +from .default_group import DefaultGroupHandler from .hf_datasets import ( SUPPORTED_TYPES, save_dataset_to_file, @@ -20,6 +21,7 @@ __all__ = [ "SUPPORTED_TYPES", "Colors", + "DefaultGroupHandler", "EndlessTextCreator", "IntegerRangeSampler", "check_load_processor", diff --git a/src/guidellm/utils/default_group.py b/src/guidellm/utils/default_group.py new file mode 100644 index 00000000..b3f0f03b --- /dev/null +++ b/src/guidellm/utils/default_group.py @@ -0,0 +1,104 @@ +""" +File uses code adapted from code with the following license: + +Copyright (c) 2015-2023, Heungsub Lee +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + + Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +__all__ = ["DefaultGroupHandler"] + +import collections.abc as cabc + +import click + + +class DefaultGroupHandler(click.Group): + """ + Allows the migration to a new sub-command by allowing the group to run + one of its sub-commands as the no-args default command. + """ + + def __init__(self, *args, **kwargs): + # To resolve as the default command. + if not kwargs.get("ignore_unknown_options", True): + raise ValueError("Default group accepts unknown options") + self.ignore_unknown_options = True + self.default_cmd_name = kwargs.pop("default", None) + self.default_if_no_args = kwargs.pop("default_if_no_args", False) + super().__init__(*args, **kwargs) + + def parse_args(self, ctx, args): + if not args and self.default_if_no_args: + args.insert(0, self.default_cmd_name) + return super().parse_args(ctx, args) + + def get_command(self, ctx, cmd_name): + if cmd_name not in self.commands: + # If it doesn't match an existing command, use the default command name. + ctx.arg0 = cmd_name + cmd_name = self.default_cmd_name + return super().get_command(ctx, cmd_name) + + def resolve_command(self, ctx, args): + cmd_name, cmd, args = super().resolve_command(ctx, args) + if hasattr(ctx, "arg0"): + args.insert(0, ctx.arg0) + cmd_name = cmd.name + return cmd_name, cmd, args + + def format_commands(self, ctx, formatter): + """ + Used to wrap the default formatter to clarify which command is the default. 
+ """ + formatter = DefaultCommandFormatter(self, formatter, mark=" (default)") + return super().format_commands(ctx, formatter) + + +class DefaultCommandFormatter: + """ + Wraps a formatter to edit the line for the default command to mark it + with the specified mark string. + """ + + def __init__(self, group, formatter, mark="*"): + self.group = group + self.formatter = formatter + self.mark = mark + super().__init__() + + def __getattr__(self, attr): + return getattr(self.formatter, attr) + + def write_dl(self, rows: cabc.Sequence[tuple[str, str]], *args, **kwargs): + rows_: list[tuple[str, str]] = [] + for cmd_name, help_msg in rows: + if cmd_name == self.group.default_cmd_name: + rows_.insert(0, (cmd_name + self.mark, help_msg)) + else: + rows_.append((cmd_name, help_msg)) + return self.formatter.write_dl(rows_, *args, **kwargs) diff --git a/tests/unit/entrypoints/__init__.py b/tests/unit/entrypoints/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped.json b/tests/unit/entrypoints/assets/benchmarks_stripped.json new file mode 100644 index 00000000..a95d2880 --- /dev/null +++ b/tests/unit/entrypoints/assets/benchmarks_stripped.json @@ -0,0 +1 @@ +{"benchmarks": [{"type_": "generative_benchmark", "id_": "97ece514-8717-412f-9dba-2b42bcd9866f", "run_id": "93e36b31-b454-471d-ba62-6b2671585485", "args": {"profile": {"type_": "sweep", "completed_strategies": 10, "measured_rates": [1.5481806532737452], "measured_concurrencies": [0.9977627456483604], "max_concurrency": null, "strategy_type": "constant", "rate": -1.0, "initial_burst": true, "random_seed": 42, "sweep_size": 10, "rate_type": "constant", "strategy_types": ["synchronous", "throughput", "constant", "constant", "constant", "constant", "constant", "constant", "constant", "constant"]}, "strategy_index": 0, "strategy": {"type_": "synchronous"}, "max_number": null, "max_duration": 30.0, "warmup_number": null, "warmup_duration": null, 
"cooldown_number": null, "cooldown_duration": null}, "run_stats": {"start_time": 1749157168.054225, "end_time": 1749157198.213826, "requests_made": {"successful": 1, "errored": 0, "incomplete": 0, "total": 1}, "queued_time_avg": 0.631589580089488, "scheduled_time_delay_avg": 3.784260851271609e-06, "scheduled_time_sleep_avg": 0.0, "worker_start_delay_avg": 2.8021792148021943e-05, "worker_time_avg": 0.6373953819274902, "worker_start_time_targeted_delay_avg": 0.6319031715393066, "request_start_time_delay_avg": 0.316034068452551, "request_start_time_targeted_delay_avg": 0.6319856542222043, "request_time_delay_avg": 0.00029866238857837433, "request_time_avg": 0.6370967195389119}, "worker": {"type_": "generative_requests_worker", "backend_type": "openai_http", "backend_target": "example_target", "backend_model": "example_model", "backend_info": {"max_output_tokens": 16384, "timeout": 300, "http2": true, "authorization": false, "organization": null, "project": null, "text_completions_path": "/v1/completions", "chat_completions_path": "/v1/chat/completions"}}, "request_loader": {"type_": "generative_request_loader", "data": "prompt_tokens=256,output_tokens=128", "data_args": null, "processor": "example_processor", "processor_args": null}, "extras": {}, "metrics": {"requests_per_second": {"successful": {"mean": 1.5481806532737452, "median": 1.5530116578512305, "mode": 1.555484186315253, "variance": 0.0003352629331303757, "std_dev": 0.01831018659463567, "min": 1.4509899157628907, "max": 1.5597664461806156, "count": 45, "total_sum": 69.6707872953874, "percentiles": {"p001": 1.4509899157628907, "p01": 1.4509899157628907, "p05": 1.5190957942495127, "p10": 1.5377883923356668, "p25": 1.5483918601985445, "p75": 1.5567531615313124, "p90": 1.5583715343236735, "p95": 1.5590938878953722, "p99": 1.5597664461806156, "p999": 1.5597664461806156}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, 
"max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 1.5668128271815418, "median": 1.5530312090734288, "mode": 1.555484186315253, "variance": 0.036536424510388923, "std_dev": 0.19114503527528232, "min": 1.4509899157628907, "max": 3.509921881864626, "count": 46, "total_sum": 73.18070917725203, "percentiles": {"p001": 1.4509899157628907, "p01": 1.4509899157628907, "p05": 1.5190957942495127, "p10": 1.5377883923356668, "p25": 1.5483918601985445, "p75": 1.5567531615313124, "p90": 1.5583715343236735, "p95": 1.5591048992639953, "p99": 1.5597664461806156, "p999": 3.509921881864626}, "cumulative_distribution_function": null}}, "request_concurrency": {"successful": {"mean": 0.9977627456483604, "median": 1.0, "mode": 1.0, "variance": 0.002232249044605607, "std_dev": 0.047246682895263736, "min": 0.0, "max": 1.0, "count": 2, "total_sum": 1.0, "percentiles": {"p001": 0.0, "p01": 1.0, "p05": 1.0, "p10": 1.0, "p25": 1.0, "p75": 1.0, "p90": 1.0, "p95": 1.0, "p99": 1.0, "p999": 1.0}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 1.0, "median": 1.0, "mode": 1.0, "variance": 0.0, "std_dev": 0.0, "min": 1.0, "max": 1.0, "count": 1, "total_sum": 
1.0, "percentiles": {"p001": 1.0, "p01": 1.0, "p05": 1.0, "p10": 1.0, "p25": 1.0, "p75": 1.0, "p90": 1.0, "p95": 1.0, "p99": 1.0, "p999": 1.0}, "cumulative_distribution_function": null}, "total": {"mean": 0.9977433642674269, "median": 1.0, "mode": 1.0, "variance": 0.002251543327743578, "std_dev": 0.047450430216633206, "min": 0.0, "max": 1.0, "count": 2, "total_sum": 1.0, "percentiles": {"p001": 0.0, "p01": 1.0, "p05": 1.0, "p10": 1.0, "p25": 1.0, "p75": 1.0, "p90": 1.0, "p95": 1.0, "p99": 1.0, "p999": 1.0}, "cumulative_distribution_function": null}}, "request_latency": {"successful": {"mean": 0.6444743664368339, "median": 0.6424565315246582, "mode": 0.6395885944366455, "variance": 6.414585873782315e-05, "std_dev": 0.008009110982988258, "min": 0.6395885944366455, "max": 0.6891846656799316, "count": 46, "total_sum": 29.64582085609436, "percentiles": {"p001": 0.6395885944366455, "p01": 0.6395885944366455, "p05": 0.6399857997894287, "p10": 0.6403069496154785, "p25": 0.6409540176391602, "p75": 0.644390344619751, "p90": 0.6488735675811768, "p95": 0.656728982925415, "p99": 0.6891846656799316, "p999": 0.6891846656799316}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.2836878299713135, "median": 0.2836878299713135, "mode": 0.2836878299713135, "variance": 0.0, "std_dev": 0.0, "min": 0.2836878299713135, "max": 0.2836878299713135, "count": 1, "total_sum": 0.2836878299713135, "percentiles": {"p001": 0.2836878299713135, "p01": 0.2836878299713135, "p05": 0.2836878299713135, "p10": 0.2836878299713135, "p25": 0.2836878299713135, "p75": 0.2836878299713135, "p90": 0.2836878299713135, "p95": 0.2836878299713135, "p99": 0.2836878299713135, "p999": 
0.2836878299713135}, "cumulative_distribution_function": null}, "total": {"mean": 0.6367980571503334, "median": 0.642310380935669, "mode": 0.2836878299713135, "variance": 0.0027733643692853522, "std_dev": 0.05266274175624881, "min": 0.2836878299713135, "max": 0.6891846656799316, "count": 47, "total_sum": 29.929508686065674, "percentiles": {"p001": 0.2836878299713135, "p01": 0.2836878299713135, "p05": 0.6398613452911377, "p10": 0.6402454376220703, "p25": 0.640899658203125, "p75": 0.644390344619751, "p90": 0.6488735675811768, "p95": 0.656728982925415, "p99": 0.6891846656799316, "p999": 0.6891846656799316}, "cumulative_distribution_function": null}}, "prompt_token_count": {"successful": {"mean": 257.1086956521739, "median": 257.0, "mode": 257.0, "variance": 0.14035916824196598, "std_dev": 0.37464538999161057, "min": 257.0, "max": 259.0, "count": 46, "total_sum": 11827.0, "percentiles": {"p001": 257.0, "p01": 257.0, "p05": 257.0, "p10": 257.0, "p25": 257.0, "p75": 257.0, "p90": 257.0, "p95": 258.0, "p99": 259.0, "p999": 259.0}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 256.0, "median": 256.0, "mode": 256.0, "variance": 0.0, "std_dev": 0.0, "min": 256.0, "max": 256.0, "count": 1, "total_sum": 256.0, "percentiles": {"p001": 256.0, "p01": 256.0, "p05": 256.0, "p10": 256.0, "p25": 256.0, "p75": 256.0, "p90": 256.0, "p95": 256.0, "p99": 256.0, "p999": 256.0}, "cumulative_distribution_function": null}, "total": {"mean": 257.0851063829787, "median": 257.0, "mode": 256.0, "variance": 0.16296966953372566, "std_dev": 0.40369502044702715, "min": 256.0, "max": 259.0, "count": 47, "total_sum": 12083.0, "percentiles": {"p001": 256.0, "p01": 
256.0, "p05": 257.0, "p10": 257.0, "p25": 257.0, "p75": 257.0, "p90": 257.0, "p95": 258.0, "p99": 259.0, "p999": 259.0}, "cumulative_distribution_function": null}}, "output_token_count": {"successful": {"mean": 127.99999999999999, "median": 128.0, "mode": 128.0, "variance": 2.01948391736579e-28, "std_dev": 1.4210854715202002e-14, "min": 128.0, "max": 128.0, "count": 46, "total_sum": 5888.0, "percentiles": {"p001": 128.0, "p01": 128.0, "p05": 128.0, "p10": 128.0, "p25": 128.0, "p75": 128.0, "p90": 128.0, "p95": 128.0, "p99": 128.0, "p999": 128.0}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 126.44680851063832, "median": 128.0, "mode": 55.0, "variance": 110.97057492077867, "std_dev": 10.534257207832866, "min": 55.0, "max": 128.0, "count": 47, "total_sum": 5943.0, "percentiles": {"p001": 55.0, "p01": 55.0, "p05": 128.0, "p10": 128.0, "p25": 128.0, "p75": 128.0, "p90": 128.0, "p95": 128.0, "p99": 128.0, "p999": 128.0}, "cumulative_distribution_function": null}}, "time_to_first_token_ms": {"successful": {"mean": 16.792535781860348, "median": 16.38054847717285, "mode": 15.790939331054688, "variance": 1.2776652847210441, "std_dev": 1.1303385708366516, "min": 15.790939331054688, "max": 21.281957626342773, "count": 46, "total_sum": 772.4566459655762, "percentiles": {"p001": 15.790939331054688, "p01": 15.790939331054688, 
"p05": 15.971660614013672, "p10": 16.034841537475586, "p25": 16.111373901367188, "p75": 16.840696334838867, "p90": 18.505334854125977, "p95": 19.00935173034668, "p99": 21.281957626342773, "p999": 21.281957626342773}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 16.777170465347616, "median": 16.371726989746094, "mode": 15.790939331054688, "variance": 1.2613411927317046, "std_dev": 1.1230944718641014, "min": 15.790939331054688, "max": 21.281957626342773, "count": 47, "total_sum": 788.5270118713379, "percentiles": {"p001": 15.790939331054688, "p01": 15.790939331054688, "p05": 15.971660614013672, "p10": 16.034841537475586, "p25": 16.100645065307617, "p75": 16.840696334838867, "p90": 18.505334854125977, "p95": 19.00935173034668, "p99": 21.281957626342773, "p999": 21.281957626342773}, "cumulative_distribution_function": null}}, "time_per_output_token_ms": {"successful": {"mean": 4.90300272307966, "median": 4.885653033852577, "mode": 4.870360717177391, "variance": 0.003163643010108571, "std_dev": 0.05624627107736628, "min": 4.870360717177391, "max": 5.217265337705612, "count": 46, "total_sum": 225.5381252616644, "percentiles": {"p001": 4.870360717177391, "p01": 4.870360717177391, "p05": 4.8728808760643005, "p10": 4.873953759670258, "p25": 4.876237362623215, "p75": 4.904214292764664, "p90": 4.934689030051231, "p95": 
4.993332549929619, "p99": 5.217265337705612, "p999": 5.217265337705612}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 4.9022222114856975, "median": 4.882922396063805, "mode": 4.870360717177391, "variance": 0.003199582258516055, "std_dev": 0.05656485002646127, "min": 4.81866489757191, "max": 5.217265337705612, "count": 47, "total_sum": 230.3567901592363, "percentiles": {"p001": 4.81866489757191, "p01": 4.870360717177391, "p05": 4.872731864452362, "p10": 4.873953759670258, "p25": 4.876237362623215, "p75": 4.904214292764664, "p90": 4.934689030051231, "p95": 4.993332549929619, "p99": 5.217265337705612, "p999": 5.217265337705612}, "cumulative_distribution_function": null}}, "inter_token_latency_ms": {"successful": {"mean": 4.941609043733832, "median": 4.9241227427805505, "mode": 4.90871001416304, "variance": 0.003213660306132974, "std_dev": 0.056689155101597465, "min": 4.90871001416304, "max": 5.258346167136365, "count": 46, "total_sum": 227.31401601175622, "percentiles": {"p001": 4.90871001416304, "p01": 4.90871001416304, "p05": 4.911250016820713, "p10": 4.9123313483290785, "p25": 4.91463293240765, "p75": 4.9428301533376136, "p90": 4.973544849185493, "p95": 5.032650129062923, "p99": 5.258346167136365, "p999": 5.258346167136365}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 
0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 4.9413003057767115, "median": 4.921370603906826, "mode": 4.90871001416304, "variance": 0.003194539306669541, "std_dev": 0.056520255720135776, "min": 4.9078994327121315, "max": 5.258346167136365, "count": 47, "total_sum": 232.22191544446835, "percentiles": {"p001": 4.9078994327121315, "p01": 4.90871001416304, "p05": 4.911099831888995, "p10": 4.9123313483290785, "p25": 4.91463293240765, "p75": 4.9428301533376136, "p90": 4.973544849185493, "p95": 5.032650129062923, "p99": 5.258346167136365, "p999": 5.258346167136365}, "cumulative_distribution_function": null}}, "output_tokens_per_second": {"successful": {"mean": 198.13346751788123, "median": 203.04516628745705, "mode": 203.5378269520066, "variance": 613.9948900522365, "std_dev": 24.778920276158857, "min": 0.0, "max": 203.69598368219124, "count": 122, "total_sum": 17849.590625912137, "percentiles": {"p001": 46.71289356157213, "p01": 55.502236337170835, "p05": 190.14888022486173, "p10": 200.69400449782287, "p25": 202.23259402121505, "p75": 203.42923658938793, "p90": 203.5378269520066, "p95": 203.58722454130668, "p99": 203.6860916860917, "p999": 203.69598368219124}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 
0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 198.08514508750469, "median": 203.04516628745705, "mode": 203.5378269520066, "variance": 619.6237334717947, "std_dev": 24.89224243558211, "min": 0.0, "max": 203.69598368219124, "count": 125, "total_sum": 18310.99071823841, "percentiles": {"p001": 46.71289356157213, "p01": 55.502236337170835, "p05": 190.14888022486173, "p10": 200.69400449782287, "p25": 202.23259402121505, "p75": 203.4193704835346, "p90": 203.5378269520066, "p95": 203.58722454130668, "p99": 203.6860916860917, "p999": 203.69598368219124}, "cumulative_distribution_function": null}}, "tokens_per_second": {"successful": {"mean": 992.6867036588937, "median": 614.3700014647723, "mode": 615.2712336805046, "variance": 62014350.40386989, "std_dev": 7874.919072845758, "min": 0.0, "max": 159300.81436773148, "count": 139, "total_sum": 5852579.912913391, "percentiles": {"p001": 46.71289356157213, "p01": 55.502236337170835, "p05": 574.9559972583961, "p10": 606.8148148148148, "p25": 611.5928842228055, "p75": 615.0907757735738, "p90": 615.4517975055026, "p95": 615.542119166422, "p99": 617.5359246171967, "p999": 157985.65557672578}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 
0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 1002.1268169766876, "median": 614.3700014647723, "mode": 615.2712336805046, "variance": 63939736.95341249, "std_dev": 7996.232672541019, "min": 0.0, "max": 296531.848660591, "count": 143, "total_sum": 6151486.576325966, "percentiles": {"p001": 46.71289356157213, "p01": 55.502236337170835, "p05": 574.9559972583961, "p10": 606.8148148148148, "p25": 611.5928842228055, "p75": 615.0907757735738, "p90": 615.4517975055026, "p95": 615.542119166422, "p99": 1158.3275338304336, "p999": 158008.81383758428}, "cumulative_distribution_function": null}}}, "start_time": 1749157168.1827004, "end_time": 1749157198.1799018, "request_totals": {"successful": 46, "errored": 0, "incomplete": 1, "total": 47}, "request_samples": null, "requests": {"successful": [{"type_": "generative_text_response", "request_id": "73054dd1-486f-4894-a861-075750b82453", "request_type": "text_completions", "scheduler_info": {"requested": true, "completed": true, "errored": false, "canceled": false, "targeted_start_time": 1749157168.179883, "queued_time": 1749157168.1811602, "dequeued_time": 1749157168.1818697, "scheduled_time": 1749157168.181895, "worker_start": 1749157168.1820004, "request_start": 1749157168.1827004, "request_end": 1749157168.871885, "worker_end": 1749157168.8723884, "process_id": 0}, "prompt": "such a sacrifice to her advantage as years of gratitude cannot enough acknowledge. By this time she is actually with them! If such goodness does not make her miserable now, she will never deserve to be happy! What a meeting for her, when she first sees my aunt! We must endeavour to forget all that has passed on either side, said Jane I hope and trust they will yet be happy. 
His consenting to marry her is a proof, I will believe, that he is come to a right way of thinking. Their mutual affection will steady them; and I flatter myself they will settle so quietly, and live in so rational a manner, as may in time make their past imprudence forgotten. Their conduct has been such, replied Elizabeth, as neither you, nor I, nor anybody, can ever forget. It is useless to talk of it. It now occurred to the girls that their mother was in all likelihood perfectly ignorant of what had happened. They went to the library, therefore, and asked their father whether he would not wish them to make it known to her. He was writing, and, without raising his head, coolly replied, Just as you please. May we take my uncle s letter to read to her? Take whatever you like, and get away", "output": ", said Jane. The letter was read, and the girls retired to their own apartments. Elizabeth was the first to return. She found her mother seated in the drawing-room, and looking very pale. She was dressed in a loose white gown, and her hair was disordered. She rose as they entered, and clasped them both in her arms, and then, without saying a word, took her seat on the sofa, and began to weep. Elizabeth and Jane stood by her side, and listened to the sobs which issued from her heart. 
She had no words to express her gratitude, and, in a few minutes,", "prompt_tokens": 257, "output_tokens": 128, "start_time": 1749157168.1827004, "end_time": 1749157168.871885, "first_token_time": 1749157168.2039824, "last_token_time": 1749157168.8717923, "request_latency": 0.6891846656799316, "time_to_first_token_ms": 21.281957626342773, "time_per_output_token_ms": 5.217265337705612, "inter_token_latency_ms": 5.258346167136365, "tokens_per_second": 558.631117568713, "output_tokens_per_second": 185.72670921765}], "errored": [], "incomplete": [], "total": null}, "duration": 29.997201442718506}]} \ No newline at end of file diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped.yaml b/tests/unit/entrypoints/assets/benchmarks_stripped.yaml new file mode 100644 index 00000000..1d39e62d --- /dev/null +++ b/tests/unit/entrypoints/assets/benchmarks_stripped.yaml @@ -0,0 +1,1026 @@ +--- +benchmarks: +- type_: generative_benchmark + id_: 97ece514-8717-412f-9dba-2b42bcd9866f + run_id: 93e36b31-b454-471d-ba62-6b2671585485 + args: + profile: + type_: sweep + completed_strategies: 10 + measured_rates: + - 1.5481806532737452 + measured_concurrencies: + - 0.9977627456483604 + max_concurrency: + strategy_type: constant + rate: -1 + initial_burst: true + random_seed: 42 + sweep_size: 10 + rate_type: constant + strategy_types: + - synchronous + strategy_index: 0 + strategy: + type_: synchronous + max_number: + max_duration: 30 + warmup_number: + warmup_duration: + cooldown_number: + cooldown_duration: + run_stats: + start_time: 1749157168.054225 + end_time: 1749157198.213826 + requests_made: + successful: 1 + errored: 0 + incomplete: 0 + total: 1 + queued_time_avg: 0.631589580089488 + scheduled_time_delay_avg: 3.784260851271609e-06 + scheduled_time_sleep_avg: 0 + worker_start_delay_avg: 2.8021792148021943e-05 + worker_time_avg: 0.6373953819274902 + worker_start_time_targeted_delay_avg: 0.6319031715393066 + request_start_time_delay_avg: 0.316034068452551 + 
request_start_time_targeted_delay_avg: 0.6319856542222043 + request_time_delay_avg: 0.00029866238857837433 + request_time_avg: 0.6370967195389119 + worker: + type_: generative_requests_worker + backend_type: openai_http + backend_target: example_target + backend_model: example_model + backend_info: + max_output_tokens: 16384 + timeout: 300 + http2: true + authorization: false + organization: + project: + text_completions_path: "/v1/completions" + chat_completions_path: "/v1/chat/completions" + request_loader: + type_: generative_request_loader + data: prompt_tokens=256,output_tokens=128 + data_args: + processor: example_processor + processor_args: + extras: {} + metrics: + requests_per_second: + successful: + mean: 1.5481806532737452 + median: 1.5530116578512305 + mode: 1.555484186315253 + variance: 0.0003352629331303757 + std_dev: 0.01831018659463567 + min: 1.4509899157628907 + max: 1.5597664461806156 + count: 45 + total_sum: 69.6707872953874 + percentiles: + p001: 1.4509899157628907 + p01: 1.4509899157628907 + p05: 1.5190957942495127 + p10: 1.5377883923356668 + p25: 1.5483918601985445 + p75: 1.5567531615313124 + p90: 1.5583715343236735 + p95: 1.5590938878953722 + p99: 1.5597664461806156 + p999: 1.5597664461806156 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 1.5668128271815418 + median: 1.5530312090734288 + mode: 1.555484186315253 + variance: 0.036536424510388923 + std_dev: 0.19114503527528232 + min: 1.4509899157628907 + max: 3.509921881864626 + 
count: 46 + total_sum: 73.18070917725203 + percentiles: + p001: 1.4509899157628907 + p01: 1.4509899157628907 + p05: 1.5190957942495127 + p10: 1.5377883923356668 + p25: 1.5483918601985445 + p75: 1.5567531615313124 + p90: 1.5583715343236735 + p95: 1.5591048992639953 + p99: 1.5597664461806156 + p999: 3.509921881864626 + cumulative_distribution_function: + request_concurrency: + successful: + mean: 0.9977627456483604 + median: 1 + mode: 1 + variance: 0.002232249044605607 + std_dev: 0.047246682895263736 + min: 0 + max: 1 + count: 2 + total_sum: 1 + percentiles: + p001: 0 + p01: 1 + p05: 1 + p10: 1 + p25: 1 + p75: 1 + p90: 1 + p95: 1 + p99: 1 + p999: 1 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 1 + median: 1 + mode: 1 + variance: 0 + std_dev: 0 + min: 1 + max: 1 + count: 1 + total_sum: 1 + percentiles: + p001: 1 + p01: 1 + p05: 1 + p10: 1 + p25: 1 + p75: 1 + p90: 1 + p95: 1 + p99: 1 + p999: 1 + cumulative_distribution_function: + total: + mean: 0.9977433642674269 + median: 1 + mode: 1 + variance: 0.002251543327743578 + std_dev: 0.047450430216633206 + min: 0 + max: 1 + count: 2 + total_sum: 1 + percentiles: + p001: 0 + p01: 1 + p05: 1 + p10: 1 + p25: 1 + p75: 1 + p90: 1 + p95: 1 + p99: 1 + p999: 1 + cumulative_distribution_function: + request_latency: + successful: + mean: 0.6444743664368339 + median: 0.6424565315246582 + mode: 0.6395885944366455 + variance: 6.414585873782315e-05 + std_dev: 0.008009110982988258 + min: 0.6395885944366455 + max: 0.6891846656799316 + count: 46 + total_sum: 29.64582085609436 + percentiles: + p001: 0.6395885944366455 + p01: 0.6395885944366455 + p05: 0.6399857997894287 + p10: 0.6403069496154785 + p25: 0.6409540176391602 + p75: 0.644390344619751 + p90: 0.6488735675811768 + p95: 
0.656728982925415 + p99: 0.6891846656799316 + p999: 0.6891846656799316 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0.2836878299713135 + median: 0.2836878299713135 + mode: 0.2836878299713135 + variance: 0 + std_dev: 0 + min: 0.2836878299713135 + max: 0.2836878299713135 + count: 1 + total_sum: 0.2836878299713135 + percentiles: + p001: 0.2836878299713135 + p01: 0.2836878299713135 + p05: 0.2836878299713135 + p10: 0.2836878299713135 + p25: 0.2836878299713135 + p75: 0.2836878299713135 + p90: 0.2836878299713135 + p95: 0.2836878299713135 + p99: 0.2836878299713135 + p999: 0.2836878299713135 + cumulative_distribution_function: + total: + mean: 0.6367980571503334 + median: 0.642310380935669 + mode: 0.2836878299713135 + variance: 0.0027733643692853522 + std_dev: 0.05266274175624881 + min: 0.2836878299713135 + max: 0.6891846656799316 + count: 47 + total_sum: 29.929508686065674 + percentiles: + p001: 0.2836878299713135 + p01: 0.2836878299713135 + p05: 0.6398613452911377 + p10: 0.6402454376220703 + p25: 0.640899658203125 + p75: 0.644390344619751 + p90: 0.6488735675811768 + p95: 0.656728982925415 + p99: 0.6891846656799316 + p999: 0.6891846656799316 + cumulative_distribution_function: + prompt_token_count: + successful: + mean: 257.1086956521739 + median: 257 + mode: 257 + variance: 0.14035916824196598 + std_dev: 0.37464538999161057 + min: 257 + max: 259 + count: 46 + total_sum: 11827 + percentiles: + p001: 257 + p01: 257 + p05: 257 + p10: 257 + p25: 257 + p75: 257 + p90: 257 + p95: 258 + p99: 259 + p999: 259 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 
+ p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 256 + median: 256 + mode: 256 + variance: 0 + std_dev: 0 + min: 256 + max: 256 + count: 1 + total_sum: 256 + percentiles: + p001: 256 + p01: 256 + p05: 256 + p10: 256 + p25: 256 + p75: 256 + p90: 256 + p95: 256 + p99: 256 + p999: 256 + cumulative_distribution_function: + total: + mean: 257.0851063829787 + median: 257 + mode: 256 + variance: 0.16296966953372566 + std_dev: 0.40369502044702715 + min: 256 + max: 259 + count: 47 + total_sum: 12083 + percentiles: + p001: 256 + p01: 256 + p05: 257 + p10: 257 + p25: 257 + p75: 257 + p90: 257 + p95: 258 + p99: 259 + p999: 259 + cumulative_distribution_function: + output_token_count: + successful: + mean: 127.99999999999999 + median: 128 + mode: 128 + variance: 2.01948391736579e-28 + std_dev: 1.4210854715202002e-14 + min: 128 + max: 128 + count: 46 + total_sum: 5888 + percentiles: + p001: 128 + p01: 128 + p05: 128 + p10: 128 + p25: 128 + p75: 128 + p90: 128 + p95: 128 + p99: 128 + p999: 128 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 126.44680851063832 + median: 128 + mode: 55 + variance: 110.97057492077867 + std_dev: 10.534257207832866 + min: 55 + max: 128 + count: 47 + total_sum: 5943 + percentiles: + p001: 55 + p01: 55 + p05: 128 + p10: 128 + p25: 128 + p75: 128 + p90: 128 + p95: 128 + p99: 128 + p999: 128 + cumulative_distribution_function: + time_to_first_token_ms: + successful: + mean: 
16.792535781860348 + median: 16.38054847717285 + mode: 15.790939331054688 + variance: 1.2776652847210441 + std_dev: 1.1303385708366516 + min: 15.790939331054688 + max: 21.281957626342773 + count: 46 + total_sum: 772.4566459655762 + percentiles: + p001: 15.790939331054688 + p01: 15.790939331054688 + p05: 15.971660614013672 + p10: 16.034841537475586 + p25: 16.111373901367188 + p75: 16.840696334838867 + p90: 18.505334854125977 + p95: 19.00935173034668 + p99: 21.281957626342773 + p999: 21.281957626342773 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 16.777170465347616 + median: 16.371726989746094 + mode: 15.790939331054688 + variance: 1.2613411927317046 + std_dev: 1.1230944718641014 + min: 15.790939331054688 + max: 21.281957626342773 + count: 47 + total_sum: 788.5270118713379 + percentiles: + p001: 15.790939331054688 + p01: 15.790939331054688 + p05: 15.971660614013672 + p10: 16.034841537475586 + p25: 16.100645065307617 + p75: 16.840696334838867 + p90: 18.505334854125977 + p95: 19.00935173034668 + p99: 21.281957626342773 + p999: 21.281957626342773 + cumulative_distribution_function: + time_per_output_token_ms: + successful: + mean: 4.90300272307966 + median: 4.885653033852577 + mode: 4.870360717177391 + variance: 0.003163643010108571 + std_dev: 0.05624627107736628 + min: 4.870360717177391 + max: 5.217265337705612 + count: 46 + total_sum: 225.5381252616644 + percentiles: + p001: 4.870360717177391 + p01: 4.870360717177391 + p05: 4.8728808760643005 + p10: 
4.873953759670258 + p25: 4.876237362623215 + p75: 4.904214292764664 + p90: 4.934689030051231 + p95: 4.993332549929619 + p99: 5.217265337705612 + p999: 5.217265337705612 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 4.9022222114856975 + median: 4.882922396063805 + mode: 4.870360717177391 + variance: 0.003199582258516055 + std_dev: 0.05656485002646127 + min: 4.81866489757191 + max: 5.217265337705612 + count: 47 + total_sum: 230.3567901592363 + percentiles: + p001: 4.81866489757191 + p01: 4.870360717177391 + p05: 4.872731864452362 + p10: 4.873953759670258 + p25: 4.876237362623215 + p75: 4.904214292764664 + p90: 4.934689030051231 + p95: 4.993332549929619 + p99: 5.217265337705612 + p999: 5.217265337705612 + cumulative_distribution_function: + inter_token_latency_ms: + successful: + mean: 4.941609043733832 + median: 4.9241227427805505 + mode: 4.90871001416304 + variance: 0.003213660306132974 + std_dev: 0.056689155101597465 + min: 4.90871001416304 + max: 5.258346167136365 + count: 46 + total_sum: 227.31401601175622 + percentiles: + p001: 4.90871001416304 + p01: 4.90871001416304 + p05: 4.911250016820713 + p10: 4.9123313483290785 + p25: 4.91463293240765 + p75: 4.9428301533376136 + p90: 4.973544849185493 + p95: 5.032650129062923 + p99: 5.258346167136365 + p999: 5.258346167136365 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + 
p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 4.9413003057767115 + median: 4.921370603906826 + mode: 4.90871001416304 + variance: 0.003194539306669541 + std_dev: 0.056520255720135776 + min: 4.9078994327121315 + max: 5.258346167136365 + count: 47 + total_sum: 232.22191544446835 + percentiles: + p001: 4.9078994327121315 + p01: 4.90871001416304 + p05: 4.911099831888995 + p10: 4.9123313483290785 + p25: 4.91463293240765 + p75: 4.9428301533376136 + p90: 4.973544849185493 + p95: 5.032650129062923 + p99: 5.258346167136365 + p999: 5.258346167136365 + cumulative_distribution_function: + output_tokens_per_second: + successful: + mean: 198.13346751788123 + median: 203.04516628745705 + mode: 203.5378269520066 + variance: 613.9948900522365 + std_dev: 24.778920276158857 + min: 0 + max: 203.69598368219124 + count: 122 + total_sum: 17849.590625912137 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 190.14888022486173 + p10: 200.69400449782287 + p25: 202.23259402121505 + p75: 203.42923658938793 + p90: 203.5378269520066 + p95: 203.58722454130668 + p99: 203.6860916860917 + p999: 203.69598368219124 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + 
cumulative_distribution_function: + total: + mean: 198.08514508750469 + median: 203.04516628745705 + mode: 203.5378269520066 + variance: 619.6237334717947 + std_dev: 24.89224243558211 + min: 0 + max: 203.69598368219124 + count: 125 + total_sum: 18310.99071823841 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 190.14888022486173 + p10: 200.69400449782287 + p25: 202.23259402121505 + p75: 203.4193704835346 + p90: 203.5378269520066 + p95: 203.58722454130668 + p99: 203.6860916860917 + p999: 203.69598368219124 + cumulative_distribution_function: + tokens_per_second: + successful: + mean: 992.6867036588937 + median: 614.3700014647723 + mode: 615.2712336805046 + variance: 62014350.40386989 + std_dev: 7874.919072845758 + min: 0 + max: 159300.81436773148 + count: 139 + total_sum: 5852579.912913391 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 574.9559972583961 + p10: 606.8148148148148 + p25: 611.5928842228055 + p75: 615.0907757735738 + p90: 615.4517975055026 + p95: 615.542119166422 + p99: 617.5359246171967 + p999: 157985.65557672578 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 1002.1268169766876 + median: 614.3700014647723 + mode: 615.2712336805046 + variance: 63939736.95341249 + std_dev: 7996.232672541019 + min: 0 + max: 296531.848660591 + count: 143 + total_sum: 6151486.576325966 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 574.9559972583961 + p10: 606.8148148148148 + p25: 
611.5928842228055 + p75: 615.0907757735738 + p90: 615.4517975055026 + p95: 615.542119166422 + p99: 1158.3275338304336 + p999: 158008.81383758428 + cumulative_distribution_function: + start_time: 1749157168.1827004 + end_time: 1749157198.1799018 + request_totals: + successful: 46 + errored: 0 + incomplete: 1 + total: 47 + request_samples: + requests: + successful: + - type_: generative_text_response + request_id: 73054dd1-486f-4894-a861-075750b82453 + request_type: text_completions + scheduler_info: + requested: true + completed: true + errored: false + canceled: false + targeted_start_time: 1749157168.179883 + queued_time: 1749157168.1811602 + dequeued_time: 1749157168.1818697 + scheduled_time: 1749157168.181895 + worker_start: 1749157168.1820004 + request_start: 1749157168.1827004 + request_end: 1749157168.871885 + worker_end: 1749157168.8723884 + process_id: 0 + prompt: such a sacrifice to her advantage as years of gratitude cannot enough + acknowledge. By this time she is actually with them! If such goodness does + not make her miserable now, she will never deserve to be happy! What a meeting + for her, when she first sees my aunt! We must endeavour to forget all that + has passed on either side, said Jane I hope and trust they will yet be happy. + His consenting to marry her is a proof, I will believe, that he is come to + a right way of thinking. Their mutual affection will steady them; and I flatter + myself they will settle so quietly, and live in so rational a manner, as may + in time make their past imprudence forgotten. Their conduct has been such, + replied Elizabeth, as neither you, nor I, nor anybody, can ever forget. It + is useless to talk of it. It now occurred to the girls that their mother was + in all likelihood perfectly ignorant of what had happened. They went to the + library, therefore, and asked their father whether he would not wish them + to make it known to her. 
He was writing, and, without raising his head, coolly + replied, Just as you please. May we take my uncle s letter to read to her? + Take whatever you like, and get away + output: ", said Jane. The letter was read, and the girls retired to their own + apartments. Elizabeth was the first to return. She found her mother seated + in the drawing-room, and looking very pale. She was dressed in a loose white + gown, and her hair was disordered. She rose as they entered, and clasped them + both in her arms, and then, without saying a word, took her seat on the sofa, + and began to weep. Elizabeth and Jane stood by her side, and listened to the + sobs which issued from her heart. She had no words to express her gratitude, + and, in a few minutes," + prompt_tokens: 257 + output_tokens: 128 + start_time: 1749157168.1827004 + end_time: 1749157168.871885 + first_token_time: 1749157168.2039824 + last_token_time: 1749157168.8717923 + request_latency: 0.6891846656799316 + time_to_first_token_ms: 21.281957626342773 + time_per_output_token_ms: 5.217265337705612 + inter_token_latency_ms: 5.258346167136365 + tokens_per_second: 558.631117568713 + output_tokens_per_second: 185.72670921765 + total: + duration: 29.997201442718506 diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt b/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt new file mode 100644 index 00000000..170d1e6a --- /dev/null +++ b/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt @@ -0,0 +1,31 @@ + + +Benchmarks Metadata: + Run id:93e36b31-b454-471d-ba62-6b2671585485 + Duration:30.2 seconds + Profile:type=sweep, strategies=['synchronous', 'throughput', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant'], + max_concurrency=None + Args:max_number=None, max_duration=30.0, warmup_number=None, warmup_duration=None, cooldown_number=None, cooldown_duration=None + Worker:type_='generative_requests_worker' backend_type='openai_http' 
backend_target='example_target' backend_model='example_model' backend_info={'max_output_tokens': 16384, + 'timeout': 300, 'http2': True, 'authorization': False, 'organization': None, 'project': None, 'text_completions_path': '/v1/completions', 'chat_completions_path': + '/v1/chat/completions'} + Request Loader:type_='generative_request_loader' data='prompt_tokens=256,output_tokens=128' data_args=None processor='example_processor' processor_args=None + Extras:None + + +Benchmarks Info: +=================================================================================================================================================== +Metadata |||| Requests Made ||| Prompt Tok/Req ||| Output Tok/Req ||| Prompt Tok Total||| Output Tok Total || + Benchmark| Start Time| End Time| Duration (s)| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err +-----------|-----------|---------|-------------|------|-----|-----|------|------|----|-------|-----|-----|-------|-----|-----|-------|------|------ +synchronous| 16:59:28| 16:59:58| 30.0| 46| 1| 0| 257.1| 256.0| 0.0| 128.0| 0.0| 0.0| 11827| 256| 0| 5888| 0| 0 +=================================================================================================================================================== + + +Benchmarks Stats: +=============================================================================================================================================== +Metadata | Request Stats || Out Tok/sec| Tot Tok/sec| Req Latency (sec) ||| TTFT (ms) ||| ITL (ms) ||| TPOT (ms) || + Benchmark| Per Second| Concurrency| mean| mean| mean| median| p99| mean| median| p99| mean| median| p99| mean| median| p99 +-----------|-----------|------------|------------|------------|------|--------|------|-----|-------|-----|-----|-------|----|-----|-------|---- +synchronous| 1.55| 1.00| 198.1| 992.7| 0.64| 0.64| 0.69| 16.8| 16.4| 21.3| 4.9| 4.9| 5.3| 4.9| 4.9| 5.2 
+===============================================================================================================================================
diff --git a/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py b/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py
new file mode 100644
index 00000000..d76265be
--- /dev/null
+++ b/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py
@@ -0,0 +1,98 @@
+"""Tests for displaying and re-exporting a previously saved benchmarks report."""
+
+import filecmp
+import os
+from collections.abc import Callable, Iterator
+from pathlib import Path
+from unittest import mock
+
+import pytest
+
+from guidellm.benchmark import reimport_benchmarks_report
+
+# Set to true to re-write the expected output.
+REGENERATE_ARTIFACTS = False
+
+# The CLI output depends on the terminal width, so the tests pin it.
+TERMINAL_COLUMNS = "180"
+
+
+@pytest.fixture
+def get_test_asset_dir() -> Callable[[], Path]:
+    """Return a callable that gives the ``assets`` directory beside this file."""
+
+    def _asset_dir() -> Path:
+        return Path(__file__).parent / "assets"
+
+    return _asset_dir
+
+
+@pytest.fixture
+def cleanup() -> Iterator[list[Path]]:
+    """Yield a list of paths; any that still exist are deleted after the test."""
+    to_delete: list[Path] = []
+    yield to_delete
+    for item in to_delete:
+        item.unlink(missing_ok=True)
+
+
+def test_display_entrypoint_json(capfd, get_test_asset_dir):
+    """The JSON report renders to the expected console tables."""
+    generic_test_display_entrypoint(
+        "benchmarks_stripped.json",
+        capfd,
+        get_test_asset_dir,
+    )
+
+
+def test_display_entrypoint_yaml(capfd, get_test_asset_dir):
+    """The YAML report renders to the expected console tables."""
+    generic_test_display_entrypoint(
+        "benchmarks_stripped.yaml",
+        capfd,
+        get_test_asset_dir,
+    )
+
+
+def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir):
+    """Display ``filename`` via the entrypoint and compare stdout to the golden file.
+
+    :param filename: Name of the report file inside the assets directory.
+    :param capfd: pytest's ``capfd`` fixture, used to capture the printed output.
+    :param get_test_asset_dir: Callable returning the assets directory.
+    """
+    asset_dir = get_test_asset_dir()
+    expected_output_path = asset_dir / "benchmarks_stripped_output.txt"
+    # Scope the override to this call so the COLUMNS value cannot leak into
+    # other tests running in the same process.
+    with mock.patch.dict(os.environ, {"COLUMNS": TERMINAL_COLUMNS}):
+        reimport_benchmarks_report(asset_dir / filename, None)
+    out, _ = capfd.readouterr()
+    if REGENERATE_ARTIFACTS:
+        # Same encoding as the read below, so regeneration is platform independent.
+        expected_output_path.write_text(out, encoding="utf_8")
+        # Fail to prevent accidentally leaving regeneration mode on
+        pytest.fail("Test bypassed to regenerate output")
+    assert out == expected_output_path.read_text(encoding="utf_8")
+
+
+def test_reexporting_benchmark(get_test_asset_dir, cleanup):
+    """Re-export the JSON report and check it is identical to the source."""
+    asset_dir = get_test_asset_dir()
+    source_file = asset_dir / "benchmarks_stripped.json"
+    exported_file = asset_dir / "benchmarks_reexported.json"
+    # If you need to inspect the output to see why it failed, comment out
+    # the cleanup statement.
+    cleanup.append(exported_file)
+    # Drop any stale export left behind by an earlier, interrupted run.
+    exported_file.unlink(missing_ok=True)
+    reimport_benchmarks_report(source_file, exported_file)
+    # The reexported file should exist and be identical to the source.
+    assert exported_file.exists()
+    assert filecmp.cmp(source_file, exported_file, shallow=False)
+
+
+if __name__ == "__main__":
+    # These are pytest-style tests (fixtures, no TestCase classes), so
+    # unittest.main() would collect nothing; hand the file to pytest instead.
+    raise SystemExit(pytest.main([__file__]))