From a26cb8725fafe35531e224ee45eed812b0474772 Mon Sep 17 00:00:00 2001
From: Samuel Monson
Date: Fri, 14 Mar 2025 17:11:21 -0400
Subject: [PATCH 1/7] Add scenario argument

---
 src/guidellm/main.py             | 15 +++++++++++-
 src/guidellm/utils/cli_params.py | 39 +++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/src/guidellm/main.py b/src/guidellm/main.py
index e7363c6..169131f 100644
--- a/src/guidellm/main.py
+++ b/src/guidellm/main.py
@@ -1,5 +1,5 @@
 import asyncio
-from typing import Any, Literal, Mapping, Optional, Union, get_args
+from typing import Any, IO, Literal, Mapping, Optional, Union, get_args
 
 import click
 from loguru import logger
@@ -18,6 +18,8 @@
 
 __all__ = ["generate_benchmark_report"]
 
+# FIXME: Remove
+SCENARIOS = Literal["rag", "short"]
 
 @click.command()
 @click.option(
     "--target",
     type=str,
     required=True,
     help=(
         "The target path or url for the backend to evaluate. "
         "Ex: 'http://localhost:8000'"
     ),
 )
+@click.option(
+    "--scenario",
+    type=cli_params.Union(click.File(mode='r'), click.Choice(get_args(SCENARIOS))),
+    default=None,
+    help=(
+        "TODO: A scenario or path to config"
+    ),
+)
 @click.option(
     "--backend",
     type=click.Choice(get_args(BackendType)),
     default="openai_http",
     help=(
         "The backend to use for benchmarking. "
         "The default is OpenAI Server enabling compatability with any server that "
@@ -154,6 +164,7 @@
 )
 def generate_benchmark_report_cli(
     target: str,
+    scenario: Optional[Union[IO[Any], SCENARIOS]],
     backend: BackendType,
     model: Optional[str],
     data: Optional[str],
@@ -171,6 +182,7 @@ def generate_benchmark_report_cli(
     """
     generate_benchmark_report(
         target=target,
+        scenario=scenario,
         backend=backend,
         model=model,
         data=data,
@@ -189,6 +201,7 @@ def generate_benchmark_report(
     target: str,
     data: Optional[str],
     data_type: Literal["emulated", "file", "transformers"],
+    scenario: Optional[Union[IO[Any], SCENARIOS]],
     backend: BackendType = "openai_http",
     backend_kwargs: Optional[Mapping[str, Any]] = None,
     model: Optional[str] = None,
diff --git a/src/guidellm/utils/cli_params.py b/src/guidellm/utils/cli_params.py
index 4e8800d..f4a2c36 100644
--- a/src/guidellm/utils/cli_params.py
+++ b/src/guidellm/utils/cli_params.py
@@ -4,7 +4,7 @@
 
 from typing import Any, Optional
 
-from click import Context, Parameter, ParamType
+from click import BadParameter, Context, Parameter, ParamType
 
 __all__ = ["MAX_REQUESTS"]
 
@@ -32,3 +32,40 @@ def convert(
 
 
 MAX_REQUESTS = MaxRequestsType()
+
+
+class Union(ParamType):
+    """
+    A custom click parameter type that allows for multiple types to be accepted.
+    """
+
+    def __init__(self, *types: ParamType):
+        self.types = types
+        self.name = "".join(t.name for t in types)
+
+    def convert(self, value, param, ctx):
+        fails = []
+        for t in self.types:
+            try:
+                return t.convert(value, param, ctx)
+            except BadParameter as e:
+                fails.append(str(e))
+                continue
+
+        self.fail("; ".join(fails) or f"Invalid value: {value}")
+
+
+    def get_metavar(self, param: Parameter) -> str:
+        def get_choices(t: ParamType) -> str:
+            meta = t.get_metavar(param)
+            return meta if meta is not None else t.name
+
+        # Get the choices for each type in the union.
+        choices_str = "|".join(map(get_choices, self.types))
+
+        # Use curly braces to indicate a required argument.
+        if param.required and param.param_type_name == "argument":
+            return f"{{{choices_str}}}"
+
+        # Use square brackets to indicate an option or optional argument.
+        return f"[{choices_str}]"

From 7a9a691d648cada6a67d8ba7b7ba84875163a355 Mon Sep 17 00:00:00 2001
From: Samuel Monson
Date: Wed, 19 Mar 2025 14:10:08 -0400
Subject: [PATCH 2/7] Add Benchmark scenario and manager classes

---
 src/guidellm/benchmark/__init__.py |  0
 src/guidellm/benchmark/scenario.py | 63 ++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 src/guidellm/benchmark/__init__.py
 create mode 100644 src/guidellm/benchmark/scenario.py

diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/guidellm/benchmark/scenario.py b/src/guidellm/benchmark/scenario.py
new file mode 100644
index 0000000..39f67af
--- /dev/null
+++ b/src/guidellm/benchmark/scenario.py
@@ -0,0 +1,63 @@
+from pathlib import Path
+from typing import Any, Dict, Literal, Optional, Self, Union
+
+from pydantic import Field
+
+from guidellm.backend import BackendType
+from guidellm.core import Serializable
+from guidellm.executor import ProfileGenerationMode
+
+__all__ = ["Scenario", "ScenarioManager"]
+
+scenarios_path = Path(__file__).parent / "scenarios"
+
+
+class Scenario(Serializable):
+    backend: BackendType = "openai_http"
+    backend_kwargs: Optional[Dict[str, Any]] = None
+    model: Optional[str] = None
+    tokenizer: Optional[str] = None
+    data: Union[str, Dict[str, Any]] = Field(default_factory=dict)
+    data_type: Literal["emulated", "file", "transformers"] = "emulated"
+    rate_type: ProfileGenerationMode = "sweep"
+    rate: Optional[float] = None
+    max_seconds: int = 120
+    max_requests: Optional[Union[int, Literal["dataset"]]] = None
+
+    def _update(self, **fields: Mapping[str, Any]) -> Self:
+        for k, v in fields.items():
+            if not hasattr(self, k):
+                raise ValueError(f"Invalid field {k}")
+            setattr(self, k, v)
+
+        return self
+
+    def update(self, **fields: Mapping[str, Any]) -> Self:
+        return self._update(**{k: v for k, v in fields.items() if v is not None})
+
+
+class ScenarioManager:
+    def __init__(self, scenarios_dir: Optional[str] = None):
+        self.scenarios: Dict[str, Scenario] = {}
+
+        if scenarios_dir is None:
+            global scenarios_path
+        else:
+            scenarios_path = Path(scenarios_dir)
+
+        # Load built-in scenarios
+        for scenario_path in scenarios_path.glob("*.json"):
+            scenario = Scenario.from_json(scenario_path.read_text())
+            self[scenario_path.stem] = scenario
+
+    def __getitem__(self, scenario_name: str) -> Scenario:
+        return self.scenarios[scenario_name]
+
+    def __setitem__(self, scenario_name: str, scenario: Scenario):
+        if scenario_name in self.scenarios:
+            raise ValueError(f"Scenario {scenario_name} already exists")
+
+        self.scenarios[scenario_name] = scenario
+
+    def list(self):
+        return tuple(self.scenarios.keys())

From b420f8b21c44c6b7503505834ef4a2f1095a6510 Mon Sep 17 00:00:00 2001
From: Samuel Monson
Date: Wed, 19 Mar 2025 14:12:05 -0400
Subject: [PATCH 3/7] Replace all args to generate_benchmark_report with a scenario

---
 src/guidellm/main.py | 106 +++++++++++++++++++++++--------------------
 1 file changed, 56 insertions(+), 50 deletions(-)

diff --git a/src/guidellm/main.py b/src/guidellm/main.py
index 169131f..ae1d5e3 100644
--- a/src/guidellm/main.py
+++ b/src/guidellm/main.py
@@ -1,11 +1,12 @@
 import asyncio
-from typing import Any, IO, Literal, Mapping, Optional, Union, get_args
+from typing import IO, Any, Literal, Optional, Union, get_args
 
 import click
 from loguru import logger
 from transformers import AutoTokenizer  # type: ignore[import-untyped]
 
 from guidellm.backend import Backend, BackendType
+from guidellm.benchmark.scenario import Scenario, ScenarioManager
 from guidellm.core import GuidanceReport, TextGenerationBenchmarkReport
 from guidellm.executor import Executor, ProfileGenerationMode
 from guidellm.request import (
@@ -19,7 +20,7 @@
 __all__ = ["generate_benchmark_report"]
 
 # FIXME: Remove
-SCENARIOS = Literal["rag", "short"]
+SCENARIOS = ScenarioManager()
 
 @click.command()
 @click.option(
@@ -33,7 +34,7 @@
 )
 @click.option(
     "--scenario",
-    type=cli_params.Union(click.File(mode='r'), click.Choice(get_args(SCENARIOS))),
+    type=cli_params.Union(click.File(mode="r"), click.Choice(SCENARIOS.list())),
     default=None,
     help=(
         "TODO: A scenario or path to config"
     ),
@@ -42,7 +43,7 @@
 @click.option(
     "--backend",
     type=click.Choice(get_args(BackendType)),
-    default="openai_http",
+    default=None,
     help=(
         "The backend to use for benchmarking. "
         "The default is OpenAI Server enabling compatability with any server that "
@@ -61,7 +62,7 @@
 @click.option(
     "--data",
     type=str,
-    required=True,
+    default=None,
     help=(
         "The data source to use for benchmarking. "
         "Depending on the data-type, it should be a "
@@ -74,7 +75,7 @@
 @click.option(
     "--data-type",
     type=click.Choice(["emulated", "file", "transformers"]),
-    required=True,
+    default=None,
    help=(
         "The type of data to use for benchmarking. "
         "Use 'emulated' for synthetic data, 'file' for a file, or 'transformers' "
@@ -96,7 +97,7 @@
 @click.option(
     "--rate-type",
     type=click.Choice(get_args(ProfileGenerationMode)),
-    default="sweep",
+    default=None,
     help=(
         "The type of request rate to use for benchmarking. "
         "Use sweep to run a full range from synchronous to throughput (default), "
@@ -119,7 +120,7 @@
 @click.option(
     "--max-seconds",
     type=int,
-    default=120,
+    default=None,
     help=(
         "The maximum number of seconds for each benchmark run. "
         "Either max-seconds, max-requests, or both must be set. "
@@ -164,25 +165,35 @@
 )
 def generate_benchmark_report_cli(
     target: str,
-    scenario: Optional[Union[IO[Any], SCENARIOS]],
-    backend: BackendType,
+    scenario: Optional[Union[IO[Any], str]],
+    backend: Optional[BackendType],
     model: Optional[str],
     data: Optional[str],
-    data_type: Literal["emulated", "file", "transformers"],
+    data_type: Optional[Literal["emulated", "file", "transformers"]],
     tokenizer: Optional[str],
-    rate_type: ProfileGenerationMode,
+    rate_type: Optional[ProfileGenerationMode],
     rate: Optional[float],
     max_seconds: Optional[int],
     max_requests: Union[Literal["dataset"], int, None],
-    output_path: str,
+    output_path: Optional[str],
     enable_continuous_refresh: bool,
 ):
     """
     Generate a benchmark report for a specified backend and dataset.
""" - generate_benchmark_report( - target=target, - scenario=scenario, + + if isinstance(scenario, str): + defaults = SCENARIOS[scenario] + elif isinstance(scenario, IO): + defaults = Scenario.from_json(scenario.read()) + SCENARIOS["custom"] = defaults + elif scenario is None: + defaults = Scenario() + else: + raise ValueError("Invalid scenario type") + + # Update defaults with CLI args + defaults.update( backend=backend, model=model, data=data, @@ -191,7 +202,12 @@ def generate_benchmark_report_cli( rate_type=rate_type, rate=rate, max_seconds=max_seconds, - max_requests=max_requests, + max_requests=max_requests + ) + + generate_benchmark_report( + target=target, + scenario=defaults, output_path=output_path, cont_refresh_table=enable_continuous_refresh, ) @@ -199,17 +215,7 @@ def generate_benchmark_report_cli( def generate_benchmark_report( target: str, - data: Optional[str], - data_type: Literal["emulated", "file", "transformers"], - scenario: Optional[Union[IO[Any], SCENARIOS]], - backend: BackendType = "openai_http", - backend_kwargs: Optional[Mapping[str, Any]] = None, - model: Optional[str] = None, - tokenizer: Optional[str] = None, - rate_type: ProfileGenerationMode = "sweep", - rate: Optional[float] = None, - max_seconds: Optional[int] = 120, - max_requests: Union[Literal["dataset"], int, None] = None, + scenario: Scenario, output_path: Optional[str] = None, cont_refresh_table: bool = False, ) -> GuidanceReport: @@ -236,22 +242,22 @@ def generate_benchmark_report( :param backend_kwargs: Additional keyword arguments for the backend. """ logger.info( - "Generating benchmark report with target: {}, backend: {}", target, backend + "Generating benchmark report with target: {}, backend: {}", target, scenario.backend ) # Create backend backend_inst = Backend.create( - type_=backend, + type_=scenario.backend, target=target, - model=model, - **(backend_kwargs or {}), + model=scenario.model, + **(scenario.backend_kwargs or {}), ) backend_inst.validate() request_generator: RequestGenerator # Create tokenizer and request generator - tokenizer_inst = tokenizer + tokenizer_inst = scenario.tokenizer if not tokenizer_inst: try: tokenizer_inst = AutoTokenizer.from_pretrained(backend_inst.model) @@ -261,44 +267,44 @@ def generate_benchmark_report( "--tokenizer must be provided for request generation" ) from err - if data_type == "emulated": + if scenario.data_type == "emulated": request_generator = EmulatedRequestGenerator( - config=data, tokenizer=tokenizer_inst + config=scenario.data, tokenizer=tokenizer_inst ) - elif data_type == "file": - request_generator = FileRequestGenerator(path=data, tokenizer=tokenizer_inst) - elif data_type == "transformers": + elif scenario.data_type == "file": + request_generator = FileRequestGenerator(path=scenario.data, tokenizer=tokenizer_inst) + elif scenario.data_type == "transformers": request_generator = TransformersDatasetRequestGenerator( - dataset=data, tokenizer=tokenizer_inst + dataset=scenario.data, tokenizer=tokenizer_inst ) else: - raise ValueError(f"Unknown data type: {data_type}") + raise ValueError(f"Unknown data type: {scenario.data_type}") - if data_type == "emulated" and max_requests == "dataset": + if scenario.data_type == "emulated" and scenario.max_requests == "dataset": raise ValueError("Cannot use 'dataset' for emulated data") # Create executor executor = Executor( backend=backend_inst, request_generator=request_generator, - mode=rate_type, - rate=rate if rate_type in ("constant", "poisson") else None, + mode=scenario.rate_type, + 
rate=scenario.rate if scenario.rate_type in ("constant", "poisson") else None, max_number=( - len(request_generator) if max_requests == "dataset" else max_requests + len(request_generator) if scenario.max_requests == "dataset" else scenario.max_requests ), - max_duration=max_seconds, + max_duration=scenario.max_seconds, ) # Run executor logger.debug( "Running executor with args: {}", { - "backend": backend, + "backend": scenario.backend, "request_generator": request_generator, - "mode": rate_type, - "rate": rate, - "max_number": max_requests, - "max_duration": max_seconds, + "mode": scenario.rate_type, + "rate": scenario.rate, + "max_number": scenario.max_requests, + "max_duration": scenario.max_seconds, }, ) report = asyncio.run(_run_executor_for_result(executor)) From 26bb3a3ad321a589d52909af0966bf1de893e41b Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Wed, 19 Mar 2025 14:30:15 -0400 Subject: [PATCH 4/7] kwargs type is the value type not mapping --- src/guidellm/benchmark/scenario.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/guidellm/benchmark/scenario.py b/src/guidellm/benchmark/scenario.py index 39f67af..21db9bc 100644 --- a/src/guidellm/benchmark/scenario.py +++ b/src/guidellm/benchmark/scenario.py @@ -24,7 +24,7 @@ class Scenario(Serializable): max_seconds: int = 120 max_requests: Optional[Union[int, Literal["dataset"]]] = None - def _update(self, **fields: Mapping[str, Any]) -> Self: + def _update(self, **fields: Any) -> Self: for k, v in fields.items(): if not hasattr(self, k): raise ValueError(f"Invalid field {k}") @@ -32,7 +32,7 @@ def _update(self, **fields: Mapping[str, Any]) -> Self: return self - def update(self, **fields: Mapping[str, Any]) -> Self: + def update(self, **fields: Any) -> Self: return self._update(**{k: v for k, v in fields.items() if v is not None}) From 91651b03cfb77c0872198b76e53c274ef332f936 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Wed, 19 Mar 2025 14:52:39 -0400 Subject: [PATCH 5/7] Ignore issue with pydantic typing + other fixes --- src/guidellm/benchmark/scenario.py | 5 +++-- src/guidellm/utils/cli_params.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/guidellm/benchmark/scenario.py b/src/guidellm/benchmark/scenario.py index 21db9bc..d28e8da 100644 --- a/src/guidellm/benchmark/scenario.py +++ b/src/guidellm/benchmark/scenario.py @@ -1,7 +1,8 @@ from pathlib import Path -from typing import Any, Dict, Literal, Optional, Self, Union +from typing import Any, Dict, Literal, Optional, Union from pydantic import Field +from typing_extensions import Self from guidellm.backend import BackendType from guidellm.core import Serializable @@ -17,7 +18,7 @@ class Scenario(Serializable): backend_kwargs: Optional[Dict[str, Any]] = None model: Optional[str] = None tokenizer: Optional[str] = None - data: Union[str, Dict[str, Any]] = Field(default_factory=dict) + data: Union[str, Dict[str, Any]] = Field(default_factory=dict) # type: ignore[arg-type] data_type: Literal["emulated", "file", "transformers"] = "emulated" rate_type: ProfileGenerationMode = "sweep" rate: Optional[float] = None diff --git a/src/guidellm/utils/cli_params.py b/src/guidellm/utils/cli_params.py index f4a2c36..854e6a2 100644 --- a/src/guidellm/utils/cli_params.py +++ b/src/guidellm/utils/cli_params.py @@ -43,7 +43,7 @@ def __init__(self, *types: ParamType): self.types = types self.name = "".join(t.name for t in types) - def convert(self, value, param, ctx): + def convert(self, value, param, ctx) -> Any: fails = [] for t in 
self.types: try: From 047d37b9a24ad123048dbc7e6fc5d638304c6af6 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Wed, 19 Mar 2025 15:31:07 -0400 Subject: [PATCH 6/7] Allow sequence of rates --- src/guidellm/benchmark/scenario.py | 4 ++-- src/guidellm/main.py | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/guidellm/benchmark/scenario.py b/src/guidellm/benchmark/scenario.py index d28e8da..570c4c7 100644 --- a/src/guidellm/benchmark/scenario.py +++ b/src/guidellm/benchmark/scenario.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any, Dict, Literal, Optional, Union +from typing import Any, Dict, Literal, Optional, Sequence, Union from pydantic import Field from typing_extensions import Self @@ -21,7 +21,7 @@ class Scenario(Serializable): data: Union[str, Dict[str, Any]] = Field(default_factory=dict) # type: ignore[arg-type] data_type: Literal["emulated", "file", "transformers"] = "emulated" rate_type: ProfileGenerationMode = "sweep" - rate: Optional[float] = None + rate: Optional[Union[float, Sequence[float]]] = None max_seconds: int = 120 max_requests: Optional[Union[int, Literal["dataset"]]] = None diff --git a/src/guidellm/main.py b/src/guidellm/main.py index ae1d5e3..5e539d0 100644 --- a/src/guidellm/main.py +++ b/src/guidellm/main.py @@ -1,5 +1,5 @@ import asyncio -from typing import IO, Any, Literal, Optional, Union, get_args +from typing import Literal, Optional, Sequence, TextIO, Union, get_args import click from loguru import logger @@ -165,14 +165,14 @@ ) def generate_benchmark_report_cli( target: str, - scenario: Optional[Union[IO[Any], str]], + scenario: Optional[Union[TextIO, str]], backend: Optional[BackendType], model: Optional[str], data: Optional[str], data_type: Optional[Literal["emulated", "file", "transformers"]], tokenizer: Optional[str], rate_type: Optional[ProfileGenerationMode], - rate: Optional[float], + rate: Optional[Union[float, Sequence[float]]], max_seconds: Optional[int], max_requests: Union[Literal["dataset"], int, None], output_path: Optional[str], @@ -184,9 +184,8 @@ def generate_benchmark_report_cli( if isinstance(scenario, str): defaults = SCENARIOS[scenario] - elif isinstance(scenario, IO): + elif isinstance(scenario, TextIO): defaults = Scenario.from_json(scenario.read()) - SCENARIOS["custom"] = defaults elif scenario is None: defaults = Scenario() else: From 614b28876f24485498a1895368b494cee5195cf1 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Wed, 19 Mar 2025 15:50:50 -0400 Subject: [PATCH 7/7] Use TextIOWrapper to typecheck scenario input TextIOWrapper should be of type TextIO but TextIO does not seem to work for typecheck. Just use the class as type argument of isinstance. --- src/guidellm/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/guidellm/main.py b/src/guidellm/main.py index 5e539d0..d27eb1f 100644 --- a/src/guidellm/main.py +++ b/src/guidellm/main.py @@ -1,4 +1,5 @@ import asyncio +from io import TextIOWrapper from typing import Literal, Optional, Sequence, TextIO, Union, get_args import click @@ -184,7 +185,7 @@ def generate_benchmark_report_cli( if isinstance(scenario, str): defaults = SCENARIOS[scenario] - elif isinstance(scenario, TextIO): + elif isinstance(scenario, TextIOWrapper): defaults = Scenario.from_json(scenario.read()) elif scenario is None: defaults = Scenario()
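
Reviewer note: the net effect of this series is that a scenario supplies
defaults and explicitly passed CLI flags override them. Below is a minimal
sketch of that precedence, assuming guidellm with this series applied; the
JSON field values are illustrative stand-ins, not a scenario file shipped
by these patches.

    from io import StringIO

    from guidellm.benchmark.scenario import Scenario

    # A config as it would arrive via `--scenario my-scenario.json`.
    # Field names mirror the Scenario model from PATCH 2/7; the values
    # are made up for illustration.
    config = StringIO('{"rate_type": "constant", "rate": 5.0, "max_seconds": 60}')
    defaults = Scenario.from_json(config.read())

    # update() drops None values, so flags the user never passed
    # (every option default becomes None in PATCH 3/7) leave the
    # scenario's values intact.
    defaults.update(max_seconds=300, model=None)

    assert defaults.max_seconds == 300       # explicit flag wins
    assert defaults.rate_type == "constant"  # scenario value preserved

This is also why PATCH 3/7 moves every click option default to None: a
concrete click default (e.g. the old default="sweep") would always shadow
the scenario's value and make scenarios ineffective.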