Benchmarking scenarios #99

Draft · wants to merge 7 commits into main

64 changes: 64 additions & 0 deletions src/guidellm/benchmark/scenario.py
@@ -0,0 +1,64 @@
from pathlib import Path
from typing import Any, Dict, Literal, Optional, Sequence, Union

from pydantic import Field
from typing_extensions import Self

from guidellm.backend import BackendType
from guidellm.core import Serializable
from guidellm.executor import ProfileGenerationMode

__all__ = ["Scenario", "ScenarioManager"]

# Directory containing the built-in scenario JSON files
scenarios_path = Path(__file__).parent / "scenarios"


class Scenario(Serializable):
    backend: BackendType = "openai_http"
    backend_kwargs: Optional[Dict[str, Any]] = None
    model: Optional[str] = None
    tokenizer: Optional[str] = None
    data: Union[str, Dict[str, Any]] = Field(default_factory=dict)  # type: ignore[arg-type]
    data_type: Literal["emulated", "file", "transformers"] = "emulated"
    rate_type: ProfileGenerationMode = "sweep"
    rate: Optional[Union[float, Sequence[float]]] = None
    max_seconds: int = 120
    max_requests: Optional[Union[int, Literal["dataset"]]] = None

    def _update(self, **fields: Any) -> Self:
        # Set each field, rejecting names that are not existing attributes
        for k, v in fields.items():
            if not hasattr(self, k):
                raise ValueError(f"Invalid field {k}")
            setattr(self, k, v)

        return self

    def update(self, **fields: Any) -> Self:
        # Drop None values so unset overrides keep the scenario's defaults
        return self._update(**{k: v for k, v in fields.items() if v is not None})


class ScenarioManager:
    def __init__(self, scenarios_dir: Optional[str] = None):
        self.scenarios: Dict[str, Scenario] = {}

        # Use the built-in scenarios directory unless a custom one is given
        path = scenarios_path if scenarios_dir is None else Path(scenarios_dir)

        # Load scenarios, keyed by their file stem
        for scenario_path in path.glob("*.json"):
            scenario = Scenario.from_json(scenario_path.read_text())
            self[scenario_path.stem] = scenario

    def __getitem__(self, scenario_name: str) -> Scenario:
        return self.scenarios[scenario_name]

    def __setitem__(self, scenario_name: str, scenario: Scenario):
        if scenario_name in self.scenarios:
            raise ValueError(f"Scenario {scenario_name} already exists")

        self.scenarios[scenario_name] = scenario

    def list(self):
        return tuple(self.scenarios.keys())
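
For reference, a minimal usage sketch of the Scenario/ScenarioManager API added above (the scenario name "chat" and the JSON contents are hypothetical, for illustration only):

# Assumed file: src/guidellm/benchmark/scenarios/chat.json (hypothetical)
#   {"rate_type": "constant", "rate": 2.0, "max_seconds": 60}
from guidellm.benchmark.scenario import Scenario, ScenarioManager

manager = ScenarioManager()  # loads every *.json in the scenarios directory
print(manager.list())        # e.g. ("chat",)

scenario = manager["chat"]   # scenarios are keyed by file stem
scenario.update(model="my-model", rate=None)  # None values are ignored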
111 changes: 65 additions & 46 deletions src/guidellm/main.py
@@ -1,11 +1,13 @@
import asyncio
from typing import Any, Literal, Mapping, Optional, Union, get_args
from io import TextIOWrapper
from typing import Literal, Optional, Sequence, TextIO, Union, get_args

import click
from loguru import logger
from transformers import AutoTokenizer # type: ignore[import-untyped]

from guidellm.backend import Backend, BackendType
from guidellm.benchmark.scenario import Scenario, ScenarioManager
from guidellm.core import GuidanceReport, TextGenerationBenchmarkReport
from guidellm.executor import Executor, ProfileGenerationMode
from guidellm.request import (
@@ -18,6 +20,8 @@

__all__ = ["generate_benchmark_report"]

# FIXME: Remove
SCENARIOS = ScenarioManager()

@click.command()
@click.option(
@@ -29,10 +33,18 @@
"Ex: 'http://localhost:8000'"
),
)
@click.option(
    "--scenario",
    type=cli_params.Union(click.File(mode="r"), click.Choice(SCENARIOS.list())),
    default=None,
    help=(
        "The name of a built-in scenario or the path to a scenario config file. "
        "Other CLI options set explicitly override the scenario's values."
    ),
)
@click.option(
    "--backend",
    type=click.Choice(get_args(BackendType)),
    default="openai_http",
    default=None,
    help=(
        "The backend to use for benchmarking. "
        "The default is OpenAI Server enabling compatibility with any server that "
@@ -51,7 +63,7 @@
@click.option(
    "--data",
    type=str,
    required=True,
    default=None,
    help=(
        "The data source to use for benchmarking. "
        "Depending on the data-type, it should be a "
@@ -64,7 +76,7 @@
@click.option(
    "--data-type",
    type=click.Choice(["emulated", "file", "transformers"]),
    required=True,
    default=None,
    help=(
        "The type of data to use for benchmarking. "
        "Use 'emulated' for synthetic data, 'file' for a file, or 'transformers' "
@@ -86,7 +98,7 @@
@click.option(
    "--rate-type",
    type=click.Choice(get_args(ProfileGenerationMode)),
    default="sweep",
    default=None,
    help=(
        "The type of request rate to use for benchmarking. "
        "Use sweep to run a full range from synchronous to throughput (default), "
@@ -109,7 +121,7 @@
@click.option(
    "--max-seconds",
    type=int,
    default=120,
    default=None,
    help=(
        "The maximum number of seconds for each benchmark run. "
        "Either max-seconds, max-requests, or both must be set. "
@@ -154,23 +166,34 @@
)
def generate_benchmark_report_cli(
    target: str,
    backend: BackendType,
    scenario: Optional[Union[TextIO, str]],
    backend: Optional[BackendType],
    model: Optional[str],
    data: Optional[str],
    data_type: Literal["emulated", "file", "transformers"],
    data_type: Optional[Literal["emulated", "file", "transformers"]],
    tokenizer: Optional[str],
    rate_type: ProfileGenerationMode,
    rate: Optional[float],
    rate_type: Optional[ProfileGenerationMode],
    rate: Optional[Union[float, Sequence[float]]],
    max_seconds: Optional[int],
    max_requests: Union[Literal["dataset"], int, None],
    output_path: str,
    output_path: Optional[str],
    enable_continuous_refresh: bool,
):
    """
    Generate a benchmark report for a specified backend and dataset.
    """
    generate_benchmark_report(
        target=target,

    if isinstance(scenario, str):
        defaults = SCENARIOS[scenario]
    elif isinstance(scenario, TextIOWrapper):
        defaults = Scenario.from_json(scenario.read())
    elif scenario is None:
        defaults = Scenario()
    else:
        raise ValueError("Invalid scenario type")

    # Update defaults with CLI args
    defaults.update(
        backend=backend,
        model=model,
        data=data,
@@ -179,24 +202,20 @@ def generate_benchmark_report_cli(
        rate_type=rate_type,
        rate=rate,
        max_seconds=max_seconds,
        max_requests=max_requests,
        max_requests=max_requests
    )

    generate_benchmark_report(
        target=target,
        scenario=defaults,
        output_path=output_path,
        cont_refresh_table=enable_continuous_refresh,
    )


def generate_benchmark_report(
    target: str,
    data: Optional[str],
    data_type: Literal["emulated", "file", "transformers"],
    backend: BackendType = "openai_http",
    backend_kwargs: Optional[Mapping[str, Any]] = None,
    model: Optional[str] = None,
    tokenizer: Optional[str] = None,
    rate_type: ProfileGenerationMode = "sweep",
    rate: Optional[float] = None,
    max_seconds: Optional[int] = 120,
    max_requests: Union[Literal["dataset"], int, None] = None,
    scenario: Scenario,
    output_path: Optional[str] = None,
    cont_refresh_table: bool = False,
) -> GuidanceReport:
@@ -223,22 +242,22 @@ def generate_benchmark_report(
    :param backend_kwargs: Additional keyword arguments for the backend.
    """
    logger.info(
        "Generating benchmark report with target: {}, backend: {}", target, backend
        "Generating benchmark report with target: {}, backend: {}", target, scenario.backend
    )

    # Create backend
    backend_inst = Backend.create(
        type_=backend,
        type_=scenario.backend,
        target=target,
        model=model,
        **(backend_kwargs or {}),
        model=scenario.model,
        **(scenario.backend_kwargs or {}),
    )
    backend_inst.validate()

    request_generator: RequestGenerator

    # Create tokenizer and request generator
    tokenizer_inst = tokenizer
    tokenizer_inst = scenario.tokenizer
    if not tokenizer_inst:
        try:
            tokenizer_inst = AutoTokenizer.from_pretrained(backend_inst.model)
@@ -248,44 +267,44 @@
"--tokenizer must be provided for request generation"
) from err

if data_type == "emulated":
if scenario.data_type == "emulated":
request_generator = EmulatedRequestGenerator(
config=data, tokenizer=tokenizer_inst
config=scenario.data, tokenizer=tokenizer_inst
)
elif data_type == "file":
request_generator = FileRequestGenerator(path=data, tokenizer=tokenizer_inst)
elif data_type == "transformers":
elif scenario.data_type == "file":
request_generator = FileRequestGenerator(path=scenario.data, tokenizer=tokenizer_inst)
elif scenario.data_type == "transformers":
request_generator = TransformersDatasetRequestGenerator(
dataset=data, tokenizer=tokenizer_inst
dataset=scenario.data, tokenizer=tokenizer_inst
)
else:
raise ValueError(f"Unknown data type: {data_type}")
raise ValueError(f"Unknown data type: {scenario.data_type}")

if data_type == "emulated" and max_requests == "dataset":
if scenario.data_type == "emulated" and scenario.max_requests == "dataset":
raise ValueError("Cannot use 'dataset' for emulated data")

    # Create executor
    executor = Executor(
        backend=backend_inst,
        request_generator=request_generator,
        mode=rate_type,
        rate=rate if rate_type in ("constant", "poisson") else None,
        mode=scenario.rate_type,
        rate=scenario.rate if scenario.rate_type in ("constant", "poisson") else None,
        max_number=(
            len(request_generator) if max_requests == "dataset" else max_requests
            len(request_generator) if scenario.max_requests == "dataset" else scenario.max_requests
        ),
        max_duration=max_seconds,
        max_duration=scenario.max_seconds,
    )

    # Run executor
    logger.debug(
        "Running executor with args: {}",
        {
            "backend": backend,
            "backend": scenario.backend,
            "request_generator": request_generator,
            "mode": rate_type,
            "rate": rate,
            "max_number": max_requests,
            "max_duration": max_seconds,
            "mode": scenario.rate_type,
            "rate": scenario.rate,
            "max_number": scenario.max_requests,
            "max_duration": scenario.max_seconds,
        },
    )
    report = asyncio.run(_run_executor_for_result(executor))
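
With this change, callers build a Scenario and hand it to generate_benchmark_report instead of passing each option separately; a minimal sketch under assumed values (the target URL and the emulated-data config keys are illustrative):

from guidellm.benchmark.scenario import Scenario
from guidellm.main import generate_benchmark_report

scenario = Scenario(
    data={"prompt_tokens": 128, "generated_tokens": 128},  # illustrative emulated config
    data_type="emulated",
    rate_type="constant",
    rate=2.0,
    max_seconds=60,
)
report = generate_benchmark_report(
    target="http://localhost:8000",  # illustrative server address
    scenario=scenario,
)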
39 changes: 38 additions & 1 deletion src/guidellm/utils/cli_params.py
@@ -4,7 +4,7 @@

from typing import Any, Optional

from click import Context, Parameter, ParamType
from click import BadParameter, Context, Parameter, ParamType

__all__ = ["MAX_REQUESTS"]

@@ -32,3 +32,40 @@ def convert(


MAX_REQUESTS = MaxRequestsType()


class Union(ParamType):
    """
    A custom click parameter type that accepts any of several underlying types.
    """

    def __init__(self, *types: ParamType):
        self.types = types
        self.name = "".join(t.name for t in types)

    def convert(self, value, param, ctx) -> Any:
        # Try each type in order; return the first successful conversion
        fails = []
        for t in self.types:
            try:
                return t.convert(value, param, ctx)
            except BadParameter as e:
                fails.append(str(e))
                continue

        self.fail("; ".join(fails) or f"Invalid value: {value}")

    def get_metavar(self, param: Parameter) -> str:
        def get_choices(t: ParamType) -> str:
            meta = t.get_metavar(param)
            return meta if meta is not None else t.name

        # Get the choices for each type in the union.
        choices_str = "|".join(map(get_choices, self.types))

        # Use curly braces to indicate a required argument.
        if param.required and param.param_type_name == "argument":
            return f"{{{choices_str}}}"

        # Use square brackets to indicate an option or optional argument.
        return f"[{choices_str}]"