Benchmarking scenarios #99

Draft · wants to merge 7 commits into main

64 changes: 64 additions & 0 deletions src/guidellm/benchmark/scenario.py
@@ -0,0 +1,64 @@
from pathlib import Path
from typing import Any, Dict, Literal, Optional, Sequence, Union

from pydantic import Field
from typing_extensions import Self

from guidellm.backend import BackendType
from guidellm.core import Serializable
from guidellm.executor import ProfileGenerationMode

__all__ = ["Scenario", "ScenarioManager"]

# Directory containing the built-in scenario JSON files
scenarios_path = Path(__file__).parent / "scenarios"


class Scenario(Serializable):
    backend: BackendType = "openai_http"
    backend_kwargs: Optional[Dict[str, Any]] = None
    model: Optional[str] = None
    tokenizer: Optional[str] = None
    data: Union[str, Dict[str, Any]] = Field(default_factory=dict)  # type: ignore[arg-type]
    data_type: Literal["emulated", "file", "transformers"] = "emulated"
    rate_type: ProfileGenerationMode = "sweep"
    rate: Optional[Union[float, Sequence[float]]] = None
    max_seconds: int = 120
    max_requests: Optional[Union[int, Literal["dataset"]]] = None

    def _update(self, **fields: Any) -> Self:
        # Set each field, rejecting names that are not existing attributes
        for k, v in fields.items():
            if not hasattr(self, k):
                raise ValueError(f"Invalid field {k}")
            setattr(self, k, v)

        return self

    def update(self, **fields: Any) -> Self:
        # Drop None values so unset overrides keep the scenario's defaults
        return self._update(**{k: v for k, v in fields.items() if v is not None})


class ScenarioManager:
    def __init__(self, scenarios_dir: Optional[str] = None):
        self.scenarios: Dict[str, Scenario] = {}

        # Use the built-in scenarios directory unless a custom one is given
        path = scenarios_path if scenarios_dir is None else Path(scenarios_dir)

        # Load scenarios, keyed by their file stem
        for scenario_path in path.glob("*.json"):
            scenario = Scenario.from_json(scenario_path.read_text())
            self[scenario_path.stem] = scenario

    def __getitem__(self, scenario_name: str) -> Scenario:
        return self.scenarios[scenario_name]

    def __setitem__(self, scenario_name: str, scenario: Scenario):
        if scenario_name in self.scenarios:
            raise ValueError(f"Scenario {scenario_name} already exists")

        self.scenarios[scenario_name] = scenario

    def list(self):
        return tuple(self.scenarios.keys())
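
For reference, a minimal usage sketch of the Scenario/ScenarioManager API added above (the scenario name "chat" and the JSON contents are hypothetical, for illustration only):

# Assumed file: src/guidellm/benchmark/scenarios/chat.json (hypothetical)
#   {"rate_type": "constant", "rate": 2.0, "max_seconds": 60}
from guidellm.benchmark.scenario import Scenario, ScenarioManager

manager = ScenarioManager()  # loads every *.json in the scenarios directory
print(manager.list())        # e.g. ("chat",)

scenario = manager["chat"]   # scenarios are keyed by file stem
scenario.update(model="my-model", rate=None)  # None values are ignored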
111 changes: 65 additions & 46 deletions src/guidellm/main.py
@@ -1,11 +1,13 @@
import asyncio
from typing import Any, Literal, Mapping, Optional, Union, get_args
from io import TextIOWrapper
from typing import Literal, Optional, Sequence, TextIO, Union, get_args

import click
from loguru import logger
from transformers import AutoTokenizer # type: ignore[import-untyped]

from guidellm.backend import Backend, BackendType
from guidellm.benchmark.scenario import Scenario, ScenarioManager
from guidellm.core import GuidanceReport, TextGenerationBenchmarkReport
from guidellm.executor import Executor, ProfileGenerationMode
from guidellm.request import (
@@ -18,6 +20,8 @@

__all__ = ["generate_benchmark_report"]

# FIXME: Remove
SCENARIOS = ScenarioManager()

@click.command()
@click.option(
@@ -29,10 +33,18 @@
"Ex: 'http://localhost:8000'"
),
)
@click.option(
    "--scenario",
    type=cli_params.Union(click.File(mode="r"), click.Choice(SCENARIOS.list())),
    default=None,
    help=(
        "The name of a built-in scenario or the path to a scenario config file. "
        "Other CLI options set explicitly override the scenario's values."
    ),
)
@click.option(
    "--backend",
    type=click.Choice(get_args(BackendType)),
    default="openai_http",
    default=None,
    help=(
        "The backend to use for benchmarking. "
        "The default is OpenAI Server enabling compatibility with any server that "
@@ -51,7 +63,7 @@
@click.option(
    "--data",
    type=str,
    required=True,
    default=None,
    help=(
        "The data source to use for benchmarking. "
        "Depending on the data-type, it should be a "
@@ -64,7 +76,7 @@
@click.option(
    "--data-type",
    type=click.Choice(["emulated", "file", "transformers"]),
    required=True,
    default=None,
    help=(
        "The type of data to use for benchmarking. "
        "Use 'emulated' for synthetic data, 'file' for a file, or 'transformers' "
@@ -86,7 +98,7 @@
@click.option(
    "--rate-type",
    type=click.Choice(get_args(ProfileGenerationMode)),
    default="sweep",
    default=None,
    help=(
        "The type of request rate to use for benchmarking. "
        "Use sweep to run a full range from synchronous to throughput (default), "
@@ -109,7 +121,7 @@
@click.option(
    "--max-seconds",
    type=int,
    default=120,
    default=None,
    help=(
        "The maximum number of seconds for each benchmark run. "
        "Either max-seconds, max-requests, or both must be set. "
@@ -154,23 +166,34 @@
)
def generate_benchmark_report_cli(
    target: str,
    backend: BackendType,
    scenario: Optional[Union[TextIO, str]],
    backend: Optional[BackendType],
    model: Optional[str],
    data: Optional[str],
    data_type: Literal["emulated", "file", "transformers"],
    data_type: Optional[Literal["emulated", "file", "transformers"]],
    tokenizer: Optional[str],
    rate_type: ProfileGenerationMode,
    rate: Optional[float],
    rate_type: Optional[ProfileGenerationMode],
    rate: Optional[Union[float, Sequence[float]]],
    max_seconds: Optional[int],
    max_requests: Union[Literal["dataset"], int, None],
    output_path: str,
    output_path: Optional[str],
    enable_continuous_refresh: bool,
):
    """
    Generate a benchmark report for a specified backend and dataset.
    """
    generate_benchmark_report(
        target=target,

    if isinstance(scenario, str):
        defaults = SCENARIOS[scenario]
    elif isinstance(scenario, TextIOWrapper):
        defaults = Scenario.from_json(scenario.read())
    elif scenario is None:
        defaults = Scenario()
    else:
        raise ValueError("Invalid scenario type")

    # Update defaults with CLI args
    defaults.update(
        backend=backend,
        model=model,
        data=data,
@@ -179,24 +202,20 @@ def generate_benchmark_report_cli(
        rate_type=rate_type,
        rate=rate,
        max_seconds=max_seconds,
        max_requests=max_requests,
        max_requests=max_requests
    )

    generate_benchmark_report(
        target=target,
        scenario=defaults,
        output_path=output_path,
        cont_refresh_table=enable_continuous_refresh,
    )


def generate_benchmark_report(
    target: str,
    data: Optional[str],
    data_type: Literal["emulated", "file", "transformers"],
    backend: BackendType = "openai_http",
    backend_kwargs: Optional[Mapping[str, Any]] = None,
    model: Optional[str] = None,
    tokenizer: Optional[str] = None,
    rate_type: ProfileGenerationMode = "sweep",
    rate: Optional[float] = None,
    max_seconds: Optional[int] = 120,
    max_requests: Union[Literal["dataset"], int, None] = None,
    scenario: Scenario,
    output_path: Optional[str] = None,
    cont_refresh_table: bool = False,
) -> GuidanceReport:
@@ -223,22 +242,22 @@ def generate_benchmark_report(
    :param backend_kwargs: Additional keyword arguments for the backend.
    """
    logger.info(
        "Generating benchmark report with target: {}, backend: {}", target, backend
        "Generating benchmark report with target: {}, backend: {}", target, scenario.backend
    )

    # Create backend
    backend_inst = Backend.create(
        type_=backend,
        type_=scenario.backend,
        target=target,
        model=model,
        **(backend_kwargs or {}),
        model=scenario.model,
        **(scenario.backend_kwargs or {}),
    )
    backend_inst.validate()

    request_generator: RequestGenerator

    # Create tokenizer and request generator
    tokenizer_inst = tokenizer
    tokenizer_inst = scenario.tokenizer
    if not tokenizer_inst:
        try:
            tokenizer_inst = AutoTokenizer.from_pretrained(backend_inst.model)
@@ -248,44 +267,44 @@
"--tokenizer must be provided for request generation"
) from err

if data_type == "emulated":
if scenario.data_type == "emulated":
request_generator = EmulatedRequestGenerator(
config=data, tokenizer=tokenizer_inst
config=scenario.data, tokenizer=tokenizer_inst
)
elif data_type == "file":
request_generator = FileRequestGenerator(path=data, tokenizer=tokenizer_inst)
elif data_type == "transformers":
elif scenario.data_type == "file":
request_generator = FileRequestGenerator(path=scenario.data, tokenizer=tokenizer_inst)
elif scenario.data_type == "transformers":
request_generator = TransformersDatasetRequestGenerator(
dataset=data, tokenizer=tokenizer_inst
dataset=scenario.data, tokenizer=tokenizer_inst
)
else:
raise ValueError(f"Unknown data type: {data_type}")
raise ValueError(f"Unknown data type: {scenario.data_type}")

if data_type == "emulated" and max_requests == "dataset":
if scenario.data_type == "emulated" and scenario.max_requests == "dataset":
raise ValueError("Cannot use 'dataset' for emulated data")

    # Create executor
    executor = Executor(
        backend=backend_inst,
        request_generator=request_generator,
        mode=rate_type,
        rate=rate if rate_type in ("constant", "poisson") else None,
        mode=scenario.rate_type,
        rate=scenario.rate if scenario.rate_type in ("constant", "poisson") else None,
        max_number=(
            len(request_generator) if max_requests == "dataset" else max_requests
            len(request_generator) if scenario.max_requests == "dataset" else scenario.max_requests
        ),
        max_duration=max_seconds,
        max_duration=scenario.max_seconds,
    )

    # Run executor
    logger.debug(
        "Running executor with args: {}",
        {
            "backend": backend,
            "backend": scenario.backend,
            "request_generator": request_generator,
            "mode": rate_type,
            "rate": rate,
            "max_number": max_requests,
            "max_duration": max_seconds,
            "mode": scenario.rate_type,
            "rate": scenario.rate,
            "max_number": scenario.max_requests,
            "max_duration": scenario.max_seconds,
        },
    )
    report = asyncio.run(_run_executor_for_result(executor))
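
With this change, callers build a Scenario and hand it to generate_benchmark_report instead of passing each option separately; a minimal sketch under assumed values (the target URL and the emulated-data config keys are illustrative):

from guidellm.benchmark.scenario import Scenario
from guidellm.main import generate_benchmark_report

scenario = Scenario(
    data={"prompt_tokens": 128, "generated_tokens": 128},  # illustrative emulated config
    data_type="emulated",
    rate_type="constant",
    rate=2.0,
    max_seconds=60,
)
report = generate_benchmark_report(
    target="http://localhost:8000",  # illustrative server address
    scenario=scenario,
)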
39 changes: 38 additions & 1 deletion src/guidellm/utils/cli_params.py
@@ -4,7 +4,7 @@

from typing import Any, Optional

from click import Context, Parameter, ParamType
from click import BadParameter, Context, Parameter, ParamType

__all__ = ["MAX_REQUESTS"]

@@ -32,3 +32,40 @@ def convert(


MAX_REQUESTS = MaxRequestsType()


class Union(ParamType):
    """
    A custom click parameter type that accepts any of several underlying types.
    """

    def __init__(self, *types: ParamType):
        self.types = types
        self.name = "".join(t.name for t in types)

    def convert(self, value, param, ctx) -> Any:
        # Try each type in order; return the first successful conversion
        fails = []
        for t in self.types:
            try:
                return t.convert(value, param, ctx)
            except BadParameter as e:
                fails.append(str(e))
                continue

        self.fail("; ".join(fails) or f"Invalid value: {value}")

    def get_metavar(self, param: Parameter) -> str:
        def get_choices(t: ParamType) -> str:
            meta = t.get_metavar(param)
            return meta if meta is not None else t.name

        # Get the choices for each type in the union.
        choices_str = "|".join(map(get_choices, self.types))

        # Use curly braces to indicate a required argument.
        if param.required and param.param_type_name == "argument":
            return f"{{{choices_str}}}"

        # Use square brackets to indicate an option or optional argument.
        return f"[{choices_str}]"