Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/guidellm/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,16 @@ def benchmark():
"If None, runs until max_seconds or data exhaustion."
),
)
@click.option(
"--min-requests",
type=int,
default=BenchmarkGenerativeTextArgs.get_default("min_requests"),
help=(
"Minimum requests per benchmark. "
"Unlike max_requests, requests will continue "
"to run until min_requests complete."
),
)
@click.option(
"--max-errors",
type=int,
Expand Down
4 changes: 4 additions & 0 deletions src/guidellm/benchmark/entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ async def resolve_profile(
constraints: MutableMapping[str, ConstraintInitializer | Any],
max_seconds: int | float | None,
max_requests: int | None,
min_requests: int | None,
max_errors: int | None,
max_error_rate: float | None,
max_global_error_rate: float | None,
Expand All @@ -371,6 +372,7 @@ async def resolve_profile(
:param constraints: Dictionary of constraint initializers for benchmark limits
:param max_seconds: Maximum duration in seconds for the benchmark
:param max_requests: Maximum number of requests to process
:param min_requests: Minimum number of requests to process
:param max_errors: Maximum number of errors before stopping
:param max_error_rate: Maximum error rate threshold before stopping
:param max_global_error_rate: Maximum global error rate threshold before stopping
Expand All @@ -388,6 +390,7 @@ async def resolve_profile(
for key, val in {
"max_seconds": max_seconds,
"max_requests": max_requests,
"min_requests": min_requests,
"max_errors": max_errors,
"max_error_rate": max_error_rate,
"max_global_error_rate": max_global_error_rate,
Expand Down Expand Up @@ -531,6 +534,7 @@ async def benchmark_generative_text(
constraints=constraints,
max_seconds=args.max_seconds,
max_requests=args.max_requests,
min_requests=args.min_requests,
max_errors=args.max_errors,
max_error_rate=args.max_error_rate,
max_global_error_rate=args.max_global_error_rate,
Expand Down
3 changes: 3 additions & 0 deletions src/guidellm/benchmark/schemas/generative/entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,9 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
max_requests: int | None = Field(
default=None, description="Maximum number of requests to execute"
)
min_requests: int | None = Field(
default=None, description="Minimum number of requests to execute"
)
max_errors: int | None = Field(
default=None, description="Maximum number of errors before stopping"
)
Expand Down
2 changes: 2 additions & 0 deletions src/guidellm/scheduler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
MaxErrorsConstraint,
MaxGlobalErrorRateConstraint,
MaxNumberConstraint,
MinNumberConstraint,
OverSaturationConstraint,
OverSaturationConstraintInitializer,
PydanticConstraintInitializer,
Expand Down Expand Up @@ -72,6 +73,7 @@
"MaxErrorsConstraint",
"MaxGlobalErrorRateConstraint",
"MaxNumberConstraint",
"MinNumberConstraint",
"NonDistributedEnvironment",
"OverSaturationConstraint",
"OverSaturationConstraintInitializer",
Expand Down
2 changes: 2 additions & 0 deletions src/guidellm/scheduler/constraints/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from .request import (
MaxDurationConstraint,
MaxNumberConstraint,
MinNumberConstraint,
RequestsExhaustedConstraint,
)
from .saturation import (
Expand All @@ -40,6 +41,7 @@
"MaxErrorsConstraint",
"MaxGlobalErrorRateConstraint",
"MaxNumberConstraint",
"MinNumberConstraint",
"OverSaturationConstraint",
"OverSaturationConstraintInitializer",
"PydanticConstraintInitializer",
Expand Down
111 changes: 111 additions & 0 deletions src/guidellm/scheduler/constraints/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,117 @@ def _validate_max_duration(
return value[0] if isinstance(value, list) and len(value) == 1 else value


@ConstraintsInitializerFactory.register( # type: ignore[arg-type]
["min_number", "min_num", "min_requests", "min_req"]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may make sense to instead rename this to max-processed. I think this would be less confusing. But I can see the argument for min, since it's going to keep scheduling past that until the max-processed is reached. So I'm not sure what should be done.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah... max-processed is both a little too vague and also incorrect, since we can end up processing more requests than set. I think min is fine actually. I'll just add some notes to the docs that clarify constraints are OR not AND. Maybe in the future we can support AND constraint combinations.

)
class MinNumberConstraint(PydanticConstraintInitializer):
"""
Constraint that limits execution based on minimum request counts.

Like MaxNumberConstraint but instead of stopping request generation after reaching
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mistake: It should say "Like MaxNumberConstraint"

I think this wording doesn't emphasize the nuances of this implementation enough. Maybe clarify generation and processing, and why this may be helpful. It's identical except that it doesn't stop queueing until the max processed quantity is reached.

a maximum, it keeps queuing requests and ends the benchmark only once a minimum number of requests have completed.
"""

type_: Literal["min_number"] = "min_number" # type: ignore[assignment]
min_num: int | float | list[int | float] = Field(
description="Minimum number of requests allowed before triggering constraint",
)
current_index: int = Field(
default=-1, description="Current index for list-based min_num values"
)

@classmethod
def validated_kwargs(
cls, min_num: int | float | list[int | float], **kwargs
) -> dict[str, Any]:
"""
Validate and process arguments for MinNumberConstraint creation.

:param min_num: Minimum number of requests to allow
:param kwargs: Supports min_num, min_number, min_requests, min_req,
and optional type_
:return: Validated dictionary with min_num and type_ fields
"""
aliases = ["min_number", "min_num", "min_requests", "min_req"]
for alias in aliases:
if min_num is None:
min_num = kwargs.get(alias)

return {"min_num": min_num, "current_index": kwargs.get("current_index", -1)}

def create_constraint(self, **_kwargs) -> Constraint:
    """
    Advance to the next min_num entry and return a copy as the constraint.

    Each call increments ``current_index`` on this initializer, then returns
    a copy of the initializer taken after the increment.

    :param _kwargs: Additional keyword arguments (unused)
    :return: Copy of this instance, cast to Constraint
    """
    self.current_index = self.current_index + 1
    snapshot = self.model_copy()

    return cast("Constraint", snapshot)

def __call__(
    self, state: SchedulerState, request_info: RequestInfo
) -> SchedulerUpdateAction:
    """
    Evaluate the minimum-request constraint against the current scheduler state.

    Queuing and processing both continue until ``state.processed_requests``
    reaches the active minimum; once it does, both are signalled to stop.

    :param state: Current scheduler state; its processed_requests and
        created_requests counters are read
    :param request_info: Request associated with this evaluation; its
        completed_at value is used as the stop time when set
    :return: Action indicating whether to continue or stop operations
    """
    if isinstance(self.min_num, int | float):
        min_num = self.min_num
    else:
        # Clamp the index into the list: a negative index (create_constraint
        # not yet called) maps to the first entry, and an index past the end
        # reuses the last entry instead of raising IndexError.
        index = min(max(0, self.current_index), len(self.min_num) - 1)
        min_num = self.min_num[index]

    processed_exceeded = state.processed_requests >= min_num
    remaining_requests = min(max(0, min_num - state.processed_requests), min_num)
    # The stop time is only known once nothing remains; fall back to the
    # current time when the request carries no completion timestamp.
    stop_time = (
        None if remaining_requests > 0 else request_info.completed_at or time.time()
    )

    return SchedulerUpdateAction(
        request_queuing="stop" if processed_exceeded else "continue",
        request_processing="stop_local" if processed_exceeded else "continue",
        metadata={
            "min_number": min_num,
            "processed_exceeded": processed_exceeded,
            "created_requests": state.created_requests,
            "processed_requests": state.processed_requests,
            "remaining_requests": remaining_requests,
            "stop_time": stop_time,
        },
        progress=SchedulerProgress(
            remaining_requests=remaining_requests,
            total_requests=min_num,
            stop_time=stop_time,
        ),
    )

@field_validator("min_num")
@classmethod
def _validate_min_num(
    cls, value: int | float | list[int | float]
) -> int | float | list[int | float]:
    """
    Validate that min_num is a positive number or a non-empty list of them.

    :param value: Single number or list of numbers supplied for min_num
    :return: The lone element when a single value (or one-element list) was
        given, otherwise the list unchanged
    :raises ValueError: If the list is empty, or any entry is falsy,
        non-numeric, or not greater than zero
    """
    if not isinstance(value, list):
        value = [value]
    if not value:
        # An empty list has no entry to select when the constraint is evaluated.
        raise ValueError("min_num must contain at least one value, received []")
    for val in value:
        if not val:
            raise ValueError(
                f"min_num must be set and truthful, received {value} ({val} failed)"
            )
        if not isinstance(val, int | float) or val <= 0:
            raise ValueError(
                f"min_num must be a positive num, received {value} ({val} failed)"
            )

    # value is always a list here; unwrap the single-element case.
    return value[0] if len(value) == 1 else value


class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
type_: Literal["requests_exhausted"] = "requests_exhausted" # type: ignore[assignment]
num_requests: int
Expand Down
1 change: 1 addition & 0 deletions tests/unit/scheduler/constraints/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tests for scheduler constraints."""
Loading
Loading