Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ audio = [
# Torchcodec needs specific torch version
"torch==2.10.*",
"torchcodec==0.10.*",
"websockets>=13.0",
]
vision = [
"datasets[vision]",
Expand Down
3 changes: 2 additions & 1 deletion src/guidellm/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,8 @@ def benchmark():
"template per vLLM docs. Default: default-template"
"For openai backend: http endpoint path (/v1/chat/completions, "
"/v1/completions, /v1/audio/transcriptions, /v1/audio/translations) or "
"alias (e.g. chat_completions); default /v1/chat/completions."
"alias (e.g. chat_completions); default /v1/chat/completions. "
"For openai_websocket: WebSocket URL path such as /v1/realtime (default)."
),
)
@click.option(
Expand Down
4 changes: 4 additions & 0 deletions src/guidellm/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
OpenAIHTTPBackend,
OpenAIRequestHandler,
OpenAIRequestHandlerFactory,
OpenAIWebSocketBackend,
OpenAIWebSocketBackendArgs,
TextCompletionsRequestHandler,
)

Expand All @@ -37,6 +39,8 @@
"OpenAIHTTPBackend",
"OpenAIRequestHandler",
"OpenAIRequestHandlerFactory",
"OpenAIWebSocketBackend",
"OpenAIWebSocketBackendArgs",
"TextCompletionsRequestHandler",
]

Expand Down
6 changes: 5 additions & 1 deletion src/guidellm/backends/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@
]


BackendType = Literal["openai_http", "vllm_python"]
BackendType = Literal[
"openai_http",
"openai_websocket",
"vllm_python",
]


class BackendArgs(BaseModel):
Expand Down
3 changes: 3 additions & 0 deletions src/guidellm/backends/openai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
ResponsesRequestHandler,
TextCompletionsRequestHandler,
)
from .websocket import OpenAIWebSocketBackend, OpenAIWebSocketBackendArgs

__all__ = [
"AudioRequestHandler",
"ChatCompletionsRequestHandler",
"OpenAIHTTPBackend",
"OpenAIRequestHandler",
"OpenAIRequestHandlerFactory",
"OpenAIWebSocketBackend",
"OpenAIWebSocketBackendArgs",
"ResponsesRequestHandler",
"TextCompletionsRequestHandler",
]
79 changes: 79 additions & 0 deletions src/guidellm/backends/openai/common.py
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename to common.py

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing __all__.

Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""Shared helpers for OpenAI-compatible HTTP and WebSocket backends."""

from __future__ import annotations

from typing import Any

# Public names exported by this module.
__all__ = [
    "FALLBACK_TIMEOUT",
    "build_headers",
    "resolve_validate_kwargs",
]

# NOTE: This value is taken from httpx's default timeout (5 seconds).
# Presumably used when no explicit timeout is configured -- confirm at call sites.
FALLBACK_TIMEOUT = 5.0


def build_headers(
    api_key: str | None,
    existing_headers: dict[str, str] | None = None,
) -> dict[str, str] | None:
    """
    Assemble request headers for an OpenAI-compatible endpoint.

    A ``Bearer`` Authorization entry is created when ``api_key`` is truthy, and
    ``existing_headers`` are then layered on top, so a caller-supplied header
    with the same key replaces the generated one.

    :param api_key: Optional API key used for Bearer authentication
    :param existing_headers: Optional caller-supplied headers to merge in
    :return: A new headers dict, or ``None`` when the result would be empty
    """
    merged: dict[str, str] = (
        {"Authorization": f"Bearer {api_key}"} if api_key else {}
    )
    # Caller-supplied entries are applied last so they win on key collisions.
    merged.update(existing_headers or {})
    return merged if merged else None


def resolve_validate_kwargs(
    validate_backend: bool | str | dict[str, Any],
    target: str,
    api_routes: dict[str, str],
) -> dict[str, Any] | None:
    """
    Build ``httpx`` request keyword arguments from backend validation settings.

    ``validate_backend`` may be:

    - falsy (``False``, ``""``, ``{}``): validation is skipped;
    - ``True``: treated as the string ``"/health"``;
    - a string: if it is a key of ``api_routes`` it resolves to
      ``{target}/{api_routes[key]}``, otherwise it is used verbatim as the URL;
    - a ``dict``: must contain ``url``; ``method`` defaults to ``GET``.

    The caller's dictionary is never modified; a shallow copy is returned.

    :param validate_backend: Validation setting in one of the forms above
    :param target: Base URL prepended to routes resolved from ``api_routes``
    :param api_routes: Mapping of route keys to paths
    :return: Keyword arguments suitable for ``httpx.AsyncClient.request``, or
        ``None`` when validation is turned off
    :raises ValueError: If the setting is neither a string nor a dictionary
        containing ``url``
    """
    if not validate_backend:
        return None

    raw = "/health" if validate_backend is True else validate_backend

    if isinstance(raw, str):
        url = f"{target}/{api_routes[raw]}" if raw in api_routes else raw
        return {"method": "GET", "url": url}

    if not isinstance(raw, dict) or "url" not in raw:
        raise ValueError(
            "validate_backend must be a boolean, string, or dictionary and contain "
            f"a target URL. Got: {raw}"
        )

    # Copy before filling in the default so the caller's dict is left untouched.
    request_kwargs: dict[str, Any] = dict(raw)
    request_kwargs.setdefault("method", "GET")
    return request_kwargs
64 changes: 10 additions & 54 deletions src/guidellm/backends/openai/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
from pydantic import Field, field_validator

from guidellm.backends.backend import Backend, BackendArgs
from guidellm.backends.openai.common import (
FALLBACK_TIMEOUT,
build_headers,
resolve_validate_kwargs,
)
from guidellm.backends.openai.request_handlers import OpenAIRequestHandlerFactory
from guidellm.schemas import (
GenerationRequest,
Expand Down Expand Up @@ -123,9 +128,6 @@ def validate_request_format(cls, v: str | None) -> str | None:
"audio_translations": "/v1/audio/translations",
}

# NOTE: This value is taken from httpx's default
FALLBACK_TIMEOUT = 5.0


@Backend.register("openai_http")
class OpenAIHTTPBackend(Backend):
Expand Down Expand Up @@ -230,8 +232,10 @@ def __init__(
self.http2 = http2
self.follow_redirects = follow_redirects
self.verify = verify
self.validate_backend: dict[str, Any] | None = self._resolve_validate_kwargs(
validate_backend
self.validate_backend: dict[str, Any] | None = resolve_validate_kwargs(
validate_backend,
self.target,
self.api_routes,
)
self.stream: bool = stream
self.extras = (
Expand Down Expand Up @@ -501,52 +505,4 @@ async def _aiter_lines(self, stream: httpx.Response) -> AsyncIterator[str]:
def _build_headers(
self, existing_headers: dict[str, str] | None = None
) -> dict[str, str] | None:
"""
Build headers dictionary with bearer token authentication.

Merges the Authorization bearer token header (if api_key is set) with any
existing headers. User-provided headers take precedence over the bearer token.

:param existing_headers: Optional existing headers to merge with
:return: Dictionary of headers with bearer token included if api_key is set
"""
headers: dict[str, str] = {}

# Add bearer token if api_key is set
if self.api_key:
headers["Authorization"] = f"Bearer {self.api_key}"

# Merge with existing headers (user headers take precedence)
if existing_headers:
headers = {**headers, **existing_headers}

return headers or None

def _resolve_validate_kwargs(
self, validate_backend: bool | str | dict[str, Any]
) -> dict[str, Any] | None:
if not (validate_kwargs := validate_backend):
return None

if validate_kwargs is True:
validate_kwargs = "/health"

if isinstance(validate_kwargs, str) and validate_kwargs in self.api_routes:
validate_kwargs = f"{self.target}/{self.api_routes[validate_kwargs]}"

if isinstance(validate_kwargs, str):
validate_kwargs = {
"method": "GET",
"url": validate_kwargs,
}

if not isinstance(validate_kwargs, dict) or "url" not in validate_kwargs:
raise ValueError(
"validate_backend must be a boolean, string, or dictionary and contain "
f"a target URL. Got: {validate_kwargs}"
)

if "method" not in validate_kwargs:
validate_kwargs["method"] = "GET"

return validate_kwargs
return build_headers(self.api_key, existing_headers)
Loading
Loading