diff --git a/pyproject.toml b/pyproject.toml index 085f49489..420b4cf0b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,6 +86,7 @@ audio = [ # Torchcodec needs specific torch version "torch==2.10.*", "torchcodec==0.10.*", + "websockets>=13.0", ] vision = [ "datasets[vision]", diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 02ad23bb8..258fa844f 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -166,7 +166,8 @@ def benchmark(): "template per vLLM docs. Default: default-template" "For openai backend: http endpoint path (/v1/chat/completions, " "/v1/completions, /v1/audio/transcriptions, /v1/audio/translations) or " - "alias (e.g. chat_completions); default /v1/chat/completions." + "alias (e.g. chat_completions); default /v1/chat/completions. " + "For openai_websocket: WebSocket URL path such as /v1/realtime (default)." ), ) @click.option( diff --git a/src/guidellm/backends/__init__.py b/src/guidellm/backends/__init__.py index 52ba6ecb3..1c7e48b92 100644 --- a/src/guidellm/backends/__init__.py +++ b/src/guidellm/backends/__init__.py @@ -18,6 +18,8 @@ OpenAIHTTPBackend, OpenAIRequestHandler, OpenAIRequestHandlerFactory, + OpenAIWebSocketBackend, + OpenAIWebSocketBackendArgs, TextCompletionsRequestHandler, ) @@ -37,6 +39,8 @@ "OpenAIHTTPBackend", "OpenAIRequestHandler", "OpenAIRequestHandlerFactory", + "OpenAIWebSocketBackend", + "OpenAIWebSocketBackendArgs", "TextCompletionsRequestHandler", ] diff --git a/src/guidellm/backends/backend.py b/src/guidellm/backends/backend.py index 88c3617cb..4f83afd4c 100644 --- a/src/guidellm/backends/backend.py +++ b/src/guidellm/backends/backend.py @@ -24,7 +24,11 @@ ] -BackendType = Literal["openai_http", "vllm_python"] +BackendType = Literal[ + "openai_http", + "openai_websocket", + "vllm_python", +] class BackendArgs(BaseModel): diff --git a/src/guidellm/backends/openai/__init__.py b/src/guidellm/backends/openai/__init__.py index fd76e80df..30c2211e9 100644 --- 
a/src/guidellm/backends/openai/__init__.py +++ b/src/guidellm/backends/openai/__init__.py @@ -7,6 +7,7 @@ ResponsesRequestHandler, TextCompletionsRequestHandler, ) +from .websocket import OpenAIWebSocketBackend, OpenAIWebSocketBackendArgs __all__ = [ "AudioRequestHandler", @@ -14,6 +15,8 @@ "OpenAIHTTPBackend", "OpenAIRequestHandler", "OpenAIRequestHandlerFactory", + "OpenAIWebSocketBackend", + "OpenAIWebSocketBackendArgs", "ResponsesRequestHandler", "TextCompletionsRequestHandler", ] diff --git a/src/guidellm/backends/openai/common.py b/src/guidellm/backends/openai/common.py new file mode 100644 index 000000000..18e422caf --- /dev/null +++ b/src/guidellm/backends/openai/common.py @@ -0,0 +1,79 @@ +"""Shared helpers for OpenAI-compatible HTTP and WebSocket backends.""" + +from __future__ import annotations + +from typing import Any + +__all__ = [ + "FALLBACK_TIMEOUT", + "build_headers", + "resolve_validate_kwargs", +] + +# NOTE: This value is taken from httpx's default +FALLBACK_TIMEOUT = 5.0 + + +def build_headers( + api_key: str | None, + existing_headers: dict[str, str] | None = None, +) -> dict[str, str] | None: + """ + Build headers with bearer authentication for OpenAI-compatible requests. + + Merges the Authorization bearer token (if ``api_key`` is set) with any + existing headers. User-provided headers take precedence over the bearer token. + + :param api_key: Optional API key for Bearer authentication + :param existing_headers: Optional headers to merge in + :return: Headers dict, or ``None`` if there are no headers to send + """ + headers: dict[str, str] = {} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + if existing_headers: + headers = {**headers, **existing_headers} + return headers or None + + +def resolve_validate_kwargs( + validate_backend: bool | str | dict[str, Any], + target: str, + api_routes: dict[str, str], +) -> dict[str, Any] | None: + """ + Build ``httpx`` request keyword arguments from backend validation settings. 
+ + ``validate_backend`` may be ``False``/equivalent (skip validation), ``True`` + (default ``GET`` against the ``/health`` route key), a route key present in + ``api_routes`` (resolved to ``{target}/{path}``), a full URL string, or a + ``dict`` that includes ``url`` and optionally ``method`` (default ``GET``). + + :return: Keyword arguments suitable for ``httpx.AsyncClient.request``, or + ``None`` when validation is turned off. + """ + raw = validate_backend + if not raw: + return None + + if raw is True: + raw = "/health" + + if isinstance(raw, str): + url = f"{target}/{api_routes[raw]}" if raw in api_routes else raw + request_kwargs: dict[str, Any] = {"method": "GET", "url": url} + elif isinstance(raw, dict): + request_kwargs = raw + else: + request_kwargs = raw + + if not isinstance(request_kwargs, dict) or "url" not in request_kwargs: + raise ValueError( + "validate_backend must be a boolean, string, or dictionary and contain " + f"a target URL. Got: {request_kwargs}" + ) + + if "method" not in request_kwargs: + request_kwargs["method"] = "GET" + + return request_kwargs diff --git a/src/guidellm/backends/openai/http.py b/src/guidellm/backends/openai/http.py index 527bb53aa..67cd19c6c 100644 --- a/src/guidellm/backends/openai/http.py +++ b/src/guidellm/backends/openai/http.py @@ -19,6 +19,11 @@ from pydantic import Field, field_validator from guidellm.backends.backend import Backend, BackendArgs +from guidellm.backends.openai.common import ( + FALLBACK_TIMEOUT, + build_headers, + resolve_validate_kwargs, +) from guidellm.backends.openai.request_handlers import OpenAIRequestHandlerFactory from guidellm.schemas import ( GenerationRequest, @@ -123,9 +128,6 @@ def validate_request_format(cls, v: str | None) -> str | None: "audio_translations": "/v1/audio/translations", } -# NOTE: This value is taken from httpx's default -FALLBACK_TIMEOUT = 5.0 - @Backend.register("openai_http") class OpenAIHTTPBackend(Backend): @@ -230,8 +232,10 @@ def __init__( self.http2 = http2 
self.follow_redirects = follow_redirects self.verify = verify - self.validate_backend: dict[str, Any] | None = self._resolve_validate_kwargs( - validate_backend + self.validate_backend: dict[str, Any] | None = resolve_validate_kwargs( + validate_backend, + self.target, + self.api_routes, ) self.stream: bool = stream self.extras = ( @@ -501,52 +505,4 @@ async def _aiter_lines(self, stream: httpx.Response) -> AsyncIterator[str]: def _build_headers( self, existing_headers: dict[str, str] | None = None ) -> dict[str, str] | None: - """ - Build headers dictionary with bearer token authentication. - - Merges the Authorization bearer token header (if api_key is set) with any - existing headers. User-provided headers take precedence over the bearer token. - - :param existing_headers: Optional existing headers to merge with - :return: Dictionary of headers with bearer token included if api_key is set - """ - headers: dict[str, str] = {} - - # Add bearer token if api_key is set - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" - - # Merge with existing headers (user headers take precedence) - if existing_headers: - headers = {**headers, **existing_headers} - - return headers or None - - def _resolve_validate_kwargs( - self, validate_backend: bool | str | dict[str, Any] - ) -> dict[str, Any] | None: - if not (validate_kwargs := validate_backend): - return None - - if validate_kwargs is True: - validate_kwargs = "/health" - - if isinstance(validate_kwargs, str) and validate_kwargs in self.api_routes: - validate_kwargs = f"{self.target}/{self.api_routes[validate_kwargs]}" - - if isinstance(validate_kwargs, str): - validate_kwargs = { - "method": "GET", - "url": validate_kwargs, - } - - if not isinstance(validate_kwargs, dict) or "url" not in validate_kwargs: - raise ValueError( - "validate_backend must be a boolean, string, or dictionary and contain " - f"a target URL. 
Got: {validate_kwargs}" - ) - - if "method" not in validate_kwargs: - validate_kwargs["method"] = "GET" - - return validate_kwargs + return build_headers(self.api_key, existing_headers) diff --git a/src/guidellm/backends/openai/websocket.py b/src/guidellm/backends/openai/websocket.py new file mode 100644 index 000000000..78dfc566d --- /dev/null +++ b/src/guidellm/backends/openai/websocket.py @@ -0,0 +1,615 @@ +""" +WebSocket backend for vLLM-compatible realtime audio transcription. + +Implements the JSON event protocol used by vLLM's ``/v1/realtime`` endpoint: +``session.created`` → ``session.update`` → ``input_audio_buffer.append`` → +``input_audio_buffer.commit`` (``final: false`` starts transcription, then +``final: true`` ends the audio stream) → ``transcription.delta`` / +``transcription.done``. +""" + +from __future__ import annotations + +import asyncio +import json +import ssl +import time +from collections.abc import AsyncIterator +from typing import TYPE_CHECKING, Any +from urllib.parse import ParseResult, urlparse + +import httpx +from pydantic import Field, field_validator + +if TYPE_CHECKING: + from websockets.asyncio.client import ClientConnection + +from guidellm.backends.backend import Backend, BackendArgs +from guidellm.backends.openai.common import ( + FALLBACK_TIMEOUT, + build_headers, + resolve_validate_kwargs, +) +from guidellm.backends.openai.request_handlers import AudioRequestHandler +from guidellm.schemas import ( + GenerationRequest, + GenerationRequestArguments, + GenerationResponse, + RequestInfo, +) + +__all__ = [ + "OpenAIWebSocketBackend", + "OpenAIWebSocketBackendArgs", +] + +_WS_API_ROUTES = { + "/health": "health", + "/v1/models": "v1/models", +} + +# Default WebSocket HTTP path under target (CLI: --request-format / --request-type). 
+_DEFAULT_WS_REQUEST_FORMAT = "/v1/realtime" +_WS_REQUEST_FORMAT_ALIASES: dict[str, str] = { + "realtime": _DEFAULT_WS_REQUEST_FORMAT, +} + + +def _effective_websocket_http_path(request_format: str | None) -> str: + """Normalize ``request_format`` to a WebSocket path (``/…`` segment on the host).""" + if request_format is None: + return _DEFAULT_WS_REQUEST_FORMAT + s = request_format.strip() + if not s: + raise ValueError("request_format must not be empty or whitespace") + canonical = _WS_REQUEST_FORMAT_ALIASES.get(s, s) + if not canonical.startswith("/"): + raise ValueError( + "request_format must be a path starting with '/' (for example " + f"{_DEFAULT_WS_REQUEST_FORMAT!r}) or alias " + f"{', '.join(repr(k) for k in _WS_REQUEST_FORMAT_ALIASES)}" + ) + return canonical + + +# Guard against a misbehaving server that only emits ignored event types. +_MAX_IGNORED_WS_EVENT_TYPES = 50_000 + +# Per-message WebSocket recv timeout default so benchmark workers do not hang forever +# on a silent peer. Pass ``timeout=None`` to wait indefinitely. +_DEFAULT_WS_RECV_TIMEOUT = 120.0 + +_AUDIO_EXTRA_HINT = ( + "Install optional audio extras: pip install 'guidellm[audio]' " + "(includes websockets and torchcodec for realtime transcription)." +) + + +def _require_ws_connect() -> Any: + try: + from websockets.asyncio.client import connect as ws_connect + except ImportError as exc: + raise ImportError( + "The openai_websocket backend requires the 'websockets' package. 
" + + _AUDIO_EXTRA_HINT + ) from exc + return ws_connect + + +def _ws_error_message(err: Any) -> str: + """Format WebSocket ``error`` for exceptions (supports dict payloads).""" + if isinstance(err, dict): + msg = err.get("message") or err.get("msg") + code = err.get("code") + parts = [str(p) for p in (code, msg) if p] + if parts: + return ": ".join(parts) + try: + return json.dumps(err)[:500] + except (TypeError, ValueError): + return repr(err) + if err is None or err == "": + return "WebSocket error" + return str(err) + + +def _model_ids_from_openai_models_payload(payload: Any) -> list[str]: + """Parse ``GET /v1/models`` JSON body; raise RuntimeError if shape is unexpected.""" + if not isinstance(payload, dict): + raise RuntimeError( + "Unexpected /v1/models response: top-level JSON must be an object, " + f"got {type(payload).__name__}" + ) + data = payload.get("data") + if not isinstance(data, list): + raise RuntimeError( + "Unexpected /v1/models response: 'data' must be a list, " + f"got {type(data).__name__}" + ) + ids: list[str] = [] + for i, item in enumerate(data): + if not isinstance(item, dict) or "id" not in item: + raise RuntimeError( + "Unexpected /v1/models response: each entry must be an object with " + f"'id' (index {i})" + ) + ids.append(str(item["id"])) + return ids + + +def _load_ws_event(raw: str) -> dict[str, Any]: + """Parse a JSON WebSocket text frame; raise RuntimeError on invalid JSON.""" + try: + parsed: Any = json.loads(raw) + except json.JSONDecodeError as exc: + raise RuntimeError( + f"Invalid JSON from realtime WebSocket: {exc.msg} at position {exc.pos}" + ) from exc + if not isinstance(parsed, dict): + raise RuntimeError( + f"Expected JSON object from realtime WebSocket, got {type(parsed).__name__}" + ) + return parsed + + +# Module-level hook for ``guidellm.extras.audio.pcm16_append_b64_chunks``: on first +# realtime encode we assign the imported callable here (see ``_ensure_*``). 
Unit tests +# patch this attribute so WS logic can be exercised without ``guidellm[audio]``. +pcm16_append_b64_chunks: Any = None +_pcm_imported_fn: dict[str, Any] = {"fn": None} + + +def _ensure_pcm16_append_b64_chunks() -> Any: + if pcm16_append_b64_chunks is not None: + return pcm16_append_b64_chunks + if _pcm_imported_fn["fn"] is not None: + return _pcm_imported_fn["fn"] + try: + from guidellm.extras.audio import pcm16_append_b64_chunks as fn + except ImportError as exc: + raise ImportError( + "The openai_websocket backend requires the audio extras for PCM " + "handling used in realtime transcription. " + _AUDIO_EXTRA_HINT + ) from exc + _pcm_imported_fn["fn"] = fn + return fn + + +def _coerce_usage_int(value: Any) -> int | None: + if isinstance(value, bool): + return None + if isinstance(value, int | float): + return int(value) + if isinstance(value, str): + stripped = value.strip() + if not stripped: + return None + try: + return int(stripped) + except ValueError: + return None + return None + + +def _normalize_transcription_usage( + raw_usage: Any, +) -> dict[str, int | dict[str, int]] | None: + """Coerce OpenAI-style usage dict values to ints (including numeric strings).""" + if not isinstance(raw_usage, dict): + return None + result: dict[str, int | dict[str, int]] = {} + for key, val in raw_usage.items(): + if isinstance(val, dict): + inner: dict[str, int] = {} + for ik, iv in val.items(): + num = _coerce_usage_int(iv) + if num is not None: + inner[ik] = num + if inner: + result[key] = inner + else: + num = _coerce_usage_int(val) + if num is not None: + result[key] = num + return result if result else None + + +class OpenAIWebSocketBackendArgs(BackendArgs): + """Arguments for creating the realtime WebSocket backend.""" + + target: str = Field( + description=( + "HTTP(S) base URL of the server (WebSocket URL is derived from it)." + ), + json_schema_extra={ + "error_message": ( + "Backend '{backend_type}' requires --target with a valid URL." 
+ ) + }, + ) + model: str | None = Field( + default=None, + description="Model identifier (required unless discoverable from /v1/models).", + ) + request_format: str | None = Field( + default=None, + description=( + "WebSocket path on the HTTP host (default /v1/realtime). " + "Use the same top-level CLI flags as ``openai_http``: " + "--request-format / --request-type." + ), + json_schema_extra={ + "error_message": ( + "Backend '{backend_type}' received an invalid --request-format / " + f"request_format. Use {_DEFAULT_WS_REQUEST_FORMAT!r} or another " + "path starting with '/'." + ) + }, + ) + + @field_validator("request_format") + @classmethod + def validate_request_format(cls, v: str | None) -> str | None: + if v is None: + return None + return _effective_websocket_http_path(v) + + chunk_samples: int = Field( + default=3200, + ge=1, + description="PCM16 frames per input_audio_buffer.append chunk (16 kHz).", + ) + api_key: str | None = Field(default=None, description="Bearer token if required.") + verify: bool = Field(default=False, description="Verify TLS certificates.") + timeout: float | None = Field( + default=_DEFAULT_WS_RECV_TIMEOUT, + description=( + "Per-message read timeout for WebSocket receives (seconds). " + f"Defaults to {_DEFAULT_WS_RECV_TIMEOUT}s so hung servers do not block " + "workers; use ``None`` for no limit." + ), + ) + timeout_connect: float = Field( + default=FALLBACK_TIMEOUT, + description="Timeout for establishing the WebSocket connection.", + ) + validate_backend: bool | str | dict[str, Any] = Field( + default=True, + description=( + "HTTP health check before benchmarks (same semantics as openai_http)." 
+ ), + ) + extras: dict[str, Any] | None = Field( + default=None, + description="Extra fields merged into session.update (backend model wins).", + ) + + +@Backend.register("openai_websocket") +class OpenAIWebSocketBackend(Backend): + """WebSocket client for realtime (streaming) audio transcription.""" + + @classmethod + def backend_args(cls) -> type[BackendArgs]: + return OpenAIWebSocketBackendArgs + + def __init__( + self, + target: str, + model: str = "", + request_format: str | None = None, + chunk_samples: int = 3200, + api_key: str | None = None, + verify: bool = False, + timeout: float | None = _DEFAULT_WS_RECV_TIMEOUT, + timeout_connect: float = FALLBACK_TIMEOUT, + validate_backend: bool | str | dict[str, Any] = True, + extras: dict[str, Any] | None = None, + ): + super().__init__(type_="openai_websocket") + self.target = target.rstrip("/").removesuffix("/v1") + self.model = model or "" + self.websocket_path = _effective_websocket_http_path(request_format) + self.chunk_samples = chunk_samples + self.api_key = api_key + self.verify = verify + self.timeout = timeout + self.timeout_connect = timeout_connect + self.api_routes = _WS_API_ROUTES + self.validate_backend: dict[str, Any] | None = resolve_validate_kwargs( + validate_backend, + self.target, + self.api_routes, + ) + self.extras = extras or {} + self._in_process = False + self._async_client: httpx.AsyncClient | None = None + + @property + def info(self) -> dict[str, Any]: + return { + "target": self.target, + "model": self.model, + "websocket_path": self.websocket_path, + "chunk_samples": self.chunk_samples, + "timeout": self.timeout, + "timeout_connect": self.timeout_connect, + "verify": self.verify, + "validate_backend": self.validate_backend, + } + + def _parsed_target(self) -> ParseResult: + raw = self.target if "://" in self.target else f"http://{self.target}" + return urlparse(raw) + + def _ws_url(self) -> str: + parsed = self._parsed_target() + if not parsed.netloc: + raise ValueError(f"Invalid 
target URL for WebSocket: {self.target!r}") + ws_scheme = "wss" if parsed.scheme in ("https", "wss") else "ws" + path = self.websocket_path + if not path.startswith("/"): + path = f"/{path}" + return f"{ws_scheme}://{parsed.netloc}{path}" + + def _ssl_context(self) -> ssl.SSLContext | None: + if self._parsed_target().scheme in ("http", "ws"): + return None + ctx = ssl.create_default_context() + if not self.verify: + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + return ctx + + def _build_headers( + self, existing_headers: dict[str, str] | None = None + ) -> dict[str, str] | None: + return build_headers(self.api_key, existing_headers) + + async def process_startup(self) -> None: + if self._in_process: + raise RuntimeError("Backend already started up for process.") + self._async_client = httpx.AsyncClient( + timeout=httpx.Timeout( + FALLBACK_TIMEOUT, + read=self.timeout, + connect=self.timeout_connect, + ), + verify=self.verify, + limits=httpx.Limits( + max_connections=None, + max_keepalive_connections=None, + keepalive_expiry=5.0, + ), + ) + self._in_process = True + + async def process_shutdown(self) -> None: + if not self._in_process: + raise RuntimeError("Backend not started up for process.") + client = self._async_client + if client is None: + raise RuntimeError("Backend not started up for process.") + await client.aclose() + self._async_client = None + self._in_process = False + + async def validate(self) -> None: + if self._async_client is None: + raise RuntimeError("Backend not started up for process.") + if not self.validate_backend: + return + validate_kwargs = {**self.validate_backend} + existing_headers = validate_kwargs.get("headers") + validate_kwargs["headers"] = build_headers(self.api_key, existing_headers) + try: + response = await self._async_client.request(**validate_kwargs) + response.raise_for_status() + except Exception as exc: + raise RuntimeError( + "Backend validation request failed.
Could not connect to the server " + "or validate the backend configuration." + ) from exc + + async def available_models(self) -> list[str]: + if self._async_client is None: + raise RuntimeError("Backend not started up for process.") + target = f"{self.target}/v1/models" + response = await self._async_client.get( + target, headers=build_headers(self.api_key) + ) + response.raise_for_status() + try: + payload: Any = response.json() + except json.JSONDecodeError as exc: + raise RuntimeError( + "Unexpected /v1/models response: body is not valid JSON" + ) from exc + return _model_ids_from_openai_models_payload(payload) + + async def default_model(self) -> str: + if self.model: + return self.model + if not self._in_process: + return "" + models = await self.available_models() + self.model = models[0] if models else "" + return self.model + + async def resolve( # type: ignore[override, misc] # noqa: C901, PLR0912, PLR0915 + self, + request: GenerationRequest, + request_info: RequestInfo, + history: list[tuple[GenerationRequest, GenerationResponse | None]] + | None = None, + ) -> AsyncIterator[tuple[GenerationResponse | None, RequestInfo]]: + if self._async_client is None: + raise RuntimeError("Backend not started up for process.") + if history: + raise NotImplementedError( + "openai_websocket does not support multiturn/history yet." + ) + + audio_columns = request.columns.get("audio_column", []) + if len(audio_columns) != 1: + raise ValueError( + "Realtime transcription expects exactly one audio_column entry; " + f"got {len(audio_columns)}." + ) + + model_name = await self.default_model() + if not str(model_name).strip(): + raise RuntimeError( + "No model configured for openai_websocket and /v1/models returned " + "none. Pass --model or ensure the server lists at least one model." 
+ ) + + arguments = GenerationRequestArguments( + body={ + "model": model_name, + "websocket_path": self.websocket_path, + "chunk_samples": self.chunk_samples, + } + ) + + pcm_fn = _ensure_pcm16_append_b64_chunks() + chunks = pcm_fn( + audio_columns[0], + chunk_samples=self.chunk_samples, + ) + + session_update: dict[str, Any] = {"type": "session.update"} + if self.extras: + for key, val in self.extras.items(): + if key not in ("type", "model"): + session_update[key] = val + session_update["model"] = model_name + + ssl_ctx = self._ssl_context() + ws_headers = build_headers(self.api_key) + audio_handler = AudioRequestHandler() + full_text_parts: list[str] = [] + + try: + request_info.timings.request_start = time.time() + connect_kw: dict[str, Any] = { + "ssl": ssl_ctx, + "open_timeout": self.timeout_connect, + } + if ws_headers: + connect_kw["additional_headers"] = ws_headers + ws_connect = _require_ws_connect() + async with ws_connect(self._ws_url(), **connect_kw) as ws: + raw_first = await self._recv_ws(ws) + first_event = _load_ws_event(raw_first) + if first_event.get("type") == "error": + raise RuntimeError(_ws_error_message(first_event.get("error"))) + if first_event.get("type") != "session.created": + raise RuntimeError( + f"Expected session.created, got {first_event.get('type')!r}" + ) + await ws.send(json.dumps(session_update)) + for b64_chunk in chunks: + await ws.send( + json.dumps( + {"type": "input_audio_buffer.append", "audio": b64_chunk} + ) + ) + await ws.send( + json.dumps({"type": "input_audio_buffer.commit", "final": False}) + ) + # Sentinel end-of-stream for vLLM's audio queue + # (see RealtimeConnection). 
+ await ws.send( + json.dumps({"type": "input_audio_buffer.commit", "final": True}) + ) + + ignored_events = 0 + while True: + raw = await self._recv_ws(ws) + event = _load_ws_event(raw) + et = event.get("type") + if et == "transcription.delta": + iter_time = time.time() + if request_info.timings.first_request_iteration is None: + request_info.timings.first_request_iteration = iter_time + request_info.timings.last_request_iteration = iter_time + request_info.timings.request_iterations += 1 + delta = event.get("delta") or "" + full_text_parts.append(delta) + if request_info.timings.first_token_iteration is None: + request_info.timings.first_token_iteration = iter_time + request_info.timings.token_iterations = 0 + yield None, request_info + request_info.timings.last_token_iteration = iter_time + request_info.timings.token_iterations += 1 if delta else 0 + + elif et == "transcription.done": + iter_time = time.time() + request_info.timings.request_end = iter_time + full_text = event.get("text") or "".join(full_text_parts) + if request_info.timings.first_token_iteration is None: + if request_info.timings.first_request_iteration is None: + request_info.timings.first_request_iteration = iter_time + request_info.timings.last_request_iteration = iter_time + request_info.timings.request_iterations += 1 + request_info.timings.first_token_iteration = iter_time + request_info.timings.token_iterations = 0 + yield None, request_info + request_info.timings.last_token_iteration = iter_time + request_info.timings.token_iterations += ( + 1 if full_text else 0 + ) + usage_dict = _normalize_transcription_usage(event.get("usage")) + inp, outp = audio_handler.extract_metrics(usage_dict, full_text) + yield ( + GenerationResponse( + request_id=request.request_id, + request_args=arguments.model_dump_json(), + text=full_text, + input_metrics=inp, + output_metrics=outp, + ), + request_info, + ) + break + elif et == "error": + raise RuntimeError(_ws_error_message(event.get("error"))) + else: + 
ignored_events += 1 + if ignored_events > _MAX_IGNORED_WS_EVENT_TYPES: + raise RuntimeError( + "Exceeded maximum ignored realtime WebSocket events " + f"without transcription.done (last type={et!r})." + ) + continue + + except asyncio.CancelledError as err: + text_so_far = "".join(full_text_parts) + inp, outp = audio_handler.extract_metrics(None, text_so_far or "") + yield ( + GenerationResponse( + request_id=request.request_id, + request_args=arguments.model_dump_json(), + text=text_so_far, + input_metrics=inp, + output_metrics=outp, + ), + request_info, + ) + raise err + finally: + if ( + request_info.timings.request_start is not None + and request_info.timings.request_end is None + ): + request_info.timings.request_end = time.time() + + async def _recv_ws(self, ws: ClientConnection) -> str: + if self.timeout is None: + msg = await ws.recv() + else: + msg = await asyncio.wait_for(ws.recv(), timeout=self.timeout) + if isinstance(msg, bytes): + return msg.decode() + return str(msg) diff --git a/src/guidellm/extras/audio.py b/src/guidellm/extras/audio.py index fe05f2275..acdfb3b0f 100644 --- a/src/guidellm/extras/audio.py +++ b/src/guidellm/extras/audio.py @@ -1,5 +1,6 @@ from __future__ import annotations +import base64 from pathlib import Path from typing import Any, Literal @@ -17,6 +18,7 @@ __all__ = [ "encode_audio", "is_url", + "pcm16_append_b64_chunks", ] @@ -212,3 +214,109 @@ def _encode_audio( def get_file_name(path: Path | str) -> str: """Get file name from path.""" return Path(path).name + + +# Decoded float waveforms are nominally in [-1.0, 1.0]; clip before scaling to int16. +_PCM16_WAVE_CLIP_MIN = -1.0 +_PCM16_WAVE_CLIP_MAX = 1.0 +# Symmetric int16 positive peak (2**15 - 1); standard float[-1, 1] -> PCM16 mapping. 
+_PCM16_FLOAT_TO_INT16_SCALE = 32767.0 +_BYTES_PER_PCM16_SAMPLE = 2 + + +def _sample_rate_hint_from_audio_column_dict(d: dict[str, Any]) -> int | None: + """Return ``sample_rate`` / ``sampling_rate`` from an audio column dict.""" + hint = d.get("sample_rate", d.get("sampling_rate")) + if ( + hint is not None + and not isinstance(hint, bool) + and isinstance(hint, int | float) + and hint > 0 + ): + return int(round(float(hint))) + return None + + +def _require_positive_sample_rate(sr_raw: Any) -> float: + if isinstance(sr_raw, bool) or not isinstance(sr_raw, int | float) or sr_raw <= 0: + raise ValueError( + "Decoded audio has invalid sample_rate " + f"{sr_raw!r}; expected a positive number" + ) + return float(sr_raw) + + +def pcm16_append_b64_chunks( + audio_item: dict[str, Any] | bytes, + *, + target_sample_rate: int = 16000, + chunk_samples: int = 3200, +) -> list[str]: + """ + Decode audio to base64-encoded PCM16 mono chunks for realtime ``append`` events. + + Matches vLLM ``input_audio_buffer.append`` (PCM16 mono at ``target_sample_rate`` + Hz), split into ``chunk_samples``-frame segments. + Equivalent conversion flow to vLLM's realtime microphone client example, but + generalized for dataset/file inputs used by GuideLLM benchmarks. + """ + # Accept common audio column shapes used in GuideLLM datasets. + if isinstance(audio_item, dict): + if "audio" in audio_item: + decode_sr = _sample_rate_hint_from_audio_column_dict(audio_item) + samples = _decode_audio( + audio_item["audio"], + sample_rate=decode_sr, + ) + elif "data" in audio_item or "url" in audio_item: + samples = _decode_audio(audio_item) + else: + raise ValueError( + "audio_column dict must include 'audio', 'data', or 'url' " + "(same shapes as encode_audio / _decode_audio); " + f"got keys {list(audio_item)!r}" + ) + else: + samples = _decode_audio(audio_item) + + # Ensure channel-first shape, then downmix to mono for realtime PCM input. 
+ data = samples.data + if data.dim() == 1: + data = data.unsqueeze(0) + if data.shape[0] > 1: + data = data.mean(dim=0, keepdim=True) + + # Realtime endpoint expects 16 kHz PCM16 mono. + sr = _require_positive_sample_rate(samples.sample_rate) + if sr != target_sample_rate: + t_in = data.shape[1] + t_out = max(1, int(round(t_in * target_sample_rate / sr))) + data = torch.nn.functional.interpolate( + data.unsqueeze(0), + size=t_out, + mode="linear", + align_corners=False, + ).squeeze(0) + + # Convert float waveform to signed little-endian PCM16 bytes. + wave = data.squeeze(0) + pcm_i16 = ( + ( + wave.clamp(_PCM16_WAVE_CLIP_MIN, _PCM16_WAVE_CLIP_MAX) + * _PCM16_FLOAT_TO_INT16_SCALE + ) + .round() + .to(torch.int16) + ) + buf = pcm_i16.cpu().numpy().tobytes() + + # Split PCM bytes into chunk-sized base64 payloads for append events. + chunk_bytes = max(1, chunk_samples) * _BYTES_PER_PCM16_SAMPLE + out: list[str] = [] + for i in range(0, len(buf), chunk_bytes): + pcm_chunk = buf[i : i + chunk_bytes] + if pcm_chunk: + out.append(base64.b64encode(pcm_chunk).decode("ascii")) + if not out: + raise ValueError("Decoded audio produced no PCM data") + return out diff --git a/tests/e2e/test_realtime_ws_e2e.py b/tests/e2e/test_realtime_ws_e2e.py new file mode 100644 index 000000000..db8cfa125 --- /dev/null +++ b/tests/e2e/test_realtime_ws_e2e.py @@ -0,0 +1,142 @@ +"""End-to-end integration: realtime backend + PCM encoding + WebSocket (same loop). 
+ +## WRITTEN BY AI ## +""" + +from __future__ import annotations + +import json +import socket +import struct +import wave +from collections.abc import Awaitable, Callable +from pathlib import Path +from typing import Any + +import pytest + +try: + from websockets.asyncio.server import serve +except ImportError: + pytest.skip( + "websockets not installed; install guidellm[audio] for realtime e2e", + allow_module_level=True, + ) + +from guidellm.backends.openai.websocket import OpenAIWebSocketBackend +from guidellm.schemas import GenerationRequest, RequestInfo, RequestTimings + + +def make_realtime_transcription_stub_handler( + *, + delta_text: str = "hello", + done_text: str | None = None, + usage: dict[str, Any] | None = None, + session_id: str = "stub-sess", +) -> Callable[[Any], Awaitable[None]]: + """Build an async handler that completes one transcription after two commits.""" + + resolved_done = done_text if done_text is not None else delta_text + resolved_usage = usage or { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + } + + async def handler(ws: Any) -> None: + await ws.send( + json.dumps({"type": "session.created", "id": session_id, "created": 0}) + ) + commits: list[bool | None] = [] + while True: + msg = await ws.recv() + payload = json.loads(msg if isinstance(msg, str) else msg.decode()) + if payload.get("type") == "input_audio_buffer.commit": + commits.append(payload.get("final")) + if payload.get("final"): + break + assert commits == [False, True] + await ws.send(json.dumps({"type": "transcription.delta", "delta": delta_text})) + await ws.send( + json.dumps( + { + "type": "transcription.done", + "text": resolved_done, + "usage": resolved_usage, + } + ) + ) + + return handler + + +def _free_port() -> int: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("127.0.0.1", 0)) + return int(s.getsockname()[1]) + + +def _write_minimal_wav_16k_mono(path: Path) -> None: + n_samples = 4000 + with 
wave.open(str(path), "w") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(16000) + frames = b"".join(struct.pack("<h", 1000) for _ in range(n_samples)) + wf.writeframes(frames) + + +@pytest.mark.asyncio +async def test_realtime_ws_e2e_in_process(tmp_path: Path) -> None: + """ + In-process: WebSocket server, OpenAI realtime backend, and torchcodec PCM path. + + No ``guidellm benchmark`` subprocess (avoids worker/hang issues in test envs). + For a live vLLM run, use ``scripts/e2e_realtime_external.sh``. + """ + port = _free_port() + wav_path = tmp_path / "clip.wav" + _write_minimal_wav_16k_mono(wav_path) + audio_item = { + "audio": wav_path.read_bytes(), + "file_name": "clip.wav", + "format": "wav", + } + request = GenerationRequest( + request_id="e2e-1", + columns={"audio_column": [audio_item]}, + ) + info = RequestInfo(timings=RequestTimings()) + + stub = make_realtime_transcription_stub_handler(session_id="e2e-stub-sess") + async with serve(stub, "127.0.0.1", port): + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="stub-model", + validate_backend=False, + ) + await be.process_startup() + try: + out: list = [] + async for item in be.resolve(request, info): + out.append(item) + finally: + await be.process_shutdown() + + assert len(out) == 2 + assert out[0][0] is None + final = out[1][0] + assert final is not None + assert final.text == "hello" + assert final.input_metrics.audio_tokens == 10 + assert final.output_metrics.text_tokens == 5 diff --git a/tests/unit/backends/openai/test_realtime_ws.py b/tests/unit/backends/openai/test_realtime_ws.py new file mode 100644 index 000000000..7f0fd497b --- /dev/null +++ b/tests/unit/backends/openai/test_realtime_ws.py @@ -0,0 +1,738 @@ +"""Tests for OpenAIWebSocketBackend.
+ +## WRITTEN BY AI ## +""" + +from __future__ import annotations + +import asyncio +import contextlib +import json + +import pytest +from pydantic import ValidationError + +try: + from websockets.asyncio.server import serve + from websockets.exceptions import ConnectionClosed +except ImportError: + pytest.skip( + "websockets not installed; install guidellm[audio] for realtime tests", + allow_module_level=True, + ) + +from guidellm.backends.backend import Backend +from guidellm.backends.openai.websocket import ( + _DEFAULT_WS_RECV_TIMEOUT, + OpenAIWebSocketBackend, + OpenAIWebSocketBackendArgs, +) +from guidellm.schemas import GenerationRequest, RequestInfo, RequestTimings + + +async def _bounded_ws_recv(ws: object, *, timeout: float = 5.0) -> None: + """Recv once with a cap so stub handlers never block ``serve()`` teardown.""" + with contextlib.suppress(asyncio.TimeoutError, ConnectionClosed): + await asyncio.wait_for(ws.recv(), timeout=timeout) + + +@pytest.mark.asyncio +async def test_resolve_streams_deltas_and_done(monkeypatch: pytest.MonkeyPatch) -> None: + """Fake server speaks vLLM-style realtime events; PCM path is patched.""" + + async def handler(ws: object) -> None: + await ws.send( + json.dumps({"type": "session.created", "id": "sess-x", "created": 0}) + ) + commits: list[bool | None] = [] + while True: + msg = await ws.recv() + data = json.loads(msg if isinstance(msg, str) else msg.decode()) + if data.get("type") == "input_audio_buffer.commit": + commits.append(data.get("final")) + if data.get("final"): + break + assert commits == [False, True] + await ws.send(json.dumps({"type": "transcription.delta", "delta": "hi"})) + await ws.send( + json.dumps( + { + "type": "transcription.done", + "text": "hi", + "usage": { + "prompt_tokens": 5, + "completion_tokens": 1, + "total_tokens": 6, + }, + } + ) + ) + + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YWFhYQ=="], + ) + + async with serve(handler, 
"127.0.0.1", 0) as server: + port = server.sockets[0].getsockname()[1] + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="test-model", + validate_backend=False, + ) + await be.process_startup() + req = GenerationRequest( + request_id="r1", + columns={ + "audio_column": [ + {"audio": b"fake", "format": "mp3", "file_name": "f.mp3"} + ] + }, + ) + info = RequestInfo(timings=RequestTimings()) + out: list = [] + async for item in be.resolve(req, info): + out.append(item) + await be.process_shutdown() + + assert len(out) == 2 + assert out[0][0] is None + final_resp, _ = out[1] + assert final_resp.text == "hi" + assert final_resp.input_metrics.audio_tokens == 5 + assert final_resp.output_metrics.text_tokens == 1 + + +@pytest.mark.asyncio +async def test_transcription_done_without_deltas_sets_first_token_and_prefetch_yield( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Only ``transcription.done`` (no deltas): TTFT and two yields match delta path.""" + + async def handler(ws: object) -> None: + await ws.send( + json.dumps({"type": "session.created", "id": "sess-x", "created": 0}) + ) + commits: list[bool | None] = [] + while True: + msg = await ws.recv() + data = json.loads(msg if isinstance(msg, str) else msg.decode()) + if data.get("type") == "input_audio_buffer.commit": + commits.append(data.get("final")) + if data.get("final"): + break + assert commits == [False, True] + await ws.send( + json.dumps( + { + "type": "transcription.done", + "text": "only-done", + "usage": { + "prompt_tokens": 2, + "completion_tokens": 7, + "total_tokens": 9, + }, + } + ) + ) + + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YWFhYQ=="], + ) + + async with serve(handler, "127.0.0.1", 0) as server: + port = server.sockets[0].getsockname()[1] + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="test-model", + validate_backend=False, + ) + await be.process_startup() + req = 
GenerationRequest( + request_id="r1", + columns={ + "audio_column": [ + {"audio": b"fake", "format": "mp3", "file_name": "f.mp3"} + ] + }, + ) + info = RequestInfo(timings=RequestTimings()) + out: list = [] + async for item in be.resolve(req, info): + out.append(item) + await be.process_shutdown() + + assert len(out) == 2 + prefetch, prefetch_info = out[0] + assert prefetch is None + assert prefetch_info.timings.first_token_iteration is not None + assert prefetch_info.timings.last_token_iteration is not None + assert prefetch_info.timings.token_iterations == 1 + final_resp, final_info = out[1] + assert final_resp.text == "only-done" + assert final_resp.input_metrics.audio_tokens == 2 + assert final_resp.output_metrics.text_tokens == 7 + assert final_info.timings.request_end is not None + + +@pytest.mark.asyncio +async def test_transcription_done_usage_string_counts( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """String token counts in usage should still feed AudioRequestHandler metrics.""" + + async def handler(ws: object) -> None: + await ws.send( + json.dumps({"type": "session.created", "id": "sess-x", "created": 0}) + ) + while True: + msg = await ws.recv() + data = json.loads(msg if isinstance(msg, str) else msg.decode()) + if data.get("type") == "input_audio_buffer.commit" and data.get("final"): + break + await ws.send(json.dumps({"type": "transcription.delta", "delta": "x"})) + await ws.send( + json.dumps( + { + "type": "transcription.done", + "text": "x", + "usage": { + "prompt_tokens": "12", + "completion_tokens": "3", + "total_tokens": "15", + }, + } + ) + ) + + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YWFhYQ=="], + ) + + async with serve(handler, "127.0.0.1", 0) as server: + port = server.sockets[0].getsockname()[1] + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="test-model", + validate_backend=False, + ) + await be.process_startup() + req = 
GenerationRequest( + request_id="r1", + columns={ + "audio_column": [ + {"audio": b"fake", "format": "mp3", "file_name": "f.mp3"} + ] + }, + ) + info = RequestInfo(timings=RequestTimings()) + out: list = [] + async for item in be.resolve(req, info): + out.append(item) + await be.process_shutdown() + + final_resp, _ = out[1] + assert final_resp.input_metrics.audio_tokens == 12 + assert final_resp.output_metrics.text_tokens == 3 + + +@pytest.mark.asyncio +async def test_server_error_event_raises( + monkeypatch: pytest.MonkeyPatch, +) -> None: + async def handler(ws: object) -> None: + await ws.send( + json.dumps({"type": "session.created", "id": "sess-x", "created": 0}) + ) + while True: + msg = await ws.recv() + data = json.loads(msg if isinstance(msg, str) else msg.decode()) + if data.get("type") == "input_audio_buffer.commit" and not data.get( + "final" + ): + await ws.send( + json.dumps({"type": "error", "error": "bad", "code": "e1"}) + ) + await _bounded_ws_recv(ws) + return + + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YQ=="], + ) + + async with serve(handler, "127.0.0.1", 0) as server: + port = server.sockets[0].getsockname()[1] + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="m", + validate_backend=False, + ) + await be.process_startup() + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + with pytest.raises(RuntimeError, match="bad"): + async for _ in be.resolve(req, info): + pass + await be.process_shutdown() + + +@pytest.mark.asyncio +async def test_first_message_error_event_raises( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YQ=="], + ) + + async def handler(ws: object) -> None: + await ws.send(json.dumps({"type": "error", "error": "auth failed"})) + + async with 
serve(handler, "127.0.0.1", 0) as server: + port = server.sockets[0].getsockname()[1] + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="m", + validate_backend=False, + ) + await be.process_startup() + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + with pytest.raises(RuntimeError, match="auth failed"): + async for _ in be.resolve(req, info): + pass + await be.process_shutdown() + + +@pytest.mark.asyncio +async def test_first_message_not_session_created_raises( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YQ=="], + ) + + async def handler(ws: object) -> None: + await ws.send(json.dumps({"type": "unexpected.ping"})) + + async with serve(handler, "127.0.0.1", 0) as server: + port = server.sockets[0].getsockname()[1] + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="m", + validate_backend=False, + ) + await be.process_startup() + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + with pytest.raises(RuntimeError, match="session.created"): + async for _ in be.resolve(req, info): + pass + await be.process_shutdown() + + +@pytest.mark.asyncio +async def test_invalid_json_from_server_raises( + monkeypatch: pytest.MonkeyPatch, +) -> None: + async def handler(ws: object) -> None: + await ws.send( + json.dumps({"type": "session.created", "id": "sess-x", "created": 0}) + ) + while True: + msg = await ws.recv() + data = json.loads(msg if isinstance(msg, str) else msg.decode()) + if data.get("type") == "input_audio_buffer.commit" and data.get("final"): + break + await ws.send("{not-json") + + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YQ=="], + ) + + async with 
serve(handler, "127.0.0.1", 0) as server: + port = server.sockets[0].getsockname()[1] + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="m", + validate_backend=False, + ) + await be.process_startup() + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + with pytest.raises(RuntimeError, match="Invalid JSON"): + async for _ in be.resolve(req, info): + pass + await be.process_shutdown() + + +@pytest.mark.asyncio +async def test_resolve_requires_process_startup() -> None: + be = OpenAIWebSocketBackend( + target="http://127.0.0.1:9", + model="m", + validate_backend=False, + ) + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + with pytest.raises(RuntimeError, match="started"): + async for _ in be.resolve(req, info): + pass + + +@pytest.mark.asyncio +async def test_resolve_rejects_history() -> None: + be = OpenAIWebSocketBackend( + target="http://127.0.0.1:9", + model="m", + validate_backend=False, + ) + await be.process_startup() + prev = GenerationRequest(request_id="prev", columns={}) + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + with pytest.raises(NotImplementedError, match="history"): + async for _ in be.resolve(req, info, history=[(prev, None)]): + pass + await be.process_shutdown() + + +@pytest.mark.asyncio +async def test_resolve_rejects_wrong_audio_column_count() -> None: + be = OpenAIWebSocketBackend( + target="http://127.0.0.1:9", + model="m", + validate_backend=False, + ) + await be.process_startup() + info = RequestInfo(timings=RequestTimings()) + + async def drain(req: GenerationRequest) -> None: + async for _ in be.resolve(req, info): + pass + + req_empty = GenerationRequest(request_id="r1", columns={"audio_column": []}) + with 
pytest.raises(ValueError, match="exactly one"): + await drain(req_empty) + req_two = GenerationRequest( + request_id="r2", + columns={"audio_column": [{"audio": b"a"}, {"audio": b"b"}]}, + ) + with pytest.raises(ValueError, match="exactly one"): + await drain(req_two) + await be.process_shutdown() + + +@pytest.mark.asyncio +@pytest.mark.timeout(45) +async def test_resolve_cancelled_after_delta_yields_partial_then_reraises( + monkeypatch: pytest.MonkeyPatch, +) -> None: + delta_seen = asyncio.Event() + + async def handler(ws: object) -> None: + await ws.send(json.dumps({"type": "session.created", "id": "s", "created": 0})) + while True: + msg = await ws.recv() + data = json.loads(msg if isinstance(msg, str) else msg.decode()) + if data.get("type") == "input_audio_buffer.commit" and data.get("final"): + break + await ws.send(json.dumps({"type": "transcription.delta", "delta": "partial"})) + delta_seen.set() + await _bounded_ws_recv(ws) + + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YQ=="], + ) + + results: list = [] + async with serve(handler, "127.0.0.1", 0) as server: + port = server.sockets[0].getsockname()[1] + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="m", + validate_backend=False, + ) + await be.process_startup() + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + + async def collect() -> None: + async for item in be.resolve(req, info): + results.append(item) + + task = asyncio.create_task(collect()) + await asyncio.wait_for(delta_seen.wait(), timeout=5.0) + await asyncio.sleep(0.05) + task.cancel() + with pytest.raises(asyncio.CancelledError): + await task + await be.process_shutdown() + + assert len(results) == 2 + assert results[0][0] is None + assert results[1][0] is not None + assert results[1][0].text == "partial" + + +@pytest.mark.asyncio +async def 
test_non_object_json_after_handshake_raises( + monkeypatch: pytest.MonkeyPatch, +) -> None: + async def handler(ws: object) -> None: + await ws.send(json.dumps({"type": "session.created", "id": "s", "created": 0})) + while True: + msg = await ws.recv() + data = json.loads(msg if isinstance(msg, str) else msg.decode()) + if data.get("type") == "input_audio_buffer.commit" and data.get("final"): + break + await ws.send("[]") + + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YQ=="], + ) + + async with serve(handler, "127.0.0.1", 0) as server: + port = server.sockets[0].getsockname()[1] + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="m", + validate_backend=False, + ) + await be.process_startup() + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + with pytest.raises(RuntimeError, match="JSON object"): + async for _ in be.resolve(req, info): + pass + await be.process_shutdown() + + +@pytest.mark.asyncio +async def test_excessive_ignored_events_raises( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "guidellm.backends.openai.websocket._MAX_IGNORED_WS_EVENT_TYPES", + 2, + ) + + async def handler(ws: object) -> None: + await ws.send(json.dumps({"type": "session.created", "id": "s", "created": 0})) + while True: + msg = await ws.recv() + data = json.loads(msg if isinstance(msg, str) else msg.decode()) + if data.get("type") == "input_audio_buffer.commit" and data.get("final"): + break + for _ in range(10): + await ws.send(json.dumps({"type": "noise.event"})) + + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YQ=="], + ) + + async with serve(handler, "127.0.0.1", 0) as server: + port = server.sockets[0].getsockname()[1] + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="m", + 
validate_backend=False, + ) + await be.process_startup() + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + with pytest.raises(RuntimeError, match="Exceeded maximum"): + async for _ in be.resolve(req, info): + pass + await be.process_shutdown() + + +@pytest.mark.asyncio +async def test_available_models_parses_response(httpx_mock: object) -> None: + httpx_mock.add_response( + url="http://127.0.0.1:9/v1/models", + json={"data": [{"id": "a"}, {"id": "b"}]}, + ) + be = OpenAIWebSocketBackend( + target="http://127.0.0.1:9", + validate_backend=False, + ) + await be.process_startup() + assert await be.available_models() == ["a", "b"] + await be.process_shutdown() + + +@pytest.mark.asyncio +async def test_available_models_bad_data_shape_raises(httpx_mock: object) -> None: + httpx_mock.add_response( + url="http://127.0.0.1:9/v1/models", + json={"data": "not-a-list"}, + ) + be = OpenAIWebSocketBackend( + target="http://127.0.0.1:9", + validate_backend=False, + ) + await be.process_startup() + with pytest.raises(RuntimeError, match="list"): + await be.available_models() + await be.process_shutdown() + + +@pytest.mark.asyncio +async def test_resolve_raises_when_no_model_and_empty_catalog( + httpx_mock: object, + monkeypatch: pytest.MonkeyPatch, +) -> None: + httpx_mock.add_response( + url="http://127.0.0.1:9/v1/models", + json={"data": []}, + ) + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YQ=="], + ) + be = OpenAIWebSocketBackend( + target="http://127.0.0.1:9", + model="", + validate_backend=False, + ) + await be.process_startup() + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + with pytest.raises(RuntimeError, match="No model configured"): + async for _ in be.resolve(req, info): + pass + await be.process_shutdown() + + 
+@pytest.mark.asyncio +async def test_resolve_invalid_ws_target_url_raises( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YQ=="], + ) + be = OpenAIWebSocketBackend( + target="", + model="m", + validate_backend=False, + ) + await be.process_startup() + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + with pytest.raises(ValueError, match="Invalid target"): + async for _ in be.resolve(req, info): + pass + await be.process_shutdown() + + +@pytest.mark.asyncio +async def test_error_event_dict_formatted_message( + monkeypatch: pytest.MonkeyPatch, +) -> None: + async def handler(ws: object) -> None: + await ws.send( + json.dumps( + { + "type": "error", + "error": {"message": "auth failed", "code": "401"}, + } + ) + ) + + monkeypatch.setattr( + "guidellm.backends.openai.websocket.pcm16_append_b64_chunks", + lambda *a, **k: ["YQ=="], + ) + + async with serve(handler, "127.0.0.1", 0) as server: + port = server.sockets[0].getsockname()[1] + be = OpenAIWebSocketBackend( + target=f"http://127.0.0.1:{port}", + model="m", + validate_backend=False, + ) + await be.process_startup() + req = GenerationRequest( + request_id="r1", + columns={"audio_column": [{"audio": b"x"}]}, + ) + info = RequestInfo(timings=RequestTimings()) + with pytest.raises(RuntimeError, match="401"): + async for _ in be.resolve(req, info): + pass + await be.process_shutdown() + + +def test_openai_websocket_backend_args_model() -> None: + a = OpenAIWebSocketBackendArgs(target="http://localhost:8000", model="x") + assert a.request_format is None + assert a.chunk_samples == 3200 + assert a.timeout == _DEFAULT_WS_RECV_TIMEOUT + + +def test_openai_websocket_backend_args_normalizes_request_format_alias() -> None: + args = OpenAIWebSocketBackendArgs( + target="http://localhost:8000", + request_format="realtime", + ) + 
assert args.request_format == "/v1/realtime" + + +def test_openai_websocket_backend_resolves_websocket_path_from_request_format() -> None: + backend = Backend.create( + "openai_websocket", + target="http://127.0.0.1:9", + request_format="/custom/ws", + ) + assert backend.websocket_path == "/custom/ws" + + +def test_openai_websocket_backend_args_invalid_request_format_rejected() -> None: + with pytest.raises(ValidationError): + OpenAIWebSocketBackendArgs( + target="http://localhost:8000", + request_format="nope", + ) diff --git a/tests/unit/backends/test_backend.py b/tests/unit/backends/test_backend.py index 1cae4952a..271117c3d 100644 --- a/tests/unit/backends/test_backend.py +++ b/tests/unit/backends/test_backend.py @@ -318,6 +318,21 @@ async def default_model(self) -> str: assert backend.type_ == "mock_backend" @pytest.mark.smoke + def test_openai_websocket_backend_registered(self): + """WebSocket OpenAI backend is registered and constructible.""" + from guidellm.backends.openai import ( + OpenAIWebSocketBackend, + OpenAIWebSocketBackendArgs, + ) + + assert Backend.is_registered("openai_websocket") + ws_args = OpenAIWebSocketBackendArgs + assert Backend.get_backend_args("openai_websocket") is ws_args + + backend = Backend.create("openai_websocket", target="http://localhost:9000") + assert isinstance(backend, OpenAIWebSocketBackend) + assert backend.type_ == "openai_websocket" + def test_openai_backend_registered(self): """Test that OpenAI HTTP backend is registered.""" from guidellm.backends.openai import OpenAIHTTPBackend diff --git a/tests/unit/benchmark/schemas/generative/test_entrypoints.py b/tests/unit/benchmark/schemas/generative/test_entrypoints.py index 2e0e1623b..ad5f8618c 100644 --- a/tests/unit/benchmark/schemas/generative/test_entrypoints.py +++ b/tests/unit/benchmark/schemas/generative/test_entrypoints.py @@ -12,6 +12,7 @@ from guidellm.backends.backend import BackendArgs from guidellm.backends.openai.http import OpenAIHttpBackendArgs +from 
guidellm.backends.openai.websocket import OpenAIWebSocketBackendArgs from guidellm.benchmark.schemas.generative.entrypoints import ( BenchmarkGenerativeTextArgs, ) @@ -52,6 +53,37 @@ def test_dict_backend_kwargs_transformed(self): assert args.backend_kwargs.target == "http://localhost:9000" assert args.backend_kwargs.model == "test_model" + def test_openai_websocket_backend_kwargs_validates(self) -> None: + """WebSocket backend accepts ``request_format`` (CLI --request-format).""" + args = BenchmarkGenerativeTextArgs.model_validate( + { + "backend": "openai_websocket", + "backend_kwargs": { + "target": "http://localhost:8000", + "model": "rt-model", + }, + "data": ["prompt_tokens=256,output_tokens=128"], + } + ) + assert args.backend == "openai_websocket" + assert isinstance(args.backend_kwargs, OpenAIWebSocketBackendArgs) + assert args.backend_kwargs.target == "http://localhost:8000" + assert args.backend_kwargs.model == "rt-model" + assert args.backend_kwargs.request_format is None + + with_format = BenchmarkGenerativeTextArgs.model_validate( + { + "backend": "openai_websocket", + "backend_kwargs": { + "target": "http://localhost:8000", + "model": "rt-model", + "request_format": "realtime", + }, + "data": ["prompt_tokens=256,output_tokens=128"], + } + ) + assert with_format.backend_kwargs.request_format == "/v1/realtime" + def test_dict_with_request_format(self): """ Test that request_format is included in BackendArgs transformation. 
diff --git a/tests/unit/extras/test_audio.py b/tests/unit/extras/test_audio.py index b7f783693..de5ca6250 100644 --- a/tests/unit/extras/test_audio.py +++ b/tests/unit/extras/test_audio.py @@ -7,7 +7,7 @@ import pytest import torch -from guidellm.extras.audio import encode_audio +from guidellm.extras.audio import encode_audio, pcm16_append_b64_chunks @pytest.fixture @@ -194,3 +194,83 @@ def test_end_to_end_audio_processing(sample_audio_tensor): assert result["format"] == "mp3" assert result["audio_samples"] == 16000 assert result["audio_seconds"] == min(original_duration, 0.5) + + +@patch("guidellm.extras.audio._decode_audio") +def test_pcm16_append_b64_chunks_rejects_unknown_dict_keys(mock_decode): + """## WRITTEN BY AI ##""" + mock_decode.side_effect = AssertionError("_decode_audio should not run") + with pytest.raises(ValueError, match="audio_column dict"): + pcm16_append_b64_chunks({"foo": 1}) + + +@patch("guidellm.extras.audio._decode_audio") +def test_pcm16_append_b64_chunks_splits_into_multiple_base64_chunks(mock_decode): + """## WRITTEN BY AI ##""" + mock_decode.return_value = MagicMock() + mock_decode.return_value.data = torch.zeros(1, 5000) + mock_decode.return_value.sample_rate = 16000 + + out = pcm16_append_b64_chunks({"audio": b"x"}, chunk_samples=3200) + + assert len(out) == 2 + assert all(isinstance(chunk_b64, str) for chunk_b64 in out) + + +@patch("guidellm.extras.audio._decode_audio") +def test_pcm16_append_b64_chunks_empty_wave_raises(mock_decode): + """## WRITTEN BY AI ##""" + mock_decode.return_value = MagicMock() + mock_decode.return_value.data = torch.zeros(1, 0) + mock_decode.return_value.sample_rate = 16000 + + with pytest.raises(ValueError, match="no PCM"): + pcm16_append_b64_chunks({"audio": b"x"}) + + +@patch("guidellm.extras.audio._decode_audio") +def test_pcm16_append_b64_chunks_downmixes_stereo(mock_decode): + """## WRITTEN BY AI ##""" + mock_decode.return_value = MagicMock() + mock_decode.return_value.data = torch.randn(2, 200) + 
mock_decode.return_value.sample_rate = 16000 + + out = pcm16_append_b64_chunks({"audio": b"x"}, chunk_samples=100) + + assert len(out) >= 1 + + +@patch("guidellm.extras.audio._decode_audio") +def test_pcm16_append_b64_chunks_audio_dict_passes_outer_sample_rate(mock_decode): + """## WRITTEN BY AI ##""" + mock_decode.return_value = MagicMock() + mock_decode.return_value.data = torch.zeros(1, 100) + mock_decode.return_value.sample_rate = 16000 + + pcm16_append_b64_chunks({"audio": b"x", "sample_rate": 8000}) + + mock_decode.assert_called_once() + assert mock_decode.call_args.kwargs.get("sample_rate") == 8000 + + +@patch("guidellm.extras.audio._decode_audio") +def test_pcm16_append_b64_chunks_sampling_rate_alias(mock_decode): + """## WRITTEN BY AI ##""" + mock_decode.return_value = MagicMock() + mock_decode.return_value.data = torch.zeros(1, 50) + mock_decode.return_value.sample_rate = 16000 + + pcm16_append_b64_chunks({"audio": b"x", "sampling_rate": 44100}) + + assert mock_decode.call_args.kwargs.get("sample_rate") == 44100 + + +@patch("guidellm.extras.audio._decode_audio") +def test_pcm16_append_b64_chunks_invalid_decoder_sample_rate_raises(mock_decode): + """## WRITTEN BY AI ##""" + mock_decode.return_value = MagicMock() + mock_decode.return_value.data = torch.zeros(1, 10) + mock_decode.return_value.sample_rate = 0 + + with pytest.raises(ValueError, match="invalid sample_rate"): + pcm16_append_b64_chunks({"audio": b"x"}) diff --git a/uv.lock b/uv.lock index c09f6e994..401f76da7 100644 --- a/uv.lock +++ b/uv.lock @@ -841,12 +841,14 @@ all = [ { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, { name = "torchcodec" }, { name = "uvloop" }, + { name = "websockets" }, ] audio = [ { name = "datasets", extra = ["audio"] }, { name = "torch", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", 
version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, { name = "torchcodec" }, + { name = "websockets" }, ] dev = [ { name = "blobfile" }, @@ -889,6 +891,7 @@ dev = [ { name = "types-requests" }, { name = "types-toml" }, { name = "uvloop" }, + { name = "websockets" }, ] perf = [ { name = "msgpack" }, @@ -985,6 +988,7 @@ requires-dist = [ { name = "types-toml", marker = "extra == 'dev'" }, { name = "uvloop", specifier = ">=0.18" }, { name = "uvloop", marker = "extra == 'perf'" }, + { name = "websockets", marker = "extra == 'audio'", specifier = ">=13.0" }, ] provides-extras = ["all", "recommended", "perf", "tokenizers", "audio", "vision", "dev"] @@ -3932,21 +3936,21 @@ dependencies = [ { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, ] wheels = [ - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:4db72a4d257c45c3502f11764ee41460a87312fdc3dff47a8957812efe961725" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:0826ac8e409551e12b2360ac18b4161a838cbd111933e694752f351191331d09" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:7fbbf409143a4fe0812a40c0b46a436030a7e1d14fe8c5234dfbe44df47f617e" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:b39cafff7229699f9d6e172cac74d85fd71b568268e439e08d9c540e54732a3e" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:7417ef370d7c3969dd509dae8d5c7daeb945af335ab76dd38358ba30a91251c1" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:90821a3194b8806d9fa9fdaa9308c1bc73df0c26808274b14129a97c99f35794" }, - { url = 
"https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:358bd7125cbec6e692d60618a5eec7f55a51b29e3652a849fd42af021d818023" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:470de4176007c2700735e003a830828a88d27129032a3add07291da07e2a94e8" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2d16abfce6c92584ceeb00c3b2665d5798424dd9ed235ea69b72e045cd53ae97" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:4584ab167995c0479f6821e3dceaf199c8166c811d3adbba5d8eedbbfa6764fd" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:45a1c5057629444aeb1c452c18298fa7f30f2f7aeadd4dc41f9d340980294407" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:339e05502b6c839db40e88720cb700f5a3b50cda332284873e851772d41b2c1e" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:840351da59cedb7bcbc51981880050813c19ef6b898a7fecf73a3afc71aff3fe" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:c88b1129fd4e14f0f882963c6728315caae35d2f47374d17edeed1edc7697497" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f4bea7dc451267c028593751612ad559299589304e68df54ae7672427893ff2c" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:4db72a4d257c45c3502f11764ee41460a87312fdc3dff47a8957812efe961725", upload-time = "2026-02-06T16:27:14Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp311-none-macosx_11_0_arm64.whl", hash = 
"sha256:0826ac8e409551e12b2360ac18b4161a838cbd111933e694752f351191331d09", upload-time = "2026-02-06T16:27:14Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:7fbbf409143a4fe0812a40c0b46a436030a7e1d14fe8c5234dfbe44df47f617e", upload-time = "2026-02-06T16:27:14Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:b39cafff7229699f9d6e172cac74d85fd71b568268e439e08d9c540e54732a3e", upload-time = "2026-02-06T16:27:17Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:7417ef370d7c3969dd509dae8d5c7daeb945af335ab76dd38358ba30a91251c1", upload-time = "2026-02-10T19:55:42Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:90821a3194b8806d9fa9fdaa9308c1bc73df0c26808274b14129a97c99f35794", upload-time = "2026-02-10T19:55:42Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:358bd7125cbec6e692d60618a5eec7f55a51b29e3652a849fd42af021d818023", upload-time = "2026-02-10T19:55:42Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:470de4176007c2700735e003a830828a88d27129032a3add07291da07e2a94e8", upload-time = "2026-02-10T19:55:43Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2d16abfce6c92584ceeb00c3b2665d5798424dd9ed235ea69b72e045cd53ae97", upload-time = "2026-01-23T15:09:55Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:4584ab167995c0479f6821e3dceaf199c8166c811d3adbba5d8eedbbfa6764fd", upload-time = "2026-01-23T15:09:55Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash 
= "sha256:45a1c5057629444aeb1c452c18298fa7f30f2f7aeadd4dc41f9d340980294407", upload-time = "2026-01-23T15:09:55Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:339e05502b6c839db40e88720cb700f5a3b50cda332284873e851772d41b2c1e", upload-time = "2026-01-23T15:09:57Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:840351da59cedb7bcbc51981880050813c19ef6b898a7fecf73a3afc71aff3fe", upload-time = "2026-01-23T15:09:59Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:c88b1129fd4e14f0f882963c6728315caae35d2f47374d17edeed1edc7697497", upload-time = "2026-01-23T15:09:59Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f4bea7dc451267c028593751612ad559299589304e68df54ae7672427893ff2c", upload-time = "2026-01-23T15:10:01Z" }, ] [[package]] @@ -3969,44 +3973,44 @@ dependencies = [ { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, ] wheels = [ - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-linux_aarch64.whl", hash = "sha256:31ae44836c8b9bbd1a3943d29c7c7457709ddf7c6173aa34aefe9d2203e4c405" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-linux_s390x.whl", hash = "sha256:beadc2a6a1785b09a46daad378de91ef274b8d3eea7af0bc2d017d97f115afdf" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d63ee6a80982fd73fe44bb70d97d2976e010312ff6db81d7bfb9167b06dd45b9" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a280ffaea7b9c828e0c1b9b3bd502d9b6a649dc9416997b69b84544bd469f215" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-win_amd64.whl", 
hash = "sha256:6c6f0df770144907092a0d067048d96ed4f278a6c840376d2ff0e27e7579b925" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_aarch64.whl", hash = "sha256:ce5c113d1f55f8c1f5af05047a24e50d11d293e0cbbb5bf7a75c6c761edd6eaa" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_s390x.whl", hash = "sha256:0e286fcf6ce0cc7b204396c9b4ea0d375f1f0c3e752f68ce3d3aeb265511db8c" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:1cfcb9b1558c6e52dffd0d4effce83b13c5ae5d97338164c372048c21f9cfccb" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:b7cb1ec66cefb90fd7b676eac72cfda3b8d4e4d0cacd7a531963bc2e0a9710ab" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:17a09465bab2aab8f0f273410297133d8d8fb6dd84dccbd252ca4a4f3a111847" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-win_arm64.whl", hash = "sha256:c35c0de592941d4944698dbfa87271ab85d3370eca3b694943a2ab307ac34b3f" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-linux_aarch64.whl", hash = "sha256:8de5a36371b775e2d4881ed12cc7f2de400b1ad3d728aa74a281f649f87c9b8c" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-linux_s390x.whl", hash = "sha256:9accc30b56cb6756d4a9d04fcb8ebc0bb68c7d55c1ed31a8657397d316d31596" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:179451716487f8cb09b56459667fa1f5c4c0946c1e75fbeae77cfc40a5768d87" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ee40b8a4b4b2cf0670c6fd4f35a7ef23871af956fecb238fbf5da15a72650b1d" }, - { url = 
"https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:21cb5436978ef47c823b7a813ff0f8c2892e266cfe0f1d944879b5fba81bf4e1" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:3eaa727e6a73affa61564d86b9d03191df45c8650d0666bd3d57c8597ef61e78" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-linux_aarch64.whl", hash = "sha256:fd215f3d0f681905c5b56b0630a3d666900a37fcc3ca5b937f95275c66f9fd9c" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-linux_s390x.whl", hash = "sha256:170a0623108055be5199370335cf9b41ba6875b3cb6f086db4aee583331a4899" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e51994492cdb76edce29da88de3672a3022f9ef0ffd90345436948d4992be2c7" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8d316e5bf121f1eab1147e49ad0511a9d92e4c45cc357d1ab0bee440da71a095" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:b719da5af01b59126ac13eefd6ba3dd12d002dc0e8e79b8b365e55267a8189d3" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:b67d91326e4ed9eccbd6b7d84ed7ffa43f93103aa3f0b24145f3001f3b11b714" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-linux_aarch64.whl", hash = "sha256:5af75e5f49de21b0bdf7672bc27139bd285f9e8dbcabe2d617a2eb656514ac36" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-linux_s390x.whl", hash = "sha256:ba51ef01a510baf8fff576174f702c47e1aa54389a9f1fba323bb1a5003ff0bf" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = 
"sha256:0fedcb1a77e8f2aaf7bfd21591bf6d1e0b207473268c9be16b17cb7783253969" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:106dd1930cb30a4a337366ba3f9b25318ebf940f51fd46f789281dd9e736bdc4" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:eb1bde1ce198f05c8770017de27e001d404499cf552aaaa014569eff56ca25c0" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-linux_aarch64.whl", hash = "sha256:ea2bcc9d1fca66974a71d4bf9a502539283f35d61fcab5a799b4e120846f1e02" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-linux_s390x.whl", hash = "sha256:f8294fd2fc6dd8f4435a891a0122307a043b14b21f0dac1bca63c85bfb59e586" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:a28fdbcfa2fbacffec81300f24dd1bed2b0ccfdbed107a823cff12bc1db070f6" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:aada8afc068add586464b2a55adb7cc9091eec55caf5320447204741cb6a0604" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:2adc71fe471e98a608723bfc837f7e1929885ebb912c693597711e139c1cda41" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-linux_aarch64.whl", hash = "sha256:9412bd37b70f5ebd1205242c4ba4cabae35a605947f2b30806d5c9b467936db9" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-linux_s390x.whl", hash = "sha256:e71c476517c33e7db69825a9ff46c7f47a723ec4dac5b2481cff4246d1c632be" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:23882f8d882460aca809882fc42f5e343bf07585274f929ced00177d1be1eb67" }, - { url = 
"https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:4fcd8b4cc2ae20f2b7749fb275349c55432393868778c2d50a08e81d5ee5591e" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:ffc8da9a1341092d6a90cb5b1c1a33cd61abf0fb43f0cd88443c27fa372c26ae" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-linux_aarch64.whl", hash = "sha256:31ae44836c8b9bbd1a3943d29c7c7457709ddf7c6173aa34aefe9d2203e4c405", upload-time = "2026-01-23T15:10:02Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-linux_s390x.whl", hash = "sha256:beadc2a6a1785b09a46daad378de91ef274b8d3eea7af0bc2d017d97f115afdf", upload-time = "2026-01-23T15:10:03Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d63ee6a80982fd73fe44bb70d97d2976e010312ff6db81d7bfb9167b06dd45b9", upload-time = "2026-01-23T15:10:05Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a280ffaea7b9c828e0c1b9b3bd502d9b6a649dc9416997b69b84544bd469f215", upload-time = "2026-01-23T15:10:07Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp310-cp310-win_amd64.whl", hash = "sha256:6c6f0df770144907092a0d067048d96ed4f278a6c840376d2ff0e27e7579b925", upload-time = "2026-01-23T15:10:09Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_aarch64.whl", hash = "sha256:ce5c113d1f55f8c1f5af05047a24e50d11d293e0cbbb5bf7a75c6c761edd6eaa", upload-time = "2026-01-23T15:10:11Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_s390x.whl", hash = "sha256:0e286fcf6ce0cc7b204396c9b4ea0d375f1f0c3e752f68ce3d3aeb265511db8c", upload-time = "2026-01-23T15:10:12Z" }, + { url = 
"https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:1cfcb9b1558c6e52dffd0d4effce83b13c5ae5d97338164c372048c21f9cfccb", upload-time = "2026-01-23T15:10:15Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:b7cb1ec66cefb90fd7b676eac72cfda3b8d4e4d0cacd7a531963bc2e0a9710ab", upload-time = "2026-01-23T15:10:15Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:17a09465bab2aab8f0f273410297133d8d8fb6dd84dccbd252ca4a4f3a111847", upload-time = "2026-01-23T15:10:19Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-win_arm64.whl", hash = "sha256:c35c0de592941d4944698dbfa87271ab85d3370eca3b694943a2ab307ac34b3f", upload-time = "2026-01-23T15:10:20Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-linux_aarch64.whl", hash = "sha256:8de5a36371b775e2d4881ed12cc7f2de400b1ad3d728aa74a281f649f87c9b8c", upload-time = "2026-01-23T15:10:22Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-linux_s390x.whl", hash = "sha256:9accc30b56cb6756d4a9d04fcb8ebc0bb68c7d55c1ed31a8657397d316d31596", upload-time = "2026-01-23T15:10:24Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:179451716487f8cb09b56459667fa1f5c4c0946c1e75fbeae77cfc40a5768d87", upload-time = "2026-01-23T15:10:25Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ee40b8a4b4b2cf0670c6fd4f35a7ef23871af956fecb238fbf5da15a72650b1d", upload-time = "2026-01-23T15:10:27Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:21cb5436978ef47c823b7a813ff0f8c2892e266cfe0f1d944879b5fba81bf4e1", upload-time = 
"2026-01-23T15:10:30Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:3eaa727e6a73affa61564d86b9d03191df45c8650d0666bd3d57c8597ef61e78", upload-time = "2026-01-23T15:10:31Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-linux_aarch64.whl", hash = "sha256:fd215f3d0f681905c5b56b0630a3d666900a37fcc3ca5b937f95275c66f9fd9c", upload-time = "2026-01-23T15:10:34Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-linux_s390x.whl", hash = "sha256:170a0623108055be5199370335cf9b41ba6875b3cb6f086db4aee583331a4899", upload-time = "2026-01-23T15:10:35Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e51994492cdb76edce29da88de3672a3022f9ef0ffd90345436948d4992be2c7", upload-time = "2026-01-23T15:10:37Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8d316e5bf121f1eab1147e49ad0511a9d92e4c45cc357d1ab0bee440da71a095", upload-time = "2026-01-23T15:10:38Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:b719da5af01b59126ac13eefd6ba3dd12d002dc0e8e79b8b365e55267a8189d3", upload-time = "2026-01-23T15:10:41Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:b67d91326e4ed9eccbd6b7d84ed7ffa43f93103aa3f0b24145f3001f3b11b714", upload-time = "2026-01-23T15:10:42Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-linux_aarch64.whl", hash = "sha256:5af75e5f49de21b0bdf7672bc27139bd285f9e8dbcabe2d617a2eb656514ac36", upload-time = "2026-01-23T15:10:44Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-linux_s390x.whl", hash = 
"sha256:ba51ef01a510baf8fff576174f702c47e1aa54389a9f1fba323bb1a5003ff0bf", upload-time = "2026-01-23T15:10:48Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:0fedcb1a77e8f2aaf7bfd21591bf6d1e0b207473268c9be16b17cb7783253969", upload-time = "2026-01-23T15:10:48Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:106dd1930cb30a4a337366ba3f9b25318ebf940f51fd46f789281dd9e736bdc4", upload-time = "2026-01-23T15:10:50Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:eb1bde1ce198f05c8770017de27e001d404499cf552aaaa014569eff56ca25c0", upload-time = "2026-01-23T15:10:50Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-linux_aarch64.whl", hash = "sha256:ea2bcc9d1fca66974a71d4bf9a502539283f35d61fcab5a799b4e120846f1e02", upload-time = "2026-01-23T15:10:53Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-linux_s390x.whl", hash = "sha256:f8294fd2fc6dd8f4435a891a0122307a043b14b21f0dac1bca63c85bfb59e586", upload-time = "2026-01-23T15:10:55Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:a28fdbcfa2fbacffec81300f24dd1bed2b0ccfdbed107a823cff12bc1db070f6", upload-time = "2026-01-23T15:10:56Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:aada8afc068add586464b2a55adb7cc9091eec55caf5320447204741cb6a0604", upload-time = "2026-01-23T15:10:58Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:2adc71fe471e98a608723bfc837f7e1929885ebb912c693597711e139c1cda41", upload-time = "2026-01-23T15:11:01Z" }, + { url = 
"https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-linux_aarch64.whl", hash = "sha256:9412bd37b70f5ebd1205242c4ba4cabae35a605947f2b30806d5c9b467936db9", upload-time = "2026-01-23T15:11:03Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-linux_s390x.whl", hash = "sha256:e71c476517c33e7db69825a9ff46c7f47a723ec4dac5b2481cff4246d1c632be", upload-time = "2026-01-23T15:11:04Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:23882f8d882460aca809882fc42f5e343bf07585274f929ced00177d1be1eb67", upload-time = "2026-01-23T15:11:07Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:4fcd8b4cc2ae20f2b7749fb275349c55432393868778c2d50a08e81d5ee5591e", upload-time = "2026-01-23T15:11:07Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:ffc8da9a1341092d6a90cb5b1c1a33cd61abf0fb43f0cd88443c27fa372c26ae", upload-time = "2026-01-23T15:11:10Z" }, ] [[package]]