diff --git a/docs/docs/api-openai/provider_matrix.md b/docs/docs/api-openai/provider_matrix.md index 6f3c13b61c..0e7d37cb00 100644 --- a/docs/docs/api-openai/provider_matrix.md +++ b/docs/docs/api-openai/provider_matrix.md @@ -33,7 +33,7 @@ Models, endpoints, and versions used during test recordings. | Provider | Model(s) | Endpoint | Version Info | |----------|----------|----------|--------------| | azure | gpt-4o | llama-stack-test.openai.azure.com, lls-test.openai.azure.com | openai sdk: 2.5.0 | -| bedrock | openai.gpt-oss-20b | bedrock-mantle.us-east-2.api.aws, bedrock-mantle.us-west-2.api.aws | openai sdk: 2.5.0 | +| bedrock | openai.gpt-oss-20b | bedrock-mantle.us-west-2.api.aws, bedrock-runtime.us-east-2.amazonaws.com | openai sdk: 2.5.0 | | ollama | gpt-oss:20b | — | openai sdk: 2.5.0, vllm server: 0.9.2rc2.dev136+g0b382b53a.d20250924 | | openai | gpt-4o, o4-mini, text-embedding-3-small | api.openai.com | openai sdk: 2.5.0 | | vllm | Qwen/Qwen3-0.6B | — | openai sdk: 2.5.0, vllm server: 0.18.1rc1.dev197+g0e9358c11 | diff --git a/docs/docs/providers/inference/remote_bedrock.mdx b/docs/docs/providers/inference/remote_bedrock.mdx index be31b9a983..3095d3c6cc 100644 --- a/docs/docs/providers/inference/remote_bedrock.mdx +++ b/docs/docs/providers/inference/remote_bedrock.mdx @@ -34,11 +34,25 @@ AWS Bedrock inference provider using OpenAI compatible endpoint. | `network.timeout.connect` | `float \| None` | No | | Connection timeout in seconds. | | `network.timeout.read` | `float \| None` | No | | Read timeout in seconds. | | `network.headers` | `dict[str, str] \| None` | No | | Additional HTTP headers to include in all requests. | -| `region_name` | `str` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint | +| `aws_access_key_id` | `SecretStr \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | +| `aws_secret_access_key` | `SecretStr \| None` | No | | The AWS secret access key to use. 
Default use environment variable: AWS_SECRET_ACCESS_KEY | +| `aws_session_token` | `SecretStr \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | +| `aws_role_arn` | `str \| None` | No | | The AWS role ARN to assume. Default use environment variable: AWS_ROLE_ARN | +| `aws_web_identity_token_file` | `str \| None` | No | | The path to the web identity token file. Default use environment variable: AWS_WEB_IDENTITY_TOKEN_FILE | +| `aws_role_session_name` | `str \| None` | No | | The session name to use when assuming a role. Default use environment variable: AWS_ROLE_SESSION_NAME | +| `region_name` | `str \| None` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint | +| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | +| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS | +| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE | +| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. | +| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. | +| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). 
| ## Sample Configuration ```yaml api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} +aws_role_arn: ${env.AWS_ROLE_ARN:=} +aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=} ``` diff --git a/docs/docs/providers/safety/remote_bedrock.mdx b/docs/docs/providers/safety/remote_bedrock.mdx index fc18f726ea..0d3de70b94 100644 --- a/docs/docs/providers/safety/remote_bedrock.mdx +++ b/docs/docs/providers/safety/remote_bedrock.mdx @@ -36,6 +36,9 @@ AWS Bedrock safety provider for content moderation using AWS's safety services. | `aws_access_key_id` | `SecretStr \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | | `aws_secret_access_key` | `SecretStr \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | | `aws_session_token` | `SecretStr \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | +| `aws_role_arn` | `str \| None` | No | | The AWS role ARN to assume. Default use environment variable: AWS_ROLE_ARN | +| `aws_web_identity_token_file` | `str \| None` | No | | The path to the web identity token file. Default use environment variable: AWS_WEB_IDENTITY_TOKEN_FILE | +| `aws_role_session_name` | `str \| None` | No | | The session name to use when assuming a role. Default use environment variable: AWS_ROLE_SESSION_NAME | | `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION | | `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | | `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. 
Default use environment variable: AWS_MAX_ATTEMPTS | diff --git a/src/llama_stack/core/request_headers.py b/src/llama_stack/core/request_headers.py index e0a61c4ba6..a55e13e016 100644 --- a/src/llama_stack/core/request_headers.py +++ b/src/llama_stack/core/request_headers.py @@ -7,7 +7,7 @@ import contextvars import json from contextlib import AbstractContextManager -from typing import Any +from typing import TYPE_CHECKING, Any, cast from starlette.types import Scope @@ -16,6 +16,9 @@ from .utils.dynamic import instantiate_class_type +if TYPE_CHECKING: + from llama_stack_api import ProviderSpec + log = get_logger(name=__name__, category="core") # Context variable for request provider data and auth attributes @@ -26,6 +29,9 @@ class RequestProviderDataContext(AbstractContextManager[None]): """Context manager for request provider data""" def __init__(self, provider_data: dict[str, Any] | None = None, user: User | None = None) -> None: + if provider_data is not None and not isinstance(provider_data, dict): + log.error("Provider data must be a JSON object") + provider_data = None self.provider_data = provider_data or {} if user: self.provider_data["__authenticated_user"] = user @@ -45,6 +51,8 @@ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: class NeedsRequestProviderData: """Mixin for providers that require per-request provider data from request headers.""" + __provider_spec__: "ProviderSpec" + def get_request_provider_data(self) -> Any: spec = self.__provider_spec__ # type: ignore[attr-defined] if not spec: @@ -84,11 +92,20 @@ def parse_request_provider_data(headers: dict[str, str]) -> dict[str, Any] | Non return None try: - return json.loads(val) # type: ignore[no-any-return] + parsed = json.loads(val) except json.JSONDecodeError: log.error("Provider data not encoded as a JSON object!") return None + if parsed is None: + return None + + if not isinstance(parsed, dict): + log.error("Provider data must be encoded as a JSON object") + return None 
+ + return cast(dict[str, Any], parsed) + def request_provider_data_context(headers: dict[str, str], user: User | None = None) -> AbstractContextManager[None]: """Context manager that sets request provider data from headers and user for the duration of the context""" diff --git a/src/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py index e100ad161d..302b1c07ee 100644 --- a/src/llama_stack/distributions/ci-tests/ci_tests.py +++ b/src/llama_stack/distributions/ci-tests/ci_tests.py @@ -52,9 +52,11 @@ def get_distribution_template() -> DistributionTemplate: # Bedrock model must be pre-registered because the recording system cannot # replay model-list discovery calls against the Bedrock endpoint in CI. + # Gate on AWS_DEFAULT_REGION (required for both bearer-token and SigV4 modes) + # rather than AWS_BEARER_TOKEN_BEDROCK so the model registers in OIDC/IRSA CI too. bedrock_model = ModelInput( model_id="bedrock/openai.gpt-oss-20b", - provider_id="${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock}", + provider_id="${env.AWS_DEFAULT_REGION:+bedrock}", provider_model_id="openai.gpt-oss-20b", model_type=ModelType.llm, ) diff --git a/src/llama_stack/distributions/ci-tests/config.yaml b/src/llama_stack/distributions/ci-tests/config.yaml index 7bcbb6eee9..a51945e36f 100644 --- a/src/llama_stack/distributions/ci-tests/config.yaml +++ b/src/llama_stack/distributions/ci-tests/config.yaml @@ -47,6 +47,8 @@ providers: config: api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + aws_role_arn: ${env.AWS_ROLE_ARN:=} + aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: @@ -301,7 +303,7 @@ registered_resources: model_type: llm - metadata: {} model_id: bedrock/openai.gpt-oss-20b - provider_id: ${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock} + provider_id: ${env.AWS_DEFAULT_REGION:+bedrock} provider_model_id: 
openai.gpt-oss-20b model_type: llm shields: diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 553ed41118..9583bbcd20 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -47,6 +47,8 @@ providers: config: api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + aws_role_arn: ${env.AWS_ROLE_ARN:=} + aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: @@ -314,7 +316,7 @@ registered_resources: model_type: llm - metadata: {} model_id: bedrock/openai.gpt-oss-20b - provider_id: ${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock} + provider_id: ${env.AWS_DEFAULT_REGION:+bedrock} provider_model_id: openai.gpt-oss-20b model_type: llm shields: diff --git a/src/llama_stack/distributions/starter/config.yaml b/src/llama_stack/distributions/starter/config.yaml index fb9cf9ae84..bbe35b85a3 100644 --- a/src/llama_stack/distributions/starter/config.yaml +++ b/src/llama_stack/distributions/starter/config.yaml @@ -47,6 +47,8 @@ providers: config: api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + aws_role_arn: ${env.AWS_ROLE_ARN:=} + aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 52225576f5..86e00cf5ba 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -47,6 +47,8 @@ providers: config: api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: 
${env.AWS_DEFAULT_REGION:=us-east-2} + aws_role_arn: ${env.AWS_ROLE_ARN:=} + aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py index 6bc2976d4d..12bf1a940b 100644 --- a/src/llama_stack/providers/registry/inference.py +++ b/src/llama_stack/providers/registry/inference.py @@ -121,7 +121,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.inference, adapter_type="bedrock", provider_type="remote::bedrock", - pip_packages=[], + pip_packages=["boto3"], module="llama_stack.providers.remote.inference.bedrock", config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig", provider_data_validator="llama_stack.providers.remote.inference.bedrock.config.BedrockProviderDataValidator", diff --git a/src/llama_stack/providers/remote/inference/bedrock/__init__.py b/src/llama_stack/providers/remote/inference/bedrock/__init__.py index 4b0686b187..12726e32d0 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/__init__.py +++ b/src/llama_stack/providers/remote/inference/bedrock/__init__.py @@ -9,7 +9,8 @@ async def get_adapter_impl(config: BedrockConfig, _deps): from .bedrock import BedrockInferenceAdapter - assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}" + if not isinstance(config, BedrockConfig): + raise TypeError(f"Unexpected config type: {type(config)}") impl = BedrockInferenceAdapter(config=config) diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py index 02800d21ed..bd5ba32630 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -4,16 +4,29 @@ # This source code is licensed under the terms described in the LICENSE file in # 
the root directory of this source tree. -from collections.abc import AsyncIterator +import asyncio +from collections.abc import AsyncIterator, Iterable +from typing import TYPE_CHECKING, Any, NoReturn -from openai import AuthenticationError +if TYPE_CHECKING: + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + +import httpx +from openai import AuthenticationError, PermissionDeniedError +from pydantic import PrivateAttr from llama_stack.log import get_logger from llama_stack.providers.inline.responses.builtin.responses.types import ( AssistantMessageWithReasoning, ) +from llama_stack.providers.utils.inference.http_client import ( + build_network_client_kwargs, + network_config_fingerprint, + set_client_network_fingerprint, +) from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( + InternalServerError, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionChunkWithReasoning, @@ -25,8 +38,6 @@ OpenAIEmbeddingsResponse, ) -from .config import BedrockConfig - logger = get_logger(name=__name__, category="inference::bedrock") @@ -35,14 +46,139 @@ class BedrockInferenceAdapter(OpenAIMixin): Adapter for AWS Bedrock's OpenAI-compatible API endpoints. Supports Llama models across regions and GPT-OSS models (us-west-2 only). + + Authentication modes: + 1. Bearer token (legacy): Set AWS_BEARER_TOKEN_BEDROCK or api_key in config + 2. AWS credential chain (enterprise): Leave api_key unset, configure AWS creds + - Web Identity Federation (IRSA, GitHub Actions OIDC) + - IAM roles (EC2, ECS, Lambda) + - AWS profiles + - Static credentials + + When using AWS credential chain, requests are signed using SigV4 with the + "bedrock" signing name (note: the endpoint hostname uses "bedrock-runtime", + but SigV4 credential scope uses the signing name "bedrock"). 
+ + Web Identity Federation Examples: + + Kubernetes/OpenShift (IRSA): + Set these environment variables in your pod spec: + - AWS_ROLE_ARN=arn:aws:iam::123456789012:role/llama-stack-role + - AWS_WEB_IDENTITY_TOKEN_FILE= + Common paths: + - EKS: /var/run/secrets/eks.amazonaws.com/serviceaccount/token + - Generic K8s: /var/run/secrets/kubernetes.io/serviceaccount/token + - AWS_DEFAULT_REGION=us-east-2 + + GitHub Actions: + Use aws-actions/configure-aws-credentials with OIDC: + + permissions: + id-token: write # Required for OIDC + + steps: + - uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::123456789012:role/github-actions-role + aws-region: us-east-2 + + Credentials are automatically refreshed by boto3 when they expire. + + Note: Bedrock's OpenAI-compatible endpoint does not support /v1/models + for dynamic model discovery. Models must be pre-registered in the config. """ - config: BedrockConfig - provider_data_api_key_field: str = "aws_bearer_token_bedrock" + provider_data_api_key_field: str | None = "aws_bearer_token_bedrock" + + # built once in initialize() so get_extra_client_params() can stay sync; + # reusing one client also avoids opening a new socket per request + _sigv4_http_client: httpx.AsyncClient | None = PrivateAttr(default=None) + + @property + def _bedrock_config(self) -> "BedrockConfig": + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + if not isinstance(self.config, BedrockConfig): + raise TypeError(f"Expected BedrockConfig, got {type(self.config)}") + return self.config def get_base_url(self) -> str: - """Get base URL for OpenAI client.""" - return f"https://bedrock-mantle.{self.config.region_name}.api.aws/v1" + region = self._bedrock_config.region_name or "us-east-2" + return f"https://bedrock-runtime.{region}.amazonaws.com/openai/v1" + + def _should_use_sigv4(self) -> bool: + # checked per-request so a bearer token in provider data can override SigV4 at runtime + if 
self._bedrock_config.has_bearer_token(): + return False + + provider_data = self.get_request_provider_data() + if provider_data and provider_data.aws_bearer_token_bedrock is not None: + val = provider_data.aws_bearer_token_bedrock.get_secret_value() + if val and val.strip(): + return False + + return True + + def _build_sigv4_http_client(self) -> httpx.AsyncClient: + # lazy import so bearer-token installs don't need boto3/botocore + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + cfg = self._bedrock_config + sigv4_args: dict[str, Any] = { + "region": cfg.region_name or "us-east-2", + "service": "bedrock", # botocore signing name, not the endpoint prefix "bedrock-runtime" + "aws_access_key_id": cfg.aws_access_key_id.get_secret_value() if cfg.aws_access_key_id else None, + "aws_secret_access_key": cfg.aws_secret_access_key.get_secret_value() + if cfg.aws_secret_access_key + else None, + "aws_session_token": cfg.aws_session_token.get_secret_value() if cfg.aws_session_token else None, + "profile_name": cfg.profile_name, + "aws_role_arn": cfg.aws_role_arn, + "aws_web_identity_token_file": cfg.aws_web_identity_token_file, + "aws_role_session_name": cfg.aws_role_session_name, + "session_ttl": cfg.session_ttl, + } + auth = BedrockSigV4Auth(**{k: v for k, v in sigv4_args.items() if v is not None}) + network_config = cfg.network + network_kwargs = build_network_client_kwargs(network_config) + client = httpx.AsyncClient(auth=auth, **network_kwargs) + if network_config is not None: + set_client_network_fingerprint(client, network_config_fingerprint(network_config)) + return client + + async def initialize(self) -> None: + await super().initialize() + # no request context at init time, so only the static config is available; + # per-request bearer token overrides are handled in get_extra_client_params() + if not self._bedrock_config.has_bearer_token(): + self._sigv4_http_client = self._build_sigv4_http_client() + + def get_api_key(self) -> str | 
None: + if self._should_use_sigv4(): + # openai sdk requires a non-empty api_key; sigv4_auth will overwrite + # the resulting "Bearer " header with the real SigV4 signature + return "" + return super().get_api_key() + + def get_extra_client_params(self) -> dict[str, Any]: + # re-check per request so a runtime bearer token in provider data can bypass sigv4 + if self._sigv4_http_client is not None and self._should_use_sigv4(): + return {"http_client": self._sigv4_http_client} + return {} + + async def list_provider_model_ids(self) -> Iterable[str]: + # bedrock's openai-compatible endpoint doesn't expose /v1/models + return [] + + async def check_model_availability(self, model: str) -> bool: + # no /v1/models to query — accept whatever is registered in config + return True + + async def shutdown(self) -> None: + if self._sigv4_http_client is not None: + # shield so stack.py's 5s asyncio.wait_for doesn't abort mid-close and leak a socket + await asyncio.shield(self._sigv4_http_client.aclose()) + self._sigv4_http_client = None async def openai_embeddings( self, @@ -120,9 +256,10 @@ async def openai_chat_completion( self, params: OpenAIChatCompletionRequestWithExtraBody, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - """Override to handle authentication errors and null responses.""" + use_sigv4 = self._should_use_sigv4() + try: - logger.debug("Calling Bedrock OpenAI API", model=params.model, stream=params.stream) + logger.debug("Calling Bedrock OpenAI API", model=params.model, stream=params.stream, sigv4=use_sigv4) result = await super().openai_chat_completion(params=params) logger.debug("Bedrock API returned", result_type=type(result).__name__ if result is not None else "None") @@ -134,27 +271,44 @@ async def openai_chat_completion( ) return result - except AuthenticationError as e: + except (AuthenticationError, PermissionDeniedError) as e: + # PermissionDeniedError (403) covers SigV4 failures like SignatureDoesNotMatch + # and AccessDenied — 
same sanitized path as AuthenticationError (401) error_msg = str(e) - - # Check if this is a token expiration error - if "expired" in error_msg.lower() or "Bearer Token has expired" in error_msg: - logger.error("AWS Bedrock authentication token expired", error=error_msg) - raise ValueError( - "AWS Bedrock authentication failed: Bearer token has expired. " - "The AWS_BEARER_TOKEN_BEDROCK environment variable contains an expired pre-signed URL. " - "Please refresh your token by generating a new pre-signed URL with AWS credentials. " - "Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints." - ) from e - else: - logger.error("AWS Bedrock authentication failed", error=error_msg) - raise ValueError( - f"AWS Bedrock authentication failed: {error_msg}. " - "Please verify your API key is correct in the provider config or x-llamastack-provider-data header. " - "The API key should be a valid AWS pre-signed URL for Bedrock's OpenAI-compatible endpoint." + self._handle_auth_error(error_msg, e, use_sigv4=use_sigv4) + except (RuntimeError, OSError) as e: + # credential resolution failures (missing AWS creds, unreadable web identity + # token file, STS errors) should surface as sanitized auth errors, not raw + # exception messages that may leak internal paths or AWS account details + if use_sigv4: + logger.error("AWS Bedrock SigV4 credential resolution failed", error_type=type(e).__name__) + raise InternalServerError( + "Authentication failed because the server could not resolve AWS credentials. " + "Please verify that the server has valid AWS credentials configured." 
) from e + raise except Exception as e: logger.error( "Unexpected error calling Bedrock API", error_type=type(e).__name__, error=str(e), exc_info=True ) raise + + def _handle_auth_error(self, error_msg: str, original_error: Exception, *, use_sigv4: bool) -> NoReturn: + if use_sigv4: + logger.error("AWS Bedrock SigV4 authentication failed") + raise InternalServerError( + "Authentication failed because the configured cloud credentials could not authorize this request. " + "Please verify that the credentials available to the server are valid, unexpired, and allowed to access the requested model." + ) from original_error + + if "expired" in error_msg.lower() or "Bearer Token has expired" in error_msg: + logger.error("AWS Bedrock authentication token expired") + raise InternalServerError( + "Authentication failed because the provided request credential has expired. " + "Please refresh the credential and try again, or remove it so the server can use its configured cloud credentials." + ) from original_error + logger.error("AWS Bedrock authentication failed") + raise InternalServerError( + "Authentication failed because the provided request credential was rejected. " + "Please verify that the credential is valid, unexpired, and authorized for this request." 
+ ) from original_error diff --git a/src/llama_stack/providers/remote/inference/bedrock/config.py b/src/llama_stack/providers/remote/inference/bedrock/config.py index 667b5a6e56..810e9e5a2c 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/config.py +++ b/src/llama_stack/providers/remote/inference/bedrock/config.py @@ -8,7 +8,7 @@ from pydantic import BaseModel, Field, SecretStr -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig class BedrockProviderDataValidator(BaseModel): @@ -20,17 +20,32 @@ class BedrockProviderDataValidator(BaseModel): ) -class BedrockConfig(RemoteInferenceProviderConfig): +class BedrockConfig(BedrockBaseConfig): """Configuration for the AWS Bedrock inference provider.""" - region_name: str = Field( + auth_credential: SecretStr | None = Field( + default=None, + description="Authentication credential for the provider", + alias="api_key", + ) + # Override region_name to default to us-east-2 when unset + region_name: str | None = Field( default_factory=lambda: os.getenv("AWS_DEFAULT_REGION", "us-east-2"), description="AWS Region for the Bedrock Runtime endpoint", ) + def has_bearer_token(self) -> bool: + """Check if a bearer token is configured.""" + if self.auth_credential is None: + return False + token = self.auth_credential.get_secret_value() + return bool(token and token.strip()) + @classmethod def sample_run_config(cls, **kwargs): return { "api_key": "${env.AWS_BEARER_TOKEN_BEDROCK:=}", "region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}", + "aws_role_arn": "${env.AWS_ROLE_ARN:=}", + "aws_web_identity_token_file": "${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}", } diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py index 045dfb83b3..3f6899d073 100644 --- a/src/llama_stack/providers/remote/inference/vllm/vllm.py +++ 
b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -13,7 +13,9 @@ from llama_stack.providers.inline.responses.builtin.responses.types import ( AssistantMessageWithReasoning, ) -from llama_stack.providers.utils.inference.http_client import _build_network_client_kwargs +from llama_stack.providers.utils.inference.http_client import ( + build_network_client_kwargs as _build_network_client_kwargs, +) from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( HealthResponse, diff --git a/src/llama_stack/providers/utils/bedrock/client.py b/src/llama_stack/providers/utils/bedrock/client.py index 90b1b247b5..7f2a2d9abc 100644 --- a/src/llama_stack/providers/utils/bedrock/client.py +++ b/src/llama_stack/providers/utils/bedrock/client.py @@ -9,7 +9,7 @@ from botocore.client import BaseClient from botocore.config import Config -from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig +from llama_stack.providers.utils.bedrock.config import DEFAULT_SESSION_TTL, BedrockBaseConfig from llama_stack.providers.utils.bedrock.refreshable_boto_session import ( RefreshableBotoSession, ) @@ -25,36 +25,50 @@ def create_bedrock_client(config: BedrockBaseConfig, service_name: str = "bedroc Returns: A configured boto3 client """ - if config.aws_access_key_id and config.aws_secret_access_key: - retries_config = { - k: v - for k, v in dict( - total_max_attempts=config.total_max_attempts, - mode=config.retry_mode, - ).items() - if v is not None - } - - config_args = { - k: v - for k, v in dict( - region_name=config.region_name, - retries=retries_config if retries_config else None, - connect_timeout=config.connect_timeout, - read_timeout=config.read_timeout, - ).items() - if v is not None - } - - boto3_config = Config(**config_args) + retries_config = { + k: v + for k, v in dict( + total_max_attempts=config.total_max_attempts, + mode=config.retry_mode, + ).items() + if v is not None + } + boto3_config_args = { + k: v + for k, 
v in dict( + region_name=config.region_name, + retries=retries_config if retries_config else None, + connect_timeout=config.connect_timeout, + read_timeout=config.read_timeout, + ).items() + if v is not None + } + boto3_config = Config(**boto3_config_args) if boto3_config_args else None + if config.aws_role_arn: + # role assumption takes priority — source credentials (if any) are passed in + # so the refreshable session can use them as the base for assume-role calls + client = RefreshableBotoSession( + region_name=config.region_name, + aws_access_key_id=config.aws_access_key_id.get_secret_value() if config.aws_access_key_id else None, + aws_secret_access_key=config.aws_secret_access_key.get_secret_value() + if config.aws_secret_access_key + else None, + aws_session_token=config.aws_session_token.get_secret_value() if config.aws_session_token else None, + profile_name=config.profile_name, + sts_arn=config.aws_role_arn, + web_identity_token_file=config.aws_web_identity_token_file, + session_name=config.aws_role_session_name, + session_ttl=config.session_ttl or DEFAULT_SESSION_TTL, + ).refreshable_session() + return client.client(service_name, config=boto3_config) if boto3_config else client.client(service_name) + elif config.aws_access_key_id and config.aws_secret_access_key: session_args = { "aws_access_key_id": config.aws_access_key_id.get_secret_value(), "aws_secret_access_key": config.aws_secret_access_key.get_secret_value(), "aws_session_token": config.aws_session_token.get_secret_value() if config.aws_session_token else None, "region_name": config.region_name, "profile_name": config.profile_name, - "session_ttl": config.session_ttl, } # Remove None values @@ -63,12 +77,9 @@ def create_bedrock_client(config: BedrockBaseConfig, service_name: str = "bedroc boto3_session = boto3.session.Session(**session_args) return boto3_session.client(service_name, config=boto3_config) else: - return ( - RefreshableBotoSession( - region_name=config.region_name, - 
profile_name=config.profile_name, - session_ttl=config.session_ttl, - ) - .refreshable_session() - .client(service_name) - ) + session = RefreshableBotoSession( + region_name=config.region_name, + profile_name=config.profile_name, + session_ttl=config.session_ttl or DEFAULT_SESSION_TTL, + ).refreshable_session() + return session.client(service_name, config=boto3_config) if boto3_config else session.client(service_name) diff --git a/src/llama_stack/providers/utils/bedrock/config.py b/src/llama_stack/providers/utils/bedrock/config.py index b4bb0afa28..48385a3faf 100644 --- a/src/llama_stack/providers/utils/bedrock/config.py +++ b/src/llama_stack/providers/utils/bedrock/config.py @@ -8,6 +8,9 @@ from pydantic import Field, SecretStr +# 1 hour — matches AWS's default role expiration and minimum recommended TTL +DEFAULT_SESSION_TTL = 3600 + from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig @@ -27,6 +30,18 @@ class BedrockBaseConfig(RemoteInferenceProviderConfig): default_factory=lambda: SecretStr(val) if (val := os.getenv("AWS_SESSION_TOKEN")) else None, description="The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN", ) + aws_role_arn: str | None = Field( + default_factory=lambda: os.getenv("AWS_ROLE_ARN"), + description="The AWS role ARN to assume. Default use environment variable: AWS_ROLE_ARN", + ) + aws_web_identity_token_file: str | None = Field( + default_factory=lambda: os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE"), + description="The path to the web identity token file. Default use environment variable: AWS_WEB_IDENTITY_TOKEN_FILE", + ) + aws_role_session_name: str | None = Field( + default_factory=lambda: os.getenv("AWS_ROLE_SESSION_NAME"), + description="The session name to use when assuming a role. 
Default use environment variable: AWS_ROLE_SESSION_NAME", + ) region_name: str | None = Field( default_factory=lambda: os.getenv("AWS_DEFAULT_REGION"), description="The default AWS Region to use, for example, us-west-1 or us-west-2." @@ -57,7 +72,7 @@ class BedrockBaseConfig(RemoteInferenceProviderConfig): "The default is 60 seconds.", ) session_ttl: int | None = Field( - default_factory=lambda: int(os.getenv("AWS_SESSION_TTL", "3600")), + default_factory=lambda: int(os.getenv("AWS_SESSION_TTL", str(DEFAULT_SESSION_TTL))), description="The time in seconds till a session expires. The default is 3600 seconds (1 hour).", ) diff --git a/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py b/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py index 8dab40424d..575d8d031b 100644 --- a/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py +++ b/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py @@ -12,68 +12,69 @@ from botocore.credentials import RefreshableCredentials from botocore.session import get_session +from llama_stack.providers.utils.bedrock.config import DEFAULT_SESSION_TTL + class RefreshableBotoSession: """ - Boto Helper class which lets us create a refreshable session so that we can cache the client or resource. - - Usage - ----- - session = RefreshableBotoSession().refreshable_session() + Wraps a boto3 session so credentials refresh automatically before they expire. - client = session.client("s3") # we now can cache this client object without worrying about expiring credentials + Use this when you need a long-lived boto3 client (e.g. a cached bedrock-runtime + client) without worrying about STS credentials timing out mid-request. 
""" def __init__( self, - region_name: str = None, - profile_name: str = None, - sts_arn: str = None, - session_name: str = None, - session_ttl: int = 30000, + region_name: str | None = None, + aws_access_key_id: str | None = None, + aws_secret_access_key: str | None = None, + aws_session_token: str | None = None, + profile_name: str | None = None, + sts_arn: str | None = None, + web_identity_token_file: str | None = None, + session_name: str | None = None, + session_ttl: int = DEFAULT_SESSION_TTL, ): - """ - Initialize `RefreshableBotoSession` - - Parameters - ---------- - region_name : str (optional) - Default region when creating a new connection. - - profile_name : str (optional) - The name of a profile to use. - - sts_arn : str (optional) - The role arn to sts before creating a session. - - session_name : str (optional) - An identifier for the assumed role session. (required when `sts_arn` is given) - - session_ttl : int (optional) - An integer number to set the TTL for each session. Beyond this session, it will renew the token. 
- 50 minutes by default which is before the default role expiration of 1 hour - """ - self.region_name = region_name + self.aws_access_key_id = aws_access_key_id + self.aws_secret_access_key = aws_secret_access_key + self.aws_session_token = aws_session_token self.profile_name = profile_name self.sts_arn = sts_arn + self.web_identity_token_file = web_identity_token_file self.session_name = session_name or uuid4().hex self.session_ttl = session_ttl def __get_session_credentials(self): - """ - Get session credentials - """ - session = Session(region_name=self.region_name, profile_name=self.profile_name) + session_args = { + "region_name": self.region_name, + "profile_name": self.profile_name, + "aws_access_key_id": self.aws_access_key_id, + "aws_secret_access_key": self.aws_secret_access_key, + "aws_session_token": self.aws_session_token, + } + session_args = {k: v for k, v in session_args.items() if v is not None} + session = Session(**session_args) - # if sts_arn is given, get credential by assuming the given role if self.sts_arn: sts_client = session.client(service_name="sts", region_name=self.region_name) - response = sts_client.assume_role( - RoleArn=self.sts_arn, - RoleSessionName=self.session_name, - DurationSeconds=self.session_ttl, - ).get("Credentials") + + if self.web_identity_token_file: + with open(self.web_identity_token_file) as f: + web_identity_token = f.read().strip() + + response = sts_client.assume_role_with_web_identity( + RoleArn=self.sts_arn, + RoleSessionName=self.session_name, + WebIdentityToken=web_identity_token, + DurationSeconds=self.session_ttl, + ).get("Credentials") + else: + response = sts_client.assume_role( + RoleArn=self.sts_arn, + RoleSessionName=self.session_name, + DurationSeconds=self.session_ttl, + ).get("Credentials") credentials = { "access_key": response.get("AccessKeyId"), @@ -93,17 +94,12 @@ def __get_session_credentials(self): return credentials def refreshable_session(self) -> Session: - """ - Get refreshable boto3 
session. - """ - # Get refreshable credentials refreshable_credentials = RefreshableCredentials.create_from_metadata( metadata=self.__get_session_credentials(), refresh_using=self.__get_session_credentials, method="sts-assume-role", ) - # attach refreshable credentials current session session = get_session() session._credentials = refreshable_credentials session.set_config_variable("region", self.region_name) diff --git a/src/llama_stack/providers/utils/bedrock/sigv4_auth.py b/src/llama_stack/providers/utils/bedrock/sigv4_auth.py new file mode 100644 index 0000000000..11a0d2a605 --- /dev/null +++ b/src/llama_stack/providers/utils/bedrock/sigv4_auth.py @@ -0,0 +1,189 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +SigV4 authentication for AWS Bedrock OpenAI-compatible endpoint. + +This module provides httpx.Auth implementation that signs requests using +AWS Signature Version 4, enabling IAM/STS authentication with the Bedrock +OpenAI-compatible API endpoint. 
+ +Supported credential sources (via boto3 credential chain): +- Static credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) +- Web Identity Federation (AWS_ROLE_ARN, AWS_WEB_IDENTITY_TOKEN_FILE) +- IAM roles (IMDS for EC2, ECS task roles, Lambda execution roles) +- AWS profiles (~/.aws/credentials) + +Web Identity Federation enables keyless authentication in: +- Kubernetes/OpenShift with IRSA (IAM Roles for Service Accounts) +- GitHub Actions with OIDC (aws-actions/configure-aws-credentials) +- Any OIDC-compatible identity provider + +Environment variables for Web Identity: + AWS_ROLE_ARN: ARN of the IAM role to assume + AWS_WEB_IDENTITY_TOKEN_FILE: Path to the OIDC token file + Common paths: + - EKS: /var/run/secrets/eks.amazonaws.com/serviceaccount/token + - Generic Kubernetes: /var/run/secrets/kubernetes.io/serviceaccount/token + - GitHub Actions: Set automatically by aws-actions/configure-aws-credentials + AWS_DEFAULT_REGION: AWS region for the Bedrock endpoint + +Credentials are automatically refreshed by boto3 when they expire. + +References: +- https://docs.aws.amazon.com/bedrock/latest/userguide/inference-chat-completions.html +- https://github.com/meta-llama/llama-stack/issues/4730 +- https://github.com/opendatahub-io/llama-stack-distribution/issues/112 +""" + +from __future__ import annotations + +import asyncio +import threading +from collections.abc import AsyncGenerator, Generator +from typing import Any + +import httpx +from botocore.auth import SigV4Auth +from botocore.awsrequest import AWSRequest + +from llama_stack.log import get_logger +from llama_stack.providers.utils.bedrock.config import DEFAULT_SESSION_TTL + +logger = get_logger(name=__name__, category="providers") + + +class BedrockSigV4Auth(httpx.Auth): + """ + httpx.Auth that signs requests with AWS SigV4. + + Only signs headers that httpx won't touch after signing, to avoid + signature mismatches. 
Credential refresh is handled automatically + by boto3 for temporary credentials (STS, IRSA). + """ + + def __init__( + self, + region: str, + service: str = "bedrock", + aws_access_key_id: str | None = None, + aws_secret_access_key: str | None = None, + aws_session_token: str | None = None, + profile_name: str | None = None, + aws_role_arn: str | None = None, + aws_web_identity_token_file: str | None = None, + aws_role_session_name: str | None = None, + session_ttl: int | None = DEFAULT_SESSION_TTL, + ): + # service must be "bedrock" (the botocore signing name), not "bedrock-runtime" + # (the endpoint prefix) — using the wrong one causes SignatureDoesNotMatch + self._region = region + self._service = service + self._aws_access_key_id = aws_access_key_id + self._aws_secret_access_key = aws_secret_access_key + self._aws_session_token = aws_session_token + self._profile_name = profile_name + self._aws_role_arn = aws_role_arn + self._aws_web_identity_token_file = aws_web_identity_token_file + self._aws_role_session_name = aws_role_session_name + self._session_ttl = session_ttl or DEFAULT_SESSION_TTL + self._lock = threading.Lock() + self._session: Any = None # boto3.Session | None — Any because boto3 is an optional dep + + def _get_credentials(self) -> Any: + from llama_stack.providers.utils.bedrock.refreshable_boto_session import ( + RefreshableBotoSession, + ) + + with self._lock: + if self._session is None: + if self._aws_role_arn: + self._session = RefreshableBotoSession( + region_name=self._region, + aws_access_key_id=self._aws_access_key_id, + aws_secret_access_key=self._aws_secret_access_key, + aws_session_token=self._aws_session_token, + profile_name=self._profile_name, + sts_arn=self._aws_role_arn, + web_identity_token_file=self._aws_web_identity_token_file, + session_name=self._aws_role_session_name, + session_ttl=self._session_ttl, + ).refreshable_session() + else: + import boto3 + + self._session = boto3.Session( + region_name=self._region, + 
aws_access_key_id=self._aws_access_key_id, + aws_secret_access_key=self._aws_secret_access_key, + aws_session_token=self._aws_session_token, + profile_name=self._profile_name, + ) + + credentials = self._session.get_credentials() + if credentials is None: + raise RuntimeError( + "Failed to load AWS credentials. Ensure AWS credentials are " + "configured via environment variables (AWS_ACCESS_KEY_ID, " + "AWS_SECRET_ACCESS_KEY), IAM role, or AWS profile." + ) + return credentials.get_frozen_credentials() + + def _sign_request(self, request: httpx.Request) -> None: + credentials = self._get_credentials() + + # drop the openai sdk's "Bearer " placeholder before signing + if "authorization" in request.headers: + del request.headers["authorization"] + + # sign only stable headers — anything httpx might rewrite after this point + # would invalidate the signature, so we leave those out + host = request.headers.get("host") or str(request.url.netloc) + headers_to_sign = {"host": host} + + # only include content-type if the request already has one; injecting a + # default here would cause a mismatch if httpx sends a different value + if "content-type" in request.headers: + headers_to_sign["content-type"] = request.headers["content-type"] + + for header_name in ["x-amz-content-sha256", "x-amz-security-token"]: + if header_name in request.headers: + headers_to_sign[header_name] = request.headers[header_name] + + try: + content = request.content + except httpx.RequestNotRead: + content = request.read() + + aws_request = AWSRequest( + method=request.method, + url=str(request.url), + data=content, + headers=headers_to_sign, + ) + + signer = SigV4Auth(credentials, self._service, self._region) + signer.add_auth(aws_request) + + # copy Authorization, X-Amz-Date, and X-Amz-Security-Token back onto the live request + for key, value in aws_request.headers.items(): + request.headers[key] = value + + logger.debug( + f"SigV4 signed request: method={request.method}, " + 
f"path={request.url.path}, service={self._service}, region={self._region}" + ) + + def auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, httpx.Response, None]: + self._sign_request(request) + yield request + + async def async_auth_flow(self, request: httpx.Request) -> AsyncGenerator[httpx.Request, httpx.Response]: + # offload to a thread because credential resolution can do IMDS calls or file I/O; + # shield so a rolling-restart cancellation doesn't abort mid-sign and leave the + # connection in an inconsistent auth state + await asyncio.shield(asyncio.to_thread(self._sign_request, request)) + yield request diff --git a/src/llama_stack/providers/utils/inference/http_client.py b/src/llama_stack/providers/utils/inference/http_client.py index 5014703386..3a408f74b2 100644 --- a/src/llama_stack/providers/utils/inference/http_client.py +++ b/src/llama_stack/providers/utils/inference/http_client.py @@ -4,6 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+import hashlib +import json import ssl from pathlib import Path from typing import Any @@ -22,6 +24,29 @@ logger = get_logger(name=__name__, category="providers::utils") +_NETWORK_CONFIG_FINGERPRINT_ATTR = "_llama_stack_network_config_fingerprint" + + +def network_config_fingerprint(network_config: NetworkConfig) -> str: + dumped = json.dumps(network_config.model_dump(mode="json"), sort_keys=True, separators=(",", ":")) + return hashlib.sha256(dumped.encode("utf-8")).hexdigest() + + +def _get_client_network_fingerprint(existing_client: httpx.AsyncClient | DefaultAsyncHttpxClient) -> str | None: + if isinstance(existing_client, DefaultAsyncHttpxClient): + underlying_client = existing_client._client # type: ignore[union-attr,attr-defined] + return getattr(underlying_client, _NETWORK_CONFIG_FINGERPRINT_ATTR, None) + return getattr(existing_client, _NETWORK_CONFIG_FINGERPRINT_ATTR, None) + + +def set_client_network_fingerprint(client: httpx.AsyncClient | DefaultAsyncHttpxClient, fingerprint: str) -> None: + if isinstance(client, DefaultAsyncHttpxClient): + underlying_client = client._client # type: ignore[union-attr,attr-defined] + setattr(underlying_client, _NETWORK_CONFIG_FINGERPRINT_ATTR, fingerprint) + else: + setattr(client, _NETWORK_CONFIG_FINGERPRINT_ATTR, fingerprint) + + def _build_ssl_context(tls_config: TLSConfig) -> ssl.SSLContext | bool | str: """ Build an SSL context from TLS configuration. @@ -92,7 +117,7 @@ def _build_proxy_mounts(proxy_config: ProxyConfig) -> dict[str, httpx.AsyncHTTPT return mounts if mounts else None -def _build_network_client_kwargs(network_config: NetworkConfig | None) -> dict[str, Any]: +def build_network_client_kwargs(network_config: NetworkConfig | None) -> dict[str, Any]: """ Build httpx.AsyncClient kwargs from network configuration. 
@@ -187,7 +212,11 @@ def _merge_network_config_into_client( if network_config is None: return existing_client - network_kwargs = _build_network_client_kwargs(network_config) + fingerprint = network_config_fingerprint(network_config) + if _get_client_network_fingerprint(existing_client) == fingerprint: + return existing_client + + network_kwargs = build_network_client_kwargs(network_config) if not network_kwargs: return existing_client @@ -210,9 +239,13 @@ def _merge_network_config_into_client( # Create new client with merged config new_client = httpx.AsyncClient(**network_kwargs) + set_client_network_fingerprint(new_client, fingerprint) + # If original was DefaultAsyncHttpxClient, wrap the new client if isinstance(existing_client, DefaultAsyncHttpxClient): - return DefaultAsyncHttpxClient(client=new_client, headers=network_kwargs.get("headers")) # type: ignore[call-arg] + wrapped = DefaultAsyncHttpxClient(client=new_client, headers=network_kwargs.get("headers")) # type: ignore[call-arg] + set_client_network_fingerprint(wrapped, fingerprint) + return wrapped return new_client except Exception as e: @@ -234,7 +267,7 @@ def build_http_client(network_config: NetworkConfig | None) -> dict[str, Any]: Dictionary of kwargs to pass to httpx.AsyncClient constructor, wrapped in {"http_client": AsyncClient(...)} for use with AsyncOpenAI """ - network_kwargs = _build_network_client_kwargs(network_config) + network_kwargs = build_network_client_kwargs(network_config) if not network_kwargs: return {} diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py index 253d73b949..f1de6eb4ff 100644 --- a/src/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -18,8 +18,8 @@ from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.http_client import ( - 
_build_network_client_kwargs, _merge_network_config_into_client, + build_network_client_kwargs, ) from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.openai_compat import ( @@ -227,7 +227,7 @@ def client(self) -> AsyncOpenAI: raise ValueError(message) extra_params = self.get_extra_client_params() - network_kwargs = _build_network_client_kwargs(self.config.network) + network_kwargs = build_network_client_kwargs(self.config.network) # Handle http_client creation/merging: # - If get_extra_client_params() provides an http_client (e.g., OCI with custom auth), diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-3ff721a4.json b/tests/integration/common/recordings/models-66c1506ac9a1c3cac55a2dabb6a81f068d753f4a69ae4edeb730b7ca374186c7-3ff721a4.json similarity index 99% rename from tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-3ff721a4.json rename to tests/integration/common/recordings/models-66c1506ac9a1c3cac55a2dabb6a81f068d753f4a69ae4edeb730b7ca374186c7-3ff721a4.json index 23216fedcf..edba78d30d 100644 --- a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-3ff721a4.json +++ b/tests/integration/common/recordings/models-66c1506ac9a1c3cac55a2dabb6a81f068d753f4a69ae4edeb730b7ca374186c7-3ff721a4.json @@ -2,7 +2,7 @@ "test_id": null, "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/inference/recordings/1675987cb787dc51be76c547577dd38172be220c805c37ba73ba56fa89cd1121.json b/tests/integration/inference/recordings/7e3382aab43824f89f365fb42a16f3a9a7592d5336fd3d2101fec1e143080184.json similarity index 
98% rename from tests/integration/inference/recordings/1675987cb787dc51be76c547577dd38172be220c805c37ba73ba56fa89cd1121.json rename to tests/integration/inference/recordings/7e3382aab43824f89f365fb42a16f3a9a7592d5336fd3d2101fec1e143080184.json index f25555e9c2..0eec4eff66 100644 --- a/tests/integration/inference/recordings/1675987cb787dc51be76c547577dd38172be220c805c37ba73ba56fa89cd1121.json +++ b/tests/integration/inference/recordings/7e3382aab43824f89f365fb42a16f3a9a7592d5336fd3d2101fec1e143080184.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:streaming_02]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/inference/recordings/718bb1cd939e26c4b7e6b940c3acd97304f46e13b606e081086dd6657e9d3db2.json b/tests/integration/inference/recordings/94bb6e3fe939afbdb25c9ebfe9e44fad66ae8a37504748ca0a620f241d8537bd.json similarity index 97% rename from tests/integration/inference/recordings/718bb1cd939e26c4b7e6b940c3acd97304f46e13b606e081086dd6657e9d3db2.json rename to tests/integration/inference/recordings/94bb6e3fe939afbdb25c9ebfe9e44fad66ae8a37504748ca0a620f241d8537bd.json index 28fe67627a..4e549bf6fb 100644 --- a/tests/integration/inference/recordings/718bb1cd939e26c4b7e6b940c3acd97304f46e13b606e081086dd6657e9d3db2.json +++ b/tests/integration/inference/recordings/94bb6e3fe939afbdb25c9ebfe9e44fad66ae8a37504748ca0a620f241d8537bd.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=bedrock/openai.gpt-oss-20b-True]", "request": { "method": "POST", - "url": 
"https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/inference/recordings/42ba670916ebfdbe86b518ad1692971ccbb2c2e4a30372f43f38f8e1fc22daf0.json b/tests/integration/inference/recordings/a89357c63194c44cba530faecbbb3ecd51e713f8e6cbd96134c911cc5a9d25c2.json similarity index 98% rename from tests/integration/inference/recordings/42ba670916ebfdbe86b518ad1692971ccbb2c2e4a30372f43f38f8e1fc22daf0.json rename to tests/integration/inference/recordings/a89357c63194c44cba530faecbbb3ecd51e713f8e6cbd96134c911cc5a9d25c2.json index 1d86504abc..c599bab06c 100644 --- a/tests/integration/inference/recordings/42ba670916ebfdbe86b518ad1692971ccbb2c2e4a30372f43f38f8e1fc22daf0.json +++ b/tests/integration/inference/recordings/a89357c63194c44cba530faecbbb3ecd51e713f8e6cbd96134c911cc5a9d25c2.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:streaming_01]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/inference/recordings/de019f1d2995412fd461393c644f07c743cf81d087249ef2db974576238ab1c7.json b/tests/integration/inference/recordings/d91f3615a81a97e7d86b3cd5bb05d8d92f852af7f2286b0a20624a45f4f5f380.json similarity index 97% rename from tests/integration/inference/recordings/de019f1d2995412fd461393c644f07c743cf81d087249ef2db974576238ab1c7.json rename to tests/integration/inference/recordings/d91f3615a81a97e7d86b3cd5bb05d8d92f852af7f2286b0a20624a45f4f5f380.json index fd00c0232f..7d28deb715 100644 --- 
a/tests/integration/inference/recordings/de019f1d2995412fd461393c644f07c743cf81d087249ef2db974576238ab1c7.json +++ b/tests/integration/inference/recordings/d91f3615a81a97e7d86b3cd5bb05d8d92f852af7f2286b0a20624a45f4f5f380.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:non_streaming_02]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/inference/recordings/0241337625808a1602a0b4b1c715c7531cecf2491f2f2129e7e6884e3ffb62bb.json b/tests/integration/inference/recordings/e9d3ce4be836e5478689cf56f0375fc800499022cd1daf923df591207980513c.json similarity index 96% rename from tests/integration/inference/recordings/0241337625808a1602a0b4b1c715c7531cecf2491f2f2129e7e6884e3ffb62bb.json rename to tests/integration/inference/recordings/e9d3ce4be836e5478689cf56f0375fc800499022cd1daf923df591207980513c.json index 4024097bbe..163f37387e 100644 --- a/tests/integration/inference/recordings/0241337625808a1602a0b4b1c715c7531cecf2491f2f2129e7e6884e3ffb62bb.json +++ b/tests/integration/inference/recordings/e9d3ce4be836e5478689cf56f0375fc800499022cd1daf923df591207980513c.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:non_streaming_01]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git 
a/tests/integration/inference/recordings/d6f772f62c859028bada2fe328498364e82c1d36ed256910623577c6fcd5696c.json b/tests/integration/inference/recordings/f4fa57034e80f69cb75c300241862e98911e3124f09225d42d29239045854ce4.json similarity index 94% rename from tests/integration/inference/recordings/d6f772f62c859028bada2fe328498364e82c1d36ed256910623577c6fcd5696c.json rename to tests/integration/inference/recordings/f4fa57034e80f69cb75c300241862e98911e3124f09225d42d29239045854ce4.json index bf3f8adbf6..b546698660 100644 --- a/tests/integration/inference/recordings/d6f772f62c859028bada2fe328498364e82c1d36ed256910623577c6fcd5696c.json +++ b/tests/integration/inference/recordings/f4fa57034e80f69cb75c300241862e98911e3124f09225d42d29239045854ce4.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=bedrock/openai.gpt-oss-20b-False]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/7fe113f5fc7f0e055b4abd4e6ea0a183dc1a2b959cc410ec2c594a04028712b9.json b/tests/integration/responses/recordings/0469f7efcdf69ae378b7395f518d863badff5ace79116b718d28b8fa3e53a93a.json similarity index 99% rename from tests/integration/responses/recordings/7fe113f5fc7f0e055b4abd4e6ea0a183dc1a2b959cc410ec2c594a04028712b9.json rename to tests/integration/responses/recordings/0469f7efcdf69ae378b7395f518d863badff5ace79116b718d28b8fa3e53a93a.json index acbabb0973..ad2a6a5d08 100644 --- a/tests/integration/responses/recordings/7fe113f5fc7f0e055b4abd4e6ea0a183dc1a2b959cc410ec2c594a04028712b9.json +++ b/tests/integration/responses/recordings/0469f7efcdf69ae378b7395f518d863badff5ace79116b718d28b8fa3e53a93a.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/d480b100b782370dd41e8dbbb53e2f37783b99074446dcb50d49034007bfb97e.json b/tests/integration/responses/recordings/04a4837c6c7974929a7328bb090e7c9d991bfb10ba71d307ed8ba39411c407df.json similarity index 98% rename from tests/integration/responses/recordings/d480b100b782370dd41e8dbbb53e2f37783b99074446dcb50d49034007bfb97e.json rename to tests/integration/responses/recordings/04a4837c6c7974929a7328bb090e7c9d991bfb10ba71d307ed8ba39411c407df.json index c4b64b33c0..bb85206433 100644 --- a/tests/integration/responses/recordings/d480b100b782370dd41e8dbbb53e2f37783b99074446dcb50d49034007bfb97e.json +++ b/tests/integration/responses/recordings/04a4837c6c7974929a7328bb090e7c9d991bfb10ba71d307ed8ba39411c407df.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/c71d141a2ec044cc7eb01574672e5e6bc6689fb1d6aa8cc6494773fddff919d4.json b/tests/integration/responses/recordings/0c65c6fe47669a1dde7e3f13d4b357970f0e3dabc4442278caa66054f29cf2b2.json similarity index 99% rename from tests/integration/responses/recordings/c71d141a2ec044cc7eb01574672e5e6bc6689fb1d6aa8cc6494773fddff919d4.json rename to 
tests/integration/responses/recordings/0c65c6fe47669a1dde7e3f13d4b357970f0e3dabc4442278caa66054f29cf2b2.json index 1354ec7d90..9c3a603bfd 100644 --- a/tests/integration/responses/recordings/c71d141a2ec044cc7eb01574672e5e6bc6689fb1d6aa8cc6494773fddff919d4.json +++ b/tests/integration/responses/recordings/0c65c6fe47669a1dde7e3f13d4b357970f0e3dabc4442278caa66054f29cf2b2.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/29462a1046f2fcc98301a8eed2fc861ba7f9de28bf2dc06bfae8c04bf664e645.json b/tests/integration/responses/recordings/1a5af1292127b401be03305c2d0a8f109a1bd0444c0394ad5e3ca4f6bce9a3c7.json similarity index 98% rename from tests/integration/responses/recordings/29462a1046f2fcc98301a8eed2fc861ba7f9de28bf2dc06bfae8c04bf664e645.json rename to tests/integration/responses/recordings/1a5af1292127b401be03305c2d0a8f109a1bd0444c0394ad5e3ca4f6bce9a3c7.json index b457375b69..4c58e51aa8 100644 --- a/tests/integration/responses/recordings/29462a1046f2fcc98301a8eed2fc861ba7f9de28bf2dc06bfae8c04bf664e645.json +++ b/tests/integration/responses/recordings/1a5af1292127b401be03305c2d0a8f109a1bd0444c0394ad5e3ca4f6bce9a3c7.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", 
diff --git a/tests/integration/responses/recordings/076784d55c6a712b559ad4061d7ed4391dc7f1cf35a2b141ad4b0b8f8c8503e1.json b/tests/integration/responses/recordings/1f1d016a1f7977d024de7a10f1e63acefb20e6839258b8b132471f404007c8f3.json similarity index 98% rename from tests/integration/responses/recordings/076784d55c6a712b559ad4061d7ed4391dc7f1cf35a2b141ad4b0b8f8c8503e1.json rename to tests/integration/responses/recordings/1f1d016a1f7977d024de7a10f1e63acefb20e6839258b8b132471f404007c8f3.json index fa214a2d26..4db3f44845 100644 --- a/tests/integration/responses/recordings/076784d55c6a712b559ad4061d7ed4391dc7f1cf35a2b141ad4b0b8f8c8503e1.json +++ b/tests/integration/responses/recordings/1f1d016a1f7977d024de7a10f1e63acefb20e6839258b8b132471f404007c8f3.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[openai_client-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/95ae8c627acc6e165fa5046946ae7c009324d3704b64695049cbbd399a6348b8.json b/tests/integration/responses/recordings/23223eabe994fd8172f8650d7cc9c3cc6f11f0618996f28403378a85d7258c88.json similarity index 99% rename from tests/integration/responses/recordings/95ae8c627acc6e165fa5046946ae7c009324d3704b64695049cbbd399a6348b8.json rename to tests/integration/responses/recordings/23223eabe994fd8172f8650d7cc9c3cc6f11f0618996f28403378a85d7258c88.json index dfbf4d1684..e393f19733 100644 --- a/tests/integration/responses/recordings/95ae8c627acc6e165fa5046946ae7c009324d3704b64695049cbbd399a6348b8.json +++ b/tests/integration/responses/recordings/23223eabe994fd8172f8650d7cc9c3cc6f11f0618996f28403378a85d7258c88.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_prompt_templates.py::test_multi_version_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/21b2d947138024ef80069f89c5157185a70f43c4d128ffdb4337194ae2486429.json b/tests/integration/responses/recordings/23e253ab6f05ad695e1d2dfd2190797667d27d8a1a8a963ffba82cf69bf3696d.json similarity index 98% rename from tests/integration/responses/recordings/21b2d947138024ef80069f89c5157185a70f43c4d128ffdb4337194ae2486429.json rename to tests/integration/responses/recordings/23e253ab6f05ad695e1d2dfd2190797667d27d8a1a8a963ffba82cf69bf3696d.json index 2567646684..89159b2fdd 100644 --- a/tests/integration/responses/recordings/21b2d947138024ef80069f89c5157185a70f43c4d128ffdb4337194ae2486429.json +++ b/tests/integration/responses/recordings/23e253ab6f05ad695e1d2dfd2190797667d27d8a1a8a963ffba82cf69bf3696d.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/68dd2778e5b1caa3ae28e5eed59c41db0d39c58c113612b94abaa459fc0fad3e.json b/tests/integration/responses/recordings/2c604588ef302f0e4a93ef93dc713522c7d7798542038b89b465af2e9ef0b299.json similarity index 99% rename from tests/integration/responses/recordings/68dd2778e5b1caa3ae28e5eed59c41db0d39c58c113612b94abaa459fc0fad3e.json rename to 
tests/integration/responses/recordings/2c604588ef302f0e4a93ef93dc713522c7d7798542038b89b465af2e9ef0b299.json index a6fe2f3a76..d3b85e36bd 100644 --- a/tests/integration/responses/recordings/68dd2778e5b1caa3ae28e5eed59c41db0d39c58c113612b94abaa459fc0fad3e.json +++ b/tests/integration/responses/recordings/2c604588ef302f0e4a93ef93dc713522c7d7798542038b89b465af2e9ef0b299.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_multi_version_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/6e9164fd65ebd8121be120a06262efd9c0f603104e86d84ad07ff4e5b5ab4d69.json b/tests/integration/responses/recordings/2ecc2783d6558449838ced5f0180990a80c4cbaffa952f1fe5668b74e9e57ae9.json similarity index 99% rename from tests/integration/responses/recordings/6e9164fd65ebd8121be120a06262efd9c0f603104e86d84ad07ff4e5b5ab4d69.json rename to tests/integration/responses/recordings/2ecc2783d6558449838ced5f0180990a80c4cbaffa952f1fe5668b74e9e57ae9.json index dd88fbdc31..8df6a01cc9 100644 --- a/tests/integration/responses/recordings/6e9164fd65ebd8121be120a06262efd9c0f603104e86d84ad07ff4e5b5ab4d69.json +++ b/tests/integration/responses/recordings/2ecc2783d6558449838ced5f0180990a80c4cbaffa952f1fe5668b74e9e57ae9.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[openai_client-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git 
a/tests/integration/responses/recordings/efffb6e430b20dc2f20fbe77ef230b47660008ab6ae2102f73596d4331dcf683.json b/tests/integration/responses/recordings/30dee5b8b4a1716103dd23d5249b6649aa2c9d30931cccdbb2a8b2fefa69675d.json similarity index 99% rename from tests/integration/responses/recordings/efffb6e430b20dc2f20fbe77ef230b47660008ab6ae2102f73596d4331dcf683.json rename to tests/integration/responses/recordings/30dee5b8b4a1716103dd23d5249b6649aa2c9d30931cccdbb2a8b2fefa69675d.json index 0bd9da0c4e..39ba90fd12 100644 --- a/tests/integration/responses/recordings/efffb6e430b20dc2f20fbe77ef230b47660008ab6ae2102f73596d4331dcf683.json +++ b/tests/integration/responses/recordings/30dee5b8b4a1716103dd23d5249b6649aa2c9d30931cccdbb2a8b2fefa69675d.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/e1bf2d00006e9b3c60f537c724c98c518a571caec500010e2b07a42662a0db94.json b/tests/integration/responses/recordings/3e79875697a99012661ee6e8a9160a9251d716e340408d20c5c7cc2b337c71b0.json similarity index 98% rename from tests/integration/responses/recordings/e1bf2d00006e9b3c60f537c724c98c518a571caec500010e2b07a42662a0db94.json rename to tests/integration/responses/recordings/3e79875697a99012661ee6e8a9160a9251d716e340408d20c5c7cc2b337c71b0.json index f9d5725488..befbab5abd 100644 --- a/tests/integration/responses/recordings/e1bf2d00006e9b3c60f537c724c98c518a571caec500010e2b07a42662a0db94.json +++ b/tests/integration/responses/recordings/3e79875697a99012661ee6e8a9160a9251d716e340408d20c5c7cc2b337c71b0.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_multi_turn_and_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/a6204a1d782ab7d9f85b3ac2b792796b668ba731bf670254e3c9d9ede934193c.json b/tests/integration/responses/recordings/4290deaf6bf03c9ccf3228f6b51a2f05dc909966c425203775402388b7ced238.json similarity index 98% rename from tests/integration/responses/recordings/a6204a1d782ab7d9f85b3ac2b792796b668ba731bf670254e3c9d9ede934193c.json rename to tests/integration/responses/recordings/4290deaf6bf03c9ccf3228f6b51a2f05dc909966c425203775402388b7ced238.json index c16d810e5d..54ed03755d 100644 --- a/tests/integration/responses/recordings/a6204a1d782ab7d9f85b3ac2b792796b668ba731bf670254e3c9d9ede934193c.json +++ b/tests/integration/responses/recordings/4290deaf6bf03c9ccf3228f6b51a2f05dc909966c425203775402388b7ced238.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/ae06979da89d9f6949f1ceb5ee057759bfbf01f3e0bd58f68d55996929192f5e.json b/tests/integration/responses/recordings/4752668af3df1f42fe5b16f6f0c7a4954472d2cda8559b8466fad6ba0aa1c642.json similarity index 99% rename from tests/integration/responses/recordings/ae06979da89d9f6949f1ceb5ee057759bfbf01f3e0bd58f68d55996929192f5e.json rename to 
tests/integration/responses/recordings/4752668af3df1f42fe5b16f6f0c7a4954472d2cda8559b8466fad6ba0aa1c642.json index 8689cb508f..e70fef56b6 100644 --- a/tests/integration/responses/recordings/ae06979da89d9f6949f1ceb5ee057759bfbf01f3e0bd58f68d55996929192f5e.json +++ b/tests/integration/responses/recordings/4752668af3df1f42fe5b16f6f0c7a4954472d2cda8559b8466fad6ba0aa1c642.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/16ef4e2da1cf48a86471cb57c7ce4a4c11c6968d3e9607dbd632d3bd9a54493f.json b/tests/integration/responses/recordings/48d747eb358721344236340fdca57eb977700bad50a47dcde078625f507277cc.json similarity index 99% rename from tests/integration/responses/recordings/16ef4e2da1cf48a86471cb57c7ce4a4c11c6968d3e9607dbd632d3bd9a54493f.json rename to tests/integration/responses/recordings/48d747eb358721344236340fdca57eb977700bad50a47dcde078625f507277cc.json index 48db8bb1cb..9c481d8163 100644 --- a/tests/integration/responses/recordings/16ef4e2da1cf48a86471cb57c7ce4a4c11c6968d3e9607dbd632d3bd9a54493f.json +++ b/tests/integration/responses/recordings/48d747eb358721344236340fdca57eb977700bad50a47dcde078625f507277cc.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_basic_workflow[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": 
"openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/2e46c50d186ad42569a701893e2fd59e4aa93e9b973d7780c32c6e864a4abed7.json b/tests/integration/responses/recordings/498ab18308a6e603d405bf5b5c1102f29c9250f8386e90baab57cbfa9782a558.json similarity index 98% rename from tests/integration/responses/recordings/2e46c50d186ad42569a701893e2fd59e4aa93e9b973d7780c32c6e864a4abed7.json rename to tests/integration/responses/recordings/498ab18308a6e603d405bf5b5c1102f29c9250f8386e90baab57cbfa9782a558.json index 0565553f98..35299f72fe 100644 --- a/tests/integration/responses/recordings/2e46c50d186ad42569a701893e2fd59e4aa93e9b973d7780c32c6e864a4abed7.json +++ b/tests/integration/responses/recordings/498ab18308a6e603d405bf5b5c1102f29c9250f8386e90baab57cbfa9782a558.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_multi_variable_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/d004a76ddc87e3d1f10a07b50f2aa82cf39d20e5cfb3be1433c924a126fe1a58.json b/tests/integration/responses/recordings/4c112247d4a195fac2b27e6a4936b248ba0848b70882489e1a32a9e323dc081b.json similarity index 99% rename from tests/integration/responses/recordings/d004a76ddc87e3d1f10a07b50f2aa82cf39d20e5cfb3be1433c924a126fe1a58.json rename to tests/integration/responses/recordings/4c112247d4a195fac2b27e6a4936b248ba0848b70882489e1a32a9e323dc081b.json index d6f25547df..e7a54d580e 100644 --- a/tests/integration/responses/recordings/d004a76ddc87e3d1f10a07b50f2aa82cf39d20e5cfb3be1433c924a126fe1a58.json +++ b/tests/integration/responses/recordings/4c112247d4a195fac2b27e6a4936b248ba0848b70882489e1a32a9e323dc081b.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[openai_client-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/f9f8f255a3e65fdea20eedd28f7febfb159dafa9a3aab7a83c8afc7e89ba0b91.json b/tests/integration/responses/recordings/4e1fd22f5d9ded67352dbe815bb0cb7e463ad85b856514425df394ba222b92c4.json similarity index 98% rename from tests/integration/responses/recordings/f9f8f255a3e65fdea20eedd28f7febfb159dafa9a3aab7a83c8afc7e89ba0b91.json rename to tests/integration/responses/recordings/4e1fd22f5d9ded67352dbe815bb0cb7e463ad85b856514425df394ba222b92c4.json index 48dee3d271..a5ac4ce369 100644 --- a/tests/integration/responses/recordings/f9f8f255a3e65fdea20eedd28f7febfb159dafa9a3aab7a83c8afc7e89ba0b91.json +++ b/tests/integration/responses/recordings/4e1fd22f5d9ded67352dbe815bb0cb7e463ad85b856514425df394ba222b92c4.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/e6e1b682e9b368426efc1bd93b326bec2f7db76ab9d8c9c6334a2d5177672fd6.json b/tests/integration/responses/recordings/5100781c53bb19a320364926454cb1a17051bb6a1a72989937e1b42465983960.json similarity index 99% rename from tests/integration/responses/recordings/e6e1b682e9b368426efc1bd93b326bec2f7db76ab9d8c9c6334a2d5177672fd6.json rename to 
tests/integration/responses/recordings/5100781c53bb19a320364926454cb1a17051bb6a1a72989937e1b42465983960.json index b8052fc8a4..1876fb2b4c 100644 --- a/tests/integration/responses/recordings/e6e1b682e9b368426efc1bd93b326bec2f7db76ab9d8c9c6334a2d5177672fd6.json +++ b/tests/integration/responses/recordings/5100781c53bb19a320364926454cb1a17051bb6a1a72989937e1b42465983960.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/5ab06b404383787d1a545eacc7674fc068f37fbdaaa8d3b2f991f884b0c1d07a.json b/tests/integration/responses/recordings/53e2e090845d1a09194d5f18dca914ab5eecc2a2af6add585ef5be9d61549d1a.json similarity index 99% rename from tests/integration/responses/recordings/5ab06b404383787d1a545eacc7674fc068f37fbdaaa8d3b2f991f884b0c1d07a.json rename to tests/integration/responses/recordings/53e2e090845d1a09194d5f18dca914ab5eecc2a2af6add585ef5be9d61549d1a.json index 524561bac2..9f7bac0205 100644 --- a/tests/integration/responses/recordings/5ab06b404383787d1a545eacc7674fc068f37fbdaaa8d3b2f991f884b0c1d07a.json +++ b/tests/integration/responses/recordings/53e2e090845d1a09194d5f18dca914ab5eecc2a2af6add585ef5be9d61549d1a.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git 
a/tests/integration/responses/recordings/6a8b76b4d8ba3bc65c2cbd242b17aa224cc77abb15beba937decfda457ba2b6e.json b/tests/integration/responses/recordings/6138d550fdb668e3a48400b36501a2a51cea83c4157e7d6fc35792837df40bdb.json similarity index 99% rename from tests/integration/responses/recordings/6a8b76b4d8ba3bc65c2cbd242b17aa224cc77abb15beba937decfda457ba2b6e.json rename to tests/integration/responses/recordings/6138d550fdb668e3a48400b36501a2a51cea83c4157e7d6fc35792837df40bdb.json index ae6a4b7715..ddb3c9e5c8 100644 --- a/tests/integration/responses/recordings/6a8b76b4d8ba3bc65c2cbd242b17aa224cc77abb15beba937decfda457ba2b6e.json +++ b/tests/integration/responses/recordings/6138d550fdb668e3a48400b36501a2a51cea83c4157e7d6fc35792837df40bdb.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_with_function_tools[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/01675632398179e5013b897e113ce0bfcb3c8c06352c4884b31b84bcc88b8e43.json b/tests/integration/responses/recordings/66ca3acff5d7d5880103bbfde08c52bac60f1038d84e0591a279b941a9b49b32.json similarity index 99% rename from tests/integration/responses/recordings/01675632398179e5013b897e113ce0bfcb3c8c06352c4884b31b84bcc88b8e43.json rename to tests/integration/responses/recordings/66ca3acff5d7d5880103bbfde08c52bac60f1038d84e0591a279b941a9b49b32.json index a46192e4cf..b6006df3af 100644 --- a/tests/integration/responses/recordings/01675632398179e5013b897e113ce0bfcb3c8c06352c4884b31b84bcc88b8e43.json +++ b/tests/integration/responses/recordings/66ca3acff5d7d5880103bbfde08c52bac60f1038d84e0591a279b941a9b49b32.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/c2735c8a02e57ab45fd2b72ac4478bad6a39cb2ff3ac1a457fe8476f73146934.json b/tests/integration/responses/recordings/67c29a33f0d688d469c0d59feb5ca085c51bf886c6836fce3b630c1cc693e8cd.json similarity index 99% rename from tests/integration/responses/recordings/c2735c8a02e57ab45fd2b72ac4478bad6a39cb2ff3ac1a457fe8476f73146934.json rename to tests/integration/responses/recordings/67c29a33f0d688d469c0d59feb5ca085c51bf886c6836fce3b630c1cc693e8cd.json index b3d394a752..3ab5fcc7f5 100644 --- a/tests/integration/responses/recordings/c2735c8a02e57ab45fd2b72ac4478bad6a39cb2ff3ac1a457fe8476f73146934.json +++ b/tests/integration/responses/recordings/67c29a33f0d688d469c0d59feb5ca085c51bf886c6836fce3b630c1cc693e8cd.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/1c75005e90b5447d913967026e8bd7c13afa92ce90d2af9913fe81db06e8c88b.json b/tests/integration/responses/recordings/6becbeb1c15378d9a82f60c3a9309dad73dd604848005240745cb0f729c4330f.json similarity index 98% rename from tests/integration/responses/recordings/1c75005e90b5447d913967026e8bd7c13afa92ce90d2af9913fe81db06e8c88b.json rename to 
tests/integration/responses/recordings/6becbeb1c15378d9a82f60c3a9309dad73dd604848005240745cb0f729c4330f.json index 5009f42036..9bd9eff2fa 100644 --- a/tests/integration/responses/recordings/1c75005e90b5447d913967026e8bd7c13afa92ce90d2af9913fe81db06e8c88b.json +++ b/tests/integration/responses/recordings/6becbeb1c15378d9a82f60c3a9309dad73dd604848005240745cb0f729c4330f.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_non_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/97a9427f4c20f3fb104a6d04e3c08380f50a86a947b89ddd9477d09ec713313e.json b/tests/integration/responses/recordings/6f80b6307533eb1efc08bb9252ba14579a5f7114057b9b774d18b72e7a932fb2.json similarity index 98% rename from tests/integration/responses/recordings/97a9427f4c20f3fb104a6d04e3c08380f50a86a947b89ddd9477d09ec713313e.json rename to tests/integration/responses/recordings/6f80b6307533eb1efc08bb9252ba14579a5f7114057b9b774d18b72e7a932fb2.json index c931916e1f..a36d46108c 100644 --- a/tests/integration/responses/recordings/97a9427f4c20f3fb104a6d04e3c08380f50a86a947b89ddd9477d09ec713313e.json +++ b/tests/integration/responses/recordings/6f80b6307533eb1efc08bb9252ba14579a5f7114057b9b774d18b72e7a932fb2.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[openai_client-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git 
a/tests/integration/responses/recordings/565887f8230acc7918b316e3428650fa2f8aa13aa8a01859da53f1bc10d7c8d5.json b/tests/integration/responses/recordings/72e8dd36d46c572167f589fe59a32bcdfbfed5ba7f272c55f2cf412b099f7c93.json similarity index 99% rename from tests/integration/responses/recordings/565887f8230acc7918b316e3428650fa2f8aa13aa8a01859da53f1bc10d7c8d5.json rename to tests/integration/responses/recordings/72e8dd36d46c572167f589fe59a32bcdfbfed5ba7f272c55f2cf412b099f7c93.json index 7df6152346..21f98bd2fa 100644 --- a/tests/integration/responses/recordings/565887f8230acc7918b316e3428650fa2f8aa13aa8a01859da53f1bc10d7c8d5.json +++ b/tests/integration/responses/recordings/72e8dd36d46c572167f589fe59a32bcdfbfed5ba7f272c55f2cf412b099f7c93.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/6b1b3d872eb6746dbfe9b8eed7fcbe23ca9e3408e7b3d8b99df407ae62f0a9f8.json b/tests/integration/responses/recordings/7fce3badd53d491bbae372c12f38743876b1177094e1a97d08bbd600900388e1.json similarity index 99% rename from tests/integration/responses/recordings/6b1b3d872eb6746dbfe9b8eed7fcbe23ca9e3408e7b3d8b99df407ae62f0a9f8.json rename to tests/integration/responses/recordings/7fce3badd53d491bbae372c12f38743876b1177094e1a97d08bbd600900388e1.json index df200b467d..a7573dded0 100644 --- a/tests/integration/responses/recordings/6b1b3d872eb6746dbfe9b8eed7fcbe23ca9e3408e7b3d8b99df407ae62f0a9f8.json +++ b/tests/integration/responses/recordings/7fce3badd53d491bbae372c12f38743876b1177094e1a97d08bbd600900388e1.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/612c6642c5fb5b4203098183335b17ac0d121a04e39cc737b6cd2c1cb1f8ab30.json b/tests/integration/responses/recordings/86985e69a4be4073af20f65ba9ec927389f1b18c13691565186eba89fd1df24e.json similarity index 98% rename from tests/integration/responses/recordings/612c6642c5fb5b4203098183335b17ac0d121a04e39cc737b6cd2c1cb1f8ab30.json rename to tests/integration/responses/recordings/86985e69a4be4073af20f65ba9ec927389f1b18c13691565186eba89fd1df24e.json index 5daeb5e399..45a3daea46 100644 --- a/tests/integration/responses/recordings/612c6642c5fb5b4203098183335b17ac0d121a04e39cc737b6cd2c1cb1f8ab30.json +++ b/tests/integration/responses/recordings/86985e69a4be4073af20f65ba9ec927389f1b18c13691565186eba89fd1df24e.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/255d2b2c743eb4102b0938c2ece15c5ce5a534fc3ee5ea5067b677a9007fab40.json b/tests/integration/responses/recordings/8fffbf821fc03c867d710ee20471566dd845588cce7605101332153175738d64.json similarity index 98% rename from tests/integration/responses/recordings/255d2b2c743eb4102b0938c2ece15c5ce5a534fc3ee5ea5067b677a9007fab40.json rename to 
tests/integration/responses/recordings/8fffbf821fc03c867d710ee20471566dd845588cce7605101332153175738d64.json index eef2f4abbb..5c1f4c5359 100644 --- a/tests/integration/responses/recordings/255d2b2c743eb4102b0938c2ece15c5ce5a534fc3ee5ea5067b677a9007fab40.json +++ b/tests/integration/responses/recordings/8fffbf821fc03c867d710ee20471566dd845588cce7605101332153175738d64.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/b4669f2ae72ba853cd152e36d2e1aed1fc55ff53d48e9fe7371ce36e67b75445.json b/tests/integration/responses/recordings/9a097dbf69b822064c0e709fc32196ae1825730205c6ccd617c7ab88f57b1bb1.json similarity index 99% rename from tests/integration/responses/recordings/b4669f2ae72ba853cd152e36d2e1aed1fc55ff53d48e9fe7371ce36e67b75445.json rename to tests/integration/responses/recordings/9a097dbf69b822064c0e709fc32196ae1825730205c6ccd617c7ab88f57b1bb1.json index 1fccfa99c0..d0f580f350 100644 --- a/tests/integration/responses/recordings/b4669f2ae72ba853cd152e36d2e1aed1fc55ff53d48e9fe7371ce36e67b75445.json +++ b/tests/integration/responses/recordings/9a097dbf69b822064c0e709fc32196ae1825730205c6ccd617c7ab88f57b1bb1.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_responses_errors.py::TestResponsesAPIStreamingErrors::test_completed_response_has_no_error[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", 
diff --git a/tests/integration/responses/recordings/fff9dbea699f84c0f924ee9dbc91c9f5587eff0d27a554fd55b916da9662ce2c.json b/tests/integration/responses/recordings/a0a8368aca6a21aff59a88f8eadae3bf8cbaa47c2c83fcfc439c29c3693cf6a3.json similarity index 98% rename from tests/integration/responses/recordings/fff9dbea699f84c0f924ee9dbc91c9f5587eff0d27a554fd55b916da9662ce2c.json rename to tests/integration/responses/recordings/a0a8368aca6a21aff59a88f8eadae3bf8cbaa47c2c83fcfc439c29c3693cf6a3.json index 4532cdae61..c70c2c852b 100644 --- a/tests/integration/responses/recordings/fff9dbea699f84c0f924ee9dbc91c9f5587eff0d27a554fd55b916da9662ce2c.json +++ b/tests/integration/responses/recordings/a0a8368aca6a21aff59a88f8eadae3bf8cbaa47c2c83fcfc439c29c3693cf6a3.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_backward_compatibility[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/611613fb2c06bb1ed79fbe5dae18ac6e2a650340832aa7538498b5b26aff1f63.json b/tests/integration/responses/recordings/a3fea537eafccc69367c15360b09ca732197f2fbd3757e8afe5bc583df8f1c2c.json similarity index 98% rename from tests/integration/responses/recordings/611613fb2c06bb1ed79fbe5dae18ac6e2a650340832aa7538498b5b26aff1f63.json rename to tests/integration/responses/recordings/a3fea537eafccc69367c15360b09ca732197f2fbd3757e8afe5bc583df8f1c2c.json index 07098e7e33..c123f0e399 100644 --- a/tests/integration/responses/recordings/611613fb2c06bb1ed79fbe5dae18ac6e2a650340832aa7538498b5b26aff1f63.json +++ b/tests/integration/responses/recordings/a3fea537eafccc69367c15360b09ca732197f2fbd3757e8afe5bc583df8f1c2c.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/516a186291f3c6a4cf45ceecd49e9f8f5b396f37caa0c557e664c608b29bcfcc.json b/tests/integration/responses/recordings/a640e097dcf17544825aa20ce937c0b783de5f4bd2916d9e370145768551075c.json similarity index 98% rename from tests/integration/responses/recordings/516a186291f3c6a4cf45ceecd49e9f8f5b396f37caa0c557e664c608b29bcfcc.json rename to tests/integration/responses/recordings/a640e097dcf17544825aa20ce937c0b783de5f4bd2916d9e370145768551075c.json index f5e7895174..e5c913a998 100644 --- a/tests/integration/responses/recordings/516a186291f3c6a4cf45ceecd49e9f8f5b396f37caa0c557e664c608b29bcfcc.json +++ b/tests/integration/responses/recordings/a640e097dcf17544825aa20ce937c0b783de5f4bd2916d9e370145768551075c.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/68da149c99f86029626e9bca5df9a72d5204a4423c5caca31efe60384c4b2b37.json b/tests/integration/responses/recordings/aaed9594b345da41db3dd6754b1e7c6366dff704ea943bda851c2d3a658d44ec.json similarity index 98% rename from tests/integration/responses/recordings/68da149c99f86029626e9bca5df9a72d5204a4423c5caca31efe60384c4b2b37.json rename to 
tests/integration/responses/recordings/aaed9594b345da41db3dd6754b1e7c6366dff704ea943bda851c2d3a658d44ec.json index d0fb69b3ea..a58476f361 100644 --- a/tests/integration/responses/recordings/68da149c99f86029626e9bca5df9a72d5204a4423c5caca31efe60384c4b2b37.json +++ b/tests/integration/responses/recordings/aaed9594b345da41db3dd6754b1e7c6366dff704ea943bda851c2d3a658d44ec.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/555654e0ddbd51a65af179d9300fdedc42077a90849824b037f9dae1edda4102.json b/tests/integration/responses/recordings/b2741f85f52f5eb88f5401e02cad1d4f5d4a9bf61953e12b0441c0c4c2a2831a.json similarity index 98% rename from tests/integration/responses/recordings/555654e0ddbd51a65af179d9300fdedc42077a90849824b037f9dae1edda4102.json rename to tests/integration/responses/recordings/b2741f85f52f5eb88f5401e02cad1d4f5d4a9bf61953e12b0441c0c4c2a2831a.json index 49b997b2ed..dedcc9a08e 100644 --- a/tests/integration/responses/recordings/555654e0ddbd51a65af179d9300fdedc42077a90849824b037f9dae1edda4102.json +++ b/tests/integration/responses/recordings/b2741f85f52f5eb88f5401e02cad1d4f5d4a9bf61953e12b0441c0c4c2a2831a.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", 
diff --git a/tests/integration/responses/recordings/f8eb947297e60407c022e3326b5bbcfa0e186abf83bf4b3a826640715e14a3a9.json b/tests/integration/responses/recordings/baa7ab6db86f51910a35c45cf2234670f308b01d4b905faf3e1313901d2a6b61.json similarity index 99% rename from tests/integration/responses/recordings/f8eb947297e60407c022e3326b5bbcfa0e186abf83bf4b3a826640715e14a3a9.json rename to tests/integration/responses/recordings/baa7ab6db86f51910a35c45cf2234670f308b01d4b905faf3e1313901d2a6b61.json index 5e63214137..d4ea806034 100644 --- a/tests/integration/responses/recordings/f8eb947297e60407c022e3326b5bbcfa0e186abf83bf4b3a826640715e14a3a9.json +++ b/tests/integration/responses/recordings/baa7ab6db86f51910a35c45cf2234670f308b01d4b905faf3e1313901d2a6b61.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/bf90e1d57fcb8ac4ebb20ee417bb219a091ac9c45000f4c22e4fd16716dfaceb.json b/tests/integration/responses/recordings/c40e45bc283154cb2e212a514e5ab935e8851add8edeef48d6a65764716ce634.json similarity index 98% rename from tests/integration/responses/recordings/bf90e1d57fcb8ac4ebb20ee417bb219a091ac9c45000f4c22e4fd16716dfaceb.json rename to tests/integration/responses/recordings/c40e45bc283154cb2e212a514e5ab935e8851add8edeef48d6a65764716ce634.json index 56e96e3284..98c4e919a2 100644 --- a/tests/integration/responses/recordings/bf90e1d57fcb8ac4ebb20ee417bb219a091ac9c45000f4c22e4fd16716dfaceb.json +++ b/tests/integration/responses/recordings/c40e45bc283154cb2e212a514e5ab935e8851add8edeef48d6a65764716ce634.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/da6a362ff3aadd43bc06ba806b0f3710f565e33c63b5f1aff08a68807e01ce8a.json b/tests/integration/responses/recordings/cc0c289ff3a932cbfc46188197ebf4074e790c3bbdae24c280c711aa752fb6b4.json similarity index 99% rename from tests/integration/responses/recordings/da6a362ff3aadd43bc06ba806b0f3710f565e33c63b5f1aff08a68807e01ce8a.json rename to tests/integration/responses/recordings/cc0c289ff3a932cbfc46188197ebf4074e790c3bbdae24c280c711aa752fb6b4.json index cadff507ca..e7e7f1d014 100644 --- a/tests/integration/responses/recordings/da6a362ff3aadd43bc06ba806b0f3710f565e33c63b5f1aff08a68807e01ce8a.json +++ b/tests/integration/responses/recordings/cc0c289ff3a932cbfc46188197ebf4074e790c3bbdae24c280c711aa752fb6b4.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/2df2850b224738a8f7b945cc06681f73f38988a267cd1ba6a546a8352a57ffa5.json b/tests/integration/responses/recordings/d0996e5e764315da97c15eb62c37f1c374b3e46a69f5526ef45f70fd24fd18b9.json similarity index 99% rename from tests/integration/responses/recordings/2df2850b224738a8f7b945cc06681f73f38988a267cd1ba6a546a8352a57ffa5.json rename to 
tests/integration/responses/recordings/d0996e5e764315da97c15eb62c37f1c374b3e46a69f5526ef45f70fd24fd18b9.json index 5e507106ac..e102b33960 100644 --- a/tests/integration/responses/recordings/2df2850b224738a8f7b945cc06681f73f38988a267cd1ba6a546a8352a57ffa5.json +++ b/tests/integration/responses/recordings/d0996e5e764315da97c15eb62c37f1c374b3e46a69f5526ef45f70fd24fd18b9.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/8dba857db6730d6ab259e2c80f5cdd1f8bbcce22fb4a14df4389f96e2db04eaa.json b/tests/integration/responses/recordings/d2cf88feac32878a5072007ed404bbc48650109698c6d0ac664a92426b242301.json similarity index 98% rename from tests/integration/responses/recordings/8dba857db6730d6ab259e2c80f5cdd1f8bbcce22fb4a14df4389f96e2db04eaa.json rename to tests/integration/responses/recordings/d2cf88feac32878a5072007ed404bbc48650109698c6d0ac664a92426b242301.json index ce613bbaf8..315fb4ae9f 100644 --- a/tests/integration/responses/recordings/8dba857db6730d6ab259e2c80f5cdd1f8bbcce22fb4a14df4389f96e2db04eaa.json +++ b/tests/integration/responses/recordings/d2cf88feac32878a5072007ed404bbc48650109698c6d0ac664a92426b242301.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_multi_turn_and_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", 
diff --git a/tests/integration/responses/recordings/448f5aad2604000a0616341f2a97086fef7087f561fa24e21b5f665b04cebff2.json b/tests/integration/responses/recordings/d490ef38aa100255e6031998088783380dbe25ba782d6e71e1ff3544628c06ab.json similarity index 99% rename from tests/integration/responses/recordings/448f5aad2604000a0616341f2a97086fef7087f561fa24e21b5f665b04cebff2.json rename to tests/integration/responses/recordings/d490ef38aa100255e6031998088783380dbe25ba782d6e71e1ff3544628c06ab.json index b1ada77efa..16b936f4e4 100644 --- a/tests/integration/responses/recordings/448f5aad2604000a0616341f2a97086fef7087f561fa24e21b5f665b04cebff2.json +++ b/tests/integration/responses/recordings/d490ef38aa100255e6031998088783380dbe25ba782d6e71e1ff3544628c06ab.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_non_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/723ec06a9462e87536dbfaeb2b9396943dc3ea635425d9a1262212a1ccec2910.json b/tests/integration/responses/recordings/d508c01d0d6670167da0e8758e9ee46fcde6afb092f0b5b57da9fb1c40b3d506.json similarity index 98% rename from tests/integration/responses/recordings/723ec06a9462e87536dbfaeb2b9396943dc3ea635425d9a1262212a1ccec2910.json rename to tests/integration/responses/recordings/d508c01d0d6670167da0e8758e9ee46fcde6afb092f0b5b57da9fb1c40b3d506.json index 5c6a486afc..b1cd3601a6 100644 --- a/tests/integration/responses/recordings/723ec06a9462e87536dbfaeb2b9396943dc3ea635425d9a1262212a1ccec2910.json +++ b/tests/integration/responses/recordings/d508c01d0d6670167da0e8758e9ee46fcde6afb092f0b5b57da9fb1c40b3d506.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/9ecdff3fb145793283aa9233afb27e687c1aa26f93586f5a624b7a78c4c15c3c.json b/tests/integration/responses/recordings/d510a5744713675c17f35596254f6704f32c46c11a85e585906992614239da54.json similarity index 95% rename from tests/integration/responses/recordings/9ecdff3fb145793283aa9233afb27e687c1aa26f93586f5a624b7a78c4c15c3c.json rename to tests/integration/responses/recordings/d510a5744713675c17f35596254f6704f32c46c11a85e585906992614239da54.json index 7d7b3f6d34..e0bbca0dc2 100644 --- a/tests/integration/responses/recordings/9ecdff3fb145793283aa9233afb27e687c1aa26f93586f5a624b7a78c4c15c3c.json +++ b/tests/integration/responses/recordings/d510a5744713675c17f35596254f6704f32c46c11a85e585906992614239da54.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_responses_errors.py::TestResponsesAPIStreamingErrors::test_non_vision_model_with_base64_image_returns_server_error[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/06653e6e4563eb378ee18044c32cd36e1ace560b2ab90d84c901b67dde0827f8.json b/tests/integration/responses/recordings/d6784a627287aef1c3171bdbf1d4bdb3523bd9c5b73088a745c431dc2514ea16.json similarity index 99% rename from tests/integration/responses/recordings/06653e6e4563eb378ee18044c32cd36e1ace560b2ab90d84c901b67dde0827f8.json 
rename to tests/integration/responses/recordings/d6784a627287aef1c3171bdbf1d4bdb3523bd9c5b73088a745c431dc2514ea16.json index b5d6c97404..75c1947e9b 100644 --- a/tests/integration/responses/recordings/06653e6e4563eb378ee18044c32cd36e1ace560b2ab90d84c901b67dde0827f8.json +++ b/tests/integration/responses/recordings/d6784a627287aef1c3171bdbf1d4bdb3523bd9c5b73088a745c431dc2514ea16.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/95d123bff66974b73beabe29f92030b8a24ebda6f06e83ed31b9a0749c60199e.json b/tests/integration/responses/recordings/d7b50078103e88d20c4955ba23c55193ed0cc70e43e577cbb0a3dec164e250c4.json similarity index 98% rename from tests/integration/responses/recordings/95d123bff66974b73beabe29f92030b8a24ebda6f06e83ed31b9a0749c60199e.json rename to tests/integration/responses/recordings/d7b50078103e88d20c4955ba23c55193ed0cc70e43e577cbb0a3dec164e250c4.json index 1c9db0929b..68f7c36128 100644 --- a/tests/integration/responses/recordings/95d123bff66974b73beabe29f92030b8a24ebda6f06e83ed31b9a0749c60199e.json +++ b/tests/integration/responses/recordings/d7b50078103e88d20c4955ba23c55193ed0cc70e43e577cbb0a3dec164e250c4.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_prompt_template_no_variables[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff 
--git a/tests/integration/responses/recordings/eebe9f0ac51599aa9d0f81b0d0a8b2a279054d2263d728fc1eb0b6250004b339.json b/tests/integration/responses/recordings/d89ee3f0b9b9ae608fd6bfd7efa57378f61a28811513ccddddffc77ff05a0417.json similarity index 98% rename from tests/integration/responses/recordings/eebe9f0ac51599aa9d0f81b0d0a8b2a279054d2263d728fc1eb0b6250004b339.json rename to tests/integration/responses/recordings/d89ee3f0b9b9ae608fd6bfd7efa57378f61a28811513ccddddffc77ff05a0417.json index 15df46028a..0db0c69e13 100644 --- a/tests/integration/responses/recordings/eebe9f0ac51599aa9d0f81b0d0a8b2a279054d2263d728fc1eb0b6250004b339.json +++ b/tests/integration/responses/recordings/d89ee3f0b9b9ae608fd6bfd7efa57378f61a28811513ccddddffc77ff05a0417.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/3d42f101f95388471691e0c8d8c1a37a67a2234432874a79b684d8bd40f73eff.json b/tests/integration/responses/recordings/e779e2713de2d91ee559033a5e77397d77d36726335d9c6c1b22e5c0ad1c22ac.json similarity index 98% rename from tests/integration/responses/recordings/3d42f101f95388471691e0c8d8c1a37a67a2234432874a79b684d8bd40f73eff.json rename to tests/integration/responses/recordings/e779e2713de2d91ee559033a5e77397d77d36726335d9c6c1b22e5c0ad1c22ac.json index 17f2fac8a9..1cb39b7331 100644 --- a/tests/integration/responses/recordings/3d42f101f95388471691e0c8d8c1a37a67a2234432874a79b684d8bd40f73eff.json +++ b/tests/integration/responses/recordings/e779e2713de2d91ee559033a5e77397d77d36726335d9c6c1b22e5c0ad1c22ac.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/2eafd5c6eb8e2cff3433007bf23b19e6d66547b2dcab1b1892d5bd53f8ad34b4.json b/tests/integration/responses/recordings/ecb3bc2e13c604f0003308df4fdb94149395c39cb195974f2893fbf340e50589.json similarity index 95% rename from tests/integration/responses/recordings/2eafd5c6eb8e2cff3433007bf23b19e6d66547b2dcab1b1892d5bd53f8ad34b4.json rename to tests/integration/responses/recordings/ecb3bc2e13c604f0003308df4fdb94149395c39cb195974f2893fbf340e50589.json index 7a7249a3c3..c06fda78f5 100644 --- a/tests/integration/responses/recordings/2eafd5c6eb8e2cff3433007bf23b19e6d66547b2dcab1b1892d5bd53f8ad34b4.json +++ b/tests/integration/responses/recordings/ecb3bc2e13c604f0003308df4fdb94149395c39cb195974f2893fbf340e50589.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_responses_errors.py::TestResponsesAPIStreamingErrors::test_non_vision_model_returns_error_for_image_input[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/f7ba1ce7bbeef963f2002b812fd07386cecf9179bfc6f92a2e09348edbfd7f07.json b/tests/integration/responses/recordings/ee3d5360f48357a1f00d8fc9f1b01844fcb08bf9ff738f34d7f915514173f2c4.json similarity index 99% rename from tests/integration/responses/recordings/f7ba1ce7bbeef963f2002b812fd07386cecf9179bfc6f92a2e09348edbfd7f07.json rename to 
tests/integration/responses/recordings/ee3d5360f48357a1f00d8fc9f1b01844fcb08bf9ff738f34d7f915514173f2c4.json index fe4ed13ed2..8b7482585a 100644 --- a/tests/integration/responses/recordings/f7ba1ce7bbeef963f2002b812fd07386cecf9179bfc6f92a2e09348edbfd7f07.json +++ b/tests/integration/responses/recordings/ee3d5360f48357a1f00d8fc9f1b01844fcb08bf9ff738f34d7f915514173f2c4.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/2a08014e5c5da31c16896c0e59ea432e71eb8eb83a3cf25a3e4ab7fa5700bd6b.json b/tests/integration/responses/recordings/f4770c02fdb697b36717c6713dacd87b12b16b020611dd32866110cbf0c699d9.json similarity index 98% rename from tests/integration/responses/recordings/2a08014e5c5da31c16896c0e59ea432e71eb8eb83a3cf25a3e4ab7fa5700bd6b.json rename to tests/integration/responses/recordings/f4770c02fdb697b36717c6713dacd87b12b16b020611dd32866110cbf0c699d9.json index 5670d23ef1..ae54912b15 100644 --- a/tests/integration/responses/recordings/2a08014e5c5da31c16896c0e59ea432e71eb8eb83a3cf25a3e4ab7fa5700bd6b.json +++ b/tests/integration/responses/recordings/f4770c02fdb697b36717c6713dacd87b12b16b020611dd32866110cbf0c699d9.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_reasoning.py::test_reasoning_basic_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git 
a/tests/integration/responses/recordings/b4d86c5231fc155339edbcb937b8e91a4be8c2c171a49120991e0eab9582b806.json b/tests/integration/responses/recordings/f91a97f69204421e717fc7d7660ea2123789abe1d080020cbe97402d51e074eb.json similarity index 98% rename from tests/integration/responses/recordings/b4d86c5231fc155339edbcb937b8e91a4be8c2c171a49120991e0eab9582b806.json rename to tests/integration/responses/recordings/f91a97f69204421e717fc7d7660ea2123789abe1d080020cbe97402d51e074eb.json index 26d44c5072..ef8065854c 100644 --- a/tests/integration/responses/recordings/b4d86c5231fc155339edbcb937b8e91a4be8c2c171a49120991e0eab9582b806.json +++ b/tests/integration/responses/recordings/f91a97f69204421e717fc7d7660ea2123789abe1d080020cbe97402d51e074eb.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_basic_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/49aa949715243abb9094994f88b7fdc37a54757ec4c6579e6640d4e62d148c87.json b/tests/integration/responses/recordings/fd5280a00f415baa514839d1903263035e94bea9462e613798c48a8c39697217.json similarity index 98% rename from tests/integration/responses/recordings/49aa949715243abb9094994f88b7fdc37a54757ec4c6579e6640d4e62d148c87.json rename to tests/integration/responses/recordings/fd5280a00f415baa514839d1903263035e94bea9462e613798c48a8c39697217.json index ab6e7f5468..e38ff0baf4 100644 --- a/tests/integration/responses/recordings/49aa949715243abb9094994f88b7fdc37a54757ec4c6579e6640d4e62d148c87.json +++ b/tests/integration/responses/recordings/fd5280a00f415baa514839d1903263035e94bea9462e613798c48a8c39697217.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_context_loading[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/models-ce53c4fcbb40594a10b3d758a338cccbd471bba93e04d38264d663c6dcec8578-31ec6c83.json b/tests/integration/responses/recordings/models-2f9eb79dbfa9f25cae2fdc739d4358a242c429f68d4fb635ddbeeb55595b7027-31ec6c83.json similarity index 99% rename from tests/integration/responses/recordings/models-ce53c4fcbb40594a10b3d758a338cccbd471bba93e04d38264d663c6dcec8578-31ec6c83.json rename to tests/integration/responses/recordings/models-2f9eb79dbfa9f25cae2fdc739d4358a242c429f68d4fb635ddbeeb55595b7027-31ec6c83.json index 5d2b85f526..2003c2f1db 100644 --- a/tests/integration/responses/recordings/models-ce53c4fcbb40594a10b3d758a338cccbd471bba93e04d38264d663c6dcec8578-31ec6c83.json +++ b/tests/integration/responses/recordings/models-2f9eb79dbfa9f25cae2fdc739d4358a242c429f68d4fb635ddbeeb55595b7027-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_reasoning.py::test_reasoning_basic_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/responses/recordings/models-d487ac38a1f3f3bb9ebc5254525862e9fcefdf01d5184789f6ba4b3039dc17ba-31ec6c83.json b/tests/integration/responses/recordings/models-48bcf1eca39637c8a9767f122e10ed8a6421f340543238789bbbe8c516b79114-31ec6c83.json similarity index 99% rename from 
tests/integration/responses/recordings/models-d487ac38a1f3f3bb9ebc5254525862e9fcefdf01d5184789f6ba4b3039dc17ba-31ec6c83.json rename to tests/integration/responses/recordings/models-48bcf1eca39637c8a9767f122e10ed8a6421f340543238789bbbe8c516b79114-31ec6c83.json index 7ed290b164..e95bcad895 100644 --- a/tests/integration/responses/recordings/models-d487ac38a1f3f3bb9ebc5254525862e9fcefdf01d5184789f6ba4b3039dc17ba-31ec6c83.json +++ b/tests/integration/responses/recordings/models-48bcf1eca39637c8a9767f122e10ed8a6421f340543238789bbbe8c516b79114-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/responses/recordings/models-0535aa1a1942a5b3191f8d08fb57457ef937494a6e6cb46918fd196f77016145-31ec6c83.json b/tests/integration/responses/recordings/models-c7239f3a8bc25af23106c20d3a0844f33356aaddf6594e08964cde42375f54dd-31ec6c83.json similarity index 99% rename from tests/integration/responses/recordings/models-0535aa1a1942a5b3191f8d08fb57457ef937494a6e6cb46918fd196f77016145-31ec6c83.json rename to tests/integration/responses/recordings/models-c7239f3a8bc25af23106c20d3a0844f33356aaddf6594e08964cde42375f54dd-31ec6c83.json index 0090cedb61..2c2d94637b 100644 --- a/tests/integration/responses/recordings/models-0535aa1a1942a5b3191f8d08fb57457ef937494a6e6cb46918fd196f77016145-31ec6c83.json +++ b/tests/integration/responses/recordings/models-c7239f3a8bc25af23106c20d3a0844f33356aaddf6594e08964cde42375f54dd-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_with_web_search[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": 
"https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/responses/recordings/models-03ffd7273c9bf0c84abd4750391c19dfc7545772caacd9bdd02c953c5c3fb6cd-31ec6c83.json b/tests/integration/responses/recordings/models-e3014308c84bda792e9ce27641b08ece821283fca29f3ced3ffbff35583d1f9b-31ec6c83.json similarity index 99% rename from tests/integration/responses/recordings/models-03ffd7273c9bf0c84abd4750391c19dfc7545772caacd9bdd02c953c5c3fb6cd-31ec6c83.json rename to tests/integration/responses/recordings/models-e3014308c84bda792e9ce27641b08ece821283fca29f3ced3ffbff35583d1f9b-31ec6c83.json index 7426212fe6..b223fe0158 100644 --- a/tests/integration/responses/recordings/models-03ffd7273c9bf0c84abd4750391c19dfc7545772caacd9bdd02c953c5c3fb6cd-31ec6c83.json +++ b/tests/integration/responses/recordings/models-e3014308c84bda792e9ce27641b08ece821283fca29f3ced3ffbff35583d1f9b-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_with_function_tools[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/responses/recordings/models-ffb55818ff4f53a50d044fbf570e18ed249a0245bfe68a7e91eed69ccf5f2193-31ec6c83.json b/tests/integration/responses/recordings/models-e400deaab35580927e2926f5b0d13b37ce15a07c9c0a13e6ecc80ea805e1d61b-31ec6c83.json similarity index 99% rename from tests/integration/responses/recordings/models-ffb55818ff4f53a50d044fbf570e18ed249a0245bfe68a7e91eed69ccf5f2193-31ec6c83.json rename to tests/integration/responses/recordings/models-e400deaab35580927e2926f5b0d13b37ce15a07c9c0a13e6ecc80ea805e1d61b-31ec6c83.json index 
f8b2bf499f..2ef3e42b7c 100644 --- a/tests/integration/responses/recordings/models-ffb55818ff4f53a50d044fbf570e18ed249a0245bfe68a7e91eed69ccf5f2193-31ec6c83.json +++ b/tests/integration/responses/recordings/models-e400deaab35580927e2926f5b0d13b37ce15a07c9c0a13e6ecc80ea805e1d61b-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_extra_body_guided_choice[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/responses/recordings/models-879ae66c787975780a21251d2755da3ba56850f94c78577db27ba1e5ff8a95e8-31ec6c83.json b/tests/integration/responses/recordings/models-fca7382830039be10bc959fcbb2299c4835921a886e43c02c4f68ff4872e3024-31ec6c83.json similarity index 99% rename from tests/integration/responses/recordings/models-879ae66c787975780a21251d2755da3ba56850f94c78577db27ba1e5ff8a95e8-31ec6c83.json rename to tests/integration/responses/recordings/models-fca7382830039be10bc959fcbb2299c4835921a886e43c02c4f68ff4872e3024-31ec6c83.json index fed332b52f..b5099e8a63 100644 --- a/tests/integration/responses/recordings/models-879ae66c787975780a21251d2755da3ba56850f94c78577db27ba1e5ff8a95e8-31ec6c83.json +++ b/tests/integration/responses/recordings/models-fca7382830039be10bc959fcbb2299c4835921a886e43c02c4f68ff4872e3024-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_non_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/unit/providers/inference/bedrock/test_openai_sdk_integration.py 
b/tests/unit/providers/inference/bedrock/test_openai_sdk_integration.py new file mode 100644 index 0000000000..b11c8ccb40 --- /dev/null +++ b/tests/unit/providers/inference/bedrock/test_openai_sdk_integration.py @@ -0,0 +1,154 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Tests for Bedrock OpenAI SDK integration with SigV4 authentication. + +These tests verify: +1. Base URL uses bedrock-runtime hostname +2. SigV4 signing uses "bedrock" as the service name (NOT "bedrock-runtime") +3. In SigV4 mode, no Bearer Authorization header is present +4. STS credentials work properly with temporary tokens +""" + +import importlib.util +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +HAS_BOTO3 = importlib.util.find_spec("boto3") is not None + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +class TestBedrockOpenAISDKIntegration: + """Tests for Bedrock OpenAI SDK integration with SigV4 auth.""" + + def test_base_url_uses_bedrock_runtime_hostname(self): + """Base URL should use bedrock-runtime hostname (endpoint prefix).""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + base_url = adapter.get_base_url() + # Hostname uses "bedrock-runtime" (endpoint prefix) + assert base_url == "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1" + + def test_sigv4_uses_bedrock_signing_name_not_bedrock_runtime(self): + """ + SigV4 signing must use 'bedrock' as the service name, NOT 'bedrock-runtime'. + + The hostname is bedrock-runtime..amazonaws.com (endpoint prefix), + but the SigV4 credential scope uses the signing name 'bedrock'. 
+ This is defined in botocore's service metadata. + """ + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-west-2") + adapter = BedrockInferenceAdapter(config=config) + + with patch("llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth") as mock_auth_cls: + mock_auth_cls.return_value = MagicMock() + adapter._build_sigv4_http_client() + + # Verify signing name is "bedrock", NOT "bedrock-runtime" + call_kwargs = mock_auth_cls.call_args[1] + assert call_kwargs["service"] == "bedrock", ( + "SigV4 must use signing name 'bedrock', not endpoint prefix 'bedrock-runtime'" + ) + + def test_sigv4_mode_uses_placeholder_api_key(self): + """In SigV4 mode, api_key should be a placeholder (SigV4 auth replaces the header).""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + with patch.object(adapter, "get_request_provider_data", return_value=None): + # Patch SigV4Auth to avoid actual boto3 calls + with patch("llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth") as mock_auth_cls: + mock_auth = MagicMock() + mock_auth_cls.return_value = mock_auth + + client = adapter.client + + # OpenAI SDK requires a non-empty api_key for validation. + # We use a placeholder that SigV4 auth replaces with proper signature. + # This follows the same pattern as the OCI provider. 
+ assert client.api_key == "" + + def test_sigv4_authorization_header_format(self): + """SigV4 Authorization header should start with AWS4-HMAC-SHA256, not Bearer.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = None + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + # Use "bedrock" signing name (correct) + auth = BedrockSigV4Auth(region="us-west-2", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + client.post( + "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + auth_header = captured_request.headers.get("authorization", "") + + # Must be SigV4, NOT Bearer + assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}" + assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer" + + def test_sts_credentials_include_security_token(self): + """SigV4 auth should include x-amz-security-token for STS credentials.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = "AQoDYXdzEJr..." 
+ + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + # Use "bedrock" signing name (correct) + auth = BedrockSigV4Auth(region="us-west-2", service="bedrock") + + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions", + headers={"content-type": "application/json"}, + content=b'{"model": "test"}', + ) + + gen = auth.auth_flow(request) + signed_request = next(gen) + + # Verify session token header is present for STS credentials + assert "x-amz-security-token" in signed_request.headers + assert signed_request.headers["x-amz-security-token"] == mock_frozen_creds.token diff --git a/tests/unit/providers/inference/bedrock/test_sigv4_auth.py b/tests/unit/providers/inference/bedrock/test_sigv4_auth.py new file mode 100644 index 0000000000..a345247e00 --- /dev/null +++ b/tests/unit/providers/inference/bedrock/test_sigv4_auth.py @@ -0,0 +1,885 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Unit tests for Bedrock SigV4 authentication. + +These tests verify: +1. SigV4 auth handler correctly signs requests +2. Auth mode detection (bearer vs SigV4) +3. Credential chain integration +4. 
Error handling +""" + +# Check if boto3 is available for SigV4 tests +import importlib.util +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +HAS_BOTO3 = importlib.util.find_spec("boto3") is not None + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +class TestBedrockSigV4Auth: + """Tests for BedrockSigV4Auth httpx.Auth implementation.""" + + def test_auth_flow_signs_request(self): + """SigV4 auth should add AWS signature headers to request.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + # Mock boto3 credentials + mock_creds = MagicMock() + mock_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_creds.token = None + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = mock_creds.access_key + mock_frozen_creds.secret_key = mock_creds.secret_key + mock_frozen_creds.token = mock_creds.token + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-1", service="bedrock") + + # Create a test request + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + headers={"content-type": "application/json"}, + content=b'{"model": "test"}', + ) + + # Run auth flow + gen = auth.auth_flow(request) + signed_request = next(gen) + + # Verify SigV4 headers were added + assert "authorization" in signed_request.headers + assert "x-amz-date" in signed_request.headers + assert "AWS4-HMAC-SHA256" in signed_request.headers["authorization"] + + def test_auth_flow_with_explicit_role_assumption(self): + """SigV4 auth should use RefreshableBotoSession when role_arn is provided.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import 
BedrockSigV4Auth + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAEXP_ROLE_KEY" + mock_frozen_creds.secret_key = "exp_secret" + mock_frozen_creds.token = "exp_token" + + with patch( + "llama_stack.providers.utils.bedrock.refreshable_boto_session.RefreshableBotoSession" + ) as mock_refreshable_cls: + mock_refreshable = MagicMock() + mock_refreshable_cls.return_value = mock_refreshable + mock_session = MagicMock() + mock_refreshable.refreshable_session.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth( + region="us-east-1", + aws_role_arn="arn:aws:iam::123456789012:role/test-role", + aws_web_identity_token_file="/path/to/token", + aws_role_session_name="test-session", + ) + + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + content=b"{}", + ) + + gen = auth.auth_flow(request) + signed_request = next(gen) + + # Verify RefreshableBotoSession was called with correct args + mock_refreshable_cls.assert_called_once_with( + region_name="us-east-1", + aws_access_key_id=None, + aws_secret_access_key=None, + aws_session_token=None, + profile_name=None, + sts_arn="arn:aws:iam::123456789012:role/test-role", + web_identity_token_file="/path/to/token", + session_name="test-session", + session_ttl=3600, + ) + assert signed_request.headers["x-amz-security-token"] == "exp_token" + + def test_auth_flow_with_session_token(self): + """SigV4 auth should include X-Amz-Security-Token for STS credentials.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = "FwoGZXIvYXdzEBYaDG..." 
# STS session token + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-west-2", service="bedrock") + + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions", + headers={"content-type": "application/json"}, + content=b'{"model": "test"}', + ) + + gen = auth.auth_flow(request) + signed_request = next(gen) + + # Verify session token header is present + assert "x-amz-security-token" in signed_request.headers + assert signed_request.headers["x-amz-security-token"] == mock_frozen_creds.token + + def test_auth_raises_on_missing_credentials(self): + """SigV4 auth should raise clear error when credentials unavailable.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value = None + + auth = BedrockSigV4Auth(region="us-east-1") + + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + content=b"{}", + ) + + with pytest.raises(RuntimeError, match="Failed to load AWS credentials"): + gen = auth.auth_flow(request) + next(gen) + + +class TestBedrockConfigAuthDetection: + """Tests for BedrockConfig auth mode detection.""" + + def test_has_bearer_token_with_token(self): + """Config should detect when bearer token is present.""" + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + # Use api_key as that's the alias for auth_credential + config = BedrockConfig(api_key="my-bearer-token") + assert config.has_bearer_token() is True + + def test_has_bearer_token_without_token(self): + """Config 
should detect when bearer token is absent.""" + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig() + assert config.has_bearer_token() is False + + def test_has_bearer_token_with_empty_string(self): + """Empty string should be treated as no token.""" + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(api_key="") + assert config.has_bearer_token() is False + + def test_has_bearer_token_with_whitespace(self): + """Whitespace-only string should be treated as no token.""" + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(api_key=" ") + assert config.has_bearer_token() is False + + +class TestBedrockInferenceAdapterAuthMode: + """Tests for BedrockInferenceAdapter auth mode selection.""" + + def test_should_use_sigv4_when_no_bearer_token(self): + """Adapter should use SigV4 when no bearer token configured.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + # Mock get_request_provider_data to return None + with patch.object(adapter, "get_request_provider_data", return_value=None): + assert adapter._should_use_sigv4() is True + + def test_should_not_use_sigv4_when_bearer_token_in_config(self): + """Adapter should use bearer auth when token in config.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig( + region_name="us-east-1", + api_key="my-bearer-token", # Use api_key alias + ) + adapter = BedrockInferenceAdapter(config=config) + + with patch.object(adapter, "get_request_provider_data", return_value=None): + assert 
adapter._should_use_sigv4() is False + + def test_should_not_use_sigv4_when_bearer_token_in_provider_data(self): + """Adapter should use bearer auth when token in provider data.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import ( + BedrockConfig, + BedrockProviderDataValidator, + ) + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + provider_data = BedrockProviderDataValidator(aws_bearer_token_bedrock="per-request-token") + with patch.object(adapter, "get_request_provider_data", return_value=provider_data): + assert adapter._should_use_sigv4() is False + + def test_get_extra_client_params_skips_sigv4_client_when_bearer_override(self): + """Per-request bearer token override must not be silently discarded by the SigV4 client. + + When the server starts in SigV4 mode (_sigv4_http_client is not None) but a request + arrives with aws_bearer_token_bedrock in provider data, get_extra_client_params() + must return {} so the OpenAI SDK uses the bearer token instead of SigV4 auth. 
+ """ + from unittest.mock import MagicMock + + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import ( + BedrockConfig, + BedrockProviderDataValidator, + ) + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + # Simulate that initialize() already built the SigV4 client + adapter._sigv4_http_client = MagicMock() + + # Per-request bearer token override in provider data + provider_data = BedrockProviderDataValidator(aws_bearer_token_bedrock="per-request-token") + with patch.object(adapter, "get_request_provider_data", return_value=provider_data): + params = adapter.get_extra_client_params() + # Must return {} — the bearer token path must not receive the SigV4 http_client, + # which would strip and replace the Authorization header + assert params == {} + + def test_get_extra_client_params_uses_sigv4_client_when_no_override(self): + """SigV4 client is returned when no per-request bearer token is present.""" + from unittest.mock import MagicMock + + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + mock_client = MagicMock() + adapter._sigv4_http_client = mock_client + + with patch.object(adapter, "get_request_provider_data", return_value=None): + params = adapter.get_extra_client_params() + assert params == {"http_client": mock_client} + + def test_should_use_sigv4_when_provider_data_token_is_whitespace(self): + """Adapter should use SigV4 when provider data token is whitespace-only.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import ( + BedrockConfig, + 
BedrockProviderDataValidator, + ) + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + # Whitespace-only token should be treated as no token (use SigV4) + provider_data = BedrockProviderDataValidator(aws_bearer_token_bedrock=" ") + with patch.object(adapter, "get_request_provider_data", return_value=provider_data): + assert adapter._should_use_sigv4() is True + + def test_get_api_key_returns_placeholder_for_sigv4(self): + """When using SigV4, get_api_key should return placeholder to satisfy OpenAIMixin validation.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + with patch.object(adapter, "get_request_provider_data", return_value=None): + api_key = adapter.get_api_key() + # Placeholder satisfies OpenAIMixin validation; SigV4 auth handler replaces + # the Bearer header with proper SigV4 signature (OCI pattern) + assert api_key == "" + + @pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") + def test_client_uses_sigv4_auth_when_no_bearer_token(self): + """_build_sigv4_http_client should use correct service name and pass config fields.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-west-2") + adapter = BedrockInferenceAdapter(config=config) + + with patch("llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth") as mock_auth_cls: + mock_auth_cls.return_value = MagicMock() + adapter._build_sigv4_http_client() + + # Verify auth was created with correct service name ("bedrock", not "bedrock-runtime") + call_kwargs = mock_auth_cls.call_args[1] + assert call_kwargs["region"] == "us-west-2" + 
assert call_kwargs["service"] == "bedrock" + + @pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") + def test_sigv4_http_client_cached_after_initialize(self): + """_sigv4_http_client should be created once in initialize() and reused.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + with patch.object(adapter, "_build_sigv4_http_client") as mock_build: + mock_build.return_value = MagicMock() + + # Simulate initialize() — called once + adapter._sigv4_http_client = adapter._build_sigv4_http_client() + assert mock_build.call_count == 1 + + # get_extra_client_params reuses the cached client, does NOT rebuild + with patch.object(adapter, "get_request_provider_data", return_value=None): + adapter.get_extra_client_params() + assert mock_build.call_count == 1 # still 1, not 2 + + +class TestBedrockInferenceAdapterAuthErrors: + """Tests for user-facing auth error handling.""" + + def test_sigv4_auth_error_preserves_detail_in_internal_server_error(self): + """SigV4 auth failures should return a clear, generic 500 message.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + from llama_stack_api.common.errors import InternalServerError + + adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1")) + + with pytest.raises(InternalServerError) as exc_info: + adapter._handle_auth_error( + "request signed with invalid credentials", + RuntimeError("provider boom"), + use_sigv4=True, + ) + + message = str(exc_info.value) + assert ( + message + == "Authentication failed because the configured cloud credentials could not authorize this request. 
" + "Please verify that the credentials available to the server are valid, unexpired, and allowed to access the requested model." + ) + assert "AWS_ROLE_ARN" not in message + assert "Bedrock" not in message + + def test_bearer_auth_error_preserves_detail_in_internal_server_error(self): + """Bearer auth failures should be actionable without exposing internal header/config details.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + from llama_stack_api.common.errors import InternalServerError + + adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1")) + + with pytest.raises(InternalServerError) as exc_info: + adapter._handle_auth_error( + "Error code: 401 - invalid api key format", + RuntimeError("provider boom"), + use_sigv4=False, + ) + + message = str(exc_info.value) + assert ( + message == "Authentication failed because the provided request credential was rejected. " + "Please verify that the credential is valid, unexpired, and authorized for this request." 
+ ) + assert "x-llamastack-provider-data" not in message + assert "Bedrock" not in message + + def test_expired_bearer_auth_error_preserves_sanitized_detail(self): + """Expired bearer auth failures should stay actionable without exposing config names.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + from llama_stack_api.common.errors import InternalServerError + + adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1")) + + with pytest.raises(InternalServerError) as exc_info: + adapter._handle_auth_error( + "Bearer Token has expired", + RuntimeError("provider boom"), + use_sigv4=False, + ) + + message = str(exc_info.value) + assert ( + message == "Authentication failed because the provided request credential has expired. " + "Please refresh the credential and try again, or remove it so the server can use its configured cloud credentials." 
+ ) + assert "AWS_BEARER_TOKEN_BEDROCK" not in message + assert "Bedrock" not in message + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +class TestSigV4MockTransport: + """Integration-style tests using httpx.MockTransport to verify SigV4 signing.""" + + def test_sigv4_adds_aws4_signature_header(self): + """SigV4 auth should add AWS4-HMAC-SHA256 Authorization header.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + # Track the request that gets sent + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = None + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-1", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + client.post( + "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + auth_header = captured_request.headers.get("authorization", "") + + # Verify SigV4 signature format + assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}" + assert "Credential=" in auth_header + assert "SignedHeaders=" in auth_header + assert "Signature=" in auth_header + + # Verify NO Bearer token is present + assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer token" + + def test_sigv4_no_bearer_header_when_empty_api_key(self): + 
"""When api_key is empty, no Bearer header should be added.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = None + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-1", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + client.post( + "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + auth_header = captured_request.headers.get("authorization", "") + + # Authorization header should be SigV4, not Bearer + assert "AWS4-HMAC-SHA256" in auth_header + assert "Bearer" not in auth_header + + def test_sigv4_includes_security_token_for_sts(self): + """SigV4 auth should include x-amz-security-token for STS credentials.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = "FwoGZXIvYXdzEBYaDGTestSessionToken" + + with 
patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-west-2", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + client.post( + "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + + # Verify security token header is present for STS credentials + assert "x-amz-security-token" in captured_request.headers + assert captured_request.headers["x-amz-security-token"] == mock_frozen_creds.token + + def test_sigv4_replaces_existing_bearer_header(self): + """SigV4 auth should replace any existing Bearer Authorization header.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = None + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-1", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + # Simulate what OpenAI SDK does: add Bearer header + client.post( + "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + 
headers={"Authorization": "Bearer sigv4-placeholder"},
+                )
+
+        assert captured_request is not None
+        auth_header = captured_request.headers.get("authorization", "")
+
+        # Verify SigV4 replaced the Bearer header (not appended)
+        assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}"
+        assert "Bearer" not in auth_header, "SigV4 auth should have replaced Bearer header"
+        assert "sigv4-placeholder" not in auth_header, "Placeholder should be removed"
+
+    def test_sigv4_host_header_includes_port(self):
+        """Host header should include port for non-default ports."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        captured_request = None
+
+        def capture_request(request: httpx.Request) -> httpx.Response:
+            nonlocal captured_request
+            captured_request = request
+            return httpx.Response(200, json={"status": "ok"})
+
+        mock_frozen_creds = MagicMock()
+        mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE"
+        mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+        mock_frozen_creds.token = None
+
+        with patch("boto3.Session") as mock_session_class:
+            mock_session = MagicMock()
+            mock_session_class.return_value = mock_session
+            mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds
+
+            auth = BedrockSigV4Auth(region="us-east-1", service="bedrock")
+            transport = httpx.MockTransport(capture_request)
+
+            # Use non-default port
+            with httpx.Client(auth=auth, transport=transport) as client:
+                client.post(
+                    "https://localhost:8443/openai/v1/chat/completions",
+                    json={"model": "test"},
+                )
+
+        assert captured_request is not None
+
+        # Verify the Host header includes the port
+        host_header = captured_request.headers.get("host", "")
+        assert host_header == "localhost:8443", f"Expected host with port, got: {host_header}"
+
+        # The signed Authorization header should include host in SignedHeaders
+        auth_header = captured_request.headers.get("authorization", "")
+        assert "host" in 
auth_header.lower() + + # Verify SigV4 signature format and no Bearer token + assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}" + assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer token" + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +class TestWebIdentityFederation: + """ + Tests for Web Identity Federation (IRSA, GitHub Actions OIDC). + + These tests verify that SigV4 auth works correctly with temporary credentials + obtained via AssumeRoleWithWebIdentity, as used in: + - Kubernetes/OpenShift with IRSA (IAM Roles for Service Accounts) + - GitHub Actions with OIDC (aws-actions/configure-aws-credentials) + """ + + def test_web_identity_credentials_include_session_token(self): + """ + Web identity credentials should include x-amz-security-token header. + + When using IRSA or GitHub Actions OIDC, boto3 calls AssumeRoleWithWebIdentity + which returns temporary credentials with a session token. This token must + be included in the x-amz-security-token header for the request to succeed. + """ + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + # Simulate credentials from AssumeRoleWithWebIdentity + # Note: ASIA prefix indicates temporary credentials (vs AKIA for static) + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAQWERTYUIOPASDFGH" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYzxcvbnm123" + mock_frozen_creds.token = "IQoJb3JpZ2luX2VjEBYaCXVzLWVhc3QtMSJHMEUCIQDExample..." 
# STS session token + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-2", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + client.post( + "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions", + json={ + "model": "us.meta.llama3-2-1b-instruct-v1:0", + "messages": [{"role": "user", "content": "Hi"}], + }, + ) + + assert captured_request is not None + + # Verify STS session token is included + assert "x-amz-security-token" in captured_request.headers + assert captured_request.headers["x-amz-security-token"] == mock_frozen_creds.token + + # Verify SigV4 signature is present and valid format + auth_header = captured_request.headers.get("authorization", "") + assert auth_header.startswith("AWS4-HMAC-SHA256") + assert "Credential=ASIAQWERTYUIOPASDFGH" in auth_header + assert "bedrock/aws4_request" in auth_header + + # Verify no Bearer token (would conflict with SigV4) + assert "Bearer" not in auth_header + + def test_adapter_uses_sigv4_with_web_identity_env(self, monkeypatch): + """ + BedrockInferenceAdapter should use SigV4 when web identity env vars are set. + + This simulates the Kubernetes/GitHub Actions scenario where no bearer token + is configured but AWS credentials are available via web identity federation. 
+ """ + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + # Set web identity environment variables + monkeypatch.setenv("AWS_ROLE_ARN", "arn:aws:iam::123456789012:role/test-role") + monkeypatch.setenv("AWS_WEB_IDENTITY_TOKEN_FILE", "/var/run/secrets/token") + monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-2") + + # Create adapter without bearer token (should trigger SigV4) + config = BedrockConfig(region_name="us-east-2") + adapter = BedrockInferenceAdapter(config=config) + + with patch.object(adapter, "get_request_provider_data", return_value=None): + # Should use SigV4 since no bearer token is configured + assert adapter._should_use_sigv4() is True + + # API key should be placeholder to satisfy OpenAIMixin validation (OCI pattern) + # SigV4 auth handler replaces Bearer header with proper SigV4 signature + assert adapter.get_api_key() == "" + + def test_credential_refresh_returns_fresh_credentials(self): + """ + SigV4 auth should get fresh credentials on each request. + + Web identity credentials are temporary and expire. boto3's credential + chain handles refresh automatically, but we need to call get_frozen_credentials() + on each request to get the current valid credentials. 
+ """ + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + call_count = 0 + captured_requests = [] + + def capture_request(request: httpx.Request) -> httpx.Response: + captured_requests.append(request) + return httpx.Response(200, json={"status": "ok"}) + + # Simulate credentials that change (as would happen after refresh) + initial_creds = MagicMock() + initial_creds.access_key = "ASIAFIRSTCREDENTIAL" + initial_creds.secret_key = "firstSecretKey123" + initial_creds.token = "firstSessionToken" + + refreshed_creds = MagicMock() + refreshed_creds.access_key = "ASIASECONDCREDENTIAL" + refreshed_creds.secret_key = "secondSecretKey456" + refreshed_creds.token = "secondSessionToken" + + def get_frozen_credentials(): + nonlocal call_count + call_count += 1 + # Return different credentials on second call (simulating refresh) + return initial_creds if call_count == 1 else refreshed_creds + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_creds = MagicMock() + mock_creds.get_frozen_credentials = get_frozen_credentials + mock_session.get_credentials.return_value = mock_creds + + auth = BedrockSigV4Auth(region="us-east-2", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + # First request + client.post( + "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + # Second request (after simulated credential refresh) + client.post( + "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert len(captured_requests) == 2 + + # First request should use initial credentials + first_auth = captured_requests[0].headers.get("authorization", "") + assert "ASIAFIRSTCREDENTIAL" in first_auth + assert captured_requests[0].headers.get("x-amz-security-token") == "firstSessionToken" 
+ + # Second request should use refreshed credentials + second_auth = captured_requests[1].headers.get("authorization", "") + assert "ASIASECONDCREDENTIAL" in second_auth + assert captured_requests[1].headers.get("x-amz-security-token") == "secondSessionToken" + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +class TestAsyncAuthFlow: + """Tests for async auth flow to verify non-blocking behavior.""" + + async def test_async_auth_flow_signs_request(self): + """Async auth flow should sign requests without blocking the event loop.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + async def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = None + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-1", service="bedrock") + transport = httpx.MockTransport(capture_request) + + async with httpx.AsyncClient(auth=auth, transport=transport) as client: + await client.post( + "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + auth_header = captured_request.headers.get("authorization", "") + + # Verify SigV4 signature format + assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}" + assert "Credential=" in auth_header + assert "SignedHeaders=" in auth_header + assert "Signature=" in auth_header + + # Verify NO Bearer token is 
present + assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer token" + + async def test_async_auth_flow_includes_session_token(self): + """Async auth flow should include x-amz-security-token for STS credentials.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + async def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAQWERTYUIOPASDFGH" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYzxcvbnm123" + mock_frozen_creds.token = "IQoJb3JpZ2luX2VjAsyncTest..." + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-2", service="bedrock") + transport = httpx.MockTransport(capture_request) + + async with httpx.AsyncClient(auth=auth, transport=transport) as client: + await client.post( + "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + + # Verify STS session token is included + assert "x-amz-security-token" in captured_request.headers + assert captured_request.headers["x-amz-security-token"] == mock_frozen_creds.token diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py index ed3987e898..3ab886ed0b 100644 --- a/tests/unit/providers/inference/test_bedrock_adapter.py +++ b/tests/unit/providers/inference/test_bedrock_adapter.py @@ -13,7 +13,7 @@ from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter from llama_stack.providers.remote.inference.bedrock.config import 
BedrockConfig -from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody +from llama_stack_api import InternalServerError, OpenAIChatCompletionRequestWithExtraBody def test_adapter_initialization(): @@ -28,7 +28,7 @@ def test_client_url_construction(): config = BedrockConfig(api_key="test-key", region_name="us-west-2") adapter = BedrockInferenceAdapter(config=config) - assert adapter.get_base_url() == "https://bedrock-mantle.us-west-2.api.aws/v1" + assert adapter.get_base_url() == "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1" def test_api_key_from_config(): @@ -51,7 +51,7 @@ def test_api_key_from_header_overrides_config(): async def test_authentication_error_handling(): - """Test that AuthenticationError from OpenAI client is converted to ValueError with helpful message""" + """Authentication failures should surface as a sanitized InternalServerError.""" config = BedrockConfig(api_key="invalid-key", region_name="us-east-1") adapter = BedrockInferenceAdapter(config=config) @@ -68,14 +68,19 @@ async def test_authentication_error_handling(): BedrockInferenceAdapter.__bases__[0].openai_chat_completion = mock_super try: - with pytest.raises(ValueError) as exc_info: + with pytest.raises(InternalServerError) as exc_info: params = OpenAIChatCompletionRequestWithExtraBody( model="test-model", messages=[{"role": "user", "content": "test"}] ) await adapter.openai_chat_completion(params=params) - assert "AWS Bedrock authentication failed" in str(exc_info.value) - assert "Please verify your API key" in str(exc_info.value) + message = str(exc_info.value) + assert ( + message == "Authentication failed because the provided request credential was rejected. " + "Please verify that the credential is valid, unexpired, and authorized for this request." 
+ ) + assert "Bedrock" not in message + assert "x-llamastack-provider-data" not in message finally: # Restore original method BedrockInferenceAdapter.__bases__[0].openai_chat_completion = original_method diff --git a/tests/unit/providers/inference/test_bedrock_config.py b/tests/unit/providers/inference/test_bedrock_config.py index 6220804261..60e18a45d0 100644 --- a/tests/unit/providers/inference/test_bedrock_config.py +++ b/tests/unit/providers/inference/test_bedrock_config.py @@ -35,5 +35,17 @@ def test_bedrock_config_sample(): sample = BedrockConfig.sample_run_config() assert "api_key" in sample assert "region_name" in sample + assert "aws_role_arn" in sample + assert "aws_web_identity_token_file" in sample assert sample["api_key"] == "${env.AWS_BEARER_TOKEN_BEDROCK:=}" assert sample["region_name"] == "${env.AWS_DEFAULT_REGION:=us-east-2}" + assert sample["aws_role_arn"] == "${env.AWS_ROLE_ARN:=}" + assert sample["aws_web_identity_token_file"] == "${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}" + + +def test_bedrock_config_sts_fields(monkeypatch): + monkeypatch.setenv("AWS_ROLE_ARN", "arn:aws:iam::123:role/test") + monkeypatch.setenv("AWS_WEB_IDENTITY_TOKEN_FILE", "/tmp/token") + config = BedrockConfig() + assert config.aws_role_arn == "arn:aws:iam::123:role/test" + assert config.aws_web_identity_token_file == "/tmp/token" diff --git a/tests/unit/providers/inference/test_bedrock_sts.py b/tests/unit/providers/inference/test_bedrock_sts.py new file mode 100644 index 0000000000..65354765e3 --- /dev/null +++ b/tests/unit/providers/inference/test_bedrock_sts.py @@ -0,0 +1,84 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import importlib.util +from unittest.mock import MagicMock, patch + +import pytest + +from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter +from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig +from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + +HAS_BOTO3 = importlib.util.find_spec("boto3") is not None + + +def test_sigv4_auth_initialization(): + auth = BedrockSigV4Auth( + region="us-east-1", + aws_role_arn="arn:aws:iam::123:role/test", + aws_web_identity_token_file="/tmp/token", + aws_role_session_name="test-session", + session_ttl=1800, + ) + assert auth._region == "us-east-1" + assert auth._aws_role_arn == "arn:aws:iam::123:role/test" + assert auth._aws_web_identity_token_file == "/tmp/token" + assert auth._aws_role_session_name == "test-session" + assert auth._session_ttl == 1800 + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +@patch("llama_stack.providers.utils.bedrock.sigv4_auth.logger") +def test_sigv4_auth_gets_refreshable_session(mock_logger): + with patch( + "llama_stack.providers.utils.bedrock.refreshable_boto_session.RefreshableBotoSession" + ) as mock_refreshable: + mock_session = MagicMock() + mock_refreshable.return_value.refreshable_session.return_value = mock_session + + auth = BedrockSigV4Auth( + region="us-east-1", + aws_role_arn="arn:aws:iam::123:role/test", + aws_web_identity_token_file="/tmp/token", + ) + + auth._get_credentials() + + mock_refreshable.assert_called_once_with( + region_name="us-east-1", + aws_access_key_id=None, + aws_secret_access_key=None, + aws_session_token=None, + profile_name=None, + sts_arn="arn:aws:iam::123:role/test", + web_identity_token_file="/tmp/token", + session_name=None, + session_ttl=3600, + ) + assert auth._session == mock_session + + +def test_adapter_passes_sts_config_to_auth(): + config = BedrockConfig( + region_name="us-west-2", + aws_role_arn="arn:aws:iam::123:role/test", + 
aws_web_identity_token_file="/tmp/token", + session_ttl=1800, + ) + adapter = BedrockInferenceAdapter(config=config) + + with patch("llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth") as mock_auth: + mock_auth.return_value = MagicMock() + adapter._build_sigv4_http_client() + + mock_auth.assert_called_once_with( + region="us-west-2", + service="bedrock", + aws_role_arn="arn:aws:iam::123:role/test", + aws_web_identity_token_file="/tmp/token", + session_ttl=1800, + ) diff --git a/tests/unit/providers/safety/test_bedrock_safety_adapter.py b/tests/unit/providers/safety/test_bedrock_safety_adapter.py new file mode 100644 index 0000000000..d761075fab --- /dev/null +++ b/tests/unit/providers/safety/test_bedrock_safety_adapter.py @@ -0,0 +1,35 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from unittest.mock import MagicMock, call, patch + +from llama_stack.providers.remote.safety.bedrock.bedrock import BedrockSafetyAdapter +from llama_stack.providers.remote.safety.bedrock.config import BedrockSafetyConfig + + +async def test_bedrock_safety_initialize_creates_clients(): + config = BedrockSafetyConfig( + region_name="us-west-2", + aws_role_arn="arn:aws:iam::123:role/test", + aws_web_identity_token_file="/tmp/token", + ) + adapter = BedrockSafetyAdapter(config=config) + + runtime_client = MagicMock(name="bedrock-runtime-client") + bedrock_client = MagicMock(name="bedrock-client") + with patch("llama_stack.providers.remote.safety.bedrock.bedrock.create_bedrock_client") as mock_create: + mock_create.side_effect = [runtime_client, bedrock_client] + + await adapter.initialize() + + assert adapter.bedrock_runtime_client is runtime_client + assert adapter.bedrock_client is bedrock_client + mock_create.assert_has_calls( + [ + call(config), + call(config, "bedrock"), + ] + ) diff --git 
a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py deleted file mode 100644 index c6d6e54baa..0000000000 --- a/tests/unit/providers/test_bedrock.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from types import SimpleNamespace -from unittest.mock import AsyncMock, PropertyMock, patch - -from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter -from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig -from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody - - -def test_can_create_adapter(): - config = BedrockConfig(api_key="test-key", region_name="us-east-1") - adapter = BedrockInferenceAdapter(config=config) - - assert adapter is not None - assert adapter.config.region_name == "us-east-1" - assert adapter.get_api_key() == "test-key" - - -def test_different_aws_regions(): - # just check a couple regions to verify URL construction works - config = BedrockConfig(api_key="key", region_name="us-east-1") - adapter = BedrockInferenceAdapter(config=config) - assert adapter.get_base_url() == "https://bedrock-mantle.us-east-1.api.aws/v1" - - config = BedrockConfig(api_key="key", region_name="eu-west-1") - adapter = BedrockInferenceAdapter(config=config) - assert adapter.get_base_url() == "https://bedrock-mantle.eu-west-1.api.aws/v1" - - -async def test_basic_chat_completion(): - """Test basic chat completion works with OpenAIMixin""" - config = BedrockConfig(api_key="k", region_name="us-east-1") - adapter = BedrockInferenceAdapter(config=config) - - class FakeModelStore: - async def has_model(self, model_id): - return True - - async def get_model(self, model_id): - return SimpleNamespace(provider_resource_id="meta.llama3-1-8b-instruct-v1:0") - - adapter.model_store = FakeModelStore() - - 
fake_response = SimpleNamespace( - id="chatcmpl-123", - choices=[SimpleNamespace(message=SimpleNamespace(content="Hello!", role="assistant"), finish_reason="stop")], - ) - - mock_create = AsyncMock(return_value=fake_response) - - class FakeClient: - def __init__(self): - self.chat = SimpleNamespace(completions=SimpleNamespace(create=mock_create)) - - with patch.object(type(adapter), "client", new_callable=PropertyMock, return_value=FakeClient()): - params = OpenAIChatCompletionRequestWithExtraBody( - model="llama3-1-8b", - messages=[{"role": "user", "content": "hello"}], - stream=False, - ) - response = await adapter.openai_chat_completion(params=params) - - assert response.id == "chatcmpl-123" - assert mock_create.await_count == 1 diff --git a/tests/unit/providers/utils/inference/test_network_config.py b/tests/unit/providers/utils/inference/test_network_config.py index 40678668c7..ea00c8d7aa 100644 --- a/tests/unit/providers/utils/inference/test_network_config.py +++ b/tests/unit/providers/utils/inference/test_network_config.py @@ -12,11 +12,13 @@ import pytest from llama_stack.providers.utils.inference.http_client import ( - _build_network_client_kwargs, _build_proxy_mounts, _build_ssl_context, build_http_client, ) +from llama_stack.providers.utils.inference.http_client import ( + build_network_client_kwargs as _build_network_client_kwargs, +) from llama_stack.providers.utils.inference.model_registry import ( NetworkConfig, ProxyConfig,