diff --git a/docs/docs/api-openai/provider_matrix.md b/docs/docs/api-openai/provider_matrix.md index 6f3c13b61c..0e7d37cb00 100644 --- a/docs/docs/api-openai/provider_matrix.md +++ b/docs/docs/api-openai/provider_matrix.md @@ -33,7 +33,7 @@ Models, endpoints, and versions used during test recordings. | Provider | Model(s) | Endpoint | Version Info | |----------|----------|----------|--------------| | azure | gpt-4o | llama-stack-test.openai.azure.com, lls-test.openai.azure.com | openai sdk: 2.5.0 | -| bedrock | openai.gpt-oss-20b | bedrock-mantle.us-east-2.api.aws, bedrock-mantle.us-west-2.api.aws | openai sdk: 2.5.0 | +| bedrock | openai.gpt-oss-20b | bedrock-mantle.us-west-2.api.aws, bedrock-runtime.us-east-2.amazonaws.com | openai sdk: 2.5.0 | | ollama | gpt-oss:20b | — | openai sdk: 2.5.0, vllm server: 0.9.2rc2.dev136+g0b382b53a.d20250924 | | openai | gpt-4o, o4-mini, text-embedding-3-small | api.openai.com | openai sdk: 2.5.0 | | vllm | Qwen/Qwen3-0.6B | — | openai sdk: 2.5.0, vllm server: 0.18.1rc1.dev197+g0e9358c11 | diff --git a/docs/docs/providers/inference/remote_bedrock.mdx b/docs/docs/providers/inference/remote_bedrock.mdx index be31b9a983..3095d3c6cc 100644 --- a/docs/docs/providers/inference/remote_bedrock.mdx +++ b/docs/docs/providers/inference/remote_bedrock.mdx @@ -34,11 +34,25 @@ AWS Bedrock inference provider using OpenAI compatible endpoint. | `network.timeout.connect` | `float \| None` | No | | Connection timeout in seconds. | | `network.timeout.read` | `float \| None` | No | | Read timeout in seconds. | | `network.headers` | `dict[str, str] \| None` | No | | Additional HTTP headers to include in all requests. | -| `region_name` | `str` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint | +| `aws_access_key_id` | `SecretStr \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | +| `aws_secret_access_key` | `SecretStr \| None` | No | | The AWS secret access key to use. 
Default use environment variable: AWS_SECRET_ACCESS_KEY | +| `aws_session_token` | `SecretStr \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | +| `aws_role_arn` | `str \| None` | No | | The AWS role ARN to assume. Default use environment variable: AWS_ROLE_ARN | +| `aws_web_identity_token_file` | `str \| None` | No | | The path to the web identity token file. Default use environment variable: AWS_WEB_IDENTITY_TOKEN_FILE | +| `aws_role_session_name` | `str \| None` | No | | The session name to use when assuming a role. Default use environment variable: AWS_ROLE_SESSION_NAME | +| `region_name` | `str \| None` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint | +| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | +| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS | +| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE | +| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. | +| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. | +| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). 
| ## Sample Configuration ```yaml api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} +aws_role_arn: ${env.AWS_ROLE_ARN:=} +aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=} ``` diff --git a/docs/docs/providers/safety/remote_bedrock.mdx b/docs/docs/providers/safety/remote_bedrock.mdx index fc18f726ea..0d3de70b94 100644 --- a/docs/docs/providers/safety/remote_bedrock.mdx +++ b/docs/docs/providers/safety/remote_bedrock.mdx @@ -36,6 +36,9 @@ AWS Bedrock safety provider for content moderation using AWS's safety services. | `aws_access_key_id` | `SecretStr \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | | `aws_secret_access_key` | `SecretStr \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | | `aws_session_token` | `SecretStr \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | +| `aws_role_arn` | `str \| None` | No | | The AWS role ARN to assume. Default use environment variable: AWS_ROLE_ARN | +| `aws_web_identity_token_file` | `str \| None` | No | | The path to the web identity token file. Default use environment variable: AWS_WEB_IDENTITY_TOKEN_FILE | +| `aws_role_session_name` | `str \| None` | No | | The session name to use when assuming a role. Default use environment variable: AWS_ROLE_SESSION_NAME | | `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION | | `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | | `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. 
Default use environment variable: AWS_MAX_ATTEMPTS | diff --git a/src/llama_stack/core/request_headers.py b/src/llama_stack/core/request_headers.py index e0a61c4ba6..a55e13e016 100644 --- a/src/llama_stack/core/request_headers.py +++ b/src/llama_stack/core/request_headers.py @@ -7,7 +7,7 @@ import contextvars import json from contextlib import AbstractContextManager -from typing import Any +from typing import TYPE_CHECKING, Any, cast from starlette.types import Scope @@ -16,6 +16,9 @@ from .utils.dynamic import instantiate_class_type +if TYPE_CHECKING: + from llama_stack_api import ProviderSpec + log = get_logger(name=__name__, category="core") # Context variable for request provider data and auth attributes @@ -26,6 +29,9 @@ class RequestProviderDataContext(AbstractContextManager[None]): """Context manager for request provider data""" def __init__(self, provider_data: dict[str, Any] | None = None, user: User | None = None) -> None: + if provider_data is not None and not isinstance(provider_data, dict): + log.error("Provider data must be a JSON object") + provider_data = None self.provider_data = provider_data or {} if user: self.provider_data["__authenticated_user"] = user @@ -45,6 +51,8 @@ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: class NeedsRequestProviderData: """Mixin for providers that require per-request provider data from request headers.""" + __provider_spec__: "ProviderSpec" + def get_request_provider_data(self) -> Any: spec = self.__provider_spec__ # type: ignore[attr-defined] if not spec: @@ -84,11 +92,20 @@ def parse_request_provider_data(headers: dict[str, str]) -> dict[str, Any] | Non return None try: - return json.loads(val) # type: ignore[no-any-return] + parsed = json.loads(val) except json.JSONDecodeError: log.error("Provider data not encoded as a JSON object!") return None + if parsed is None: + return None + + if not isinstance(parsed, dict): + log.error("Provider data must be encoded as a JSON object") + return None 
+ + return cast(dict[str, Any], parsed) + def request_provider_data_context(headers: dict[str, str], user: User | None = None) -> AbstractContextManager[None]: """Context manager that sets request provider data from headers and user for the duration of the context""" diff --git a/src/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py index e100ad161d..302b1c07ee 100644 --- a/src/llama_stack/distributions/ci-tests/ci_tests.py +++ b/src/llama_stack/distributions/ci-tests/ci_tests.py @@ -52,9 +52,11 @@ def get_distribution_template() -> DistributionTemplate: # Bedrock model must be pre-registered because the recording system cannot # replay model-list discovery calls against the Bedrock endpoint in CI. + # Gate on AWS_DEFAULT_REGION (required for both bearer-token and SigV4 modes) + # rather than AWS_BEARER_TOKEN_BEDROCK so the model registers in OIDC/IRSA CI too. bedrock_model = ModelInput( model_id="bedrock/openai.gpt-oss-20b", - provider_id="${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock}", + provider_id="${env.AWS_DEFAULT_REGION:+bedrock}", provider_model_id="openai.gpt-oss-20b", model_type=ModelType.llm, ) diff --git a/src/llama_stack/distributions/ci-tests/config.yaml b/src/llama_stack/distributions/ci-tests/config.yaml index 7bcbb6eee9..a51945e36f 100644 --- a/src/llama_stack/distributions/ci-tests/config.yaml +++ b/src/llama_stack/distributions/ci-tests/config.yaml @@ -47,6 +47,8 @@ providers: config: api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + aws_role_arn: ${env.AWS_ROLE_ARN:=} + aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: @@ -301,7 +303,7 @@ registered_resources: model_type: llm - metadata: {} model_id: bedrock/openai.gpt-oss-20b - provider_id: ${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock} + provider_id: ${env.AWS_DEFAULT_REGION:+bedrock} provider_model_id: 
openai.gpt-oss-20b model_type: llm shields: diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 553ed41118..9583bbcd20 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -47,6 +47,8 @@ providers: config: api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + aws_role_arn: ${env.AWS_ROLE_ARN:=} + aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: @@ -314,7 +316,7 @@ registered_resources: model_type: llm - metadata: {} model_id: bedrock/openai.gpt-oss-20b - provider_id: ${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock} + provider_id: ${env.AWS_DEFAULT_REGION:+bedrock} provider_model_id: openai.gpt-oss-20b model_type: llm shields: diff --git a/src/llama_stack/distributions/starter/config.yaml b/src/llama_stack/distributions/starter/config.yaml index fb9cf9ae84..bbe35b85a3 100644 --- a/src/llama_stack/distributions/starter/config.yaml +++ b/src/llama_stack/distributions/starter/config.yaml @@ -47,6 +47,8 @@ providers: config: api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + aws_role_arn: ${env.AWS_ROLE_ARN:=} + aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 52225576f5..86e00cf5ba 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -47,6 +47,8 @@ providers: config: api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: 
${env.AWS_DEFAULT_REGION:=us-east-2} + aws_role_arn: ${env.AWS_ROLE_ARN:=} + aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py index 6bc2976d4d..12bf1a940b 100644 --- a/src/llama_stack/providers/registry/inference.py +++ b/src/llama_stack/providers/registry/inference.py @@ -121,7 +121,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.inference, adapter_type="bedrock", provider_type="remote::bedrock", - pip_packages=[], + pip_packages=["boto3"], module="llama_stack.providers.remote.inference.bedrock", config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig", provider_data_validator="llama_stack.providers.remote.inference.bedrock.config.BedrockProviderDataValidator", diff --git a/src/llama_stack/providers/remote/inference/bedrock/__init__.py b/src/llama_stack/providers/remote/inference/bedrock/__init__.py index 4b0686b187..12726e32d0 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/__init__.py +++ b/src/llama_stack/providers/remote/inference/bedrock/__init__.py @@ -9,7 +9,8 @@ async def get_adapter_impl(config: BedrockConfig, _deps): from .bedrock import BedrockInferenceAdapter - assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}" + if not isinstance(config, BedrockConfig): + raise TypeError(f"Unexpected config type: {type(config)}") impl = BedrockInferenceAdapter(config=config) diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py index 02800d21ed..bd5ba32630 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -4,16 +4,29 @@ # This source code is licensed under the terms described in the LICENSE file in # 
the root directory of this source tree. -from collections.abc import AsyncIterator +import asyncio +from collections.abc import AsyncIterator, Iterable +from typing import TYPE_CHECKING, Any, NoReturn -from openai import AuthenticationError +if TYPE_CHECKING: + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + +import httpx +from openai import AuthenticationError, PermissionDeniedError +from pydantic import PrivateAttr from llama_stack.log import get_logger from llama_stack.providers.inline.responses.builtin.responses.types import ( AssistantMessageWithReasoning, ) +from llama_stack.providers.utils.inference.http_client import ( + build_network_client_kwargs, + network_config_fingerprint, + set_client_network_fingerprint, +) from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( + InternalServerError, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionChunkWithReasoning, @@ -25,8 +38,6 @@ OpenAIEmbeddingsResponse, ) -from .config import BedrockConfig - logger = get_logger(name=__name__, category="inference::bedrock") @@ -35,14 +46,139 @@ class BedrockInferenceAdapter(OpenAIMixin): Adapter for AWS Bedrock's OpenAI-compatible API endpoints. Supports Llama models across regions and GPT-OSS models (us-west-2 only). + + Authentication modes: + 1. Bearer token (legacy): Set AWS_BEARER_TOKEN_BEDROCK or api_key in config + 2. AWS credential chain (enterprise): Leave api_key unset, configure AWS creds + - Web Identity Federation (IRSA, GitHub Actions OIDC) + - IAM roles (EC2, ECS, Lambda) + - AWS profiles + - Static credentials + + When using AWS credential chain, requests are signed using SigV4 with the + "bedrock" signing name (note: the endpoint hostname uses "bedrock-runtime", + but SigV4 credential scope uses the signing name "bedrock"). 
+ + Web Identity Federation Examples: + + Kubernetes/OpenShift (IRSA): + Set these environment variables in your pod spec: + - AWS_ROLE_ARN=arn:aws:iam::123456789012:role/llama-stack-role + - AWS_WEB_IDENTITY_TOKEN_FILE= + Common paths: + - EKS: /var/run/secrets/eks.amazonaws.com/serviceaccount/token + - Generic K8s: /var/run/secrets/kubernetes.io/serviceaccount/token + - AWS_DEFAULT_REGION=us-east-2 + + GitHub Actions: + Use aws-actions/configure-aws-credentials with OIDC: + + permissions: + id-token: write # Required for OIDC + + steps: + - uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::123456789012:role/github-actions-role + aws-region: us-east-2 + + Credentials are automatically refreshed by boto3 when they expire. + + Note: Bedrock's OpenAI-compatible endpoint does not support /v1/models + for dynamic model discovery. Models must be pre-registered in the config. """ - config: BedrockConfig - provider_data_api_key_field: str = "aws_bearer_token_bedrock" + provider_data_api_key_field: str | None = "aws_bearer_token_bedrock" + + # built once in initialize() so get_extra_client_params() can stay sync; + # reusing one client also avoids opening a new socket per request + _sigv4_http_client: httpx.AsyncClient | None = PrivateAttr(default=None) + + @property + def _bedrock_config(self) -> "BedrockConfig": + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + if not isinstance(self.config, BedrockConfig): + raise TypeError(f"Expected BedrockConfig, got {type(self.config)}") + return self.config def get_base_url(self) -> str: - """Get base URL for OpenAI client.""" - return f"https://bedrock-mantle.{self.config.region_name}.api.aws/v1" + region = self._bedrock_config.region_name or "us-east-2" + return f"https://bedrock-runtime.{region}.amazonaws.com/openai/v1" + + def _should_use_sigv4(self) -> bool: + # checked per-request so a bearer token in provider data can override SigV4 at runtime + if 
self._bedrock_config.has_bearer_token(): + return False + + provider_data = self.get_request_provider_data() + if provider_data and provider_data.aws_bearer_token_bedrock is not None: + val = provider_data.aws_bearer_token_bedrock.get_secret_value() + if val and val.strip(): + return False + + return True + + def _build_sigv4_http_client(self) -> httpx.AsyncClient: + # lazy import so bearer-token installs don't need boto3/botocore + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + cfg = self._bedrock_config + sigv4_args: dict[str, Any] = { + "region": cfg.region_name or "us-east-2", + "service": "bedrock", # botocore signing name, not the endpoint prefix "bedrock-runtime" + "aws_access_key_id": cfg.aws_access_key_id.get_secret_value() if cfg.aws_access_key_id else None, + "aws_secret_access_key": cfg.aws_secret_access_key.get_secret_value() + if cfg.aws_secret_access_key + else None, + "aws_session_token": cfg.aws_session_token.get_secret_value() if cfg.aws_session_token else None, + "profile_name": cfg.profile_name, + "aws_role_arn": cfg.aws_role_arn, + "aws_web_identity_token_file": cfg.aws_web_identity_token_file, + "aws_role_session_name": cfg.aws_role_session_name, + "session_ttl": cfg.session_ttl, + } + auth = BedrockSigV4Auth(**{k: v for k, v in sigv4_args.items() if v is not None}) + network_config = cfg.network + network_kwargs = build_network_client_kwargs(network_config) + client = httpx.AsyncClient(auth=auth, **network_kwargs) + if network_config is not None: + set_client_network_fingerprint(client, network_config_fingerprint(network_config)) + return client + + async def initialize(self) -> None: + await super().initialize() + # no request context at init time, so only the static config is available; + # per-request bearer token overrides are handled in get_extra_client_params() + if not self._bedrock_config.has_bearer_token(): + self._sigv4_http_client = self._build_sigv4_http_client() + + def get_api_key(self) -> str | 
None: + if self._should_use_sigv4(): + # openai sdk requires a non-empty api_key; sigv4_auth will overwrite + # the resulting "Bearer " header with the real SigV4 signature + return "" + return super().get_api_key() + + def get_extra_client_params(self) -> dict[str, Any]: + # re-check per request so a runtime bearer token in provider data can bypass sigv4 + if self._sigv4_http_client is not None and self._should_use_sigv4(): + return {"http_client": self._sigv4_http_client} + return {} + + async def list_provider_model_ids(self) -> Iterable[str]: + # bedrock's openai-compatible endpoint doesn't expose /v1/models + return [] + + async def check_model_availability(self, model: str) -> bool: + # no /v1/models to query — accept whatever is registered in config + return True + + async def shutdown(self) -> None: + if self._sigv4_http_client is not None: + # shield so stack.py's 5s asyncio.wait_for doesn't abort mid-close and leak a socket + await asyncio.shield(self._sigv4_http_client.aclose()) + self._sigv4_http_client = None async def openai_embeddings( self, @@ -120,9 +256,10 @@ async def openai_chat_completion( self, params: OpenAIChatCompletionRequestWithExtraBody, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - """Override to handle authentication errors and null responses.""" + use_sigv4 = self._should_use_sigv4() + try: - logger.debug("Calling Bedrock OpenAI API", model=params.model, stream=params.stream) + logger.debug("Calling Bedrock OpenAI API", model=params.model, stream=params.stream, sigv4=use_sigv4) result = await super().openai_chat_completion(params=params) logger.debug("Bedrock API returned", result_type=type(result).__name__ if result is not None else "None") @@ -134,27 +271,44 @@ async def openai_chat_completion( ) return result - except AuthenticationError as e: + except (AuthenticationError, PermissionDeniedError) as e: + # PermissionDeniedError (403) covers SigV4 failures like SignatureDoesNotMatch + # and AccessDenied — 
same sanitized path as AuthenticationError (401) error_msg = str(e) - - # Check if this is a token expiration error - if "expired" in error_msg.lower() or "Bearer Token has expired" in error_msg: - logger.error("AWS Bedrock authentication token expired", error=error_msg) - raise ValueError( - "AWS Bedrock authentication failed: Bearer token has expired. " - "The AWS_BEARER_TOKEN_BEDROCK environment variable contains an expired pre-signed URL. " - "Please refresh your token by generating a new pre-signed URL with AWS credentials. " - "Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints." - ) from e - else: - logger.error("AWS Bedrock authentication failed", error=error_msg) - raise ValueError( - f"AWS Bedrock authentication failed: {error_msg}. " - "Please verify your API key is correct in the provider config or x-llamastack-provider-data header. " - "The API key should be a valid AWS pre-signed URL for Bedrock's OpenAI-compatible endpoint." + self._handle_auth_error(error_msg, e, use_sigv4=use_sigv4) + except (RuntimeError, OSError) as e: + # credential resolution failures (missing AWS creds, unreadable web identity + # token file, STS errors) should surface as sanitized auth errors, not raw + # exception messages that may leak internal paths or AWS account details + if use_sigv4: + logger.error("AWS Bedrock SigV4 credential resolution failed", error_type=type(e).__name__) + raise InternalServerError( + "Authentication failed because the server could not resolve AWS credentials. " + "Please verify that the server has valid AWS credentials configured." 
) from e + raise except Exception as e: logger.error( "Unexpected error calling Bedrock API", error_type=type(e).__name__, error=str(e), exc_info=True ) raise + + def _handle_auth_error(self, error_msg: str, original_error: Exception, *, use_sigv4: bool) -> NoReturn: + if use_sigv4: + logger.error("AWS Bedrock SigV4 authentication failed") + raise InternalServerError( + "Authentication failed because the configured cloud credentials could not authorize this request. " + "Please verify that the credentials available to the server are valid, unexpired, and allowed to access the requested model." + ) from original_error + + if "expired" in error_msg.lower() or "Bearer Token has expired" in error_msg: + logger.error("AWS Bedrock authentication token expired") + raise InternalServerError( + "Authentication failed because the provided request credential has expired. " + "Please refresh the credential and try again, or remove it so the server can use its configured cloud credentials." + ) from original_error + logger.error("AWS Bedrock authentication failed") + raise InternalServerError( + "Authentication failed because the provided request credential was rejected. " + "Please verify that the credential is valid, unexpired, and authorized for this request." 
+ ) from original_error diff --git a/src/llama_stack/providers/remote/inference/bedrock/config.py b/src/llama_stack/providers/remote/inference/bedrock/config.py index 667b5a6e56..810e9e5a2c 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/config.py +++ b/src/llama_stack/providers/remote/inference/bedrock/config.py @@ -8,7 +8,7 @@ from pydantic import BaseModel, Field, SecretStr -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig class BedrockProviderDataValidator(BaseModel): @@ -20,17 +20,32 @@ class BedrockProviderDataValidator(BaseModel): ) -class BedrockConfig(RemoteInferenceProviderConfig): +class BedrockConfig(BedrockBaseConfig): """Configuration for the AWS Bedrock inference provider.""" - region_name: str = Field( + auth_credential: SecretStr | None = Field( + default=None, + description="Authentication credential for the provider", + alias="api_key", + ) + # Override region_name to default to us-east-2 when unset + region_name: str | None = Field( default_factory=lambda: os.getenv("AWS_DEFAULT_REGION", "us-east-2"), description="AWS Region for the Bedrock Runtime endpoint", ) + def has_bearer_token(self) -> bool: + """Check if a bearer token is configured.""" + if self.auth_credential is None: + return False + token = self.auth_credential.get_secret_value() + return bool(token and token.strip()) + @classmethod def sample_run_config(cls, **kwargs): return { "api_key": "${env.AWS_BEARER_TOKEN_BEDROCK:=}", "region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}", + "aws_role_arn": "${env.AWS_ROLE_ARN:=}", + "aws_web_identity_token_file": "${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}", } diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py index 045dfb83b3..3f6899d073 100644 --- a/src/llama_stack/providers/remote/inference/vllm/vllm.py +++ 
b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -13,7 +13,9 @@ from llama_stack.providers.inline.responses.builtin.responses.types import ( AssistantMessageWithReasoning, ) -from llama_stack.providers.utils.inference.http_client import _build_network_client_kwargs +from llama_stack.providers.utils.inference.http_client import ( + build_network_client_kwargs as _build_network_client_kwargs, +) from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( HealthResponse, diff --git a/src/llama_stack/providers/utils/bedrock/client.py b/src/llama_stack/providers/utils/bedrock/client.py index 90b1b247b5..7f2a2d9abc 100644 --- a/src/llama_stack/providers/utils/bedrock/client.py +++ b/src/llama_stack/providers/utils/bedrock/client.py @@ -9,7 +9,7 @@ from botocore.client import BaseClient from botocore.config import Config -from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig +from llama_stack.providers.utils.bedrock.config import DEFAULT_SESSION_TTL, BedrockBaseConfig from llama_stack.providers.utils.bedrock.refreshable_boto_session import ( RefreshableBotoSession, ) @@ -25,36 +25,50 @@ def create_bedrock_client(config: BedrockBaseConfig, service_name: str = "bedroc Returns: A configured boto3 client """ - if config.aws_access_key_id and config.aws_secret_access_key: - retries_config = { - k: v - for k, v in dict( - total_max_attempts=config.total_max_attempts, - mode=config.retry_mode, - ).items() - if v is not None - } - - config_args = { - k: v - for k, v in dict( - region_name=config.region_name, - retries=retries_config if retries_config else None, - connect_timeout=config.connect_timeout, - read_timeout=config.read_timeout, - ).items() - if v is not None - } - - boto3_config = Config(**config_args) + retries_config = { + k: v + for k, v in dict( + total_max_attempts=config.total_max_attempts, + mode=config.retry_mode, + ).items() + if v is not None + } + boto3_config_args = { + k: v + for k, 
v in dict( + region_name=config.region_name, + retries=retries_config if retries_config else None, + connect_timeout=config.connect_timeout, + read_timeout=config.read_timeout, + ).items() + if v is not None + } + boto3_config = Config(**boto3_config_args) if boto3_config_args else None + if config.aws_role_arn: + # role assumption takes priority — source credentials (if any) are passed in + # so the refreshable session can use them as the base for assume-role calls + client = RefreshableBotoSession( + region_name=config.region_name, + aws_access_key_id=config.aws_access_key_id.get_secret_value() if config.aws_access_key_id else None, + aws_secret_access_key=config.aws_secret_access_key.get_secret_value() + if config.aws_secret_access_key + else None, + aws_session_token=config.aws_session_token.get_secret_value() if config.aws_session_token else None, + profile_name=config.profile_name, + sts_arn=config.aws_role_arn, + web_identity_token_file=config.aws_web_identity_token_file, + session_name=config.aws_role_session_name, + session_ttl=config.session_ttl or DEFAULT_SESSION_TTL, + ).refreshable_session() + return client.client(service_name, config=boto3_config) if boto3_config else client.client(service_name) + elif config.aws_access_key_id and config.aws_secret_access_key: session_args = { "aws_access_key_id": config.aws_access_key_id.get_secret_value(), "aws_secret_access_key": config.aws_secret_access_key.get_secret_value(), "aws_session_token": config.aws_session_token.get_secret_value() if config.aws_session_token else None, "region_name": config.region_name, "profile_name": config.profile_name, - "session_ttl": config.session_ttl, } # Remove None values @@ -63,12 +77,9 @@ def create_bedrock_client(config: BedrockBaseConfig, service_name: str = "bedroc boto3_session = boto3.session.Session(**session_args) return boto3_session.client(service_name, config=boto3_config) else: - return ( - RefreshableBotoSession( - region_name=config.region_name, - 
profile_name=config.profile_name, - session_ttl=config.session_ttl, - ) - .refreshable_session() - .client(service_name) - ) + session = RefreshableBotoSession( + region_name=config.region_name, + profile_name=config.profile_name, + session_ttl=config.session_ttl or DEFAULT_SESSION_TTL, + ).refreshable_session() + return session.client(service_name, config=boto3_config) if boto3_config else session.client(service_name) diff --git a/src/llama_stack/providers/utils/bedrock/config.py b/src/llama_stack/providers/utils/bedrock/config.py index b4bb0afa28..48385a3faf 100644 --- a/src/llama_stack/providers/utils/bedrock/config.py +++ b/src/llama_stack/providers/utils/bedrock/config.py @@ -8,6 +8,9 @@ from pydantic import Field, SecretStr +# 1 hour — matches AWS's default role expiration and minimum recommended TTL +DEFAULT_SESSION_TTL = 3600 + from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig @@ -27,6 +30,18 @@ class BedrockBaseConfig(RemoteInferenceProviderConfig): default_factory=lambda: SecretStr(val) if (val := os.getenv("AWS_SESSION_TOKEN")) else None, description="The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN", ) + aws_role_arn: str | None = Field( + default_factory=lambda: os.getenv("AWS_ROLE_ARN"), + description="The AWS role ARN to assume. Default use environment variable: AWS_ROLE_ARN", + ) + aws_web_identity_token_file: str | None = Field( + default_factory=lambda: os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE"), + description="The path to the web identity token file. Default use environment variable: AWS_WEB_IDENTITY_TOKEN_FILE", + ) + aws_role_session_name: str | None = Field( + default_factory=lambda: os.getenv("AWS_ROLE_SESSION_NAME"), + description="The session name to use when assuming a role. 
Default use environment variable: AWS_ROLE_SESSION_NAME", + ) region_name: str | None = Field( default_factory=lambda: os.getenv("AWS_DEFAULT_REGION"), description="The default AWS Region to use, for example, us-west-1 or us-west-2." @@ -57,7 +72,7 @@ class BedrockBaseConfig(RemoteInferenceProviderConfig): "The default is 60 seconds.", ) session_ttl: int | None = Field( - default_factory=lambda: int(os.getenv("AWS_SESSION_TTL", "3600")), + default_factory=lambda: int(os.getenv("AWS_SESSION_TTL", str(DEFAULT_SESSION_TTL))), description="The time in seconds till a session expires. The default is 3600 seconds (1 hour).", ) diff --git a/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py b/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py index 8dab40424d..575d8d031b 100644 --- a/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py +++ b/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py @@ -12,68 +12,69 @@ from botocore.credentials import RefreshableCredentials from botocore.session import get_session +from llama_stack.providers.utils.bedrock.config import DEFAULT_SESSION_TTL + class RefreshableBotoSession: """ - Boto Helper class which lets us create a refreshable session so that we can cache the client or resource. - - Usage - ----- - session = RefreshableBotoSession().refreshable_session() + Wraps a boto3 session so credentials refresh automatically before they expire. - client = session.client("s3") # we now can cache this client object without worrying about expiring credentials + Use this when you need a long-lived boto3 client (e.g. a cached bedrock-runtime + client) without worrying about STS credentials timing out mid-request. 
""" def __init__( self, - region_name: str = None, - profile_name: str = None, - sts_arn: str = None, - session_name: str = None, - session_ttl: int = 30000, + region_name: str | None = None, + aws_access_key_id: str | None = None, + aws_secret_access_key: str | None = None, + aws_session_token: str | None = None, + profile_name: str | None = None, + sts_arn: str | None = None, + web_identity_token_file: str | None = None, + session_name: str | None = None, + session_ttl: int = DEFAULT_SESSION_TTL, ): - """ - Initialize `RefreshableBotoSession` - - Parameters - ---------- - region_name : str (optional) - Default region when creating a new connection. - - profile_name : str (optional) - The name of a profile to use. - - sts_arn : str (optional) - The role arn to sts before creating a session. - - session_name : str (optional) - An identifier for the assumed role session. (required when `sts_arn` is given) - - session_ttl : int (optional) - An integer number to set the TTL for each session. Beyond this session, it will renew the token. 
- 50 minutes by default which is before the default role expiration of 1 hour - """ - self.region_name = region_name + self.aws_access_key_id = aws_access_key_id + self.aws_secret_access_key = aws_secret_access_key + self.aws_session_token = aws_session_token self.profile_name = profile_name self.sts_arn = sts_arn + self.web_identity_token_file = web_identity_token_file self.session_name = session_name or uuid4().hex self.session_ttl = session_ttl def __get_session_credentials(self): - """ - Get session credentials - """ - session = Session(region_name=self.region_name, profile_name=self.profile_name) + session_args = { + "region_name": self.region_name, + "profile_name": self.profile_name, + "aws_access_key_id": self.aws_access_key_id, + "aws_secret_access_key": self.aws_secret_access_key, + "aws_session_token": self.aws_session_token, + } + session_args = {k: v for k, v in session_args.items() if v is not None} + session = Session(**session_args) - # if sts_arn is given, get credential by assuming the given role if self.sts_arn: sts_client = session.client(service_name="sts", region_name=self.region_name) - response = sts_client.assume_role( - RoleArn=self.sts_arn, - RoleSessionName=self.session_name, - DurationSeconds=self.session_ttl, - ).get("Credentials") + + if self.web_identity_token_file: + with open(self.web_identity_token_file) as f: + web_identity_token = f.read().strip() + + response = sts_client.assume_role_with_web_identity( + RoleArn=self.sts_arn, + RoleSessionName=self.session_name, + WebIdentityToken=web_identity_token, + DurationSeconds=self.session_ttl, + ).get("Credentials") + else: + response = sts_client.assume_role( + RoleArn=self.sts_arn, + RoleSessionName=self.session_name, + DurationSeconds=self.session_ttl, + ).get("Credentials") credentials = { "access_key": response.get("AccessKeyId"), @@ -93,17 +94,12 @@ def __get_session_credentials(self): return credentials def refreshable_session(self) -> Session: - """ - Get refreshable boto3 
session. - """ - # Get refreshable credentials refreshable_credentials = RefreshableCredentials.create_from_metadata( metadata=self.__get_session_credentials(), refresh_using=self.__get_session_credentials, method="sts-assume-role", ) - # attach refreshable credentials current session session = get_session() session._credentials = refreshable_credentials session.set_config_variable("region", self.region_name) diff --git a/src/llama_stack/providers/utils/bedrock/sigv4_auth.py b/src/llama_stack/providers/utils/bedrock/sigv4_auth.py new file mode 100644 index 0000000000..11a0d2a605 --- /dev/null +++ b/src/llama_stack/providers/utils/bedrock/sigv4_auth.py @@ -0,0 +1,189 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +SigV4 authentication for AWS Bedrock OpenAI-compatible endpoint. + +This module provides httpx.Auth implementation that signs requests using +AWS Signature Version 4, enabling IAM/STS authentication with the Bedrock +OpenAI-compatible API endpoint. 
+ +Supported credential sources (via boto3 credential chain): +- Static credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) +- Web Identity Federation (AWS_ROLE_ARN, AWS_WEB_IDENTITY_TOKEN_FILE) +- IAM roles (IMDS for EC2, ECS task roles, Lambda execution roles) +- AWS profiles (~/.aws/credentials) + +Web Identity Federation enables keyless authentication in: +- Kubernetes/OpenShift with IRSA (IAM Roles for Service Accounts) +- GitHub Actions with OIDC (aws-actions/configure-aws-credentials) +- Any OIDC-compatible identity provider + +Environment variables for Web Identity: + AWS_ROLE_ARN: ARN of the IAM role to assume + AWS_WEB_IDENTITY_TOKEN_FILE: Path to the OIDC token file + Common paths: + - EKS: /var/run/secrets/eks.amazonaws.com/serviceaccount/token + - Generic Kubernetes: /var/run/secrets/kubernetes.io/serviceaccount/token + - GitHub Actions: Set automatically by aws-actions/configure-aws-credentials + AWS_DEFAULT_REGION: AWS region for the Bedrock endpoint + +Credentials are automatically refreshed by boto3 when they expire. + +References: +- https://docs.aws.amazon.com/bedrock/latest/userguide/inference-chat-completions.html +- https://github.com/meta-llama/llama-stack/issues/4730 +- https://github.com/opendatahub-io/llama-stack-distribution/issues/112 +""" + +from __future__ import annotations + +import asyncio +import threading +from collections.abc import AsyncGenerator, Generator +from typing import Any + +import httpx +from botocore.auth import SigV4Auth +from botocore.awsrequest import AWSRequest + +from llama_stack.log import get_logger +from llama_stack.providers.utils.bedrock.config import DEFAULT_SESSION_TTL + +logger = get_logger(name=__name__, category="providers") + + +class BedrockSigV4Auth(httpx.Auth): + """ + httpx.Auth that signs requests with AWS SigV4. + + Only signs headers that httpx won't touch after signing, to avoid + signature mismatches. 
Credential refresh is handled automatically + by boto3 for temporary credentials (STS, IRSA). + """ + + def __init__( + self, + region: str, + service: str = "bedrock", + aws_access_key_id: str | None = None, + aws_secret_access_key: str | None = None, + aws_session_token: str | None = None, + profile_name: str | None = None, + aws_role_arn: str | None = None, + aws_web_identity_token_file: str | None = None, + aws_role_session_name: str | None = None, + session_ttl: int | None = DEFAULT_SESSION_TTL, + ): + # service must be "bedrock" (the botocore signing name), not "bedrock-runtime" + # (the endpoint prefix) — using the wrong one causes SignatureDoesNotMatch + self._region = region + self._service = service + self._aws_access_key_id = aws_access_key_id + self._aws_secret_access_key = aws_secret_access_key + self._aws_session_token = aws_session_token + self._profile_name = profile_name + self._aws_role_arn = aws_role_arn + self._aws_web_identity_token_file = aws_web_identity_token_file + self._aws_role_session_name = aws_role_session_name + self._session_ttl = session_ttl or DEFAULT_SESSION_TTL + self._lock = threading.Lock() + self._session: Any = None # boto3.Session | None — Any because boto3 is an optional dep + + def _get_credentials(self) -> Any: + from llama_stack.providers.utils.bedrock.refreshable_boto_session import ( + RefreshableBotoSession, + ) + + with self._lock: + if self._session is None: + if self._aws_role_arn: + self._session = RefreshableBotoSession( + region_name=self._region, + aws_access_key_id=self._aws_access_key_id, + aws_secret_access_key=self._aws_secret_access_key, + aws_session_token=self._aws_session_token, + profile_name=self._profile_name, + sts_arn=self._aws_role_arn, + web_identity_token_file=self._aws_web_identity_token_file, + session_name=self._aws_role_session_name, + session_ttl=self._session_ttl, + ).refreshable_session() + else: + import boto3 + + self._session = boto3.Session( + region_name=self._region, + 
aws_access_key_id=self._aws_access_key_id, + aws_secret_access_key=self._aws_secret_access_key, + aws_session_token=self._aws_session_token, + profile_name=self._profile_name, + ) + + credentials = self._session.get_credentials() + if credentials is None: + raise RuntimeError( + "Failed to load AWS credentials. Ensure AWS credentials are " + "configured via environment variables (AWS_ACCESS_KEY_ID, " + "AWS_SECRET_ACCESS_KEY), IAM role, or AWS profile." + ) + return credentials.get_frozen_credentials() + + def _sign_request(self, request: httpx.Request) -> None: + credentials = self._get_credentials() + + # drop the openai sdk's "Bearer " placeholder before signing + if "authorization" in request.headers: + del request.headers["authorization"] + + # sign only stable headers — anything httpx might rewrite after this point + # would invalidate the signature, so we leave those out + host = request.headers.get("host") or str(request.url.netloc) + headers_to_sign = {"host": host} + + # only include content-type if the request already has one; injecting a + # default here would cause a mismatch if httpx sends a different value + if "content-type" in request.headers: + headers_to_sign["content-type"] = request.headers["content-type"] + + for header_name in ["x-amz-content-sha256", "x-amz-security-token"]: + if header_name in request.headers: + headers_to_sign[header_name] = request.headers[header_name] + + try: + content = request.content + except httpx.RequestNotRead: + content = request.read() + + aws_request = AWSRequest( + method=request.method, + url=str(request.url), + data=content, + headers=headers_to_sign, + ) + + signer = SigV4Auth(credentials, self._service, self._region) + signer.add_auth(aws_request) + + # copy Authorization, X-Amz-Date, and X-Amz-Security-Token back onto the live request + for key, value in aws_request.headers.items(): + request.headers[key] = value + + logger.debug( + f"SigV4 signed request: method={request.method}, " + 
f"path={request.url.path}, service={self._service}, region={self._region}" + ) + + def auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, httpx.Response, None]: + self._sign_request(request) + yield request + + async def async_auth_flow(self, request: httpx.Request) -> AsyncGenerator[httpx.Request, httpx.Response]: + # offload to a thread because credential resolution can do IMDS calls or file I/O; + # shield so a rolling-restart cancellation doesn't abort mid-sign and leave the + # connection in an inconsistent auth state + await asyncio.shield(asyncio.to_thread(self._sign_request, request)) + yield request diff --git a/src/llama_stack/providers/utils/inference/http_client.py b/src/llama_stack/providers/utils/inference/http_client.py index 5014703386..3a408f74b2 100644 --- a/src/llama_stack/providers/utils/inference/http_client.py +++ b/src/llama_stack/providers/utils/inference/http_client.py @@ -4,6 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+import hashlib +import json import ssl from pathlib import Path from typing import Any @@ -22,6 +24,29 @@ logger = get_logger(name=__name__, category="providers::utils") +_NETWORK_CONFIG_FINGERPRINT_ATTR = "_llama_stack_network_config_fingerprint" + + +def network_config_fingerprint(network_config: NetworkConfig) -> str: + dumped = json.dumps(network_config.model_dump(mode="json"), sort_keys=True, separators=(",", ":")) + return hashlib.sha256(dumped.encode("utf-8")).hexdigest() + + +def _get_client_network_fingerprint(existing_client: httpx.AsyncClient | DefaultAsyncHttpxClient) -> str | None: + if isinstance(existing_client, DefaultAsyncHttpxClient): + underlying_client = existing_client._client # type: ignore[union-attr,attr-defined] + return getattr(underlying_client, _NETWORK_CONFIG_FINGERPRINT_ATTR, None) + return getattr(existing_client, _NETWORK_CONFIG_FINGERPRINT_ATTR, None) + + +def set_client_network_fingerprint(client: httpx.AsyncClient | DefaultAsyncHttpxClient, fingerprint: str) -> None: + if isinstance(client, DefaultAsyncHttpxClient): + underlying_client = client._client # type: ignore[union-attr,attr-defined] + setattr(underlying_client, _NETWORK_CONFIG_FINGERPRINT_ATTR, fingerprint) + else: + setattr(client, _NETWORK_CONFIG_FINGERPRINT_ATTR, fingerprint) + + def _build_ssl_context(tls_config: TLSConfig) -> ssl.SSLContext | bool | str: """ Build an SSL context from TLS configuration. @@ -92,7 +117,7 @@ def _build_proxy_mounts(proxy_config: ProxyConfig) -> dict[str, httpx.AsyncHTTPT return mounts if mounts else None -def _build_network_client_kwargs(network_config: NetworkConfig | None) -> dict[str, Any]: +def build_network_client_kwargs(network_config: NetworkConfig | None) -> dict[str, Any]: """ Build httpx.AsyncClient kwargs from network configuration. 
@@ -187,7 +212,11 @@ def _merge_network_config_into_client( if network_config is None: return existing_client - network_kwargs = _build_network_client_kwargs(network_config) + fingerprint = network_config_fingerprint(network_config) + if _get_client_network_fingerprint(existing_client) == fingerprint: + return existing_client + + network_kwargs = build_network_client_kwargs(network_config) if not network_kwargs: return existing_client @@ -210,9 +239,13 @@ def _merge_network_config_into_client( # Create new client with merged config new_client = httpx.AsyncClient(**network_kwargs) + set_client_network_fingerprint(new_client, fingerprint) + # If original was DefaultAsyncHttpxClient, wrap the new client if isinstance(existing_client, DefaultAsyncHttpxClient): - return DefaultAsyncHttpxClient(client=new_client, headers=network_kwargs.get("headers")) # type: ignore[call-arg] + wrapped = DefaultAsyncHttpxClient(client=new_client, headers=network_kwargs.get("headers")) # type: ignore[call-arg] + set_client_network_fingerprint(wrapped, fingerprint) + return wrapped return new_client except Exception as e: @@ -234,7 +267,7 @@ def build_http_client(network_config: NetworkConfig | None) -> dict[str, Any]: Dictionary of kwargs to pass to httpx.AsyncClient constructor, wrapped in {"http_client": AsyncClient(...)} for use with AsyncOpenAI """ - network_kwargs = _build_network_client_kwargs(network_config) + network_kwargs = build_network_client_kwargs(network_config) if not network_kwargs: return {} diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py index 253d73b949..f1de6eb4ff 100644 --- a/src/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -18,8 +18,8 @@ from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.http_client import ( - 
_build_network_client_kwargs, _merge_network_config_into_client, + build_network_client_kwargs, ) from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.openai_compat import ( @@ -227,7 +227,7 @@ def client(self) -> AsyncOpenAI: raise ValueError(message) extra_params = self.get_extra_client_params() - network_kwargs = _build_network_client_kwargs(self.config.network) + network_kwargs = build_network_client_kwargs(self.config.network) # Handle http_client creation/merging: # - If get_extra_client_params() provides an http_client (e.g., OCI with custom auth), diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-3ff721a4.json b/tests/integration/common/recordings/models-66c1506ac9a1c3cac55a2dabb6a81f068d753f4a69ae4edeb730b7ca374186c7-3ff721a4.json similarity index 99% rename from tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-3ff721a4.json rename to tests/integration/common/recordings/models-66c1506ac9a1c3cac55a2dabb6a81f068d753f4a69ae4edeb730b7ca374186c7-3ff721a4.json index 23216fedcf..edba78d30d 100644 --- a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-3ff721a4.json +++ b/tests/integration/common/recordings/models-66c1506ac9a1c3cac55a2dabb6a81f068d753f4a69ae4edeb730b7ca374186c7-3ff721a4.json @@ -2,7 +2,7 @@ "test_id": null, "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/inference/recordings/1675987cb787dc51be76c547577dd38172be220c805c37ba73ba56fa89cd1121.json b/tests/integration/inference/recordings/7e3382aab43824f89f365fb42a16f3a9a7592d5336fd3d2101fec1e143080184.json similarity index 
98% rename from tests/integration/inference/recordings/1675987cb787dc51be76c547577dd38172be220c805c37ba73ba56fa89cd1121.json rename to tests/integration/inference/recordings/7e3382aab43824f89f365fb42a16f3a9a7592d5336fd3d2101fec1e143080184.json index f25555e9c2..0eec4eff66 100644 --- a/tests/integration/inference/recordings/1675987cb787dc51be76c547577dd38172be220c805c37ba73ba56fa89cd1121.json +++ b/tests/integration/inference/recordings/7e3382aab43824f89f365fb42a16f3a9a7592d5336fd3d2101fec1e143080184.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:streaming_02]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/inference/recordings/718bb1cd939e26c4b7e6b940c3acd97304f46e13b606e081086dd6657e9d3db2.json b/tests/integration/inference/recordings/94bb6e3fe939afbdb25c9ebfe9e44fad66ae8a37504748ca0a620f241d8537bd.json similarity index 97% rename from tests/integration/inference/recordings/718bb1cd939e26c4b7e6b940c3acd97304f46e13b606e081086dd6657e9d3db2.json rename to tests/integration/inference/recordings/94bb6e3fe939afbdb25c9ebfe9e44fad66ae8a37504748ca0a620f241d8537bd.json index 28fe67627a..4e549bf6fb 100644 --- a/tests/integration/inference/recordings/718bb1cd939e26c4b7e6b940c3acd97304f46e13b606e081086dd6657e9d3db2.json +++ b/tests/integration/inference/recordings/94bb6e3fe939afbdb25c9ebfe9e44fad66ae8a37504748ca0a620f241d8537bd.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=bedrock/openai.gpt-oss-20b-True]", "request": { "method": "POST", - "url": 
"https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/inference/recordings/42ba670916ebfdbe86b518ad1692971ccbb2c2e4a30372f43f38f8e1fc22daf0.json b/tests/integration/inference/recordings/a89357c63194c44cba530faecbbb3ecd51e713f8e6cbd96134c911cc5a9d25c2.json similarity index 98% rename from tests/integration/inference/recordings/42ba670916ebfdbe86b518ad1692971ccbb2c2e4a30372f43f38f8e1fc22daf0.json rename to tests/integration/inference/recordings/a89357c63194c44cba530faecbbb3ecd51e713f8e6cbd96134c911cc5a9d25c2.json index 1d86504abc..c599bab06c 100644 --- a/tests/integration/inference/recordings/42ba670916ebfdbe86b518ad1692971ccbb2c2e4a30372f43f38f8e1fc22daf0.json +++ b/tests/integration/inference/recordings/a89357c63194c44cba530faecbbb3ecd51e713f8e6cbd96134c911cc5a9d25c2.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:streaming_01]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/inference/recordings/de019f1d2995412fd461393c644f07c743cf81d087249ef2db974576238ab1c7.json b/tests/integration/inference/recordings/d91f3615a81a97e7d86b3cd5bb05d8d92f852af7f2286b0a20624a45f4f5f380.json similarity index 97% rename from tests/integration/inference/recordings/de019f1d2995412fd461393c644f07c743cf81d087249ef2db974576238ab1c7.json rename to tests/integration/inference/recordings/d91f3615a81a97e7d86b3cd5bb05d8d92f852af7f2286b0a20624a45f4f5f380.json index fd00c0232f..7d28deb715 100644 --- 
a/tests/integration/inference/recordings/de019f1d2995412fd461393c644f07c743cf81d087249ef2db974576238ab1c7.json +++ b/tests/integration/inference/recordings/d91f3615a81a97e7d86b3cd5bb05d8d92f852af7f2286b0a20624a45f4f5f380.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:non_streaming_02]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/inference/recordings/0241337625808a1602a0b4b1c715c7531cecf2491f2f2129e7e6884e3ffb62bb.json b/tests/integration/inference/recordings/e9d3ce4be836e5478689cf56f0375fc800499022cd1daf923df591207980513c.json similarity index 96% rename from tests/integration/inference/recordings/0241337625808a1602a0b4b1c715c7531cecf2491f2f2129e7e6884e3ffb62bb.json rename to tests/integration/inference/recordings/e9d3ce4be836e5478689cf56f0375fc800499022cd1daf923df591207980513c.json index 4024097bbe..163f37387e 100644 --- a/tests/integration/inference/recordings/0241337625808a1602a0b4b1c715c7531cecf2491f2f2129e7e6884e3ffb62bb.json +++ b/tests/integration/inference/recordings/e9d3ce4be836e5478689cf56f0375fc800499022cd1daf923df591207980513c.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:non_streaming_01]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git 
a/tests/integration/inference/recordings/d6f772f62c859028bada2fe328498364e82c1d36ed256910623577c6fcd5696c.json b/tests/integration/inference/recordings/f4fa57034e80f69cb75c300241862e98911e3124f09225d42d29239045854ce4.json similarity index 94% rename from tests/integration/inference/recordings/d6f772f62c859028bada2fe328498364e82c1d36ed256910623577c6fcd5696c.json rename to tests/integration/inference/recordings/f4fa57034e80f69cb75c300241862e98911e3124f09225d42d29239045854ce4.json index bf3f8adbf6..b546698660 100644 --- a/tests/integration/inference/recordings/d6f772f62c859028bada2fe328498364e82c1d36ed256910623577c6fcd5696c.json +++ b/tests/integration/inference/recordings/f4fa57034e80f69cb75c300241862e98911e3124f09225d42d29239045854ce4.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=bedrock/openai.gpt-oss-20b-False]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/7fe113f5fc7f0e055b4abd4e6ea0a183dc1a2b959cc410ec2c594a04028712b9.json b/tests/integration/responses/recordings/0469f7efcdf69ae378b7395f518d863badff5ace79116b718d28b8fa3e53a93a.json similarity index 99% rename from tests/integration/responses/recordings/7fe113f5fc7f0e055b4abd4e6ea0a183dc1a2b959cc410ec2c594a04028712b9.json rename to tests/integration/responses/recordings/0469f7efcdf69ae378b7395f518d863badff5ace79116b718d28b8fa3e53a93a.json index acbabb0973..ad2a6a5d08 100644 --- a/tests/integration/responses/recordings/7fe113f5fc7f0e055b4abd4e6ea0a183dc1a2b959cc410ec2c594a04028712b9.json +++ b/tests/integration/responses/recordings/0469f7efcdf69ae378b7395f518d863badff5ace79116b718d28b8fa3e53a93a.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/d480b100b782370dd41e8dbbb53e2f37783b99074446dcb50d49034007bfb97e.json b/tests/integration/responses/recordings/04a4837c6c7974929a7328bb090e7c9d991bfb10ba71d307ed8ba39411c407df.json similarity index 98% rename from tests/integration/responses/recordings/d480b100b782370dd41e8dbbb53e2f37783b99074446dcb50d49034007bfb97e.json rename to tests/integration/responses/recordings/04a4837c6c7974929a7328bb090e7c9d991bfb10ba71d307ed8ba39411c407df.json index c4b64b33c0..bb85206433 100644 --- a/tests/integration/responses/recordings/d480b100b782370dd41e8dbbb53e2f37783b99074446dcb50d49034007bfb97e.json +++ b/tests/integration/responses/recordings/04a4837c6c7974929a7328bb090e7c9d991bfb10ba71d307ed8ba39411c407df.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/c71d141a2ec044cc7eb01574672e5e6bc6689fb1d6aa8cc6494773fddff919d4.json b/tests/integration/responses/recordings/0c65c6fe47669a1dde7e3f13d4b357970f0e3dabc4442278caa66054f29cf2b2.json similarity index 99% rename from tests/integration/responses/recordings/c71d141a2ec044cc7eb01574672e5e6bc6689fb1d6aa8cc6494773fddff919d4.json rename to 
tests/integration/responses/recordings/0c65c6fe47669a1dde7e3f13d4b357970f0e3dabc4442278caa66054f29cf2b2.json index 1354ec7d90..9c3a603bfd 100644 --- a/tests/integration/responses/recordings/c71d141a2ec044cc7eb01574672e5e6bc6689fb1d6aa8cc6494773fddff919d4.json +++ b/tests/integration/responses/recordings/0c65c6fe47669a1dde7e3f13d4b357970f0e3dabc4442278caa66054f29cf2b2.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/29462a1046f2fcc98301a8eed2fc861ba7f9de28bf2dc06bfae8c04bf664e645.json b/tests/integration/responses/recordings/1a5af1292127b401be03305c2d0a8f109a1bd0444c0394ad5e3ca4f6bce9a3c7.json similarity index 98% rename from tests/integration/responses/recordings/29462a1046f2fcc98301a8eed2fc861ba7f9de28bf2dc06bfae8c04bf664e645.json rename to tests/integration/responses/recordings/1a5af1292127b401be03305c2d0a8f109a1bd0444c0394ad5e3ca4f6bce9a3c7.json index b457375b69..4c58e51aa8 100644 --- a/tests/integration/responses/recordings/29462a1046f2fcc98301a8eed2fc861ba7f9de28bf2dc06bfae8c04bf664e645.json +++ b/tests/integration/responses/recordings/1a5af1292127b401be03305c2d0a8f109a1bd0444c0394ad5e3ca4f6bce9a3c7.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", 
diff --git a/tests/integration/responses/recordings/076784d55c6a712b559ad4061d7ed4391dc7f1cf35a2b141ad4b0b8f8c8503e1.json b/tests/integration/responses/recordings/1f1d016a1f7977d024de7a10f1e63acefb20e6839258b8b132471f404007c8f3.json similarity index 98% rename from tests/integration/responses/recordings/076784d55c6a712b559ad4061d7ed4391dc7f1cf35a2b141ad4b0b8f8c8503e1.json rename to tests/integration/responses/recordings/1f1d016a1f7977d024de7a10f1e63acefb20e6839258b8b132471f404007c8f3.json index fa214a2d26..4db3f44845 100644 --- a/tests/integration/responses/recordings/076784d55c6a712b559ad4061d7ed4391dc7f1cf35a2b141ad4b0b8f8c8503e1.json +++ b/tests/integration/responses/recordings/1f1d016a1f7977d024de7a10f1e63acefb20e6839258b8b132471f404007c8f3.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[openai_client-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/95ae8c627acc6e165fa5046946ae7c009324d3704b64695049cbbd399a6348b8.json b/tests/integration/responses/recordings/23223eabe994fd8172f8650d7cc9c3cc6f11f0618996f28403378a85d7258c88.json similarity index 99% rename from tests/integration/responses/recordings/95ae8c627acc6e165fa5046946ae7c009324d3704b64695049cbbd399a6348b8.json rename to tests/integration/responses/recordings/23223eabe994fd8172f8650d7cc9c3cc6f11f0618996f28403378a85d7258c88.json index dfbf4d1684..e393f19733 100644 --- a/tests/integration/responses/recordings/95ae8c627acc6e165fa5046946ae7c009324d3704b64695049cbbd399a6348b8.json +++ b/tests/integration/responses/recordings/23223eabe994fd8172f8650d7cc9c3cc6f11f0618996f28403378a85d7258c88.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_prompt_templates.py::test_multi_version_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/21b2d947138024ef80069f89c5157185a70f43c4d128ffdb4337194ae2486429.json b/tests/integration/responses/recordings/23e253ab6f05ad695e1d2dfd2190797667d27d8a1a8a963ffba82cf69bf3696d.json similarity index 98% rename from tests/integration/responses/recordings/21b2d947138024ef80069f89c5157185a70f43c4d128ffdb4337194ae2486429.json rename to tests/integration/responses/recordings/23e253ab6f05ad695e1d2dfd2190797667d27d8a1a8a963ffba82cf69bf3696d.json index 2567646684..89159b2fdd 100644 --- a/tests/integration/responses/recordings/21b2d947138024ef80069f89c5157185a70f43c4d128ffdb4337194ae2486429.json +++ b/tests/integration/responses/recordings/23e253ab6f05ad695e1d2dfd2190797667d27d8a1a8a963ffba82cf69bf3696d.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/68dd2778e5b1caa3ae28e5eed59c41db0d39c58c113612b94abaa459fc0fad3e.json b/tests/integration/responses/recordings/2c604588ef302f0e4a93ef93dc713522c7d7798542038b89b465af2e9ef0b299.json similarity index 99% rename from tests/integration/responses/recordings/68dd2778e5b1caa3ae28e5eed59c41db0d39c58c113612b94abaa459fc0fad3e.json rename to 
tests/integration/responses/recordings/2c604588ef302f0e4a93ef93dc713522c7d7798542038b89b465af2e9ef0b299.json index a6fe2f3a76..d3b85e36bd 100644 --- a/tests/integration/responses/recordings/68dd2778e5b1caa3ae28e5eed59c41db0d39c58c113612b94abaa459fc0fad3e.json +++ b/tests/integration/responses/recordings/2c604588ef302f0e4a93ef93dc713522c7d7798542038b89b465af2e9ef0b299.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_multi_version_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/6e9164fd65ebd8121be120a06262efd9c0f603104e86d84ad07ff4e5b5ab4d69.json b/tests/integration/responses/recordings/2ecc2783d6558449838ced5f0180990a80c4cbaffa952f1fe5668b74e9e57ae9.json similarity index 99% rename from tests/integration/responses/recordings/6e9164fd65ebd8121be120a06262efd9c0f603104e86d84ad07ff4e5b5ab4d69.json rename to tests/integration/responses/recordings/2ecc2783d6558449838ced5f0180990a80c4cbaffa952f1fe5668b74e9e57ae9.json index dd88fbdc31..8df6a01cc9 100644 --- a/tests/integration/responses/recordings/6e9164fd65ebd8121be120a06262efd9c0f603104e86d84ad07ff4e5b5ab4d69.json +++ b/tests/integration/responses/recordings/2ecc2783d6558449838ced5f0180990a80c4cbaffa952f1fe5668b74e9e57ae9.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[openai_client-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git 
a/tests/integration/responses/recordings/efffb6e430b20dc2f20fbe77ef230b47660008ab6ae2102f73596d4331dcf683.json b/tests/integration/responses/recordings/30dee5b8b4a1716103dd23d5249b6649aa2c9d30931cccdbb2a8b2fefa69675d.json similarity index 99% rename from tests/integration/responses/recordings/efffb6e430b20dc2f20fbe77ef230b47660008ab6ae2102f73596d4331dcf683.json rename to tests/integration/responses/recordings/30dee5b8b4a1716103dd23d5249b6649aa2c9d30931cccdbb2a8b2fefa69675d.json index 0bd9da0c4e..39ba90fd12 100644 --- a/tests/integration/responses/recordings/efffb6e430b20dc2f20fbe77ef230b47660008ab6ae2102f73596d4331dcf683.json +++ b/tests/integration/responses/recordings/30dee5b8b4a1716103dd23d5249b6649aa2c9d30931cccdbb2a8b2fefa69675d.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/e1bf2d00006e9b3c60f537c724c98c518a571caec500010e2b07a42662a0db94.json b/tests/integration/responses/recordings/3e79875697a99012661ee6e8a9160a9251d716e340408d20c5c7cc2b337c71b0.json similarity index 98% rename from tests/integration/responses/recordings/e1bf2d00006e9b3c60f537c724c98c518a571caec500010e2b07a42662a0db94.json rename to tests/integration/responses/recordings/3e79875697a99012661ee6e8a9160a9251d716e340408d20c5c7cc2b337c71b0.json index f9d5725488..befbab5abd 100644 --- a/tests/integration/responses/recordings/e1bf2d00006e9b3c60f537c724c98c518a571caec500010e2b07a42662a0db94.json +++ b/tests/integration/responses/recordings/3e79875697a99012661ee6e8a9160a9251d716e340408d20c5c7cc2b337c71b0.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_multi_turn_and_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/a6204a1d782ab7d9f85b3ac2b792796b668ba731bf670254e3c9d9ede934193c.json b/tests/integration/responses/recordings/4290deaf6bf03c9ccf3228f6b51a2f05dc909966c425203775402388b7ced238.json similarity index 98% rename from tests/integration/responses/recordings/a6204a1d782ab7d9f85b3ac2b792796b668ba731bf670254e3c9d9ede934193c.json rename to tests/integration/responses/recordings/4290deaf6bf03c9ccf3228f6b51a2f05dc909966c425203775402388b7ced238.json index c16d810e5d..54ed03755d 100644 --- a/tests/integration/responses/recordings/a6204a1d782ab7d9f85b3ac2b792796b668ba731bf670254e3c9d9ede934193c.json +++ b/tests/integration/responses/recordings/4290deaf6bf03c9ccf3228f6b51a2f05dc909966c425203775402388b7ced238.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/ae06979da89d9f6949f1ceb5ee057759bfbf01f3e0bd58f68d55996929192f5e.json b/tests/integration/responses/recordings/4752668af3df1f42fe5b16f6f0c7a4954472d2cda8559b8466fad6ba0aa1c642.json similarity index 99% rename from tests/integration/responses/recordings/ae06979da89d9f6949f1ceb5ee057759bfbf01f3e0bd58f68d55996929192f5e.json rename to 
tests/integration/responses/recordings/4752668af3df1f42fe5b16f6f0c7a4954472d2cda8559b8466fad6ba0aa1c642.json index 8689cb508f..e70fef56b6 100644 --- a/tests/integration/responses/recordings/ae06979da89d9f6949f1ceb5ee057759bfbf01f3e0bd58f68d55996929192f5e.json +++ b/tests/integration/responses/recordings/4752668af3df1f42fe5b16f6f0c7a4954472d2cda8559b8466fad6ba0aa1c642.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/16ef4e2da1cf48a86471cb57c7ce4a4c11c6968d3e9607dbd632d3bd9a54493f.json b/tests/integration/responses/recordings/48d747eb358721344236340fdca57eb977700bad50a47dcde078625f507277cc.json similarity index 99% rename from tests/integration/responses/recordings/16ef4e2da1cf48a86471cb57c7ce4a4c11c6968d3e9607dbd632d3bd9a54493f.json rename to tests/integration/responses/recordings/48d747eb358721344236340fdca57eb977700bad50a47dcde078625f507277cc.json index 48db8bb1cb..9c481d8163 100644 --- a/tests/integration/responses/recordings/16ef4e2da1cf48a86471cb57c7ce4a4c11c6968d3e9607dbd632d3bd9a54493f.json +++ b/tests/integration/responses/recordings/48d747eb358721344236340fdca57eb977700bad50a47dcde078625f507277cc.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_basic_workflow[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": 
"openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/2e46c50d186ad42569a701893e2fd59e4aa93e9b973d7780c32c6e864a4abed7.json b/tests/integration/responses/recordings/498ab18308a6e603d405bf5b5c1102f29c9250f8386e90baab57cbfa9782a558.json similarity index 98% rename from tests/integration/responses/recordings/2e46c50d186ad42569a701893e2fd59e4aa93e9b973d7780c32c6e864a4abed7.json rename to tests/integration/responses/recordings/498ab18308a6e603d405bf5b5c1102f29c9250f8386e90baab57cbfa9782a558.json index 0565553f98..35299f72fe 100644 --- a/tests/integration/responses/recordings/2e46c50d186ad42569a701893e2fd59e4aa93e9b973d7780c32c6e864a4abed7.json +++ b/tests/integration/responses/recordings/498ab18308a6e603d405bf5b5c1102f29c9250f8386e90baab57cbfa9782a558.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_multi_variable_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/d004a76ddc87e3d1f10a07b50f2aa82cf39d20e5cfb3be1433c924a126fe1a58.json b/tests/integration/responses/recordings/4c112247d4a195fac2b27e6a4936b248ba0848b70882489e1a32a9e323dc081b.json similarity index 99% rename from tests/integration/responses/recordings/d004a76ddc87e3d1f10a07b50f2aa82cf39d20e5cfb3be1433c924a126fe1a58.json rename to tests/integration/responses/recordings/4c112247d4a195fac2b27e6a4936b248ba0848b70882489e1a32a9e323dc081b.json index d6f25547df..e7a54d580e 100644 --- a/tests/integration/responses/recordings/d004a76ddc87e3d1f10a07b50f2aa82cf39d20e5cfb3be1433c924a126fe1a58.json +++ b/tests/integration/responses/recordings/4c112247d4a195fac2b27e6a4936b248ba0848b70882489e1a32a9e323dc081b.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[openai_client-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/f9f8f255a3e65fdea20eedd28f7febfb159dafa9a3aab7a83c8afc7e89ba0b91.json b/tests/integration/responses/recordings/4e1fd22f5d9ded67352dbe815bb0cb7e463ad85b856514425df394ba222b92c4.json similarity index 98% rename from tests/integration/responses/recordings/f9f8f255a3e65fdea20eedd28f7febfb159dafa9a3aab7a83c8afc7e89ba0b91.json rename to tests/integration/responses/recordings/4e1fd22f5d9ded67352dbe815bb0cb7e463ad85b856514425df394ba222b92c4.json index 48dee3d271..a5ac4ce369 100644 --- a/tests/integration/responses/recordings/f9f8f255a3e65fdea20eedd28f7febfb159dafa9a3aab7a83c8afc7e89ba0b91.json +++ b/tests/integration/responses/recordings/4e1fd22f5d9ded67352dbe815bb0cb7e463ad85b856514425df394ba222b92c4.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/e6e1b682e9b368426efc1bd93b326bec2f7db76ab9d8c9c6334a2d5177672fd6.json b/tests/integration/responses/recordings/5100781c53bb19a320364926454cb1a17051bb6a1a72989937e1b42465983960.json similarity index 99% rename from tests/integration/responses/recordings/e6e1b682e9b368426efc1bd93b326bec2f7db76ab9d8c9c6334a2d5177672fd6.json rename to 
tests/integration/responses/recordings/5100781c53bb19a320364926454cb1a17051bb6a1a72989937e1b42465983960.json index b8052fc8a4..1876fb2b4c 100644 --- a/tests/integration/responses/recordings/e6e1b682e9b368426efc1bd93b326bec2f7db76ab9d8c9c6334a2d5177672fd6.json +++ b/tests/integration/responses/recordings/5100781c53bb19a320364926454cb1a17051bb6a1a72989937e1b42465983960.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/5ab06b404383787d1a545eacc7674fc068f37fbdaaa8d3b2f991f884b0c1d07a.json b/tests/integration/responses/recordings/53e2e090845d1a09194d5f18dca914ab5eecc2a2af6add585ef5be9d61549d1a.json similarity index 99% rename from tests/integration/responses/recordings/5ab06b404383787d1a545eacc7674fc068f37fbdaaa8d3b2f991f884b0c1d07a.json rename to tests/integration/responses/recordings/53e2e090845d1a09194d5f18dca914ab5eecc2a2af6add585ef5be9d61549d1a.json index 524561bac2..9f7bac0205 100644 --- a/tests/integration/responses/recordings/5ab06b404383787d1a545eacc7674fc068f37fbdaaa8d3b2f991f884b0c1d07a.json +++ b/tests/integration/responses/recordings/53e2e090845d1a09194d5f18dca914ab5eecc2a2af6add585ef5be9d61549d1a.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git 
a/tests/integration/responses/recordings/6a8b76b4d8ba3bc65c2cbd242b17aa224cc77abb15beba937decfda457ba2b6e.json b/tests/integration/responses/recordings/6138d550fdb668e3a48400b36501a2a51cea83c4157e7d6fc35792837df40bdb.json similarity index 99% rename from tests/integration/responses/recordings/6a8b76b4d8ba3bc65c2cbd242b17aa224cc77abb15beba937decfda457ba2b6e.json rename to tests/integration/responses/recordings/6138d550fdb668e3a48400b36501a2a51cea83c4157e7d6fc35792837df40bdb.json index ae6a4b7715..ddb3c9e5c8 100644 --- a/tests/integration/responses/recordings/6a8b76b4d8ba3bc65c2cbd242b17aa224cc77abb15beba937decfda457ba2b6e.json +++ b/tests/integration/responses/recordings/6138d550fdb668e3a48400b36501a2a51cea83c4157e7d6fc35792837df40bdb.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_with_function_tools[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/01675632398179e5013b897e113ce0bfcb3c8c06352c4884b31b84bcc88b8e43.json b/tests/integration/responses/recordings/66ca3acff5d7d5880103bbfde08c52bac60f1038d84e0591a279b941a9b49b32.json similarity index 99% rename from tests/integration/responses/recordings/01675632398179e5013b897e113ce0bfcb3c8c06352c4884b31b84bcc88b8e43.json rename to tests/integration/responses/recordings/66ca3acff5d7d5880103bbfde08c52bac60f1038d84e0591a279b941a9b49b32.json index a46192e4cf..b6006df3af 100644 --- a/tests/integration/responses/recordings/01675632398179e5013b897e113ce0bfcb3c8c06352c4884b31b84bcc88b8e43.json +++ b/tests/integration/responses/recordings/66ca3acff5d7d5880103bbfde08c52bac60f1038d84e0591a279b941a9b49b32.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/c2735c8a02e57ab45fd2b72ac4478bad6a39cb2ff3ac1a457fe8476f73146934.json b/tests/integration/responses/recordings/67c29a33f0d688d469c0d59feb5ca085c51bf886c6836fce3b630c1cc693e8cd.json similarity index 99% rename from tests/integration/responses/recordings/c2735c8a02e57ab45fd2b72ac4478bad6a39cb2ff3ac1a457fe8476f73146934.json rename to tests/integration/responses/recordings/67c29a33f0d688d469c0d59feb5ca085c51bf886c6836fce3b630c1cc693e8cd.json index b3d394a752..3ab5fcc7f5 100644 --- a/tests/integration/responses/recordings/c2735c8a02e57ab45fd2b72ac4478bad6a39cb2ff3ac1a457fe8476f73146934.json +++ b/tests/integration/responses/recordings/67c29a33f0d688d469c0d59feb5ca085c51bf886c6836fce3b630c1cc693e8cd.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/1c75005e90b5447d913967026e8bd7c13afa92ce90d2af9913fe81db06e8c88b.json b/tests/integration/responses/recordings/6becbeb1c15378d9a82f60c3a9309dad73dd604848005240745cb0f729c4330f.json similarity index 98% rename from tests/integration/responses/recordings/1c75005e90b5447d913967026e8bd7c13afa92ce90d2af9913fe81db06e8c88b.json rename to 
tests/integration/responses/recordings/6becbeb1c15378d9a82f60c3a9309dad73dd604848005240745cb0f729c4330f.json index 5009f42036..9bd9eff2fa 100644 --- a/tests/integration/responses/recordings/1c75005e90b5447d913967026e8bd7c13afa92ce90d2af9913fe81db06e8c88b.json +++ b/tests/integration/responses/recordings/6becbeb1c15378d9a82f60c3a9309dad73dd604848005240745cb0f729c4330f.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_non_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/97a9427f4c20f3fb104a6d04e3c08380f50a86a947b89ddd9477d09ec713313e.json b/tests/integration/responses/recordings/6f80b6307533eb1efc08bb9252ba14579a5f7114057b9b774d18b72e7a932fb2.json similarity index 98% rename from tests/integration/responses/recordings/97a9427f4c20f3fb104a6d04e3c08380f50a86a947b89ddd9477d09ec713313e.json rename to tests/integration/responses/recordings/6f80b6307533eb1efc08bb9252ba14579a5f7114057b9b774d18b72e7a932fb2.json index c931916e1f..a36d46108c 100644 --- a/tests/integration/responses/recordings/97a9427f4c20f3fb104a6d04e3c08380f50a86a947b89ddd9477d09ec713313e.json +++ b/tests/integration/responses/recordings/6f80b6307533eb1efc08bb9252ba14579a5f7114057b9b774d18b72e7a932fb2.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[openai_client-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git 
a/tests/integration/responses/recordings/565887f8230acc7918b316e3428650fa2f8aa13aa8a01859da53f1bc10d7c8d5.json b/tests/integration/responses/recordings/72e8dd36d46c572167f589fe59a32bcdfbfed5ba7f272c55f2cf412b099f7c93.json similarity index 99% rename from tests/integration/responses/recordings/565887f8230acc7918b316e3428650fa2f8aa13aa8a01859da53f1bc10d7c8d5.json rename to tests/integration/responses/recordings/72e8dd36d46c572167f589fe59a32bcdfbfed5ba7f272c55f2cf412b099f7c93.json index 7df6152346..21f98bd2fa 100644 --- a/tests/integration/responses/recordings/565887f8230acc7918b316e3428650fa2f8aa13aa8a01859da53f1bc10d7c8d5.json +++ b/tests/integration/responses/recordings/72e8dd36d46c572167f589fe59a32bcdfbfed5ba7f272c55f2cf412b099f7c93.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/6b1b3d872eb6746dbfe9b8eed7fcbe23ca9e3408e7b3d8b99df407ae62f0a9f8.json b/tests/integration/responses/recordings/7fce3badd53d491bbae372c12f38743876b1177094e1a97d08bbd600900388e1.json similarity index 99% rename from tests/integration/responses/recordings/6b1b3d872eb6746dbfe9b8eed7fcbe23ca9e3408e7b3d8b99df407ae62f0a9f8.json rename to tests/integration/responses/recordings/7fce3badd53d491bbae372c12f38743876b1177094e1a97d08bbd600900388e1.json index df200b467d..a7573dded0 100644 --- a/tests/integration/responses/recordings/6b1b3d872eb6746dbfe9b8eed7fcbe23ca9e3408e7b3d8b99df407ae62f0a9f8.json +++ b/tests/integration/responses/recordings/7fce3badd53d491bbae372c12f38743876b1177094e1a97d08bbd600900388e1.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/612c6642c5fb5b4203098183335b17ac0d121a04e39cc737b6cd2c1cb1f8ab30.json b/tests/integration/responses/recordings/86985e69a4be4073af20f65ba9ec927389f1b18c13691565186eba89fd1df24e.json similarity index 98% rename from tests/integration/responses/recordings/612c6642c5fb5b4203098183335b17ac0d121a04e39cc737b6cd2c1cb1f8ab30.json rename to tests/integration/responses/recordings/86985e69a4be4073af20f65ba9ec927389f1b18c13691565186eba89fd1df24e.json index 5daeb5e399..45a3daea46 100644 --- a/tests/integration/responses/recordings/612c6642c5fb5b4203098183335b17ac0d121a04e39cc737b6cd2c1cb1f8ab30.json +++ b/tests/integration/responses/recordings/86985e69a4be4073af20f65ba9ec927389f1b18c13691565186eba89fd1df24e.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/255d2b2c743eb4102b0938c2ece15c5ce5a534fc3ee5ea5067b677a9007fab40.json b/tests/integration/responses/recordings/8fffbf821fc03c867d710ee20471566dd845588cce7605101332153175738d64.json similarity index 98% rename from tests/integration/responses/recordings/255d2b2c743eb4102b0938c2ece15c5ce5a534fc3ee5ea5067b677a9007fab40.json rename to 
tests/integration/responses/recordings/8fffbf821fc03c867d710ee20471566dd845588cce7605101332153175738d64.json index eef2f4abbb..5c1f4c5359 100644 --- a/tests/integration/responses/recordings/255d2b2c743eb4102b0938c2ece15c5ce5a534fc3ee5ea5067b677a9007fab40.json +++ b/tests/integration/responses/recordings/8fffbf821fc03c867d710ee20471566dd845588cce7605101332153175738d64.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/b4669f2ae72ba853cd152e36d2e1aed1fc55ff53d48e9fe7371ce36e67b75445.json b/tests/integration/responses/recordings/9a097dbf69b822064c0e709fc32196ae1825730205c6ccd617c7ab88f57b1bb1.json similarity index 99% rename from tests/integration/responses/recordings/b4669f2ae72ba853cd152e36d2e1aed1fc55ff53d48e9fe7371ce36e67b75445.json rename to tests/integration/responses/recordings/9a097dbf69b822064c0e709fc32196ae1825730205c6ccd617c7ab88f57b1bb1.json index 1fccfa99c0..d0f580f350 100644 --- a/tests/integration/responses/recordings/b4669f2ae72ba853cd152e36d2e1aed1fc55ff53d48e9fe7371ce36e67b75445.json +++ b/tests/integration/responses/recordings/9a097dbf69b822064c0e709fc32196ae1825730205c6ccd617c7ab88f57b1bb1.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_responses_errors.py::TestResponsesAPIStreamingErrors::test_completed_response_has_no_error[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", 
diff --git a/tests/integration/responses/recordings/fff9dbea699f84c0f924ee9dbc91c9f5587eff0d27a554fd55b916da9662ce2c.json b/tests/integration/responses/recordings/a0a8368aca6a21aff59a88f8eadae3bf8cbaa47c2c83fcfc439c29c3693cf6a3.json similarity index 98% rename from tests/integration/responses/recordings/fff9dbea699f84c0f924ee9dbc91c9f5587eff0d27a554fd55b916da9662ce2c.json rename to tests/integration/responses/recordings/a0a8368aca6a21aff59a88f8eadae3bf8cbaa47c2c83fcfc439c29c3693cf6a3.json index 4532cdae61..c70c2c852b 100644 --- a/tests/integration/responses/recordings/fff9dbea699f84c0f924ee9dbc91c9f5587eff0d27a554fd55b916da9662ce2c.json +++ b/tests/integration/responses/recordings/a0a8368aca6a21aff59a88f8eadae3bf8cbaa47c2c83fcfc439c29c3693cf6a3.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_backward_compatibility[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/611613fb2c06bb1ed79fbe5dae18ac6e2a650340832aa7538498b5b26aff1f63.json b/tests/integration/responses/recordings/a3fea537eafccc69367c15360b09ca732197f2fbd3757e8afe5bc583df8f1c2c.json similarity index 98% rename from tests/integration/responses/recordings/611613fb2c06bb1ed79fbe5dae18ac6e2a650340832aa7538498b5b26aff1f63.json rename to tests/integration/responses/recordings/a3fea537eafccc69367c15360b09ca732197f2fbd3757e8afe5bc583df8f1c2c.json index 07098e7e33..c123f0e399 100644 --- a/tests/integration/responses/recordings/611613fb2c06bb1ed79fbe5dae18ac6e2a650340832aa7538498b5b26aff1f63.json +++ b/tests/integration/responses/recordings/a3fea537eafccc69367c15360b09ca732197f2fbd3757e8afe5bc583df8f1c2c.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/516a186291f3c6a4cf45ceecd49e9f8f5b396f37caa0c557e664c608b29bcfcc.json b/tests/integration/responses/recordings/a640e097dcf17544825aa20ce937c0b783de5f4bd2916d9e370145768551075c.json similarity index 98% rename from tests/integration/responses/recordings/516a186291f3c6a4cf45ceecd49e9f8f5b396f37caa0c557e664c608b29bcfcc.json rename to tests/integration/responses/recordings/a640e097dcf17544825aa20ce937c0b783de5f4bd2916d9e370145768551075c.json index f5e7895174..e5c913a998 100644 --- a/tests/integration/responses/recordings/516a186291f3c6a4cf45ceecd49e9f8f5b396f37caa0c557e664c608b29bcfcc.json +++ b/tests/integration/responses/recordings/a640e097dcf17544825aa20ce937c0b783de5f4bd2916d9e370145768551075c.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/68da149c99f86029626e9bca5df9a72d5204a4423c5caca31efe60384c4b2b37.json b/tests/integration/responses/recordings/aaed9594b345da41db3dd6754b1e7c6366dff704ea943bda851c2d3a658d44ec.json similarity index 98% rename from tests/integration/responses/recordings/68da149c99f86029626e9bca5df9a72d5204a4423c5caca31efe60384c4b2b37.json rename to 
tests/integration/responses/recordings/aaed9594b345da41db3dd6754b1e7c6366dff704ea943bda851c2d3a658d44ec.json index d0fb69b3ea..a58476f361 100644 --- a/tests/integration/responses/recordings/68da149c99f86029626e9bca5df9a72d5204a4423c5caca31efe60384c4b2b37.json +++ b/tests/integration/responses/recordings/aaed9594b345da41db3dd6754b1e7c6366dff704ea943bda851c2d3a658d44ec.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/555654e0ddbd51a65af179d9300fdedc42077a90849824b037f9dae1edda4102.json b/tests/integration/responses/recordings/b2741f85f52f5eb88f5401e02cad1d4f5d4a9bf61953e12b0441c0c4c2a2831a.json similarity index 98% rename from tests/integration/responses/recordings/555654e0ddbd51a65af179d9300fdedc42077a90849824b037f9dae1edda4102.json rename to tests/integration/responses/recordings/b2741f85f52f5eb88f5401e02cad1d4f5d4a9bf61953e12b0441c0c4c2a2831a.json index 49b997b2ed..dedcc9a08e 100644 --- a/tests/integration/responses/recordings/555654e0ddbd51a65af179d9300fdedc42077a90849824b037f9dae1edda4102.json +++ b/tests/integration/responses/recordings/b2741f85f52f5eb88f5401e02cad1d4f5d4a9bf61953e12b0441c0c4c2a2831a.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", 
diff --git a/tests/integration/responses/recordings/f8eb947297e60407c022e3326b5bbcfa0e186abf83bf4b3a826640715e14a3a9.json b/tests/integration/responses/recordings/baa7ab6db86f51910a35c45cf2234670f308b01d4b905faf3e1313901d2a6b61.json similarity index 99% rename from tests/integration/responses/recordings/f8eb947297e60407c022e3326b5bbcfa0e186abf83bf4b3a826640715e14a3a9.json rename to tests/integration/responses/recordings/baa7ab6db86f51910a35c45cf2234670f308b01d4b905faf3e1313901d2a6b61.json index 5e63214137..d4ea806034 100644 --- a/tests/integration/responses/recordings/f8eb947297e60407c022e3326b5bbcfa0e186abf83bf4b3a826640715e14a3a9.json +++ b/tests/integration/responses/recordings/baa7ab6db86f51910a35c45cf2234670f308b01d4b905faf3e1313901d2a6b61.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/bf90e1d57fcb8ac4ebb20ee417bb219a091ac9c45000f4c22e4fd16716dfaceb.json b/tests/integration/responses/recordings/c40e45bc283154cb2e212a514e5ab935e8851add8edeef48d6a65764716ce634.json similarity index 98% rename from tests/integration/responses/recordings/bf90e1d57fcb8ac4ebb20ee417bb219a091ac9c45000f4c22e4fd16716dfaceb.json rename to tests/integration/responses/recordings/c40e45bc283154cb2e212a514e5ab935e8851add8edeef48d6a65764716ce634.json index 56e96e3284..98c4e919a2 100644 --- a/tests/integration/responses/recordings/bf90e1d57fcb8ac4ebb20ee417bb219a091ac9c45000f4c22e4fd16716dfaceb.json +++ b/tests/integration/responses/recordings/c40e45bc283154cb2e212a514e5ab935e8851add8edeef48d6a65764716ce634.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/da6a362ff3aadd43bc06ba806b0f3710f565e33c63b5f1aff08a68807e01ce8a.json b/tests/integration/responses/recordings/cc0c289ff3a932cbfc46188197ebf4074e790c3bbdae24c280c711aa752fb6b4.json similarity index 99% rename from tests/integration/responses/recordings/da6a362ff3aadd43bc06ba806b0f3710f565e33c63b5f1aff08a68807e01ce8a.json rename to tests/integration/responses/recordings/cc0c289ff3a932cbfc46188197ebf4074e790c3bbdae24c280c711aa752fb6b4.json index cadff507ca..e7e7f1d014 100644 --- a/tests/integration/responses/recordings/da6a362ff3aadd43bc06ba806b0f3710f565e33c63b5f1aff08a68807e01ce8a.json +++ b/tests/integration/responses/recordings/cc0c289ff3a932cbfc46188197ebf4074e790c3bbdae24c280c711aa752fb6b4.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/2df2850b224738a8f7b945cc06681f73f38988a267cd1ba6a546a8352a57ffa5.json b/tests/integration/responses/recordings/d0996e5e764315da97c15eb62c37f1c374b3e46a69f5526ef45f70fd24fd18b9.json similarity index 99% rename from tests/integration/responses/recordings/2df2850b224738a8f7b945cc06681f73f38988a267cd1ba6a546a8352a57ffa5.json rename to 
tests/integration/responses/recordings/d0996e5e764315da97c15eb62c37f1c374b3e46a69f5526ef45f70fd24fd18b9.json index 5e507106ac..e102b33960 100644 --- a/tests/integration/responses/recordings/2df2850b224738a8f7b945cc06681f73f38988a267cd1ba6a546a8352a57ffa5.json +++ b/tests/integration/responses/recordings/d0996e5e764315da97c15eb62c37f1c374b3e46a69f5526ef45f70fd24fd18b9.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/8dba857db6730d6ab259e2c80f5cdd1f8bbcce22fb4a14df4389f96e2db04eaa.json b/tests/integration/responses/recordings/d2cf88feac32878a5072007ed404bbc48650109698c6d0ac664a92426b242301.json similarity index 98% rename from tests/integration/responses/recordings/8dba857db6730d6ab259e2c80f5cdd1f8bbcce22fb4a14df4389f96e2db04eaa.json rename to tests/integration/responses/recordings/d2cf88feac32878a5072007ed404bbc48650109698c6d0ac664a92426b242301.json index ce613bbaf8..315fb4ae9f 100644 --- a/tests/integration/responses/recordings/8dba857db6730d6ab259e2c80f5cdd1f8bbcce22fb4a14df4389f96e2db04eaa.json +++ b/tests/integration/responses/recordings/d2cf88feac32878a5072007ed404bbc48650109698c6d0ac664a92426b242301.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_multi_turn_and_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", 
diff --git a/tests/integration/responses/recordings/448f5aad2604000a0616341f2a97086fef7087f561fa24e21b5f665b04cebff2.json b/tests/integration/responses/recordings/d490ef38aa100255e6031998088783380dbe25ba782d6e71e1ff3544628c06ab.json similarity index 99% rename from tests/integration/responses/recordings/448f5aad2604000a0616341f2a97086fef7087f561fa24e21b5f665b04cebff2.json rename to tests/integration/responses/recordings/d490ef38aa100255e6031998088783380dbe25ba782d6e71e1ff3544628c06ab.json index b1ada77efa..16b936f4e4 100644 --- a/tests/integration/responses/recordings/448f5aad2604000a0616341f2a97086fef7087f561fa24e21b5f665b04cebff2.json +++ b/tests/integration/responses/recordings/d490ef38aa100255e6031998088783380dbe25ba782d6e71e1ff3544628c06ab.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_non_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/723ec06a9462e87536dbfaeb2b9396943dc3ea635425d9a1262212a1ccec2910.json b/tests/integration/responses/recordings/d508c01d0d6670167da0e8758e9ee46fcde6afb092f0b5b57da9fb1c40b3d506.json similarity index 98% rename from tests/integration/responses/recordings/723ec06a9462e87536dbfaeb2b9396943dc3ea635425d9a1262212a1ccec2910.json rename to tests/integration/responses/recordings/d508c01d0d6670167da0e8758e9ee46fcde6afb092f0b5b57da9fb1c40b3d506.json index 5c6a486afc..b1cd3601a6 100644 --- a/tests/integration/responses/recordings/723ec06a9462e87536dbfaeb2b9396943dc3ea635425d9a1262212a1ccec2910.json +++ b/tests/integration/responses/recordings/d508c01d0d6670167da0e8758e9ee46fcde6afb092f0b5b57da9fb1c40b3d506.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/9ecdff3fb145793283aa9233afb27e687c1aa26f93586f5a624b7a78c4c15c3c.json b/tests/integration/responses/recordings/d510a5744713675c17f35596254f6704f32c46c11a85e585906992614239da54.json similarity index 95% rename from tests/integration/responses/recordings/9ecdff3fb145793283aa9233afb27e687c1aa26f93586f5a624b7a78c4c15c3c.json rename to tests/integration/responses/recordings/d510a5744713675c17f35596254f6704f32c46c11a85e585906992614239da54.json index 7d7b3f6d34..e0bbca0dc2 100644 --- a/tests/integration/responses/recordings/9ecdff3fb145793283aa9233afb27e687c1aa26f93586f5a624b7a78c4c15c3c.json +++ b/tests/integration/responses/recordings/d510a5744713675c17f35596254f6704f32c46c11a85e585906992614239da54.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_responses_errors.py::TestResponsesAPIStreamingErrors::test_non_vision_model_with_base64_image_returns_server_error[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/06653e6e4563eb378ee18044c32cd36e1ace560b2ab90d84c901b67dde0827f8.json b/tests/integration/responses/recordings/d6784a627287aef1c3171bdbf1d4bdb3523bd9c5b73088a745c431dc2514ea16.json similarity index 99% rename from tests/integration/responses/recordings/06653e6e4563eb378ee18044c32cd36e1ace560b2ab90d84c901b67dde0827f8.json 
rename to tests/integration/responses/recordings/d6784a627287aef1c3171bdbf1d4bdb3523bd9c5b73088a745c431dc2514ea16.json index b5d6c97404..75c1947e9b 100644 --- a/tests/integration/responses/recordings/06653e6e4563eb378ee18044c32cd36e1ace560b2ab90d84c901b67dde0827f8.json +++ b/tests/integration/responses/recordings/d6784a627287aef1c3171bdbf1d4bdb3523bd9c5b73088a745c431dc2514ea16.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/95d123bff66974b73beabe29f92030b8a24ebda6f06e83ed31b9a0749c60199e.json b/tests/integration/responses/recordings/d7b50078103e88d20c4955ba23c55193ed0cc70e43e577cbb0a3dec164e250c4.json similarity index 98% rename from tests/integration/responses/recordings/95d123bff66974b73beabe29f92030b8a24ebda6f06e83ed31b9a0749c60199e.json rename to tests/integration/responses/recordings/d7b50078103e88d20c4955ba23c55193ed0cc70e43e577cbb0a3dec164e250c4.json index 1c9db0929b..68f7c36128 100644 --- a/tests/integration/responses/recordings/95d123bff66974b73beabe29f92030b8a24ebda6f06e83ed31b9a0749c60199e.json +++ b/tests/integration/responses/recordings/d7b50078103e88d20c4955ba23c55193ed0cc70e43e577cbb0a3dec164e250c4.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_prompt_template_no_variables[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff 
--git a/tests/integration/responses/recordings/eebe9f0ac51599aa9d0f81b0d0a8b2a279054d2263d728fc1eb0b6250004b339.json b/tests/integration/responses/recordings/d89ee3f0b9b9ae608fd6bfd7efa57378f61a28811513ccddddffc77ff05a0417.json similarity index 98% rename from tests/integration/responses/recordings/eebe9f0ac51599aa9d0f81b0d0a8b2a279054d2263d728fc1eb0b6250004b339.json rename to tests/integration/responses/recordings/d89ee3f0b9b9ae608fd6bfd7efa57378f61a28811513ccddddffc77ff05a0417.json index 15df46028a..0db0c69e13 100644 --- a/tests/integration/responses/recordings/eebe9f0ac51599aa9d0f81b0d0a8b2a279054d2263d728fc1eb0b6250004b339.json +++ b/tests/integration/responses/recordings/d89ee3f0b9b9ae608fd6bfd7efa57378f61a28811513ccddddffc77ff05a0417.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/3d42f101f95388471691e0c8d8c1a37a67a2234432874a79b684d8bd40f73eff.json b/tests/integration/responses/recordings/e779e2713de2d91ee559033a5e77397d77d36726335d9c6c1b22e5c0ad1c22ac.json similarity index 98% rename from tests/integration/responses/recordings/3d42f101f95388471691e0c8d8c1a37a67a2234432874a79b684d8bd40f73eff.json rename to tests/integration/responses/recordings/e779e2713de2d91ee559033a5e77397d77d36726335d9c6c1b22e5c0ad1c22ac.json index 17f2fac8a9..1cb39b7331 100644 --- a/tests/integration/responses/recordings/3d42f101f95388471691e0c8d8c1a37a67a2234432874a79b684d8bd40f73eff.json +++ b/tests/integration/responses/recordings/e779e2713de2d91ee559033a5e77397d77d36726335d9c6c1b22e5c0ad1c22ac.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/2eafd5c6eb8e2cff3433007bf23b19e6d66547b2dcab1b1892d5bd53f8ad34b4.json b/tests/integration/responses/recordings/ecb3bc2e13c604f0003308df4fdb94149395c39cb195974f2893fbf340e50589.json similarity index 95% rename from tests/integration/responses/recordings/2eafd5c6eb8e2cff3433007bf23b19e6d66547b2dcab1b1892d5bd53f8ad34b4.json rename to tests/integration/responses/recordings/ecb3bc2e13c604f0003308df4fdb94149395c39cb195974f2893fbf340e50589.json index 7a7249a3c3..c06fda78f5 100644 --- a/tests/integration/responses/recordings/2eafd5c6eb8e2cff3433007bf23b19e6d66547b2dcab1b1892d5bd53f8ad34b4.json +++ b/tests/integration/responses/recordings/ecb3bc2e13c604f0003308df4fdb94149395c39cb195974f2893fbf340e50589.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_responses_errors.py::TestResponsesAPIStreamingErrors::test_non_vision_model_returns_error_for_image_input[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/f7ba1ce7bbeef963f2002b812fd07386cecf9179bfc6f92a2e09348edbfd7f07.json b/tests/integration/responses/recordings/ee3d5360f48357a1f00d8fc9f1b01844fcb08bf9ff738f34d7f915514173f2c4.json similarity index 99% rename from tests/integration/responses/recordings/f7ba1ce7bbeef963f2002b812fd07386cecf9179bfc6f92a2e09348edbfd7f07.json rename to 
tests/integration/responses/recordings/ee3d5360f48357a1f00d8fc9f1b01844fcb08bf9ff738f34d7f915514173f2c4.json index fe4ed13ed2..8b7482585a 100644 --- a/tests/integration/responses/recordings/f7ba1ce7bbeef963f2002b812fd07386cecf9179bfc6f92a2e09348edbfd7f07.json +++ b/tests/integration/responses/recordings/ee3d5360f48357a1f00d8fc9f1b01844fcb08bf9ff738f34d7f915514173f2c4.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/2a08014e5c5da31c16896c0e59ea432e71eb8eb83a3cf25a3e4ab7fa5700bd6b.json b/tests/integration/responses/recordings/f4770c02fdb697b36717c6713dacd87b12b16b020611dd32866110cbf0c699d9.json similarity index 98% rename from tests/integration/responses/recordings/2a08014e5c5da31c16896c0e59ea432e71eb8eb83a3cf25a3e4ab7fa5700bd6b.json rename to tests/integration/responses/recordings/f4770c02fdb697b36717c6713dacd87b12b16b020611dd32866110cbf0c699d9.json index 5670d23ef1..ae54912b15 100644 --- a/tests/integration/responses/recordings/2a08014e5c5da31c16896c0e59ea432e71eb8eb83a3cf25a3e4ab7fa5700bd6b.json +++ b/tests/integration/responses/recordings/f4770c02fdb697b36717c6713dacd87b12b16b020611dd32866110cbf0c699d9.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_reasoning.py::test_reasoning_basic_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git 
a/tests/integration/responses/recordings/b4d86c5231fc155339edbcb937b8e91a4be8c2c171a49120991e0eab9582b806.json b/tests/integration/responses/recordings/f91a97f69204421e717fc7d7660ea2123789abe1d080020cbe97402d51e074eb.json similarity index 98% rename from tests/integration/responses/recordings/b4d86c5231fc155339edbcb937b8e91a4be8c2c171a49120991e0eab9582b806.json rename to tests/integration/responses/recordings/f91a97f69204421e717fc7d7660ea2123789abe1d080020cbe97402d51e074eb.json index 26d44c5072..ef8065854c 100644 --- a/tests/integration/responses/recordings/b4d86c5231fc155339edbcb937b8e91a4be8c2c171a49120991e0eab9582b806.json +++ b/tests/integration/responses/recordings/f91a97f69204421e717fc7d7660ea2123789abe1d080020cbe97402d51e074eb.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_prompt_templates.py::test_basic_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/49aa949715243abb9094994f88b7fdc37a54757ec4c6579e6640d4e62d148c87.json b/tests/integration/responses/recordings/fd5280a00f415baa514839d1903263035e94bea9462e613798c48a8c39697217.json similarity index 98% rename from tests/integration/responses/recordings/49aa949715243abb9094994f88b7fdc37a54757ec4c6579e6640d4e62d148c87.json rename to tests/integration/responses/recordings/fd5280a00f415baa514839d1903263035e94bea9462e613798c48a8c39697217.json index ab6e7f5468..e38ff0baf4 100644 --- a/tests/integration/responses/recordings/49aa949715243abb9094994f88b7fdc37a54757ec4c6579e6640d4e62d148c87.json +++ b/tests/integration/responses/recordings/fd5280a00f415baa514839d1903263035e94bea9462e613798c48a8c39697217.json @@ -2,7 +2,7 @@ "test_id": 
"tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_context_loading[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions", "headers": {}, "body": { "model": "openai.gpt-oss-20b", diff --git a/tests/integration/responses/recordings/models-ce53c4fcbb40594a10b3d758a338cccbd471bba93e04d38264d663c6dcec8578-31ec6c83.json b/tests/integration/responses/recordings/models-2f9eb79dbfa9f25cae2fdc739d4358a242c429f68d4fb635ddbeeb55595b7027-31ec6c83.json similarity index 99% rename from tests/integration/responses/recordings/models-ce53c4fcbb40594a10b3d758a338cccbd471bba93e04d38264d663c6dcec8578-31ec6c83.json rename to tests/integration/responses/recordings/models-2f9eb79dbfa9f25cae2fdc739d4358a242c429f68d4fb635ddbeeb55595b7027-31ec6c83.json index 5d2b85f526..2003c2f1db 100644 --- a/tests/integration/responses/recordings/models-ce53c4fcbb40594a10b3d758a338cccbd471bba93e04d38264d663c6dcec8578-31ec6c83.json +++ b/tests/integration/responses/recordings/models-2f9eb79dbfa9f25cae2fdc739d4358a242c429f68d4fb635ddbeeb55595b7027-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_reasoning.py::test_reasoning_basic_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/responses/recordings/models-d487ac38a1f3f3bb9ebc5254525862e9fcefdf01d5184789f6ba4b3039dc17ba-31ec6c83.json b/tests/integration/responses/recordings/models-48bcf1eca39637c8a9767f122e10ed8a6421f340543238789bbbe8c516b79114-31ec6c83.json similarity index 99% rename from 
tests/integration/responses/recordings/models-d487ac38a1f3f3bb9ebc5254525862e9fcefdf01d5184789f6ba4b3039dc17ba-31ec6c83.json rename to tests/integration/responses/recordings/models-48bcf1eca39637c8a9767f122e10ed8a6421f340543238789bbbe8c516b79114-31ec6c83.json index 7ed290b164..e95bcad895 100644 --- a/tests/integration/responses/recordings/models-d487ac38a1f3f3bb9ebc5254525862e9fcefdf01d5184789f6ba4b3039dc17ba-31ec6c83.json +++ b/tests/integration/responses/recordings/models-48bcf1eca39637c8a9767f122e10ed8a6421f340543238789bbbe8c516b79114-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/responses/recordings/models-0535aa1a1942a5b3191f8d08fb57457ef937494a6e6cb46918fd196f77016145-31ec6c83.json b/tests/integration/responses/recordings/models-c7239f3a8bc25af23106c20d3a0844f33356aaddf6594e08964cde42375f54dd-31ec6c83.json similarity index 99% rename from tests/integration/responses/recordings/models-0535aa1a1942a5b3191f8d08fb57457ef937494a6e6cb46918fd196f77016145-31ec6c83.json rename to tests/integration/responses/recordings/models-c7239f3a8bc25af23106c20d3a0844f33356aaddf6594e08964cde42375f54dd-31ec6c83.json index 0090cedb61..2c2d94637b 100644 --- a/tests/integration/responses/recordings/models-0535aa1a1942a5b3191f8d08fb57457ef937494a6e6cb46918fd196f77016145-31ec6c83.json +++ b/tests/integration/responses/recordings/models-c7239f3a8bc25af23106c20d3a0844f33356aaddf6594e08964cde42375f54dd-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_with_web_search[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": 
"https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/responses/recordings/models-03ffd7273c9bf0c84abd4750391c19dfc7545772caacd9bdd02c953c5c3fb6cd-31ec6c83.json b/tests/integration/responses/recordings/models-e3014308c84bda792e9ce27641b08ece821283fca29f3ced3ffbff35583d1f9b-31ec6c83.json similarity index 99% rename from tests/integration/responses/recordings/models-03ffd7273c9bf0c84abd4750391c19dfc7545772caacd9bdd02c953c5c3fb6cd-31ec6c83.json rename to tests/integration/responses/recordings/models-e3014308c84bda792e9ce27641b08ece821283fca29f3ced3ffbff35583d1f9b-31ec6c83.json index 7426212fe6..b223fe0158 100644 --- a/tests/integration/responses/recordings/models-03ffd7273c9bf0c84abd4750391c19dfc7545772caacd9bdd02c953c5c3fb6cd-31ec6c83.json +++ b/tests/integration/responses/recordings/models-e3014308c84bda792e9ce27641b08ece821283fca29f3ced3ffbff35583d1f9b-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_with_function_tools[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/responses/recordings/models-ffb55818ff4f53a50d044fbf570e18ed249a0245bfe68a7e91eed69ccf5f2193-31ec6c83.json b/tests/integration/responses/recordings/models-e400deaab35580927e2926f5b0d13b37ce15a07c9c0a13e6ecc80ea805e1d61b-31ec6c83.json similarity index 99% rename from tests/integration/responses/recordings/models-ffb55818ff4f53a50d044fbf570e18ed249a0245bfe68a7e91eed69ccf5f2193-31ec6c83.json rename to tests/integration/responses/recordings/models-e400deaab35580927e2926f5b0d13b37ce15a07c9c0a13e6ecc80ea805e1d61b-31ec6c83.json index 
f8b2bf499f..2ef3e42b7c 100644 --- a/tests/integration/responses/recordings/models-ffb55818ff4f53a50d044fbf570e18ed249a0245bfe68a7e91eed69ccf5f2193-31ec6c83.json +++ b/tests/integration/responses/recordings/models-e400deaab35580927e2926f5b0d13b37ce15a07c9c0a13e6ecc80ea805e1d61b-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_response_extra_body_guided_choice[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/integration/responses/recordings/models-879ae66c787975780a21251d2755da3ba56850f94c78577db27ba1e5ff8a95e8-31ec6c83.json b/tests/integration/responses/recordings/models-fca7382830039be10bc959fcbb2299c4835921a886e43c02c4f68ff4872e3024-31ec6c83.json similarity index 99% rename from tests/integration/responses/recordings/models-879ae66c787975780a21251d2755da3ba56850f94c78577db27ba1e5ff8a95e8-31ec6c83.json rename to tests/integration/responses/recordings/models-fca7382830039be10bc959fcbb2299c4835921a886e43c02c4f68ff4872e3024-31ec6c83.json index fed332b52f..b5099e8a63 100644 --- a/tests/integration/responses/recordings/models-879ae66c787975780a21251d2755da3ba56850f94c78577db27ba1e5ff8a95e8-31ec6c83.json +++ b/tests/integration/responses/recordings/models-fca7382830039be10bc959fcbb2299c4835921a886e43c02c4f68ff4872e3024-31ec6c83.json @@ -2,7 +2,7 @@ "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_non_streaming[txt=bedrock/openai.gpt-oss-20b]", "request": { "method": "POST", - "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models", + "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models", "headers": {}, "body": {}, "endpoint": "/v1/models", diff --git a/tests/unit/providers/inference/bedrock/test_openai_sdk_integration.py 
b/tests/unit/providers/inference/bedrock/test_openai_sdk_integration.py new file mode 100644 index 0000000000..b11c8ccb40 --- /dev/null +++ b/tests/unit/providers/inference/bedrock/test_openai_sdk_integration.py @@ -0,0 +1,154 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Tests for Bedrock OpenAI SDK integration with SigV4 authentication. + +These tests verify: +1. Base URL uses bedrock-runtime hostname +2. SigV4 signing uses "bedrock" as the service name (NOT "bedrock-runtime") +3. In SigV4 mode, no Bearer Authorization header is present +4. STS credentials work properly with temporary tokens +""" + +import importlib.util +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +HAS_BOTO3 = importlib.util.find_spec("boto3") is not None + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +class TestBedrockOpenAISDKIntegration: + """Tests for Bedrock OpenAI SDK integration with SigV4 auth.""" + + def test_base_url_uses_bedrock_runtime_hostname(self): + """Base URL should use bedrock-runtime hostname (endpoint prefix).""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + base_url = adapter.get_base_url() + # Hostname uses "bedrock-runtime" (endpoint prefix) + assert base_url == "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1" + + def test_sigv4_uses_bedrock_signing_name_not_bedrock_runtime(self): + """ + SigV4 signing must use 'bedrock' as the service name, NOT 'bedrock-runtime'. + + The hostname is bedrock-runtime..amazonaws.com (endpoint prefix), + but the SigV4 credential scope uses the signing name 'bedrock'. 
+ This is defined in botocore's service metadata. + """ + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-west-2") + adapter = BedrockInferenceAdapter(config=config) + + with patch("llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth") as mock_auth_cls: + mock_auth_cls.return_value = MagicMock() + adapter._build_sigv4_http_client() + + # Verify signing name is "bedrock", NOT "bedrock-runtime" + call_kwargs = mock_auth_cls.call_args[1] + assert call_kwargs["service"] == "bedrock", ( + "SigV4 must use signing name 'bedrock', not endpoint prefix 'bedrock-runtime'" + ) + + def test_sigv4_mode_uses_placeholder_api_key(self): + """In SigV4 mode, api_key should be a placeholder (SigV4 auth replaces the header).""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + with patch.object(adapter, "get_request_provider_data", return_value=None): + # Patch SigV4Auth to avoid actual boto3 calls + with patch("llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth") as mock_auth_cls: + mock_auth = MagicMock() + mock_auth_cls.return_value = mock_auth + + client = adapter.client + + # OpenAI SDK requires a non-empty api_key for validation. + # We use a placeholder that SigV4 auth replaces with proper signature. + # This follows the same pattern as the OCI provider. 
+ assert client.api_key == "" + + def test_sigv4_authorization_header_format(self): + """SigV4 Authorization header should start with AWS4-HMAC-SHA256, not Bearer.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = None + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + # Use "bedrock" signing name (correct) + auth = BedrockSigV4Auth(region="us-west-2", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + client.post( + "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + auth_header = captured_request.headers.get("authorization", "") + + # Must be SigV4, NOT Bearer + assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}" + assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer" + + def test_sts_credentials_include_security_token(self): + """SigV4 auth should include x-amz-security-token for STS credentials.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = "AQoDYXdzEJr..." 
+ + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + # Use "bedrock" signing name (correct) + auth = BedrockSigV4Auth(region="us-west-2", service="bedrock") + + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions", + headers={"content-type": "application/json"}, + content=b'{"model": "test"}', + ) + + gen = auth.auth_flow(request) + signed_request = next(gen) + + # Verify session token header is present for STS credentials + assert "x-amz-security-token" in signed_request.headers + assert signed_request.headers["x-amz-security-token"] == mock_frozen_creds.token diff --git a/tests/unit/providers/inference/bedrock/test_sigv4_auth.py b/tests/unit/providers/inference/bedrock/test_sigv4_auth.py new file mode 100644 index 0000000000..a345247e00 --- /dev/null +++ b/tests/unit/providers/inference/bedrock/test_sigv4_auth.py @@ -0,0 +1,885 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Unit tests for Bedrock SigV4 authentication. + +These tests verify: +1. SigV4 auth handler correctly signs requests +2. Auth mode detection (bearer vs SigV4) +3. Credential chain integration +4. 
Error handling +""" + +# Check if boto3 is available for SigV4 tests +import importlib.util +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +HAS_BOTO3 = importlib.util.find_spec("boto3") is not None + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +class TestBedrockSigV4Auth: + """Tests for BedrockSigV4Auth httpx.Auth implementation.""" + + def test_auth_flow_signs_request(self): + """SigV4 auth should add AWS signature headers to request.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + # Mock boto3 credentials + mock_creds = MagicMock() + mock_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_creds.token = None + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = mock_creds.access_key + mock_frozen_creds.secret_key = mock_creds.secret_key + mock_frozen_creds.token = mock_creds.token + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-1", service="bedrock") + + # Create a test request + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + headers={"content-type": "application/json"}, + content=b'{"model": "test"}', + ) + + # Run auth flow + gen = auth.auth_flow(request) + signed_request = next(gen) + + # Verify SigV4 headers were added + assert "authorization" in signed_request.headers + assert "x-amz-date" in signed_request.headers + assert "AWS4-HMAC-SHA256" in signed_request.headers["authorization"] + + def test_auth_flow_with_explicit_role_assumption(self): + """SigV4 auth should use RefreshableBotoSession when role_arn is provided.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import 
BedrockSigV4Auth + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAEXP_ROLE_KEY" + mock_frozen_creds.secret_key = "exp_secret" + mock_frozen_creds.token = "exp_token" + + with patch( + "llama_stack.providers.utils.bedrock.refreshable_boto_session.RefreshableBotoSession" + ) as mock_refreshable_cls: + mock_refreshable = MagicMock() + mock_refreshable_cls.return_value = mock_refreshable + mock_session = MagicMock() + mock_refreshable.refreshable_session.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth( + region="us-east-1", + aws_role_arn="arn:aws:iam::123456789012:role/test-role", + aws_web_identity_token_file="/path/to/token", + aws_role_session_name="test-session", + ) + + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + content=b"{}", + ) + + gen = auth.auth_flow(request) + signed_request = next(gen) + + # Verify RefreshableBotoSession was called with correct args + mock_refreshable_cls.assert_called_once_with( + region_name="us-east-1", + aws_access_key_id=None, + aws_secret_access_key=None, + aws_session_token=None, + profile_name=None, + sts_arn="arn:aws:iam::123456789012:role/test-role", + web_identity_token_file="/path/to/token", + session_name="test-session", + session_ttl=3600, + ) + assert signed_request.headers["x-amz-security-token"] == "exp_token" + + def test_auth_flow_with_session_token(self): + """SigV4 auth should include X-Amz-Security-Token for STS credentials.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = "FwoGZXIvYXdzEBYaDG..." 
# STS session token + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-west-2", service="bedrock") + + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions", + headers={"content-type": "application/json"}, + content=b'{"model": "test"}', + ) + + gen = auth.auth_flow(request) + signed_request = next(gen) + + # Verify session token header is present + assert "x-amz-security-token" in signed_request.headers + assert signed_request.headers["x-amz-security-token"] == mock_frozen_creds.token + + def test_auth_raises_on_missing_credentials(self): + """SigV4 auth should raise clear error when credentials unavailable.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value = None + + auth = BedrockSigV4Auth(region="us-east-1") + + request = httpx.Request( + method="POST", + url="https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + content=b"{}", + ) + + with pytest.raises(RuntimeError, match="Failed to load AWS credentials"): + gen = auth.auth_flow(request) + next(gen) + + +class TestBedrockConfigAuthDetection: + """Tests for BedrockConfig auth mode detection.""" + + def test_has_bearer_token_with_token(self): + """Config should detect when bearer token is present.""" + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + # Use api_key as that's the alias for auth_credential + config = BedrockConfig(api_key="my-bearer-token") + assert config.has_bearer_token() is True + + def test_has_bearer_token_without_token(self): + """Config 
should detect when bearer token is absent.""" + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig() + assert config.has_bearer_token() is False + + def test_has_bearer_token_with_empty_string(self): + """Empty string should be treated as no token.""" + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(api_key="") + assert config.has_bearer_token() is False + + def test_has_bearer_token_with_whitespace(self): + """Whitespace-only string should be treated as no token.""" + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(api_key=" ") + assert config.has_bearer_token() is False + + +class TestBedrockInferenceAdapterAuthMode: + """Tests for BedrockInferenceAdapter auth mode selection.""" + + def test_should_use_sigv4_when_no_bearer_token(self): + """Adapter should use SigV4 when no bearer token configured.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + # Mock get_request_provider_data to return None + with patch.object(adapter, "get_request_provider_data", return_value=None): + assert adapter._should_use_sigv4() is True + + def test_should_not_use_sigv4_when_bearer_token_in_config(self): + """Adapter should use bearer auth when token in config.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig( + region_name="us-east-1", + api_key="my-bearer-token", # Use api_key alias + ) + adapter = BedrockInferenceAdapter(config=config) + + with patch.object(adapter, "get_request_provider_data", return_value=None): + assert 
adapter._should_use_sigv4() is False + + def test_should_not_use_sigv4_when_bearer_token_in_provider_data(self): + """Adapter should use bearer auth when token in provider data.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import ( + BedrockConfig, + BedrockProviderDataValidator, + ) + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + provider_data = BedrockProviderDataValidator(aws_bearer_token_bedrock="per-request-token") + with patch.object(adapter, "get_request_provider_data", return_value=provider_data): + assert adapter._should_use_sigv4() is False + + def test_get_extra_client_params_skips_sigv4_client_when_bearer_override(self): + """Per-request bearer token override must not be silently discarded by the SigV4 client. + + When the server starts in SigV4 mode (_sigv4_http_client is not None) but a request + arrives with aws_bearer_token_bedrock in provider data, get_extra_client_params() + must return {} so the OpenAI SDK uses the bearer token instead of SigV4 auth. 
+ """ + from unittest.mock import MagicMock + + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import ( + BedrockConfig, + BedrockProviderDataValidator, + ) + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + # Simulate that initialize() already built the SigV4 client + adapter._sigv4_http_client = MagicMock() + + # Per-request bearer token override in provider data + provider_data = BedrockProviderDataValidator(aws_bearer_token_bedrock="per-request-token") + with patch.object(adapter, "get_request_provider_data", return_value=provider_data): + params = adapter.get_extra_client_params() + # Must return {} — the bearer token path must not receive the SigV4 http_client, + # which would strip and replace the Authorization header + assert params == {} + + def test_get_extra_client_params_uses_sigv4_client_when_no_override(self): + """SigV4 client is returned when no per-request bearer token is present.""" + from unittest.mock import MagicMock + + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + mock_client = MagicMock() + adapter._sigv4_http_client = mock_client + + with patch.object(adapter, "get_request_provider_data", return_value=None): + params = adapter.get_extra_client_params() + assert params == {"http_client": mock_client} + + def test_should_use_sigv4_when_provider_data_token_is_whitespace(self): + """Adapter should use SigV4 when provider data token is whitespace-only.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import ( + BedrockConfig, + 
BedrockProviderDataValidator, + ) + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + # Whitespace-only token should be treated as no token (use SigV4) + provider_data = BedrockProviderDataValidator(aws_bearer_token_bedrock=" ") + with patch.object(adapter, "get_request_provider_data", return_value=provider_data): + assert adapter._should_use_sigv4() is True + + def test_get_api_key_returns_placeholder_for_sigv4(self): + """When using SigV4, get_api_key should return placeholder to satisfy OpenAIMixin validation.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + with patch.object(adapter, "get_request_provider_data", return_value=None): + api_key = adapter.get_api_key() + # Placeholder satisfies OpenAIMixin validation; SigV4 auth handler replaces + # the Bearer header with proper SigV4 signature (OCI pattern) + assert api_key == "" + + @pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") + def test_client_uses_sigv4_auth_when_no_bearer_token(self): + """_build_sigv4_http_client should use correct service name and pass config fields.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-west-2") + adapter = BedrockInferenceAdapter(config=config) + + with patch("llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth") as mock_auth_cls: + mock_auth_cls.return_value = MagicMock() + adapter._build_sigv4_http_client() + + # Verify auth was created with correct service name ("bedrock", not "bedrock-runtime") + call_kwargs = mock_auth_cls.call_args[1] + assert call_kwargs["region"] == "us-west-2" + 
assert call_kwargs["service"] == "bedrock" + + @pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") + def test_sigv4_http_client_cached_after_initialize(self): + """_sigv4_http_client should be created once in initialize() and reused.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + config = BedrockConfig(region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + with patch.object(adapter, "_build_sigv4_http_client") as mock_build: + mock_build.return_value = MagicMock() + + # Simulate initialize() — called once + adapter._sigv4_http_client = adapter._build_sigv4_http_client() + assert mock_build.call_count == 1 + + # get_extra_client_params reuses the cached client, does NOT rebuild + with patch.object(adapter, "get_request_provider_data", return_value=None): + adapter.get_extra_client_params() + assert mock_build.call_count == 1 # still 1, not 2 + + +class TestBedrockInferenceAdapterAuthErrors: + """Tests for user-facing auth error handling.""" + + def test_sigv4_auth_error_preserves_detail_in_internal_server_error(self): + """SigV4 auth failures should return a clear, generic 500 message.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + from llama_stack_api.common.errors import InternalServerError + + adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1")) + + with pytest.raises(InternalServerError) as exc_info: + adapter._handle_auth_error( + "request signed with invalid credentials", + RuntimeError("provider boom"), + use_sigv4=True, + ) + + message = str(exc_info.value) + assert ( + message + == "Authentication failed because the configured cloud credentials could not authorize this request. 
" + "Please verify that the credentials available to the server are valid, unexpired, and allowed to access the requested model." + ) + assert "AWS_ROLE_ARN" not in message + assert "Bedrock" not in message + + def test_bearer_auth_error_preserves_detail_in_internal_server_error(self): + """Bearer auth failures should be actionable without exposing internal header/config details.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + from llama_stack_api.common.errors import InternalServerError + + adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1")) + + with pytest.raises(InternalServerError) as exc_info: + adapter._handle_auth_error( + "Error code: 401 - invalid api key format", + RuntimeError("provider boom"), + use_sigv4=False, + ) + + message = str(exc_info.value) + assert ( + message == "Authentication failed because the provided request credential was rejected. " + "Please verify that the credential is valid, unexpired, and authorized for this request." 
+ ) + assert "x-llamastack-provider-data" not in message + assert "Bedrock" not in message + + def test_expired_bearer_auth_error_preserves_sanitized_detail(self): + """Expired bearer auth failures should stay actionable without exposing config names.""" + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + from llama_stack_api.common.errors import InternalServerError + + adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1")) + + with pytest.raises(InternalServerError) as exc_info: + adapter._handle_auth_error( + "Bearer Token has expired", + RuntimeError("provider boom"), + use_sigv4=False, + ) + + message = str(exc_info.value) + assert ( + message == "Authentication failed because the provided request credential has expired. " + "Please refresh the credential and try again, or remove it so the server can use its configured cloud credentials." 
+ ) + assert "AWS_BEARER_TOKEN_BEDROCK" not in message + assert "Bedrock" not in message + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +class TestSigV4MockTransport: + """Integration-style tests using httpx.MockTransport to verify SigV4 signing.""" + + def test_sigv4_adds_aws4_signature_header(self): + """SigV4 auth should add AWS4-HMAC-SHA256 Authorization header.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + # Track the request that gets sent + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = None + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-1", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + client.post( + "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + auth_header = captured_request.headers.get("authorization", "") + + # Verify SigV4 signature format + assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}" + assert "Credential=" in auth_header + assert "SignedHeaders=" in auth_header + assert "Signature=" in auth_header + + # Verify NO Bearer token is present + assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer token" + + def test_sigv4_no_bearer_header_when_empty_api_key(self): + 
"""When api_key is empty, no Bearer header should be added.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = None + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-1", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + client.post( + "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + auth_header = captured_request.headers.get("authorization", "") + + # Authorization header should be SigV4, not Bearer + assert "AWS4-HMAC-SHA256" in auth_header + assert "Bearer" not in auth_header + + def test_sigv4_includes_security_token_for_sts(self): + """SigV4 auth should include x-amz-security-token for STS credentials.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = "FwoGZXIvYXdzEBYaDGTestSessionToken" + + with 
patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-west-2", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + client.post( + "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + + # Verify security token header is present for STS credentials + assert "x-amz-security-token" in captured_request.headers + assert captured_request.headers["x-amz-security-token"] == mock_frozen_creds.token + + def test_sigv4_replaces_existing_bearer_header(self): + """SigV4 auth should replace any existing Bearer Authorization header.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = None + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-1", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + # Simulate what OpenAI SDK does: add Bearer header + client.post( + "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + 
headers={"Authorization": "Bearer sigv4-placeholder"},
+                )
+
+        assert captured_request is not None
+        auth_header = captured_request.headers.get("authorization", "")
+
+        # Verify SigV4 replaced the Bearer header (not appended)
+        assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}"
+        assert "Bearer" not in auth_header, "SigV4 auth should have replaced Bearer header"
+        assert "sigv4-placeholder" not in auth_header, "Placeholder should be removed"
+
+    def test_sigv4_host_header_includes_port(self):
+        """Host header should include port for non-default ports."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        captured_request = None
+
+        def capture_request(request: httpx.Request) -> httpx.Response:
+            nonlocal captured_request
+            captured_request = request
+            return httpx.Response(200, json={"status": "ok"})
+
+        mock_frozen_creds = MagicMock()
+        mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE"
+        mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+        mock_frozen_creds.token = None
+
+        with patch("boto3.Session") as mock_session_class:
+            mock_session = MagicMock()
+            mock_session_class.return_value = mock_session
+            mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds
+
+            auth = BedrockSigV4Auth(region="us-east-1", service="bedrock")
+            transport = httpx.MockTransport(capture_request)
+
+            # Use non-default port
+            with httpx.Client(auth=auth, transport=transport) as client:
+                client.post(
+                    "https://localhost:8443/openai/v1/chat/completions",
+                    json={"model": "test"},
+                )
+
+        assert captured_request is not None
+
+        # Verify the Host header includes the port
+        host_header = captured_request.headers.get("host", "")
+        assert host_header == "localhost:8443", f"Expected host with port, got: {host_header}"
+
+        # The signed Authorization header should include host in SignedHeaders
+        auth_header = captured_request.headers.get("authorization", "")
+        assert "host" in 
auth_header.lower() + + # Verify SigV4 signature format and no Bearer token + assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}" + assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer token" + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +class TestWebIdentityFederation: + """ + Tests for Web Identity Federation (IRSA, GitHub Actions OIDC). + + These tests verify that SigV4 auth works correctly with temporary credentials + obtained via AssumeRoleWithWebIdentity, as used in: + - Kubernetes/OpenShift with IRSA (IAM Roles for Service Accounts) + - GitHub Actions with OIDC (aws-actions/configure-aws-credentials) + """ + + def test_web_identity_credentials_include_session_token(self): + """ + Web identity credentials should include x-amz-security-token header. + + When using IRSA or GitHub Actions OIDC, boto3 calls AssumeRoleWithWebIdentity + which returns temporary credentials with a session token. This token must + be included in the x-amz-security-token header for the request to succeed. + """ + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + # Simulate credentials from AssumeRoleWithWebIdentity + # Note: ASIA prefix indicates temporary credentials (vs AKIA for static) + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAQWERTYUIOPASDFGH" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYzxcvbnm123" + mock_frozen_creds.token = "IQoJb3JpZ2luX2VjEBYaCXVzLWVhc3QtMSJHMEUCIQDExample..." 
# STS session token + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-2", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + client.post( + "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions", + json={ + "model": "us.meta.llama3-2-1b-instruct-v1:0", + "messages": [{"role": "user", "content": "Hi"}], + }, + ) + + assert captured_request is not None + + # Verify STS session token is included + assert "x-amz-security-token" in captured_request.headers + assert captured_request.headers["x-amz-security-token"] == mock_frozen_creds.token + + # Verify SigV4 signature is present and valid format + auth_header = captured_request.headers.get("authorization", "") + assert auth_header.startswith("AWS4-HMAC-SHA256") + assert "Credential=ASIAQWERTYUIOPASDFGH" in auth_header + assert "bedrock/aws4_request" in auth_header + + # Verify no Bearer token (would conflict with SigV4) + assert "Bearer" not in auth_header + + def test_adapter_uses_sigv4_with_web_identity_env(self, monkeypatch): + """ + BedrockInferenceAdapter should use SigV4 when web identity env vars are set. + + This simulates the Kubernetes/GitHub Actions scenario where no bearer token + is configured but AWS credentials are available via web identity federation. 
+ """ + from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter + from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + # Set web identity environment variables + monkeypatch.setenv("AWS_ROLE_ARN", "arn:aws:iam::123456789012:role/test-role") + monkeypatch.setenv("AWS_WEB_IDENTITY_TOKEN_FILE", "/var/run/secrets/token") + monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-2") + + # Create adapter without bearer token (should trigger SigV4) + config = BedrockConfig(region_name="us-east-2") + adapter = BedrockInferenceAdapter(config=config) + + with patch.object(adapter, "get_request_provider_data", return_value=None): + # Should use SigV4 since no bearer token is configured + assert adapter._should_use_sigv4() is True + + # API key should be placeholder to satisfy OpenAIMixin validation (OCI pattern) + # SigV4 auth handler replaces Bearer header with proper SigV4 signature + assert adapter.get_api_key() == "" + + def test_credential_refresh_returns_fresh_credentials(self): + """ + SigV4 auth should get fresh credentials on each request. + + Web identity credentials are temporary and expire. boto3's credential + chain handles refresh automatically, but we need to call get_frozen_credentials() + on each request to get the current valid credentials. 
+ """ + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + call_count = 0 + captured_requests = [] + + def capture_request(request: httpx.Request) -> httpx.Response: + captured_requests.append(request) + return httpx.Response(200, json={"status": "ok"}) + + # Simulate credentials that change (as would happen after refresh) + initial_creds = MagicMock() + initial_creds.access_key = "ASIAFIRSTCREDENTIAL" + initial_creds.secret_key = "firstSecretKey123" + initial_creds.token = "firstSessionToken" + + refreshed_creds = MagicMock() + refreshed_creds.access_key = "ASIASECONDCREDENTIAL" + refreshed_creds.secret_key = "secondSecretKey456" + refreshed_creds.token = "secondSessionToken" + + def get_frozen_credentials(): + nonlocal call_count + call_count += 1 + # Return different credentials on second call (simulating refresh) + return initial_creds if call_count == 1 else refreshed_creds + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_creds = MagicMock() + mock_creds.get_frozen_credentials = get_frozen_credentials + mock_session.get_credentials.return_value = mock_creds + + auth = BedrockSigV4Auth(region="us-east-2", service="bedrock") + transport = httpx.MockTransport(capture_request) + + with httpx.Client(auth=auth, transport=transport) as client: + # First request + client.post( + "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + # Second request (after simulated credential refresh) + client.post( + "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert len(captured_requests) == 2 + + # First request should use initial credentials + first_auth = captured_requests[0].headers.get("authorization", "") + assert "ASIAFIRSTCREDENTIAL" in first_auth + assert captured_requests[0].headers.get("x-amz-security-token") == "firstSessionToken" 
+ + # Second request should use refreshed credentials + second_auth = captured_requests[1].headers.get("authorization", "") + assert "ASIASECONDCREDENTIAL" in second_auth + assert captured_requests[1].headers.get("x-amz-security-token") == "secondSessionToken" + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +class TestAsyncAuthFlow: + """Tests for async auth flow to verify non-blocking behavior.""" + + async def test_async_auth_flow_signs_request(self): + """Async auth flow should sign requests without blocking the event loop.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + async def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + mock_frozen_creds.token = None + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-1", service="bedrock") + transport = httpx.MockTransport(capture_request) + + async with httpx.AsyncClient(auth=auth, transport=transport) as client: + await client.post( + "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + auth_header = captured_request.headers.get("authorization", "") + + # Verify SigV4 signature format + assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}" + assert "Credential=" in auth_header + assert "SignedHeaders=" in auth_header + assert "Signature=" in auth_header + + # Verify NO Bearer token is 
present + assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer token" + + async def test_async_auth_flow_includes_session_token(self): + """Async auth flow should include x-amz-security-token for STS credentials.""" + from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + + captured_request = None + + async def capture_request(request: httpx.Request) -> httpx.Response: + nonlocal captured_request + captured_request = request + return httpx.Response(200, json={"status": "ok"}) + + mock_frozen_creds = MagicMock() + mock_frozen_creds.access_key = "ASIAQWERTYUIOPASDFGH" + mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYzxcvbnm123" + mock_frozen_creds.token = "IQoJb3JpZ2luX2VjAsyncTest..." + + with patch("boto3.Session") as mock_session_class: + mock_session = MagicMock() + mock_session_class.return_value = mock_session + mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds + + auth = BedrockSigV4Auth(region="us-east-2", service="bedrock") + transport = httpx.MockTransport(capture_request) + + async with httpx.AsyncClient(auth=auth, transport=transport) as client: + await client.post( + "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions", + json={"model": "test"}, + ) + + assert captured_request is not None + + # Verify STS session token is included + assert "x-amz-security-token" in captured_request.headers + assert captured_request.headers["x-amz-security-token"] == mock_frozen_creds.token diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py index ed3987e898..3ab886ed0b 100644 --- a/tests/unit/providers/inference/test_bedrock_adapter.py +++ b/tests/unit/providers/inference/test_bedrock_adapter.py @@ -13,7 +13,7 @@ from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter from llama_stack.providers.remote.inference.bedrock.config import 
BedrockConfig -from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody +from llama_stack_api import InternalServerError, OpenAIChatCompletionRequestWithExtraBody def test_adapter_initialization(): @@ -28,7 +28,7 @@ def test_client_url_construction(): config = BedrockConfig(api_key="test-key", region_name="us-west-2") adapter = BedrockInferenceAdapter(config=config) - assert adapter.get_base_url() == "https://bedrock-mantle.us-west-2.api.aws/v1" + assert adapter.get_base_url() == "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1" def test_api_key_from_config(): @@ -51,7 +51,7 @@ def test_api_key_from_header_overrides_config(): async def test_authentication_error_handling(): - """Test that AuthenticationError from OpenAI client is converted to ValueError with helpful message""" + """Authentication failures should surface as a sanitized InternalServerError.""" config = BedrockConfig(api_key="invalid-key", region_name="us-east-1") adapter = BedrockInferenceAdapter(config=config) @@ -68,14 +68,19 @@ async def test_authentication_error_handling(): BedrockInferenceAdapter.__bases__[0].openai_chat_completion = mock_super try: - with pytest.raises(ValueError) as exc_info: + with pytest.raises(InternalServerError) as exc_info: params = OpenAIChatCompletionRequestWithExtraBody( model="test-model", messages=[{"role": "user", "content": "test"}] ) await adapter.openai_chat_completion(params=params) - assert "AWS Bedrock authentication failed" in str(exc_info.value) - assert "Please verify your API key" in str(exc_info.value) + message = str(exc_info.value) + assert ( + message == "Authentication failed because the provided request credential was rejected. " + "Please verify that the credential is valid, unexpired, and authorized for this request." 
+ ) + assert "Bedrock" not in message + assert "x-llamastack-provider-data" not in message finally: # Restore original method BedrockInferenceAdapter.__bases__[0].openai_chat_completion = original_method diff --git a/tests/unit/providers/inference/test_bedrock_config.py b/tests/unit/providers/inference/test_bedrock_config.py index 6220804261..60e18a45d0 100644 --- a/tests/unit/providers/inference/test_bedrock_config.py +++ b/tests/unit/providers/inference/test_bedrock_config.py @@ -35,5 +35,17 @@ def test_bedrock_config_sample(): sample = BedrockConfig.sample_run_config() assert "api_key" in sample assert "region_name" in sample + assert "aws_role_arn" in sample + assert "aws_web_identity_token_file" in sample assert sample["api_key"] == "${env.AWS_BEARER_TOKEN_BEDROCK:=}" assert sample["region_name"] == "${env.AWS_DEFAULT_REGION:=us-east-2}" + assert sample["aws_role_arn"] == "${env.AWS_ROLE_ARN:=}" + assert sample["aws_web_identity_token_file"] == "${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}" + + +def test_bedrock_config_sts_fields(monkeypatch): + monkeypatch.setenv("AWS_ROLE_ARN", "arn:aws:iam::123:role/test") + monkeypatch.setenv("AWS_WEB_IDENTITY_TOKEN_FILE", "/tmp/token") + config = BedrockConfig() + assert config.aws_role_arn == "arn:aws:iam::123:role/test" + assert config.aws_web_identity_token_file == "/tmp/token" diff --git a/tests/unit/providers/inference/test_bedrock_sts.py b/tests/unit/providers/inference/test_bedrock_sts.py new file mode 100644 index 0000000000..65354765e3 --- /dev/null +++ b/tests/unit/providers/inference/test_bedrock_sts.py @@ -0,0 +1,84 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import importlib.util +from unittest.mock import MagicMock, patch + +import pytest + +from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter +from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig +from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth + +HAS_BOTO3 = importlib.util.find_spec("boto3") is not None + + +def test_sigv4_auth_initialization(): + auth = BedrockSigV4Auth( + region="us-east-1", + aws_role_arn="arn:aws:iam::123:role/test", + aws_web_identity_token_file="/tmp/token", + aws_role_session_name="test-session", + session_ttl=1800, + ) + assert auth._region == "us-east-1" + assert auth._aws_role_arn == "arn:aws:iam::123:role/test" + assert auth._aws_web_identity_token_file == "/tmp/token" + assert auth._aws_role_session_name == "test-session" + assert auth._session_ttl == 1800 + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed") +@patch("llama_stack.providers.utils.bedrock.sigv4_auth.logger") +def test_sigv4_auth_gets_refreshable_session(mock_logger): + with patch( + "llama_stack.providers.utils.bedrock.refreshable_boto_session.RefreshableBotoSession" + ) as mock_refreshable: + mock_session = MagicMock() + mock_refreshable.return_value.refreshable_session.return_value = mock_session + + auth = BedrockSigV4Auth( + region="us-east-1", + aws_role_arn="arn:aws:iam::123:role/test", + aws_web_identity_token_file="/tmp/token", + ) + + auth._get_credentials() + + mock_refreshable.assert_called_once_with( + region_name="us-east-1", + aws_access_key_id=None, + aws_secret_access_key=None, + aws_session_token=None, + profile_name=None, + sts_arn="arn:aws:iam::123:role/test", + web_identity_token_file="/tmp/token", + session_name=None, + session_ttl=3600, + ) + assert auth._session == mock_session + + +def test_adapter_passes_sts_config_to_auth(): + config = BedrockConfig( + region_name="us-west-2", + aws_role_arn="arn:aws:iam::123:role/test", + 
aws_web_identity_token_file="/tmp/token", + session_ttl=1800, + ) + adapter = BedrockInferenceAdapter(config=config) + + with patch("llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth") as mock_auth: + mock_auth.return_value = MagicMock() + adapter._build_sigv4_http_client() + + mock_auth.assert_called_once_with( + region="us-west-2", + service="bedrock", + aws_role_arn="arn:aws:iam::123:role/test", + aws_web_identity_token_file="/tmp/token", + session_ttl=1800, + ) diff --git a/tests/unit/providers/safety/test_bedrock_safety_adapter.py b/tests/unit/providers/safety/test_bedrock_safety_adapter.py new file mode 100644 index 0000000000..d761075fab --- /dev/null +++ b/tests/unit/providers/safety/test_bedrock_safety_adapter.py @@ -0,0 +1,35 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from unittest.mock import MagicMock, call, patch + +from llama_stack.providers.remote.safety.bedrock.bedrock import BedrockSafetyAdapter +from llama_stack.providers.remote.safety.bedrock.config import BedrockSafetyConfig + + +async def test_bedrock_safety_initialize_creates_clients(): + config = BedrockSafetyConfig( + region_name="us-west-2", + aws_role_arn="arn:aws:iam::123:role/test", + aws_web_identity_token_file="/tmp/token", + ) + adapter = BedrockSafetyAdapter(config=config) + + runtime_client = MagicMock(name="bedrock-runtime-client") + bedrock_client = MagicMock(name="bedrock-client") + with patch("llama_stack.providers.remote.safety.bedrock.bedrock.create_bedrock_client") as mock_create: + mock_create.side_effect = [runtime_client, bedrock_client] + + await adapter.initialize() + + assert adapter.bedrock_runtime_client is runtime_client + assert adapter.bedrock_client is bedrock_client + mock_create.assert_has_calls( + [ + call(config), + call(config, "bedrock"), + ] + ) diff --git 
a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py deleted file mode 100644 index c6d6e54baa..0000000000 --- a/tests/unit/providers/test_bedrock.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from types import SimpleNamespace -from unittest.mock import AsyncMock, PropertyMock, patch - -from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter -from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig -from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody - - -def test_can_create_adapter(): - config = BedrockConfig(api_key="test-key", region_name="us-east-1") - adapter = BedrockInferenceAdapter(config=config) - - assert adapter is not None - assert adapter.config.region_name == "us-east-1" - assert adapter.get_api_key() == "test-key" - - -def test_different_aws_regions(): - # just check a couple regions to verify URL construction works - config = BedrockConfig(api_key="key", region_name="us-east-1") - adapter = BedrockInferenceAdapter(config=config) - assert adapter.get_base_url() == "https://bedrock-mantle.us-east-1.api.aws/v1" - - config = BedrockConfig(api_key="key", region_name="eu-west-1") - adapter = BedrockInferenceAdapter(config=config) - assert adapter.get_base_url() == "https://bedrock-mantle.eu-west-1.api.aws/v1" - - -async def test_basic_chat_completion(): - """Test basic chat completion works with OpenAIMixin""" - config = BedrockConfig(api_key="k", region_name="us-east-1") - adapter = BedrockInferenceAdapter(config=config) - - class FakeModelStore: - async def has_model(self, model_id): - return True - - async def get_model(self, model_id): - return SimpleNamespace(provider_resource_id="meta.llama3-1-8b-instruct-v1:0") - - adapter.model_store = FakeModelStore() - - 
fake_response = SimpleNamespace( - id="chatcmpl-123", - choices=[SimpleNamespace(message=SimpleNamespace(content="Hello!", role="assistant"), finish_reason="stop")], - ) - - mock_create = AsyncMock(return_value=fake_response) - - class FakeClient: - def __init__(self): - self.chat = SimpleNamespace(completions=SimpleNamespace(create=mock_create)) - - with patch.object(type(adapter), "client", new_callable=PropertyMock, return_value=FakeClient()): - params = OpenAIChatCompletionRequestWithExtraBody( - model="llama3-1-8b", - messages=[{"role": "user", "content": "hello"}], - stream=False, - ) - response = await adapter.openai_chat_completion(params=params) - - assert response.id == "chatcmpl-123" - assert mock_create.await_count == 1 diff --git a/tests/unit/providers/utils/inference/test_network_config.py b/tests/unit/providers/utils/inference/test_network_config.py index 40678668c7..ea00c8d7aa 100644 --- a/tests/unit/providers/utils/inference/test_network_config.py +++ b/tests/unit/providers/utils/inference/test_network_config.py @@ -12,11 +12,13 @@ import pytest from llama_stack.providers.utils.inference.http_client import ( - _build_network_client_kwargs, _build_proxy_mounts, _build_ssl_context, build_http_client, ) +from llama_stack.providers.utils.inference.http_client import ( + build_network_client_kwargs as _build_network_client_kwargs, +) from llama_stack.providers.utils.inference.model_registry import ( NetworkConfig, ProxyConfig,