2 changes: 1 addition & 1 deletion docs/docs/api-openai/provider_matrix.md
@@ -33,7 +33,7 @@ Models, endpoints, and versions used during test recordings.
| Provider | Model(s) | Endpoint | Version Info |
|----------|----------|----------|--------------|
| azure | gpt-4o | llama-stack-test.openai.azure.com, lls-test.openai.azure.com | openai sdk: 2.5.0 |
| bedrock | openai.gpt-oss-20b | bedrock-mantle.us-east-2.api.aws, bedrock-mantle.us-west-2.api.aws | openai sdk: 2.5.0 |
| bedrock | openai.gpt-oss-20b | bedrock-mantle.us-west-2.api.aws, bedrock-runtime.us-east-2.amazonaws.com | openai sdk: 2.5.0 |
| ollama | gpt-oss:20b | — | openai sdk: 2.5.0, vllm server: 0.9.2rc2.dev136+g0b382b53a.d20250924 |
| openai | gpt-4o, o4-mini, text-embedding-3-small | api.openai.com | openai sdk: 2.5.0 |
| vllm | Qwen/Qwen3-0.6B | — | openai sdk: 2.5.0, vllm server: 0.18.1rc1.dev197+g0e9358c11 |
16 changes: 15 additions & 1 deletion docs/docs/providers/inference/remote_bedrock.mdx
@@ -34,11 +34,25 @@ AWS Bedrock inference provider using OpenAI compatible endpoint.
| `network.timeout.connect` | `float \| None` | No | | Connection timeout in seconds. |
| `network.timeout.read` | `float \| None` | No | | Read timeout in seconds. |
| `network.headers` | `dict[str, str] \| None` | No | | Additional HTTP headers to include in all requests. |
| `region_name` | `str` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint |
| `aws_access_key_id` | `SecretStr \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
| `aws_secret_access_key` | `SecretStr \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
| `aws_session_token` | `SecretStr \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
| `aws_role_arn` | `str \| None` | No | | The AWS role ARN to assume. Default use environment variable: AWS_ROLE_ARN |
| `aws_web_identity_token_file` | `str \| None` | No | | The path to the web identity token file. Default use environment variable: AWS_WEB_IDENTITY_TOKEN_FILE |
| `aws_role_session_name` | `str \| None` | No | | The session name to use when assuming a role. Default use environment variable: AWS_ROLE_SESSION_NAME |
| `region_name` | `str \| None` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint |
| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use. Default use environment variable: AWS_PROFILE |
| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform. Default use environment variable: AWS_RETRY_MODE |
| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection. The default is 60 seconds. |
| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |

## Sample Configuration

```yaml
api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
aws_role_arn: ${env.AWS_ROLE_ARN:=}
aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}
```
3 changes: 3 additions & 0 deletions docs/docs/providers/safety/remote_bedrock.mdx
@@ -36,6 +36,9 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.
| `aws_access_key_id` | `SecretStr \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
| `aws_secret_access_key` | `SecretStr \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
| `aws_session_token` | `SecretStr \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
| `aws_role_arn` | `str \| None` | No | | The AWS role ARN to assume. Default use environment variable: AWS_ROLE_ARN |
| `aws_web_identity_token_file` | `str \| None` | No | | The path to the web identity token file. Default use environment variable: AWS_WEB_IDENTITY_TOKEN_FILE |
| `aws_role_session_name` | `str \| None` | No | | The session name to use when assuming a role. Default use environment variable: AWS_ROLE_SESSION_NAME |
| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2. Default use environment variable: AWS_DEFAULT_REGION |
| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use. Default use environment variable: AWS_PROFILE |
| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
21 changes: 19 additions & 2 deletions src/llama_stack/core/request_headers.py
@@ -7,7 +7,7 @@
import contextvars
import json
from contextlib import AbstractContextManager
from typing import Any
from typing import TYPE_CHECKING, Any, cast

from starlette.types import Scope

@@ -16,6 +16,9 @@

from .utils.dynamic import instantiate_class_type

if TYPE_CHECKING:
    from llama_stack_api import ProviderSpec

log = get_logger(name=__name__, category="core")

# Context variable for request provider data and auth attributes
@@ -26,6 +29,9 @@ class RequestProviderDataContext(AbstractContextManager[None]):
    """Context manager for request provider data"""

    def __init__(self, provider_data: dict[str, Any] | None = None, user: User | None = None) -> None:
        if provider_data is not None and not isinstance(provider_data, dict):
            log.error("Provider data must be a JSON object")
            provider_data = None
        self.provider_data = provider_data or {}
        if user:
            self.provider_data["__authenticated_user"] = user
@@ -45,6 +51,8 @@ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
class NeedsRequestProviderData:
    """Mixin for providers that require per-request provider data from request headers."""

    __provider_spec__: "ProviderSpec"

    def get_request_provider_data(self) -> Any:
        spec = self.__provider_spec__  # type: ignore[attr-defined]
        if not spec:
@@ -84,11 +92,20 @@ def parse_request_provider_data(headers: dict[str, str]) -> dict[str, Any] | None:
        return None

    try:
        return json.loads(val)  # type: ignore[no-any-return]
        parsed = json.loads(val)
    except json.JSONDecodeError:
        log.error("Provider data not encoded as a JSON object!")
        return None

    if parsed is None:
        return None

    if not isinstance(parsed, dict):
        log.error("Provider data must be encoded as a JSON object")
        return None

    return cast(dict[str, Any], parsed)


def request_provider_data_context(headers: dict[str, str], user: User | None = None) -> AbstractContextManager[None]:
    """Context manager that sets request provider data from headers and user for the duration of the context"""
4 changes: 3 additions & 1 deletion src/llama_stack/distributions/ci-tests/ci_tests.py
@@ -52,9 +52,11 @@ def get_distribution_template() -> DistributionTemplate:

    # Bedrock model must be pre-registered because the recording system cannot
    # replay model-list discovery calls against the Bedrock endpoint in CI.
    # Gate on AWS_DEFAULT_REGION (required for both bearer-token and SigV4 modes)
    # rather than AWS_BEARER_TOKEN_BEDROCK so the model registers in OIDC/IRSA CI too.
    bedrock_model = ModelInput(
        model_id="bedrock/openai.gpt-oss-20b",
        provider_id="${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock}",
        provider_id="${env.AWS_DEFAULT_REGION:+bedrock}",
        provider_model_id="openai.gpt-oss-20b",
        model_type=ModelType.llm,
    )
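The `${env.VAR:+value}` form used for `provider_id` is the conditional counterpart of `:=`: it expands to `value` only when the variable is set, and to nothing otherwise, which is how the bedrock provider and its model registration switch off when `AWS_DEFAULT_REGION` is absent. A rough standalone sketch of that rule (hypothetical helper; the assumption here, mirroring shell `:+` semantics, is that "set and non-empty" triggers the expansion):

```python
import re

# Matches ${env.NAME:+text}; expands to "text" only when NAME is set.
_CONDITIONAL = re.compile(r"\$\{env\.([A-Z0-9_]+):\+([^}]*)\}")


def expand_conditional(value: str, environ: dict[str, str]) -> str:
    """Expand ${env.VAR:+text}: "text" if VAR is set and non-empty, else ""."""
    return _CONDITIONAL.sub(
        lambda m: m.group(2) if environ.get(m.group(1)) else "", value
    )


print(expand_conditional("${env.AWS_DEFAULT_REGION:+bedrock}", {"AWS_DEFAULT_REGION": "us-east-2"}))
# → bedrock
print(repr(expand_conditional("${env.AWS_DEFAULT_REGION:+bedrock}", {})))
# → ''  (empty provider_id disables the registration)
```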
4 changes: 3 additions & 1 deletion src/llama_stack/distributions/ci-tests/config.yaml
@@ -47,6 +47,8 @@ providers:
    config:
      api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
      region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
      aws_role_arn: ${env.AWS_ROLE_ARN:=}
      aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}
  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
    provider_type: remote::nvidia
    config:
@@ -301,7 +303,7 @@ registered_resources:
    model_type: llm
  - metadata: {}
    model_id: bedrock/openai.gpt-oss-20b
    provider_id: ${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock}
    provider_id: ${env.AWS_DEFAULT_REGION:+bedrock}
    provider_model_id: openai.gpt-oss-20b
    model_type: llm
  shields:
@@ -47,6 +47,8 @@ providers:
    config:
      api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
      region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
      aws_role_arn: ${env.AWS_ROLE_ARN:=}
      aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}
  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
    provider_type: remote::nvidia
    config:
@@ -314,7 +316,7 @@ registered_resources:
    model_type: llm
  - metadata: {}
    model_id: bedrock/openai.gpt-oss-20b
    provider_id: ${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock}
    provider_id: ${env.AWS_DEFAULT_REGION:+bedrock}
    provider_model_id: openai.gpt-oss-20b
    model_type: llm
  shields:
2 changes: 2 additions & 0 deletions src/llama_stack/distributions/starter/config.yaml
@@ -47,6 +47,8 @@ providers:
    config:
      api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
      region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
      aws_role_arn: ${env.AWS_ROLE_ARN:=}
      aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}
  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
    provider_type: remote::nvidia
    config:
@@ -47,6 +47,8 @@ providers:
    config:
      api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
      region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
      aws_role_arn: ${env.AWS_ROLE_ARN:=}
      aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}
  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
    provider_type: remote::nvidia
    config:
2 changes: 1 addition & 1 deletion src/llama_stack/providers/registry/inference.py
@@ -121,7 +121,7 @@ def available_providers() -> list[ProviderSpec]:
            api=Api.inference,
            adapter_type="bedrock",
            provider_type="remote::bedrock",
            pip_packages=[],
            pip_packages=["boto3"],
            module="llama_stack.providers.remote.inference.bedrock",
            config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
            provider_data_validator="llama_stack.providers.remote.inference.bedrock.config.BedrockProviderDataValidator",
@@ -9,7 +9,8 @@
async def get_adapter_impl(config: BedrockConfig, _deps):
    from .bedrock import BedrockInferenceAdapter

    assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}"
    if not isinstance(config, BedrockConfig):
        raise TypeError(f"Unexpected config type: {type(config)}")

    impl = BedrockInferenceAdapter(config=config)
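Swapping the `assert isinstance(...)` for an explicit `raise TypeError` matters beyond style: CPython strips `assert` statements when run with `-O`, so the old check would silently disappear in optimized deployments. The pattern in isolation, with a generic stand-in class rather than the real `BedrockConfig`:

```python
class BedrockConfigStub:
    """Stand-in for the real BedrockConfig, for illustration only."""


def get_adapter_impl_check(config: object) -> BedrockConfigStub:
    # Unlike `assert isinstance(...)`, this check survives `python -O`.
    if not isinstance(config, BedrockConfigStub):
        raise TypeError(f"Unexpected config type: {type(config)}")
    return config


try:
    get_adapter_impl_check("not a config")
except TypeError as e:
    print(e)  # → Unexpected config type: <class 'str'>
```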