From a917c52461aa31a71a425c950fa9cf20d88f5c39 Mon Sep 17 00:00:00 2001
From: skamenan7 <skamenan@redhat.com>
Date: Tue, 31 Mar 2026 13:14:29 -0400
Subject: [PATCH 1/2] feat(bedrock): add AWS SigV4 and STS web identity
 authentication
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Bedrock inference provider previously required a pre-signed bearer
token (AWS_BEARER_TOKEN_BEDROCK). This PR adds full AWS credential chain
support so Bedrock works natively in EKS/IRSA, GitHub Actions OIDC,
EC2, ECS, and Lambda without managing long-lived credentials.

When no api_key is configured, requests are signed using AWS SigV4 via
botocore. STS role assumption and web identity federation are supported
through RefreshableBotoSession, which refreshes credentials
automatically. Bearer token mode is unchanged — if api_key is set in
config or passed via x-llamastack-provider-data, it takes precedence.

Also corrects the endpoint URL from bedrock-mantle to
bedrock-runtime.<region>.amazonaws.com/openai/v1, and gates the
bedrock model in ci-tests on AWS_DEFAULT_REGION (works for both
bearer and SigV4 modes) instead of AWS_BEARER_TOKEN_BEDROCK.

Closes #4730

Signed-off-by: skamenan7 <skamenan@redhat.com>
---
 docs/docs/api-openai/provider_matrix.md       |   2 +-
 .../providers/inference/remote_bedrock.mdx    |  16 +-
 docs/docs/providers/safety/remote_bedrock.mdx |   3 +
 src/llama_stack/core/request_headers.py       |  21 +-
 .../distributions/ci-tests/ci_tests.py        |   4 +-
 .../distributions/ci-tests/config.yaml        |   4 +-
 .../ci-tests/run-with-postgres-store.yaml     |   4 +-
 .../distributions/starter/config.yaml         |   2 +
 .../starter/run-with-postgres-store.yaml      |   2 +
 .../providers/registry/inference.py           |   2 +-
 .../remote/inference/bedrock/__init__.py      |   3 +-
 .../remote/inference/bedrock/bedrock.py       | 208 +++-
 .../remote/inference/bedrock/config.py        |  21 +-
 .../providers/remote/inference/vllm/vllm.py   |   4 +-
 .../providers/utils/bedrock/client.py         |  77 +-
 .../providers/utils/bedrock/config.py         |  17 +-
 .../utils/bedrock/refreshable_boto_session.py |  92 +-
 .../providers/utils/bedrock/sigv4_auth.py     | 189 ++++
 .../providers/utils/inference/http_client.py  |  41 +-
 .../providers/utils/inference/openai_mixin.py |   4 +-
 ...f4a69ae4edeb730b7ca374186c7-3ff721a4.json} |   2 +-
 ...f3a9a7592d5336fd3d2101fec1e143080184.json} |   2 +-
 ...4fad66ae8a37504748ca0a620f241d8537bd.json} |   2 +-
 ...3ecd51e713f8e6cbd96134c911cc5a9d25c2.json} |   2 +-
 ...d8d92f852af7f2286b0a20624a45f4f5f380.json} |   2 +-
 ...5fc800499022cd1daf923df591207980513c.json} |   2 +-
 ...2e98911e3124f09225d42d29239045854ce4.json} |   2 +-
 ...863badff5ace79116b718d28b8fa3e53a93a.json} |   2 +-
 ...7c9d991bfb10ba71d307ed8ba39411c407df.json} |   2 +-
 ...57970f0e3dabc4442278caa66054f29cf2b2.json} |   2 +-
 ...8f109a1bd0444c0394ad5e3ca4f6bce9a3c7.json} |   2 +-
 ...3acefb20e6839258b8b132471f404007c8f3.json} |   2 +-
 ...c3cc6f11f0618996f28403378a85d7258c88.json} |   2 +-
 ...797667d27d8a1a8a963ffba82cf69bf3696d.json} |   2 +-
 ...3522c7d7798542038b89b465af2e9ef0b299.json} |   2 +-
 ...990a80c4cbaffa952f1fe5668b74e9e57ae9.json} |   2 +-
 ...6649aa2c9d30931cccdbb2a8b2fefa69675d.json} |   2 +-
 ...0a9251d716e340408d20c5c7cc2b337c71b0.json} |   2 +-
 ...2f05dc909966c425203775402388b7ced238.json} |   2 +-
 ...a4954472d2cda8559b8466fad6ba0aa1c642.json} |   2 +-
 ...7eb977700bad50a47dcde078625f507277cc.json} |   2 +-
 ...02f29c9250f8386e90baab57cbfa9782a558.json} |   2 +-
 ...b248ba0848b70882489e1a32a9e323dc081b.json} |   2 +-
 ...cb7e463ad85b856514425df394ba222b92c4.json} |   2 +-
 ...b1a17051bb6a1a72989937e1b42465983960.json} |   2 +-
 ...14ab5eecc2a2af6add585ef5be9d61549d1a.json} |   2 +-
 ...a2a51cea83c4157e7d6fc35792837df40bdb.json} |   2 +-
 ...52bac60f1038d84e0591a279b941a9b49b32.json} |   2 +-
 ...a085c51bf886c6836fce3b630c1cc693e8cd.json} |   2 +-
 ...9dad73dd604848005240745cb0f729c4330f.json} |   2 +-
 ...14579a5f7114057b9b774d18b72e7a932fb2.json} |   2 +-
 ...2bcdfbfed5ba7f272c55f2cf412b099f7c93.json} |   2 +-
 ...743876b1177094e1a97d08bbd600900388e1.json} |   2 +-
 ...927389f1b18c13691565186eba89fd1df24e.json} |   2 +-
 ...566dd845588cce7605101332153175738d64.json} |   2 +-
 ...96ae1825730205c6ccd617c7ab88f57b1bb1.json} |   2 +-
 ...e3bf8cbaa47c2c83fcfc439c29c3693cf6a3.json} |   2 +-
 ...ca732197f2fbd3757e8afe5bc583df8f1c2c.json} |   2 +-
 ...c0b783de5f4bd2916d9e370145768551075c.json} |   2 +-
 ...7c6366dff704ea943bda851c2d3a658d44ec.json} |   2 +-
 ...1d4f5d4a9bf61953e12b0441c0c4c2a2831a.json} |   2 +-
 ...4670f308b01d4b905faf3e1313901d2a6b61.json} |   2 +-
 ...b935e8851add8edeef48d6a65764716ce634.json} |   2 +-
 ...f4074e790c3bbdae24c280c711aa752fb6b4.json} |   2 +-
 ...f1c374b3e46a69f5526ef45f70fd24fd18b9.json} |   2 +-
 ...bbc48650109698c6d0ac664a92426b242301.json} |   2 +-
 ...83380dbe25ba782d6e71e1ff3544628c06ab.json} |   2 +-
 ...e46fcde6afb092f0b5b57da9fb1c40b3d506.json} |   2 +-
 ...6704f32c46c11a85e585906992614239da54.json} |   2 +-
 ...bdb3523bd9c5b73088a745c431dc2514ea16.json} |   2 +-
 ...5193ed0cc70e43e577cbb0a3dec164e250c4.json} |   2 +-
 ...7378f61a28811513ccddddffc77ff05a0417.json} |   2 +-
 ...397d77d36726335d9c6c1b22e5c0ad1c22ac.json} |   2 +-
 ...94149395c39cb195974f2893fbf340e50589.json} |   2 +-
 ...1844fcb08bf9ff738f34d7f915514173f2c4.json} |   2 +-
 ...d87b12b16b020611dd32866110cbf0c699d9.json} |   2 +-
 ...a2123789abe1d080020cbe97402d51e074eb.json} |   2 +-
 ...63035e94bea9462e613798c48a8c39697217.json} |   2 +-
 ...9f68d4fb635ddbeeb55595b7027-31ec6c83.json} |   2 +-
 ...340543238789bbbe8c516b79114-31ec6c83.json} |   2 +-
 ...addf6594e08964cde42375f54dd-31ec6c83.json} |   2 +-
 ...3fca29f3ced3ffbff35583d1f9b-31ec6c83.json} |   2 +-
 ...07c9c0a13e6ecc80ea805e1d61b-31ec6c83.json} |   2 +-
 ...1a886e43c02c4f68ff4872e3024-31ec6c83.json} |   2 +-
 .../bedrock/test_openai_sdk_integration.py    | 154 +++
 .../inference/bedrock/test_sigv4_auth.py      | 885 ++++++++++++++++++
 .../inference/test_bedrock_adapter.py         |  17 +-
 .../inference/test_bedrock_config.py          |  12 +
 .../providers/inference/test_bedrock_sts.py   |  84 ++
 .../safety/test_bedrock_safety_adapter.py     |  35 +
 tests/unit/providers/test_bedrock.py          |  69 --
 .../utils/inference/test_network_config.py    |   4 +-
 92 files changed, 1836 insertions(+), 268 deletions(-)
 create mode 100644 src/llama_stack/providers/utils/bedrock/sigv4_auth.py
 rename tests/integration/common/recordings/{models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-3ff721a4.json => models-66c1506ac9a1c3cac55a2dabb6a81f068d753f4a69ae4edeb730b7ca374186c7-3ff721a4.json} (99%)
 rename tests/integration/inference/recordings/{1675987cb787dc51be76c547577dd38172be220c805c37ba73ba56fa89cd1121.json => 7e3382aab43824f89f365fb42a16f3a9a7592d5336fd3d2101fec1e143080184.json} (98%)
 rename tests/integration/inference/recordings/{718bb1cd939e26c4b7e6b940c3acd97304f46e13b606e081086dd6657e9d3db2.json => 94bb6e3fe939afbdb25c9ebfe9e44fad66ae8a37504748ca0a620f241d8537bd.json} (97%)
 rename tests/integration/inference/recordings/{42ba670916ebfdbe86b518ad1692971ccbb2c2e4a30372f43f38f8e1fc22daf0.json => a89357c63194c44cba530faecbbb3ecd51e713f8e6cbd96134c911cc5a9d25c2.json} (98%)
 rename tests/integration/inference/recordings/{de019f1d2995412fd461393c644f07c743cf81d087249ef2db974576238ab1c7.json => d91f3615a81a97e7d86b3cd5bb05d8d92f852af7f2286b0a20624a45f4f5f380.json} (97%)
 rename tests/integration/inference/recordings/{0241337625808a1602a0b4b1c715c7531cecf2491f2f2129e7e6884e3ffb62bb.json => e9d3ce4be836e5478689cf56f0375fc800499022cd1daf923df591207980513c.json} (96%)
 rename tests/integration/inference/recordings/{d6f772f62c859028bada2fe328498364e82c1d36ed256910623577c6fcd5696c.json => f4fa57034e80f69cb75c300241862e98911e3124f09225d42d29239045854ce4.json} (94%)
 rename tests/integration/responses/recordings/{7fe113f5fc7f0e055b4abd4e6ea0a183dc1a2b959cc410ec2c594a04028712b9.json => 0469f7efcdf69ae378b7395f518d863badff5ace79116b718d28b8fa3e53a93a.json} (99%)
 rename tests/integration/responses/recordings/{d480b100b782370dd41e8dbbb53e2f37783b99074446dcb50d49034007bfb97e.json => 04a4837c6c7974929a7328bb090e7c9d991bfb10ba71d307ed8ba39411c407df.json} (98%)
 rename tests/integration/responses/recordings/{c71d141a2ec044cc7eb01574672e5e6bc6689fb1d6aa8cc6494773fddff919d4.json => 0c65c6fe47669a1dde7e3f13d4b357970f0e3dabc4442278caa66054f29cf2b2.json} (99%)
 rename tests/integration/responses/recordings/{29462a1046f2fcc98301a8eed2fc861ba7f9de28bf2dc06bfae8c04bf664e645.json => 1a5af1292127b401be03305c2d0a8f109a1bd0444c0394ad5e3ca4f6bce9a3c7.json} (98%)
 rename tests/integration/responses/recordings/{076784d55c6a712b559ad4061d7ed4391dc7f1cf35a2b141ad4b0b8f8c8503e1.json => 1f1d016a1f7977d024de7a10f1e63acefb20e6839258b8b132471f404007c8f3.json} (98%)
 rename tests/integration/responses/recordings/{95ae8c627acc6e165fa5046946ae7c009324d3704b64695049cbbd399a6348b8.json => 23223eabe994fd8172f8650d7cc9c3cc6f11f0618996f28403378a85d7258c88.json} (99%)
 rename tests/integration/responses/recordings/{21b2d947138024ef80069f89c5157185a70f43c4d128ffdb4337194ae2486429.json => 23e253ab6f05ad695e1d2dfd2190797667d27d8a1a8a963ffba82cf69bf3696d.json} (98%)
 rename tests/integration/responses/recordings/{68dd2778e5b1caa3ae28e5eed59c41db0d39c58c113612b94abaa459fc0fad3e.json => 2c604588ef302f0e4a93ef93dc713522c7d7798542038b89b465af2e9ef0b299.json} (99%)
 rename tests/integration/responses/recordings/{6e9164fd65ebd8121be120a06262efd9c0f603104e86d84ad07ff4e5b5ab4d69.json => 2ecc2783d6558449838ced5f0180990a80c4cbaffa952f1fe5668b74e9e57ae9.json} (99%)
 rename tests/integration/responses/recordings/{efffb6e430b20dc2f20fbe77ef230b47660008ab6ae2102f73596d4331dcf683.json => 30dee5b8b4a1716103dd23d5249b6649aa2c9d30931cccdbb2a8b2fefa69675d.json} (99%)
 rename tests/integration/responses/recordings/{e1bf2d00006e9b3c60f537c724c98c518a571caec500010e2b07a42662a0db94.json => 3e79875697a99012661ee6e8a9160a9251d716e340408d20c5c7cc2b337c71b0.json} (98%)
 rename tests/integration/responses/recordings/{a6204a1d782ab7d9f85b3ac2b792796b668ba731bf670254e3c9d9ede934193c.json => 4290deaf6bf03c9ccf3228f6b51a2f05dc909966c425203775402388b7ced238.json} (98%)
 rename tests/integration/responses/recordings/{ae06979da89d9f6949f1ceb5ee057759bfbf01f3e0bd58f68d55996929192f5e.json => 4752668af3df1f42fe5b16f6f0c7a4954472d2cda8559b8466fad6ba0aa1c642.json} (99%)
 rename tests/integration/responses/recordings/{16ef4e2da1cf48a86471cb57c7ce4a4c11c6968d3e9607dbd632d3bd9a54493f.json => 48d747eb358721344236340fdca57eb977700bad50a47dcde078625f507277cc.json} (99%)
 rename tests/integration/responses/recordings/{2e46c50d186ad42569a701893e2fd59e4aa93e9b973d7780c32c6e864a4abed7.json => 498ab18308a6e603d405bf5b5c1102f29c9250f8386e90baab57cbfa9782a558.json} (98%)
 rename tests/integration/responses/recordings/{d004a76ddc87e3d1f10a07b50f2aa82cf39d20e5cfb3be1433c924a126fe1a58.json => 4c112247d4a195fac2b27e6a4936b248ba0848b70882489e1a32a9e323dc081b.json} (99%)
 rename tests/integration/responses/recordings/{f9f8f255a3e65fdea20eedd28f7febfb159dafa9a3aab7a83c8afc7e89ba0b91.json => 4e1fd22f5d9ded67352dbe815bb0cb7e463ad85b856514425df394ba222b92c4.json} (98%)
 rename tests/integration/responses/recordings/{e6e1b682e9b368426efc1bd93b326bec2f7db76ab9d8c9c6334a2d5177672fd6.json => 5100781c53bb19a320364926454cb1a17051bb6a1a72989937e1b42465983960.json} (99%)
 rename tests/integration/responses/recordings/{5ab06b404383787d1a545eacc7674fc068f37fbdaaa8d3b2f991f884b0c1d07a.json => 53e2e090845d1a09194d5f18dca914ab5eecc2a2af6add585ef5be9d61549d1a.json} (99%)
 rename tests/integration/responses/recordings/{6a8b76b4d8ba3bc65c2cbd242b17aa224cc77abb15beba937decfda457ba2b6e.json => 6138d550fdb668e3a48400b36501a2a51cea83c4157e7d6fc35792837df40bdb.json} (99%)
 rename tests/integration/responses/recordings/{01675632398179e5013b897e113ce0bfcb3c8c06352c4884b31b84bcc88b8e43.json => 66ca3acff5d7d5880103bbfde08c52bac60f1038d84e0591a279b941a9b49b32.json} (99%)
 rename tests/integration/responses/recordings/{c2735c8a02e57ab45fd2b72ac4478bad6a39cb2ff3ac1a457fe8476f73146934.json => 67c29a33f0d688d469c0d59feb5ca085c51bf886c6836fce3b630c1cc693e8cd.json} (99%)
 rename tests/integration/responses/recordings/{1c75005e90b5447d913967026e8bd7c13afa92ce90d2af9913fe81db06e8c88b.json => 6becbeb1c15378d9a82f60c3a9309dad73dd604848005240745cb0f729c4330f.json} (98%)
 rename tests/integration/responses/recordings/{97a9427f4c20f3fb104a6d04e3c08380f50a86a947b89ddd9477d09ec713313e.json => 6f80b6307533eb1efc08bb9252ba14579a5f7114057b9b774d18b72e7a932fb2.json} (98%)
 rename tests/integration/responses/recordings/{565887f8230acc7918b316e3428650fa2f8aa13aa8a01859da53f1bc10d7c8d5.json => 72e8dd36d46c572167f589fe59a32bcdfbfed5ba7f272c55f2cf412b099f7c93.json} (99%)
 rename tests/integration/responses/recordings/{6b1b3d872eb6746dbfe9b8eed7fcbe23ca9e3408e7b3d8b99df407ae62f0a9f8.json => 7fce3badd53d491bbae372c12f38743876b1177094e1a97d08bbd600900388e1.json} (99%)
 rename tests/integration/responses/recordings/{612c6642c5fb5b4203098183335b17ac0d121a04e39cc737b6cd2c1cb1f8ab30.json => 86985e69a4be4073af20f65ba9ec927389f1b18c13691565186eba89fd1df24e.json} (98%)
 rename tests/integration/responses/recordings/{255d2b2c743eb4102b0938c2ece15c5ce5a534fc3ee5ea5067b677a9007fab40.json => 8fffbf821fc03c867d710ee20471566dd845588cce7605101332153175738d64.json} (98%)
 rename tests/integration/responses/recordings/{b4669f2ae72ba853cd152e36d2e1aed1fc55ff53d48e9fe7371ce36e67b75445.json => 9a097dbf69b822064c0e709fc32196ae1825730205c6ccd617c7ab88f57b1bb1.json} (99%)
 rename tests/integration/responses/recordings/{fff9dbea699f84c0f924ee9dbc91c9f5587eff0d27a554fd55b916da9662ce2c.json => a0a8368aca6a21aff59a88f8eadae3bf8cbaa47c2c83fcfc439c29c3693cf6a3.json} (98%)
 rename tests/integration/responses/recordings/{611613fb2c06bb1ed79fbe5dae18ac6e2a650340832aa7538498b5b26aff1f63.json => a3fea537eafccc69367c15360b09ca732197f2fbd3757e8afe5bc583df8f1c2c.json} (98%)
 rename tests/integration/responses/recordings/{516a186291f3c6a4cf45ceecd49e9f8f5b396f37caa0c557e664c608b29bcfcc.json => a640e097dcf17544825aa20ce937c0b783de5f4bd2916d9e370145768551075c.json} (98%)
 rename tests/integration/responses/recordings/{68da149c99f86029626e9bca5df9a72d5204a4423c5caca31efe60384c4b2b37.json => aaed9594b345da41db3dd6754b1e7c6366dff704ea943bda851c2d3a658d44ec.json} (98%)
 rename tests/integration/responses/recordings/{555654e0ddbd51a65af179d9300fdedc42077a90849824b037f9dae1edda4102.json => b2741f85f52f5eb88f5401e02cad1d4f5d4a9bf61953e12b0441c0c4c2a2831a.json} (98%)
 rename tests/integration/responses/recordings/{f8eb947297e60407c022e3326b5bbcfa0e186abf83bf4b3a826640715e14a3a9.json => baa7ab6db86f51910a35c45cf2234670f308b01d4b905faf3e1313901d2a6b61.json} (99%)
 rename tests/integration/responses/recordings/{bf90e1d57fcb8ac4ebb20ee417bb219a091ac9c45000f4c22e4fd16716dfaceb.json => c40e45bc283154cb2e212a514e5ab935e8851add8edeef48d6a65764716ce634.json} (98%)
 rename tests/integration/responses/recordings/{da6a362ff3aadd43bc06ba806b0f3710f565e33c63b5f1aff08a68807e01ce8a.json => cc0c289ff3a932cbfc46188197ebf4074e790c3bbdae24c280c711aa752fb6b4.json} (99%)
 rename tests/integration/responses/recordings/{2df2850b224738a8f7b945cc06681f73f38988a267cd1ba6a546a8352a57ffa5.json => d0996e5e764315da97c15eb62c37f1c374b3e46a69f5526ef45f70fd24fd18b9.json} (99%)
 rename tests/integration/responses/recordings/{8dba857db6730d6ab259e2c80f5cdd1f8bbcce22fb4a14df4389f96e2db04eaa.json => d2cf88feac32878a5072007ed404bbc48650109698c6d0ac664a92426b242301.json} (98%)
 rename tests/integration/responses/recordings/{448f5aad2604000a0616341f2a97086fef7087f561fa24e21b5f665b04cebff2.json => d490ef38aa100255e6031998088783380dbe25ba782d6e71e1ff3544628c06ab.json} (99%)
 rename tests/integration/responses/recordings/{723ec06a9462e87536dbfaeb2b9396943dc3ea635425d9a1262212a1ccec2910.json => d508c01d0d6670167da0e8758e9ee46fcde6afb092f0b5b57da9fb1c40b3d506.json} (98%)
 rename tests/integration/responses/recordings/{9ecdff3fb145793283aa9233afb27e687c1aa26f93586f5a624b7a78c4c15c3c.json => d510a5744713675c17f35596254f6704f32c46c11a85e585906992614239da54.json} (95%)
 rename tests/integration/responses/recordings/{06653e6e4563eb378ee18044c32cd36e1ace560b2ab90d84c901b67dde0827f8.json => d6784a627287aef1c3171bdbf1d4bdb3523bd9c5b73088a745c431dc2514ea16.json} (99%)
 rename tests/integration/responses/recordings/{95d123bff66974b73beabe29f92030b8a24ebda6f06e83ed31b9a0749c60199e.json => d7b50078103e88d20c4955ba23c55193ed0cc70e43e577cbb0a3dec164e250c4.json} (98%)
 rename tests/integration/responses/recordings/{eebe9f0ac51599aa9d0f81b0d0a8b2a279054d2263d728fc1eb0b6250004b339.json => d89ee3f0b9b9ae608fd6bfd7efa57378f61a28811513ccddddffc77ff05a0417.json} (98%)
 rename tests/integration/responses/recordings/{3d42f101f95388471691e0c8d8c1a37a67a2234432874a79b684d8bd40f73eff.json => e779e2713de2d91ee559033a5e77397d77d36726335d9c6c1b22e5c0ad1c22ac.json} (98%)
 rename tests/integration/responses/recordings/{2eafd5c6eb8e2cff3433007bf23b19e6d66547b2dcab1b1892d5bd53f8ad34b4.json => ecb3bc2e13c604f0003308df4fdb94149395c39cb195974f2893fbf340e50589.json} (95%)
 rename tests/integration/responses/recordings/{f7ba1ce7bbeef963f2002b812fd07386cecf9179bfc6f92a2e09348edbfd7f07.json => ee3d5360f48357a1f00d8fc9f1b01844fcb08bf9ff738f34d7f915514173f2c4.json} (99%)
 rename tests/integration/responses/recordings/{2a08014e5c5da31c16896c0e59ea432e71eb8eb83a3cf25a3e4ab7fa5700bd6b.json => f4770c02fdb697b36717c6713dacd87b12b16b020611dd32866110cbf0c699d9.json} (98%)
 rename tests/integration/responses/recordings/{b4d86c5231fc155339edbcb937b8e91a4be8c2c171a49120991e0eab9582b806.json => f91a97f69204421e717fc7d7660ea2123789abe1d080020cbe97402d51e074eb.json} (98%)
 rename tests/integration/responses/recordings/{49aa949715243abb9094994f88b7fdc37a54757ec4c6579e6640d4e62d148c87.json => fd5280a00f415baa514839d1903263035e94bea9462e613798c48a8c39697217.json} (98%)
 rename tests/integration/responses/recordings/{models-ce53c4fcbb40594a10b3d758a338cccbd471bba93e04d38264d663c6dcec8578-31ec6c83.json => models-2f9eb79dbfa9f25cae2fdc739d4358a242c429f68d4fb635ddbeeb55595b7027-31ec6c83.json} (99%)
 rename tests/integration/responses/recordings/{models-d487ac38a1f3f3bb9ebc5254525862e9fcefdf01d5184789f6ba4b3039dc17ba-31ec6c83.json => models-48bcf1eca39637c8a9767f122e10ed8a6421f340543238789bbbe8c516b79114-31ec6c83.json} (99%)
 rename tests/integration/responses/recordings/{models-0535aa1a1942a5b3191f8d08fb57457ef937494a6e6cb46918fd196f77016145-31ec6c83.json => models-c7239f3a8bc25af23106c20d3a0844f33356aaddf6594e08964cde42375f54dd-31ec6c83.json} (99%)
 rename tests/integration/responses/recordings/{models-03ffd7273c9bf0c84abd4750391c19dfc7545772caacd9bdd02c953c5c3fb6cd-31ec6c83.json => models-e3014308c84bda792e9ce27641b08ece821283fca29f3ced3ffbff35583d1f9b-31ec6c83.json} (99%)
 rename tests/integration/responses/recordings/{models-ffb55818ff4f53a50d044fbf570e18ed249a0245bfe68a7e91eed69ccf5f2193-31ec6c83.json => models-e400deaab35580927e2926f5b0d13b37ce15a07c9c0a13e6ecc80ea805e1d61b-31ec6c83.json} (99%)
 rename tests/integration/responses/recordings/{models-879ae66c787975780a21251d2755da3ba56850f94c78577db27ba1e5ff8a95e8-31ec6c83.json => models-fca7382830039be10bc959fcbb2299c4835921a886e43c02c4f68ff4872e3024-31ec6c83.json} (99%)
 create mode 100644 tests/unit/providers/inference/bedrock/test_openai_sdk_integration.py
 create mode 100644 tests/unit/providers/inference/bedrock/test_sigv4_auth.py
 create mode 100644 tests/unit/providers/inference/test_bedrock_sts.py
 create mode 100644 tests/unit/providers/safety/test_bedrock_safety_adapter.py
 delete mode 100644 tests/unit/providers/test_bedrock.py
diff --git a/docs/docs/api-openai/provider_matrix.md b/docs/docs/api-openai/provider_matrix.md
index 6f3c13b61c..8f42d3fee5 100644
--- a/docs/docs/api-openai/provider_matrix.md
+++ b/docs/docs/api-openai/provider_matrix.md
@@ -33,7 +33,7 @@ Models, endpoints, and versions used during test recordings.
 | Provider | Model(s) | Endpoint | Version Info |
 |----------|----------|----------|--------------|
 | azure | gpt-4o | llama-stack-test.openai.azure.com, lls-test.openai.azure.com | openai sdk: 2.5.0 |
-| bedrock | openai.gpt-oss-20b | bedrock-mantle.us-east-2.api.aws, bedrock-mantle.us-west-2.api.aws | openai sdk: 2.5.0 |
+| bedrock | openai.gpt-oss-20b | bedrock-runtime.us-east-2.amazonaws.com, bedrock-runtime.us-west-2.amazonaws.com | openai sdk: 2.5.0 |
 | ollama | gpt-oss:20b | — | openai sdk: 2.5.0, vllm server: 0.9.2rc2.dev136+g0b382b53a.d20250924 |
 | openai | gpt-4o, o4-mini, text-embedding-3-small | api.openai.com | openai sdk: 2.5.0 |
 | vllm | Qwen/Qwen3-0.6B | — | openai sdk: 2.5.0, vllm server: 0.18.1rc1.dev197+g0e9358c11 |
diff --git a/docs/docs/providers/inference/remote_bedrock.mdx b/docs/docs/providers/inference/remote_bedrock.mdx
index be31b9a983..3095d3c6cc 100644
--- a/docs/docs/providers/inference/remote_bedrock.mdx
+++ b/docs/docs/providers/inference/remote_bedrock.mdx
@@ -34,11 +34,25 @@ AWS Bedrock inference provider using OpenAI compatible endpoint.
 | `network.timeout.connect` | `float \| None` | No |  | Connection timeout in seconds. |
 | `network.timeout.read` | `float \| None` | No |  | Read timeout in seconds. |
 | `network.headers` | `dict[str, str] \| None` | No |  | Additional HTTP headers to include in all requests. |
-| `region_name` | `str` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint |
+| `aws_access_key_id` | `SecretStr \| None` | No |  | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
+| `aws_secret_access_key` | `SecretStr \| None` | No |  | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
+| `aws_session_token` | `SecretStr \| None` | No |  | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
+| `aws_role_arn` | `str \| None` | No |  | The AWS role ARN to assume. Default use environment variable: AWS_ROLE_ARN |
+| `aws_web_identity_token_file` | `str \| None` | No |  | The path to the web identity token file. Default use environment variable: AWS_WEB_IDENTITY_TOKEN_FILE |
+| `aws_role_session_name` | `str \| None` | No |  | The session name to use when assuming a role. Default use environment variable: AWS_ROLE_SESSION_NAME |
+| `region_name` | `str \| None` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint |
+| `profile_name` | `str \| None` | No |  | The profile name that contains credentials to use. Default use environment variable: AWS_PROFILE |
+| `total_max_attempts` | `int \| None` | No |  | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
+| `retry_mode` | `str \| None` | No |  | A string representing the type of retries Boto3 will perform. Default use environment variable: AWS_RETRY_MODE |
+| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
+| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection. The default is 60 seconds. |
+| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |
 
 ## Sample Configuration
 
 ```yaml
 api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
 region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
+aws_role_arn: ${env.AWS_ROLE_ARN:=}
+aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}
 ```
diff --git a/docs/docs/providers/safety/remote_bedrock.mdx b/docs/docs/providers/safety/remote_bedrock.mdx
index fc18f726ea..0d3de70b94 100644
--- a/docs/docs/providers/safety/remote_bedrock.mdx
+++ b/docs/docs/providers/safety/remote_bedrock.mdx
@@ -36,6 +36,9 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.
 | `aws_access_key_id` | `SecretStr \| None` | No |  | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
 | `aws_secret_access_key` | `SecretStr \| None` | No |  | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
 | `aws_session_token` | `SecretStr \| None` | No |  | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
+| `aws_role_arn` | `str \| None` | No |  | The AWS role ARN to assume. Default use environment variable: AWS_ROLE_ARN |
+| `aws_web_identity_token_file` | `str \| None` | No |  | The path to the web identity token file. Default use environment variable: AWS_WEB_IDENTITY_TOKEN_FILE |
+| `aws_role_session_name` | `str \| None` | No |  | The session name to use when assuming a role. Default use environment variable: AWS_ROLE_SESSION_NAME |
 | `region_name` | `str \| None` | No |  | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION |
 | `profile_name` | `str \| None` | No |  | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE |
 | `total_max_attempts` | `int \| None` | No |  | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
diff --git a/src/llama_stack/core/request_headers.py b/src/llama_stack/core/request_headers.py
index e0a61c4ba6..a55e13e016 100644
--- a/src/llama_stack/core/request_headers.py
+++ b/src/llama_stack/core/request_headers.py
@@ -7,7 +7,7 @@
 import contextvars
 import json
 from contextlib import AbstractContextManager
-from typing import Any
+from typing import TYPE_CHECKING, Any, cast
 
 from starlette.types import Scope
 
@@ -16,6 +16,9 @@
 
 from .utils.dynamic import instantiate_class_type
 
+if TYPE_CHECKING:
+    from llama_stack_api import ProviderSpec
+
 log = get_logger(name=__name__, category="core")
 
 # Context variable for request provider data and auth attributes
@@ -26,6 +29,9 @@ class RequestProviderDataContext(AbstractContextManager[None]):
     """Context manager for request provider data"""
 
     def __init__(self, provider_data: dict[str, Any] | None = None, user: User | None = None) -> None:
+        if provider_data is not None and not isinstance(provider_data, dict):
+            log.error("Provider data must be a JSON object")
+            provider_data = None
         self.provider_data = provider_data or {}
         if user:
             self.provider_data["__authenticated_user"] = user
@@ -45,6 +51,8 @@ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
 class NeedsRequestProviderData:
     """Mixin for providers that require per-request provider data from request headers."""
 
+    __provider_spec__: "ProviderSpec"
+
     def get_request_provider_data(self) -> Any:
         spec = self.__provider_spec__  # type: ignore[attr-defined]
         if not spec:
@@ -84,11 +92,20 @@ def parse_request_provider_data(headers: dict[str, str]) -> dict[str, Any] | Non
         return None
 
     try:
-        return json.loads(val)  # type: ignore[no-any-return]
+        parsed = json.loads(val)
     except json.JSONDecodeError:
         log.error("Provider data not encoded as a JSON object!")
         return None
 
+    if parsed is None:
+        return None
+
+    if not isinstance(parsed, dict):
+        log.error("Provider data must be encoded as a JSON object")
+        return None
+
+    return cast(dict[str, Any], parsed)
+
 
 def request_provider_data_context(headers: dict[str, str], user: User | None = None) -> AbstractContextManager[None]:
     """Context manager that sets request provider data from headers and user for the duration of the context"""
diff --git a/src/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py
index e100ad161d..302b1c07ee 100644
--- a/src/llama_stack/distributions/ci-tests/ci_tests.py
+++ b/src/llama_stack/distributions/ci-tests/ci_tests.py
@@ -52,9 +52,11 @@ def get_distribution_template() -> DistributionTemplate:
 
     # Bedrock model must be pre-registered because the recording system cannot
     # replay model-list discovery calls against the Bedrock endpoint in CI.
+    # Gate on AWS_DEFAULT_REGION (required for both bearer-token and SigV4 modes)
+    # rather than AWS_BEARER_TOKEN_BEDROCK so the model registers in OIDC/IRSA CI too.
     bedrock_model = ModelInput(
         model_id="bedrock/openai.gpt-oss-20b",
-        provider_id="${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock}",
+        provider_id="${env.AWS_DEFAULT_REGION:+bedrock}",
         provider_model_id="openai.gpt-oss-20b",
         model_type=ModelType.llm,
     )
diff --git a/src/llama_stack/distributions/ci-tests/config.yaml b/src/llama_stack/distributions/ci-tests/config.yaml
index 7bcbb6eee9..a51945e36f 100644
--- a/src/llama_stack/distributions/ci-tests/config.yaml
+++ b/src/llama_stack/distributions/ci-tests/config.yaml
@@ -47,6 +47,8 @@ providers:
     config:
       api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
       region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
+      aws_role_arn: ${env.AWS_ROLE_ARN:=}
+      aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
@@ -301,7 +303,7 @@ registered_resources:
     model_type: llm
   - metadata: {}
     model_id: bedrock/openai.gpt-oss-20b
-    provider_id: ${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock}
+    provider_id: ${env.AWS_DEFAULT_REGION:+bedrock}
     provider_model_id: openai.gpt-oss-20b
     model_type: llm
   shields:
diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
index 553ed41118..9583bbcd20 100644
--- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
@@ -47,6 +47,8 @@ providers:
     config:
       api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
       region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
+      aws_role_arn: ${env.AWS_ROLE_ARN:=}
+      aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
@@ -314,7 +316,7 @@ registered_resources:
     model_type: llm
   - metadata: {}
     model_id: bedrock/openai.gpt-oss-20b
-    provider_id: ${env.AWS_BEARER_TOKEN_BEDROCK:+bedrock}
+    provider_id: ${env.AWS_DEFAULT_REGION:+bedrock}
     provider_model_id: openai.gpt-oss-20b
     model_type: llm
   shields:
diff --git a/src/llama_stack/distributions/starter/config.yaml b/src/llama_stack/distributions/starter/config.yaml
index fb9cf9ae84..bbe35b85a3 100644
--- a/src/llama_stack/distributions/starter/config.yaml
+++ b/src/llama_stack/distributions/starter/config.yaml
@@ -47,6 +47,8 @@ providers:
     config:
       api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
       region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
+      aws_role_arn: ${env.AWS_ROLE_ARN:=}
+      aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
index 52225576f5..86e00cf5ba 100644
--- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
@@ -47,6 +47,8 @@ providers:
     config:
       api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
       region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
+      aws_role_arn: ${env.AWS_ROLE_ARN:=}
+      aws_web_identity_token_file: ${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py
index 6bc2976d4d..12bf1a940b 100644
--- a/src/llama_stack/providers/registry/inference.py
+++ b/src/llama_stack/providers/registry/inference.py
@@ -121,7 +121,7 @@ def available_providers() -> list[ProviderSpec]:
             api=Api.inference,
             adapter_type="bedrock",
             provider_type="remote::bedrock",
-            pip_packages=[],
+            pip_packages=["boto3"],
             module="llama_stack.providers.remote.inference.bedrock",
             config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
             provider_data_validator="llama_stack.providers.remote.inference.bedrock.config.BedrockProviderDataValidator",
diff --git a/src/llama_stack/providers/remote/inference/bedrock/__init__.py b/src/llama_stack/providers/remote/inference/bedrock/__init__.py
index 4b0686b187..12726e32d0 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/__init__.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/__init__.py
@@ -9,7 +9,8 @@
 async def get_adapter_impl(config: BedrockConfig, _deps):
     from .bedrock import BedrockInferenceAdapter
 
-    assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}"
+    if not isinstance(config, BedrockConfig):
+        raise TypeError(f"Unexpected config type: {type(config)}")
 
     impl = BedrockInferenceAdapter(config=config)
 
diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 02800d21ed..bd5ba32630 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -4,16 +4,29 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from collections.abc import AsyncIterator
+import asyncio
+from collections.abc import AsyncIterator, Iterable
+from typing import TYPE_CHECKING, Any, NoReturn
 
-from openai import AuthenticationError
+if TYPE_CHECKING:
+    from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+import httpx
+from openai import AuthenticationError, PermissionDeniedError
+from pydantic import PrivateAttr
 
 from llama_stack.log import get_logger
 from llama_stack.providers.inline.responses.builtin.responses.types import (
     AssistantMessageWithReasoning,
 )
+from llama_stack.providers.utils.inference.http_client import (
+    build_network_client_kwargs,
+    network_config_fingerprint,
+    set_client_network_fingerprint,
+)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
+    InternalServerError,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionChunkWithReasoning,
@@ -25,8 +38,6 @@
     OpenAIEmbeddingsResponse,
 )
 
-from .config import BedrockConfig
-
 logger = get_logger(name=__name__, category="inference::bedrock")
 
 
@@ -35,14 +46,139 @@ class BedrockInferenceAdapter(OpenAIMixin):
     Adapter for AWS Bedrock's OpenAI-compatible API endpoints.
 
     Supports Llama models across regions and GPT-OSS models (us-west-2 only).
+
+    Authentication modes:
+    1. Bearer token (legacy): Set AWS_BEARER_TOKEN_BEDROCK or api_key in config
+    2. AWS credential chain (enterprise): Leave api_key unset, configure AWS creds
+       - Web Identity Federation (IRSA, GitHub Actions OIDC)
+       - IAM roles (EC2, ECS, Lambda)
+       - AWS profiles
+       - Static credentials
+
+    When using AWS credential chain, requests are signed using SigV4 with the
+    "bedrock" signing name (note: the endpoint hostname uses "bedrock-runtime",
+    but SigV4 credential scope uses the signing name "bedrock").
+
+    Web Identity Federation Examples:
+
+    Kubernetes/OpenShift (IRSA):
+        Set these environment variables in your pod spec:
+        - AWS_ROLE_ARN=arn:aws:iam::123456789012:role/llama-stack-role
+        - AWS_WEB_IDENTITY_TOKEN_FILE=<path-to-serviceaccount-token>
+          Common paths:
+          - EKS: /var/run/secrets/eks.amazonaws.com/serviceaccount/token
+          - Generic K8s: /var/run/secrets/kubernetes.io/serviceaccount/token
+        - AWS_DEFAULT_REGION=us-east-2
+
+    GitHub Actions:
+        Use aws-actions/configure-aws-credentials with OIDC:
+
+        permissions:
+          id-token: write  # Required for OIDC
+
+        steps:
+          - uses: aws-actions/configure-aws-credentials@v4
+            with:
+              role-to-assume: arn:aws:iam::123456789012:role/github-actions-role
+              aws-region: us-east-2
+
+    Credentials are automatically refreshed by boto3 when they expire.
+
+    Note: Bedrock's OpenAI-compatible endpoint does not support /v1/models
+    for dynamic model discovery. Models must be pre-registered in the config.
     """
 
-    config: BedrockConfig
-    provider_data_api_key_field: str = "aws_bearer_token_bedrock"
+    provider_data_api_key_field: str | None = "aws_bearer_token_bedrock"
+
+    # built once in initialize() so get_extra_client_params() can stay sync;
+    # reusing one client also avoids opening a new socket per request
+    _sigv4_http_client: httpx.AsyncClient | None = PrivateAttr(default=None)
+
+    @property
+    def _bedrock_config(self) -> "BedrockConfig":
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        if not isinstance(self.config, BedrockConfig):
+            raise TypeError(f"Expected BedrockConfig, got {type(self.config)}")
+        return self.config
 
     def get_base_url(self) -> str:
-        """Get base URL for OpenAI client."""
-        return f"https://bedrock-mantle.{self.config.region_name}.api.aws/v1"
+        region = self._bedrock_config.region_name or "us-east-2"
+        return f"https://bedrock-runtime.{region}.amazonaws.com/openai/v1"
+
+    def _should_use_sigv4(self) -> bool:
+        # checked per-request so a bearer token in provider data can override SigV4 at runtime
+        if self._bedrock_config.has_bearer_token():
+            return False
+
+        provider_data = self.get_request_provider_data()
+        if provider_data and provider_data.aws_bearer_token_bedrock is not None:
+            val = provider_data.aws_bearer_token_bedrock.get_secret_value()
+            if val and val.strip():
+                return False
+
+        return True
+
+    def _build_sigv4_http_client(self) -> httpx.AsyncClient:
+        # lazy import so bearer-token installs don't need boto3/botocore
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        cfg = self._bedrock_config
+        sigv4_args: dict[str, Any] = {
+            "region": cfg.region_name or "us-east-2",
+            "service": "bedrock",  # botocore signing name, not the endpoint prefix "bedrock-runtime"
+            "aws_access_key_id": cfg.aws_access_key_id.get_secret_value() if cfg.aws_access_key_id else None,
+            "aws_secret_access_key": cfg.aws_secret_access_key.get_secret_value()
+            if cfg.aws_secret_access_key
+            else None,
+            "aws_session_token": cfg.aws_session_token.get_secret_value() if cfg.aws_session_token else None,
+            "profile_name": cfg.profile_name,
+            "aws_role_arn": cfg.aws_role_arn,
+            "aws_web_identity_token_file": cfg.aws_web_identity_token_file,
+            "aws_role_session_name": cfg.aws_role_session_name,
+            "session_ttl": cfg.session_ttl,
+        }
+        auth = BedrockSigV4Auth(**{k: v for k, v in sigv4_args.items() if v is not None})
+        network_config = cfg.network
+        network_kwargs = build_network_client_kwargs(network_config)
+        client = httpx.AsyncClient(auth=auth, **network_kwargs)
+        if network_config is not None:
+            set_client_network_fingerprint(client, network_config_fingerprint(network_config))
+        return client
+
+    async def initialize(self) -> None:
+        await super().initialize()
+        # no request context at init time, so only the static config is available;
+        # per-request bearer token overrides are handled in get_extra_client_params()
+        if not self._bedrock_config.has_bearer_token():
+            self._sigv4_http_client = self._build_sigv4_http_client()
+
+    def get_api_key(self) -> str | None:
+        if self._should_use_sigv4():
+            # openai sdk requires a non-empty api_key; sigv4_auth will overwrite
+            # the resulting "Bearer <NOTUSED>" header with the real SigV4 signature
+            return "<NOTUSED>"
+        return super().get_api_key()
+
+    def get_extra_client_params(self) -> dict[str, Any]:
+        # re-check per request so a runtime bearer token in provider data can bypass sigv4
+        if self._sigv4_http_client is not None and self._should_use_sigv4():
+            return {"http_client": self._sigv4_http_client}
+        return {}
+
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        # bedrock's openai-compatible endpoint doesn't expose /v1/models
+        return []
+
+    async def check_model_availability(self, model: str) -> bool:
+        # no /v1/models to query — accept whatever is registered in config
+        return True
+
+    async def shutdown(self) -> None:
+        if self._sigv4_http_client is not None:
+            # shield so stack.py's 5s asyncio.wait_for doesn't abort mid-close and leak a socket
+            await asyncio.shield(self._sigv4_http_client.aclose())
+            self._sigv4_http_client = None
 
     async def openai_embeddings(
         self,
@@ -120,9 +256,10 @@ async def openai_chat_completion(
         self,
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        """Override to handle authentication errors and null responses."""
+        use_sigv4 = self._should_use_sigv4()
+
         try:
-            logger.debug("Calling Bedrock OpenAI API", model=params.model, stream=params.stream)
+            logger.debug("Calling Bedrock OpenAI API", model=params.model, stream=params.stream, sigv4=use_sigv4)
             result = await super().openai_chat_completion(params=params)
             logger.debug("Bedrock API returned", result_type=type(result).__name__ if result is not None else "None")
 
@@ -134,27 +271,44 @@ async def openai_chat_completion(
                 )
 
             return result
-        except AuthenticationError as e:
+        except (AuthenticationError, PermissionDeniedError) as e:
+            # PermissionDeniedError (403) covers SigV4 failures like SignatureDoesNotMatch
+            # and AccessDenied — same sanitized path as AuthenticationError (401)
             error_msg = str(e)
-
-            # Check if this is a token expiration error
-            if "expired" in error_msg.lower() or "Bearer Token has expired" in error_msg:
-                logger.error("AWS Bedrock authentication token expired", error=error_msg)
-                raise ValueError(
-                    "AWS Bedrock authentication failed: Bearer token has expired. "
-                    "The AWS_BEARER_TOKEN_BEDROCK environment variable contains an expired pre-signed URL. "
-                    "Please refresh your token by generating a new pre-signed URL with AWS credentials. "
-                    "Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints."
-                ) from e
-            else:
-                logger.error("AWS Bedrock authentication failed", error=error_msg)
-                raise ValueError(
-                    f"AWS Bedrock authentication failed: {error_msg}. "
-                    "Please verify your API key is correct in the provider config or x-llamastack-provider-data header. "
-                    "The API key should be a valid AWS pre-signed URL for Bedrock's OpenAI-compatible endpoint."
+            self._handle_auth_error(error_msg, e, use_sigv4=use_sigv4)
+        except (RuntimeError, OSError) as e:
+            # credential resolution failures (missing AWS creds, unreadable web identity
+            # token file, STS errors) should surface as sanitized auth errors, not raw
+            # exception messages that may leak internal paths or AWS account details
+            if use_sigv4:
+                logger.error("AWS Bedrock SigV4 credential resolution failed", error_type=type(e).__name__)
+                raise InternalServerError(
+                    "Authentication failed because the server could not resolve AWS credentials. "
+                    "Please verify that the server has valid AWS credentials configured."
                 ) from e
+            raise
         except Exception as e:
             logger.error(
                 "Unexpected error calling Bedrock API", error_type=type(e).__name__, error=str(e), exc_info=True
             )
             raise
+
+    def _handle_auth_error(self, error_msg: str, original_error: Exception, *, use_sigv4: bool) -> NoReturn:
+        if use_sigv4:
+            logger.error("AWS Bedrock SigV4 authentication failed")
+            raise InternalServerError(
+                "Authentication failed because the configured cloud credentials could not authorize this request. "
+                "Please verify that the credentials available to the server are valid, unexpired, and allowed to access the requested model."
+            ) from original_error
+
+        if "expired" in error_msg.lower() or "Bearer Token has expired" in error_msg:
+            logger.error("AWS Bedrock authentication token expired")
+            raise InternalServerError(
+                "Authentication failed because the provided request credential has expired. "
+                "Please refresh the credential and try again, or remove it so the server can use its configured cloud credentials."
+            ) from original_error
+        logger.error("AWS Bedrock authentication failed")
+        raise InternalServerError(
+            "Authentication failed because the provided request credential was rejected. "
+            "Please verify that the credential is valid, unexpired, and authorized for this request."
+        ) from original_error
diff --git a/src/llama_stack/providers/remote/inference/bedrock/config.py b/src/llama_stack/providers/remote/inference/bedrock/config.py
index 667b5a6e56..810e9e5a2c 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/config.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/config.py
@@ -8,7 +8,7 @@
 
 from pydantic import BaseModel, Field, SecretStr
 
-from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
 
 
 class BedrockProviderDataValidator(BaseModel):
@@ -20,17 +20,32 @@ class BedrockProviderDataValidator(BaseModel):
     )
 
 
-class BedrockConfig(RemoteInferenceProviderConfig):
+class BedrockConfig(BedrockBaseConfig):
     """Configuration for the AWS Bedrock inference provider."""
 
-    region_name: str = Field(
+    auth_credential: SecretStr | None = Field(
+        default=None,
+        description="Authentication credential for the provider",
+        alias="api_key",
+    )
+    # Override region_name: defaults to AWS_DEFAULT_REGION, falling back to us-east-2
+    region_name: str | None = Field(
         default_factory=lambda: os.getenv("AWS_DEFAULT_REGION", "us-east-2"),
         description="AWS Region for the Bedrock Runtime endpoint",
     )
 
+    def has_bearer_token(self) -> bool:
+        """Return True if a non-empty, non-whitespace bearer token is configured."""
+        if self.auth_credential is None:
+            return False
+        token = self.auth_credential.get_secret_value()
+        return bool(token and token.strip())
+
     @classmethod
     def sample_run_config(cls, **kwargs):
         return {
             "api_key": "${env.AWS_BEARER_TOKEN_BEDROCK:=}",
             "region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}",
+            "aws_role_arn": "${env.AWS_ROLE_ARN:=}",
+            "aws_web_identity_token_file": "${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}",
         }
diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py
index 045dfb83b3..3f6899d073 100644
--- a/src/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -13,7 +13,9 @@
 from llama_stack.providers.inline.responses.builtin.responses.types import (
     AssistantMessageWithReasoning,
 )
-from llama_stack.providers.utils.inference.http_client import _build_network_client_kwargs
+from llama_stack.providers.utils.inference.http_client import (
+    build_network_client_kwargs as _build_network_client_kwargs,
+)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     HealthResponse,
diff --git a/src/llama_stack/providers/utils/bedrock/client.py b/src/llama_stack/providers/utils/bedrock/client.py
index 90b1b247b5..7f2a2d9abc 100644
--- a/src/llama_stack/providers/utils/bedrock/client.py
+++ b/src/llama_stack/providers/utils/bedrock/client.py
@@ -9,7 +9,7 @@
 from botocore.client import BaseClient
 from botocore.config import Config
 
-from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
+from llama_stack.providers.utils.bedrock.config import DEFAULT_SESSION_TTL, BedrockBaseConfig
 from llama_stack.providers.utils.bedrock.refreshable_boto_session import (
     RefreshableBotoSession,
 )
@@ -25,36 +25,50 @@ def create_bedrock_client(config: BedrockBaseConfig, service_name: str = "bedroc
     Returns:
         A configured boto3 client
     """
-    if config.aws_access_key_id and config.aws_secret_access_key:
-        retries_config = {
-            k: v
-            for k, v in dict(
-                total_max_attempts=config.total_max_attempts,
-                mode=config.retry_mode,
-            ).items()
-            if v is not None
-        }
-
-        config_args = {
-            k: v
-            for k, v in dict(
-                region_name=config.region_name,
-                retries=retries_config if retries_config else None,
-                connect_timeout=config.connect_timeout,
-                read_timeout=config.read_timeout,
-            ).items()
-            if v is not None
-        }
-
-        boto3_config = Config(**config_args)
+    retries_config = {
+        k: v
+        for k, v in dict(
+            total_max_attempts=config.total_max_attempts,
+            mode=config.retry_mode,
+        ).items()
+        if v is not None
+    }
+    boto3_config_args = {
+        k: v
+        for k, v in dict(
+            region_name=config.region_name,
+            retries=retries_config if retries_config else None,
+            connect_timeout=config.connect_timeout,
+            read_timeout=config.read_timeout,
+        ).items()
+        if v is not None
+    }
+    boto3_config = Config(**boto3_config_args) if boto3_config_args else None
 
+    if config.aws_role_arn:
+        # role assumption takes priority — source credentials (if any) are passed in
+        # so the refreshable session can use them as the base for assume-role calls
+        client = RefreshableBotoSession(
+            region_name=config.region_name,
+            aws_access_key_id=config.aws_access_key_id.get_secret_value() if config.aws_access_key_id else None,
+            aws_secret_access_key=config.aws_secret_access_key.get_secret_value()
+            if config.aws_secret_access_key
+            else None,
+            aws_session_token=config.aws_session_token.get_secret_value() if config.aws_session_token else None,
+            profile_name=config.profile_name,
+            sts_arn=config.aws_role_arn,
+            web_identity_token_file=config.aws_web_identity_token_file,
+            session_name=config.aws_role_session_name,
+            session_ttl=config.session_ttl or DEFAULT_SESSION_TTL,
+        ).refreshable_session()
+        return client.client(service_name, config=boto3_config) if boto3_config else client.client(service_name)
+    elif config.aws_access_key_id and config.aws_secret_access_key:
         session_args = {
             "aws_access_key_id": config.aws_access_key_id.get_secret_value(),
             "aws_secret_access_key": config.aws_secret_access_key.get_secret_value(),
             "aws_session_token": config.aws_session_token.get_secret_value() if config.aws_session_token else None,
             "region_name": config.region_name,
             "profile_name": config.profile_name,
-            "session_ttl": config.session_ttl,
         }
 
         # Remove None values
@@ -63,12 +77,9 @@ def create_bedrock_client(config: BedrockBaseConfig, service_name: str = "bedroc
         boto3_session = boto3.session.Session(**session_args)
         return boto3_session.client(service_name, config=boto3_config)
     else:
-        return (
-            RefreshableBotoSession(
-                region_name=config.region_name,
-                profile_name=config.profile_name,
-                session_ttl=config.session_ttl,
-            )
-            .refreshable_session()
-            .client(service_name)
-        )
+        session = RefreshableBotoSession(
+            region_name=config.region_name,
+            profile_name=config.profile_name,
+            session_ttl=config.session_ttl or DEFAULT_SESSION_TTL,
+        ).refreshable_session()
+        return session.client(service_name, config=boto3_config) if boto3_config else session.client(service_name)
diff --git a/src/llama_stack/providers/utils/bedrock/config.py b/src/llama_stack/providers/utils/bedrock/config.py
index b4bb0afa28..48385a3faf 100644
--- a/src/llama_stack/providers/utils/bedrock/config.py
+++ b/src/llama_stack/providers/utils/bedrock/config.py
@@ -8,6 +8,9 @@
 
 from pydantic import Field, SecretStr
 
+# 1 hour — matches the default STS AssumeRole session duration (DurationSeconds)
+DEFAULT_SESSION_TTL = 3600
+
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 
 
@@ -27,6 +30,18 @@ class BedrockBaseConfig(RemoteInferenceProviderConfig):
         default_factory=lambda: SecretStr(val) if (val := os.getenv("AWS_SESSION_TOKEN")) else None,
         description="The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN",
     )
+    aws_role_arn: str | None = Field(
+        default_factory=lambda: os.getenv("AWS_ROLE_ARN"),
+        description="The AWS role ARN to assume. Default use environment variable: AWS_ROLE_ARN",
+    )
+    aws_web_identity_token_file: str | None = Field(
+        default_factory=lambda: os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE"),
+        description="The path to the web identity token file. Default use environment variable: AWS_WEB_IDENTITY_TOKEN_FILE",
+    )
+    aws_role_session_name: str | None = Field(
+        default_factory=lambda: os.getenv("AWS_ROLE_SESSION_NAME"),
+        description="The session name to use when assuming a role. Default use environment variable: AWS_ROLE_SESSION_NAME",
+    )
     region_name: str | None = Field(
         default_factory=lambda: os.getenv("AWS_DEFAULT_REGION"),
         description="The default AWS Region to use, for example, us-west-1 or us-west-2."
@@ -57,7 +72,7 @@ class BedrockBaseConfig(RemoteInferenceProviderConfig):
         "The default is 60 seconds.",
     )
     session_ttl: int | None = Field(
-        default_factory=lambda: int(os.getenv("AWS_SESSION_TTL", "3600")),
+        default_factory=lambda: int(os.getenv("AWS_SESSION_TTL", str(DEFAULT_SESSION_TTL))),
         description="The time in seconds till a session expires. The default is 3600 seconds (1 hour).",
     )
 
diff --git a/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py b/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py
index 8dab40424d..575d8d031b 100644
--- a/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py
+++ b/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py
@@ -12,68 +12,69 @@
 from botocore.credentials import RefreshableCredentials
 from botocore.session import get_session
 
+from llama_stack.providers.utils.bedrock.config import DEFAULT_SESSION_TTL
+
 
 class RefreshableBotoSession:
     """
-    Boto Helper class which lets us create a refreshable session so that we can cache the client or resource.
-
-    Usage
-    -----
-    session = RefreshableBotoSession().refreshable_session()
+    Wraps a boto3 session so credentials refresh automatically before they expire.
 
-    client = session.client("s3") # we now can cache this client object without worrying about expiring credentials
+    Use this when you need a long-lived boto3 client (e.g. a cached bedrock-runtime
+    client) without worrying about STS credentials timing out mid-request.
     """
 
     def __init__(
         self,
-        region_name: str = None,
-        profile_name: str = None,
-        sts_arn: str = None,
-        session_name: str = None,
-        session_ttl: int = 30000,
+        region_name: str | None = None,
+        aws_access_key_id: str | None = None,
+        aws_secret_access_key: str | None = None,
+        aws_session_token: str | None = None,
+        profile_name: str | None = None,
+        sts_arn: str | None = None,
+        web_identity_token_file: str | None = None,
+        session_name: str | None = None,
+        session_ttl: int = DEFAULT_SESSION_TTL,
     ):
-        """
-        Initialize `RefreshableBotoSession`
-
-        Parameters
-        ----------
-        region_name : str (optional)
-            Default region when creating a new connection.
-
-        profile_name : str (optional)
-            The name of a profile to use.
-
-        sts_arn : str (optional)
-            The role arn to sts before creating a session.
-
-        session_name : str (optional)
-            An identifier for the assumed role session. (required when `sts_arn` is given)
-
-        session_ttl : int (optional)
-            An integer number to set the TTL for each session. Beyond this session, it will renew the token.
-            50 minutes by default which is before the default role expiration of 1 hour
-        """
-
         self.region_name = region_name
+        self.aws_access_key_id = aws_access_key_id
+        self.aws_secret_access_key = aws_secret_access_key
+        self.aws_session_token = aws_session_token
         self.profile_name = profile_name
         self.sts_arn = sts_arn
+        self.web_identity_token_file = web_identity_token_file
         self.session_name = session_name or uuid4().hex
         self.session_ttl = session_ttl
 
     def __get_session_credentials(self):
-        """
-        Get session credentials
-        """
-        session = Session(region_name=self.region_name, profile_name=self.profile_name)
+        session_args = {
+            "region_name": self.region_name,
+            "profile_name": self.profile_name,
+            "aws_access_key_id": self.aws_access_key_id,
+            "aws_secret_access_key": self.aws_secret_access_key,
+            "aws_session_token": self.aws_session_token,
+        }
+        session_args = {k: v for k, v in session_args.items() if v is not None}
+        session = Session(**session_args)
 
-        # if sts_arn is given, get credential by assuming the given role
         if self.sts_arn:
             sts_client = session.client(service_name="sts", region_name=self.region_name)
-            response = sts_client.assume_role(
-                RoleArn=self.sts_arn,
-                RoleSessionName=self.session_name,
-                DurationSeconds=self.session_ttl,
-            ).get("Credentials")
+
+            if self.web_identity_token_file:
+                with open(self.web_identity_token_file) as f:
+                    web_identity_token = f.read().strip()
+
+                response = sts_client.assume_role_with_web_identity(
+                    RoleArn=self.sts_arn,
+                    RoleSessionName=self.session_name,
+                    WebIdentityToken=web_identity_token,
+                    DurationSeconds=self.session_ttl,
+                ).get("Credentials")
+            else:
+                response = sts_client.assume_role(
+                    RoleArn=self.sts_arn,
+                    RoleSessionName=self.session_name,
+                    DurationSeconds=self.session_ttl,
+                ).get("Credentials")
 
             credentials = {
                 "access_key": response.get("AccessKeyId"),
@@ -93,17 +94,12 @@ def __get_session_credentials(self):
         return credentials
 
     def refreshable_session(self) -> Session:
-        """
-        Get refreshable boto3 session.
-        """
-        # Get refreshable credentials
         refreshable_credentials = RefreshableCredentials.create_from_metadata(
             metadata=self.__get_session_credentials(),
             refresh_using=self.__get_session_credentials,
             method="sts-assume-role",
         )
 
-        # attach refreshable credentials current session
         session = get_session()
         session._credentials = refreshable_credentials
         session.set_config_variable("region", self.region_name)
diff --git a/src/llama_stack/providers/utils/bedrock/sigv4_auth.py b/src/llama_stack/providers/utils/bedrock/sigv4_auth.py
new file mode 100644
index 0000000000..11a0d2a605
--- /dev/null
+++ b/src/llama_stack/providers/utils/bedrock/sigv4_auth.py
@@ -0,0 +1,232 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+SigV4 authentication for AWS Bedrock OpenAI-compatible endpoint.
+
+This module provides an httpx.Auth implementation that signs requests using
+AWS Signature Version 4, enabling IAM/STS authentication with the Bedrock
+OpenAI-compatible API endpoint.
+
+Supported credential sources (via boto3 credential chain):
+- Static credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
+- Web Identity Federation (AWS_ROLE_ARN, AWS_WEB_IDENTITY_TOKEN_FILE)
+- IAM roles (IMDS for EC2, ECS task roles, Lambda execution roles)
+- AWS profiles (~/.aws/credentials)
+
+Web Identity Federation enables keyless authentication in:
+- Kubernetes/OpenShift with IRSA (IAM Roles for Service Accounts)
+- GitHub Actions with OIDC (aws-actions/configure-aws-credentials)
+- Any OIDC-compatible identity provider
+
+Environment variables for Web Identity:
+    AWS_ROLE_ARN: ARN of the IAM role to assume
+    AWS_WEB_IDENTITY_TOKEN_FILE: Path to the OIDC token file
+        Common paths:
+        - EKS: /var/run/secrets/eks.amazonaws.com/serviceaccount/token
+        - Generic Kubernetes: /var/run/secrets/kubernetes.io/serviceaccount/token
+        - GitHub Actions: Set automatically by aws-actions/configure-aws-credentials
+    AWS_DEFAULT_REGION: AWS region for the Bedrock endpoint
+
+Credentials are automatically refreshed by boto3 when they expire.
+
+References:
+- https://docs.aws.amazon.com/bedrock/latest/userguide/inference-chat-completions.html
+- https://github.com/meta-llama/llama-stack/issues/4730
+- https://github.com/opendatahub-io/llama-stack-distribution/issues/112
+"""
+
+from __future__ import annotations
+
+import asyncio
+import threading
+from collections.abc import AsyncGenerator, Generator
+from typing import Any
+
+import httpx
+from botocore.auth import SigV4Auth
+from botocore.awsrequest import AWSRequest
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.bedrock.config import DEFAULT_SESSION_TTL
+
+logger = get_logger(name=__name__, category="providers")
+
+
+class BedrockSigV4Auth(httpx.Auth):
+    """
+    httpx.Auth that signs requests with AWS SigV4.
+
+    Only signs headers that httpx won't touch after signing, to avoid
+    signature mismatches. Credential refresh is handled automatically
+    by boto3 for temporary credentials (STS, IRSA).
+    """
+
+    # SigV4 hashes the payload, so have httpx buffer the request body before the
+    # sync auth flow runs; async_auth_flow is overridden and buffers it itself
+    requires_request_body = True
+
+    def __init__(
+        self,
+        region: str,
+        service: str = "bedrock",
+        aws_access_key_id: str | None = None,
+        aws_secret_access_key: str | None = None,
+        aws_session_token: str | None = None,
+        profile_name: str | None = None,
+        aws_role_arn: str | None = None,
+        aws_web_identity_token_file: str | None = None,
+        aws_role_session_name: str | None = None,
+        session_ttl: int | None = DEFAULT_SESSION_TTL,
+    ):
+        """
+        Store signing parameters; the boto session is created lazily on first use.
+
+        Args:
+            region: AWS region used for the boto session and the SigV4 scope.
+            service: SigV4 signing name passed to the signer.
+            aws_access_key_id: optional explicit access key for the boto session.
+            aws_secret_access_key: optional explicit secret key for the boto session.
+            aws_session_token: optional explicit session token for the boto session.
+            profile_name: optional AWS profile name for the boto session.
+            aws_role_arn: when set, credentials come from a RefreshableBotoSession
+                built for this role instead of a plain boto3.Session.
+            aws_web_identity_token_file: token file passed to RefreshableBotoSession.
+            aws_role_session_name: session name passed to RefreshableBotoSession.
+            session_ttl: session TTL passed to RefreshableBotoSession; a falsy
+                value falls back to DEFAULT_SESSION_TTL.
+        """
+        # service must be "bedrock" (the botocore signing name), not "bedrock-runtime"
+        # (the endpoint prefix) — using the wrong one causes SignatureDoesNotMatch
+        self._region = region
+        self._service = service
+        self._aws_access_key_id = aws_access_key_id
+        self._aws_secret_access_key = aws_secret_access_key
+        self._aws_session_token = aws_session_token
+        self._profile_name = profile_name
+        self._aws_role_arn = aws_role_arn
+        self._aws_web_identity_token_file = aws_web_identity_token_file
+        self._aws_role_session_name = aws_role_session_name
+        self._session_ttl = session_ttl or DEFAULT_SESSION_TTL
+        self._lock = threading.Lock()
+        self._session: Any = None  # boto3.Session | None — Any because boto3 is an optional dep
+
+    def _get_credentials(self) -> Any:
+        """
+        Return a frozen snapshot of the current AWS credentials.
+
+        The session is built once under the lock: a RefreshableBotoSession when
+        a role ARN was configured, a plain boto3.Session otherwise.
+
+        Raises:
+            RuntimeError: if the session cannot resolve any credentials.
+        """
+        from llama_stack.providers.utils.bedrock.refreshable_boto_session import (
+            RefreshableBotoSession,
+        )
+
+        with self._lock:
+            if self._session is None:
+                if self._aws_role_arn:
+                    self._session = RefreshableBotoSession(
+                        region_name=self._region,
+                        aws_access_key_id=self._aws_access_key_id,
+                        aws_secret_access_key=self._aws_secret_access_key,
+                        aws_session_token=self._aws_session_token,
+                        profile_name=self._profile_name,
+                        sts_arn=self._aws_role_arn,
+                        web_identity_token_file=self._aws_web_identity_token_file,
+                        session_name=self._aws_role_session_name,
+                        session_ttl=self._session_ttl,
+                    ).refreshable_session()
+                else:
+                    import boto3
+
+                    self._session = boto3.Session(
+                        region_name=self._region,
+                        aws_access_key_id=self._aws_access_key_id,
+                        aws_secret_access_key=self._aws_secret_access_key,
+                        aws_session_token=self._aws_session_token,
+                        profile_name=self._profile_name,
+                    )
+
+            credentials = self._session.get_credentials()
+            if credentials is None:
+                raise RuntimeError(
+                    "Failed to load AWS credentials. Ensure AWS credentials are "
+                    "configured via environment variables (AWS_ACCESS_KEY_ID, "
+                    "AWS_SECRET_ACCESS_KEY), IAM role, or AWS profile."
+                )
+            return credentials.get_frozen_credentials()
+
+    def _sign_request(self, request: httpx.Request) -> None:
+        """
+        Sign ``request`` in place with SigV4.
+
+        Removes any existing Authorization header, signs the method, URL, body
+        and a minimal header set, then copies the signed headers onto the request.
+        """
+        credentials = self._get_credentials()
+
+        # drop the openai sdk's "Bearer <NOTUSED>" placeholder before signing
+        if "authorization" in request.headers:
+            del request.headers["authorization"]
+
+        # sign only stable headers — anything httpx might rewrite after this point
+        # would invalidate the signature, so we leave those out
+        # (URL.netloc is bytes in httpx, so decode it rather than str()-ing it)
+        host = request.headers.get("host") or request.url.netloc.decode("ascii")
+        headers_to_sign = {"host": host}
+
+        # only include content-type if the request already has one; injecting a
+        # default here would cause a mismatch if httpx sends a different value
+        if "content-type" in request.headers:
+            headers_to_sign["content-type"] = request.headers["content-type"]
+
+        for header_name in ["x-amz-content-sha256", "x-amz-security-token"]:
+            if header_name in request.headers:
+                headers_to_sign[header_name] = request.headers[header_name]
+
+        try:
+            content = request.content
+        except httpx.RequestNotRead:
+            content = request.read()
+
+        aws_request = AWSRequest(
+            method=request.method,
+            url=str(request.url),
+            data=content,
+            headers=headers_to_sign,
+        )
+
+        signer = SigV4Auth(credentials, self._service, self._region)
+        signer.add_auth(aws_request)
+
+        # copy Authorization, X-Amz-Date, and X-Amz-Security-Token back onto the live request
+        for key, value in aws_request.headers.items():
+            request.headers[key] = value
+
+        logger.debug(
+            f"SigV4 signed request: method={request.method}, "
+            f"path={request.url.path}, service={self._service}, region={self._region}"
+        )
+
+    def auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, httpx.Response, None]:
+        """Sign the request synchronously, then hand it to httpx to send."""
+        self._sign_request(request)
+        yield request
+
+    async def async_auth_flow(self, request: httpx.Request) -> AsyncGenerator[httpx.Request, httpx.Response]:
+        """Sign the request off the event loop, then hand it to httpx to send."""
+        # buffer the body here first: an async streaming body can only be read
+        # with aread(), and the signer needs the whole payload to hash
+        await request.aread()
+
+        # offload to a thread because credential resolution can do IMDS calls or file I/O;
+        # shield so a rolling-restart cancellation doesn't abort mid-sign and leave the
+        # connection in an inconsistent auth state
+        await asyncio.shield(asyncio.to_thread(self._sign_request, request))
+        yield request
diff --git a/src/llama_stack/providers/utils/inference/http_client.py b/src/llama_stack/providers/utils/inference/http_client.py
index 5014703386..3a408f74b2 100644
--- a/src/llama_stack/providers/utils/inference/http_client.py
+++ b/src/llama_stack/providers/utils/inference/http_client.py
@@ -4,6 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import hashlib
+import json
 import ssl
 from pathlib import Path
 from typing import Any
@@ -22,6 +24,29 @@
 logger = get_logger(name=__name__, category="providers::utils")
 
 
+# attribute name under which a client records the fingerprint of the NetworkConfig it was built from
+_NETWORK_CONFIG_FINGERPRINT_ATTR = "_llama_stack_network_config_fingerprint"
+
+
+def network_config_fingerprint(network_config: NetworkConfig) -> str:
+    """Return the SHA-256 hex digest of the config's canonical (sorted-key, compact) JSON dump."""
+    dumped = json.dumps(network_config.model_dump(mode="json"), sort_keys=True, separators=(",", ":"))
+    return hashlib.sha256(dumped.encode("utf-8")).hexdigest()
+
+
+def _get_client_network_fingerprint(existing_client: httpx.AsyncClient | DefaultAsyncHttpxClient) -> str | None:
+    """Return the fingerprint recorded on the client (or on its wrapped ``_client``), else None."""
+    # look through a wrapped `_client` when there is one; otherwise use the client itself
+    target = getattr(existing_client, "_client", existing_client)
+    return getattr(target, _NETWORK_CONFIG_FINGERPRINT_ATTR, None)
+
+
+def set_client_network_fingerprint(client: httpx.AsyncClient | DefaultAsyncHttpxClient, fingerprint: str) -> None:
+    """Record ``fingerprint`` on the client, or on its wrapped ``_client`` when it has one."""
+    target = getattr(client, "_client", client)
+    setattr(target, _NETWORK_CONFIG_FINGERPRINT_ATTR, fingerprint)
+
+
 def _build_ssl_context(tls_config: TLSConfig) -> ssl.SSLContext | bool | str:
     """
     Build an SSL context from TLS configuration.
@@ -92,7 +117,7 @@ def _build_proxy_mounts(proxy_config: ProxyConfig) -> dict[str, httpx.AsyncHTTPT
     return mounts if mounts else None
 
 
-def _build_network_client_kwargs(network_config: NetworkConfig | None) -> dict[str, Any]:
+def build_network_client_kwargs(network_config: NetworkConfig | None) -> dict[str, Any]:
     """
     Build httpx.AsyncClient kwargs from network configuration.
 
@@ -187,7 +212,11 @@ def _merge_network_config_into_client(
     if network_config is None:
         return existing_client
 
-    network_kwargs = _build_network_client_kwargs(network_config)
+    fingerprint = network_config_fingerprint(network_config)
+    if _get_client_network_fingerprint(existing_client) == fingerprint:
+        return existing_client
+
+    network_kwargs = build_network_client_kwargs(network_config)
     if not network_kwargs:
         return existing_client
 
@@ -210,9 +239,13 @@ def _merge_network_config_into_client(
         # Create new client with merged config
         new_client = httpx.AsyncClient(**network_kwargs)
 
+        set_client_network_fingerprint(new_client, fingerprint)
+
         # If original was DefaultAsyncHttpxClient, wrap the new client
         if isinstance(existing_client, DefaultAsyncHttpxClient):
-            return DefaultAsyncHttpxClient(client=new_client, headers=network_kwargs.get("headers"))  # type: ignore[call-arg]
+            wrapped = DefaultAsyncHttpxClient(client=new_client, headers=network_kwargs.get("headers"))  # type: ignore[call-arg]
+            set_client_network_fingerprint(wrapped, fingerprint)
+            return wrapped
 
         return new_client
     except Exception as e:
@@ -234,7 +267,7 @@ def build_http_client(network_config: NetworkConfig | None) -> dict[str, Any]:
         Dictionary of kwargs to pass to httpx.AsyncClient constructor,
         wrapped in {"http_client": AsyncClient(...)} for use with AsyncOpenAI
     """
-    network_kwargs = _build_network_client_kwargs(network_config)
+    network_kwargs = build_network_client_kwargs(network_config)
     if not network_kwargs:
         return {}
 
diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py
index 253d73b949..f1de6eb4ff 100644
--- a/src/llama_stack/providers/utils/inference/openai_mixin.py
+++ b/src/llama_stack/providers/utils/inference/openai_mixin.py
@@ -18,8 +18,8 @@
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.http_client import (
-    _build_network_client_kwargs,
     _merge_network_config_into_client,
+    build_network_client_kwargs,
 )
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.providers.utils.inference.openai_compat import (
@@ -227,7 +227,7 @@ def client(self) -> AsyncOpenAI:
             raise ValueError(message)
 
         extra_params = self.get_extra_client_params()
-        network_kwargs = _build_network_client_kwargs(self.config.network)
+        network_kwargs = build_network_client_kwargs(self.config.network)
 
         # Handle http_client creation/merging:
         # - If get_extra_client_params() provides an http_client (e.g., OCI with custom auth),
diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-3ff721a4.json b/tests/integration/common/recordings/models-66c1506ac9a1c3cac55a2dabb6a81f068d753f4a69ae4edeb730b7ca374186c7-3ff721a4.json
similarity index 99%
rename from tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-3ff721a4.json
rename to tests/integration/common/recordings/models-66c1506ac9a1c3cac55a2dabb6a81f068d753f4a69ae4edeb730b7ca374186c7-3ff721a4.json
index 23216fedcf..edba78d30d 100644
--- a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-3ff721a4.json
+++ b/tests/integration/common/recordings/models-66c1506ac9a1c3cac55a2dabb6a81f068d753f4a69ae4edeb730b7ca374186c7-3ff721a4.json
@@ -2,7 +2,7 @@
   "test_id": null,
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/models",
+    "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/models",
     "headers": {},
     "body": {},
     "endpoint": "/v1/models",
diff --git a/tests/integration/inference/recordings/1675987cb787dc51be76c547577dd38172be220c805c37ba73ba56fa89cd1121.json b/tests/integration/inference/recordings/7e3382aab43824f89f365fb42a16f3a9a7592d5336fd3d2101fec1e143080184.json
similarity index 98%
rename from tests/integration/inference/recordings/1675987cb787dc51be76c547577dd38172be220c805c37ba73ba56fa89cd1121.json
rename to tests/integration/inference/recordings/7e3382aab43824f89f365fb42a16f3a9a7592d5336fd3d2101fec1e143080184.json
index f25555e9c2..0eec4eff66 100644
--- a/tests/integration/inference/recordings/1675987cb787dc51be76c547577dd38172be220c805c37ba73ba56fa89cd1121.json
+++ b/tests/integration/inference/recordings/7e3382aab43824f89f365fb42a16f3a9a7592d5336fd3d2101fec1e143080184.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:streaming_02]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/inference/recordings/718bb1cd939e26c4b7e6b940c3acd97304f46e13b606e081086dd6657e9d3db2.json b/tests/integration/inference/recordings/94bb6e3fe939afbdb25c9ebfe9e44fad66ae8a37504748ca0a620f241d8537bd.json
similarity index 97%
rename from tests/integration/inference/recordings/718bb1cd939e26c4b7e6b940c3acd97304f46e13b606e081086dd6657e9d3db2.json
rename to tests/integration/inference/recordings/94bb6e3fe939afbdb25c9ebfe9e44fad66ae8a37504748ca0a620f241d8537bd.json
index 28fe67627a..4e549bf6fb 100644
--- a/tests/integration/inference/recordings/718bb1cd939e26c4b7e6b940c3acd97304f46e13b606e081086dd6657e9d3db2.json
+++ b/tests/integration/inference/recordings/94bb6e3fe939afbdb25c9ebfe9e44fad66ae8a37504748ca0a620f241d8537bd.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=bedrock/openai.gpt-oss-20b-True]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/inference/recordings/42ba670916ebfdbe86b518ad1692971ccbb2c2e4a30372f43f38f8e1fc22daf0.json b/tests/integration/inference/recordings/a89357c63194c44cba530faecbbb3ecd51e713f8e6cbd96134c911cc5a9d25c2.json
similarity index 98%
rename from tests/integration/inference/recordings/42ba670916ebfdbe86b518ad1692971ccbb2c2e4a30372f43f38f8e1fc22daf0.json
rename to tests/integration/inference/recordings/a89357c63194c44cba530faecbbb3ecd51e713f8e6cbd96134c911cc5a9d25c2.json
index 1d86504abc..c599bab06c 100644
--- a/tests/integration/inference/recordings/42ba670916ebfdbe86b518ad1692971ccbb2c2e4a30372f43f38f8e1fc22daf0.json
+++ b/tests/integration/inference/recordings/a89357c63194c44cba530faecbbb3ecd51e713f8e6cbd96134c911cc5a9d25c2.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:streaming_01]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/inference/recordings/de019f1d2995412fd461393c644f07c743cf81d087249ef2db974576238ab1c7.json b/tests/integration/inference/recordings/d91f3615a81a97e7d86b3cd5bb05d8d92f852af7f2286b0a20624a45f4f5f380.json
similarity index 97%
rename from tests/integration/inference/recordings/de019f1d2995412fd461393c644f07c743cf81d087249ef2db974576238ab1c7.json
rename to tests/integration/inference/recordings/d91f3615a81a97e7d86b3cd5bb05d8d92f852af7f2286b0a20624a45f4f5f380.json
index fd00c0232f..7d28deb715 100644
--- a/tests/integration/inference/recordings/de019f1d2995412fd461393c644f07c743cf81d087249ef2db974576238ab1c7.json
+++ b/tests/integration/inference/recordings/d91f3615a81a97e7d86b3cd5bb05d8d92f852af7f2286b0a20624a45f4f5f380.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:non_streaming_02]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/inference/recordings/0241337625808a1602a0b4b1c715c7531cecf2491f2f2129e7e6884e3ffb62bb.json b/tests/integration/inference/recordings/e9d3ce4be836e5478689cf56f0375fc800499022cd1daf923df591207980513c.json
similarity index 96%
rename from tests/integration/inference/recordings/0241337625808a1602a0b4b1c715c7531cecf2491f2f2129e7e6884e3ffb62bb.json
rename to tests/integration/inference/recordings/e9d3ce4be836e5478689cf56f0375fc800499022cd1daf923df591207980513c.json
index 4024097bbe..163f37387e 100644
--- a/tests/integration/inference/recordings/0241337625808a1602a0b4b1c715c7531cecf2491f2f2129e7e6884e3ffb62bb.json
+++ b/tests/integration/inference/recordings/e9d3ce4be836e5478689cf56f0375fc800499022cd1daf923df591207980513c.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b-inference:chat_completion:non_streaming_01]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/inference/recordings/d6f772f62c859028bada2fe328498364e82c1d36ed256910623577c6fcd5696c.json b/tests/integration/inference/recordings/f4fa57034e80f69cb75c300241862e98911e3124f09225d42d29239045854ce4.json
similarity index 94%
rename from tests/integration/inference/recordings/d6f772f62c859028bada2fe328498364e82c1d36ed256910623577c6fcd5696c.json
rename to tests/integration/inference/recordings/f4fa57034e80f69cb75c300241862e98911e3124f09225d42d29239045854ce4.json
index bf3f8adbf6..b546698660 100644
--- a/tests/integration/inference/recordings/d6f772f62c859028bada2fe328498364e82c1d36ed256910623577c6fcd5696c.json
+++ b/tests/integration/inference/recordings/f4fa57034e80f69cb75c300241862e98911e3124f09225d42d29239045854ce4.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=bedrock/openai.gpt-oss-20b-False]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-west-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/7fe113f5fc7f0e055b4abd4e6ea0a183dc1a2b959cc410ec2c594a04028712b9.json b/tests/integration/responses/recordings/0469f7efcdf69ae378b7395f518d863badff5ace79116b718d28b8fa3e53a93a.json
similarity index 99%
rename from tests/integration/responses/recordings/7fe113f5fc7f0e055b4abd4e6ea0a183dc1a2b959cc410ec2c594a04028712b9.json
rename to tests/integration/responses/recordings/0469f7efcdf69ae378b7395f518d863badff5ace79116b718d28b8fa3e53a93a.json
index acbabb0973..ad2a6a5d08 100644
--- a/tests/integration/responses/recordings/7fe113f5fc7f0e055b4abd4e6ea0a183dc1a2b959cc410ec2c594a04028712b9.json
+++ b/tests/integration/responses/recordings/0469f7efcdf69ae378b7395f518d863badff5ace79116b718d28b8fa3e53a93a.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/d480b100b782370dd41e8dbbb53e2f37783b99074446dcb50d49034007bfb97e.json b/tests/integration/responses/recordings/04a4837c6c7974929a7328bb090e7c9d991bfb10ba71d307ed8ba39411c407df.json
similarity index 98%
rename from tests/integration/responses/recordings/d480b100b782370dd41e8dbbb53e2f37783b99074446dcb50d49034007bfb97e.json
rename to tests/integration/responses/recordings/04a4837c6c7974929a7328bb090e7c9d991bfb10ba71d307ed8ba39411c407df.json
index c4b64b33c0..bb85206433 100644
--- a/tests/integration/responses/recordings/d480b100b782370dd41e8dbbb53e2f37783b99074446dcb50d49034007bfb97e.json
+++ b/tests/integration/responses/recordings/04a4837c6c7974929a7328bb090e7c9d991bfb10ba71d307ed8ba39411c407df.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/c71d141a2ec044cc7eb01574672e5e6bc6689fb1d6aa8cc6494773fddff919d4.json b/tests/integration/responses/recordings/0c65c6fe47669a1dde7e3f13d4b357970f0e3dabc4442278caa66054f29cf2b2.json
similarity index 99%
rename from tests/integration/responses/recordings/c71d141a2ec044cc7eb01574672e5e6bc6689fb1d6aa8cc6494773fddff919d4.json
rename to tests/integration/responses/recordings/0c65c6fe47669a1dde7e3f13d4b357970f0e3dabc4442278caa66054f29cf2b2.json
index 1354ec7d90..9c3a603bfd 100644
--- a/tests/integration/responses/recordings/c71d141a2ec044cc7eb01574672e5e6bc6689fb1d6aa8cc6494773fddff919d4.json
+++ b/tests/integration/responses/recordings/0c65c6fe47669a1dde7e3f13d4b357970f0e3dabc4442278caa66054f29cf2b2.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/29462a1046f2fcc98301a8eed2fc861ba7f9de28bf2dc06bfae8c04bf664e645.json b/tests/integration/responses/recordings/1a5af1292127b401be03305c2d0a8f109a1bd0444c0394ad5e3ca4f6bce9a3c7.json
similarity index 98%
rename from tests/integration/responses/recordings/29462a1046f2fcc98301a8eed2fc861ba7f9de28bf2dc06bfae8c04bf664e645.json
rename to tests/integration/responses/recordings/1a5af1292127b401be03305c2d0a8f109a1bd0444c0394ad5e3ca4f6bce9a3c7.json
index b457375b69..4c58e51aa8 100644
--- a/tests/integration/responses/recordings/29462a1046f2fcc98301a8eed2fc861ba7f9de28bf2dc06bfae8c04bf664e645.json
+++ b/tests/integration/responses/recordings/1a5af1292127b401be03305c2d0a8f109a1bd0444c0394ad5e3ca4f6bce9a3c7.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/076784d55c6a712b559ad4061d7ed4391dc7f1cf35a2b141ad4b0b8f8c8503e1.json b/tests/integration/responses/recordings/1f1d016a1f7977d024de7a10f1e63acefb20e6839258b8b132471f404007c8f3.json
similarity index 98%
rename from tests/integration/responses/recordings/076784d55c6a712b559ad4061d7ed4391dc7f1cf35a2b141ad4b0b8f8c8503e1.json
rename to tests/integration/responses/recordings/1f1d016a1f7977d024de7a10f1e63acefb20e6839258b8b132471f404007c8f3.json
index fa214a2d26..4db3f44845 100644
--- a/tests/integration/responses/recordings/076784d55c6a712b559ad4061d7ed4391dc7f1cf35a2b141ad4b0b8f8c8503e1.json
+++ b/tests/integration/responses/recordings/1f1d016a1f7977d024de7a10f1e63acefb20e6839258b8b132471f404007c8f3.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[openai_client-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/95ae8c627acc6e165fa5046946ae7c009324d3704b64695049cbbd399a6348b8.json b/tests/integration/responses/recordings/23223eabe994fd8172f8650d7cc9c3cc6f11f0618996f28403378a85d7258c88.json
similarity index 99%
rename from tests/integration/responses/recordings/95ae8c627acc6e165fa5046946ae7c009324d3704b64695049cbbd399a6348b8.json
rename to tests/integration/responses/recordings/23223eabe994fd8172f8650d7cc9c3cc6f11f0618996f28403378a85d7258c88.json
index dfbf4d1684..e393f19733 100644
--- a/tests/integration/responses/recordings/95ae8c627acc6e165fa5046946ae7c009324d3704b64695049cbbd399a6348b8.json
+++ b/tests/integration/responses/recordings/23223eabe994fd8172f8650d7cc9c3cc6f11f0618996f28403378a85d7258c88.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_prompt_templates.py::test_multi_version_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/21b2d947138024ef80069f89c5157185a70f43c4d128ffdb4337194ae2486429.json b/tests/integration/responses/recordings/23e253ab6f05ad695e1d2dfd2190797667d27d8a1a8a963ffba82cf69bf3696d.json
similarity index 98%
rename from tests/integration/responses/recordings/21b2d947138024ef80069f89c5157185a70f43c4d128ffdb4337194ae2486429.json
rename to tests/integration/responses/recordings/23e253ab6f05ad695e1d2dfd2190797667d27d8a1a8a963ffba82cf69bf3696d.json
index 2567646684..89159b2fdd 100644
--- a/tests/integration/responses/recordings/21b2d947138024ef80069f89c5157185a70f43c4d128ffdb4337194ae2486429.json
+++ b/tests/integration/responses/recordings/23e253ab6f05ad695e1d2dfd2190797667d27d8a1a8a963ffba82cf69bf3696d.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/68dd2778e5b1caa3ae28e5eed59c41db0d39c58c113612b94abaa459fc0fad3e.json b/tests/integration/responses/recordings/2c604588ef302f0e4a93ef93dc713522c7d7798542038b89b465af2e9ef0b299.json
similarity index 99%
rename from tests/integration/responses/recordings/68dd2778e5b1caa3ae28e5eed59c41db0d39c58c113612b94abaa459fc0fad3e.json
rename to tests/integration/responses/recordings/2c604588ef302f0e4a93ef93dc713522c7d7798542038b89b465af2e9ef0b299.json
index a6fe2f3a76..d3b85e36bd 100644
--- a/tests/integration/responses/recordings/68dd2778e5b1caa3ae28e5eed59c41db0d39c58c113612b94abaa459fc0fad3e.json
+++ b/tests/integration/responses/recordings/2c604588ef302f0e4a93ef93dc713522c7d7798542038b89b465af2e9ef0b299.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_prompt_templates.py::test_multi_version_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/6e9164fd65ebd8121be120a06262efd9c0f603104e86d84ad07ff4e5b5ab4d69.json b/tests/integration/responses/recordings/2ecc2783d6558449838ced5f0180990a80c4cbaffa952f1fe5668b74e9e57ae9.json
similarity index 99%
rename from tests/integration/responses/recordings/6e9164fd65ebd8121be120a06262efd9c0f603104e86d84ad07ff4e5b5ab4d69.json
rename to tests/integration/responses/recordings/2ecc2783d6558449838ced5f0180990a80c4cbaffa952f1fe5668b74e9e57ae9.json
index dd88fbdc31..8df6a01cc9 100644
--- a/tests/integration/responses/recordings/6e9164fd65ebd8121be120a06262efd9c0f603104e86d84ad07ff4e5b5ab4d69.json
+++ b/tests/integration/responses/recordings/2ecc2783d6558449838ced5f0180990a80c4cbaffa952f1fe5668b74e9e57ae9.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[openai_client-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/efffb6e430b20dc2f20fbe77ef230b47660008ab6ae2102f73596d4331dcf683.json b/tests/integration/responses/recordings/30dee5b8b4a1716103dd23d5249b6649aa2c9d30931cccdbb2a8b2fefa69675d.json
similarity index 99%
rename from tests/integration/responses/recordings/efffb6e430b20dc2f20fbe77ef230b47660008ab6ae2102f73596d4331dcf683.json
rename to tests/integration/responses/recordings/30dee5b8b4a1716103dd23d5249b6649aa2c9d30931cccdbb2a8b2fefa69675d.json
index 0bd9da0c4e..39ba90fd12 100644
--- a/tests/integration/responses/recordings/efffb6e430b20dc2f20fbe77ef230b47660008ab6ae2102f73596d4331dcf683.json
+++ b/tests/integration/responses/recordings/30dee5b8b4a1716103dd23d5249b6649aa2c9d30931cccdbb2a8b2fefa69675d.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/e1bf2d00006e9b3c60f537c724c98c518a571caec500010e2b07a42662a0db94.json b/tests/integration/responses/recordings/3e79875697a99012661ee6e8a9160a9251d716e340408d20c5c7cc2b337c71b0.json
similarity index 98%
rename from tests/integration/responses/recordings/e1bf2d00006e9b3c60f537c724c98c518a571caec500010e2b07a42662a0db94.json
rename to tests/integration/responses/recordings/3e79875697a99012661ee6e8a9160a9251d716e340408d20c5c7cc2b337c71b0.json
index f9d5725488..befbab5abd 100644
--- a/tests/integration/responses/recordings/e1bf2d00006e9b3c60f537c724c98c518a571caec500010e2b07a42662a0db94.json
+++ b/tests/integration/responses/recordings/3e79875697a99012661ee6e8a9160a9251d716e340408d20c5c7cc2b337c71b0.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_multi_turn_and_streaming[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/a6204a1d782ab7d9f85b3ac2b792796b668ba731bf670254e3c9d9ede934193c.json b/tests/integration/responses/recordings/4290deaf6bf03c9ccf3228f6b51a2f05dc909966c425203775402388b7ced238.json
similarity index 98%
rename from tests/integration/responses/recordings/a6204a1d782ab7d9f85b3ac2b792796b668ba731bf670254e3c9d9ede934193c.json
rename to tests/integration/responses/recordings/4290deaf6bf03c9ccf3228f6b51a2f05dc909966c425203775402388b7ced238.json
index c16d810e5d..54ed03755d 100644
--- a/tests/integration/responses/recordings/a6204a1d782ab7d9f85b3ac2b792796b668ba731bf670254e3c9d9ede934193c.json
+++ b/tests/integration/responses/recordings/4290deaf6bf03c9ccf3228f6b51a2f05dc909966c425203775402388b7ced238.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/ae06979da89d9f6949f1ceb5ee057759bfbf01f3e0bd58f68d55996929192f5e.json b/tests/integration/responses/recordings/4752668af3df1f42fe5b16f6f0c7a4954472d2cda8559b8466fad6ba0aa1c642.json
similarity index 99%
rename from tests/integration/responses/recordings/ae06979da89d9f6949f1ceb5ee057759bfbf01f3e0bd58f68d55996929192f5e.json
rename to tests/integration/responses/recordings/4752668af3df1f42fe5b16f6f0c7a4954472d2cda8559b8466fad6ba0aa1c642.json
index 8689cb508f..e70fef56b6 100644
--- a/tests/integration/responses/recordings/ae06979da89d9f6949f1ceb5ee057759bfbf01f3e0bd58f68d55996929192f5e.json
+++ b/tests/integration/responses/recordings/4752668af3df1f42fe5b16f6f0c7a4954472d2cda8559b8466fad6ba0aa1c642.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/16ef4e2da1cf48a86471cb57c7ce4a4c11c6968d3e9607dbd632d3bd9a54493f.json b/tests/integration/responses/recordings/48d747eb358721344236340fdca57eb977700bad50a47dcde078625f507277cc.json
similarity index 99%
rename from tests/integration/responses/recordings/16ef4e2da1cf48a86471cb57c7ce4a4c11c6968d3e9607dbd632d3bd9a54493f.json
rename to tests/integration/responses/recordings/48d747eb358721344236340fdca57eb977700bad50a47dcde078625f507277cc.json
index 48db8bb1cb..9c481d8163 100644
--- a/tests/integration/responses/recordings/16ef4e2da1cf48a86471cb57c7ce4a4c11c6968d3e9607dbd632d3bd9a54493f.json
+++ b/tests/integration/responses/recordings/48d747eb358721344236340fdca57eb977700bad50a47dcde078625f507277cc.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_basic_workflow[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/2e46c50d186ad42569a701893e2fd59e4aa93e9b973d7780c32c6e864a4abed7.json b/tests/integration/responses/recordings/498ab18308a6e603d405bf5b5c1102f29c9250f8386e90baab57cbfa9782a558.json
similarity index 98%
rename from tests/integration/responses/recordings/2e46c50d186ad42569a701893e2fd59e4aa93e9b973d7780c32c6e864a4abed7.json
rename to tests/integration/responses/recordings/498ab18308a6e603d405bf5b5c1102f29c9250f8386e90baab57cbfa9782a558.json
index 0565553f98..35299f72fe 100644
--- a/tests/integration/responses/recordings/2e46c50d186ad42569a701893e2fd59e4aa93e9b973d7780c32c6e864a4abed7.json
+++ b/tests/integration/responses/recordings/498ab18308a6e603d405bf5b5c1102f29c9250f8386e90baab57cbfa9782a558.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_prompt_templates.py::test_multi_variable_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/d004a76ddc87e3d1f10a07b50f2aa82cf39d20e5cfb3be1433c924a126fe1a58.json b/tests/integration/responses/recordings/4c112247d4a195fac2b27e6a4936b248ba0848b70882489e1a32a9e323dc081b.json
similarity index 99%
rename from tests/integration/responses/recordings/d004a76ddc87e3d1f10a07b50f2aa82cf39d20e5cfb3be1433c924a126fe1a58.json
rename to tests/integration/responses/recordings/4c112247d4a195fac2b27e6a4936b248ba0848b70882489e1a32a9e323dc081b.json
index d6f25547df..e7a54d580e 100644
--- a/tests/integration/responses/recordings/d004a76ddc87e3d1f10a07b50f2aa82cf39d20e5cfb3be1433c924a126fe1a58.json
+++ b/tests/integration/responses/recordings/4c112247d4a195fac2b27e6a4936b248ba0848b70882489e1a32a9e323dc081b.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[openai_client-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/f9f8f255a3e65fdea20eedd28f7febfb159dafa9a3aab7a83c8afc7e89ba0b91.json b/tests/integration/responses/recordings/4e1fd22f5d9ded67352dbe815bb0cb7e463ad85b856514425df394ba222b92c4.json
similarity index 98%
rename from tests/integration/responses/recordings/f9f8f255a3e65fdea20eedd28f7febfb159dafa9a3aab7a83c8afc7e89ba0b91.json
rename to tests/integration/responses/recordings/4e1fd22f5d9ded67352dbe815bb0cb7e463ad85b856514425df394ba222b92c4.json
index 48dee3d271..a5ac4ce369 100644
--- a/tests/integration/responses/recordings/f9f8f255a3e65fdea20eedd28f7febfb159dafa9a3aab7a83c8afc7e89ba0b91.json
+++ b/tests/integration/responses/recordings/4e1fd22f5d9ded67352dbe815bb0cb7e463ad85b856514425df394ba222b92c4.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/e6e1b682e9b368426efc1bd93b326bec2f7db76ab9d8c9c6334a2d5177672fd6.json b/tests/integration/responses/recordings/5100781c53bb19a320364926454cb1a17051bb6a1a72989937e1b42465983960.json
similarity index 99%
rename from tests/integration/responses/recordings/e6e1b682e9b368426efc1bd93b326bec2f7db76ab9d8c9c6334a2d5177672fd6.json
rename to tests/integration/responses/recordings/5100781c53bb19a320364926454cb1a17051bb6a1a72989937e1b42465983960.json
index b8052fc8a4..1876fb2b4c 100644
--- a/tests/integration/responses/recordings/e6e1b682e9b368426efc1bd93b326bec2f7db76ab9d8c9c6334a2d5177672fd6.json
+++ b/tests/integration/responses/recordings/5100781c53bb19a320364926454cb1a17051bb6a1a72989937e1b42465983960.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_streaming[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/5ab06b404383787d1a545eacc7674fc068f37fbdaaa8d3b2f991f884b0c1d07a.json b/tests/integration/responses/recordings/53e2e090845d1a09194d5f18dca914ab5eecc2a2af6add585ef5be9d61549d1a.json
similarity index 99%
rename from tests/integration/responses/recordings/5ab06b404383787d1a545eacc7674fc068f37fbdaaa8d3b2f991f884b0c1d07a.json
rename to tests/integration/responses/recordings/53e2e090845d1a09194d5f18dca914ab5eecc2a2af6add585ef5be9d61549d1a.json
index 524561bac2..9f7bac0205 100644
--- a/tests/integration/responses/recordings/5ab06b404383787d1a545eacc7674fc068f37fbdaaa8d3b2f991f884b0c1d07a.json
+++ b/tests/integration/responses/recordings/53e2e090845d1a09194d5f18dca914ab5eecc2a2af6add585ef5be9d61549d1a.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/6a8b76b4d8ba3bc65c2cbd242b17aa224cc77abb15beba937decfda457ba2b6e.json b/tests/integration/responses/recordings/6138d550fdb668e3a48400b36501a2a51cea83c4157e7d6fc35792837df40bdb.json
similarity index 99%
rename from tests/integration/responses/recordings/6a8b76b4d8ba3bc65c2cbd242b17aa224cc77abb15beba937decfda457ba2b6e.json
rename to tests/integration/responses/recordings/6138d550fdb668e3a48400b36501a2a51cea83c4157e7d6fc35792837df40bdb.json
index ae6a4b7715..ddb3c9e5c8 100644
--- a/tests/integration/responses/recordings/6a8b76b4d8ba3bc65c2cbd242b17aa224cc77abb15beba937decfda457ba2b6e.json
+++ b/tests/integration/responses/recordings/6138d550fdb668e3a48400b36501a2a51cea83c4157e7d6fc35792837df40bdb.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_with_function_tools[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/01675632398179e5013b897e113ce0bfcb3c8c06352c4884b31b84bcc88b8e43.json b/tests/integration/responses/recordings/66ca3acff5d7d5880103bbfde08c52bac60f1038d84e0591a279b941a9b49b32.json
similarity index 99%
rename from tests/integration/responses/recordings/01675632398179e5013b897e113ce0bfcb3c8c06352c4884b31b84bcc88b8e43.json
rename to tests/integration/responses/recordings/66ca3acff5d7d5880103bbfde08c52bac60f1038d84e0591a279b941a9b49b32.json
index a46192e4cf..b6006df3af 100644
--- a/tests/integration/responses/recordings/01675632398179e5013b897e113ce0bfcb3c8c06352c4884b31b84bcc88b8e43.json
+++ b/tests/integration/responses/recordings/66ca3acff5d7d5880103bbfde08c52bac60f1038d84e0591a279b941a9b49b32.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/c2735c8a02e57ab45fd2b72ac4478bad6a39cb2ff3ac1a457fe8476f73146934.json b/tests/integration/responses/recordings/67c29a33f0d688d469c0d59feb5ca085c51bf886c6836fce3b630c1cc693e8cd.json
similarity index 99%
rename from tests/integration/responses/recordings/c2735c8a02e57ab45fd2b72ac4478bad6a39cb2ff3ac1a457fe8476f73146934.json
rename to tests/integration/responses/recordings/67c29a33f0d688d469c0d59feb5ca085c51bf886c6836fce3b630c1cc693e8cd.json
index b3d394a752..3ab5fcc7f5 100644
--- a/tests/integration/responses/recordings/c2735c8a02e57ab45fd2b72ac4478bad6a39cb2ff3ac1a457fe8476f73146934.json
+++ b/tests/integration/responses/recordings/67c29a33f0d688d469c0d59feb5ca085c51bf886c6836fce3b630c1cc693e8cd.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/1c75005e90b5447d913967026e8bd7c13afa92ce90d2af9913fe81db06e8c88b.json b/tests/integration/responses/recordings/6becbeb1c15378d9a82f60c3a9309dad73dd604848005240745cb0f729c4330f.json
similarity index 98%
rename from tests/integration/responses/recordings/1c75005e90b5447d913967026e8bd7c13afa92ce90d2af9913fe81db06e8c88b.json
rename to tests/integration/responses/recordings/6becbeb1c15378d9a82f60c3a9309dad73dd604848005240745cb0f729c4330f.json
index 5009f42036..9bd9eff2fa 100644
--- a/tests/integration/responses/recordings/1c75005e90b5447d913967026e8bd7c13afa92ce90d2af9913fe81db06e8c88b.json
+++ b/tests/integration/responses/recordings/6becbeb1c15378d9a82f60c3a9309dad73dd604848005240745cb0f729c4330f.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_non_streaming[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/97a9427f4c20f3fb104a6d04e3c08380f50a86a947b89ddd9477d09ec713313e.json b/tests/integration/responses/recordings/6f80b6307533eb1efc08bb9252ba14579a5f7114057b9b774d18b72e7a932fb2.json
similarity index 98%
rename from tests/integration/responses/recordings/97a9427f4c20f3fb104a6d04e3c08380f50a86a947b89ddd9477d09ec713313e.json
rename to tests/integration/responses/recordings/6f80b6307533eb1efc08bb9252ba14579a5f7114057b9b774d18b72e7a932fb2.json
index c931916e1f..a36d46108c 100644
--- a/tests/integration/responses/recordings/97a9427f4c20f3fb104a6d04e3c08380f50a86a947b89ddd9477d09ec713313e.json
+++ b/tests/integration/responses/recordings/6f80b6307533eb1efc08bb9252ba14579a5f7114057b9b774d18b72e7a932fb2.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[openai_client-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/565887f8230acc7918b316e3428650fa2f8aa13aa8a01859da53f1bc10d7c8d5.json b/tests/integration/responses/recordings/72e8dd36d46c572167f589fe59a32bcdfbfed5ba7f272c55f2cf412b099f7c93.json
similarity index 99%
rename from tests/integration/responses/recordings/565887f8230acc7918b316e3428650fa2f8aa13aa8a01859da53f1bc10d7c8d5.json
rename to tests/integration/responses/recordings/72e8dd36d46c572167f589fe59a32bcdfbfed5ba7f272c55f2cf412b099f7c93.json
index 7df6152346..21f98bd2fa 100644
--- a/tests/integration/responses/recordings/565887f8230acc7918b316e3428650fa2f8aa13aa8a01859da53f1bc10d7c8d5.json
+++ b/tests/integration/responses/recordings/72e8dd36d46c572167f589fe59a32bcdfbfed5ba7f272c55f2cf412b099f7c93.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/6b1b3d872eb6746dbfe9b8eed7fcbe23ca9e3408e7b3d8b99df407ae62f0a9f8.json b/tests/integration/responses/recordings/7fce3badd53d491bbae372c12f38743876b1177094e1a97d08bbd600900388e1.json
similarity index 99%
rename from tests/integration/responses/recordings/6b1b3d872eb6746dbfe9b8eed7fcbe23ca9e3408e7b3d8b99df407ae62f0a9f8.json
rename to tests/integration/responses/recordings/7fce3badd53d491bbae372c12f38743876b1177094e1a97d08bbd600900388e1.json
index df200b467d..a7573dded0 100644
--- a/tests/integration/responses/recordings/6b1b3d872eb6746dbfe9b8eed7fcbe23ca9e3408e7b3d8b99df407ae62f0a9f8.json
+++ b/tests/integration/responses/recordings/7fce3badd53d491bbae372c12f38743876b1177094e1a97d08bbd600900388e1.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/612c6642c5fb5b4203098183335b17ac0d121a04e39cc737b6cd2c1cb1f8ab30.json b/tests/integration/responses/recordings/86985e69a4be4073af20f65ba9ec927389f1b18c13691565186eba89fd1df24e.json
similarity index 98%
rename from tests/integration/responses/recordings/612c6642c5fb5b4203098183335b17ac0d121a04e39cc737b6cd2c1cb1f8ab30.json
rename to tests/integration/responses/recordings/86985e69a4be4073af20f65ba9ec927389f1b18c13691565186eba89fd1df24e.json
index 5daeb5e399..45a3daea46 100644
--- a/tests/integration/responses/recordings/612c6642c5fb5b4203098183335b17ac0d121a04e39cc737b6cd2c1cb1f8ab30.json
+++ b/tests/integration/responses/recordings/86985e69a4be4073af20f65ba9ec927389f1b18c13691565186eba89fd1df24e.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/255d2b2c743eb4102b0938c2ece15c5ce5a534fc3ee5ea5067b677a9007fab40.json b/tests/integration/responses/recordings/8fffbf821fc03c867d710ee20471566dd845588cce7605101332153175738d64.json
similarity index 98%
rename from tests/integration/responses/recordings/255d2b2c743eb4102b0938c2ece15c5ce5a534fc3ee5ea5067b677a9007fab40.json
rename to tests/integration/responses/recordings/8fffbf821fc03c867d710ee20471566dd845588cce7605101332153175738d64.json
index eef2f4abbb..5c1f4c5359 100644
--- a/tests/integration/responses/recordings/255d2b2c743eb4102b0938c2ece15c5ce5a534fc3ee5ea5067b677a9007fab40.json
+++ b/tests/integration/responses/recordings/8fffbf821fc03c867d710ee20471566dd845588cce7605101332153175738d64.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/b4669f2ae72ba853cd152e36d2e1aed1fc55ff53d48e9fe7371ce36e67b75445.json b/tests/integration/responses/recordings/9a097dbf69b822064c0e709fc32196ae1825730205c6ccd617c7ab88f57b1bb1.json
similarity index 99%
rename from tests/integration/responses/recordings/b4669f2ae72ba853cd152e36d2e1aed1fc55ff53d48e9fe7371ce36e67b75445.json
rename to tests/integration/responses/recordings/9a097dbf69b822064c0e709fc32196ae1825730205c6ccd617c7ab88f57b1bb1.json
index 1fccfa99c0..d0f580f350 100644
--- a/tests/integration/responses/recordings/b4669f2ae72ba853cd152e36d2e1aed1fc55ff53d48e9fe7371ce36e67b75445.json
+++ b/tests/integration/responses/recordings/9a097dbf69b822064c0e709fc32196ae1825730205c6ccd617c7ab88f57b1bb1.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_responses_errors.py::TestResponsesAPIStreamingErrors::test_completed_response_has_no_error[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/fff9dbea699f84c0f924ee9dbc91c9f5587eff0d27a554fd55b916da9662ce2c.json b/tests/integration/responses/recordings/a0a8368aca6a21aff59a88f8eadae3bf8cbaa47c2c83fcfc439c29c3693cf6a3.json
similarity index 98%
rename from tests/integration/responses/recordings/fff9dbea699f84c0f924ee9dbc91c9f5587eff0d27a554fd55b916da9662ce2c.json
rename to tests/integration/responses/recordings/a0a8368aca6a21aff59a88f8eadae3bf8cbaa47c2c83fcfc439c29c3693cf6a3.json
index 4532cdae61..c70c2c852b 100644
--- a/tests/integration/responses/recordings/fff9dbea699f84c0f924ee9dbc91c9f5587eff0d27a554fd55b916da9662ce2c.json
+++ b/tests/integration/responses/recordings/a0a8368aca6a21aff59a88f8eadae3bf8cbaa47c2c83fcfc439c29c3693cf6a3.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_backward_compatibility[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/611613fb2c06bb1ed79fbe5dae18ac6e2a650340832aa7538498b5b26aff1f63.json b/tests/integration/responses/recordings/a3fea537eafccc69367c15360b09ca732197f2fbd3757e8afe5bc583df8f1c2c.json
similarity index 98%
rename from tests/integration/responses/recordings/611613fb2c06bb1ed79fbe5dae18ac6e2a650340832aa7538498b5b26aff1f63.json
rename to tests/integration/responses/recordings/a3fea537eafccc69367c15360b09ca732197f2fbd3757e8afe5bc583df8f1c2c.json
index 07098e7e33..c123f0e399 100644
--- a/tests/integration/responses/recordings/611613fb2c06bb1ed79fbe5dae18ac6e2a650340832aa7538498b5b26aff1f63.json
+++ b/tests/integration/responses/recordings/a3fea537eafccc69367c15360b09ca732197f2fbd3757e8afe5bc583df8f1c2c.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_streaming[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/516a186291f3c6a4cf45ceecd49e9f8f5b396f37caa0c557e664c608b29bcfcc.json b/tests/integration/responses/recordings/a640e097dcf17544825aa20ce937c0b783de5f4bd2916d9e370145768551075c.json
similarity index 98%
rename from tests/integration/responses/recordings/516a186291f3c6a4cf45ceecd49e9f8f5b396f37caa0c557e664c608b29bcfcc.json
rename to tests/integration/responses/recordings/a640e097dcf17544825aa20ce937c0b783de5f4bd2916d9e370145768551075c.json
index f5e7895174..e5c913a998 100644
--- a/tests/integration/responses/recordings/516a186291f3c6a4cf45ceecd49e9f8f5b396f37caa0c557e664c608b29bcfcc.json
+++ b/tests/integration/responses/recordings/a640e097dcf17544825aa20ce937c0b783de5f4bd2916d9e370145768551075c.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/68da149c99f86029626e9bca5df9a72d5204a4423c5caca31efe60384c4b2b37.json b/tests/integration/responses/recordings/aaed9594b345da41db3dd6754b1e7c6366dff704ea943bda851c2d3a658d44ec.json
similarity index 98%
rename from tests/integration/responses/recordings/68da149c99f86029626e9bca5df9a72d5204a4423c5caca31efe60384c4b2b37.json
rename to tests/integration/responses/recordings/aaed9594b345da41db3dd6754b1e7c6366dff704ea943bda851c2d3a658d44ec.json
index d0fb69b3ea..a58476f361 100644
--- a/tests/integration/responses/recordings/68da149c99f86029626e9bca5df9a72d5204a4423c5caca31efe60384c4b2b37.json
+++ b/tests/integration/responses/recordings/aaed9594b345da41db3dd6754b1e7c6366dff704ea943bda851c2d3a658d44ec.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/555654e0ddbd51a65af179d9300fdedc42077a90849824b037f9dae1edda4102.json b/tests/integration/responses/recordings/b2741f85f52f5eb88f5401e02cad1d4f5d4a9bf61953e12b0441c0c4c2a2831a.json
similarity index 98%
rename from tests/integration/responses/recordings/555654e0ddbd51a65af179d9300fdedc42077a90849824b037f9dae1edda4102.json
rename to tests/integration/responses/recordings/b2741f85f52f5eb88f5401e02cad1d4f5d4a9bf61953e12b0441c0c4c2a2831a.json
index 49b997b2ed..dedcc9a08e 100644
--- a/tests/integration/responses/recordings/555654e0ddbd51a65af179d9300fdedc42077a90849824b037f9dae1edda4102.json
+++ b/tests/integration/responses/recordings/b2741f85f52f5eb88f5401e02cad1d4f5d4a9bf61953e12b0441c0c4c2a2831a.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/f8eb947297e60407c022e3326b5bbcfa0e186abf83bf4b3a826640715e14a3a9.json b/tests/integration/responses/recordings/baa7ab6db86f51910a35c45cf2234670f308b01d4b905faf3e1313901d2a6b61.json
similarity index 99%
rename from tests/integration/responses/recordings/f8eb947297e60407c022e3326b5bbcfa0e186abf83bf4b3a826640715e14a3a9.json
rename to tests/integration/responses/recordings/baa7ab6db86f51910a35c45cf2234670f308b01d4b905faf3e1313901d2a6b61.json
index 5e63214137..d4ea806034 100644
--- a/tests/integration/responses/recordings/f8eb947297e60407c022e3326b5bbcfa0e186abf83bf4b3a826640715e14a3a9.json
+++ b/tests/integration/responses/recordings/baa7ab6db86f51910a35c45cf2234670f308b01d4b905faf3e1313901d2a6b61.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_incremental_content[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/bf90e1d57fcb8ac4ebb20ee417bb219a091ac9c45000f4c22e4fd16716dfaceb.json b/tests/integration/responses/recordings/c40e45bc283154cb2e212a514e5ab935e8851add8edeef48d6a65764716ce634.json
similarity index 98%
rename from tests/integration/responses/recordings/bf90e1d57fcb8ac4ebb20ee417bb219a091ac9c45000f4c22e4fd16716dfaceb.json
rename to tests/integration/responses/recordings/c40e45bc283154cb2e212a514e5ab935e8851add8edeef48d6a65764716ce634.json
index 56e96e3284..98c4e919a2 100644
--- a/tests/integration/responses/recordings/bf90e1d57fcb8ac4ebb20ee417bb219a091ac9c45000f4c22e4fd16716dfaceb.json
+++ b/tests/integration/responses/recordings/c40e45bc283154cb2e212a514e5ab935e8851add8edeef48d6a65764716ce634.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_backward_compatibility[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/da6a362ff3aadd43bc06ba806b0f3710f565e33c63b5f1aff08a68807e01ce8a.json b/tests/integration/responses/recordings/cc0c289ff3a932cbfc46188197ebf4074e790c3bbdae24c280c711aa752fb6b4.json
similarity index 99%
rename from tests/integration/responses/recordings/da6a362ff3aadd43bc06ba806b0f3710f565e33c63b5f1aff08a68807e01ce8a.json
rename to tests/integration/responses/recordings/cc0c289ff3a932cbfc46188197ebf4074e790c3bbdae24c280c711aa752fb6b4.json
index cadff507ca..e7e7f1d014 100644
--- a/tests/integration/responses/recordings/da6a362ff3aadd43bc06ba806b0f3710f565e33c63b5f1aff08a68807e01ce8a.json
+++ b/tests/integration/responses/recordings/cc0c289ff3a932cbfc46188197ebf4074e790c3bbdae24c280c711aa752fb6b4.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-saturn]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/2df2850b224738a8f7b945cc06681f73f38988a267cd1ba6a546a8352a57ffa5.json b/tests/integration/responses/recordings/d0996e5e764315da97c15eb62c37f1c374b3e46a69f5526ef45f70fd24fd18b9.json
similarity index 99%
rename from tests/integration/responses/recordings/2df2850b224738a8f7b945cc06681f73f38988a267cd1ba6a546a8352a57ffa5.json
rename to tests/integration/responses/recordings/d0996e5e764315da97c15eb62c37f1c374b3e46a69f5526ef45f70fd24fd18b9.json
index 5e507106ac..e102b33960 100644
--- a/tests/integration/responses/recordings/2df2850b224738a8f7b945cc06681f73f38988a267cd1ba6a546a8352a57ffa5.json
+++ b/tests/integration/responses/recordings/d0996e5e764315da97c15eb62c37f1c374b3e46a69f5526ef45f70fd24fd18b9.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/8dba857db6730d6ab259e2c80f5cdd1f8bbcce22fb4a14df4389f96e2db04eaa.json b/tests/integration/responses/recordings/d2cf88feac32878a5072007ed404bbc48650109698c6d0ac664a92426b242301.json
similarity index 98%
rename from tests/integration/responses/recordings/8dba857db6730d6ab259e2c80f5cdd1f8bbcce22fb4a14df4389f96e2db04eaa.json
rename to tests/integration/responses/recordings/d2cf88feac32878a5072007ed404bbc48650109698c6d0ac664a92426b242301.json
index ce613bbaf8..315fb4ae9f 100644
--- a/tests/integration/responses/recordings/8dba857db6730d6ab259e2c80f5cdd1f8bbcce22fb4a14df4389f96e2db04eaa.json
+++ b/tests/integration/responses/recordings/d2cf88feac32878a5072007ed404bbc48650109698c6d0ac664a92426b242301.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_multi_turn_and_streaming[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/448f5aad2604000a0616341f2a97086fef7087f561fa24e21b5f665b04cebff2.json b/tests/integration/responses/recordings/d490ef38aa100255e6031998088783380dbe25ba782d6e71e1ff3544628c06ab.json
similarity index 99%
rename from tests/integration/responses/recordings/448f5aad2604000a0616341f2a97086fef7087f561fa24e21b5f665b04cebff2.json
rename to tests/integration/responses/recordings/d490ef38aa100255e6031998088783380dbe25ba782d6e71e1ff3544628c06ab.json
index b1ada77efa..16b936f4e4 100644
--- a/tests/integration/responses/recordings/448f5aad2604000a0616341f2a97086fef7087f561fa24e21b5f665b04cebff2.json
+++ b/tests/integration/responses/recordings/d490ef38aa100255e6031998088783380dbe25ba782d6e71e1ff3544628c06ab.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_non_streaming[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/723ec06a9462e87536dbfaeb2b9396943dc3ea635425d9a1262212a1ccec2910.json b/tests/integration/responses/recordings/d508c01d0d6670167da0e8758e9ee46fcde6afb092f0b5b57da9fb1c40b3d506.json
similarity index 98%
rename from tests/integration/responses/recordings/723ec06a9462e87536dbfaeb2b9396943dc3ea635425d9a1262212a1ccec2910.json
rename to tests/integration/responses/recordings/d508c01d0d6670167da0e8758e9ee46fcde6afb092f0b5b57da9fb1c40b3d506.json
index 5c6a486afc..b1cd3601a6 100644
--- a/tests/integration/responses/recordings/723ec06a9462e87536dbfaeb2b9396943dc3ea635425d9a1262212a1ccec2910.json
+++ b/tests/integration/responses/recordings/d508c01d0d6670167da0e8758e9ee46fcde6afb092f0b5b57da9fb1c40b3d506.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_multi_turn[openai_client-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/9ecdff3fb145793283aa9233afb27e687c1aa26f93586f5a624b7a78c4c15c3c.json b/tests/integration/responses/recordings/d510a5744713675c17f35596254f6704f32c46c11a85e585906992614239da54.json
similarity index 95%
rename from tests/integration/responses/recordings/9ecdff3fb145793283aa9233afb27e687c1aa26f93586f5a624b7a78c4c15c3c.json
rename to tests/integration/responses/recordings/d510a5744713675c17f35596254f6704f32c46c11a85e585906992614239da54.json
index 7d7b3f6d34..e0bbca0dc2 100644
--- a/tests/integration/responses/recordings/9ecdff3fb145793283aa9233afb27e687c1aa26f93586f5a624b7a78c4c15c3c.json
+++ b/tests/integration/responses/recordings/d510a5744713675c17f35596254f6704f32c46c11a85e585906992614239da54.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_responses_errors.py::TestResponsesAPIStreamingErrors::test_non_vision_model_with_base64_image_returns_server_error[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/06653e6e4563eb378ee18044c32cd36e1ace560b2ab90d84c901b67dde0827f8.json b/tests/integration/responses/recordings/d6784a627287aef1c3171bdbf1d4bdb3523bd9c5b73088a745c431dc2514ea16.json
similarity index 99%
rename from tests/integration/responses/recordings/06653e6e4563eb378ee18044c32cd36e1ace560b2ab90d84c901b67dde0827f8.json
rename to tests/integration/responses/recordings/d6784a627287aef1c3171bdbf1d4bdb3523bd9c5b73088a745c431dc2514ea16.json
index b5d6c97404..75c1947e9b 100644
--- a/tests/integration/responses/recordings/06653e6e4563eb378ee18044c32cd36e1ace560b2ab90d84c901b67dde0827f8.json
+++ b/tests/integration/responses/recordings/d6784a627287aef1c3171bdbf1d4bdb3523bd9c5b73088a745c431dc2514ea16.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_prompt_templates.py::test_prompt_template_with_multi_turn[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/95d123bff66974b73beabe29f92030b8a24ebda6f06e83ed31b9a0749c60199e.json b/tests/integration/responses/recordings/d7b50078103e88d20c4955ba23c55193ed0cc70e43e577cbb0a3dec164e250c4.json
similarity index 98%
rename from tests/integration/responses/recordings/95d123bff66974b73beabe29f92030b8a24ebda6f06e83ed31b9a0749c60199e.json
rename to tests/integration/responses/recordings/d7b50078103e88d20c4955ba23c55193ed0cc70e43e577cbb0a3dec164e250c4.json
index 1c9db0929b..68f7c36128 100644
--- a/tests/integration/responses/recordings/95d123bff66974b73beabe29f92030b8a24ebda6f06e83ed31b9a0749c60199e.json
+++ b/tests/integration/responses/recordings/d7b50078103e88d20c4955ba23c55193ed0cc70e43e577cbb0a3dec164e250c4.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_prompt_templates.py::test_prompt_template_no_variables[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/eebe9f0ac51599aa9d0f81b0d0a8b2a279054d2263d728fc1eb0b6250004b339.json b/tests/integration/responses/recordings/d89ee3f0b9b9ae608fd6bfd7efa57378f61a28811513ccddddffc77ff05a0417.json
similarity index 98%
rename from tests/integration/responses/recordings/eebe9f0ac51599aa9d0f81b0d0a8b2a279054d2263d728fc1eb0b6250004b339.json
rename to tests/integration/responses/recordings/d89ee3f0b9b9ae608fd6bfd7efa57378f61a28811513ccddddffc77ff05a0417.json
index 15df46028a..0db0c69e13 100644
--- a/tests/integration/responses/recordings/eebe9f0ac51599aa9d0f81b0d0a8b2a279054d2263d728fc1eb0b6250004b339.json
+++ b/tests/integration/responses/recordings/d89ee3f0b9b9ae608fd6bfd7efa57378f61a28811513ccddddffc77ff05a0417.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_mcp_authentication.py::test_mcp_authorization_bearer[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/3d42f101f95388471691e0c8d8c1a37a67a2234432874a79b684d8bd40f73eff.json b/tests/integration/responses/recordings/e779e2713de2d91ee559033a5e77397d77d36726335d9c6c1b22e5c0ad1c22ac.json
similarity index 98%
rename from tests/integration/responses/recordings/3d42f101f95388471691e0c8d8c1a37a67a2234432874a79b684d8bd40f73eff.json
rename to tests/integration/responses/recordings/e779e2713de2d91ee559033a5e77397d77d36726335d9c6c1b22e5c0ad1c22ac.json
index 17f2fac8a9..1cb39b7331 100644
--- a/tests/integration/responses/recordings/3d42f101f95388471691e0c8d8c1a37a67a2234432874a79b684d8bd40f73eff.json
+++ b/tests/integration/responses/recordings/e779e2713de2d91ee559033a5e77397d77d36726335d9c6c1b22e5c0ad1c22ac.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=bedrock/openai.gpt-oss-20b-earth]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/2eafd5c6eb8e2cff3433007bf23b19e6d66547b2dcab1b1892d5bd53f8ad34b4.json b/tests/integration/responses/recordings/ecb3bc2e13c604f0003308df4fdb94149395c39cb195974f2893fbf340e50589.json
similarity index 95%
rename from tests/integration/responses/recordings/2eafd5c6eb8e2cff3433007bf23b19e6d66547b2dcab1b1892d5bd53f8ad34b4.json
rename to tests/integration/responses/recordings/ecb3bc2e13c604f0003308df4fdb94149395c39cb195974f2893fbf340e50589.json
index 7a7249a3c3..c06fda78f5 100644
--- a/tests/integration/responses/recordings/2eafd5c6eb8e2cff3433007bf23b19e6d66547b2dcab1b1892d5bd53f8ad34b4.json
+++ b/tests/integration/responses/recordings/ecb3bc2e13c604f0003308df4fdb94149395c39cb195974f2893fbf340e50589.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_responses_errors.py::TestResponsesAPIStreamingErrors::test_non_vision_model_returns_error_for_image_input[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/f7ba1ce7bbeef963f2002b812fd07386cecf9179bfc6f92a2e09348edbfd7f07.json b/tests/integration/responses/recordings/ee3d5360f48357a1f00d8fc9f1b01844fcb08bf9ff738f34d7f915514173f2c4.json
similarity index 99%
rename from tests/integration/responses/recordings/f7ba1ce7bbeef963f2002b812fd07386cecf9179bfc6f92a2e09348edbfd7f07.json
rename to tests/integration/responses/recordings/ee3d5360f48357a1f00d8fc9f1b01844fcb08bf9ff738f34d7f915514173f2c4.json
index fe4ed13ed2..8b7482585a 100644
--- a/tests/integration/responses/recordings/f7ba1ce7bbeef963f2002b812fd07386cecf9179bfc6f92a2e09348edbfd7f07.json
+++ b/tests/integration/responses/recordings/ee3d5360f48357a1f00d8fc9f1b01844fcb08bf9ff738f34d7f915514173f2c4.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[openai_client-txt=bedrock/openai.gpt-oss-20b-saturn]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/2a08014e5c5da31c16896c0e59ea432e71eb8eb83a3cf25a3e4ab7fa5700bd6b.json b/tests/integration/responses/recordings/f4770c02fdb697b36717c6713dacd87b12b16b020611dd32866110cbf0c699d9.json
similarity index 98%
rename from tests/integration/responses/recordings/2a08014e5c5da31c16896c0e59ea432e71eb8eb83a3cf25a3e4ab7fa5700bd6b.json
rename to tests/integration/responses/recordings/f4770c02fdb697b36717c6713dacd87b12b16b020611dd32866110cbf0c699d9.json
index 5670d23ef1..ae54912b15 100644
--- a/tests/integration/responses/recordings/2a08014e5c5da31c16896c0e59ea432e71eb8eb83a3cf25a3e4ab7fa5700bd6b.json
+++ b/tests/integration/responses/recordings/f4770c02fdb697b36717c6713dacd87b12b16b020611dd32866110cbf0c699d9.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_reasoning.py::test_reasoning_basic_streaming[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/b4d86c5231fc155339edbcb937b8e91a4be8c2c171a49120991e0eab9582b806.json b/tests/integration/responses/recordings/f91a97f69204421e717fc7d7660ea2123789abe1d080020cbe97402d51e074eb.json
similarity index 98%
rename from tests/integration/responses/recordings/b4d86c5231fc155339edbcb937b8e91a4be8c2c171a49120991e0eab9582b806.json
rename to tests/integration/responses/recordings/f91a97f69204421e717fc7d7660ea2123789abe1d080020cbe97402d51e074eb.json
index 26d44c5072..ef8065854c 100644
--- a/tests/integration/responses/recordings/b4d86c5231fc155339edbcb937b8e91a4be8c2c171a49120991e0eab9582b806.json
+++ b/tests/integration/responses/recordings/f91a97f69204421e717fc7d7660ea2123789abe1d080020cbe97402d51e074eb.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_prompt_templates.py::test_basic_prompt_template[client_with_models-txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/49aa949715243abb9094994f88b7fdc37a54757ec4c6579e6640d4e62d148c87.json b/tests/integration/responses/recordings/fd5280a00f415baa514839d1903263035e94bea9462e613798c48a8c39697217.json
similarity index 98%
rename from tests/integration/responses/recordings/49aa949715243abb9094994f88b7fdc37a54757ec4c6579e6640d4e62d148c87.json
rename to tests/integration/responses/recordings/fd5280a00f415baa514839d1903263035e94bea9462e613798c48a8c39697217.json
index ab6e7f5468..e38ff0baf4 100644
--- a/tests/integration/responses/recordings/49aa949715243abb9094994f88b7fdc37a54757ec4c6579e6640d4e62d148c87.json
+++ b/tests/integration/responses/recordings/fd5280a00f415baa514839d1903263035e94bea9462e613798c48a8c39697217.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_conversation_responses.py::TestConversationResponses::test_conversation_context_loading[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/chat/completions",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/chat/completions",
     "headers": {},
     "body": {
       "model": "openai.gpt-oss-20b",
diff --git a/tests/integration/responses/recordings/models-ce53c4fcbb40594a10b3d758a338cccbd471bba93e04d38264d663c6dcec8578-31ec6c83.json b/tests/integration/responses/recordings/models-2f9eb79dbfa9f25cae2fdc739d4358a242c429f68d4fb635ddbeeb55595b7027-31ec6c83.json
similarity index 99%
rename from tests/integration/responses/recordings/models-ce53c4fcbb40594a10b3d758a338cccbd471bba93e04d38264d663c6dcec8578-31ec6c83.json
rename to tests/integration/responses/recordings/models-2f9eb79dbfa9f25cae2fdc739d4358a242c429f68d4fb635ddbeeb55595b7027-31ec6c83.json
index 5d2b85f526..2003c2f1db 100644
--- a/tests/integration/responses/recordings/models-ce53c4fcbb40594a10b3d758a338cccbd471bba93e04d38264d663c6dcec8578-31ec6c83.json
+++ b/tests/integration/responses/recordings/models-2f9eb79dbfa9f25cae2fdc739d4358a242c429f68d4fb635ddbeeb55595b7027-31ec6c83.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_reasoning.py::test_reasoning_basic_streaming[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models",
     "headers": {},
     "body": {},
     "endpoint": "/v1/models",
diff --git a/tests/integration/responses/recordings/models-d487ac38a1f3f3bb9ebc5254525862e9fcefdf01d5184789f6ba4b3039dc17ba-31ec6c83.json b/tests/integration/responses/recordings/models-48bcf1eca39637c8a9767f122e10ed8a6421f340543238789bbbe8c516b79114-31ec6c83.json
similarity index 99%
rename from tests/integration/responses/recordings/models-d487ac38a1f3f3bb9ebc5254525862e9fcefdf01d5184789f6ba4b3039dc17ba-31ec6c83.json
rename to tests/integration/responses/recordings/models-48bcf1eca39637c8a9767f122e10ed8a6421f340543238789bbbe8c516b79114-31ec6c83.json
index 7ed290b164..e95bcad895 100644
--- a/tests/integration/responses/recordings/models-d487ac38a1f3f3bb9ebc5254525862e9fcefdf01d5184789f6ba4b3039dc17ba-31ec6c83.json
+++ b/tests/integration/responses/recordings/models-48bcf1eca39637c8a9767f122e10ed8a6421f340543238789bbbe8c516b79114-31ec6c83.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_streaming[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models",
     "headers": {},
     "body": {},
     "endpoint": "/v1/models",
diff --git a/tests/integration/responses/recordings/models-0535aa1a1942a5b3191f8d08fb57457ef937494a6e6cb46918fd196f77016145-31ec6c83.json b/tests/integration/responses/recordings/models-c7239f3a8bc25af23106c20d3a0844f33356aaddf6594e08964cde42375f54dd-31ec6c83.json
similarity index 99%
rename from tests/integration/responses/recordings/models-0535aa1a1942a5b3191f8d08fb57457ef937494a6e6cb46918fd196f77016145-31ec6c83.json
rename to tests/integration/responses/recordings/models-c7239f3a8bc25af23106c20d3a0844f33356aaddf6594e08964cde42375f54dd-31ec6c83.json
index 0090cedb61..2c2d94637b 100644
--- a/tests/integration/responses/recordings/models-0535aa1a1942a5b3191f8d08fb57457ef937494a6e6cb46918fd196f77016145-31ec6c83.json
+++ b/tests/integration/responses/recordings/models-c7239f3a8bc25af23106c20d3a0844f33356aaddf6594e08964cde42375f54dd-31ec6c83.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_with_web_search[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models",
     "headers": {},
     "body": {},
     "endpoint": "/v1/models",
diff --git a/tests/integration/responses/recordings/models-03ffd7273c9bf0c84abd4750391c19dfc7545772caacd9bdd02c953c5c3fb6cd-31ec6c83.json b/tests/integration/responses/recordings/models-e3014308c84bda792e9ce27641b08ece821283fca29f3ced3ffbff35583d1f9b-31ec6c83.json
similarity index 99%
rename from tests/integration/responses/recordings/models-03ffd7273c9bf0c84abd4750391c19dfc7545772caacd9bdd02c953c5c3fb6cd-31ec6c83.json
rename to tests/integration/responses/recordings/models-e3014308c84bda792e9ce27641b08ece821283fca29f3ced3ffbff35583d1f9b-31ec6c83.json
index 7426212fe6..b223fe0158 100644
--- a/tests/integration/responses/recordings/models-03ffd7273c9bf0c84abd4750391c19dfc7545772caacd9bdd02c953c5c3fb6cd-31ec6c83.json
+++ b/tests/integration/responses/recordings/models-e3014308c84bda792e9ce27641b08ece821283fca29f3ced3ffbff35583d1f9b-31ec6c83.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_with_function_tools[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models",
     "headers": {},
     "body": {},
     "endpoint": "/v1/models",
diff --git a/tests/integration/responses/recordings/models-ffb55818ff4f53a50d044fbf570e18ed249a0245bfe68a7e91eed69ccf5f2193-31ec6c83.json b/tests/integration/responses/recordings/models-e400deaab35580927e2926f5b0d13b37ce15a07c9c0a13e6ecc80ea805e1d61b-31ec6c83.json
similarity index 99%
rename from tests/integration/responses/recordings/models-ffb55818ff4f53a50d044fbf570e18ed249a0245bfe68a7e91eed69ccf5f2193-31ec6c83.json
rename to tests/integration/responses/recordings/models-e400deaab35580927e2926f5b0d13b37ce15a07c9c0a13e6ecc80ea805e1d61b-31ec6c83.json
index f8b2bf499f..2ef3e42b7c 100644
--- a/tests/integration/responses/recordings/models-ffb55818ff4f53a50d044fbf570e18ed249a0245bfe68a7e91eed69ccf5f2193-31ec6c83.json
+++ b/tests/integration/responses/recordings/models-e400deaab35580927e2926f5b0d13b37ce15a07c9c0a13e6ecc80ea805e1d61b-31ec6c83.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_response_extra_body_guided_choice[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models",
     "headers": {},
     "body": {},
     "endpoint": "/v1/models",
diff --git a/tests/integration/responses/recordings/models-879ae66c787975780a21251d2755da3ba56850f94c78577db27ba1e5ff8a95e8-31ec6c83.json b/tests/integration/responses/recordings/models-fca7382830039be10bc959fcbb2299c4835921a886e43c02c4f68ff4872e3024-31ec6c83.json
similarity index 99%
rename from tests/integration/responses/recordings/models-879ae66c787975780a21251d2755da3ba56850f94c78577db27ba1e5ff8a95e8-31ec6c83.json
rename to tests/integration/responses/recordings/models-fca7382830039be10bc959fcbb2299c4835921a886e43c02c4f68ff4872e3024-31ec6c83.json
index fed332b52f..b5099e8a63 100644
--- a/tests/integration/responses/recordings/models-879ae66c787975780a21251d2755da3ba56850f94c78577db27ba1e5ff8a95e8-31ec6c83.json
+++ b/tests/integration/responses/recordings/models-fca7382830039be10bc959fcbb2299c4835921a886e43c02c4f68ff4872e3024-31ec6c83.json
@@ -2,7 +2,7 @@
   "test_id": "tests/integration/responses/test_basic_responses.py::test_include_logprobs_non_streaming[txt=bedrock/openai.gpt-oss-20b]",
   "request": {
     "method": "POST",
-    "url": "https://bedrock-mantle.us-east-2.api.aws/v1/v1/models",
+    "url": "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/v1/models",
     "headers": {},
     "body": {},
     "endpoint": "/v1/models",
diff --git a/tests/unit/providers/inference/bedrock/test_openai_sdk_integration.py b/tests/unit/providers/inference/bedrock/test_openai_sdk_integration.py
new file mode 100644
index 0000000000..b11c8ccb40
--- /dev/null
+++ b/tests/unit/providers/inference/bedrock/test_openai_sdk_integration.py
@@ -0,0 +1,154 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Tests for Bedrock OpenAI SDK integration with SigV4 authentication.
+
+These tests verify:
+1. Base URL uses bedrock-runtime hostname
+2. SigV4 signing uses "bedrock" as the service name (NOT "bedrock-runtime")
+3. In SigV4 mode, the api_key is a placeholder and no Bearer Authorization header is sent
+4. STS credentials work properly with temporary tokens
+"""
+
+import importlib.util
+from unittest.mock import MagicMock, patch
+
+import httpx
+import pytest
+
+HAS_BOTO3 = importlib.util.find_spec("boto3") is not None
+
+
+@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed")
+class TestBedrockOpenAISDKIntegration:
+    """Tests for Bedrock OpenAI SDK integration with SigV4 auth."""
+
+    def test_base_url_uses_bedrock_runtime_hostname(self):
+        """Base URL should be the regional bedrock-runtime host plus the /openai/v1 path."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        config = BedrockConfig(region_name="us-east-1")
+        adapter = BedrockInferenceAdapter(config=config)
+
+        base_url = adapter.get_base_url()
+        # The configured region is embedded in the host; "bedrock-runtime" is the endpoint prefix
+        assert base_url == "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1"
+
+    def test_sigv4_uses_bedrock_signing_name_not_bedrock_runtime(self):
+        """
+        SigV4 signing must use 'bedrock' as the service name, NOT 'bedrock-runtime'.
+
+        The hostname is bedrock-runtime.<region>.amazonaws.com (endpoint prefix),
+        but the SigV4 credential scope uses the signing name 'bedrock'.
+        This is defined in botocore's service metadata.
+        """
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        config = BedrockConfig(region_name="us-west-2")
+        adapter = BedrockInferenceAdapter(config=config)
+
+        with patch("llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth") as mock_auth_cls:
+            mock_auth_cls.return_value = MagicMock()
+            adapter._build_sigv4_http_client()
+
+            # call_args[1] is the kwargs dict of the mocked constructor call; "service" must be "bedrock"
+            call_kwargs = mock_auth_cls.call_args[1]
+            assert call_kwargs["service"] == "bedrock", (
+                "SigV4 must use signing name 'bedrock', not endpoint prefix 'bedrock-runtime'"
+            )
+
+    def test_sigv4_mode_uses_placeholder_api_key(self):
+        """In SigV4 mode, api_key should be a placeholder (SigV4 auth replaces the header)."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        config = BedrockConfig(region_name="us-east-1")
+        adapter = BedrockInferenceAdapter(config=config)
+
+        with patch.object(adapter, "get_request_provider_data", return_value=None):
+            # Patch BedrockSigV4Auth so building the client makes no real boto3 calls
+            with patch("llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth") as mock_auth_cls:
+                mock_auth = MagicMock()
+                mock_auth_cls.return_value = mock_auth
+
+                client = adapter.client
+
+                # OpenAI SDK requires a non-empty api_key for validation.
+                # We use a placeholder that SigV4 auth replaces with proper signature.
+                # This follows the same pattern as the OCI provider.
+                assert client.api_key == "<NOTUSED>"
+
+    def test_sigv4_authorization_header_format(self):
+        """SigV4 Authorization header should start with AWS4-HMAC-SHA256, not Bearer."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        captured_request = None
+
+        def capture_request(request: httpx.Request) -> httpx.Response:
+            nonlocal captured_request
+            captured_request = request
+            return httpx.Response(200, json={"status": "ok"})
+
+        mock_frozen_creds = MagicMock()
+        mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE"
+        mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+        mock_frozen_creds.token = None
+
+        with patch("boto3.Session") as mock_session_class:
+            mock_session = MagicMock()
+            mock_session_class.return_value = mock_session
+            mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds
+
+            # Sign with the "bedrock" signing name; the mocked session returns creds with no token
+            auth = BedrockSigV4Auth(region="us-west-2", service="bedrock")
+            transport = httpx.MockTransport(capture_request)
+
+            with httpx.Client(auth=auth, transport=transport) as client:
+                client.post(
+                    "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions",
+                    json={"model": "test"},
+                )
+
+        assert captured_request is not None
+        auth_header = captured_request.headers.get("authorization", "")
+
+        # The header that reached the mock transport must be SigV4, NOT Bearer
+        assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}"
+        assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer"
+
+    def test_sts_credentials_include_security_token(self):
+        """SigV4 auth should include x-amz-security-token for STS credentials."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        mock_frozen_creds = MagicMock()
+        mock_frozen_creds.access_key = "ASIAIOSFODNN7EXAMPLE"
+        mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+        mock_frozen_creds.token = "AQoDYXdzEJr...<remainder of security token>"
+
+        with patch("boto3.Session") as mock_session_class:
+            mock_session = MagicMock()
+            mock_session_class.return_value = mock_session
+            mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds
+
+            # Sign with the "bedrock" signing name; the mocked credentials carry a session token
+            auth = BedrockSigV4Auth(region="us-west-2", service="bedrock")
+
+            request = httpx.Request(
+                method="POST",
+                url="https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions",
+                headers={"content-type": "application/json"},
+                content=b'{"model": "test"}',
+            )
+
+            gen = auth.auth_flow(request)
+            signed_request = next(gen)
+
+            # The first value yielded by auth_flow is the signed request; it must carry the session token
+            assert "x-amz-security-token" in signed_request.headers
+            assert signed_request.headers["x-amz-security-token"] == mock_frozen_creds.token
diff --git a/tests/unit/providers/inference/bedrock/test_sigv4_auth.py b/tests/unit/providers/inference/bedrock/test_sigv4_auth.py
new file mode 100644
index 0000000000..a345247e00
--- /dev/null
+++ b/tests/unit/providers/inference/bedrock/test_sigv4_auth.py
@@ -0,0 +1,885 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Unit tests for Bedrock SigV4 authentication.
+
+These tests verify:
+1. SigV4 auth handler correctly signs requests
+2. Auth mode detection (bearer vs SigV4)
+3. Credential chain integration
+4. Error handling
+"""
+
+# importlib.util is used below (HAS_BOTO3) to check whether boto3 is available for SigV4 tests
+import importlib.util
+from unittest.mock import MagicMock, patch
+
+import httpx
+import pytest
+
+HAS_BOTO3 = importlib.util.find_spec("boto3") is not None
+
+
+@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed")
+class TestBedrockSigV4Auth:
+    """Tests for BedrockSigV4Auth httpx.Auth implementation."""
+
+    def test_auth_flow_signs_request(self):
+        """SigV4 auth should add AWS signature headers, issued for the session's access key."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        # Static credentials as handed back by get_frozen_credentials(): no session token
+        mock_frozen_creds = MagicMock()
+        mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE"
+        mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+        mock_frozen_creds.token = None
+
+        with patch("boto3.Session") as mock_session_class:
+            mock_session = MagicMock()
+            mock_session_class.return_value = mock_session
+            mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds
+
+            auth = BedrockSigV4Auth(region="us-east-1", service="bedrock")
+
+            # Create a test request
+            request = httpx.Request(
+                method="POST",
+                url="https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions",
+                headers={"content-type": "application/json"},
+                content=b'{"model": "test"}',
+            )
+
+            # Run auth flow
+            gen = auth.auth_flow(request)
+            signed_request = next(gen)
+
+            # Verify SigV4 headers were added
+            assert "authorization" in signed_request.headers
+            assert "x-amz-date" in signed_request.headers
+            authorization = signed_request.headers["authorization"]
+            assert "AWS4-HMAC-SHA256" in authorization
+
+            # The signature must be issued for the mocked access key and carry every SigV4 component
+            assert f"Credential={mock_frozen_creds.access_key}/" in authorization
+            assert "SignedHeaders=" in authorization and "Signature=" in authorization
+
+    def test_auth_flow_with_explicit_role_assumption(self):
+        """Supplying a role ARN must route credential loading through RefreshableBotoSession."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        assumed_creds = MagicMock()
+        assumed_creds.token = "exp_token"
+        assumed_creds.secret_key = "exp_secret"
+        assumed_creds.access_key = "ASIAEXP_ROLE_KEY"
+
+        with patch(
+            "llama_stack.providers.utils.bedrock.refreshable_boto_session.RefreshableBotoSession"
+        ) as refreshable_factory:
+            fake_session = MagicMock()
+            fake_session.get_credentials.return_value.get_frozen_credentials.return_value = assumed_creds
+            fake_refreshable = MagicMock()
+            fake_refreshable.refreshable_session.return_value = fake_session
+            refreshable_factory.return_value = fake_refreshable
+
+            auth = BedrockSigV4Auth(
+                region="us-east-1",
+                aws_role_arn="arn:aws:iam::123456789012:role/test-role",
+                aws_web_identity_token_file="/path/to/token",
+                aws_role_session_name="test-session",
+            )
+
+            request = httpx.Request(
+                method="POST",
+                url="https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions",
+                content=b"{}",
+            )
+
+            flow = auth.auth_flow(request)
+            signed = next(flow)
+
+            # The role / web-identity settings must reach RefreshableBotoSession unchanged
+            refreshable_factory.assert_called_once_with(
+                region_name="us-east-1",
+                aws_access_key_id=None,
+                aws_secret_access_key=None,
+                aws_session_token=None,
+                profile_name=None,
+                sts_arn="arn:aws:iam::123456789012:role/test-role",
+                web_identity_token_file="/path/to/token",
+                session_name="test-session",
+                session_ttl=3600,
+            )
+            assert signed.headers["x-amz-security-token"] == "exp_token"
+
+    def test_auth_flow_with_session_token(self):
+        """Credentials carrying a session token must yield an X-Amz-Security-Token header."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        temp_creds = MagicMock(
+            access_key="ASIAIOSFODNN7EXAMPLE",
+            secret_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+            token="FwoGZXIvYXdzEBYaDG...",  # STS session token
+        )
+
+        with patch("boto3.Session") as session_factory:
+            fake_session = MagicMock()
+            fake_session.get_credentials.return_value.get_frozen_credentials.return_value = temp_creds
+            session_factory.return_value = fake_session
+
+            auth = BedrockSigV4Auth(region="us-west-2", service="bedrock")
+
+            request = httpx.Request(
+                method="POST",
+                url="https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions",
+                headers={"content-type": "application/json"},
+                content=b'{"model": "test"}',
+            )
+
+            signed = next(auth.auth_flow(request))
+
+            # The token must be copied into the header unchanged
+            assert "x-amz-security-token" in signed.headers
+            assert signed.headers["x-amz-security-token"] == temp_creds.token
+
+    def test_auth_raises_on_missing_credentials(self):
+        """A session that yields no credentials must surface as a descriptive RuntimeError."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        with patch("boto3.Session") as session_factory:
+            # Simulate a session with no resolvable credentials: get_credentials() returns None
+            empty_session = MagicMock(**{"get_credentials.return_value": None})
+            session_factory.return_value = empty_session
+
+            auth = BedrockSigV4Auth(region="us-east-1")
+
+            request = httpx.Request(
+                method="POST",
+                url="https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions",
+                content=b"{}",
+            )
+
+            expected_error = pytest.raises(RuntimeError, match="Failed to load AWS credentials")
+            with expected_error:
+                next(auth.auth_flow(request))
+
+
+class TestBedrockConfigAuthDetection:
+    """Checks how BedrockConfig.has_bearer_token() classifies api_key values."""
+
+    def test_has_bearer_token_with_token(self):
+        """A non-empty api_key counts as a bearer token."""
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        # api_key is the alias through which auth_credential is supplied
+        bearer_config = BedrockConfig(api_key="my-bearer-token")
+        assert bearer_config.has_bearer_token() is True
+
+    def test_has_bearer_token_without_token(self):
+        """With no api_key supplied, no bearer token is reported."""
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        # Default construction: nothing is passed for api_key
+        assert BedrockConfig().has_bearer_token() is False
+
+    def test_has_bearer_token_with_empty_string(self):
+        """An empty api_key is reported as no bearer token."""
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        # "" must not be mistaken for a usable credential
+        assert BedrockConfig(api_key="").has_bearer_token() is False
+
+    def test_has_bearer_token_with_whitespace(self):
+        """An api_key made only of spaces is reported as no bearer token."""
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        # Three spaces: non-empty, yet blank
+        assert BedrockConfig(api_key="   ").has_bearer_token() is False
+
+
+class TestBedrockInferenceAdapterAuthMode:
+    """Tests for BedrockInferenceAdapter auth mode selection."""
+
+    def test_should_use_sigv4_when_no_bearer_token(self):
+        """With no bearer token in config or provider data, SigV4 is selected."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1"))
+
+        # No per-request provider data is available
+        no_provider_data = patch.object(adapter, "get_request_provider_data", return_value=None)
+        with no_provider_data:
+            assert adapter._should_use_sigv4() is True
+
+    def test_should_not_use_sigv4_when_bearer_token_in_config(self):
+        """A bearer token present in the config selects bearer auth over SigV4."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        # api_key is the alias used to supply the bearer token
+        bearer_config = BedrockConfig(region_name="us-east-1", api_key="my-bearer-token")
+        adapter = BedrockInferenceAdapter(config=bearer_config)
+
+        # Provider data is absent, so only the config can supply a token
+        no_provider_data = patch.object(adapter, "get_request_provider_data", return_value=None)
+        with no_provider_data:
+            assert adapter._should_use_sigv4() is False
+
+    def test_should_not_use_sigv4_when_bearer_token_in_provider_data(self):
+        """A bearer token supplied via provider data selects bearer auth over SigV4."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import (
+            BedrockConfig,
+            BedrockProviderDataValidator,
+        )
+
+        adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1"))
+
+        # The token arrives with the request rather than from the config
+        request_data = BedrockProviderDataValidator(aws_bearer_token_bedrock="per-request-token")
+        with patch.object(adapter, "get_request_provider_data", return_value=request_data):
+            assert adapter._should_use_sigv4() is False
+
+    def test_get_extra_client_params_skips_sigv4_client_when_bearer_override(self):
+        """Per-request bearer token override must not be silently discarded by the SigV4 client.
+
+        When the server starts in SigV4 mode (_sigv4_http_client is not None) but a request
+        arrives with aws_bearer_token_bedrock in provider data, get_extra_client_params()
+        must return {} so the OpenAI SDK uses the bearer token instead of SigV4 auth.
+        """
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import (
+            BedrockConfig,
+            BedrockProviderDataValidator,
+        )
+
+        config = BedrockConfig(region_name="us-east-1")
+        adapter = BedrockInferenceAdapter(config=config)
+
+        # Simulate that initialize() already built the SigV4 client; a sentinel mock is enough
+        # because this test only checks whether the client is handed out, never uses it
+        adapter._sigv4_http_client = MagicMock()
+
+        # Per-request bearer token override in provider data
+        provider_data = BedrockProviderDataValidator(aws_bearer_token_bedrock="per-request-token")
+        with patch.object(adapter, "get_request_provider_data", return_value=provider_data):
+            params = adapter.get_extra_client_params()
+
+            # Must return {} — the bearer token path must not receive the SigV4 http_client,
+            # which would strip and replace the Authorization header
+            assert params == {}
+
+    def test_get_extra_client_params_uses_sigv4_client_when_no_override(self):
+        """SigV4 client is returned when no per-request bearer token is present."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        config = BedrockConfig(region_name="us-east-1")
+        adapter = BedrockInferenceAdapter(config=config)
+
+        # Stand-in for the SigV4 http client; assigned directly instead of building a real one
+        mock_client = MagicMock()
+        adapter._sigv4_http_client = mock_client
+
+        with patch.object(adapter, "get_request_provider_data", return_value=None):
+            params = adapter.get_extra_client_params()
+            assert params == {"http_client": mock_client}
+
+    def test_should_use_sigv4_when_provider_data_token_is_whitespace(self):
+        """A whitespace-only token in provider data must leave the adapter in SigV4 mode."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import (
+            BedrockConfig,
+            BedrockProviderDataValidator,
+        )
+
+        adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1"))
+
+        # A blank per-request token must not count as a bearer token,
+        # so the adapter is expected to fall back to SigV4
+        blank_token_data = BedrockProviderDataValidator(aws_bearer_token_bedrock="   ")
+        with patch.object(adapter, "get_request_provider_data", return_value=blank_token_data):
+            assert adapter._should_use_sigv4() is True
+
+    def test_get_api_key_returns_placeholder_for_sigv4(self):
+        """In SigV4 mode get_api_key() yields the "<NOTUSED>" placeholder."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1"))
+
+        # OpenAIMixin validation needs some api_key value; in SigV4 mode the auth handler
+        # later swaps the resulting Bearer header for a SigV4 signature (OCI pattern)
+        no_provider_data = patch.object(adapter, "get_request_provider_data", return_value=None)
+        with no_provider_data:
+            placeholder = adapter.get_api_key()
+            assert placeholder == "<NOTUSED>"
+
+    @pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed")
+    def test_client_uses_sigv4_auth_when_no_bearer_token(self):
+        """_build_sigv4_http_client must construct BedrockSigV4Auth with the configured region and service."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-west-2"))
+
+        auth_target = "llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth"
+        with patch(auth_target) as auth_factory:
+            auth_factory.return_value = MagicMock()
+            adapter._build_sigv4_http_client()
+
+            # The signing service must be "bedrock" (not "bedrock-runtime"), in the configured region
+            signing_kwargs = auth_factory.call_args.kwargs
+            assert signing_kwargs["region"] == "us-west-2"
+            assert signing_kwargs["service"] == "bedrock"
+
+    @pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed")
+    def test_sigv4_http_client_cached_after_initialize(self):
+        """A _sigv4_http_client built once (as initialize() would) must be handed out again, not rebuilt."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        config = BedrockConfig(region_name="us-east-1")
+        adapter = BedrockInferenceAdapter(config=config)
+
+        with patch.object(adapter, "_build_sigv4_http_client") as mock_build:
+            mock_build.return_value = MagicMock()
+            # Simulate initialize() — called once
+            adapter._sigv4_http_client = adapter._build_sigv4_http_client()
+            assert mock_build.call_count == 1
+
+            # get_extra_client_params reuses the cached client, does NOT rebuild
+            with patch.object(adapter, "get_request_provider_data", return_value=None):
+                params = adapter.get_extra_client_params()
+            assert params == {"http_client": adapter._sigv4_http_client}
+            assert mock_build.call_count == 1  # still 1, not 2
+
+
+class TestBedrockInferenceAdapterAuthErrors:
+    """Checks the user-facing messages produced by BedrockInferenceAdapter._handle_auth_error()."""
+
+    def test_sigv4_auth_error_preserves_detail_in_internal_server_error(self):
+        """A SigV4 auth failure surfaces as InternalServerError with a generic, provider-neutral message."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+        from llama_stack_api.common.errors import InternalServerError
+
+        adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1"))
+        provider_error = RuntimeError("provider boom")
+
+        with pytest.raises(InternalServerError) as raised:
+            adapter._handle_auth_error("request signed with invalid credentials", provider_error, use_sigv4=True)
+
+        # Exact wording expected for a SigV4 (server-side credential) failure
+        expected = (
+            "Authentication failed because the configured cloud credentials could not authorize this request. "
+            "Please verify that the credentials available to the server are valid, unexpired, and allowed to access the requested model."
+        )
+        text = str(raised.value)
+        assert text == expected
+
+        # Configuration and provider names must not appear in the message
+        assert "AWS_ROLE_ARN" not in text
+        assert "Bedrock" not in text
+
+    def test_bearer_auth_error_preserves_detail_in_internal_server_error(self):
+        """A rejected bearer credential yields an actionable message free of header/config names."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+        from llama_stack_api.common.errors import InternalServerError
+
+        adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1"))
+        provider_error = RuntimeError("provider boom")
+
+        with pytest.raises(InternalServerError) as raised:
+            adapter._handle_auth_error("Error code: 401 - invalid api key format", provider_error, use_sigv4=False)
+
+        expected = (
+            "Authentication failed because the provided request credential was rejected. "
+            "Please verify that the credential is valid, unexpired, and authorized for this request."
+        )
+        text = str(raised.value)
+        assert text == expected
+
+        # Neither the provider-data header name nor the provider name may appear
+        assert "x-llamastack-provider-data" not in text
+        assert "Bedrock" not in text
+
+    def test_expired_bearer_auth_error_preserves_sanitized_detail(self):
+        """An expired bearer credential yields an actionable message free of config names."""
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+        from llama_stack_api.common.errors import InternalServerError
+
+        adapter = BedrockInferenceAdapter(config=BedrockConfig(region_name="us-east-1"))
+        provider_error = RuntimeError("provider boom")
+
+        with pytest.raises(InternalServerError) as raised:
+            adapter._handle_auth_error("Bearer Token has expired", provider_error, use_sigv4=False)
+
+        expected = (
+            "Authentication failed because the provided request credential has expired. "
+            "Please refresh the credential and try again, or remove it so the server can use its configured cloud credentials."
+        )
+        text = str(raised.value)
+        assert text == expected
+
+        # Neither the token's environment-variable name nor the provider name may appear
+        assert "AWS_BEARER_TOKEN_BEDROCK" not in text
+        assert "Bedrock" not in text
+
+
+@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed")
+class TestSigV4MockTransport:
+    """Integration-style tests using httpx.MockTransport to verify SigV4 signing."""
+
+    def test_sigv4_adds_aws4_signature_header(self):
+        """A request sent through the auth handler must carry an AWS4-HMAC-SHA256 Authorization header."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        # Requests reaching the transport are collected here
+        seen_requests: list[httpx.Request] = []
+
+        def record_request(request: httpx.Request) -> httpx.Response:
+            seen_requests.append(request)
+            return httpx.Response(200, json={"status": "ok"})
+
+        static_creds = MagicMock(
+            access_key="AKIAIOSFODNN7EXAMPLE",
+            secret_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+            token=None,
+        )
+
+        with patch("boto3.Session") as session_factory:
+            fake_session = MagicMock()
+            session_factory.return_value = fake_session
+            fake_session.get_credentials.return_value.get_frozen_credentials.return_value = static_creds
+
+            auth = BedrockSigV4Auth(region="us-east-1", service="bedrock")
+            transport = httpx.MockTransport(record_request)
+
+            with httpx.Client(auth=auth, transport=transport) as client:
+                client.post(
+                    "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions",
+                    json={"model": "test"},
+                )
+
+        assert seen_requests
+        auth_header = seen_requests[-1].headers.get("authorization", "")
+
+        # The header must have the SigV4 shape: scheme, credential scope, signed headers, signature
+        assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}"
+        assert "Credential=" in auth_header
+        assert "SignedHeaders=" in auth_header
+        assert "Signature=" in auth_header
+
+        # The Bearer scheme must be absent
+        assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer token"
+
+    def test_sigv4_no_bearer_header_when_empty_api_key(self):
+        """A request sent without any Authorization header must end up SigV4-signed, with no Bearer scheme."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        seen_requests: list[httpx.Request] = []
+
+        def record_request(request: httpx.Request) -> httpx.Response:
+            seen_requests.append(request)
+            return httpx.Response(200, json={"status": "ok"})
+
+        static_creds = MagicMock(
+            access_key="AKIAIOSFODNN7EXAMPLE",
+            secret_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+            token=None,
+        )
+
+        with patch("boto3.Session") as session_factory:
+            fake_session = MagicMock()
+            session_factory.return_value = fake_session
+            fake_session.get_credentials.return_value.get_frozen_credentials.return_value = static_creds
+
+            auth = BedrockSigV4Auth(region="us-east-1", service="bedrock")
+            transport = httpx.MockTransport(record_request)
+
+            with httpx.Client(auth=auth, transport=transport) as client:
+                client.post(
+                    "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions",
+                    json={"model": "test"},
+                )
+
+        assert seen_requests
+        auth_header = seen_requests[-1].headers.get("authorization", "")
+
+        # The only scheme on the wire must be SigV4
+        assert "AWS4-HMAC-SHA256" in auth_header
+        assert "Bearer" not in auth_header
+
+    def test_sigv4_includes_security_token_for_sts(self):
+        """With STS credentials, the request on the wire must carry x-amz-security-token."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        seen_requests: list[httpx.Request] = []
+
+        def record_request(request: httpx.Request) -> httpx.Response:
+            seen_requests.append(request)
+            return httpx.Response(200, json={"status": "ok"})
+
+        sts_creds = MagicMock(
+            access_key="ASIAIOSFODNN7EXAMPLE",
+            secret_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+            token="FwoGZXIvYXdzEBYaDGTestSessionToken",
+        )
+
+        with patch("boto3.Session") as session_factory:
+            fake_session = MagicMock()
+            session_factory.return_value = fake_session
+            fake_session.get_credentials.return_value.get_frozen_credentials.return_value = sts_creds
+
+            auth = BedrockSigV4Auth(region="us-west-2", service="bedrock")
+            transport = httpx.MockTransport(record_request)
+
+            with httpx.Client(auth=auth, transport=transport) as client:
+                client.post(
+                    "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1/chat/completions",
+                    json={"model": "test"},
+                )
+
+        assert seen_requests
+        sent_headers = seen_requests[-1].headers
+        # The session token must travel in its dedicated header, unchanged
+        assert "x-amz-security-token" in sent_headers
+        assert sent_headers["x-amz-security-token"] == sts_creds.token
+
+    def test_sigv4_replaces_existing_bearer_header(self):
+        """A pre-existing Bearer Authorization header must be overwritten by the SigV4 one."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        seen_requests: list[httpx.Request] = []
+
+        def record_request(request: httpx.Request) -> httpx.Response:
+            seen_requests.append(request)
+            return httpx.Response(200, json={"status": "ok"})
+
+        static_creds = MagicMock(
+            access_key="AKIAIOSFODNN7EXAMPLE",
+            secret_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+            token=None,
+        )
+
+        with patch("boto3.Session") as session_factory:
+            fake_session = MagicMock()
+            session_factory.return_value = fake_session
+            fake_session.get_credentials.return_value.get_frozen_credentials.return_value = static_creds
+
+            auth = BedrockSigV4Auth(region="us-east-1", service="bedrock")
+            transport = httpx.MockTransport(record_request)
+
+            with httpx.Client(auth=auth, transport=transport) as client:
+                # Mimic the OpenAI SDK, which sends "Bearer <NOTUSED>" for the placeholder key
+                client.post(
+                    "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions",
+                    json={"model": "test"},
+                    headers={"Authorization": "Bearer <NOTUSED>"},
+                )
+
+        assert seen_requests
+        auth_header = seen_requests[-1].headers.get("authorization", "")
+
+        # The Bearer value must be replaced outright, not kept alongside the signature
+        assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}"
+        assert "Bearer" not in auth_header, "SigV4 auth should have replaced Bearer header"
+        assert "<NOTUSED>" not in auth_header, "Placeholder should be removed"
+
+    def test_sigv4_host_header_includes_port(self):
+        """Host header should keep a non-default port, and "host" must be among the signed headers."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        captured_request = None
+
+        def capture_request(request: httpx.Request) -> httpx.Response:
+            nonlocal captured_request
+            captured_request = request
+            return httpx.Response(200, json={"status": "ok"})
+
+        mock_frozen_creds = MagicMock()
+        mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE"
+        mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+        mock_frozen_creds.token = None
+
+        with patch("boto3.Session") as mock_session_class:
+            mock_session = MagicMock()
+            mock_session_class.return_value = mock_session
+            mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds
+
+            auth = BedrockSigV4Auth(region="us-east-1", service="bedrock")
+            transport = httpx.MockTransport(capture_request)
+
+            # Use non-default port
+            with httpx.Client(auth=auth, transport=transport) as client:
+                client.post(
+                    "https://localhost:8443/openai/v1/chat/completions",
+                    json={"model": "test"},
+                )
+
+        assert captured_request is not None
+
+        # Verify the Host header includes the port
+        host_header = captured_request.headers.get("host", "")
+        assert host_header == "localhost:8443", f"Expected host with port, got: {host_header}"
+
+        # "host" must be listed in SignedHeaders itself, not merely appear somewhere in the header
+        auth_header = captured_request.headers.get("authorization", "")
+        assert "host" in auth_header.partition("SignedHeaders=")[2].partition(",")[0].split(";")
+
+        # Verify SigV4 signature format and no Bearer token
+        assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}"
+        assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer token"
+
+
+@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed")
+class TestWebIdentityFederation:
+    """
+    Tests for Web Identity Federation (IRSA, GitHub Actions OIDC).
+
+    These tests verify that SigV4 auth works correctly with temporary credentials
+    obtained via AssumeRoleWithWebIdentity, as used in:
+    - Kubernetes/OpenShift with IRSA (IAM Roles for Service Accounts)
+    - GitHub Actions with OIDC (aws-actions/configure-aws-credentials)
+    """
+
+    def test_web_identity_credentials_include_session_token(self):
+        """
+        Temporary web-identity credentials must produce an x-amz-security-token header.
+
+        Under IRSA or GitHub Actions OIDC, boto3 obtains credentials through
+        AssumeRoleWithWebIdentity; those credentials come with a session token, and the
+        request only succeeds if that token is sent in the x-amz-security-token header.
+        """
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        seen_requests: list[httpx.Request] = []
+
+        def record_request(request: httpx.Request) -> httpx.Response:
+            seen_requests.append(request)
+            return httpx.Response(200, json={"status": "ok"})
+
+        # Stand-in for what AssumeRoleWithWebIdentity hands back
+        # (an ASIA access-key prefix marks temporary credentials, AKIA static ones)
+        web_identity_creds = MagicMock(
+            access_key="ASIAQWERTYUIOPASDFGH",
+            secret_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYzxcvbnm123",
+            token="IQoJb3JpZ2luX2VjEBYaCXVzLWVhc3QtMSJHMEUCIQDExample...",  # STS session token
+        )
+
+        with patch("boto3.Session") as session_factory:
+            fake_session = MagicMock()
+            session_factory.return_value = fake_session
+            fake_session.get_credentials.return_value.get_frozen_credentials.return_value = web_identity_creds
+
+            auth = BedrockSigV4Auth(region="us-east-2", service="bedrock")
+            transport = httpx.MockTransport(record_request)
+
+            with httpx.Client(auth=auth, transport=transport) as client:
+                client.post(
+                    "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions",
+                    json={
+                        "model": "us.meta.llama3-2-1b-instruct-v1:0",
+                        "messages": [{"role": "user", "content": "Hi"}],
+                    },
+                )
+
+        assert seen_requests
+        sent_headers = seen_requests[-1].headers
+        # The STS session token must be forwarded unchanged
+        assert "x-amz-security-token" in sent_headers
+        assert sent_headers["x-amz-security-token"] == web_identity_creds.token
+
+        # The signature must be SigV4, issued for the temporary key under the "bedrock" service scope
+        auth_header = sent_headers.get("authorization", "")
+        assert auth_header.startswith("AWS4-HMAC-SHA256")
+        assert "Credential=ASIAQWERTYUIOPASDFGH" in auth_header
+        assert "bedrock/aws4_request" in auth_header
+
+        # A Bearer scheme would conflict with SigV4 and must be absent
+        assert "Bearer" not in auth_header
+
+    def test_adapter_uses_sigv4_with_web_identity_env(self, monkeypatch):
+        """
+        BedrockInferenceAdapter should use SigV4 when web identity env vars are set.
+
+        This simulates the Kubernetes/GitHub Actions scenario where no bearer token
+        is configured but AWS credentials are available via web identity federation.
+        """
+        from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+        from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+
+        # Set web identity environment variables
+        monkeypatch.setenv("AWS_ROLE_ARN", "arn:aws:iam::123456789012:role/test-role")
+        monkeypatch.setenv("AWS_WEB_IDENTITY_TOKEN_FILE", "/var/run/secrets/token")
+        monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-2")
+
+        # Create adapter without bearer token (should trigger SigV4)
+        config = BedrockConfig(region_name="us-east-2")
+        adapter = BedrockInferenceAdapter(config=config)
+
+        with patch.object(adapter, "get_request_provider_data", return_value=None):
+            # Should use SigV4 since no bearer token is configured
+            assert adapter._should_use_sigv4() is True
+
+            # API key should be placeholder to satisfy OpenAIMixin validation (OCI pattern)
+            # SigV4 auth handler replaces Bearer header with proper SigV4 signature
+            assert adapter.get_api_key() == "<NOTUSED>"
+
+    def test_credential_refresh_returns_fresh_credentials(self):
+        """
+        SigV4 auth should get fresh credentials on each request.
+
+        Web identity credentials are temporary and expire. boto3's credential
+        chain handles refresh automatically, but we need to call get_frozen_credentials()
+        on each request to get the current valid credentials.
+        """
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        call_count = 0
+        captured_requests = []
+
+        def capture_request(request: httpx.Request) -> httpx.Response:
+            captured_requests.append(request)
+            return httpx.Response(200, json={"status": "ok"})
+
+        # Simulate credentials that change (as would happen after refresh)
+        initial_creds = MagicMock()
+        initial_creds.access_key = "ASIAFIRSTCREDENTIAL"
+        initial_creds.secret_key = "firstSecretKey123"
+        initial_creds.token = "firstSessionToken"
+
+        refreshed_creds = MagicMock()
+        refreshed_creds.access_key = "ASIASECONDCREDENTIAL"
+        refreshed_creds.secret_key = "secondSecretKey456"
+        refreshed_creds.token = "secondSessionToken"
+
+        def get_frozen_credentials():
+            nonlocal call_count
+            call_count += 1
+            # Return different credentials on second call (simulating refresh)
+            return initial_creds if call_count == 1 else refreshed_creds
+
+        with patch("boto3.Session") as mock_session_class:
+            mock_session = MagicMock()
+            mock_session_class.return_value = mock_session
+            mock_creds = MagicMock()
+            mock_creds.get_frozen_credentials = get_frozen_credentials
+            mock_session.get_credentials.return_value = mock_creds
+
+            auth = BedrockSigV4Auth(region="us-east-2", service="bedrock")
+            transport = httpx.MockTransport(capture_request)
+
+            with httpx.Client(auth=auth, transport=transport) as client:
+                # First request
+                client.post(
+                    "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions",
+                    json={"model": "test"},
+                )
+                # Second request (after simulated credential refresh)
+                client.post(
+                    "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions",
+                    json={"model": "test"},
+                )
+
+        assert len(captured_requests) == 2
+
+        # First request should use initial credentials
+        first_auth = captured_requests[0].headers.get("authorization", "")
+        assert "ASIAFIRSTCREDENTIAL" in first_auth
+        assert captured_requests[0].headers.get("x-amz-security-token") == "firstSessionToken"
+
+        # Second request should use refreshed credentials
+        second_auth = captured_requests[1].headers.get("authorization", "")
+        assert "ASIASECONDCREDENTIAL" in second_auth
+        assert captured_requests[1].headers.get("x-amz-security-token") == "secondSessionToken"
+
+
+@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed")
+class TestAsyncAuthFlow:
+    """Tests that BedrockSigV4Auth signs requests sent through httpx.AsyncClient."""
+
+    async def test_async_auth_flow_signs_request(self):
+        """Async auth flow should produce a SigV4 Authorization header with no Bearer token."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        captured_request = None
+
+        async def capture_request(request: httpx.Request) -> httpx.Response:
+            nonlocal captured_request
+            captured_request = request
+            return httpx.Response(200, json={"status": "ok"})
+
+        mock_frozen_creds = MagicMock()
+        mock_frozen_creds.access_key = "AKIAIOSFODNN7EXAMPLE"
+        mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+        mock_frozen_creds.token = None
+
+        with patch("boto3.Session") as mock_session_class:
+            mock_session = MagicMock()
+            mock_session_class.return_value = mock_session
+            mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds
+
+            auth = BedrockSigV4Auth(region="us-east-1", service="bedrock")
+            transport = httpx.MockTransport(capture_request)
+
+            async with httpx.AsyncClient(auth=auth, transport=transport) as client:
+                await client.post(
+                    "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1/chat/completions",
+                    json={"model": "test"},
+                )
+
+        assert captured_request is not None
+        auth_header = captured_request.headers.get("authorization", "")
+
+        # Verify SigV4 signature format
+        assert auth_header.startswith("AWS4-HMAC-SHA256"), f"Expected SigV4 header, got: {auth_header}"
+        assert "Credential=" in auth_header
+        assert "SignedHeaders=" in auth_header
+        assert "Signature=" in auth_header
+
+        # Verify NO Bearer token is present
+        assert "Bearer" not in auth_header, "SigV4 auth should not contain Bearer token"
+
+    async def test_async_auth_flow_includes_session_token(self):
+        """Async auth flow should include x-amz-security-token for STS credentials."""
+        from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+        captured_request = None
+
+        async def capture_request(request: httpx.Request) -> httpx.Response:
+            nonlocal captured_request
+            captured_request = request
+            return httpx.Response(200, json={"status": "ok"})
+
+        mock_frozen_creds = MagicMock()
+        mock_frozen_creds.access_key = "ASIAQWERTYUIOPASDFGH"
+        mock_frozen_creds.secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYzxcvbnm123"
+        mock_frozen_creds.token = "IQoJb3JpZ2luX2VjAsyncTest..."
+
+        with patch("boto3.Session") as mock_session_class:
+            mock_session = MagicMock()
+            mock_session_class.return_value = mock_session
+            mock_session.get_credentials.return_value.get_frozen_credentials.return_value = mock_frozen_creds
+
+            auth = BedrockSigV4Auth(region="us-east-2", service="bedrock")
+            transport = httpx.MockTransport(capture_request)
+
+            async with httpx.AsyncClient(auth=auth, transport=transport) as client:
+                await client.post(
+                    "https://bedrock-runtime.us-east-2.amazonaws.com/openai/v1/chat/completions",
+                    json={"model": "test"},
+                )
+
+        assert captured_request is not None
+
+        # Verify STS session token is included
+        assert "x-amz-security-token" in captured_request.headers
+        assert captured_request.headers["x-amz-security-token"] == mock_frozen_creds.token
diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py
index ed3987e898..3ab886ed0b 100644
--- a/tests/unit/providers/inference/test_bedrock_adapter.py
+++ b/tests/unit/providers/inference/test_bedrock_adapter.py
@@ -13,7 +13,7 @@
 
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
-from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
+from llama_stack_api import InternalServerError, OpenAIChatCompletionRequestWithExtraBody
 
 
 def test_adapter_initialization():
@@ -28,7 +28,7 @@ def test_client_url_construction():
     config = BedrockConfig(api_key="test-key", region_name="us-west-2")
     adapter = BedrockInferenceAdapter(config=config)
 
-    assert adapter.get_base_url() == "https://bedrock-mantle.us-west-2.api.aws/v1"
+    assert adapter.get_base_url() == "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1"
 
 
 def test_api_key_from_config():
@@ -51,7 +51,7 @@ def test_api_key_from_header_overrides_config():
 
 
 async def test_authentication_error_handling():
-    """Test that AuthenticationError from OpenAI client is converted to ValueError with helpful message"""
+    """Authentication failures should surface as a sanitized InternalServerError."""
     config = BedrockConfig(api_key="invalid-key", region_name="us-east-1")
     adapter = BedrockInferenceAdapter(config=config)
 
@@ -68,14 +68,19 @@ async def test_authentication_error_handling():
     BedrockInferenceAdapter.__bases__[0].openai_chat_completion = mock_super
 
     try:
-        with pytest.raises(ValueError) as exc_info:
+        with pytest.raises(InternalServerError) as exc_info:
             params = OpenAIChatCompletionRequestWithExtraBody(
                 model="test-model", messages=[{"role": "user", "content": "test"}]
             )
             await adapter.openai_chat_completion(params=params)
 
-        assert "AWS Bedrock authentication failed" in str(exc_info.value)
-        assert "Please verify your API key" in str(exc_info.value)
+        message = str(exc_info.value)
+        assert (
+            message == "Authentication failed because the provided request credential was rejected. "
+            "Please verify that the credential is valid, unexpired, and authorized for this request."
+        )
+        assert "Bedrock" not in message
+        assert "x-llamastack-provider-data" not in message
     finally:
         # Restore original method
         BedrockInferenceAdapter.__bases__[0].openai_chat_completion = original_method
diff --git a/tests/unit/providers/inference/test_bedrock_config.py b/tests/unit/providers/inference/test_bedrock_config.py
index 6220804261..60e18a45d0 100644
--- a/tests/unit/providers/inference/test_bedrock_config.py
+++ b/tests/unit/providers/inference/test_bedrock_config.py
@@ -35,5 +35,17 @@ def test_bedrock_config_sample():
     sample = BedrockConfig.sample_run_config()
     assert "api_key" in sample
     assert "region_name" in sample
+    assert "aws_role_arn" in sample
+    assert "aws_web_identity_token_file" in sample
     assert sample["api_key"] == "${env.AWS_BEARER_TOKEN_BEDROCK:=}"
     assert sample["region_name"] == "${env.AWS_DEFAULT_REGION:=us-east-2}"
+    assert sample["aws_role_arn"] == "${env.AWS_ROLE_ARN:=}"
+    assert sample["aws_web_identity_token_file"] == "${env.AWS_WEB_IDENTITY_TOKEN_FILE:=}"
+
+
+def test_bedrock_config_sts_fields(monkeypatch):
+    monkeypatch.setenv("AWS_ROLE_ARN", "arn:aws:iam::123:role/test")
+    monkeypatch.setenv("AWS_WEB_IDENTITY_TOKEN_FILE", "/tmp/token")
+    config = BedrockConfig()  # no explicit args: both STS fields must come from the env vars set above
+    assert config.aws_role_arn == "arn:aws:iam::123:role/test"
+    assert config.aws_web_identity_token_file == "/tmp/token"
diff --git a/tests/unit/providers/inference/test_bedrock_sts.py b/tests/unit/providers/inference/test_bedrock_sts.py
new file mode 100644
index 0000000000..65354765e3
--- /dev/null
+++ b/tests/unit/providers/inference/test_bedrock_sts.py
@@ -0,0 +1,84 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import importlib.util
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
+from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+from llama_stack.providers.utils.bedrock.sigv4_auth import BedrockSigV4Auth
+
+HAS_BOTO3 = importlib.util.find_spec("boto3") is not None
+
+
+def test_sigv4_auth_initialization():  # each constructor argument should land on its private attribute
+    auth = BedrockSigV4Auth(
+        region="us-east-1",
+        aws_role_arn="arn:aws:iam::123:role/test",
+        aws_web_identity_token_file="/tmp/token",
+        aws_role_session_name="test-session",
+        session_ttl=1800,
+    )
+    assert auth._region == "us-east-1"
+    assert auth._aws_role_arn == "arn:aws:iam::123:role/test"
+    assert auth._aws_web_identity_token_file == "/tmp/token"
+    assert auth._aws_role_session_name == "test-session"
+    assert auth._session_ttl == 1800
+
+
+@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 not installed")
+@patch("llama_stack.providers.utils.bedrock.sigv4_auth.logger")
+def test_sigv4_auth_gets_refreshable_session(mock_logger):  # mock_logger is injected by @patch; unused here
+    with patch(
+        "llama_stack.providers.utils.bedrock.refreshable_boto_session.RefreshableBotoSession"
+    ) as mock_refreshable:
+        mock_session = MagicMock()
+        mock_refreshable.return_value.refreshable_session.return_value = mock_session
+
+        auth = BedrockSigV4Auth(
+            region="us-east-1",
+            aws_role_arn="arn:aws:iam::123:role/test",
+            aws_web_identity_token_file="/tmp/token",
+        )
+        # Resolve credentials, then check how the refreshable session was requested.
+        auth._get_credentials()
+
+        mock_refreshable.assert_called_once_with(
+            region_name="us-east-1",
+            aws_access_key_id=None,
+            aws_secret_access_key=None,
+            aws_session_token=None,
+            profile_name=None,
+            sts_arn="arn:aws:iam::123:role/test",
+            web_identity_token_file="/tmp/token",
+            session_name=None,  # aws_role_session_name was not passed to BedrockSigV4Auth above
+            session_ttl=3600,  # session_ttl was not passed above; 3600 is the value expected in its absence
+        )
+        assert auth._session == mock_session
+
+
+def test_adapter_passes_sts_config_to_auth():
+    config = BedrockConfig(
+        region_name="us-west-2",
+        aws_role_arn="arn:aws:iam::123:role/test",
+        aws_web_identity_token_file="/tmp/token",
+        session_ttl=1800,
+    )
+    adapter = BedrockInferenceAdapter(config=config)
+    # Patch the auth class so the arguments the adapter passes to it can be asserted.
+    with patch("llama_stack.providers.utils.bedrock.sigv4_auth.BedrockSigV4Auth") as mock_auth:
+        mock_auth.return_value = MagicMock()
+        adapter._build_sigv4_http_client()
+
+        mock_auth.assert_called_once_with(
+            region="us-west-2",
+            service="bedrock",
+            aws_role_arn="arn:aws:iam::123:role/test",
+            aws_web_identity_token_file="/tmp/token",
+            session_ttl=1800,
+        )
diff --git a/tests/unit/providers/safety/test_bedrock_safety_adapter.py b/tests/unit/providers/safety/test_bedrock_safety_adapter.py
new file mode 100644
index 0000000000..d761075fab
--- /dev/null
+++ b/tests/unit/providers/safety/test_bedrock_safety_adapter.py
@@ -0,0 +1,35 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from unittest.mock import MagicMock, call, patch
+
+from llama_stack.providers.remote.safety.bedrock.bedrock import BedrockSafetyAdapter
+from llama_stack.providers.remote.safety.bedrock.config import BedrockSafetyConfig
+
+
+async def test_bedrock_safety_initialize_creates_clients():
+    config = BedrockSafetyConfig(
+        region_name="us-west-2",
+        aws_role_arn="arn:aws:iam::123:role/test",
+        aws_web_identity_token_file="/tmp/token",
+    )
+    adapter = BedrockSafetyAdapter(config=config)
+    # Two distinct mocks; side_effect below returns them in call order (first call, then second).
+    runtime_client = MagicMock(name="bedrock-runtime-client")
+    bedrock_client = MagicMock(name="bedrock-client")
+    with patch("llama_stack.providers.remote.safety.bedrock.bedrock.create_bedrock_client") as mock_create:
+        mock_create.side_effect = [runtime_client, bedrock_client]
+
+        await adapter.initialize()
+
+        assert adapter.bedrock_runtime_client is runtime_client
+        assert adapter.bedrock_client is bedrock_client
+        mock_create.assert_has_calls(
+            [
+                call(config),
+                call(config, "bedrock"),
+            ]
+        )
diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py
deleted file mode 100644
index c6d6e54baa..0000000000
--- a/tests/unit/providers/test_bedrock.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from types import SimpleNamespace
-from unittest.mock import AsyncMock, PropertyMock, patch
-
-from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
-from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
-from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
-
-
-def test_can_create_adapter():
-    config = BedrockConfig(api_key="test-key", region_name="us-east-1")
-    adapter = BedrockInferenceAdapter(config=config)
-
-    assert adapter is not None
-    assert adapter.config.region_name == "us-east-1"
-    assert adapter.get_api_key() == "test-key"
-
-
-def test_different_aws_regions():
-    # just check a couple regions to verify URL construction works
-    config = BedrockConfig(api_key="key", region_name="us-east-1")
-    adapter = BedrockInferenceAdapter(config=config)
-    assert adapter.get_base_url() == "https://bedrock-mantle.us-east-1.api.aws/v1"
-
-    config = BedrockConfig(api_key="key", region_name="eu-west-1")
-    adapter = BedrockInferenceAdapter(config=config)
-    assert adapter.get_base_url() == "https://bedrock-mantle.eu-west-1.api.aws/v1"
-
-
-async def test_basic_chat_completion():
-    """Test basic chat completion works with OpenAIMixin"""
-    config = BedrockConfig(api_key="k", region_name="us-east-1")
-    adapter = BedrockInferenceAdapter(config=config)
-
-    class FakeModelStore:
-        async def has_model(self, model_id):
-            return True
-
-        async def get_model(self, model_id):
-            return SimpleNamespace(provider_resource_id="meta.llama3-1-8b-instruct-v1:0")
-
-    adapter.model_store = FakeModelStore()
-
-    fake_response = SimpleNamespace(
-        id="chatcmpl-123",
-        choices=[SimpleNamespace(message=SimpleNamespace(content="Hello!", role="assistant"), finish_reason="stop")],
-    )
-
-    mock_create = AsyncMock(return_value=fake_response)
-
-    class FakeClient:
-        def __init__(self):
-            self.chat = SimpleNamespace(completions=SimpleNamespace(create=mock_create))
-
-    with patch.object(type(adapter), "client", new_callable=PropertyMock, return_value=FakeClient()):
-        params = OpenAIChatCompletionRequestWithExtraBody(
-            model="llama3-1-8b",
-            messages=[{"role": "user", "content": "hello"}],
-            stream=False,
-        )
-        response = await adapter.openai_chat_completion(params=params)
-
-        assert response.id == "chatcmpl-123"
-        assert mock_create.await_count == 1
diff --git a/tests/unit/providers/utils/inference/test_network_config.py b/tests/unit/providers/utils/inference/test_network_config.py
index 40678668c7..ea00c8d7aa 100644
--- a/tests/unit/providers/utils/inference/test_network_config.py
+++ b/tests/unit/providers/utils/inference/test_network_config.py
@@ -12,11 +12,13 @@
 import pytest
 
 from llama_stack.providers.utils.inference.http_client import (
-    _build_network_client_kwargs,
     _build_proxy_mounts,
     _build_ssl_context,
     build_http_client,
 )
+from llama_stack.providers.utils.inference.http_client import (
+    build_network_client_kwargs as _build_network_client_kwargs,
+)
 from llama_stack.providers.utils.inference.model_registry import (
     NetworkConfig,
     ProxyConfig,

From f53d7b4c291356c0953939d4376fad4de3657f7e Mon Sep 17 00:00:00 2001
From: skamenan7 <skamenan@redhat.com>
Date: Wed, 1 Apr 2026 11:02:04 -0400
Subject: [PATCH 2/2] fix: accept generated provider_matrix.md from hook

The provider-compat-matrix pre-commit hook generates this file from
recording data. Existing Bedrock recordings still use bedrock-mantle
(the old endpoint), so the generated matrix correctly lists it.

Signed-off-by: skamenan7 <skamenan@redhat.com>
---
 docs/docs/api-openai/provider_matrix.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docs/api-openai/provider_matrix.md b/docs/docs/api-openai/provider_matrix.md
index 8f42d3fee5..0e7d37cb00 100644
--- a/docs/docs/api-openai/provider_matrix.md
+++ b/docs/docs/api-openai/provider_matrix.md
@@ -33,7 +33,7 @@ Models, endpoints, and versions used during test recordings.
 | Provider | Model(s) | Endpoint | Version Info |
 |----------|----------|----------|--------------|
 | azure | gpt-4o | llama-stack-test.openai.azure.com, lls-test.openai.azure.com | openai sdk: 2.5.0 |
-| bedrock | openai.gpt-oss-20b | bedrock-runtime.us-east-2.amazonaws.com, bedrock-runtime.us-west-2.amazonaws.com | openai sdk: 2.5.0 |
+| bedrock | openai.gpt-oss-20b | bedrock-mantle.us-west-2.api.aws, bedrock-runtime.us-east-2.amazonaws.com | openai sdk: 2.5.0 |
 | ollama | gpt-oss:20b | — | openai sdk: 2.5.0, vllm server: 0.9.2rc2.dev136+g0b382b53a.d20250924 |
 | openai | gpt-4o, o4-mini, text-embedding-3-small | api.openai.com | openai sdk: 2.5.0 |
 | vllm | Qwen/Qwen3-0.6B | — | openai sdk: 2.5.0, vllm server: 0.18.1rc1.dev197+g0e9358c11 |