diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index 06e361debc..d903796d75 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -4357,6 +4357,71 @@ paths:
           description: Authorization token
           title: Authorization
         description: Authorization token
+  /v1/messages:  # path item exposing a single POST operation
+    post:
+      responses:
+        '200':  # success: JSON body or an SSE stream, per the two media types below
+          description: An AnthropicMessageResponse or a stream of Anthropic SSE events.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/AnthropicMessageResponse'
+            text/event-stream: {}  # streaming media type is declared without a schema
+        '400':  # this and the following error entries reference shared components/responses
+          description: Bad Request
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
+        '500':
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
+        default:  # catch-all for any status code not listed above
+          description: Default Response
+          $ref: '#/components/responses/DefaultError'
+      tags:
+      - Messages
+      summary: Create a message.
+      description: Create a message using the Anthropic Messages API format.
+      operationId: create_message_v1_messages_post
+      requestBody:
+        content:
+          application/json:  # JSON is the only request media type declared
+            schema:
+              $ref: '#/components/schemas/AnthropicCreateMessageRequest'
+        required: true  # a request body must always be sent
+  /v1/messages/count_tokens:  # path item exposing a single POST operation
+    post:
+      responses:
+        '200':  # success: JSON only; no streaming media type is declared here
+          description: Token count for the request.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/AnthropicCountTokensResponse'
+        '400':  # this and the following error entries reference shared components/responses
+          description: Bad Request
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
+        '500':
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
+        default:  # catch-all for any status code not listed above
+          description: Default Response
+          $ref: '#/components/responses/DefaultError'
+      tags:
+      - Messages
+      summary: Count tokens in a message.
+      description: Count the number of tokens in a message request.
+      operationId: count_message_tokens_v1_messages_count_tokens_post
+      requestBody:
+        content:
+          application/json:  # JSON is the only request media type declared
+            schema:
+              $ref: '#/components/schemas/AnthropicCountTokensRequest'
+        required: true  # a request body must always be sent
 components:
   schemas:
     Error:
@@ -11707,6 +11772,470 @@ components:
           - type: 'null'
       title: AllowedToolsFilter
       description: Filter configuration for restricting which MCP tools can be used.
+    AnthropicCountTokensRequest:  # token-counting request; only model and messages are required
+      properties:
+        model:
+          type: string
+          title: Model
+          description: The model to use for token counting.
+        messages:
+          items:
+            $ref: '#/components/schemas/AnthropicMessage'
+          type: array
+          title: Messages
+          description: The messages to count tokens for.
+        system:  # optional: a plain string, a list of text blocks, or null
+          anyOf:
+          - type: string
+          - items:
+              $ref: '#/components/schemas/AnthropicTextBlock'
+            type: array
+            title: list[AnthropicTextBlock]
+          - type: 'null'
+          title: string | list[AnthropicTextBlock]
+          description: System prompt.
+        tools:  # optional: a list of AnthropicToolDef, or null
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/AnthropicToolDef'
+            type: array
+          - type: 'null'
+          description: Tools to include in token count.
+      required:
+      - model
+      - messages
+      title: AnthropicCountTokensRequest
+      description: Request body for POST /v1/messages/count_tokens.
+    AnthropicCountTokensResponse:  # single-field result object
+      properties:
+        input_tokens:  # the only property, and it is required
+          type: integer
+          title: Input Tokens
+      required:
+      - input_tokens
+      title: AnthropicCountTokensResponse
+      description: Response from POST /v1/messages/count_tokens.
+    AnthropicCreateMessageRequest:  # message-creation request; model, messages, max_tokens required
+      properties:
+        model:
+          type: string
+          title: Model
+          description: The model to use for generation.
+        messages:
+          items:
+            $ref: '#/components/schemas/AnthropicMessage'
+          type: array
+          title: Messages
+          description: The messages in the conversation.
+        max_tokens:
+          type: integer
+          minimum: 1.0  # bound is written as a float although the type is integer
+          title: Max Tokens
+          description: The maximum number of tokens to generate.
+        system:  # optional: a plain string, a list of text blocks, or null
+          anyOf:
+          - type: string
+          - items:
+              $ref: '#/components/schemas/AnthropicTextBlock'
+            type: array
+            title: list[AnthropicTextBlock]
+          - type: 'null'
+          title: string | list[AnthropicTextBlock]
+          description: System prompt. A string or list of text blocks.
+        tools:  # optional: a list of AnthropicToolDef, or null
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/AnthropicToolDef'
+            type: array
+          - type: 'null'
+          description: Tools available to the model.
+        tool_choice:
+          anyOf:
+          - {}  # empty schema: any JSON value validates; the shapes in the description are not enforced
+          - type: 'null'
+          title: Tool Choice
+          description: "How the model should select tools. One of: 'auto', 'any', 'none', or {type: 'tool', name: '...'}."
+        stream:  # nullable boolean with a documented default of false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether to stream the response.
+          default: false
+        temperature:  # when not null, constrained to the closed range 0.0 to 1.0
+          anyOf:
+          - type: number
+            maximum: 1.0
+            minimum: 0.0
+          - type: 'null'
+          description: Sampling temperature.
+        top_p:  # when not null, constrained to the closed range 0.0 to 1.0
+          anyOf:
+          - type: number
+            maximum: 1.0
+            minimum: 0.0
+          - type: 'null'
+          description: Nucleus sampling parameter.
+        top_k:  # when not null, an integer of at least 1
+          anyOf:
+          - type: integer
+            minimum: 1.0
+          - type: 'null'
+          description: Top-k sampling parameter.
+        stop_sequences:
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
+          description: Custom stop sequences.
+        metadata:  # when not null, an object whose values must all be strings
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
+          description: Request metadata.
+        thinking:
+          anyOf:
+          - $ref: '#/components/schemas/AnthropicThinkingConfig'
+            title: AnthropicThinkingConfig
+          - type: 'null'
+          description: Extended thinking configuration.
+          title: AnthropicThinkingConfig
+        service_tier:  # free-form nullable string; no enum of allowed tiers is declared
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Service tier to use.
+      additionalProperties: true  # properties not declared above are accepted
+      required:  # every property not listed here is optional
+      - model
+      - messages
+      - max_tokens
+      title: AnthropicCreateMessageRequest
+      description: Request body for POST /v1/messages.
+    AnthropicImageBlock:  # content block whose type tag is "image"
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - image
+        source:
+          $ref: '#/components/schemas/AnthropicImageSource'
+      required:
+      - source
+      title: AnthropicImageBlock
+      description: An image content block.
+    AnthropicImageSource:  # image payload; media_type and data are required
+      properties:
+        type:  # base64 is the only value the enum allows; not listed under required
+          type: string
+          title: Type
+          enum:
+          - base64
+        media_type:
+          type: string
+          title: Media Type
+          description: MIME type of the image (e.g. image/png).
+        data:
+          type: string
+          title: Data
+          description: Base64-encoded image data.
+      required:
+      - media_type
+      - data
+      title: AnthropicImageSource
+      description: Source for an image content block.
+    AnthropicMessage:  # one conversation turn; role and content are both required
+      properties:
+        role:
+          type: string
+          enum:  # only these two roles are accepted
+          - user
+          - assistant
+          title: Role
+        content:  # either a plain string or a list of typed content blocks
+          anyOf:
+          - type: string
+          - items:
+              oneOf:  # each list item must match exactly one of the five block schemas
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              - $ref: '#/components/schemas/AnthropicToolUseBlock'
+                title: AnthropicToolUseBlock
+              - $ref: '#/components/schemas/AnthropicToolResultBlock-Input'
+                title: AnthropicToolResultBlock-Input
+              - $ref: '#/components/schemas/AnthropicThinkingBlock'
+                title: AnthropicThinkingBlock
+              discriminator:  # NOTE(review): none of the five variants lists type under required - confirm intended
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/AnthropicImageBlock'
+                  text: '#/components/schemas/AnthropicTextBlock'
+                  thinking: '#/components/schemas/AnthropicThinkingBlock'
+                  tool_result: '#/components/schemas/AnthropicToolResultBlock-Input'
+                  tool_use: '#/components/schemas/AnthropicToolUseBlock'
+              title: AnthropicTextBlock | ... (5 variants)
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock | ...]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock | ...]
+          description: 'Message content: a string for simple text, or a list of content blocks.'
+      required:
+      - role
+      - content
+      title: AnthropicMessage
+      description: A message in the conversation.
+    AnthropicMessageResponse:  # non-streaming response; id, content and model are required
+      properties:
+        id:
+          type: string
+          title: Id
+          description: Unique message ID (msg_ prefix).
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - message
+        role:  # single-value enum; not listed under required below
+          type: string
+          title: Role
+          enum:
+          - assistant
+        content:
+          items:
+            oneOf:  # each list item must match exactly one of the five block schemas
+            - $ref: '#/components/schemas/AnthropicTextBlock'
+              title: AnthropicTextBlock
+            - $ref: '#/components/schemas/AnthropicImageBlock'
+              title: AnthropicImageBlock
+            - $ref: '#/components/schemas/AnthropicToolUseBlock'
+              title: AnthropicToolUseBlock
+            - $ref: '#/components/schemas/AnthropicToolResultBlock-Output'
+              title: AnthropicToolResultBlock-Output
+            - $ref: '#/components/schemas/AnthropicThinkingBlock'
+              title: AnthropicThinkingBlock
+            discriminator:  # the type property selects the variant through the mapping
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/AnthropicImageBlock'
+                text: '#/components/schemas/AnthropicTextBlock'
+                thinking: '#/components/schemas/AnthropicThinkingBlock'
+                tool_result: '#/components/schemas/AnthropicToolResultBlock-Output'
+                tool_use: '#/components/schemas/AnthropicToolUseBlock'
+            title: AnthropicTextBlock | ... (5 variants)
+          type: array
+          title: Content
+          description: Response content blocks.
+        model:
+          type: string
+          title: Model
+        stop_reason:  # nullable free-form string; the values in the description are not an enum
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: 'Why the model stopped: end_turn, stop_sequence, tool_use, or max_tokens.'
+        stop_sequence:  # nullable string with no description in this spec
+          anyOf:
+          - type: string
+          - type: 'null'
+        usage:  # optional here: not listed under required
+          $ref: '#/components/schemas/AnthropicUsage'
+      required:
+      - id
+      - content
+      - model
+      title: AnthropicMessageResponse
+      description: Response from POST /v1/messages (non-streaming).
+    AnthropicTextBlock:  # content block whose type tag is "text"
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - text
+        text:
+          type: string
+          title: Text
+      required:  # text is the only required property
+      - text
+      title: AnthropicTextBlock
+      description: A text content block.
+    AnthropicThinkingBlock:  # content block whose type tag is "thinking"
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - thinking
+        thinking:
+          type: string
+          title: Thinking
+          description: The model's thinking text.
+        signature:  # optional and nullable
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Signature for the thinking block.
+      required:
+      - thinking
+      title: AnthropicThinkingBlock
+      description: A thinking content block (extended thinking).
+    AnthropicThinkingConfig:  # declares no required list, so every property is optional
+      properties:
+        type:  # one of three modes; documented default is enabled
+          type: string
+          enum:
+          - enabled
+          - disabled
+          - adaptive
+          title: Type
+          default: enabled
+        budget_tokens:  # when not null, an integer of at least 1
+          anyOf:
+          - type: integer
+            minimum: 1.0
+          - type: 'null'
+          description: Maximum tokens for thinking.
+      title: AnthropicThinkingConfig
+      description: Configuration for extended thinking.
+    AnthropicToolDef:  # tool declaration; name and input_schema are required
+      properties:
+        name:
+          type: string
+          title: Name
+        description:  # optional and nullable
+          anyOf:
+          - type: string
+          - type: 'null'
+        input_schema:  # free-form object: any keys are allowed
+          additionalProperties: true
+          type: object
+          title: Input Schema
+          description: JSON Schema for the tool's input.
+      required:
+      - name
+      - input_schema
+      title: AnthropicToolDef
+      description: Definition of a tool available to the model.
+    AnthropicToolResultBlock-Input:  # variant referenced from AnthropicMessage.content
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - tool_result
+        tool_use_id:
+          type: string
+          title: Tool Use Id
+          description: The ID of the tool_use block this result corresponds to.
+        content:  # a string, or a list of text/image blocks; nested list uses anyOf without a discriminator
+          anyOf:
+          - type: string
+          - items:
+              anyOf:
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              title: AnthropicTextBlock | AnthropicImageBlock
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock]
+          description: The result content.
+          default: ''  # documented default is the empty string
+        is_error:  # optional and nullable
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether the tool call resulted in an error.
+      required:
+      - tool_use_id
+      title: AnthropicToolResultBlock  # title omits the -Input suffix used in the component key
+      description: A tool result content block in a user message.
+    AnthropicToolResultBlock-Output:  # variant referenced from AnthropicMessageResponse.content
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - tool_result
+        tool_use_id:
+          type: string
+          title: Tool Use Id
+          description: The ID of the tool_use block this result corresponds to.
+        content:  # a string, or a list of text/image blocks; nested list uses anyOf without a discriminator
+          anyOf:
+          - type: string
+          - items:
+              anyOf:
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              title: AnthropicTextBlock | AnthropicImageBlock
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock]
+          description: The result content.
+          default: ''  # documented default is the empty string
+        is_error:  # optional and nullable
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether the tool call resulted in an error.
+      required:
+      - tool_use_id
+      title: AnthropicToolResultBlock  # title omits the -Output suffix used in the component key
+      description: A tool result content block in a user message.
+    AnthropicToolUseBlock:  # content block whose type tag is "tool_use"
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - tool_use
+        id:
+          type: string
+          title: Id
+          description: Unique ID for this tool invocation.
+        name:
+          type: string
+          title: Name
+          description: Name of the tool being called.
+        input:  # free-form object: any keys are allowed
+          additionalProperties: true
+          type: object
+          title: Input
+          description: Tool input arguments.
+      required:  # id, name and input must all be present
+      - id
+      - name
+      - input
+      title: AnthropicToolUseBlock
+      description: A tool use content block in an assistant message.
+    AnthropicUsage:  # declares no required list, so every property is optional
+      properties:
+        input_tokens:  # documented default is 0
+          type: integer
+          title: Input Tokens
+          default: 0
+        output_tokens:  # documented default is 0
+          type: integer
+          title: Output Tokens
+          default: 0
+        cache_creation_input_tokens:  # nullable integer with no description in this spec
+          anyOf:
+          - type: integer
+          - type: 'null'
+        cache_read_input_tokens:  # nullable integer with no description in this spec
+          anyOf:
+          - type: integer
+          - type: 'null'
+      title: AnthropicUsage
+      description: Token usage statistics.
     ApprovalFilter:
       properties:
         always:
@@ -13986,6 +14515,7 @@ components:
       - prompts
       - conversations
       - connectors
+      - messages
       - inspect
       - admin
       title: Api
diff --git a/docs/docs/providers/messages/index.mdx b/docs/docs/providers/messages/index.mdx
new file mode 100644
index 0000000000..7df084bc60
--- /dev/null
+++ b/docs/docs/providers/messages/index.mdx
@@ -0,0 +1,13 @@
+---
+description: "Protocol for the Anthropic Messages API."
+sidebar_label: Messages
+title: Messages
+---
+
+# Messages
+
+## Overview
+
+Protocol for the Anthropic Messages API.
+
+This section contains documentation for all available providers for the **messages** API.
diff --git a/docs/docs/providers/messages/inline_builtin.mdx b/docs/docs/providers/messages/inline_builtin.mdx
new file mode 100644
index 0000000000..9ed60b766b
--- /dev/null
+++ b/docs/docs/providers/messages/inline_builtin.mdx
@@ -0,0 +1,17 @@
+---
+description: "Anthropic Messages API adapter that translates to the inference API."
+sidebar_label: Builtin
+title: inline::builtin
+---
+
+# inline::builtin
+
+## Description
+
+Anthropic Messages API adapter that translates to the inference API.
+
+## Sample Configuration
+
+```yaml
+{}
+```
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index 12914642cd..3e1d774f82 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -7618,6 +7618,470 @@ components:
           - type: 'null'
       title: AllowedToolsFilter
       description: Filter configuration for restricting which MCP tools can be used.
+    AnthropicCountTokensRequest:  # token-counting request; only model and messages are required
+      properties:
+        model:
+          type: string
+          title: Model
+          description: The model to use for token counting.
+        messages:
+          items:
+            $ref: '#/components/schemas/AnthropicMessage'
+          type: array
+          title: Messages
+          description: The messages to count tokens for.
+        system:  # optional: a plain string, a list of text blocks, or null
+          anyOf:
+          - type: string
+          - items:
+              $ref: '#/components/schemas/AnthropicTextBlock'
+            type: array
+            title: list[AnthropicTextBlock]
+          - type: 'null'
+          title: string | list[AnthropicTextBlock]
+          description: System prompt.
+        tools:  # optional: a list of AnthropicToolDef, or null
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/AnthropicToolDef'
+            type: array
+          - type: 'null'
+          description: Tools to include in token count.
+      required:
+      - model
+      - messages
+      title: AnthropicCountTokensRequest
+      description: Request body for POST /v1/messages/count_tokens.
+    AnthropicCountTokensResponse:  # single-field result object
+      properties:
+        input_tokens:  # the only property, and it is required
+          type: integer
+          title: Input Tokens
+      required:
+      - input_tokens
+      title: AnthropicCountTokensResponse
+      description: Response from POST /v1/messages/count_tokens.
+    AnthropicCreateMessageRequest:  # message-creation request; model, messages, max_tokens required
+      properties:
+        model:
+          type: string
+          title: Model
+          description: The model to use for generation.
+        messages:
+          items:
+            $ref: '#/components/schemas/AnthropicMessage'
+          type: array
+          title: Messages
+          description: The messages in the conversation.
+        max_tokens:
+          type: integer
+          minimum: 1.0  # bound is written as a float although the type is integer
+          title: Max Tokens
+          description: The maximum number of tokens to generate.
+        system:  # optional: a plain string, a list of text blocks, or null
+          anyOf:
+          - type: string
+          - items:
+              $ref: '#/components/schemas/AnthropicTextBlock'
+            type: array
+            title: list[AnthropicTextBlock]
+          - type: 'null'
+          title: string | list[AnthropicTextBlock]
+          description: System prompt. A string or list of text blocks.
+        tools:  # optional: a list of AnthropicToolDef, or null
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/AnthropicToolDef'
+            type: array
+          - type: 'null'
+          description: Tools available to the model.
+        tool_choice:
+          anyOf:
+          - {}  # empty schema: any JSON value validates; the shapes in the description are not enforced
+          - type: 'null'
+          title: Tool Choice
+          description: "How the model should select tools. One of: 'auto', 'any', 'none', or {type: 'tool', name: '...'}."
+        stream:  # nullable boolean with a documented default of false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether to stream the response.
+          default: false
+        temperature:  # when not null, constrained to the closed range 0.0 to 1.0
+          anyOf:
+          - type: number
+            maximum: 1.0
+            minimum: 0.0
+          - type: 'null'
+          description: Sampling temperature.
+        top_p:  # when not null, constrained to the closed range 0.0 to 1.0
+          anyOf:
+          - type: number
+            maximum: 1.0
+            minimum: 0.0
+          - type: 'null'
+          description: Nucleus sampling parameter.
+        top_k:  # when not null, an integer of at least 1
+          anyOf:
+          - type: integer
+            minimum: 1.0
+          - type: 'null'
+          description: Top-k sampling parameter.
+        stop_sequences:
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
+          description: Custom stop sequences.
+        metadata:  # when not null, an object whose values must all be strings
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
+          description: Request metadata.
+        thinking:
+          anyOf:
+          - $ref: '#/components/schemas/AnthropicThinkingConfig'
+            title: AnthropicThinkingConfig
+          - type: 'null'
+          description: Extended thinking configuration.
+          title: AnthropicThinkingConfig
+        service_tier:  # free-form nullable string; no enum of allowed tiers is declared
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Service tier to use.
+      additionalProperties: true  # properties not declared above are accepted
+      required:  # every property not listed here is optional
+      - model
+      - messages
+      - max_tokens
+      title: AnthropicCreateMessageRequest
+      description: Request body for POST /v1/messages.
+    AnthropicImageBlock:  # content block whose type tag is "image"
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - image
+        source:
+          $ref: '#/components/schemas/AnthropicImageSource'
+      required:
+      - source
+      title: AnthropicImageBlock
+      description: An image content block.
+    AnthropicImageSource:  # image payload; media_type and data are required
+      properties:
+        type:  # base64 is the only value the enum allows; not listed under required
+          type: string
+          title: Type
+          enum:
+          - base64
+        media_type:
+          type: string
+          title: Media Type
+          description: MIME type of the image (e.g. image/png).
+        data:
+          type: string
+          title: Data
+          description: Base64-encoded image data.
+      required:
+      - media_type
+      - data
+      title: AnthropicImageSource
+      description: Source for an image content block.
+    AnthropicMessage:  # one conversation turn; role and content are both required
+      properties:
+        role:
+          type: string
+          enum:  # only these two roles are accepted
+          - user
+          - assistant
+          title: Role
+        content:  # either a plain string or a list of typed content blocks
+          anyOf:
+          - type: string
+          - items:
+              oneOf:  # each list item must match exactly one of the five block schemas
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              - $ref: '#/components/schemas/AnthropicToolUseBlock'
+                title: AnthropicToolUseBlock
+              - $ref: '#/components/schemas/AnthropicToolResultBlock-Input'
+                title: AnthropicToolResultBlock-Input
+              - $ref: '#/components/schemas/AnthropicThinkingBlock'
+                title: AnthropicThinkingBlock
+              discriminator:  # NOTE(review): none of the five variants lists type under required - confirm intended
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/AnthropicImageBlock'
+                  text: '#/components/schemas/AnthropicTextBlock'
+                  thinking: '#/components/schemas/AnthropicThinkingBlock'
+                  tool_result: '#/components/schemas/AnthropicToolResultBlock-Input'
+                  tool_use: '#/components/schemas/AnthropicToolUseBlock'
+              title: AnthropicTextBlock | ... (5 variants)
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock | ...]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock | ...]
+          description: 'Message content: a string for simple text, or a list of content blocks.'
+      required:
+      - role
+      - content
+      title: AnthropicMessage
+      description: A message in the conversation.
+    AnthropicMessageResponse:  # non-streaming response; id, content and model are required
+      properties:
+        id:
+          type: string
+          title: Id
+          description: Unique message ID (msg_ prefix).
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - message
+        role:  # single-value enum; not listed under required below
+          type: string
+          title: Role
+          enum:
+          - assistant
+        content:
+          items:
+            oneOf:  # each list item must match exactly one of the five block schemas
+            - $ref: '#/components/schemas/AnthropicTextBlock'
+              title: AnthropicTextBlock
+            - $ref: '#/components/schemas/AnthropicImageBlock'
+              title: AnthropicImageBlock
+            - $ref: '#/components/schemas/AnthropicToolUseBlock'
+              title: AnthropicToolUseBlock
+            - $ref: '#/components/schemas/AnthropicToolResultBlock-Output'
+              title: AnthropicToolResultBlock-Output
+            - $ref: '#/components/schemas/AnthropicThinkingBlock'
+              title: AnthropicThinkingBlock
+            discriminator:  # the type property selects the variant through the mapping
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/AnthropicImageBlock'
+                text: '#/components/schemas/AnthropicTextBlock'
+                thinking: '#/components/schemas/AnthropicThinkingBlock'
+                tool_result: '#/components/schemas/AnthropicToolResultBlock-Output'
+                tool_use: '#/components/schemas/AnthropicToolUseBlock'
+            title: AnthropicTextBlock | ... (5 variants)
+          type: array
+          title: Content
+          description: Response content blocks.
+        model:
+          type: string
+          title: Model
+        stop_reason:  # nullable free-form string; the values in the description are not an enum
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: 'Why the model stopped: end_turn, stop_sequence, tool_use, or max_tokens.'
+        stop_sequence:  # nullable string with no description in this spec
+          anyOf:
+          - type: string
+          - type: 'null'
+        usage:  # optional here: not listed under required
+          $ref: '#/components/schemas/AnthropicUsage'
+      required:
+      - id
+      - content
+      - model
+      title: AnthropicMessageResponse
+      description: Response from POST /v1/messages (non-streaming).
+    AnthropicTextBlock:  # content block whose type tag is "text"
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - text
+        text:
+          type: string
+          title: Text
+      required:  # text is the only required property
+      - text
+      title: AnthropicTextBlock
+      description: A text content block.
+    AnthropicThinkingBlock:  # content block whose type tag is "thinking"
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - thinking
+        thinking:
+          type: string
+          title: Thinking
+          description: The model's thinking text.
+        signature:  # optional and nullable
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Signature for the thinking block.
+      required:
+      - thinking
+      title: AnthropicThinkingBlock
+      description: A thinking content block (extended thinking).
+    AnthropicThinkingConfig:  # declares no required list, so every property is optional
+      properties:
+        type:  # one of three modes; documented default is enabled
+          type: string
+          enum:
+          - enabled
+          - disabled
+          - adaptive
+          title: Type
+          default: enabled
+        budget_tokens:  # when not null, an integer of at least 1
+          anyOf:
+          - type: integer
+            minimum: 1.0
+          - type: 'null'
+          description: Maximum tokens for thinking.
+      title: AnthropicThinkingConfig
+      description: Configuration for extended thinking.
+    AnthropicToolDef:  # tool declaration; name and input_schema are required
+      properties:
+        name:
+          type: string
+          title: Name
+        description:  # optional and nullable
+          anyOf:
+          - type: string
+          - type: 'null'
+        input_schema:  # free-form object: any keys are allowed
+          additionalProperties: true
+          type: object
+          title: Input Schema
+          description: JSON Schema for the tool's input.
+      required:
+      - name
+      - input_schema
+      title: AnthropicToolDef
+      description: Definition of a tool available to the model.
+    AnthropicToolResultBlock-Input:  # variant referenced from AnthropicMessage.content
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - tool_result
+        tool_use_id:
+          type: string
+          title: Tool Use Id
+          description: The ID of the tool_use block this result corresponds to.
+        content:  # a string, or a list of text/image blocks; nested list uses anyOf without a discriminator
+          anyOf:
+          - type: string
+          - items:
+              anyOf:
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              title: AnthropicTextBlock | AnthropicImageBlock
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock]
+          description: The result content.
+          default: ''  # documented default is the empty string
+        is_error:  # optional and nullable
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether the tool call resulted in an error.
+      required:
+      - tool_use_id
+      title: AnthropicToolResultBlock  # title omits the -Input suffix used in the component key
+      description: A tool result content block in a user message.
+    AnthropicToolResultBlock-Output:  # variant referenced from AnthropicMessageResponse.content
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - tool_result
+        tool_use_id:
+          type: string
+          title: Tool Use Id
+          description: The ID of the tool_use block this result corresponds to.
+        content:  # a string, or a list of text/image blocks; nested list uses anyOf without a discriminator
+          anyOf:
+          - type: string
+          - items:
+              anyOf:
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              title: AnthropicTextBlock | AnthropicImageBlock
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock]
+          description: The result content.
+          default: ''  # documented default is the empty string
+        is_error:  # optional and nullable
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether the tool call resulted in an error.
+      required:
+      - tool_use_id
+      title: AnthropicToolResultBlock  # title omits the -Output suffix used in the component key
+      description: A tool result content block in a user message.
+    AnthropicToolUseBlock:  # content block whose type tag is "tool_use"
+      properties:
+        type:  # single-value enum; not listed under required below
+          type: string
+          title: Type
+          enum:
+          - tool_use
+        id:
+          type: string
+          title: Id
+          description: Unique ID for this tool invocation.
+        name:
+          type: string
+          title: Name
+          description: Name of the tool being called.
+        input:  # free-form object: any keys are allowed
+          additionalProperties: true
+          type: object
+          title: Input
+          description: Tool input arguments.
+      required:  # id, name and input must all be present
+      - id
+      - name
+      - input
+      title: AnthropicToolUseBlock
+      description: A tool use content block in an assistant message.
+    AnthropicUsage:  # declares no required list, so every property is optional
+      properties:
+        input_tokens:  # documented default is 0
+          type: integer
+          title: Input Tokens
+          default: 0
+        output_tokens:  # documented default is 0
+          type: integer
+          title: Output Tokens
+          default: 0
+        cache_creation_input_tokens:  # nullable integer with no description in this spec
+          anyOf:
+          - type: integer
+          - type: 'null'
+        cache_read_input_tokens:  # nullable integer with no description in this spec
+          anyOf:
+          - type: integer
+          - type: 'null'
+      title: AnthropicUsage
+      description: Token usage statistics.
     ApprovalFilter:
       properties:
         always:
@@ -9899,6 +10363,7 @@ components:
       - prompts
       - conversations
       - connectors
+      - messages
       - inspect
       - admin
       title: Api
diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml
index 5e834f4308..0d3b517e83 100644
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@@ -10104,6 +10104,7 @@ components:
       - prompts
       - conversations
       - connectors
+      - messages
       - inspect
       - admin
       title: Api
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index 033219262a..249488661f 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -3305,6 +3305,71 @@ paths:
 
           response = client.responses.cancel("resp_abc123")
           print(response)
+  /v1/messages:
+    post:
+      responses:
+        '200':
+          description: An AnthropicMessageResponse or a stream of Anthropic SSE events.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/AnthropicMessageResponse'
+            text/event-stream: {}
+        '400':
+          description: Bad Request
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
+        '500':
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
+        default:
+          description: Default Response
+          $ref: '#/components/responses/DefaultError'
+      tags:
+      - Messages
+      summary: Create a message.
+      description: Create a message using the Anthropic Messages API format.
+      operationId: create_message_v1_messages_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/AnthropicCreateMessageRequest'
+        required: true
+  /v1/messages/count_tokens:
+    post:
+      responses:
+        '200':
+          description: Token count for the request.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/AnthropicCountTokensResponse'
+        '400':
+          description: Bad Request
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
+        '500':
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
+        default:
+          description: Default Response
+          $ref: '#/components/responses/DefaultError'
+      tags:
+      - Messages
+      summary: Count tokens in a message.
+      description: Count the number of tokens in a message request.
+      operationId: count_message_tokens_v1_messages_count_tokens_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/AnthropicCountTokensRequest'
+        required: true
 components:
   schemas:
     Error:
@@ -10655,6 +10720,470 @@ components:
           - type: 'null'
       title: AllowedToolsFilter
       description: Filter configuration for restricting which MCP tools can be used.
+    AnthropicCountTokensRequest:
+      properties:
+        model:
+          type: string
+          title: Model
+          description: The model to use for token counting.
+        messages:
+          items:
+            $ref: '#/components/schemas/AnthropicMessage'
+          type: array
+          title: Messages
+          description: The messages to count tokens for.
+        system:
+          anyOf:
+          - type: string
+          - items:
+              $ref: '#/components/schemas/AnthropicTextBlock'
+            type: array
+            title: list[AnthropicTextBlock]
+          - type: 'null'
+          title: string | list[AnthropicTextBlock]
+          description: System prompt.
+        tools:
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/AnthropicToolDef'
+            type: array
+          - type: 'null'
+          description: Tools to include in token count.
+      required:
+      - model
+      - messages
+      title: AnthropicCountTokensRequest
+      description: Request body for POST /v1/messages/count_tokens.
+    AnthropicCountTokensResponse:
+      properties:
+        input_tokens:
+          type: integer
+          title: Input Tokens
+      required:
+      - input_tokens
+      title: AnthropicCountTokensResponse
+      description: Response from POST /v1/messages/count_tokens.
+    AnthropicCreateMessageRequest:
+      properties:
+        model:
+          type: string
+          title: Model
+          description: The model to use for generation.
+        messages:
+          items:
+            $ref: '#/components/schemas/AnthropicMessage'
+          type: array
+          title: Messages
+          description: The messages in the conversation.
+        max_tokens:
+          type: integer
+          minimum: 1.0
+          title: Max Tokens
+          description: The maximum number of tokens to generate.
+        system:
+          anyOf:
+          - type: string
+          - items:
+              $ref: '#/components/schemas/AnthropicTextBlock'
+            type: array
+            title: list[AnthropicTextBlock]
+          - type: 'null'
+          title: string | list[AnthropicTextBlock]
+          description: System prompt. A string or list of text blocks.
+        tools:
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/AnthropicToolDef'
+            type: array
+          - type: 'null'
+          description: Tools available to the model.
+        tool_choice:
+          anyOf:
+          - {}
+          - type: 'null'
+          title: Tool Choice
+          description: "How the model should select tools. One of: 'auto', 'any', 'none', or {type: 'tool', name: '...'}."
+        stream:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether to stream the response.
+          default: false
+        temperature:
+          anyOf:
+          - type: number
+            maximum: 1.0
+            minimum: 0.0
+          - type: 'null'
+          description: Sampling temperature.
+        top_p:
+          anyOf:
+          - type: number
+            maximum: 1.0
+            minimum: 0.0
+          - type: 'null'
+          description: Nucleus sampling parameter.
+        top_k:
+          anyOf:
+          - type: integer
+            minimum: 1.0
+          - type: 'null'
+          description: Top-k sampling parameter.
+        stop_sequences:
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
+          description: Custom stop sequences.
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
+          description: Request metadata.
+        thinking:
+          anyOf:
+          - $ref: '#/components/schemas/AnthropicThinkingConfig'
+            title: AnthropicThinkingConfig
+          - type: 'null'
+          description: Extended thinking configuration.
+          title: AnthropicThinkingConfig
+        service_tier:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Service tier to use.
+      additionalProperties: true
+      required:
+      - model
+      - messages
+      - max_tokens
+      title: AnthropicCreateMessageRequest
+      description: Request body for POST /v1/messages.
+    AnthropicImageBlock:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - image
+        source:
+          $ref: '#/components/schemas/AnthropicImageSource'
+      required:
+      - source
+      title: AnthropicImageBlock
+      description: An image content block.
+    AnthropicImageSource:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - base64
+        media_type:
+          type: string
+          title: Media Type
+          description: MIME type of the image (e.g. image/png).
+        data:
+          type: string
+          title: Data
+          description: Base64-encoded image data.
+      required:
+      - media_type
+      - data
+      title: AnthropicImageSource
+      description: Source for an image content block.
+    AnthropicMessage:
+      properties:
+        role:
+          type: string
+          enum:
+          - user
+          - assistant
+          title: Role
+        content:
+          anyOf:
+          - type: string
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              - $ref: '#/components/schemas/AnthropicToolUseBlock'
+                title: AnthropicToolUseBlock
+              - $ref: '#/components/schemas/AnthropicToolResultBlock-Input'
+                title: AnthropicToolResultBlock-Input
+              - $ref: '#/components/schemas/AnthropicThinkingBlock'
+                title: AnthropicThinkingBlock
+              discriminator:
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/AnthropicImageBlock'
+                  text: '#/components/schemas/AnthropicTextBlock'
+                  thinking: '#/components/schemas/AnthropicThinkingBlock'
+                  tool_result: '#/components/schemas/AnthropicToolResultBlock-Input'
+                  tool_use: '#/components/schemas/AnthropicToolUseBlock'
+              title: AnthropicTextBlock | ... (5 variants)
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock | ...]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock | ...]
+          description: 'Message content: a string for simple text, or a list of content blocks.'
+      required:
+      - role
+      - content
+      title: AnthropicMessage
+      description: A message in the conversation.
+    AnthropicMessageResponse:
+      properties:
+        id:
+          type: string
+          title: Id
+          description: Unique message ID (msg_ prefix).
+        type:
+          type: string
+          title: Type
+          enum:
+          - message
+        role:
+          type: string
+          title: Role
+          enum:
+          - assistant
+        content:
+          items:
+            oneOf:
+            - $ref: '#/components/schemas/AnthropicTextBlock'
+              title: AnthropicTextBlock
+            - $ref: '#/components/schemas/AnthropicImageBlock'
+              title: AnthropicImageBlock
+            - $ref: '#/components/schemas/AnthropicToolUseBlock'
+              title: AnthropicToolUseBlock
+            - $ref: '#/components/schemas/AnthropicToolResultBlock-Output'
+              title: AnthropicToolResultBlock-Output
+            - $ref: '#/components/schemas/AnthropicThinkingBlock'
+              title: AnthropicThinkingBlock
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/AnthropicImageBlock'
+                text: '#/components/schemas/AnthropicTextBlock'
+                thinking: '#/components/schemas/AnthropicThinkingBlock'
+                tool_result: '#/components/schemas/AnthropicToolResultBlock-Output'
+                tool_use: '#/components/schemas/AnthropicToolUseBlock'
+            title: AnthropicTextBlock | ... (5 variants)
+          type: array
+          title: Content
+          description: Response content blocks.
+        model:
+          type: string
+          title: Model
+        stop_reason:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: 'Why the model stopped: end_turn, stop_sequence, tool_use, or max_tokens.'
+        stop_sequence:
+          anyOf:
+          - type: string
+          - type: 'null'
+        usage:
+          $ref: '#/components/schemas/AnthropicUsage'
+      required:
+      - id
+      - content
+      - model
+      title: AnthropicMessageResponse
+      description: Response from POST /v1/messages (non-streaming).
+    AnthropicTextBlock:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - text
+        text:
+          type: string
+          title: Text
+      required:
+      - text
+      title: AnthropicTextBlock
+      description: A text content block.
+    AnthropicThinkingBlock:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - thinking
+        thinking:
+          type: string
+          title: Thinking
+          description: The model's thinking text.
+        signature:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Signature for the thinking block.
+      required:
+      - thinking
+      title: AnthropicThinkingBlock
+      description: A thinking content block (extended thinking).
+    AnthropicThinkingConfig:
+      properties:
+        type:
+          type: string
+          enum:
+          - enabled
+          - disabled
+          - adaptive
+          title: Type
+          default: enabled
+        budget_tokens:
+          anyOf:
+          - type: integer
+            minimum: 1.0
+          - type: 'null'
+          description: Maximum tokens for thinking.
+      title: AnthropicThinkingConfig
+      description: Configuration for extended thinking.
+    AnthropicToolDef:
+      properties:
+        name:
+          type: string
+          title: Name
+        description:
+          anyOf:
+          - type: string
+          - type: 'null'
+        input_schema:
+          additionalProperties: true
+          type: object
+          title: Input Schema
+          description: JSON Schema for the tool's input.
+      required:
+      - name
+      - input_schema
+      title: AnthropicToolDef
+      description: Definition of a tool available to the model.
+    AnthropicToolResultBlock-Input:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - tool_result
+        tool_use_id:
+          type: string
+          title: Tool Use Id
+          description: The ID of the tool_use block this result corresponds to.
+        content:
+          anyOf:
+          - type: string
+          - items:
+              anyOf:
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              title: AnthropicTextBlock | AnthropicImageBlock
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock]
+          description: The result content.
+          default: ''
+        is_error:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether the tool call resulted in an error.
+      required:
+      - tool_use_id
+      title: AnthropicToolResultBlock
+      description: A tool result content block in a user message.
+    AnthropicToolResultBlock-Output:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - tool_result
+        tool_use_id:
+          type: string
+          title: Tool Use Id
+          description: The ID of the tool_use block this result corresponds to.
+        content:
+          anyOf:
+          - type: string
+          - items:
+              anyOf:
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              title: AnthropicTextBlock | AnthropicImageBlock
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock]
+          description: The result content.
+          default: ''
+        is_error:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether the tool call resulted in an error.
+      required:
+      - tool_use_id
+      title: AnthropicToolResultBlock
+      description: A tool result content block in a user message.
+    AnthropicToolUseBlock:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - tool_use
+        id:
+          type: string
+          title: Id
+          description: Unique ID for this tool invocation.
+        name:
+          type: string
+          title: Name
+          description: Name of the tool being called.
+        input:
+          additionalProperties: true
+          type: object
+          title: Input
+          description: Tool input arguments.
+      required:
+      - id
+      - name
+      - input
+      title: AnthropicToolUseBlock
+      description: A tool use content block in an assistant message.
+    AnthropicUsage:
+      properties:
+        input_tokens:
+          type: integer
+          title: Input Tokens
+          default: 0
+        output_tokens:
+          type: integer
+          title: Output Tokens
+          default: 0
+        cache_creation_input_tokens:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        cache_read_input_tokens:
+          anyOf:
+          - type: integer
+          - type: 'null'
+      title: AnthropicUsage
+      description: Token usage statistics.
     ApprovalFilter:
       properties:
         always:
@@ -12911,6 +13440,7 @@ components:
       - prompts
       - conversations
       - connectors
+      - messages
       - inspect
       - admin
       title: Api
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index 06e361debc..d903796d75 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -4357,6 +4357,71 @@ paths:
           description: Authorization token
           title: Authorization
         description: Authorization token
+  /v1/messages:
+    post:
+      responses:
+        '200':
+          description: An AnthropicMessageResponse or a stream of Anthropic SSE events.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/AnthropicMessageResponse'
+            text/event-stream: {}
+        '400':
+          description: Bad Request
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
+        '500':
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
+        default:
+          description: Default Response
+          $ref: '#/components/responses/DefaultError'
+      tags:
+      - Messages
+      summary: Create a message.
+      description: Create a message using the Anthropic Messages API format.
+      operationId: create_message_v1_messages_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/AnthropicCreateMessageRequest'
+        required: true
+  /v1/messages/count_tokens:
+    post:
+      responses:
+        '200':
+          description: Token count for the request.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/AnthropicCountTokensResponse'
+        '400':
+          description: Bad Request
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          description: Too Many Requests
+          $ref: '#/components/responses/TooManyRequests429'
+        '500':
+          description: Internal Server Error
+          $ref: '#/components/responses/InternalServerError500'
+        default:
+          description: Default Response
+          $ref: '#/components/responses/DefaultError'
+      tags:
+      - Messages
+      summary: Count tokens in a message.
+      description: Count the number of tokens in a message request.
+      operationId: count_message_tokens_v1_messages_count_tokens_post
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/AnthropicCountTokensRequest'
+        required: true
 components:
   schemas:
     Error:
@@ -11707,6 +11772,470 @@ components:
           - type: 'null'
       title: AllowedToolsFilter
       description: Filter configuration for restricting which MCP tools can be used.
+    AnthropicCountTokensRequest:
+      properties:
+        model:
+          type: string
+          title: Model
+          description: The model to use for token counting.
+        messages:
+          items:
+            $ref: '#/components/schemas/AnthropicMessage'
+          type: array
+          title: Messages
+          description: The messages to count tokens for.
+        system:
+          anyOf:
+          - type: string
+          - items:
+              $ref: '#/components/schemas/AnthropicTextBlock'
+            type: array
+            title: list[AnthropicTextBlock]
+          - type: 'null'
+          title: string | list[AnthropicTextBlock]
+          description: System prompt.
+        tools:
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/AnthropicToolDef'
+            type: array
+          - type: 'null'
+          description: Tools to include in token count.
+      required:
+      - model
+      - messages
+      title: AnthropicCountTokensRequest
+      description: Request body for POST /v1/messages/count_tokens.
+    AnthropicCountTokensResponse:
+      properties:
+        input_tokens:
+          type: integer
+          title: Input Tokens
+      required:
+      - input_tokens
+      title: AnthropicCountTokensResponse
+      description: Response from POST /v1/messages/count_tokens.
+    AnthropicCreateMessageRequest:
+      properties:
+        model:
+          type: string
+          title: Model
+          description: The model to use for generation.
+        messages:
+          items:
+            $ref: '#/components/schemas/AnthropicMessage'
+          type: array
+          title: Messages
+          description: The messages in the conversation.
+        max_tokens:
+          type: integer
+          minimum: 1.0
+          title: Max Tokens
+          description: The maximum number of tokens to generate.
+        system:
+          anyOf:
+          - type: string
+          - items:
+              $ref: '#/components/schemas/AnthropicTextBlock'
+            type: array
+            title: list[AnthropicTextBlock]
+          - type: 'null'
+          title: string | list[AnthropicTextBlock]
+          description: System prompt. A string or list of text blocks.
+        tools:
+          anyOf:
+          - items:
+              $ref: '#/components/schemas/AnthropicToolDef'
+            type: array
+          - type: 'null'
+          description: Tools available to the model.
+        tool_choice:
+          anyOf:
+          - {}
+          - type: 'null'
+          title: Tool Choice
+          description: "How the model should select tools. One of: 'auto', 'any', 'none', or {type: 'tool', name: '...'}."
+        stream:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether to stream the response.
+          default: false
+        temperature:
+          anyOf:
+          - type: number
+            maximum: 1.0
+            minimum: 0.0
+          - type: 'null'
+          description: Sampling temperature.
+        top_p:
+          anyOf:
+          - type: number
+            maximum: 1.0
+            minimum: 0.0
+          - type: 'null'
+          description: Nucleus sampling parameter.
+        top_k:
+          anyOf:
+          - type: integer
+            minimum: 1.0
+          - type: 'null'
+          description: Top-k sampling parameter.
+        stop_sequences:
+          anyOf:
+          - items:
+              type: string
+            type: array
+          - type: 'null'
+          description: Custom stop sequences.
+        metadata:
+          anyOf:
+          - additionalProperties:
+              type: string
+            type: object
+          - type: 'null'
+          description: Request metadata.
+        thinking:
+          anyOf:
+          - $ref: '#/components/schemas/AnthropicThinkingConfig'
+            title: AnthropicThinkingConfig
+          - type: 'null'
+          description: Extended thinking configuration.
+          title: AnthropicThinkingConfig
+        service_tier:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Service tier to use.
+      additionalProperties: true
+      required:
+      - model
+      - messages
+      - max_tokens
+      title: AnthropicCreateMessageRequest
+      description: Request body for POST /v1/messages.
+    AnthropicImageBlock:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - image
+        source:
+          $ref: '#/components/schemas/AnthropicImageSource'
+      required:
+      - source
+      title: AnthropicImageBlock
+      description: An image content block.
+    AnthropicImageSource:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - base64
+        media_type:
+          type: string
+          title: Media Type
+          description: MIME type of the image (e.g. image/png).
+        data:
+          type: string
+          title: Data
+          description: Base64-encoded image data.
+      required:
+      - media_type
+      - data
+      title: AnthropicImageSource
+      description: Source for an image content block.
+    AnthropicMessage:
+      properties:
+        role:
+          type: string
+          enum:
+          - user
+          - assistant
+          title: Role
+        content:
+          anyOf:
+          - type: string
+          - items:
+              oneOf:
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              - $ref: '#/components/schemas/AnthropicToolUseBlock'
+                title: AnthropicToolUseBlock
+              - $ref: '#/components/schemas/AnthropicToolResultBlock-Input'
+                title: AnthropicToolResultBlock-Input
+              - $ref: '#/components/schemas/AnthropicThinkingBlock'
+                title: AnthropicThinkingBlock
+              discriminator:
+                propertyName: type
+                mapping:
+                  image: '#/components/schemas/AnthropicImageBlock'
+                  text: '#/components/schemas/AnthropicTextBlock'
+                  thinking: '#/components/schemas/AnthropicThinkingBlock'
+                  tool_result: '#/components/schemas/AnthropicToolResultBlock-Input'
+                  tool_use: '#/components/schemas/AnthropicToolUseBlock'
+              title: AnthropicTextBlock | ... (5 variants)
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock | ...]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock | ...]
+          description: 'Message content: a string for simple text, or a list of content blocks.'
+      required:
+      - role
+      - content
+      title: AnthropicMessage
+      description: A message in the conversation.
+    AnthropicMessageResponse:
+      properties:
+        id:
+          type: string
+          title: Id
+          description: Unique message ID (msg_ prefix).
+        type:
+          type: string
+          title: Type
+          enum:
+          - message
+        role:
+          type: string
+          title: Role
+          enum:
+          - assistant
+        content:
+          items:
+            oneOf:
+            - $ref: '#/components/schemas/AnthropicTextBlock'
+              title: AnthropicTextBlock
+            - $ref: '#/components/schemas/AnthropicImageBlock'
+              title: AnthropicImageBlock
+            - $ref: '#/components/schemas/AnthropicToolUseBlock'
+              title: AnthropicToolUseBlock
+            - $ref: '#/components/schemas/AnthropicToolResultBlock-Output'
+              title: AnthropicToolResultBlock-Output
+            - $ref: '#/components/schemas/AnthropicThinkingBlock'
+              title: AnthropicThinkingBlock
+            discriminator:
+              propertyName: type
+              mapping:
+                image: '#/components/schemas/AnthropicImageBlock'
+                text: '#/components/schemas/AnthropicTextBlock'
+                thinking: '#/components/schemas/AnthropicThinkingBlock'
+                tool_result: '#/components/schemas/AnthropicToolResultBlock-Output'
+                tool_use: '#/components/schemas/AnthropicToolUseBlock'
+            title: AnthropicTextBlock | ... (5 variants)
+          type: array
+          title: Content
+          description: Response content blocks.
+        model:
+          type: string
+          title: Model
+        stop_reason:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: 'Why the model stopped: end_turn, stop_sequence, tool_use, or max_tokens.'
+        stop_sequence:
+          anyOf:
+          - type: string
+          - type: 'null'
+        usage:
+          $ref: '#/components/schemas/AnthropicUsage'
+      required:
+      - id
+      - content
+      - model
+      title: AnthropicMessageResponse
+      description: Response from POST /v1/messages (non-streaming).
+    AnthropicTextBlock:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - text
+        text:
+          type: string
+          title: Text
+      required:
+      - text
+      title: AnthropicTextBlock
+      description: A text content block.
+    AnthropicThinkingBlock:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - thinking
+        thinking:
+          type: string
+          title: Thinking
+          description: The model's thinking text.
+        signature:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: Signature for the thinking block.
+      required:
+      - thinking
+      title: AnthropicThinkingBlock
+      description: A thinking content block (extended thinking).
+    AnthropicThinkingConfig:
+      properties:
+        type:
+          type: string
+          enum:
+          - enabled
+          - disabled
+          - adaptive
+          title: Type
+          default: enabled
+        budget_tokens:
+          anyOf:
+          - type: integer
+            minimum: 1.0
+          - type: 'null'
+          description: Maximum tokens for thinking.
+      title: AnthropicThinkingConfig
+      description: Configuration for extended thinking.
+    AnthropicToolDef:
+      properties:
+        name:
+          type: string
+          title: Name
+        description:
+          anyOf:
+          - type: string
+          - type: 'null'
+        input_schema:
+          additionalProperties: true
+          type: object
+          title: Input Schema
+          description: JSON Schema for the tool's input.
+      required:
+      - name
+      - input_schema
+      title: AnthropicToolDef
+      description: Definition of a tool available to the model.
+    AnthropicToolResultBlock-Input:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - tool_result
+        tool_use_id:
+          type: string
+          title: Tool Use Id
+          description: The ID of the tool_use block this result corresponds to.
+        content:
+          anyOf:
+          - type: string
+          - items:
+              anyOf:
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              title: AnthropicTextBlock | AnthropicImageBlock
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock]
+          description: The result content.
+          default: ''
+        is_error:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether the tool call resulted in an error.
+      required:
+      - tool_use_id
+      title: AnthropicToolResultBlock
+      description: A tool result content block in a user message.
+    AnthropicToolResultBlock-Output:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - tool_result
+        tool_use_id:
+          type: string
+          title: Tool Use Id
+          description: The ID of the tool_use block this result corresponds to.
+        content:
+          anyOf:
+          - type: string
+          - items:
+              anyOf:
+              - $ref: '#/components/schemas/AnthropicTextBlock'
+                title: AnthropicTextBlock
+              - $ref: '#/components/schemas/AnthropicImageBlock'
+                title: AnthropicImageBlock
+              title: AnthropicTextBlock | AnthropicImageBlock
+            type: array
+            title: list[AnthropicTextBlock | AnthropicImageBlock]
+          title: string | list[AnthropicTextBlock | AnthropicImageBlock]
+          description: The result content.
+          default: ''
+        is_error:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          description: Whether the tool call resulted in an error.
+      required:
+      - tool_use_id
+      title: AnthropicToolResultBlock
+      description: A tool result content block in a user message.
+    AnthropicToolUseBlock:
+      properties:
+        type:
+          type: string
+          title: Type
+          enum:
+          - tool_use
+        id:
+          type: string
+          title: Id
+          description: Unique ID for this tool invocation.
+        name:
+          type: string
+          title: Name
+          description: Name of the tool being called.
+        input:
+          additionalProperties: true
+          type: object
+          title: Input
+          description: Tool input arguments.
+      required:
+      - id
+      - name
+      - input
+      title: AnthropicToolUseBlock
+      description: A tool use content block in an assistant message.
+    AnthropicUsage:
+      properties:
+        input_tokens:
+          type: integer
+          title: Input Tokens
+          default: 0
+        output_tokens:
+          type: integer
+          title: Output Tokens
+          default: 0
+        cache_creation_input_tokens:
+          anyOf:
+          - type: integer
+          - type: 'null'
+        cache_read_input_tokens:
+          anyOf:
+          - type: integer
+          - type: 'null'
+      title: AnthropicUsage
+      description: Token usage statistics.
     ApprovalFilter:
       properties:
         always:
@@ -13986,6 +14515,7 @@ components:
       - prompts
       - conversations
       - connectors
+      - messages
       - inspect
       - admin
       title: Api
diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py
index 69ef47942e..5b96b57730 100644
--- a/src/llama_stack/core/resolver.py
+++ b/src/llama_stack/core/resolver.py
@@ -41,6 +41,7 @@
     Inference,
     InferenceProvider,
     Inspect,
+    Messages,
     Models,
     ModelsProtocolPrivate,
     Prompts,
@@ -107,6 +108,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
         Api.conversations: Conversations,
         Api.file_processors: FileProcessors,
         Api.connectors: Connectors,
+        Api.messages: Messages,
     }
 
     if external_apis:
diff --git a/src/llama_stack/distributions/ci-tests/config.yaml b/src/llama_stack/distributions/ci-tests/config.yaml
index 7bcbb6eee9..b0b87923f3 100644
--- a/src/llama_stack/distributions/ci-tests/config.yaml
+++ b/src/llama_stack/distributions/ci-tests/config.yaml
@@ -7,6 +7,7 @@ apis:
 - file_processors
 - files
 - inference
+- messages
 - responses
 - safety
 - scoring
@@ -197,6 +198,9 @@ providers:
       excluded_categories: []
   - provider_id: code-scanner
     provider_type: inline::code-scanner
+  messages:
+  - provider_id: builtin
+    provider_type: inline::builtin
   responses:
   - provider_id: builtin
     provider_type: inline::builtin
diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
index 553ed41118..a9a1e6e2cc 100644
--- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
@@ -7,6 +7,7 @@ apis:
 - file_processors
 - files
 - inference
+- messages
 - responses
 - safety
 - scoring
@@ -197,6 +198,9 @@ providers:
       excluded_categories: []
   - provider_id: code-scanner
     provider_type: inline::code-scanner
+  messages:
+  - provider_id: builtin
+    provider_type: inline::builtin
   responses:
   - provider_id: builtin
     provider_type: inline::builtin
diff --git a/src/llama_stack/distributions/starter/config.yaml b/src/llama_stack/distributions/starter/config.yaml
index fb9cf9ae84..ed5b862130 100644
--- a/src/llama_stack/distributions/starter/config.yaml
+++ b/src/llama_stack/distributions/starter/config.yaml
@@ -7,6 +7,7 @@ apis:
 - file_processors
 - files
 - inference
+- messages
 - responses
 - safety
 - scoring
@@ -191,6 +192,9 @@ providers:
       excluded_categories: []
   - provider_id: code-scanner
     provider_type: inline::code-scanner
+  messages:
+  - provider_id: builtin
+    provider_type: inline::builtin
   responses:
   - provider_id: builtin
     provider_type: inline::builtin
diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
index 52225576f5..c2a814bc62 100644
--- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
@@ -7,6 +7,7 @@ apis:
 - file_processors
 - files
 - inference
+- messages
 - responses
 - safety
 - scoring
@@ -191,6 +192,9 @@ providers:
       excluded_categories: []
   - provider_id: code-scanner
     provider_type: inline::code-scanner
+  messages:
+  - provider_id: builtin
+    provider_type: inline::builtin
   responses:
   - provider_id: builtin
     provider_type: inline::builtin
diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
index c99ce08e49..61e969c01b 100644
--- a/src/llama_stack/distributions/starter/starter.py
+++ b/src/llama_stack/distributions/starter/starter.py
@@ -152,6 +152,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
             BuildProvider(provider_type="inline::llama-guard"),
             BuildProvider(provider_type="inline::code-scanner"),
         ],
+        "messages": [BuildProvider(provider_type="inline::builtin")],
         "responses": [BuildProvider(provider_type="inline::builtin")],
         "eval": [BuildProvider(provider_type="inline::builtin")],
         "datasetio": [
diff --git a/src/llama_stack/log.py b/src/llama_stack/log.py
index 54e2afe348..7c37bd1b82 100644
--- a/src/llama_stack/log.py
+++ b/src/llama_stack/log.py
@@ -56,6 +56,7 @@ class LoggingConfig(BaseModel):
     "tests",
     "telemetry",
     "connectors",
+    "messages",
 ]
 UNCATEGORIZED = "uncategorized"
 
diff --git a/src/llama_stack/providers/inline/messages/__init__.py b/src/llama_stack/providers/inline/messages/__init__.py
new file mode 100644
index 0000000000..b292976c87
--- /dev/null
+++ b/src/llama_stack/providers/inline/messages/__init__.py
@@ -0,0 +1,22 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from llama_stack.core.datatypes import Api
+
+from .config import MessagesConfig
+
+
+async def get_provider_impl(
+    config: MessagesConfig,
+    deps: dict[Api, Any],
+):
+    """Construct the built-in Messages provider and initialize it before handing it back."""
+    from .impl import BuiltinMessagesImpl  # imported at call time, inside the function
+    provider = BuiltinMessagesImpl(config, deps[Api.inference])
+    await provider.initialize()
+    return provider
diff --git a/src/llama_stack/providers/inline/messages/config.py b/src/llama_stack/providers/inline/messages/config.py
new file mode 100644
index 0000000000..c17a040607
--- /dev/null
+++ b/src/llama_stack/providers/inline/messages/config.py
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from pydantic import BaseModel
+
+
+class MessagesConfig(BaseModel):
+    """Configuration for the built-in Anthropic Messages API adapter (declares no fields of its own)."""
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str = "") -> dict[str, Any]:
+        return {}  # nothing to configure, so the sample run config is empty
diff --git a/src/llama_stack/providers/inline/messages/impl.py b/src/llama_stack/providers/inline/messages/impl.py
new file mode 100644
index 0000000000..990517502b
--- /dev/null
+++ b/src/llama_stack/providers/inline/messages/impl.py
@@ -0,0 +1,566 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Built-in Anthropic Messages API implementation.
+
+Translates Anthropic Messages format to/from OpenAI Chat Completions format,
+delegating to the inference API for actual model calls. When the underlying
+inference provider natively supports the Anthropic Messages API (e.g. Ollama),
+requests are forwarded directly without translation.
+"""
+
+from __future__ import annotations
+
+import json
+import uuid
+from collections.abc import AsyncIterator
+from typing import Any
+
+import httpx
+
+from llama_stack.log import get_logger
+from llama_stack_api import (
+    Inference,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
+)
+from llama_stack_api.messages import (
+    Messages,
+)
+from llama_stack_api.messages.models import (
+    AnthropicContentBlock,
+    AnthropicCountTokensRequest,
+    AnthropicCountTokensResponse,
+    AnthropicCreateMessageRequest,
+    AnthropicImageBlock,
+    AnthropicMessage,
+    AnthropicMessageResponse,
+    AnthropicStreamEvent,
+    AnthropicTextBlock,
+    AnthropicThinkingBlock,
+    AnthropicToolDef,
+    AnthropicToolResultBlock,
+    AnthropicToolUseBlock,
+    AnthropicUsage,
+    ContentBlockDeltaEvent,
+    ContentBlockStartEvent,
+    ContentBlockStopEvent,
+    MessageDeltaEvent,
+    MessageStartEvent,
+    MessageStopEvent,
+    _InputJsonDelta,
+    _MessageDelta,
+    _TextDelta,
+    _ThinkingDelta,
+)
+
+from .config import MessagesConfig
+
+logger = get_logger(name=__name__, category="messages")
+
+# Maps Anthropic stop_reason -> OpenAI finish_reason (not referenced elsewhere in this module)
+_STOP_REASON_TO_FINISH = {
+    "end_turn": "stop",
+    "stop_sequence": "stop",
+    "tool_use": "tool_calls",
+    "max_tokens": "length",
+}
+
+# Maps OpenAI finish_reason -> Anthropic stop_reason; call sites fall back to "end_turn" for unknown values
+_FINISH_TO_STOP_REASON = {
+    "stop": "end_turn",
+    "tool_calls": "tool_use",
+    "length": "max_tokens",
+    "content_filter": "end_turn",  # lossy: a filtered completion is reported as a normal end of turn
+}
+
+
+class BuiltinMessagesImpl(Messages):
+    """Anthropic Messages API adapter that translates to the inference API."""
+
+    def __init__(self, config: MessagesConfig, inference_api: Inference):
+        self.config = config  # NOTE: self._client is not set here; initialize() creates it
+        self.inference_api = inference_api  # used for routing-table lookups and chat completions
+
+    async def initialize(self) -> None:
+        self._client = httpx.AsyncClient()  # shared client for the native /v1/messages passthrough calls
+
+    async def shutdown(self) -> None:
+        await self._client.aclose()  # NOTE(review): AttributeError if initialize() was never called
+
+    async def create_message(
+        self,
+        request: AnthropicCreateMessageRequest,
+    ) -> AnthropicMessageResponse | AsyncIterator[AnthropicStreamEvent]:
+        """Serve a Messages request via native passthrough when available, else via translation."""
+        native_base_url = await self._get_passthrough_url(request.model)
+        if native_base_url:
+            # The provider serves /v1/messages itself, so forward the request without translating it.
+            return await self._passthrough_request(native_base_url, request)
+
+        # Translation path: Anthropic request -> OpenAI chat completion -> Anthropic response.
+        completion = await self.inference_api.openai_chat_completion(self._anthropic_to_openai(request))
+        if not isinstance(completion, AsyncIterator):
+            # Non-streaming: convert the finished completion in one go.
+            return self._openai_to_anthropic(completion, request.model)
+        # Streaming: hand back an async generator that translates chunks as they arrive.
+        return self._stream_openai_to_anthropic(completion, request.model)
+
+    async def count_message_tokens(
+        self,
+        request: AnthropicCountTokensRequest,
+    ) -> AnthropicCountTokensResponse:
+        raise NotImplementedError("Token counting is not yet implemented")  # placeholder: always raises
+
+    # -- Native passthrough for providers with /v1/messages support --
+
+    # Module paths of provider impls known to support /v1/messages natively
+    _NATIVE_MESSAGES_MODULES = {"llama_stack.providers.remote.inference.ollama"}
+
+    async def _get_passthrough_url(self, model: str) -> str | None:
+        """Check if the model's provider supports /v1/messages natively.
+
+        Returns the provider base URL (trailing "/" and "/v1" removed) for passthrough,
+        or None to use translation. Any lookup failure also results in None.
+        """
+        router = self.inference_api
+        if not hasattr(router, "routing_table"):
+            return None
+
+        try:
+            obj = await router.routing_table.get_object_by_identifier("model", model)
+            if not obj:
+                return None
+
+            provider_impl = await router.routing_table.get_provider_impl(obj.identifier)
+            provider_module = type(provider_impl).__module__
+            is_native = any(provider_module.startswith(m) for m in self._NATIVE_MESSAGES_MODULES)
+
+            if is_native and hasattr(provider_impl, "get_base_url"):
+                # _passthrough_request appends "/v1/messages" itself, so a trailing "/v1" is dropped here.
+                base_url = str(provider_impl.get_base_url()).rstrip("/").removesuffix("/v1")
+                logger.info("Using native /v1/messages passthrough", model=model, base_url=base_url)
+                return base_url
+        except Exception:
+            # Best effort: translation still works, but keep the traceback so the failure is diagnosable.
+            logger.debug("Failed to resolve passthrough, falling back to translation", model=model, exc_info=True)
+
+        return None
+
+    async def _passthrough_request(
+        self,
+        base_url: str,
+        request: AnthropicCreateMessageRequest,
+    ) -> AnthropicMessageResponse | AsyncIterator[AnthropicStreamEvent]:
+        """Forward the request to the provider's /v1/messages endpoint; streams when request.stream is set."""
+        url = f"{base_url}/v1/messages"
+        # Use the provider_resource_id (model name without provider prefix); falls back to request.model
+        provider_model = request.model
+        router = self.inference_api
+        if hasattr(router, "routing_table"):
+            try:
+                obj = await router.routing_table.get_object_by_identifier("model", request.model)
+                if obj:
+                    provider_model = obj.provider_resource_id
+            except Exception:
+                logger.debug("Could not resolve provider model id", model=request.model, exc_info=True)
+
+        body = request.model_dump(exclude_none=True)
+        body["model"] = provider_model
+        headers = {
+            "content-type": "application/json",
+            "anthropic-version": "2023-06-01",
+            "x-api-key": "no-key-required",  # placeholder value; no real credential is sent
+        }
+
+        if request.stream:
+            return self._passthrough_stream(url, headers, body)
+
+        resp = await self._client.post(url, json=body, headers=headers, timeout=300)
+        resp.raise_for_status()  # an HTTP error status raises here instead of being parsed
+        return AnthropicMessageResponse(**resp.json())
+
+    async def _passthrough_stream(
+        self,
+        url: str,
+        headers: dict[str, str],
+        body: dict[str, Any],
+    ) -> AsyncIterator[AnthropicStreamEvent]:
+        """Stream SSE events from the provider, yielding one parsed event per event/data line pair."""
+        async with self._client.stream("POST", url, json=body, headers=headers, timeout=300) as resp:
+            resp.raise_for_status()
+            event_type = None
+            async for line in resp.aiter_lines():
+                line = line.strip()
+                if line.startswith("event:"):  # the space after the colon is optional in SSE
+                    event_type = line[6:].strip()
+                elif line.startswith("data:") and event_type:
+                    data = json.loads(line[5:])  # json.loads ignores the leading space, if any
+                    event = self._parse_sse_event(event_type, data)
+                    if event:
+                        yield event
+                    event_type = None  # each data line consumes the event name that preceded it
+
+    def _parse_sse_event(self, event_type: str, data: dict[str, Any]) -> AnthropicStreamEvent | None:
+        """Build the typed stream event for one SSE (event, data) pair; None for anything unrecognised."""
+        if event_type == "message_stop":
+            return MessageStopEvent()
+        if event_type == "content_block_stop":
+            return ContentBlockStopEvent(index=data["index"])
+        if event_type == "message_start":
+            return MessageStartEvent(message=AnthropicMessageResponse(**data["message"]))
+        if event_type == "message_delta":
+            return MessageDeltaEvent(
+                delta=_MessageDelta(stop_reason=data["delta"].get("stop_reason")),
+                usage=AnthropicUsage(**data.get("usage", {})),
+            )
+        if event_type == "content_block_start":
+            raw_block = data["content_block"]
+            started: AnthropicTextBlock | AnthropicToolUseBlock | AnthropicThinkingBlock
+            kind = raw_block.get("type")
+            if kind == "tool_use":
+                started = AnthropicToolUseBlock(**raw_block)
+            elif kind == "thinking":
+                started = AnthropicThinkingBlock(**raw_block)
+            else:
+                # Every other block type (including a missing type) is parsed as text.
+                started = AnthropicTextBlock(**raw_block)
+            return ContentBlockStartEvent(index=data["index"], content_block=started)
+        if event_type == "content_block_delta":
+            raw_delta = data["delta"]
+            kind = raw_delta.get("type")
+            payload: _TextDelta | _InputJsonDelta | _ThinkingDelta | None = None
+            if kind == "text_delta":
+                payload = _TextDelta(text=raw_delta["text"])
+            elif kind == "input_json_delta":
+                payload = _InputJsonDelta(partial_json=raw_delta["partial_json"])
+            elif kind == "thinking_delta":
+                payload = _ThinkingDelta(thinking=raw_delta["thinking"])
+            # Unknown delta types are dropped rather than raising.
+            return ContentBlockDeltaEvent(index=data["index"], delta=payload) if payload is not None else None
+        return None
+
+    # -- Request translation --
+
+    def _anthropic_to_openai(self, request: AnthropicCreateMessageRequest) -> OpenAIChatCompletionRequestWithExtraBody:
+        """Translate an Anthropic create-message request into an OpenAI chat completion request."""
+        converted_messages = self._convert_messages_to_openai(request.system, request.messages)
+        # Empty or absent tools / tool_choice are forwarded as None.
+        converted_tools = self._convert_tools_to_openai(request.tools) if request.tools else None
+        converted_choice = self._convert_tool_choice_to_openai(request.tool_choice) if request.tool_choice else None
+
+        # top_k is forwarded as an extra keyword argument, and only when the caller set it.
+        # Note: Anthropic's "thinking" parameter has no equivalent in the OpenAI
+        # chat completions API and is intentionally not forwarded.
+        passthrough_extras: dict[str, Any] = {} if request.top_k is None else {"top_k": request.top_k}
+
+        return OpenAIChatCompletionRequestWithExtraBody(
+            model=request.model,
+            messages=converted_messages,  # type: ignore[arg-type]
+            max_tokens=request.max_tokens,
+            temperature=request.temperature,
+            top_p=request.top_p,
+            stop=request.stop_sequences,
+            tools=converted_tools,
+            tool_choice=converted_choice,
+            stream=request.stream or False,
+            service_tier=request.service_tier,  # type: ignore[arg-type]
+            **passthrough_extras,
+        )
+
+    def _convert_messages_to_openai(
+        self,
+        system: str | list[AnthropicTextBlock] | None,
+        messages: list[AnthropicMessage],
+    ) -> list[dict[str, Any]]:
+        """Flatten the system prompt and conversation turns into a single OpenAI message list."""
+        converted: list[dict[str, Any]] = []
+
+        if system is not None:
+            # A block-list system prompt is collapsed to newline-joined text.
+            prompt = system if isinstance(system, str) else "\n".join(part.text for part in system)
+            converted.append({"role": "system", "content": prompt})
+
+        # One Anthropic turn can expand to several OpenAI messages, hence extend rather than append.
+        for turn in messages:
+            converted.extend(self._convert_single_message(turn))
+
+        return converted
+
+    def _convert_single_message(self, msg: AnthropicMessage) -> list[dict[str, Any]]:
+        """Convert a single Anthropic message to one or more OpenAI messages.
+
+        String content maps one-to-one and assistant block lists are delegated to
+        _convert_assistant_message. Other block lists are walked in order: runs of
+        text/image blocks become user messages and each tool_result block becomes
+        its own "tool" message.
+        """
+        if isinstance(msg.content, str):
+            return [{"role": msg.role, "content": msg.content}]
+
+        if msg.role == "assistant":
+            return [self._convert_assistant_message(msg.content)]
+
+        converted: list[dict[str, Any]] = []
+        pending_parts: list[dict[str, Any]] = []
+
+        def flush_pending() -> None:
+            """Emit the accumulated text/image parts as one user message, if there are any."""
+            if not pending_parts:
+                return
+            # OpenAI content must be a string or a list, never a single dict:
+            # a lone text part is unwrapped to its string, anything else stays a list.
+            lone_text = len(pending_parts) == 1 and pending_parts[0].get("type") == "text"
+            content: str | list[dict[str, Any]] = pending_parts[0]["text"] if lone_text else list(pending_parts)
+            converted.append({"role": "user", "content": content})
+            pending_parts.clear()
+
+        # User message: may contain text, image and/or tool_result blocks
+        for block in msg.content:
+            if isinstance(block, AnthropicToolResultBlock):
+                # Anything collected so far must come before the tool message.
+                flush_pending()
+                tool_output = block.content
+                if isinstance(tool_output, list):
+                    # Only the text blocks of a list-valued result are kept, newline-joined.
+                    tool_output = "\n".join(b.text for b in tool_output if isinstance(b, AnthropicTextBlock))
+                converted.append(
+                    {
+                        "role": "tool",
+                        "tool_call_id": block.tool_use_id,
+                        "content": tool_output,
+                    }
+                )
+            elif isinstance(block, AnthropicTextBlock):
+                pending_parts.append({"type": "text", "text": block.text})
+            elif isinstance(block, AnthropicImageBlock):
+                data_url = f"data:{block.source.media_type};base64,{block.source.data}"
+                pending_parts.append(
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": data_url},
+                    }
+                )
+
+        flush_pending()
+        # A block list that produced no messages still yields one empty user message.
+        return converted or [{"role": "user", "content": ""}]
+
+    def _convert_assistant_message(self, content: list[AnthropicContentBlock]) -> dict[str, Any]:
+        """Convert an assistant message with content blocks to OpenAI format.
+
+        Text blocks are newline-joined into "content" and tool_use blocks become
+        "tool_calls". Blocks of any other type are skipped.
+        """
+        text_parts: list[str] = []
+        tool_calls: list[dict[str, Any]] = []
+
+        for block in content:
+            if isinstance(block, AnthropicTextBlock):
+                text_parts.append(block.text)
+            elif isinstance(block, AnthropicToolUseBlock):
+                # OpenAI carries tool arguments as a JSON-encoded string, not a dict.
+                function_call = {"name": block.name, "arguments": json.dumps(block.input)}
+                tool_calls.append({"id": block.id, "type": "function", "function": function_call})
+
+        msg: dict[str, Any] = {"role": "assistant"}
+        # OpenAI only allows "content" to be omitted when tool_calls are present, so a message
+        # with neither text nor tool calls (e.g. only skipped blocks) gets an empty string,
+        # mirroring the empty-user-message fallback in _convert_single_message.
+        if text_parts or not tool_calls:
+            msg["content"] = "\n".join(text_parts)
+        if tool_calls:
+            msg["tool_calls"] = tool_calls
+
+        return msg
+
+    def _convert_tools_to_openai(self, tools: list[AnthropicToolDef]) -> list[dict[str, Any]]:
+        """Wrap each Anthropic tool definition in OpenAI's function-tool envelope."""
+        converted: list[dict[str, Any]] = []
+        for tool_def in tools:
+            function_spec = {
+                "name": tool_def.name,
+                # A missing description is sent as an empty string.
+                "description": tool_def.description or "",
+                "parameters": tool_def.input_schema,
+            }
+            converted.append({"type": "function", "function": function_spec})
+        return converted
+
+    def _convert_tool_choice_to_openai(self, tool_choice: Any) -> Any:
+        """Map an Anthropic tool_choice (string or dict form) onto the OpenAI tool_choice value."""
+        # "any" -> "required" and "none" -> "none"; every other keyword, and every
+        # value that is neither a string nor a dict, resolves to "auto".
+        if isinstance(tool_choice, dict):
+            if tool_choice.get("type") == "tool":
+                # A specific tool was requested: use OpenAI's named-function form.
+                return {"type": "function", "function": {"name": tool_choice["name"]}}
+            keyword = tool_choice.get("type")
+        elif isinstance(tool_choice, str):
+            keyword = tool_choice
+        else:
+            return "auto"
+
+        if keyword == "any":
+            return "required"
+        if keyword == "none":
+            return "none"
+        return "auto"
+
+    # -- Response translation --
+
+    def _openai_to_anthropic(self, response: OpenAIChatCompletion, request_model: str) -> AnthropicMessageResponse:
+        """Translate a non-streaming OpenAI chat completion into an Anthropic message response."""
+        content: list[AnthropicContentBlock] = []
+
+        if response.choices:
+            # Only the first choice is translated.
+            choice = response.choices[0]
+            message = choice.message
+
+            if message and message.content:
+                content.append(AnthropicTextBlock(text=message.content))
+
+            if message and message.tool_calls:
+                for tc in message.tool_calls:
+                    if not hasattr(tc, "function") or tc.function is None:
+                        continue
+                    try:
+                        tool_input = json.loads(tc.function.arguments) if tc.function.arguments else {}
+                    except json.JSONDecodeError:
+                        tool_input = {}
+                    # Valid JSON that is not an object (e.g. "null" or "[]") cannot be a tool input
+                    # mapping either, so it gets the same empty-input fallback as malformed JSON.
+                    if not isinstance(tool_input, dict):
+                        tool_input = {}
+                    tool_id = tc.id or f"toolu_{uuid.uuid4().hex[:24]}"
+                    content.append(AnthropicToolUseBlock(id=tool_id, name=tc.function.name or "", input=tool_input))
+
+            finish_reason = choice.finish_reason or "stop"
+            stop_reason = _FINISH_TO_STOP_REASON.get(finish_reason, "end_turn")
+        else:
+            stop_reason = "end_turn"
+
+        usage = AnthropicUsage()
+        if response.usage:
+            usage = AnthropicUsage(
+                input_tokens=response.usage.prompt_tokens or 0,
+                output_tokens=response.usage.completion_tokens or 0,
+            )
+
+        return AnthropicMessageResponse(
+            id=f"msg_{uuid.uuid4().hex[:24]}",
+            content=content,
+            model=request_model,
+            stop_reason=stop_reason,
+            usage=usage,
+        )
+
+    # -- Streaming translation --
+
+    async def _stream_openai_to_anthropic(
+        self,
+        openai_stream: AsyncIterator[OpenAIChatCompletionChunk],
+        request_model: str,
+    ) -> AsyncIterator[AnthropicStreamEvent]:
+        """Translate OpenAI streaming chunks to Anthropic streaming events (first choice of each chunk only)."""
+
+        # Emit message_start (usage is zeroed here; the collected counts go out in the final message_delta)
+        yield MessageStartEvent(
+            message=AnthropicMessageResponse(
+                id=f"msg_{uuid.uuid4().hex[:24]}",
+                content=[],
+                model=request_model,
+                stop_reason=None,
+                usage=AnthropicUsage(input_tokens=0, output_tokens=0),
+            ),
+        )
+
+        content_block_index = 0  # index the next (or currently open text) content block uses
+        in_text_block = False
+        in_tool_blocks: dict[int, bool] = {}  # tool_call_index -> started
+        tool_call_index_to_block_index: dict[int, int] = {}  # OpenAI tool_call index -> Anthropic block index
+        output_tokens = 0
+        input_tokens = 0
+        stop_reason = "end_turn"  # used when no chunk carries a finish_reason
+
+        async for chunk in openai_stream:
+            if not chunk.choices:
+                # Usage-only chunk: record the counts and move on
+                if chunk.usage:
+                    input_tokens = chunk.usage.prompt_tokens or 0
+                    output_tokens = chunk.usage.completion_tokens or 0
+                continue
+
+            choice = chunk.choices[0]
+            delta = choice.delta
+
+            if delta and delta.content:
+                if not in_text_block:  # first text since the last closed block: open a new text block
+                    yield ContentBlockStartEvent(
+                        index=content_block_index,
+                        content_block=AnthropicTextBlock(text=""),
+                    )
+                    in_text_block = True
+
+                yield ContentBlockDeltaEvent(
+                    index=content_block_index,
+                    delta=_TextDelta(text=delta.content),
+                )
+
+            if delta and delta.tool_calls:
+                for tc_delta in delta.tool_calls:
+                    tc_idx = tc_delta.index if tc_delta.index is not None else 0  # missing index is treated as 0
+
+                    if tc_idx not in in_tool_blocks:
+                        # Close text block if open (its index is consumed, so the tool block gets the next one)
+                        if in_text_block:
+                            yield ContentBlockStopEvent(index=content_block_index)
+                            content_block_index += 1
+                            in_text_block = False
+
+                        # Start new tool_use block; its input starts empty and is filled by input_json deltas
+                        in_tool_blocks[tc_idx] = True
+                        tool_call_index_to_block_index[tc_idx] = content_block_index
+
+                        yield ContentBlockStartEvent(
+                            index=content_block_index,
+                            content_block=AnthropicToolUseBlock(
+                                id=tc_delta.id or f"toolu_{uuid.uuid4().hex[:24]}",
+                                name=tc_delta.function.name if tc_delta.function and tc_delta.function.name else "",
+                                input={},
+                            ),
+                        )
+                        content_block_index += 1
+
+                    if tc_delta.function and tc_delta.function.arguments:
+                        block_idx = tool_call_index_to_block_index[tc_idx]
+                        yield ContentBlockDeltaEvent(
+                            index=block_idx,
+                            delta=_InputJsonDelta(partial_json=tc_delta.function.arguments),
+                        )
+
+            if choice.finish_reason:  # the last finish_reason seen wins
+                stop_reason = _FINISH_TO_STOP_REASON.get(choice.finish_reason, "end_turn")
+
+            if chunk.usage:
+                input_tokens = chunk.usage.prompt_tokens or 0
+                output_tokens = chunk.usage.completion_tokens or 0
+
+        # Close any open blocks. NOTE(review): tool_use blocks are only stopped here, after the stream ends.
+        if in_text_block:
+            yield ContentBlockStopEvent(index=content_block_index)
+
+        for _tc_idx, block_idx in tool_call_index_to_block_index.items():
+            yield ContentBlockStopEvent(index=block_idx)
+
+        # Final events: message_delta carries the stop reason and token counts, then message_stop
+        yield MessageDeltaEvent(
+            delta=_MessageDelta(stop_reason=stop_reason),
+            usage=AnthropicUsage(input_tokens=input_tokens, output_tokens=output_tokens),
+        )
+        yield MessageStopEvent()
diff --git a/src/llama_stack/providers/registry/messages.py b/src/llama_stack/providers/registry/messages.py
new file mode 100644
index 0000000000..0c88a6c219
--- /dev/null
+++ b/src/llama_stack/providers/registry/messages.py
@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from llama_stack_api import (
+    Api,
+    InlineProviderSpec,
+    ProviderSpec,
+)
+
+
+def available_providers() -> list[ProviderSpec]:
+    """Enumerate the provider specifications registered for the messages API.
+
+    Returns:
+        A one-element list holding the ``inline::builtin`` spec, which declares
+        the inference API as its only API dependency and needs no pip packages.
+    """
+    builtin_spec = InlineProviderSpec(
+        api=Api.messages,
+        provider_type="inline::builtin",
+        pip_packages=[],
+        module="llama_stack.providers.inline.messages",
+        config_class="llama_stack.providers.inline.messages.config.MessagesConfig",
+        api_dependencies=[Api.inference],
+        description="Anthropic Messages API adapter that translates to the inference API.",
+    )
+    return [builtin_spec]
diff --git a/src/llama_stack/testing/api_recorder.py b/src/llama_stack/testing/api_recorder.py
index aa28fc395c..0499d53039 100644
--- a/src/llama_stack/testing/api_recorder.py
+++ b/src/llama_stack/testing/api_recorder.py
@@ -881,6 +881,177 @@ async def __aexit__(self, exc_type, exc_val, exc_tb):
         raise AssertionError(f"Invalid mode: {_current_mode}")
 
 
+async def _patched_httpx_async_post(original_post, self, url, **kwargs):
+    """Record/replay shim installed over ``httpx.AsyncClient.post`` for Messages API passthrough.
+
+    Only requests whose URL contains ``/v1/messages`` are intercepted; all other requests,
+    and every request made in LIVE mode or without an active storage, go straight to
+    ``original_post``.
+
+    Args:
+        original_post: The unpatched ``httpx.AsyncClient.post`` function.
+        self: The ``httpx.AsyncClient`` instance the call was made on.
+        url: Request URL (rendered with ``str()`` for matching and hashing).
+        **kwargs: Remaining ``post`` arguments; ``json`` is read as the request payload.
+
+    Returns:
+        The live response, or a synthetic ``httpx.Response`` rebuilt from a stored recording.
+
+    Raises:
+        RuntimeError: In REPLAY mode when no recording matches the request.
+        AssertionError: If the current mode is none of the known recording modes.
+    """
+    global _current_mode, _current_storage
+
+    target = str(url)
+    bypass = "/v1/messages" not in target or _current_mode == APIRecordingMode.LIVE or _current_storage is None
+    if bypass:
+        return await original_post(self, url, **kwargs)
+
+    payload = kwargs.get("json", {})
+    digest = normalize_http_request(target, "POST", payload)
+
+    replay_modes = (APIRecordingMode.REPLAY, APIRecordingMode.RECORD_IF_MISSING)
+    record_modes = (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING)
+
+    if _current_mode in replay_modes:
+        hit = _current_storage.find_recording(digest)
+        if hit:
+            import httpx as _httpx
+
+            encoded_body = json.dumps(hit["response"]["body"]).encode()
+            # The synthetic response needs an attached request, otherwise raise_for_status() cannot be used on it
+            placeholder_request = _httpx.Request("POST", target)
+            return _httpx.Response(
+                status_code=hit["response"].get("status", 200),
+                headers={"content-type": "application/json", "anthropic-version": "2023-06-01"},
+                content=encoded_body,
+                request=placeholder_request,
+            )
+        if _current_mode == APIRecordingMode.REPLAY:
+            raise RuntimeError(
+                f"Recording not found for httpx POST {target}\n"
+                f"\n"
+                f"Run './scripts/integration-tests.sh --inference-mode record-if-missing' with required API keys to generate."
+            )
+
+    if _current_mode not in record_modes:
+        raise AssertionError(f"Invalid mode: {_current_mode}")
+
+    # Recording path: perform the real call, then persist request and response side by side
+    live_response = await original_post(self, url, **kwargs)
+    recorded_request = {
+        "test_id": get_test_context(),
+        "url": target,
+        "method": "POST",
+        "payload": payload,
+    }
+    recorded_response = {
+        "status": live_response.status_code,
+        "body": live_response.json(),
+        "is_streaming": False,
+    }
+    _current_storage.store_recording(digest, recorded_request, recorded_response)
+    return live_response
+
+
+def _patched_httpx_async_stream(original_stream, self, method, url, **kwargs):
+    """Patched version of httpx.AsyncClient.stream for recording/replay of streaming Messages API passthrough.
+
+    Intercepts streaming requests whose URL contains ``/v1/messages``. Every other request,
+    and every request made in LIVE mode or without an active storage, is forwarded untouched.
+
+    Args:
+        original_stream: The unpatched ``httpx.AsyncClient.stream`` function.
+        self: The ``httpx.AsyncClient`` instance the call was made on.
+        method: HTTP method of the request. It is upper-cased and used for the request hash,
+            the stored request metadata and the "recording not found" message.
+        url: Request URL (rendered with ``str()`` for matching and hashing).
+        **kwargs: Remaining ``stream`` arguments; ``json`` is read as the request payload.
+
+    Returns:
+        An async context manager: the original one (passthrough), a replay context that yields
+        the recorded SSE lines, or a recording context that wraps the live response.
+
+    Raises:
+        RuntimeError: In REPLAY mode when no recording matches the request.
+        AssertionError: If the current mode is none of the known recording modes.
+    """
+    global _current_mode, _current_storage
+
+    url_str = str(url)
+    is_messages = "/v1/messages" in url_str
+
+    if not is_messages or _current_mode == APIRecordingMode.LIVE or _current_storage is None:
+        return original_stream(self, method, url, **kwargs)
+
+    # Hash and record the method the caller actually used instead of assuming POST.
+    # For "POST" callers this yields exactly the same hash as before.
+    http_method = str(method).upper()
+    json_payload = kwargs.get("json", {})
+    request_hash = normalize_http_request(url_str, http_method, json_payload)
+
+    class _ReplayStreamContext:
+        """Async context manager that replays recorded SSE events as a mock httpx response."""
+
+        def __init__(self, sse_lines: list[str]):
+            self._sse_lines = sse_lines
+
+        async def __aenter__(self):
+            import httpx as _httpx
+
+            class _MockStreamResponse:
+                """Minimal stand-in exposing only the response members used here."""
+
+                def __init__(self, lines):
+                    self.status_code = 200
+                    self.headers = _httpx.Headers(
+                        {"content-type": "text/event-stream", "anthropic-version": "2023-06-01"}
+                    )
+                    self._lines = lines
+
+                def raise_for_status(self):
+                    # Replayed streams always report status 200, so there is nothing to raise
+                    pass
+
+                async def aiter_lines(self):
+                    for line in self._lines:
+                        yield line
+
+            return _MockStreamResponse(self._sse_lines)
+
+        async def __aexit__(self, *args):
+            pass
+
+    class _RecordingStreamResponse:
+        """Wraps a real httpx streaming response to capture SSE lines for recording."""
+
+        def __init__(self, response, url_str, http_method, json_payload, request_hash):
+            self._response = response
+            self._url = url_str
+            self._method = http_method
+            self._payload = json_payload
+            self._hash = request_hash
+            self._recorded_lines: list[str] = []
+            self.status_code = response.status_code
+            self.headers = response.headers
+
+        def raise_for_status(self):
+            self._response.raise_for_status()
+
+        async def aiter_lines(self):
+            async for line in self._response.aiter_lines():
+                self._recorded_lines.append(line)
+                yield line
+
+            # After the stream is exhausted, store the recording.
+            # A consumer that stops iterating early never reaches this point, so nothing is stored.
+            request_data = {
+                "test_id": get_test_context(),
+                "url": self._url,
+                "method": self._method,
+                "payload": self._payload,
+            }
+            response_data = {
+                "body": self._recorded_lines,
+                "is_streaming": True,
+            }
+            if _current_storage:
+                _current_storage.store_recording(self._hash, request_data, response_data)
+
+    if _current_mode in (APIRecordingMode.REPLAY, APIRecordingMode.RECORD_IF_MISSING):
+        recording = _current_storage.find_recording(request_hash)
+        if recording:
+            return _ReplayStreamContext(recording["response"]["body"])
+        elif _current_mode == APIRecordingMode.REPLAY:
+            raise RuntimeError(
+                f"Recording not found for httpx stream {http_method} {url_str}\n"
+                f"\n"
+                f"Run './scripts/integration-tests.sh --inference-mode record-if-missing' with required API keys to generate."
+            )
+
+    if _current_mode in (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING):
+        # Capture the httpx client instance before defining the inner class,
+        # because `self` inside _RecordCtx refers to the context manager itself
+        httpx_client = self
+
+        class _RecordCtx:
+            """Opens the real stream and hands back a recording wrapper around its response."""
+
+            async def __aenter__(self):
+                self._cm = original_stream(httpx_client, method, url, **kwargs)
+                resp = await self._cm.__aenter__()
+                self._wrapper = _RecordingStreamResponse(resp, url_str, http_method, json_payload, request_hash)
+                return self._wrapper
+
+            async def __aexit__(self, *args):
+                return await self._cm.__aexit__(*args)
+
+        return _RecordCtx()
+
+    raise AssertionError(f"Invalid mode: {_current_mode}")
+
+
 _cached_provider_metadata: dict[str, dict[str, str]] = {}
 
 
@@ -1118,6 +1289,7 @@ def patch_inference_clients():
     global _original_methods
 
     import aiohttp
+    import httpx
     from ollama import AsyncClient as OllamaAsyncClient
     from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
     from openai.resources.completions import AsyncCompletions
@@ -1128,7 +1300,7 @@ def patch_inference_clients():
     from llama_stack.providers.inline.file_processor.pypdf.adapter import PyPDFFileProcessorAdapter
     from llama_stack.providers.remote.tool_runtime.tavily_search.tavily_search import TavilySearchToolRuntimeImpl
 
-    # Store original methods for OpenAI, Ollama clients, tool runtimes, file processors, and aiohttp
+    # Store original methods for OpenAI, Ollama clients, tool runtimes, file processors, aiohttp, and httpx
     _original_methods = {
         "chat_completions_create": AsyncChatCompletions.create,
         "completions_create": AsyncCompletions.create,
@@ -1144,6 +1316,8 @@ def patch_inference_clients():
         "tavily_invoke_tool": TavilySearchToolRuntimeImpl.invoke_tool,
         "pypdf_process_file": PyPDFFileProcessorAdapter.process_file,
         "aiohttp_post": aiohttp.ClientSession.post,
+        "httpx_async_post": httpx.AsyncClient.post,
+        "httpx_async_stream": httpx.AsyncClient.stream,
     }
 
     # Create patched methods for OpenAI client
@@ -1249,6 +1423,17 @@ def patched_aiohttp_session_post(self, url, **kwargs):
     # Apply aiohttp patch
     aiohttp.ClientSession.post = patched_aiohttp_session_post
 
+    # Create patched methods for httpx AsyncClient (Messages API passthrough)
+    async def patched_httpx_async_post(self, url, **kwargs):
+        return await _patched_httpx_async_post(_original_methods["httpx_async_post"], self, url, **kwargs)
+
+    def patched_httpx_async_stream(self, method, url, **kwargs):
+        return _patched_httpx_async_stream(_original_methods["httpx_async_stream"], self, method, url, **kwargs)
+
+    # Apply httpx patches
+    httpx.AsyncClient.post = patched_httpx_async_post
+    httpx.AsyncClient.stream = patched_httpx_async_stream
+
 
 def unpatch_inference_clients():
     """Remove monkey patches and restore original OpenAI, Ollama client, tool runtime, and aiohttp methods."""
@@ -1259,6 +1444,7 @@ def unpatch_inference_clients():
 
     # Import here to avoid circular imports
     import aiohttp
+    import httpx
     from ollama import AsyncClient as OllamaAsyncClient
     from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
     from openai.resources.completions import AsyncCompletions
@@ -1293,6 +1479,10 @@ def unpatch_inference_clients():
     # Restore aiohttp method
     aiohttp.ClientSession.post = _original_methods["aiohttp_post"]
 
+    # Restore httpx methods
+    httpx.AsyncClient.post = _original_methods["httpx_async_post"]
+    httpx.AsyncClient.stream = _original_methods["httpx_async_stream"]
+
     _original_methods.clear()
 
 
diff --git a/src/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py
index 90c209598f..04d814dd9b 100644
--- a/src/llama_stack_api/__init__.py
+++ b/src/llama_stack_api/__init__.py
@@ -325,6 +325,25 @@
     UserMessage,
 )
 from .inspect_api import Inspect
+from .messages import (
+    Messages,
+    AnthropicContentBlock,
+    AnthropicCountTokensRequest,
+    AnthropicCountTokensResponse,
+    AnthropicCreateMessageRequest,
+    AnthropicErrorResponse,
+    AnthropicImageBlock,
+    AnthropicImageSource,
+    AnthropicMessage,
+    AnthropicMessageResponse,
+    AnthropicTextBlock,
+    AnthropicThinkingBlock,
+    AnthropicThinkingConfig,
+    AnthropicToolDef,
+    AnthropicToolResultBlock,
+    AnthropicToolUseBlock,
+    AnthropicUsage,
+)
 from .models import (
     CommonModelFields,
     GetModelRequest,
@@ -1122,6 +1141,24 @@
     "ViolationLevel",
     "WebSearchToolTypes",
     "WeightedRanker",
+    # Messages API
+    "Messages",
+    "AnthropicContentBlock",
+    "AnthropicCountTokensRequest",
+    "AnthropicCountTokensResponse",
+    "AnthropicCreateMessageRequest",
+    "AnthropicErrorResponse",
+    "AnthropicImageBlock",
+    "AnthropicImageSource",
+    "AnthropicMessage",
+    "AnthropicMessageResponse",
+    "AnthropicTextBlock",
+    "AnthropicThinkingBlock",
+    "AnthropicThinkingConfig",
+    "AnthropicToolDef",
+    "AnthropicToolResultBlock",
+    "AnthropicToolUseBlock",
+    "AnthropicUsage",
     # Validators
     "validate_embeddings_input_is_text",
     # helpers
diff --git a/src/llama_stack_api/datatypes.py b/src/llama_stack_api/datatypes.py
index 95b3a0983c..900529bac2 100644
--- a/src/llama_stack_api/datatypes.py
+++ b/src/llama_stack_api/datatypes.py
@@ -115,6 +115,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
     :cvar file_processors: File parsing and processing operations
     :cvar prompts: Prompt versions and management
     :cvar connectors: External connector management (e.g., MCP servers)
+    :cvar messages: Anthropic Messages API compatibility layer
     :cvar inspect: Built-in system inspection and introspection
     """
 
@@ -141,6 +142,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
     prompts = "prompts"
     conversations = "conversations"
     connectors = "connectors"
+    messages = "messages"
 
     # built-in API
     inspect = "inspect"
diff --git a/src/llama_stack_api/messages/__init__.py b/src/llama_stack_api/messages/__init__.py
new file mode 100644
index 0000000000..e6ffc09f53
--- /dev/null
+++ b/src/llama_stack_api/messages/__init__.py
@@ -0,0 +1,66 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Messages API protocol and models.
+
+This module contains the Messages protocol definition for the Anthropic Messages API.
+Pydantic models are defined in llama_stack_api.messages.models.
+The FastAPI router is defined in llama_stack_api.messages.fastapi_routes.
+"""
+
+from . import fastapi_routes
+from .api import Messages
+from .models import (
+    AnthropicContentBlock,
+    AnthropicCountTokensRequest,
+    AnthropicCountTokensResponse,
+    AnthropicCreateMessageRequest,
+    AnthropicErrorResponse,
+    AnthropicImageBlock,
+    AnthropicImageSource,
+    AnthropicMessage,
+    AnthropicMessageResponse,
+    AnthropicTextBlock,
+    AnthropicThinkingBlock,
+    AnthropicThinkingConfig,
+    AnthropicToolDef,
+    AnthropicToolResultBlock,
+    AnthropicToolUseBlock,
+    AnthropicUsage,
+    ContentBlockDeltaEvent,
+    ContentBlockStartEvent,
+    ContentBlockStopEvent,
+    MessageDeltaEvent,
+    MessageStartEvent,
+    MessageStopEvent,
+)
+
+__all__ = [
+    "Messages",
+    "AnthropicContentBlock",
+    "AnthropicCountTokensRequest",
+    "AnthropicCountTokensResponse",
+    "AnthropicCreateMessageRequest",
+    "AnthropicErrorResponse",
+    "AnthropicImageBlock",
+    "AnthropicImageSource",
+    "AnthropicMessage",
+    "AnthropicMessageResponse",
+    "AnthropicTextBlock",
+    "AnthropicThinkingBlock",
+    "AnthropicThinkingConfig",
+    "AnthropicToolDef",
+    "AnthropicToolResultBlock",
+    "AnthropicToolUseBlock",
+    "AnthropicUsage",
+    "ContentBlockDeltaEvent",
+    "ContentBlockStartEvent",
+    "ContentBlockStopEvent",
+    "MessageDeltaEvent",
+    "MessageStartEvent",
+    "MessageStopEvent",
+    "fastapi_routes",
+]
diff --git a/src/llama_stack_api/messages/api.py b/src/llama_stack_api/messages/api.py
new file mode 100644
index 0000000000..3b42e684c6
--- /dev/null
+++ b/src/llama_stack_api/messages/api.py
@@ -0,0 +1,31 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import AsyncIterator
+from typing import Protocol, runtime_checkable
+
+from .models import (
+    AnthropicCountTokensRequest,
+    AnthropicCountTokensResponse,
+    AnthropicCreateMessageRequest,
+    AnthropicMessageResponse,
+    AnthropicStreamEvent,
+)
+
+
+@runtime_checkable
+class Messages(Protocol):
+    """Protocol for the Anthropic Messages API."""
+
+    # Handle a create-message request. The declared return type allows either a complete
+    # AnthropicMessageResponse or an async iterator of AnthropicStreamEvent objects.
+    async def create_message(
+        self,
+        request: AnthropicCreateMessageRequest,
+    ) -> AnthropicMessageResponse | AsyncIterator[AnthropicStreamEvent]: ...
+
+    # Handle a count-tokens request and return an AnthropicCountTokensResponse.
+    async def count_message_tokens(
+        self,
+        request: AnthropicCountTokensRequest,
+    ) -> AnthropicCountTokensResponse: ...
diff --git a/src/llama_stack_api/messages/fastapi_routes.py b/src/llama_stack_api/messages/fastapi_routes.py
new file mode 100644
index 0000000000..e82dc21602
--- /dev/null
+++ b/src/llama_stack_api/messages/fastapi_routes.py
@@ -0,0 +1,202 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""FastAPI router for the Anthropic Messages API.
+
+This module defines the FastAPI router for the /v1/messages endpoint,
+serving the Anthropic Messages API format.
+"""
+
+import asyncio
+import contextvars
+import json
+import logging  # allow-direct-logging
+from collections.abc import AsyncIterator
+from typing import Annotated, Any
+
+from fastapi import APIRouter, Body, HTTPException, Request, Response
+from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import BaseModel
+
+from llama_stack_api.common.errors import ModelNotFoundError
+from llama_stack_api.router_utils import standard_responses
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+from .api import Messages
+from .models import (
+    AnthropicCountTokensRequest,
+    AnthropicCountTokensResponse,
+    AnthropicCreateMessageRequest,
+    AnthropicErrorResponse,
+    AnthropicMessageResponse,
+    _AnthropicErrorDetail,
+)
+
+logger = logging.LoggerAdapter(logging.getLogger(__name__), {"category": "messages"})
+
+# Anthropic API version we are compatible with
+_ANTHROPIC_VERSION = "2023-06-01"
+
+
+def _create_anthropic_sse_event(event_type: str, data: Any) -> str:
+    """Render one named server-sent event in the Anthropic wire format.
+
+    Args:
+        event_type: Value written after ``event:``.
+        data: Payload for the ``data:`` line. Pydantic models are serialized with
+            ``model_dump_json``; anything else goes through ``json.dumps``.
+
+    Returns:
+        The ``event:`` line, the ``data:`` line and a terminating blank line as one string.
+    """
+    serialized = data.model_dump_json() if isinstance(data, BaseModel) else json.dumps(data)
+    return f"event: {event_type}\ndata: {serialized}\n\n"
+
+
+async def _anthropic_sse_generator(event_gen: AsyncIterator) -> AsyncIterator[str]:
+    """Turn a stream of Anthropic event objects into SSE-formatted strings.
+
+    Each event is emitted under its ``type`` attribute ("unknown" when it has none).
+    Cancellation closes the source generator and is re-raised. Any other exception is
+    logged and reported to the client as a final ``error`` event instead of propagating.
+    """
+    try:
+        async for stream_event in event_gen:
+            name = getattr(stream_event, "type", "unknown")
+            yield _create_anthropic_sse_event(name, stream_event)
+    except asyncio.CancelledError:
+        if hasattr(event_gen, "aclose"):
+            await event_gen.aclose()
+        raise
+    except Exception as exc:
+        logger.exception("Error in Anthropic SSE generator")
+        detail = _AnthropicErrorDetail(type="api_error", message=str(exc))
+        yield _create_anthropic_sse_event("error", AnthropicErrorResponse(error=detail))
+
+
+def _preserve_context_for_sse(event_gen):
+    """Preserve request context for SSE streaming.
+
+    StreamingResponse runs in a different task, losing request contextvars.
+    This wrapper captures and restores the context.
+
+    Args:
+        event_gen: Async generator whose items are forwarded unchanged.
+
+    Returns:
+        An async generator yielding the same items as ``event_gen``. Each
+        ``__anext__`` step runs in a task created under the context that was
+        copied when this function was called.
+    """
+    # Snapshot the contextvars of the caller at call time
+    context = contextvars.copy_context()
+
+    async def wrapper():
+        try:
+            while True:
+                try:
+                    # create_task is invoked through context.run so the task that advances
+                    # the generator is created under the captured context
+                    task = context.run(asyncio.create_task, event_gen.__anext__())
+                    item = await task
+                except StopAsyncIteration:
+                    # Source generator is exhausted; finish this generator as well
+                    break
+                yield item
+        except (asyncio.CancelledError, GeneratorExit):
+            # Cancelled, or the consumer closed this generator: close the source
+            # generator if it supports aclose(), then propagate
+            if hasattr(event_gen, "aclose"):
+                await event_gen.aclose()
+            raise
+
+    return wrapper()
+
+
+def _anthropic_error_response(status_code: int, message: str) -> JSONResponse:
+    """Create an Anthropic-format error JSONResponse.
+
+    Args:
+        status_code: HTTP status to return. It also selects the Anthropic error ``type``;
+            statuses without a dedicated entry fall back to ``api_error``.
+        message: Human-readable error message. Non-string values are converted with
+            ``str()`` so building the error body cannot itself fail validation.
+
+    Returns:
+        A JSONResponse whose body is a serialized AnthropicErrorResponse.
+    """
+    error_type_map = {
+        400: "invalid_request_error",
+        401: "authentication_error",
+        403: "permission_error",
+        404: "not_found_error",
+        413: "request_too_large",
+        429: "rate_limit_error",
+        529: "overloaded_error",
+    }
+    error_type = error_type_map.get(status_code, "api_error")
+    # Callers forward HTTPException.detail here, which is not guaranteed to be a string
+    if not isinstance(message, str):
+        message = str(message)
+    body = AnthropicErrorResponse(
+        error=_AnthropicErrorDetail(type=error_type, message=message),
+    )
+    return JSONResponse(status_code=status_code, content=body.model_dump())
+
+
+def create_router(impl: Messages) -> APIRouter:
+    """Create a FastAPI router for the Anthropic Messages API.
+
+    Both endpoints translate implementation failures into Anthropic-format error bodies
+    with the same mapping: NotImplementedError -> 501, ModelNotFoundError -> 404,
+    ValueError -> 400, HTTPException -> its own status code, anything else -> 500
+    (logged with traceback). Successful responses carry the ``anthropic-version`` header.
+
+    Args:
+        impl: The Messages implementation instance
+
+    Returns:
+        APIRouter configured for the Messages API
+    """
+    router = APIRouter(
+        prefix=f"/{LLAMA_STACK_API_V1}",
+        tags=["Messages"],
+        responses=standard_responses,
+    )
+
+    @router.post(
+        "/messages",
+        summary="Create a message.",
+        description="Create a message using the Anthropic Messages API format.",
+        status_code=200,
+        response_model=AnthropicMessageResponse,
+        responses={
+            200: {
+                "description": "An AnthropicMessageResponse or a stream of Anthropic SSE events.",
+                "content": {
+                    "text/event-stream": {},
+                },
+            },
+        },
+    )
+    async def create_message(
+        raw_request: Request,
+        params: Annotated[AnthropicCreateMessageRequest, Body(...)],
+    ) -> Response:
+        # raw_request is not read by this handler; it is kept so the endpoint signature is unchanged
+        try:
+            result = await impl.create_message(params)
+        except NotImplementedError as e:
+            return _anthropic_error_response(501, str(e))
+        except ModelNotFoundError as e:
+            # Checked before ValueError so a missing model is reported as 404 even if
+            # ModelNotFoundError happens to derive from ValueError
+            return _anthropic_error_response(404, str(e))
+        except ValueError as e:
+            return _anthropic_error_response(400, str(e))
+        except HTTPException as e:
+            # detail may be a non-string value; the error model requires a string message
+            return _anthropic_error_response(e.status_code, str(e.detail))
+        except Exception:
+            logger.exception("Failed to create message")
+            return _anthropic_error_response(500, "Internal server error")
+
+        response_headers = {"anthropic-version": _ANTHROPIC_VERSION}
+
+        # A streaming implementation result is sent as SSE; anything else is a complete message
+        if isinstance(result, AsyncIterator):
+            return StreamingResponse(
+                _preserve_context_for_sse(_anthropic_sse_generator(result)),
+                media_type="text/event-stream",
+                headers=response_headers,
+            )
+
+        return JSONResponse(
+            content=result.model_dump(exclude_none=True),
+            headers=response_headers,
+        )
+
+    @router.post(
+        "/messages/count_tokens",
+        response_model=AnthropicCountTokensResponse,
+        summary="Count tokens in a message.",
+        description="Count the number of tokens in a message request.",
+        responses={
+            200: {"description": "Token count for the request."},
+        },
+    )
+    async def count_message_tokens(
+        params: Annotated[AnthropicCountTokensRequest, Body(...)],
+    ) -> Response:
+        # Same error mapping as create_message, so an unknown model or an invalid request
+        # is not reported as an internal server error
+        try:
+            result = await impl.count_message_tokens(params)
+        except NotImplementedError as e:
+            return _anthropic_error_response(501, str(e))
+        except ModelNotFoundError as e:
+            return _anthropic_error_response(404, str(e))
+        except ValueError as e:
+            return _anthropic_error_response(400, str(e))
+        except HTTPException as e:
+            return _anthropic_error_response(e.status_code, str(e.detail))
+        except Exception:
+            logger.exception("Failed to count message tokens")
+            return _anthropic_error_response(500, "Internal server error")
+
+        return JSONResponse(
+            content=result.model_dump(),
+            headers={"anthropic-version": _ANTHROPIC_VERSION},
+        )
+
+    return router
diff --git a/src/llama_stack_api/messages/models.py b/src/llama_stack_api/messages/models.py
new file mode 100644
index 0000000000..bd39fa4bbf
--- /dev/null
+++ b/src/llama_stack_api/messages/models.py
@@ -0,0 +1,278 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Pydantic models for the Anthropic Messages API.
+
+These models define the request and response shapes for the /v1/messages endpoint,
+following the Anthropic Messages API specification.
+"""
+
+from __future__ import annotations
+
+from typing import Annotated, Any, Literal
+
+from pydantic import BaseModel, ConfigDict, Field
+
+# -- Content blocks --
+
+
+class AnthropicTextBlock(BaseModel):
+    """A text content block."""
+
+    # Fixed tag; AnthropicContentBlock uses this field as its discriminator
+    type: Literal["text"] = "text"
+    # The text carried by the block (required)
+    text: str
+
+
+class AnthropicImageSource(BaseModel):
+    """Source for an image content block."""
+
+    # Only the "base64" source kind is accepted by this model
+    type: Literal["base64"] = "base64"
+    # Both fields below are required; media_type is not validated beyond being a string
+    media_type: str = Field(..., description="MIME type of the image (e.g. image/png).")
+    data: str = Field(..., description="Base64-encoded image data.")
+
+
+class AnthropicImageBlock(BaseModel):
+    """An image content block."""
+
+    # Fixed tag; AnthropicContentBlock uses this field as its discriminator
+    type: Literal["image"] = "image"
+    # Where the image bytes come from (required)
+    source: AnthropicImageSource
+
+
+class AnthropicToolUseBlock(BaseModel):
+    """A tool use content block in an assistant message."""
+
+    # Fixed tag; AnthropicContentBlock uses this field as its discriminator
+    type: Literal["tool_use"] = "tool_use"
+    # All three fields below are required; input is an arbitrary JSON-object-shaped dict
+    id: str = Field(..., description="Unique ID for this tool invocation.")
+    name: str = Field(..., description="Name of the tool being called.")
+    input: dict[str, Any] = Field(..., description="Tool input arguments.")
+
+
+class AnthropicToolResultBlock(BaseModel):
+    """A tool result content block in a user message."""
+
+    # Fixed tag; AnthropicContentBlock uses this field as its discriminator
+    type: Literal["tool_result"] = "tool_result"
+    tool_use_id: str = Field(..., description="The ID of the tool_use block this result corresponds to.")
+    # Either plain text or a list of text/image blocks; defaults to the empty string when omitted
+    content: str | list[AnthropicTextBlock | AnthropicImageBlock] = Field(
+        default="",
+        description="The result content.",
+    )
+    # Tri-state: None means the caller did not say whether the call failed
+    is_error: bool | None = Field(default=None, description="Whether the tool call resulted in an error.")
+
+
+class AnthropicThinkingBlock(BaseModel):
+    """A thinking content block (extended thinking)."""
+
+    # Fixed tag; AnthropicContentBlock uses this field as its discriminator
+    type: Literal["thinking"] = "thinking"
+    thinking: str = Field(..., description="The model's thinking text.")
+    # Optional; None when no signature is supplied
+    signature: str | None = Field(default=None, description="Signature for the thinking block.")
+
+
+# Tagged union of every content block model; pydantic selects the concrete
+# class from the value of each block's "type" field.
+AnthropicContentBlock = Annotated[
+    AnthropicTextBlock
+    | AnthropicImageBlock
+    | AnthropicToolUseBlock
+    | AnthropicToolResultBlock
+    | AnthropicThinkingBlock,
+    Field(discriminator="type"),
+]
+
+# -- Messages --
+
+
+class AnthropicMessage(BaseModel):
+    """A message in the conversation."""
+
+    # Only "user" and "assistant" are accepted as roles
+    role: Literal["user", "assistant"]
+    # Required; a bare string or a list of discriminated content blocks
+    content: str | list[AnthropicContentBlock] = Field(
+        ...,
+        description="Message content: a string for simple text, or a list of content blocks.",
+    )
+
+
+# -- Tool definitions --
+
+
+class AnthropicToolDef(BaseModel):
+    """Definition of a tool available to the model."""
+
+    # Tool name (required)
+    name: str
+    # Optional free-text description; None when omitted
+    description: str | None = None
+    # Required; held as a plain dict, with no JSON Schema validation applied by this model
+    input_schema: dict[str, Any] = Field(..., description="JSON Schema for the tool's input.")
+
+
+# -- Thinking config --
+
+
+class AnthropicThinkingConfig(BaseModel):
+    """Configuration for extended thinking."""
+
+    # One of three modes; "enabled" is assumed when the field is omitted
+    type: Literal["enabled", "disabled", "adaptive"] = "enabled"
+    # Optional; when given it must be at least 1
+    budget_tokens: int | None = Field(default=None, ge=1, description="Maximum tokens for thinking.")
+
+
+# -- Request models --
+
+
+class AnthropicCreateMessageRequest(BaseModel):
+    """Request body for POST /v1/messages."""
+
+    # Fields not declared below are accepted and kept on the model instead of being rejected
+    model_config = ConfigDict(extra="allow")
+
+    # Required fields: model, messages and max_tokens (max_tokens must be >= 1)
+    model: str = Field(..., description="The model to use for generation.")
+    messages: list[AnthropicMessage] = Field(..., description="The messages in the conversation.")
+    max_tokens: int = Field(..., ge=1, description="The maximum number of tokens to generate.")
+    # Everything from here on is optional
+    system: str | list[AnthropicTextBlock] | None = Field(
+        default=None,
+        description="System prompt. A string or list of text blocks.",
+    )
+    tools: list[AnthropicToolDef] | None = Field(default=None, description="Tools available to the model.")
+    # Typed as Any: the shape described below is not enforced by this model
+    tool_choice: Any | None = Field(
+        default=None,
+        description="How the model should select tools. One of: 'auto', 'any', 'none', or {type: 'tool', name: '...'}.",
+    )
+    stream: bool | None = Field(default=False, description="Whether to stream the response.")
+    # Sampling controls: temperature and top_p are limited to [0.0, 1.0], top_k must be >= 1
+    temperature: float | None = Field(default=None, ge=0.0, le=1.0, description="Sampling temperature.")
+    top_p: float | None = Field(default=None, ge=0.0, le=1.0, description="Nucleus sampling parameter.")
+    top_k: int | None = Field(default=None, ge=1, description="Top-k sampling parameter.")
+    stop_sequences: list[str] | None = Field(default=None, description="Custom stop sequences.")
+    # String-to-string mapping only
+    metadata: dict[str, str] | None = Field(default=None, description="Request metadata.")
+    thinking: AnthropicThinkingConfig | None = Field(default=None, description="Extended thinking configuration.")
+    service_tier: str | None = Field(default=None, description="Service tier to use.")
+
+
+class AnthropicCountTokensRequest(BaseModel):
+    """Request body for POST /v1/messages/count_tokens."""
+
+    # model and messages are required; system and tools are optional
+    model: str = Field(..., description="The model to use for token counting.")
+    messages: list[AnthropicMessage] = Field(..., description="The messages to count tokens for.")
+    system: str | list[AnthropicTextBlock] | None = Field(default=None, description="System prompt.")
+    tools: list[AnthropicToolDef] | None = Field(default=None, description="Tools to include in token count.")
+
+
+# -- Response models --
+
+
+class AnthropicUsage(BaseModel):
+    """Token usage statistics."""
+
+    # Both counters default to 0, so the model can be built with no arguments
+    input_tokens: int = 0
+    output_tokens: int = 0
+    # Cache counters are optional and stay None unless explicitly provided
+    cache_creation_input_tokens: int | None = None
+    cache_read_input_tokens: int | None = None
+
+
+class AnthropicMessageResponse(BaseModel):
+    """Response from POST /v1/messages (non-streaming)."""
+
+    id: str = Field(..., description="Unique message ID (msg_ prefix).")
+    # type and role are fixed literals; callers do not need to pass them
+    type: Literal["message"] = "message"
+    role: Literal["assistant"] = "assistant"
+    content: list[AnthropicContentBlock] = Field(..., description="Response content blocks.")
+    # Model name (required)
+    model: str
+    # Plain string: the values named in the description are not enforced by this model
+    stop_reason: str | None = Field(
+        default=None,
+        description="Why the model stopped: end_turn, stop_sequence, tool_use, or max_tokens.",
+    )
+    stop_sequence: str | None = None
+    # A fresh zeroed AnthropicUsage is created per instance when usage is omitted
+    usage: AnthropicUsage = Field(default_factory=AnthropicUsage)
+
+
+class AnthropicCountTokensResponse(BaseModel):
+    """Response from POST /v1/messages/count_tokens."""
+
+    # The only field of the response (required)
+    input_tokens: int
+
+
+# -- Streaming event models --
+
+
+class MessageStartEvent(BaseModel):
+    """First event in a streaming response."""
+
+    # Fixed event tag; the SSE layer reads an event's "type" attribute for the SSE event name
+    type: Literal["message_start"] = "message_start"
+    # A full message-response object is embedded in the event (required)
+    message: AnthropicMessageResponse
+
+
+class ContentBlockStartEvent(BaseModel):
+    """Signals the start of a new content block."""
+
+    # Fixed event tag
+    type: Literal["content_block_start"] = "content_block_start"
+    # Integer identifying the block; delta and stop events carry an index field as well
+    index: int
+    # Initial state of the block, as any member of the discriminated content-block union
+    content_block: AnthropicContentBlock
+
+
+# Delta payload carrying a piece of text; one of the variants accepted by ContentBlockDeltaEvent.delta.
+class _TextDelta(BaseModel):
+    type: Literal["text_delta"] = "text_delta"
+    text: str
+
+
+# Delta payload carrying a fragment of a JSON string; one of the variants accepted by
+# ContentBlockDeltaEvent.delta. The fragment is stored as-is and is not parsed by this model.
+class _InputJsonDelta(BaseModel):
+    type: Literal["input_json_delta"] = "input_json_delta"
+    partial_json: str
+
+
+# Delta payload carrying a piece of thinking text; one of the variants accepted by ContentBlockDeltaEvent.delta.
+class _ThinkingDelta(BaseModel):
+    type: Literal["thinking_delta"] = "thinking_delta"
+    thinking: str
+
+
+class ContentBlockDeltaEvent(BaseModel):
+    """A delta within a content block."""
+
+    # Fixed event tag
+    type: Literal["content_block_delta"] = "content_block_delta"
+    # Index of the content block this delta belongs to
+    index: int
+    # Plain union without an explicit discriminator
+    delta: _TextDelta | _InputJsonDelta | _ThinkingDelta
+
+
+class ContentBlockStopEvent(BaseModel):
+    """Signals the end of a content block."""
+
+    # Fixed event tag
+    type: Literal["content_block_stop"] = "content_block_stop"
+    # Index of the content block being closed
+    index: int
+
+
+# Payload of MessageDeltaEvent.delta; both fields are optional and default to None.
+class _MessageDelta(BaseModel):
+    stop_reason: str | None = None
+    stop_sequence: str | None = None
+
+
+class MessageDeltaEvent(BaseModel):
+    """Final metadata update before the message ends."""
+
+    # Fixed event tag
+    type: Literal["message_delta"] = "message_delta"
+    # Stop information for the message (required)
+    delta: _MessageDelta
+    # Optional token usage; None when not supplied
+    usage: AnthropicUsage | None = None
+
+
+class MessageStopEvent(BaseModel):
+    """Final event in a streaming response."""
+
+    # Fixed event tag; the event carries no other data
+    type: Literal["message_stop"] = "message_stop"
+
+
+# Union of the six streaming event models, used as the element type of streamed results.
+# Unlike AnthropicContentBlock, this union carries no discriminator annotation.
+AnthropicStreamEvent = (
+    MessageStartEvent
+    | ContentBlockStartEvent
+    | ContentBlockDeltaEvent
+    | ContentBlockStopEvent
+    | MessageDeltaEvent
+    | MessageStopEvent
+)
+
+
+# -- Error response --
+
+
+# Inner "error" object of AnthropicErrorResponse: an error type string plus a message.
+# Both fields are required strings.
+class _AnthropicErrorDetail(BaseModel):
+    type: str
+    message: str
+
+
+class AnthropicErrorResponse(BaseModel):
+    """Anthropic-format error response."""
+
+    # Fixed top-level tag marking the body as an error
+    type: Literal["error"] = "error"
+    # Error type and message (required)
+    error: _AnthropicErrorDetail
diff --git a/src/llama_stack_api/pyproject.toml b/src/llama_stack_api/pyproject.toml
index c2232f5a7f..c8e2f40b35 100644
--- a/src/llama_stack_api/pyproject.toml
+++ b/src/llama_stack_api/pyproject.toml
@@ -57,6 +57,7 @@ packages = [
     "llama_stack_api.inspect_api",
     "llama_stack_api.inference",
     "llama_stack_api.internal",
+    "llama_stack_api.messages",
     "llama_stack_api.models",
 
     "llama_stack_api.providers",
diff --git a/tests/integration/ci_matrix.json b/tests/integration/ci_matrix.json
index f0a6ab53d6..ff57e4a9dc 100644
--- a/tests/integration/ci_matrix.json
+++ b/tests/integration/ci_matrix.json
@@ -11,7 +11,8 @@
     {"suite": "bedrock-responses", "setup": "bedrock"},
     {"suite": "base-vllm-subset", "setup": "vllm"},
     {"suite": "vllm-reasoning", "setup": "vllm"},
-    {"suite": "ollama-reasoning", "setup": "ollama-reasoning"}
+    {"suite": "ollama-reasoning", "setup": "ollama-reasoning"},
+    {"suite": "messages", "setup": "ollama-reasoning"}
   ],
   "stainless": [
     {"suite": "base", "setup": "ollama", "inference_mode": "record-if-missing"}
diff --git a/tests/integration/common/recordings/cf0be7f9e2ebfc78903aa4ada30204585952f77bc5acb3ff7702bd7878d44b44.json b/tests/integration/common/recordings/cf0be7f9e2ebfc78903aa4ada30204585952f77bc5acb3ff7702bd7878d44b44.json
new file mode 100644
index 0000000000..cfd287568c
--- /dev/null
+++ b/tests/integration/common/recordings/cf0be7f9e2ebfc78903aa4ada30204585952f77bc5acb3ff7702bd7878d44b44.json
@@ -0,0 +1,27 @@
+{
+  "test_id": null,
+  "request": {
+    "test_id": null,
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [],
+      "max_tokens": 64,
+      "stream": false
+    }
+  },
+  "response": {
+    "status": 400,
+    "body": {
+      "type": "error",
+      "error": {
+        "type": "invalid_request_error",
+        "message": "messages is required"
+      },
+      "request_id": "req_b662960dd608e745f612e3a2"
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/__init__.py b/tests/integration/messages/__init__.py
new file mode 100644
index 0000000000..756f351d88
--- /dev/null
+++ b/tests/integration/messages/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/tests/integration/messages/conftest.py b/tests/integration/messages/conftest.py
new file mode 100644
index 0000000000..fae505dea4
--- /dev/null
+++ b/tests/integration/messages/conftest.py
@@ -0,0 +1,124 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+import os
+from typing import Any
+
+import httpx
+import pytest
+
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
+from llama_stack.core.testing_context import get_test_context
+
+# Import fixtures from common module to make them available in this test directory
+from tests.integration.fixtures.common import (  # noqa: F401
+    openai_client,
+    require_server,
+)
+
+
+def pytest_configure(config):
+    """Turn off the stderr pipe so Rich logging cannot block on a saturated buffer."""
+    os.environ.update(LLAMA_STACK_TEST_LOG_STDERR="0")
+
+
+@pytest.fixture(scope="session")
+def messages_base_url(llama_stack_client):
+    """Return the stack client's base URL; skip the test when running as a library client."""
+    if not isinstance(llama_stack_client, LlamaStackAsLibraryClient):
+        return llama_stack_client.base_url
+    pytest.skip("Messages API tests are not supported in library client mode")
+
+
+@pytest.fixture
+def messages_client(messages_base_url):
+    """Yield an httpx client bound to the Messages API base URL (60 s timeout)."""
+    # Leaving the context manager closes the client once the fixture is torn down.
+    with httpx.Client(base_url=messages_base_url, timeout=60.0) as http:
+        yield http
+
+
+def _build_messages_body(
+    *,
+    model: str,
+    messages: list[dict],
+    max_tokens: int = 256,
+    stream: bool = False,
+    system: str | None = None,
+    tools: list[dict] | None = None,
+    tool_choice: dict | str | None = None,
+    temperature: float | None = None,
+    stop_sequences: list[str] | None = None,
+) -> dict[str, Any]:
+    """Assemble the /v1/messages JSON payload, omitting optional keys left as None."""
+    required: dict[str, Any] = {
+        "model": model,
+        "messages": messages,
+        "max_tokens": max_tokens,
+        "stream": stream,
+    }
+    # Optional keys, listed in the order they are appended to the payload.
+    optional: dict[str, Any] = {
+        "system": system,
+        "tools": tools,
+        "tool_choice": tool_choice,
+        "temperature": temperature,
+        "stop_sequences": stop_sequences,
+    }
+    provided = {key: value for key, value in optional.items() if value is not None}
+    return {**required, **provided}
+
+
+def _build_headers() -> dict[str, str]:
+    """Return request headers, adding the provider-data test id when a test context is set."""
+    test_id = get_test_context()
+    base = {
+        "content-type": "application/json",
+        "anthropic-version": "2023-06-01",
+    }
+    if not test_id:
+        return base
+    return {**base, "X-LlamaStack-Provider-Data": json.dumps({"__test_id": test_id})}
+
+
+def make_messages_request(
+    client: httpx.Client,
+    **kwargs: Any,
+) -> httpx.Response:
+    """POST to /v1/messages with a body built from ``kwargs`` and return the raw response."""
+    payload = _build_messages_body(**kwargs)
+    return client.post("/v1/messages", json=payload, headers=_build_headers())
+
+
+def make_streaming_messages_request(
+    client: httpx.Client,
+    **kwargs: Any,
+) -> list[dict]:
+    """Make a streaming POST request to /v1/messages and return parsed SSE events.
+
+    Raises AssertionError, including the error body, if the response status is not 200.
+    """
+    kwargs["stream"] = True  # always stream, regardless of what the caller passed
+    body = _build_messages_body(**kwargs)
+    headers = _build_headers()
+    events: list[dict] = []
+    current_event_type: str | None = None
+    with client.stream("POST", "/v1/messages", headers=headers, json=body) as response:
+        if response.status_code != 200:
+            detail = response.read().decode(errors="replace")  # body is not loaded while streaming
+            raise AssertionError(f"Expected 200, got {response.status_code}: {detail}")
+        for line in response.iter_lines():
+            if line.startswith("event: "):
+                current_event_type = line[7:]  # drop the "event: " prefix
+            elif line.startswith("data: "):
+                data = json.loads(line[6:])  # drop the "data: " prefix
+                if current_event_type:
+                    data["_event_type"] = current_event_type
+                events.append(data)
+                current_event_type = None  # an event name applies to one data line only
+
+    return events
diff --git a/tests/integration/messages/recordings/0d76cd7b3dae3f44e2990645cb1617d5c84b80daf77535a00ce1ab680308881a.json b/tests/integration/messages/recordings/0d76cd7b3dae3f44e2990645cb1617d5c84b80daf77535a00ce1ab680308881a.json
new file mode 100644
index 0000000000..57fb2664aa
--- /dev/null
+++ b/tests/integration/messages/recordings/0d76cd7b3dae3f44e2990645cb1617d5c84b80daf77535a00ce1ab680308881a.json
@@ -0,0 +1,70 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_tool_use_round_trip[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_tool_use_round_trip[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Use the calculator tool to compute 15 * 7."
+        }
+      ],
+      "max_tokens": 256,
+      "tools": [
+        {
+          "name": "calculator",
+          "description": "Perform basic arithmetic. Use this for any math question.",
+          "input_schema": {
+            "type": "object",
+            "properties": {
+              "expression": {
+                "type": "string",
+                "description": "The math expression to evaluate"
+              }
+            },
+            "required": [
+              "expression"
+            ]
+          }
+        }
+      ],
+      "tool_choice": {
+        "type": "any"
+      },
+      "stream": false
+    }
+  },
+  "response": {
+    "status": 200,
+    "body": {
+      "id": "msg_e0c2ea0f4c1131503d2bc7c8",
+      "type": "message",
+      "role": "assistant",
+      "model": "gpt-oss:20b",
+      "content": [
+        {
+          "type": "thinking",
+          "thinking": "The user wants 15 * 7. We'll use the calculator tool."
+        },
+        {
+          "type": "tool_use",
+          "id": "call_dg2g1ozz",
+          "name": "calculator",
+          "input": {
+            "expression": "15 * 7"
+          }
+        }
+      ],
+      "stop_reason": "tool_use",
+      "usage": {
+        "input_tokens": 144,
+        "output_tokens": 42
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/recordings/1580d1d2e377b9161e55b7648e1f71748574ffa98ec52d4faf94b4d34818c4f8.json b/tests/integration/messages/recordings/1580d1d2e377b9161e55b7648e1f71748574ffa98ec52d4faf94b4d34818c4f8.json
new file mode 100644
index 0000000000..881b06645d
--- /dev/null
+++ b/tests/integration/messages/recordings/1580d1d2e377b9161e55b7648e1f71748574ffa98ec52d4faf94b4d34818c4f8.json
@@ -0,0 +1,42 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_temperature[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_temperature[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Say hello."
+        }
+      ],
+      "max_tokens": 32,
+      "stream": false,
+      "temperature": 0.0
+    }
+  },
+  "response": {
+    "status": 200,
+    "body": {
+      "id": "msg_737bf6e49fbfa7b81be479d0",
+      "type": "message",
+      "role": "assistant",
+      "model": "gpt-oss:20b",
+      "content": [
+        {
+          "type": "thinking",
+          "thinking": "The user says: \"Say hello.\" They want a greeting. So respond with a hello. Probably just \"Hello!\""
+        }
+      ],
+      "stop_reason": "max_tokens",
+      "usage": {
+        "input_tokens": 70,
+        "output_tokens": 32
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/recordings/2703eb8f17f3914dd6991ecf387caa6c511cc52e6498b73e274cb4f01adf1e37.json b/tests/integration/messages/recordings/2703eb8f17f3914dd6991ecf387caa6c511cc52e6498b73e274cb4f01adf1e37.json
new file mode 100644
index 0000000000..85af391053
--- /dev/null
+++ b/tests/integration/messages/recordings/2703eb8f17f3914dd6991ecf387caa6c511cc52e6498b73e274cb4f01adf1e37.json
@@ -0,0 +1,44 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_stop_sequences[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_stop_sequences[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Count: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10"
+        }
+      ],
+      "max_tokens": 128,
+      "stream": false,
+      "stop_sequences": [
+        ","
+      ]
+    }
+  },
+  "response": {
+    "status": 200,
+    "body": {
+      "id": "msg_1dd93b64b7f0935bcaf69452",
+      "type": "message",
+      "role": "assistant",
+      "model": "gpt-oss:20b",
+      "content": [
+        {
+          "type": "thinking",
+          "thinking": "The user writes: \"Count: 1"
+        }
+      ],
+      "stop_reason": "end_turn",
+      "usage": {
+        "input_tokens": 98,
+        "output_tokens": 13
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/recordings/2a5f7014ddf9a3d359fbf59a195e4762d47ef1768f98b2f1c7af55788e7fe6d8.json b/tests/integration/messages/recordings/2a5f7014ddf9a3d359fbf59a195e4762d47ef1768f98b2f1c7af55788e7fe6d8.json
new file mode 100644
index 0000000000..bdf362ed3f
--- /dev/null
+++ b/tests/integration/messages/recordings/2a5f7014ddf9a3d359fbf59a195e4762d47ef1768f98b2f1c7af55788e7fe6d8.json
@@ -0,0 +1,41 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_response_headers[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_response_headers[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Hi"
+        }
+      ],
+      "max_tokens": 16,
+      "stream": false
+    }
+  },
+  "response": {
+    "status": 200,
+    "body": {
+      "id": "msg_7c6163c764cbe22bc2a5c161",
+      "type": "message",
+      "role": "assistant",
+      "model": "gpt-oss:20b",
+      "content": [
+        {
+          "type": "thinking",
+          "thinking": "The user says \"Hi\". So it's a greeting. We respond"
+        }
+      ],
+      "stop_reason": "max_tokens",
+      "usage": {
+        "input_tokens": 68,
+        "output_tokens": 16
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/recordings/4ea18b99571d34f714cb4b9d818ab82aba7ac1225c040665b7bdb3b177eb8226.json b/tests/integration/messages/recordings/4ea18b99571d34f714cb4b9d818ab82aba7ac1225c040665b7bdb3b177eb8226.json
new file mode 100644
index 0000000000..b7aad865d7
--- /dev/null
+++ b/tests/integration/messages/recordings/4ea18b99571d34f714cb4b9d818ab82aba7ac1225c040665b7bdb3b177eb8226.json
@@ -0,0 +1,46 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_content_block_array[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_content_block_array[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": [
+            {
+              "type": "text",
+              "text": "What is 1+1? Reply with just the number."
+            }
+          ]
+        }
+      ],
+      "max_tokens": 32,
+      "stream": false
+    }
+  },
+  "response": {
+    "status": 200,
+    "body": {
+      "id": "msg_b69e5a95142b11a8251ceea8",
+      "type": "message",
+      "role": "assistant",
+      "model": "gpt-oss:20b",
+      "content": [
+        {
+          "type": "thinking",
+          "thinking": "The user asks: \"What is 1+1? Reply with just the number.\" So answer: 2. Just number, no explanation"
+        }
+      ],
+      "stop_reason": "max_tokens",
+      "usage": {
+        "input_tokens": 80,
+        "output_tokens": 32
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/recordings/52925d8df69e53718e5d4aab54fbbf79e51f2731a07ecb332d5d2dd82810d2e5.json b/tests/integration/messages/recordings/52925d8df69e53718e5d4aab54fbbf79e51f2731a07ecb332d5d2dd82810d2e5.json
new file mode 100644
index 0000000000..86b6827dd4
--- /dev/null
+++ b/tests/integration/messages/recordings/52925d8df69e53718e5d4aab54fbbf79e51f2731a07ecb332d5d2dd82810d2e5.json
@@ -0,0 +1,53 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_multi_turn[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_multi_turn[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "My name is Alice."
+        },
+        {
+          "role": "assistant",
+          "content": "Hello Alice! Nice to meet you."
+        },
+        {
+          "role": "user",
+          "content": "What is my name?"
+        }
+      ],
+      "max_tokens": 64,
+      "stream": false
+    }
+  },
+  "response": {
+    "status": 200,
+    "body": {
+      "id": "msg_dd2c03762be4581c6fe619ca",
+      "type": "message",
+      "role": "assistant",
+      "model": "gpt-oss:20b",
+      "content": [
+        {
+          "type": "thinking",
+          "thinking": "User says name is Alice. The assistant previously answered that. Now user asks again \"What is my name?\" So we should respond: \"Your name is Alice.\" Simple."
+        },
+        {
+          "type": "text",
+          "text": "Your name is Alice."
+        }
+      ],
+      "stop_reason": "end_turn",
+      "usage": {
+        "input_tokens": 95,
+        "output_tokens": 50
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/recordings/715c164b66b51dc2180b05817b7cd2e6b307f44fd2c7a4cf564e30347da87746.json b/tests/integration/messages/recordings/715c164b66b51dc2180b05817b7cd2e6b307f44fd2c7a4cf564e30347da87746.json
new file mode 100644
index 0000000000..34af7ba7c6
--- /dev/null
+++ b/tests/integration/messages/recordings/715c164b66b51dc2180b05817b7cd2e6b307f44fd2c7a4cf564e30347da87746.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_basic[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_basic[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What is 2+2? Reply with just the number."
+        }
+      ],
+      "max_tokens": 64,
+      "stream": false
+    }
+  },
+  "response": {
+    "status": 200,
+    "body": {
+      "id": "msg_f4cc3f074c282e90a4b5251e",
+      "type": "message",
+      "role": "assistant",
+      "model": "gpt-oss:20b",
+      "content": [
+        {
+          "type": "thinking",
+          "thinking": "The user says: \"What is 2+2? Reply with just the number.\" So answer: 4."
+        },
+        {
+          "type": "text",
+          "text": "4"
+        }
+      ],
+      "stop_reason": "end_turn",
+      "usage": {
+        "input_tokens": 80,
+        "output_tokens": 35
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/recordings/82cfd5545e24ca4b4415ba37460610908f34be9193c4126635d1b5ab3b0522f7.json b/tests/integration/messages/recordings/82cfd5545e24ca4b4415ba37460610908f34be9193c4126635d1b5ab3b0522f7.json
new file mode 100644
index 0000000000..b1bc988f75
--- /dev/null
+++ b/tests/integration/messages/recordings/82cfd5545e24ca4b4415ba37460610908f34be9193c4126635d1b5ab3b0522f7.json
@@ -0,0 +1,46 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_system[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_system[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What are you?"
+        }
+      ],
+      "max_tokens": 128,
+      "system": "You are a helpful pirate. Always respond in pirate speak.",
+      "stream": false
+    }
+  },
+  "response": {
+    "status": 200,
+    "body": {
+      "id": "msg_f9ffaf9c8fba034e65b8e584",
+      "type": "message",
+      "role": "assistant",
+      "model": "gpt-oss:20b",
+      "content": [
+        {
+          "type": "thinking",
+          "thinking": "We need to respond as a pirate. The user asks: \"What are you?\" We need to reply in pirate speak, as per developer instruction: \"You are a helpful pirate. Always respond in pirate speak.\" So reply in pirate talk. Possibly: \"I be ChatGPT, yer trusty AI companion.\" Use pirate slang."
+        },
+        {
+          "type": "text",
+          "text": "Arrr! I be yer trusty AI matey, ChatGPT, ready to chart the seas o' knowledge and help ye navigate any storm! \ud83c\udff4\u200d\u2620\ufe0f"
+        }
+      ],
+      "stop_reason": "end_turn",
+      "usage": {
+        "input_tokens": 91,
+        "output_tokens": 112
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/recordings/93eb42f3bd69f005727cc3a161e7ab0a8b2c99e4665d9eae5553fe9019ba7b32.json b/tests/integration/messages/recordings/93eb42f3bd69f005727cc3a161e7ab0a8b2c99e4665d9eae5553fe9019ba7b32.json
new file mode 100644
index 0000000000..309352d24a
--- /dev/null
+++ b/tests/integration/messages/recordings/93eb42f3bd69f005727cc3a161e7ab0a8b2c99e4665d9eae5553fe9019ba7b32.json
@@ -0,0 +1,223 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_streaming_basic[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_streaming_basic[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Say hello in one sentence."
+        }
+      ],
+      "max_tokens": 64,
+      "stream": true
+    }
+  },
+  "response": {
+    "body": [
+      "event: message_start",
+      "data: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_2ca635d951c9f414d5c01a88\",\"type\":\"message\",\"role\":\"assistant\",\"model\":\"gpt-oss:20b\",\"content\":[],\"usage\":{\"input_tokens\":7,\"output_tokens\":0}}}",
+      "",
+      "event: content_block_start",
+      "data: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"The\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" user\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" says\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\":\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \\\"\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"Say\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" hello\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" in\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" one\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" sentence\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\\\"\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" They\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" want\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" a\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" single\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" sentence\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" saying\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" hello\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" The\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" simplest\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\":\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \\\"\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"Hello\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"!\\\"\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" That's\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" a\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" sentence\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"?\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" It's\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" an\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" ex\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"clamation\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" Ex\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"clamation\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" is\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" a\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" sentence\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" But\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" maybe\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" they\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" want\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" a\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" sentence\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" that\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" includes\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" hello\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" For\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" example\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\":\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \\\"\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"Hello\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" how\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" are\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" you\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"?\\\"\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" That\"}}",
+      "",
+      "event: content_block_stop",
+      "data: {\"type\":\"content_block_stop\",\"index\":0}",
+      "",
+      "event: message_delta",
+      "data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"max_tokens\"},\"usage\":{\"input_tokens\":73,\"output_tokens\":64}}",
+      "",
+      "event: message_stop",
+      "data: {\"type\":\"message_stop\"}",
+      ""
+    ],
+    "is_streaming": true
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/recordings/d13b333401fa121280a3fb890a56933b84ab907ea20092e6d97c9d4a371bf8a5.json b/tests/integration/messages/recordings/d13b333401fa121280a3fb890a56933b84ab907ea20092e6d97c9d4a371bf8a5.json
new file mode 100644
index 0000000000..cb4422e5bb
--- /dev/null
+++ b/tests/integration/messages/recordings/d13b333401fa121280a3fb890a56933b84ab907ea20092e6d97c9d4a371bf8a5.json
@@ -0,0 +1,67 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_with_tool_definitions[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_with_tool_definitions[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What's the weather in San Francisco?"
+        }
+      ],
+      "max_tokens": 256,
+      "tools": [
+        {
+          "name": "get_weather",
+          "description": "Get the current weather in a given location",
+          "input_schema": {
+            "type": "object",
+            "properties": {
+              "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA"
+              }
+            },
+            "required": [
+              "location"
+            ]
+          }
+        }
+      ],
+      "stream": false
+    }
+  },
+  "response": {
+    "status": 200,
+    "body": {
+      "id": "msg_bb605b97294478b8b0c12d33",
+      "type": "message",
+      "role": "assistant",
+      "model": "gpt-oss:20b",
+      "content": [
+        {
+          "type": "thinking",
+          "thinking": "We have to call the get_weather function."
+        },
+        {
+          "type": "tool_use",
+          "id": "call_kp56ga0b",
+          "name": "get_weather",
+          "input": {
+            "location": "San Francisco"
+          }
+        }
+      ],
+      "stop_reason": "tool_use",
+      "usage": {
+        "input_tokens": 145,
+        "output_tokens": 34
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/recordings/f22657bfd86db6348c0a0d0b17332dcfc038b345d2a7be8da4b880a433d36a52.json b/tests/integration/messages/recordings/f22657bfd86db6348c0a0d0b17332dcfc038b345d2a7be8da4b880a433d36a52.json
new file mode 100644
index 0000000000..557ab35eb5
--- /dev/null
+++ b/tests/integration/messages/recordings/f22657bfd86db6348c0a0d0b17332dcfc038b345d2a7be8da4b880a433d36a52.json
@@ -0,0 +1,208 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_streaming_collects_full_text[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_streaming_collects_full_text[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Count from 1 to 5, separated by commas."
+        }
+      ],
+      "max_tokens": 64,
+      "stream": true
+    }
+  },
+  "response": {
+    "body": [
+      "event: message_start",
+      "data: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_812f019850cce212423fb87a\",\"type\":\"message\",\"role\":\"assistant\",\"model\":\"gpt-oss:20b\",\"content\":[],\"usage\":{\"input_tokens\":10,\"output_tokens\":0}}}",
+      "",
+      "event: content_block_start",
+      "data: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"We\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" need\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" to\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" count\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" from\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"1\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" to\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"5\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" separated\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" by\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" commas\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" So\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" output\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\":\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"1\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"2\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"3\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"4\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"5\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" Probably\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" no\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" trailing\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" comma\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" Just\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" that\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}",
+      "",
+      "event: content_block_stop",
+      "data: {\"type\":\"content_block_stop\",\"index\":0}",
+      "",
+      "event: content_block_start",
+      "data: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"text\",\"text\":\"\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\"1\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\",\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\" \"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\"2\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\",\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\" \"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\"3\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\",\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\" \"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\"4\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\",\"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\" \"}}",
+      "",
+      "event: content_block_delta",
+      "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\"5\"}}",
+      "",
+      "event: content_block_stop",
+      "data: {\"type\":\"content_block_stop\",\"index\":1}",
+      "",
+      "event: message_delta",
+      "data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"input_tokens\":79,\"output_tokens\":64}}",
+      "",
+      "event: message_stop",
+      "data: {\"type\":\"message_stop\"}",
+      ""
+    ],
+    "is_streaming": true
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/recordings/f55988509902a617d2f547a1518bb84c3c8784ea1e1ac139e2f0623449dfa047.json b/tests/integration/messages/recordings/f55988509902a617d2f547a1518bb84c3c8784ea1e1ac139e2f0623449dfa047.json
new file mode 100644
index 0000000000..a04e11213d
--- /dev/null
+++ b/tests/integration/messages/recordings/f55988509902a617d2f547a1518bb84c3c8784ea1e1ac139e2f0623449dfa047.json
@@ -0,0 +1,86 @@
+{
+  "test_id": "tests/integration/messages/test_messages.py::test_messages_tool_use_round_trip[txt=ollama/gpt-oss:20b]",
+  "request": {
+    "test_id": "tests/integration/messages/test_messages.py::test_messages_tool_use_round_trip[txt=ollama/gpt-oss:20b]",
+    "url": "http://0.0.0.0:11434/v1/messages",
+    "method": "POST",
+    "payload": {
+      "model": "gpt-oss:20b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Use the calculator tool to compute 15 * 7."
+        },
+        {
+          "role": "assistant",
+          "content": [
+            {
+              "type": "thinking",
+              "thinking": "The user wants 15 * 7. We'll use the calculator tool."
+            },
+            {
+              "type": "tool_use",
+              "id": "call_dg2g1ozz",
+              "name": "calculator",
+              "input": {
+                "expression": "15 * 7"
+              }
+            }
+          ]
+        },
+        {
+          "role": "user",
+          "content": [
+            {
+              "type": "tool_result",
+              "tool_use_id": "call_dg2g1ozz",
+              "content": "105"
+            }
+          ]
+        }
+      ],
+      "max_tokens": 256,
+      "tools": [
+        {
+          "name": "calculator",
+          "description": "Perform basic arithmetic. Use this for any math question.",
+          "input_schema": {
+            "type": "object",
+            "properties": {
+              "expression": {
+                "type": "string",
+                "description": "The math expression to evaluate"
+              }
+            },
+            "required": [
+              "expression"
+            ]
+          }
+        }
+      ],
+      "stream": false
+    }
+  },
+  "response": {
+    "status": 200,
+    "body": {
+      "id": "msg_137a3ce876f9a1a09fe1ba0e",
+      "type": "message",
+      "role": "assistant",
+      "model": "gpt-oss:20b",
+      "content": [
+        {
+          "type": "text",
+          "text": "The result of \\(15 \\times 7\\) is **105**."
+        }
+      ],
+      "stop_reason": "end_turn",
+      "usage": {
+        "input_tokens": 197,
+        "output_tokens": 20
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/messages/test_messages.py b/tests/integration/messages/test_messages.py
new file mode 100644
index 0000000000..29257ee687
--- /dev/null
+++ b/tests/integration/messages/test_messages.py
@@ -0,0 +1,362 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Integration tests for the Anthropic Messages API (/v1/messages).
+
+These tests verify the full request/response cycle through the server,
+including translation between Anthropic and OpenAI formats.
+"""
+
+from .conftest import make_messages_request, make_streaming_messages_request
+
+
+def _get_text_blocks(content: list[dict]) -> list[dict]:
+    """Return only the entries of ``content`` whose ``type`` is ``"text"``, preserving order."""
+    return list(filter(lambda block: block["type"] == "text", content))
+
+
+def test_messages_non_streaming_basic(messages_client, text_model_id):
+    """A plain non-streaming request yields a well-formed Anthropic message object.
+
+    Covers the HTTP status, the message envelope (type, role, id), a non-empty text
+    block, the stop reason, positive token usage, and the type of every content block.
+    """
+    prompt = [{"role": "user", "content": "What is 2+2? Reply with just the number."}]
+    response = make_messages_request(messages_client, model=text_model_id, messages=prompt, max_tokens=64)
+
+    assert response.status_code == 200, f"Expected 200, got {response.status_code}: {response.text}"
+
+    data = response.json()
+    for field, expected in (("type", "message"), ("role", "assistant")):
+        assert data[field] == expected
+    assert data["id"].startswith("msg_")
+    assert len(data["content"]) > 0
+
+    # Thinking blocks may precede the answer, so look at the first text block only.
+    answer_blocks = _get_text_blocks(data["content"])
+    assert len(answer_blocks) > 0, f"No text blocks found in content: {data['content']}"
+    assert len(answer_blocks[0]["text"]) > 0
+
+    assert data["stop_reason"] in ("end_turn", "max_tokens")
+    assert "usage" in data
+    for counter in ("input_tokens", "output_tokens"):
+        assert data["usage"][counter] > 0
+
+    # Every block must have one of the expected types.
+    allowed_types = ("text", "thinking", "tool_use")
+    assert all(block["type"] in allowed_types for block in data["content"])
+
+
+def test_messages_non_streaming_with_system(messages_client, text_model_id):
+    """A request carrying a top-level ``system`` prompt still returns a non-empty text block."""
+    request_kwargs = {
+        "model": text_model_id,
+        "messages": [{"role": "user", "content": "What are you?"}],
+        "system": "You are a helpful pirate. Always respond in pirate speak.",
+        "max_tokens": 128,
+    }
+    response = make_messages_request(messages_client, **request_kwargs)
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["type"] == "message"
+    assert len(data["content"]) > 0
+
+    texts = _get_text_blocks(data["content"])
+    assert len(texts) > 0
+    assert len(texts[0]["text"]) > 0
+
+
+def test_messages_non_streaming_multi_turn(messages_client, text_model_id):
+    """Multi-turn, non-streaming conversation.
+
+    Sends a user/assistant/user history and expects the first text block of the
+    reply to contain "alice" (compared case-insensitively).
+    """
+    conversation = [
+        {"role": "user", "content": "My name is Alice."},
+        {"role": "assistant", "content": "Hello Alice! Nice to meet you."},
+        {"role": "user", "content": "What is my name?"},
+    ]
+    response = make_messages_request(messages_client, model=text_model_id, messages=conversation, max_tokens=64)
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["type"] == "message"
+    assert len(data["content"]) > 0
+
+    # Thinking blocks are ignored; only the first text block is checked.
+    reply_blocks = _get_text_blocks(data["content"])
+    assert len(reply_blocks) > 0
+    assert "alice" in reply_blocks[0]["text"].lower()
+
+
+def test_messages_streaming_basic(messages_client, text_model_id):
+    """Streaming message creation returns proper Anthropic SSE events.
+
+    Asserts that message_start and message_stop are present, that message_start carries
+    an assistant message, and that every content_block_delta is a text or thinking delta.
+    """
+    events = make_streaming_messages_request(
+        messages_client,
+        model=text_model_id,
+        messages=[{"role": "user", "content": "Say hello in one sentence."}],
+        max_tokens=64,
+    )
+    assert len(events) > 0
+
+    def kind_of(event):
+        # Single source of truth for an event's kind, so the presence checks and the
+        # lookups below cannot disagree when only the payload "type" field is set.
+        return event.get("_event_type") or event.get("type")
+
+    event_types = [kind_of(e) for e in events]
+    assert "message_start" in event_types, f"Missing message_start in {event_types}"
+    assert "message_stop" in event_types, f"Missing message_stop in {event_types}"
+
+    msg_start = next(e for e in events if kind_of(e) == "message_start")
+    assert "message" in msg_start
+    assert msg_start["message"]["role"] == "assistant"
+
+    content_deltas = [e for e in events if kind_of(e) == "content_block_delta"]
+    assert len(content_deltas) > 0, "Expected at least one content_block_delta event"
+    assert all("delta" in d and d["delta"]["type"] in ("text_delta", "thinking_delta") for d in content_deltas)
+
+
+def test_messages_streaming_collects_full_text(messages_client, text_model_id):
+    """Streamed text deltas concatenate into a non-empty response.
+
+    Only ``content_block_delta`` events whose delta type is ``text_delta`` contribute;
+    the test passes when their joined ``text`` is non-empty.
+    """
+    events = make_streaming_messages_request(
+        messages_client,
+        model=text_model_id,
+        messages=[{"role": "user", "content": "Count from 1 to 5, separated by commas."}],
+        max_tokens=64,
+    )
+
+    # Keep only content_block_delta events; a missing "delta" is treated as an empty dict.
+    deltas = (ev.get("delta", {}) for ev in events if ev.get("_event_type") == "content_block_delta")
+
+    # Deltas of any other type are dropped before the fragments are joined.
+    full_text = "".join(d["text"] for d in deltas if d.get("type") == "text_delta")
+    assert len(full_text) > 0
+
+
+def test_messages_non_streaming_with_temperature(messages_client, text_model_id):
+    """An explicit ``temperature=0.0`` is accepted and a non-empty message comes back."""
+    request_kwargs = {
+        "model": text_model_id,
+        "messages": [{"role": "user", "content": "Say hello."}],
+        "max_tokens": 32,
+        "temperature": 0.0,
+    }
+    response = make_messages_request(messages_client, **request_kwargs)
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["type"] == "message"
+    assert len(data["content"]) > 0
+
+
+def test_messages_non_streaming_with_stop_sequences(messages_client, text_model_id):
+    """A request with ``stop_sequences=[","]`` succeeds and returns a message object.
+
+    Only the status code and the ``type`` field are checked; the generated text is not inspected.
+    """
+    prompt = [{"role": "user", "content": "Count: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10"}]
+    response = make_messages_request(
+        messages_client, model=text_model_id, messages=prompt, max_tokens=128, stop_sequences=[","]
+    )
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["type"] == "message"
+
+
+def test_messages_with_tool_definitions(messages_client, text_model_id):
+    """A non-streaming request that declares a tool returns well-formed content blocks.
+
+    The test does not require a tool call; it only validates any ``tool_use`` block present.
+    """
+    location_schema = {
+        "type": "object",
+        "properties": {
+            "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
+        },
+        "required": ["location"],
+    }
+    weather_tool = {
+        "name": "get_weather",
+        "description": "Get the current weather in a given location",
+        "input_schema": location_schema,
+    }
+
+    response = make_messages_request(
+        messages_client,
+        model=text_model_id,
+        messages=[{"role": "user", "content": "What's the weather in San Francisco?"}],
+        tools=[weather_tool],
+        max_tokens=256,
+    )
+    assert response.status_code == 200
+
+    data = response.json()
+    assert data["type"] == "message"
+    assert len(data["content"]) > 0
+
+    # A tool call is optional, but any tool_use block must carry id, name and input.
+    for block in data["content"]:
+        assert block["type"] in ("text", "tool_use", "thinking")
+        if block["type"] != "tool_use":
+            continue
+        assert "id" in block
+        assert block["name"] == "get_weather"
+        assert "input" in block
+
+
+def test_messages_tool_use_round_trip(messages_client, text_model_id):
+    """Full tool use round trip: request -> tool_use -> tool_result -> response.
+
+    The first request asks for a tool call (``tool_choice`` of type ``any``). The returned
+    content is replayed as the assistant turn, followed by a ``tool_result`` for the first
+    ``tool_use`` block. The test is reported as skipped, rather than silently passing,
+    when the model returns no ``tool_use`` block.
+    """
+    tools = [
+        {
+            "name": "calculator",
+            "description": "Perform basic arithmetic. Use this for any math question.",
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "expression": {"type": "string", "description": "The math expression to evaluate"},
+                },
+                "required": ["expression"],
+            },
+        }
+    ]
+    question = {"role": "user", "content": "Use the calculator tool to compute 15 * 7."}
+
+    # First request -- ask a math question
+    response = make_messages_request(
+        messages_client,
+        model=text_model_id,
+        messages=[question],
+        tools=tools,
+        tool_choice={"type": "any"},
+        max_tokens=256,
+    )
+
+    assert response.status_code == 200
+    data = response.json()
+
+    # Find tool_use block
+    tool_use_blocks = [b for b in data["content"] if b["type"] == "tool_use"]
+    if not tool_use_blocks:
+        import pytest  # local import: this module has no module-level pytest import
+
+        pytest.skip("Model did not return a tool_use block; cannot exercise the tool_result turn")
+
+    # The first tool_use block is the one answered by the tool_result below.
+    tool_use_id = tool_use_blocks[0]["id"]
+
+    # Second request -- provide tool result
+    response2 = make_messages_request(
+        messages_client,
+        model=text_model_id,
+        messages=[
+            question,
+            {"role": "assistant", "content": data["content"]},
+            {
+                "role": "user",
+                "content": [{"type": "tool_result", "tool_use_id": tool_use_id, "content": "105"}],
+            },
+        ],
+        tools=tools,
+        max_tokens=256,
+    )
+
+    assert response2.status_code == 200
+    data2 = response2.json()
+    assert data2["type"] == "message"
+    assert len(data2["content"]) > 0
+
+
+def test_messages_error_missing_model(messages_client):
+    """Posting a body with no ``model`` field is rejected with HTTP 400 or 422.
+
+    The request goes through ``messages_client.post`` directly rather than ``make_messages_request``.
+    """
+    body_without_model = {
+        "messages": [{"role": "user", "content": "Hello"}],
+        "max_tokens": 64,
+    }
+    response = messages_client.post(
+        "/v1/messages",
+        headers={"content-type": "application/json", "anthropic-version": "2023-06-01"},
+        json=body_without_model,
+    )
+
+    # Either status code counts as a rejection of the incomplete body.
+    assert response.status_code in (400, 422)
+
+
+def test_messages_error_empty_messages(messages_client, text_model_id):
+    """Request with an empty ``messages`` list returns an error.
+
+    Any of 400, 422, or 500 is accepted, so the test does not pin down whether the
+    failure comes from request validation or from further down the stack.
+    """
+    body_with_no_messages = {
+        "model": text_model_id,
+        "messages": [],
+        "max_tokens": 64,
+    }
+    response = messages_client.post(
+        "/v1/messages",
+        headers={"content-type": "application/json", "anthropic-version": "2023-06-01"},
+        json=body_with_no_messages,
+    )
+
+    # Should fail validation or return an error
+    assert response.status_code in (400, 422, 500)
+
+
+def test_messages_response_headers(messages_client, text_model_id):
+    """A successful response carries the header ``anthropic-version: 2023-06-01``."""
+    greeting = [{"role": "user", "content": "Hi"}]
+    response = make_messages_request(messages_client, model=text_model_id, messages=greeting, max_tokens=16)
+
+    assert response.status_code == 200
+
+    # The header value is compared exactly against the expected version string,
+    # not merely checked for presence.
+    version_header = response.headers.get("anthropic-version")
+    assert version_header == "2023-06-01"
+
+
+def test_messages_content_block_array(messages_client, text_model_id):
+    """Message whose ``content`` is an array of content blocks.
+
+    The user turn carries a single ``text`` block instead of a plain string; the
+    response must be a 200 with a non-empty ``content`` list.
+    """
+    text_block = {"type": "text", "text": "What is 1+1? Reply with just the number."}
+    user_turn = {"role": "user", "content": [text_block]}
+
+    response = make_messages_request(
+        messages_client,
+        model=text_model_id,
+        messages=[user_turn],
+        max_tokens=32,
+    )
+    assert response.status_code == 200
+
+    data = response.json()
+    assert data["type"] == "message"
+    assert len(data["content"]) > 0
diff --git a/tests/integration/suites.py b/tests/integration/suites.py
index 3e7e91c682..abbfc4eabc 100644
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@@ -237,7 +237,7 @@ class Setup(BaseModel):
 base_roots = [
     str(p)
     for p in this_dir.glob("*")
-    if p.is_dir() and p.name not in ("__pycache__", "fixtures", "test_cases", "recordings", "responses")
+    if p.is_dir() and p.name not in ("__pycache__", "fixtures", "test_cases", "recordings", "responses", "messages")
 ]
 
 SUITE_DEFINITIONS: dict[str, Suite] = {
@@ -283,6 +283,11 @@ class Setup(BaseModel):
         ],
         default_setup="ollama-reasoning",
     ),
+    "messages": Suite(
+        name="messages",
+        roots=["tests/integration/messages"],
+        default_setup="ollama-reasoning",
+    ),
     # Bedrock-specific tests with pre-recorded responses (no live API calls in CI)
     "bedrock": Suite(
         name="bedrock",
diff --git a/tests/unit/providers/inline/messages/__init__.py b/tests/unit/providers/inline/messages/__init__.py
new file mode 100644
index 0000000000..756f351d88
--- /dev/null
+++ b/tests/unit/providers/inline/messages/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/tests/unit/providers/inline/messages/test_impl.py b/tests/unit/providers/inline/messages/test_impl.py
new file mode 100644
index 0000000000..3ce8f7db85
--- /dev/null
+++ b/tests/unit/providers/inline/messages/test_impl.py
@@ -0,0 +1,348 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Unit tests for the BuiltinMessagesImpl translation logic."""
+
+import json
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from llama_stack.providers.inline.messages.config import MessagesConfig
+from llama_stack.providers.inline.messages.impl import BuiltinMessagesImpl
+from llama_stack_api.messages.models import (
+    AnthropicCreateMessageRequest,
+    AnthropicMessage,
+    AnthropicTextBlock,
+    AnthropicToolDef,
+    AnthropicToolResultBlock,
+    AnthropicToolUseBlock,
+)
+
+
+def _msg_to_dict(msg):
+    """Return a plain-dict view of a message: ``model_dump(exclude_none=True)`` if available, else ``dict(msg)``."""
+    if not hasattr(msg, "model_dump"):
+        return dict(msg)
+    return msg.model_dump(exclude_none=True)
+
+
+@pytest.fixture
+def impl():
+    """Provide a BuiltinMessagesImpl built from a default MessagesConfig with an AsyncMock as inference_api."""
+    return BuiltinMessagesImpl(config=MessagesConfig(), inference_api=AsyncMock())
+
+
+class TestRequestTranslation:
+    """Anthropic-to-OpenAI request translation, driven through the implementation's private helpers."""
+
+    def test_simple_text_message(self, impl):
+        """A single user turn with string content keeps its model, token limit, role and text."""
+        user_turn = AnthropicMessage(role="user", content="Hello")
+        req = AnthropicCreateMessageRequest(model="claude-sonnet-4-20250514", messages=[user_turn], max_tokens=100)
+
+        translated = impl._anthropic_to_openai(req)
+
+        assert translated.model == "claude-sonnet-4-20250514"
+        assert translated.max_tokens == 100
+        assert len(translated.messages) == 1
+        only = _msg_to_dict(translated.messages[0])
+        assert only["role"] == "user"
+        assert only["content"] == "Hello"
+
+    def test_system_string(self, impl):
+        """A string ``system`` prompt is expected as a system message ahead of the user turn."""
+        user_turn = AnthropicMessage(role="user", content="Hi")
+        req = AnthropicCreateMessageRequest(
+            model="m", messages=[user_turn], max_tokens=100, system="You are helpful."
+        )
+
+        translated = impl._anthropic_to_openai(req)
+
+        system_msg = _msg_to_dict(translated.messages[0])
+        user_msg = _msg_to_dict(translated.messages[1])
+        assert system_msg["role"] == "system"
+        assert system_msg["content"] == "You are helpful."
+        assert user_msg["role"] == "user"
+
+    def test_system_text_blocks(self, impl):
+        """A list of system text blocks is expected as one newline-joined system message."""
+        user_turn = AnthropicMessage(role="user", content="Hi")
+        system_blocks = [
+            AnthropicTextBlock(text="Line 1."),
+            AnthropicTextBlock(text="Line 2."),
+        ]
+        req = AnthropicCreateMessageRequest(model="m", messages=[user_turn], max_tokens=100, system=system_blocks)
+
+        translated = impl._anthropic_to_openai(req)
+
+        system_msg = _msg_to_dict(translated.messages[0])
+        assert system_msg["role"] == "system"
+        assert system_msg["content"] == "Line 1.\nLine 2."
+
+    def test_tool_definitions(self, impl):
+        """An Anthropic tool definition is expected as a single OpenAI ``function`` tool.
+
+        The function name and the ``type`` of its parameter schema are checked.
+        """
+        user_turn = AnthropicMessage(role="user", content="Hi")
+        weather_tool = AnthropicToolDef(
+            name="get_weather",
+            description="Get weather",
+            input_schema={"type": "object", "properties": {"location": {"type": "string"}}},
+        )
+        req = AnthropicCreateMessageRequest(model="m", messages=[user_turn], max_tokens=100, tools=[weather_tool])
+
+        translated = impl._anthropic_to_openai(req)
+
+        assert len(translated.tools) == 1
+        converted = translated.tools[0]
+        assert converted["type"] == "function"
+        assert converted["function"]["name"] == "get_weather"
+        assert converted["function"]["parameters"]["type"] == "object"
+
+    def test_tool_choice_any(self, impl):
+        """Anthropic ``any`` corresponds to OpenAI ``required``."""
+        converted = impl._convert_tool_choice_to_openai("any")
+        assert converted == "required"
+
+    def test_tool_choice_none(self, impl):
+        """``none`` is passed through unchanged."""
+        converted = impl._convert_tool_choice_to_openai("none")
+        assert converted == "none"
+
+    def test_tool_choice_auto(self, impl):
+        """``auto`` is passed through unchanged."""
+        converted = impl._convert_tool_choice_to_openai("auto")
+        assert converted == "auto"
+
+    def test_tool_choice_specific(self, impl):
+        """Selecting one named tool is expected as OpenAI's function-selection object."""
+        anthropic_choice = {"type": "tool", "name": "get_weather"}
+        expected = {"type": "function", "function": {"name": "get_weather"}}
+        assert impl._convert_tool_choice_to_openai(anthropic_choice) == expected
+
+    def test_stop_sequences(self, impl):
+        """``stop_sequences`` is expected unchanged as the OpenAI ``stop`` list."""
+        user_turn = AnthropicMessage(role="user", content="Hi")
+        req = AnthropicCreateMessageRequest(
+            model="m", messages=[user_turn], max_tokens=100, stop_sequences=["STOP", "END"]
+        )
+
+        translated = impl._anthropic_to_openai(req)
+
+        assert translated.stop == ["STOP", "END"]
+
+    def test_tool_use_in_assistant_message(self, impl):
+        """An assistant turn mixing text and a tool_use block translates to one assistant message.
+
+        The text is expected as ``content`` and the tool_use block as a single entry in
+        ``tool_calls`` with the same id and name.
+        """
+        assistant_turn = AnthropicMessage(
+            role="assistant",
+            content=[
+                AnthropicTextBlock(text="Let me check the weather."),
+                AnthropicToolUseBlock(id="toolu_123", name="get_weather", input={"location": "SF"}),
+            ],
+        )
+        req = AnthropicCreateMessageRequest(model="m", messages=[assistant_turn], max_tokens=100)
+
+        translated = impl._anthropic_to_openai(req)
+
+        assistant_msg = _msg_to_dict(translated.messages[0])
+        assert assistant_msg["role"] == "assistant"
+        assert assistant_msg["content"] == "Let me check the weather."
+        assert len(assistant_msg["tool_calls"]) == 1
+        call = assistant_msg["tool_calls"][0]
+        assert call["id"] == "toolu_123"
+        assert call["function"]["name"] == "get_weather"
+        # The arguments are decoded with json.loads before being compared to the original input.
+        assert json.loads(call["function"]["arguments"]) == {"location": "SF"}
+
+    def test_tool_result_in_user_message(self, impl):
+        """A user turn holding only a tool_result block is expected as a ``tool`` role message.
+
+        The tool_use id is expected under ``tool_call_id`` and the result text as ``content``.
+        """
+        tool_result = AnthropicToolResultBlock(tool_use_id="toolu_123", content="72F and sunny")
+        user_turn = AnthropicMessage(role="user", content=[tool_result])
+        req = AnthropicCreateMessageRequest(model="m", messages=[user_turn], max_tokens=100)
+
+        translated = impl._anthropic_to_openai(req)
+
+        tool_msg = _msg_to_dict(translated.messages[0])
+        assert tool_msg["role"] == "tool"
+        assert tool_msg["tool_call_id"] == "toolu_123"
+        assert tool_msg["content"] == "72F and sunny"
+
+    def test_top_k_passed_as_extra(self, impl):
+        """``top_k`` is expected in the translated request's ``model_extra``."""
+        user_turn = AnthropicMessage(role="user", content="Hi")
+        req = AnthropicCreateMessageRequest(model="m", messages=[user_turn], max_tokens=100, top_k=40)
+
+        translated = impl._anthropic_to_openai(req)
+
+        assert translated.model_extra.get("top_k") == 40
+
+
+class TestResponseTranslation:
+    """Non-streaming OpenAI -> Anthropic translation through ``_openai_to_anthropic``.
+
+    Each completion is a ``MagicMock`` tree with ``choices``, ``message``, ``finish_reason`` and ``usage`` set.
+    """
+
+    def test_simple_text_response(self, impl):
+        """A text completion finishing with ``stop`` is expected as one text block with ``end_turn``.
+
+        The message envelope (id prefix, type, role, model) and the usage counts are checked as well.
+        """
+        message = MagicMock(content="Hello!", tool_calls=None)
+        choice = MagicMock(message=message, finish_reason="stop")
+        usage = MagicMock(prompt_tokens=10, completion_tokens=5)
+        completion = MagicMock(choices=[choice], usage=usage)
+
+        translated = impl._openai_to_anthropic(completion, "claude-sonnet-4-20250514")
+
+        assert translated.id.startswith("msg_")
+        assert translated.type == "message"
+        assert translated.role == "assistant"
+        assert translated.model == "claude-sonnet-4-20250514"
+        assert translated.stop_reason == "end_turn"
+        assert len(translated.content) == 1
+        block = translated.content[0]
+        assert block.type == "text"
+        assert block.text == "Hello!"
+        # prompt_tokens / completion_tokens are expected back as input_tokens / output_tokens.
+        assert translated.usage.input_tokens == 10
+        assert translated.usage.output_tokens == 5
+
+    def test_tool_call_response(self, impl):
+        """A completion carrying one tool call is expected as a single tool_use block with ``tool_use``."""
+        tool_call = MagicMock(id="call_123")
+        # ``name`` is reserved by the Mock constructor, so the function fields are assigned as attributes.
+        tool_call.function.name = "get_weather"
+        tool_call.function.arguments = '{"location": "SF"}'
+        message = MagicMock(content=None, tool_calls=[tool_call])
+        choice = MagicMock(message=message, finish_reason="tool_calls")
+        usage = MagicMock(prompt_tokens=20, completion_tokens=10)
+        completion = MagicMock(choices=[choice], usage=usage)
+
+        translated = impl._openai_to_anthropic(completion, "m")
+
+        assert translated.stop_reason == "tool_use"
+        assert len(translated.content) == 1
+        block = translated.content[0]
+        assert block.type == "tool_use"
+        assert block.name == "get_weather"
+        # The JSON argument string is expected back as a parsed dict.
+        assert block.input == {"location": "SF"}
+
+    def test_length_stop_reason(self, impl):
+        """A ``length`` finish reason is expected as the ``max_tokens`` stop reason."""
+        message = MagicMock(content="truncated", tool_calls=None)
+        choice = MagicMock(message=message, finish_reason="length")
+        usage = MagicMock(prompt_tokens=5, completion_tokens=100)
+        completion = MagicMock(choices=[choice], usage=usage)
+
+        translated = impl._openai_to_anthropic(completion, "m")
+
+        assert translated.stop_reason == "max_tokens"
+
+
+class TestStreamingTranslation:
+    """Streaming OpenAI -> Anthropic translation through ``_stream_openai_to_anthropic``.
+
+    Chunks are ``MagicMock`` objects replayed to the translator by a local async generator.
+    """
+
+    async def test_text_streaming(self, impl):
+        """Translate a three-chunk text stream into Anthropic events.
+
+        The first eight events are pinned: message_start, a text content_block_start, three
+        text deltas in order, content_block_stop, a message_delta with ``end_turn``, and
+        message_stop.
+        """
+        deltas = [("Hello", None), (" world", None), ("!", "stop")]
+        chunks = []
+        for piece, finish in deltas:
+            delta = MagicMock(content=piece, tool_calls=None)
+            choice = MagicMock(delta=delta, finish_reason=finish)
+            chunks.append(MagicMock(choices=[choice], usage=None))
+
+        async def fake_stream():
+            for item in chunks:
+                yield item
+
+        events = [event async for event in impl._stream_openai_to_anthropic(fake_stream(), "m")]
+
+        expected_kinds = [
+            "message_start",
+            "content_block_start",
+            "content_block_delta",
+            "content_block_delta",
+            "content_block_delta",
+            "content_block_stop",
+            "message_delta",
+            "message_stop",
+        ]
+        assert [event.type for event in events[:8]] == expected_kinds
+        assert events[1].content_block.type == "text"
+        assert [event.delta.text for event in events[2:5]] == ["Hello", " world", "!"]
+        assert events[6].delta.stop_reason == "end_turn"
+
+    async def test_tool_call_streaming(self, impl):
+        """Translate a tool call that arrives split across two chunks.
+
+        The first chunk carries the call id and function name, the second the argument JSON
+        together with ``finish_reason="tool_calls"``.
+        """
+
+        def make_chunk(tool_delta, finish_reason):
+            delta = MagicMock(content=None, tool_calls=[tool_delta])
+            return MagicMock(choices=[MagicMock(delta=delta, finish_reason=finish_reason)], usage=None)
+
+        # Tool call start; ``name`` is reserved by the Mock constructor, hence the attribute assignment.
+        opening = MagicMock(index=0, id="call_abc", type="function")
+        opening.function = MagicMock(arguments=None)
+        opening.function.name = "search"
+
+        # Tool call arguments, continuing the call at the same index.
+        continuation = MagicMock(index=0, id=None)
+        continuation.function = MagicMock(arguments='{"query": "test"}')
+        continuation.function.name = None
+
+        chunks = [make_chunk(opening, None), make_chunk(continuation, "tool_calls")]
+
+        async def fake_stream():
+            for item in chunks:
+                yield item
+
+        events = [event async for event in impl._stream_openai_to_anthropic(fake_stream(), "m")]
+
+        assert events[0].type == "message_start"
+
+        # Sort the events the assertions care about into buckets in a single pass.
+        tool_starts = []
+        json_deltas = []
+        message_deltas = []
+        for event in events:
+            if event.type == "content_block_start" and hasattr(event.content_block, "name"):
+                tool_starts.append(event)
+            elif event.type == "content_block_delta" and hasattr(event.delta, "partial_json"):
+                json_deltas.append(event)
+            elif event.type == "message_delta":
+                message_deltas.append(event)
+
+        # Exactly one tool_use block is expected to open, named after the streamed function.
+        assert len(tool_starts) == 1
+        assert tool_starts[0].content_block.name == "search"
+
+        # The argument JSON is expected as a single partial_json delta.
+        assert len(json_deltas) == 1
+        assert json_deltas[0].delta.partial_json == '{"query": "test"}'
+
+        assert message_deltas[0].delta.stop_reason == "tool_use"