diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 06e361debc..d903796d75 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -4357,6 +4357,71 @@ paths: description: Authorization token title: Authorization description: Authorization token + /v1/messages: + post: + responses: + '200': + description: An AnthropicMessageResponse or a stream of Anthropic SSE events. + content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicMessageResponse' + text/event-stream: {} + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Messages + summary: Create a message. + description: Create a message using the Anthropic Messages API format. + operationId: create_message_v1_messages_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicCreateMessageRequest' + required: true + /v1/messages/count_tokens: + post: + responses: + '200': + description: Token count for the request. + content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicCountTokensResponse' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Messages + summary: Count tokens in a message. + description: Count the number of tokens in a message request. 
+ operationId: count_message_tokens_v1_messages_count_tokens_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicCountTokensRequest' + required: true components: schemas: Error: @@ -11707,6 +11772,470 @@ components: - type: 'null' title: AllowedToolsFilter description: Filter configuration for restricting which MCP tools can be used. + AnthropicCountTokensRequest: + properties: + model: + type: string + title: Model + description: The model to use for token counting. + messages: + items: + $ref: '#/components/schemas/AnthropicMessage' + type: array + title: Messages + description: The messages to count tokens for. + system: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/AnthropicTextBlock' + type: array + title: list[AnthropicTextBlock] + - type: 'null' + title: string | list[AnthropicTextBlock] + description: System prompt. + tools: + anyOf: + - items: + $ref: '#/components/schemas/AnthropicToolDef' + type: array + - type: 'null' + description: Tools to include in token count. + required: + - model + - messages + title: AnthropicCountTokensRequest + description: Request body for POST /v1/messages/count_tokens. + AnthropicCountTokensResponse: + properties: + input_tokens: + type: integer + title: Input Tokens + required: + - input_tokens + title: AnthropicCountTokensResponse + description: Response from POST /v1/messages/count_tokens. + AnthropicCreateMessageRequest: + properties: + model: + type: string + title: Model + description: The model to use for generation. + messages: + items: + $ref: '#/components/schemas/AnthropicMessage' + type: array + title: Messages + description: The messages in the conversation. + max_tokens: + type: integer + minimum: 1.0 + title: Max Tokens + description: The maximum number of tokens to generate. 
+ system: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/AnthropicTextBlock' + type: array + title: list[AnthropicTextBlock] + - type: 'null' + title: string | list[AnthropicTextBlock] + description: System prompt. A string or list of text blocks. + tools: + anyOf: + - items: + $ref: '#/components/schemas/AnthropicToolDef' + type: array + - type: 'null' + description: Tools available to the model. + tool_choice: + anyOf: + - {} + - type: 'null' + title: Tool Choice + description: "How the model should select tools. One of: 'auto', 'any', 'none', or {type: 'tool', name: '...'}." + stream: + anyOf: + - type: boolean + - type: 'null' + description: Whether to stream the response. + default: false + temperature: + anyOf: + - type: number + maximum: 1.0 + minimum: 0.0 + - type: 'null' + description: Sampling temperature. + top_p: + anyOf: + - type: number + maximum: 1.0 + minimum: 0.0 + - type: 'null' + description: Nucleus sampling parameter. + top_k: + anyOf: + - type: integer + minimum: 1.0 + - type: 'null' + description: Top-k sampling parameter. + stop_sequences: + anyOf: + - items: + type: string + type: array + - type: 'null' + description: Custom stop sequences. + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + description: Request metadata. + thinking: + anyOf: + - $ref: '#/components/schemas/AnthropicThinkingConfig' + title: AnthropicThinkingConfig + - type: 'null' + description: Extended thinking configuration. + title: AnthropicThinkingConfig + service_tier: + anyOf: + - type: string + - type: 'null' + description: Service tier to use. + additionalProperties: true + required: + - model + - messages + - max_tokens + title: AnthropicCreateMessageRequest + description: Request body for POST /v1/messages. 
+ AnthropicImageBlock: + properties: + type: + type: string + title: Type + enum: + - image + source: + $ref: '#/components/schemas/AnthropicImageSource' + required: + - source + title: AnthropicImageBlock + description: An image content block. + AnthropicImageSource: + properties: + type: + type: string + title: Type + enum: + - base64 + media_type: + type: string + title: Media Type + description: MIME type of the image (e.g. image/png). + data: + type: string + title: Data + description: Base64-encoded image data. + required: + - media_type + - data + title: AnthropicImageSource + description: Source for an image content block. + AnthropicMessage: + properties: + role: + type: string + enum: + - user + - assistant + title: Role + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + - $ref: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicToolUseBlock + - $ref: '#/components/schemas/AnthropicToolResultBlock-Input' + title: AnthropicToolResultBlock-Input + - $ref: '#/components/schemas/AnthropicThinkingBlock' + title: AnthropicThinkingBlock + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/AnthropicImageBlock' + text: '#/components/schemas/AnthropicTextBlock' + thinking: '#/components/schemas/AnthropicThinkingBlock' + tool_result: '#/components/schemas/AnthropicToolResultBlock-Input' + tool_use: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicTextBlock | ... (5 variants) + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock | ...] + title: string | list[AnthropicTextBlock | AnthropicImageBlock | ...] + description: 'Message content: a string for simple text, or a list of content blocks.' + required: + - role + - content + title: AnthropicMessage + description: A message in the conversation. 
+ AnthropicMessageResponse: + properties: + id: + type: string + title: Id + description: Unique message ID (msg_ prefix). + type: + type: string + title: Type + enum: + - message + role: + type: string + title: Role + enum: + - assistant + content: + items: + oneOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + - $ref: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicToolUseBlock + - $ref: '#/components/schemas/AnthropicToolResultBlock-Output' + title: AnthropicToolResultBlock-Output + - $ref: '#/components/schemas/AnthropicThinkingBlock' + title: AnthropicThinkingBlock + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/AnthropicImageBlock' + text: '#/components/schemas/AnthropicTextBlock' + thinking: '#/components/schemas/AnthropicThinkingBlock' + tool_result: '#/components/schemas/AnthropicToolResultBlock-Output' + tool_use: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicTextBlock | ... (5 variants) + type: array + title: Content + description: Response content blocks. + model: + type: string + title: Model + stop_reason: + anyOf: + - type: string + - type: 'null' + description: 'Why the model stopped: end_turn, stop_sequence, tool_use, or max_tokens.' + stop_sequence: + anyOf: + - type: string + - type: 'null' + usage: + $ref: '#/components/schemas/AnthropicUsage' + required: + - id + - content + - model + title: AnthropicMessageResponse + description: Response from POST /v1/messages (non-streaming). + AnthropicTextBlock: + properties: + type: + type: string + title: Type + enum: + - text + text: + type: string + title: Text + required: + - text + title: AnthropicTextBlock + description: A text content block. 
+ AnthropicThinkingBlock: + properties: + type: + type: string + title: Type + enum: + - thinking + thinking: + type: string + title: Thinking + description: The model's thinking text. + signature: + anyOf: + - type: string + - type: 'null' + description: Signature for the thinking block. + required: + - thinking + title: AnthropicThinkingBlock + description: A thinking content block (extended thinking). + AnthropicThinkingConfig: + properties: + type: + type: string + enum: + - enabled + - disabled + - adaptive + title: Type + default: enabled + budget_tokens: + anyOf: + - type: integer + minimum: 1.0 + - type: 'null' + description: Maximum tokens for thinking. + title: AnthropicThinkingConfig + description: Configuration for extended thinking. + AnthropicToolDef: + properties: + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + input_schema: + additionalProperties: true + type: object + title: Input Schema + description: JSON Schema for the tool's input. + required: + - name + - input_schema + title: AnthropicToolDef + description: Definition of a tool available to the model. + AnthropicToolResultBlock-Input: + properties: + type: + type: string + title: Type + enum: + - tool_result + tool_use_id: + type: string + title: Tool Use Id + description: The ID of the tool_use block this result corresponds to. + content: + anyOf: + - type: string + - items: + anyOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + title: AnthropicTextBlock | AnthropicImageBlock + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock] + title: string | list[AnthropicTextBlock | AnthropicImageBlock] + description: The result content. + default: '' + is_error: + anyOf: + - type: boolean + - type: 'null' + description: Whether the tool call resulted in an error. 
+ required: + - tool_use_id + title: AnthropicToolResultBlock + description: A tool result content block in a user message. + AnthropicToolResultBlock-Output: + properties: + type: + type: string + title: Type + enum: + - tool_result + tool_use_id: + type: string + title: Tool Use Id + description: The ID of the tool_use block this result corresponds to. + content: + anyOf: + - type: string + - items: + anyOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + title: AnthropicTextBlock | AnthropicImageBlock + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock] + title: string | list[AnthropicTextBlock | AnthropicImageBlock] + description: The result content. + default: '' + is_error: + anyOf: + - type: boolean + - type: 'null' + description: Whether the tool call resulted in an error. + required: + - tool_use_id + title: AnthropicToolResultBlock + description: A tool result content block in a user message. + AnthropicToolUseBlock: + properties: + type: + type: string + title: Type + enum: + - tool_use + id: + type: string + title: Id + description: Unique ID for this tool invocation. + name: + type: string + title: Name + description: Name of the tool being called. + input: + additionalProperties: true + type: object + title: Input + description: Tool input arguments. + required: + - id + - name + - input + title: AnthropicToolUseBlock + description: A tool use content block in an assistant message. + AnthropicUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + default: 0 + output_tokens: + type: integer + title: Output Tokens + default: 0 + cache_creation_input_tokens: + anyOf: + - type: integer + - type: 'null' + cache_read_input_tokens: + anyOf: + - type: integer + - type: 'null' + title: AnthropicUsage + description: Token usage statistics. 
ApprovalFilter: properties: always: @@ -13986,6 +14515,7 @@ components: - prompts - conversations - connectors + - messages - inspect - admin title: Api diff --git a/docs/docs/providers/messages/index.mdx b/docs/docs/providers/messages/index.mdx new file mode 100644 index 0000000000..7df084bc60 --- /dev/null +++ b/docs/docs/providers/messages/index.mdx @@ -0,0 +1,13 @@ +--- +description: "Protocol for the Anthropic Messages API." +sidebar_label: Messages +title: Messages +--- + +# Messages + +## Overview + +Protocol for the Anthropic Messages API. + +This section contains documentation for all available providers for the **messages** API. diff --git a/docs/docs/providers/messages/inline_builtin.mdx b/docs/docs/providers/messages/inline_builtin.mdx new file mode 100644 index 0000000000..9ed60b766b --- /dev/null +++ b/docs/docs/providers/messages/inline_builtin.mdx @@ -0,0 +1,17 @@ +--- +description: "Anthropic Messages API adapter that translates to the inference API." +sidebar_label: Builtin +title: inline::builtin +--- + +# inline::builtin + +## Description + +Anthropic Messages API adapter that translates to the inference API. + +## Sample Configuration + +```yaml +{} +``` diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 12914642cd..3e1d774f82 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -7618,6 +7618,470 @@ components: - type: 'null' title: AllowedToolsFilter description: Filter configuration for restricting which MCP tools can be used. + AnthropicCountTokensRequest: + properties: + model: + type: string + title: Model + description: The model to use for token counting. + messages: + items: + $ref: '#/components/schemas/AnthropicMessage' + type: array + title: Messages + description: The messages to count tokens for. 
+ system: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/AnthropicTextBlock' + type: array + title: list[AnthropicTextBlock] + - type: 'null' + title: string | list[AnthropicTextBlock] + description: System prompt. + tools: + anyOf: + - items: + $ref: '#/components/schemas/AnthropicToolDef' + type: array + - type: 'null' + description: Tools to include in token count. + required: + - model + - messages + title: AnthropicCountTokensRequest + description: Request body for POST /v1/messages/count_tokens. + AnthropicCountTokensResponse: + properties: + input_tokens: + type: integer + title: Input Tokens + required: + - input_tokens + title: AnthropicCountTokensResponse + description: Response from POST /v1/messages/count_tokens. + AnthropicCreateMessageRequest: + properties: + model: + type: string + title: Model + description: The model to use for generation. + messages: + items: + $ref: '#/components/schemas/AnthropicMessage' + type: array + title: Messages + description: The messages in the conversation. + max_tokens: + type: integer + minimum: 1.0 + title: Max Tokens + description: The maximum number of tokens to generate. + system: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/AnthropicTextBlock' + type: array + title: list[AnthropicTextBlock] + - type: 'null' + title: string | list[AnthropicTextBlock] + description: System prompt. A string or list of text blocks. + tools: + anyOf: + - items: + $ref: '#/components/schemas/AnthropicToolDef' + type: array + - type: 'null' + description: Tools available to the model. + tool_choice: + anyOf: + - {} + - type: 'null' + title: Tool Choice + description: "How the model should select tools. One of: 'auto', 'any', 'none', or {type: 'tool', name: '...'}." + stream: + anyOf: + - type: boolean + - type: 'null' + description: Whether to stream the response. 
+ default: false + temperature: + anyOf: + - type: number + maximum: 1.0 + minimum: 0.0 + - type: 'null' + description: Sampling temperature. + top_p: + anyOf: + - type: number + maximum: 1.0 + minimum: 0.0 + - type: 'null' + description: Nucleus sampling parameter. + top_k: + anyOf: + - type: integer + minimum: 1.0 + - type: 'null' + description: Top-k sampling parameter. + stop_sequences: + anyOf: + - items: + type: string + type: array + - type: 'null' + description: Custom stop sequences. + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + description: Request metadata. + thinking: + anyOf: + - $ref: '#/components/schemas/AnthropicThinkingConfig' + title: AnthropicThinkingConfig + - type: 'null' + description: Extended thinking configuration. + title: AnthropicThinkingConfig + service_tier: + anyOf: + - type: string + - type: 'null' + description: Service tier to use. + additionalProperties: true + required: + - model + - messages + - max_tokens + title: AnthropicCreateMessageRequest + description: Request body for POST /v1/messages. + AnthropicImageBlock: + properties: + type: + type: string + title: Type + enum: + - image + source: + $ref: '#/components/schemas/AnthropicImageSource' + required: + - source + title: AnthropicImageBlock + description: An image content block. + AnthropicImageSource: + properties: + type: + type: string + title: Type + enum: + - base64 + media_type: + type: string + title: Media Type + description: MIME type of the image (e.g. image/png). + data: + type: string + title: Data + description: Base64-encoded image data. + required: + - media_type + - data + title: AnthropicImageSource + description: Source for an image content block. 
+ AnthropicMessage: + properties: + role: + type: string + enum: + - user + - assistant + title: Role + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + - $ref: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicToolUseBlock + - $ref: '#/components/schemas/AnthropicToolResultBlock-Input' + title: AnthropicToolResultBlock-Input + - $ref: '#/components/schemas/AnthropicThinkingBlock' + title: AnthropicThinkingBlock + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/AnthropicImageBlock' + text: '#/components/schemas/AnthropicTextBlock' + thinking: '#/components/schemas/AnthropicThinkingBlock' + tool_result: '#/components/schemas/AnthropicToolResultBlock-Input' + tool_use: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicTextBlock | ... (5 variants) + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock | ...] + title: string | list[AnthropicTextBlock | AnthropicImageBlock | ...] + description: 'Message content: a string for simple text, or a list of content blocks.' + required: + - role + - content + title: AnthropicMessage + description: A message in the conversation. + AnthropicMessageResponse: + properties: + id: + type: string + title: Id + description: Unique message ID (msg_ prefix). 
+ type: + type: string + title: Type + enum: + - message + role: + type: string + title: Role + enum: + - assistant + content: + items: + oneOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + - $ref: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicToolUseBlock + - $ref: '#/components/schemas/AnthropicToolResultBlock-Output' + title: AnthropicToolResultBlock-Output + - $ref: '#/components/schemas/AnthropicThinkingBlock' + title: AnthropicThinkingBlock + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/AnthropicImageBlock' + text: '#/components/schemas/AnthropicTextBlock' + thinking: '#/components/schemas/AnthropicThinkingBlock' + tool_result: '#/components/schemas/AnthropicToolResultBlock-Output' + tool_use: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicTextBlock | ... (5 variants) + type: array + title: Content + description: Response content blocks. + model: + type: string + title: Model + stop_reason: + anyOf: + - type: string + - type: 'null' + description: 'Why the model stopped: end_turn, stop_sequence, tool_use, or max_tokens.' + stop_sequence: + anyOf: + - type: string + - type: 'null' + usage: + $ref: '#/components/schemas/AnthropicUsage' + required: + - id + - content + - model + title: AnthropicMessageResponse + description: Response from POST /v1/messages (non-streaming). + AnthropicTextBlock: + properties: + type: + type: string + title: Type + enum: + - text + text: + type: string + title: Text + required: + - text + title: AnthropicTextBlock + description: A text content block. + AnthropicThinkingBlock: + properties: + type: + type: string + title: Type + enum: + - thinking + thinking: + type: string + title: Thinking + description: The model's thinking text. + signature: + anyOf: + - type: string + - type: 'null' + description: Signature for the thinking block. 
+ required: + - thinking + title: AnthropicThinkingBlock + description: A thinking content block (extended thinking). + AnthropicThinkingConfig: + properties: + type: + type: string + enum: + - enabled + - disabled + - adaptive + title: Type + default: enabled + budget_tokens: + anyOf: + - type: integer + minimum: 1.0 + - type: 'null' + description: Maximum tokens for thinking. + title: AnthropicThinkingConfig + description: Configuration for extended thinking. + AnthropicToolDef: + properties: + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + input_schema: + additionalProperties: true + type: object + title: Input Schema + description: JSON Schema for the tool's input. + required: + - name + - input_schema + title: AnthropicToolDef + description: Definition of a tool available to the model. + AnthropicToolResultBlock-Input: + properties: + type: + type: string + title: Type + enum: + - tool_result + tool_use_id: + type: string + title: Tool Use Id + description: The ID of the tool_use block this result corresponds to. + content: + anyOf: + - type: string + - items: + anyOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + title: AnthropicTextBlock | AnthropicImageBlock + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock] + title: string | list[AnthropicTextBlock | AnthropicImageBlock] + description: The result content. + default: '' + is_error: + anyOf: + - type: boolean + - type: 'null' + description: Whether the tool call resulted in an error. + required: + - tool_use_id + title: AnthropicToolResultBlock + description: A tool result content block in a user message. 
+ AnthropicToolResultBlock-Output: + properties: + type: + type: string + title: Type + enum: + - tool_result + tool_use_id: + type: string + title: Tool Use Id + description: The ID of the tool_use block this result corresponds to. + content: + anyOf: + - type: string + - items: + anyOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + title: AnthropicTextBlock | AnthropicImageBlock + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock] + title: string | list[AnthropicTextBlock | AnthropicImageBlock] + description: The result content. + default: '' + is_error: + anyOf: + - type: boolean + - type: 'null' + description: Whether the tool call resulted in an error. + required: + - tool_use_id + title: AnthropicToolResultBlock + description: A tool result content block in a user message. + AnthropicToolUseBlock: + properties: + type: + type: string + title: Type + enum: + - tool_use + id: + type: string + title: Id + description: Unique ID for this tool invocation. + name: + type: string + title: Name + description: Name of the tool being called. + input: + additionalProperties: true + type: object + title: Input + description: Tool input arguments. + required: + - id + - name + - input + title: AnthropicToolUseBlock + description: A tool use content block in an assistant message. + AnthropicUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + default: 0 + output_tokens: + type: integer + title: Output Tokens + default: 0 + cache_creation_input_tokens: + anyOf: + - type: integer + - type: 'null' + cache_read_input_tokens: + anyOf: + - type: integer + - type: 'null' + title: AnthropicUsage + description: Token usage statistics. 
ApprovalFilter: properties: always: @@ -9899,6 +10363,7 @@ components: - prompts - conversations - connectors + - messages - inspect - admin title: Api diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 5e834f4308..0d3b517e83 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -10104,6 +10104,7 @@ components: - prompts - conversations - connectors + - messages - inspect - admin title: Api diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 033219262a..249488661f 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -3305,6 +3305,71 @@ paths: response = client.responses.cancel("resp_abc123") print(response) + /v1/messages: + post: + responses: + '200': + description: An AnthropicMessageResponse or a stream of Anthropic SSE events. + content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicMessageResponse' + text/event-stream: {} + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Messages + summary: Create a message. + description: Create a message using the Anthropic Messages API format. + operationId: create_message_v1_messages_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicCreateMessageRequest' + required: true + /v1/messages/count_tokens: + post: + responses: + '200': + description: Token count for the request. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicCountTokensResponse' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Messages + summary: Count tokens in a message. + description: Count the number of tokens in a message request. + operationId: count_message_tokens_v1_messages_count_tokens_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicCountTokensRequest' + required: true components: schemas: Error: @@ -10655,6 +10720,470 @@ components: - type: 'null' title: AllowedToolsFilter description: Filter configuration for restricting which MCP tools can be used. + AnthropicCountTokensRequest: + properties: + model: + type: string + title: Model + description: The model to use for token counting. + messages: + items: + $ref: '#/components/schemas/AnthropicMessage' + type: array + title: Messages + description: The messages to count tokens for. + system: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/AnthropicTextBlock' + type: array + title: list[AnthropicTextBlock] + - type: 'null' + title: string | list[AnthropicTextBlock] + description: System prompt. + tools: + anyOf: + - items: + $ref: '#/components/schemas/AnthropicToolDef' + type: array + - type: 'null' + description: Tools to include in token count. + required: + - model + - messages + title: AnthropicCountTokensRequest + description: Request body for POST /v1/messages/count_tokens. 
+ AnthropicCountTokensResponse: + properties: + input_tokens: + type: integer + title: Input Tokens + required: + - input_tokens + title: AnthropicCountTokensResponse + description: Response from POST /v1/messages/count_tokens. + AnthropicCreateMessageRequest: + properties: + model: + type: string + title: Model + description: The model to use for generation. + messages: + items: + $ref: '#/components/schemas/AnthropicMessage' + type: array + title: Messages + description: The messages in the conversation. + max_tokens: + type: integer + minimum: 1.0 + title: Max Tokens + description: The maximum number of tokens to generate. + system: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/AnthropicTextBlock' + type: array + title: list[AnthropicTextBlock] + - type: 'null' + title: string | list[AnthropicTextBlock] + description: System prompt. A string or list of text blocks. + tools: + anyOf: + - items: + $ref: '#/components/schemas/AnthropicToolDef' + type: array + - type: 'null' + description: Tools available to the model. + tool_choice: + anyOf: + - {} + - type: 'null' + title: Tool Choice + description: "How the model should select tools. One of: 'auto', 'any', 'none', or {type: 'tool', name: '...'}." + stream: + anyOf: + - type: boolean + - type: 'null' + description: Whether to stream the response. + default: false + temperature: + anyOf: + - type: number + maximum: 1.0 + minimum: 0.0 + - type: 'null' + description: Sampling temperature. + top_p: + anyOf: + - type: number + maximum: 1.0 + minimum: 0.0 + - type: 'null' + description: Nucleus sampling parameter. + top_k: + anyOf: + - type: integer + minimum: 1.0 + - type: 'null' + description: Top-k sampling parameter. + stop_sequences: + anyOf: + - items: + type: string + type: array + - type: 'null' + description: Custom stop sequences. + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + description: Request metadata. 
+ thinking: + anyOf: + - $ref: '#/components/schemas/AnthropicThinkingConfig' + title: AnthropicThinkingConfig + - type: 'null' + description: Extended thinking configuration. + title: AnthropicThinkingConfig + service_tier: + anyOf: + - type: string + - type: 'null' + description: Service tier to use. + additionalProperties: true + required: + - model + - messages + - max_tokens + title: AnthropicCreateMessageRequest + description: Request body for POST /v1/messages. + AnthropicImageBlock: + properties: + type: + type: string + title: Type + enum: + - image + source: + $ref: '#/components/schemas/AnthropicImageSource' + required: + - source + title: AnthropicImageBlock + description: An image content block. + AnthropicImageSource: + properties: + type: + type: string + title: Type + enum: + - base64 + media_type: + type: string + title: Media Type + description: MIME type of the image (e.g. image/png). + data: + type: string + title: Data + description: Base64-encoded image data. + required: + - media_type + - data + title: AnthropicImageSource + description: Source for an image content block. 
+ AnthropicMessage: + properties: + role: + type: string + enum: + - user + - assistant + title: Role + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + - $ref: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicToolUseBlock + - $ref: '#/components/schemas/AnthropicToolResultBlock-Input' + title: AnthropicToolResultBlock-Input + - $ref: '#/components/schemas/AnthropicThinkingBlock' + title: AnthropicThinkingBlock + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/AnthropicImageBlock' + text: '#/components/schemas/AnthropicTextBlock' + thinking: '#/components/schemas/AnthropicThinkingBlock' + tool_result: '#/components/schemas/AnthropicToolResultBlock-Input' + tool_use: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicTextBlock | ... (5 variants) + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock | ...] + title: string | list[AnthropicTextBlock | AnthropicImageBlock | ...] + description: 'Message content: a string for simple text, or a list of content blocks.' + required: + - role + - content + title: AnthropicMessage + description: A message in the conversation. + AnthropicMessageResponse: + properties: + id: + type: string + title: Id + description: Unique message ID (msg_ prefix). 
+ type: + type: string + title: Type + enum: + - message + role: + type: string + title: Role + enum: + - assistant + content: + items: + oneOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + - $ref: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicToolUseBlock + - $ref: '#/components/schemas/AnthropicToolResultBlock-Output' + title: AnthropicToolResultBlock-Output + - $ref: '#/components/schemas/AnthropicThinkingBlock' + title: AnthropicThinkingBlock + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/AnthropicImageBlock' + text: '#/components/schemas/AnthropicTextBlock' + thinking: '#/components/schemas/AnthropicThinkingBlock' + tool_result: '#/components/schemas/AnthropicToolResultBlock-Output' + tool_use: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicTextBlock | ... (5 variants) + type: array + title: Content + description: Response content blocks. + model: + type: string + title: Model + stop_reason: + anyOf: + - type: string + - type: 'null' + description: 'Why the model stopped: end_turn, stop_sequence, tool_use, or max_tokens.' + stop_sequence: + anyOf: + - type: string + - type: 'null' + usage: + $ref: '#/components/schemas/AnthropicUsage' + required: + - id + - content + - model + title: AnthropicMessageResponse + description: Response from POST /v1/messages (non-streaming). + AnthropicTextBlock: + properties: + type: + type: string + title: Type + enum: + - text + text: + type: string + title: Text + required: + - text + title: AnthropicTextBlock + description: A text content block. + AnthropicThinkingBlock: + properties: + type: + type: string + title: Type + enum: + - thinking + thinking: + type: string + title: Thinking + description: The model's thinking text. + signature: + anyOf: + - type: string + - type: 'null' + description: Signature for the thinking block. 
+ required: + - thinking + title: AnthropicThinkingBlock + description: A thinking content block (extended thinking). + AnthropicThinkingConfig: + properties: + type: + type: string + enum: + - enabled + - disabled + - adaptive + title: Type + default: enabled + budget_tokens: + anyOf: + - type: integer + minimum: 1.0 + - type: 'null' + description: Maximum tokens for thinking. + title: AnthropicThinkingConfig + description: Configuration for extended thinking. + AnthropicToolDef: + properties: + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + input_schema: + additionalProperties: true + type: object + title: Input Schema + description: JSON Schema for the tool's input. + required: + - name + - input_schema + title: AnthropicToolDef + description: Definition of a tool available to the model. + AnthropicToolResultBlock-Input: + properties: + type: + type: string + title: Type + enum: + - tool_result + tool_use_id: + type: string + title: Tool Use Id + description: The ID of the tool_use block this result corresponds to. + content: + anyOf: + - type: string + - items: + anyOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + title: AnthropicTextBlock | AnthropicImageBlock + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock] + title: string | list[AnthropicTextBlock | AnthropicImageBlock] + description: The result content. + default: '' + is_error: + anyOf: + - type: boolean + - type: 'null' + description: Whether the tool call resulted in an error. + required: + - tool_use_id + title: AnthropicToolResultBlock + description: A tool result content block in a user message. 
+ AnthropicToolResultBlock-Output: + properties: + type: + type: string + title: Type + enum: + - tool_result + tool_use_id: + type: string + title: Tool Use Id + description: The ID of the tool_use block this result corresponds to. + content: + anyOf: + - type: string + - items: + anyOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + title: AnthropicTextBlock | AnthropicImageBlock + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock] + title: string | list[AnthropicTextBlock | AnthropicImageBlock] + description: The result content. + default: '' + is_error: + anyOf: + - type: boolean + - type: 'null' + description: Whether the tool call resulted in an error. + required: + - tool_use_id + title: AnthropicToolResultBlock + description: A tool result content block in a user message. + AnthropicToolUseBlock: + properties: + type: + type: string + title: Type + enum: + - tool_use + id: + type: string + title: Id + description: Unique ID for this tool invocation. + name: + type: string + title: Name + description: Name of the tool being called. + input: + additionalProperties: true + type: object + title: Input + description: Tool input arguments. + required: + - id + - name + - input + title: AnthropicToolUseBlock + description: A tool use content block in an assistant message. + AnthropicUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + default: 0 + output_tokens: + type: integer + title: Output Tokens + default: 0 + cache_creation_input_tokens: + anyOf: + - type: integer + - type: 'null' + cache_read_input_tokens: + anyOf: + - type: integer + - type: 'null' + title: AnthropicUsage + description: Token usage statistics. 
ApprovalFilter: properties: always: @@ -12911,6 +13440,7 @@ components: - prompts - conversations - connectors + - messages - inspect - admin title: Api diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 06e361debc..d903796d75 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -4357,6 +4357,71 @@ paths: description: Authorization token title: Authorization description: Authorization token + /v1/messages: + post: + responses: + '200': + description: An AnthropicMessageResponse or a stream of Anthropic SSE events. + content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicMessageResponse' + text/event-stream: {} + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Messages + summary: Create a message. + description: Create a message using the Anthropic Messages API format. + operationId: create_message_v1_messages_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicCreateMessageRequest' + required: true + /v1/messages/count_tokens: + post: + responses: + '200': + description: Token count for the request. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicCountTokensResponse' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Messages + summary: Count tokens in a message. + description: Count the number of tokens in a message request. + operationId: count_message_tokens_v1_messages_count_tokens_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AnthropicCountTokensRequest' + required: true components: schemas: Error: @@ -11707,6 +11772,470 @@ components: - type: 'null' title: AllowedToolsFilter description: Filter configuration for restricting which MCP tools can be used. + AnthropicCountTokensRequest: + properties: + model: + type: string + title: Model + description: The model to use for token counting. + messages: + items: + $ref: '#/components/schemas/AnthropicMessage' + type: array + title: Messages + description: The messages to count tokens for. + system: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/AnthropicTextBlock' + type: array + title: list[AnthropicTextBlock] + - type: 'null' + title: string | list[AnthropicTextBlock] + description: System prompt. + tools: + anyOf: + - items: + $ref: '#/components/schemas/AnthropicToolDef' + type: array + - type: 'null' + description: Tools to include in token count. + required: + - model + - messages + title: AnthropicCountTokensRequest + description: Request body for POST /v1/messages/count_tokens. 
+ AnthropicCountTokensResponse: + properties: + input_tokens: + type: integer + title: Input Tokens + required: + - input_tokens + title: AnthropicCountTokensResponse + description: Response from POST /v1/messages/count_tokens. + AnthropicCreateMessageRequest: + properties: + model: + type: string + title: Model + description: The model to use for generation. + messages: + items: + $ref: '#/components/schemas/AnthropicMessage' + type: array + title: Messages + description: The messages in the conversation. + max_tokens: + type: integer + minimum: 1.0 + title: Max Tokens + description: The maximum number of tokens to generate. + system: + anyOf: + - type: string + - items: + $ref: '#/components/schemas/AnthropicTextBlock' + type: array + title: list[AnthropicTextBlock] + - type: 'null' + title: string | list[AnthropicTextBlock] + description: System prompt. A string or list of text blocks. + tools: + anyOf: + - items: + $ref: '#/components/schemas/AnthropicToolDef' + type: array + - type: 'null' + description: Tools available to the model. + tool_choice: + anyOf: + - {} + - type: 'null' + title: Tool Choice + description: "How the model should select tools. One of: 'auto', 'any', 'none', or {type: 'tool', name: '...'}." + stream: + anyOf: + - type: boolean + - type: 'null' + description: Whether to stream the response. + default: false + temperature: + anyOf: + - type: number + maximum: 1.0 + minimum: 0.0 + - type: 'null' + description: Sampling temperature. + top_p: + anyOf: + - type: number + maximum: 1.0 + minimum: 0.0 + - type: 'null' + description: Nucleus sampling parameter. + top_k: + anyOf: + - type: integer + minimum: 1.0 + - type: 'null' + description: Top-k sampling parameter. + stop_sequences: + anyOf: + - items: + type: string + type: array + - type: 'null' + description: Custom stop sequences. + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' + description: Request metadata. 
+ thinking: + anyOf: + - $ref: '#/components/schemas/AnthropicThinkingConfig' + title: AnthropicThinkingConfig + - type: 'null' + description: Extended thinking configuration. + title: AnthropicThinkingConfig + service_tier: + anyOf: + - type: string + - type: 'null' + description: Service tier to use. + additionalProperties: true + required: + - model + - messages + - max_tokens + title: AnthropicCreateMessageRequest + description: Request body for POST /v1/messages. + AnthropicImageBlock: + properties: + type: + type: string + title: Type + enum: + - image + source: + $ref: '#/components/schemas/AnthropicImageSource' + required: + - source + title: AnthropicImageBlock + description: An image content block. + AnthropicImageSource: + properties: + type: + type: string + title: Type + enum: + - base64 + media_type: + type: string + title: Media Type + description: MIME type of the image (e.g. image/png). + data: + type: string + title: Data + description: Base64-encoded image data. + required: + - media_type + - data + title: AnthropicImageSource + description: Source for an image content block. 
+ AnthropicMessage: + properties: + role: + type: string + enum: + - user + - assistant + title: Role + content: + anyOf: + - type: string + - items: + oneOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + - $ref: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicToolUseBlock + - $ref: '#/components/schemas/AnthropicToolResultBlock-Input' + title: AnthropicToolResultBlock-Input + - $ref: '#/components/schemas/AnthropicThinkingBlock' + title: AnthropicThinkingBlock + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/AnthropicImageBlock' + text: '#/components/schemas/AnthropicTextBlock' + thinking: '#/components/schemas/AnthropicThinkingBlock' + tool_result: '#/components/schemas/AnthropicToolResultBlock-Input' + tool_use: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicTextBlock | ... (5 variants) + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock | ...] + title: string | list[AnthropicTextBlock | AnthropicImageBlock | ...] + description: 'Message content: a string for simple text, or a list of content blocks.' + required: + - role + - content + title: AnthropicMessage + description: A message in the conversation. + AnthropicMessageResponse: + properties: + id: + type: string + title: Id + description: Unique message ID (msg_ prefix). 
+ type: + type: string + title: Type + enum: + - message + role: + type: string + title: Role + enum: + - assistant + content: + items: + oneOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + - $ref: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicToolUseBlock + - $ref: '#/components/schemas/AnthropicToolResultBlock-Output' + title: AnthropicToolResultBlock-Output + - $ref: '#/components/schemas/AnthropicThinkingBlock' + title: AnthropicThinkingBlock + discriminator: + propertyName: type + mapping: + image: '#/components/schemas/AnthropicImageBlock' + text: '#/components/schemas/AnthropicTextBlock' + thinking: '#/components/schemas/AnthropicThinkingBlock' + tool_result: '#/components/schemas/AnthropicToolResultBlock-Output' + tool_use: '#/components/schemas/AnthropicToolUseBlock' + title: AnthropicTextBlock | ... (5 variants) + type: array + title: Content + description: Response content blocks. + model: + type: string + title: Model + stop_reason: + anyOf: + - type: string + - type: 'null' + description: 'Why the model stopped: end_turn, stop_sequence, tool_use, or max_tokens.' + stop_sequence: + anyOf: + - type: string + - type: 'null' + usage: + $ref: '#/components/schemas/AnthropicUsage' + required: + - id + - content + - model + title: AnthropicMessageResponse + description: Response from POST /v1/messages (non-streaming). + AnthropicTextBlock: + properties: + type: + type: string + title: Type + enum: + - text + text: + type: string + title: Text + required: + - text + title: AnthropicTextBlock + description: A text content block. + AnthropicThinkingBlock: + properties: + type: + type: string + title: Type + enum: + - thinking + thinking: + type: string + title: Thinking + description: The model's thinking text. + signature: + anyOf: + - type: string + - type: 'null' + description: Signature for the thinking block. 
+ required: + - thinking + title: AnthropicThinkingBlock + description: A thinking content block (extended thinking). + AnthropicThinkingConfig: + properties: + type: + type: string + enum: + - enabled + - disabled + - adaptive + title: Type + default: enabled + budget_tokens: + anyOf: + - type: integer + minimum: 1.0 + - type: 'null' + description: Maximum tokens for thinking. + title: AnthropicThinkingConfig + description: Configuration for extended thinking. + AnthropicToolDef: + properties: + name: + type: string + title: Name + description: + anyOf: + - type: string + - type: 'null' + input_schema: + additionalProperties: true + type: object + title: Input Schema + description: JSON Schema for the tool's input. + required: + - name + - input_schema + title: AnthropicToolDef + description: Definition of a tool available to the model. + AnthropicToolResultBlock-Input: + properties: + type: + type: string + title: Type + enum: + - tool_result + tool_use_id: + type: string + title: Tool Use Id + description: The ID of the tool_use block this result corresponds to. + content: + anyOf: + - type: string + - items: + anyOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + title: AnthropicTextBlock | AnthropicImageBlock + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock] + title: string | list[AnthropicTextBlock | AnthropicImageBlock] + description: The result content. + default: '' + is_error: + anyOf: + - type: boolean + - type: 'null' + description: Whether the tool call resulted in an error. + required: + - tool_use_id + title: AnthropicToolResultBlock + description: A tool result content block in a user message. 
+ AnthropicToolResultBlock-Output: + properties: + type: + type: string + title: Type + enum: + - tool_result + tool_use_id: + type: string + title: Tool Use Id + description: The ID of the tool_use block this result corresponds to. + content: + anyOf: + - type: string + - items: + anyOf: + - $ref: '#/components/schemas/AnthropicTextBlock' + title: AnthropicTextBlock + - $ref: '#/components/schemas/AnthropicImageBlock' + title: AnthropicImageBlock + title: AnthropicTextBlock | AnthropicImageBlock + type: array + title: list[AnthropicTextBlock | AnthropicImageBlock] + title: string | list[AnthropicTextBlock | AnthropicImageBlock] + description: The result content. + default: '' + is_error: + anyOf: + - type: boolean + - type: 'null' + description: Whether the tool call resulted in an error. + required: + - tool_use_id + title: AnthropicToolResultBlock + description: A tool result content block in a user message. + AnthropicToolUseBlock: + properties: + type: + type: string + title: Type + enum: + - tool_use + id: + type: string + title: Id + description: Unique ID for this tool invocation. + name: + type: string + title: Name + description: Name of the tool being called. + input: + additionalProperties: true + type: object + title: Input + description: Tool input arguments. + required: + - id + - name + - input + title: AnthropicToolUseBlock + description: A tool use content block in an assistant message. + AnthropicUsage: + properties: + input_tokens: + type: integer + title: Input Tokens + default: 0 + output_tokens: + type: integer + title: Output Tokens + default: 0 + cache_creation_input_tokens: + anyOf: + - type: integer + - type: 'null' + cache_read_input_tokens: + anyOf: + - type: integer + - type: 'null' + title: AnthropicUsage + description: Token usage statistics. 
ApprovalFilter: properties: always: @@ -13986,6 +14515,7 @@ components: - prompts - conversations - connectors + - messages - inspect - admin title: Api diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py index 69ef47942e..5b96b57730 100644 --- a/src/llama_stack/core/resolver.py +++ b/src/llama_stack/core/resolver.py @@ -41,6 +41,7 @@ Inference, InferenceProvider, Inspect, + Messages, Models, ModelsProtocolPrivate, Prompts, @@ -107,6 +108,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> Api.conversations: Conversations, Api.file_processors: FileProcessors, Api.connectors: Connectors, + Api.messages: Messages, } if external_apis: diff --git a/src/llama_stack/distributions/ci-tests/config.yaml b/src/llama_stack/distributions/ci-tests/config.yaml index 7bcbb6eee9..b0b87923f3 100644 --- a/src/llama_stack/distributions/ci-tests/config.yaml +++ b/src/llama_stack/distributions/ci-tests/config.yaml @@ -7,6 +7,7 @@ apis: - file_processors - files - inference +- messages - responses - safety - scoring @@ -197,6 +198,9 @@ providers: excluded_categories: [] - provider_id: code-scanner provider_type: inline::code-scanner + messages: + - provider_id: builtin + provider_type: inline::builtin responses: - provider_id: builtin provider_type: inline::builtin diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 553ed41118..a9a1e6e2cc 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -7,6 +7,7 @@ apis: - file_processors - files - inference +- messages - responses - safety - scoring @@ -197,6 +198,9 @@ providers: excluded_categories: [] - provider_id: code-scanner provider_type: inline::code-scanner + messages: + - provider_id: builtin + provider_type: inline::builtin responses: - provider_id: builtin 
provider_type: inline::builtin diff --git a/src/llama_stack/distributions/starter/config.yaml b/src/llama_stack/distributions/starter/config.yaml index fb9cf9ae84..ed5b862130 100644 --- a/src/llama_stack/distributions/starter/config.yaml +++ b/src/llama_stack/distributions/starter/config.yaml @@ -7,6 +7,7 @@ apis: - file_processors - files - inference +- messages - responses - safety - scoring @@ -191,6 +192,9 @@ providers: excluded_categories: [] - provider_id: code-scanner provider_type: inline::code-scanner + messages: + - provider_id: builtin + provider_type: inline::builtin responses: - provider_id: builtin provider_type: inline::builtin diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 52225576f5..c2a814bc62 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -7,6 +7,7 @@ apis: - file_processors - files - inference +- messages - responses - safety - scoring @@ -191,6 +192,9 @@ providers: excluded_categories: [] - provider_id: code-scanner provider_type: inline::code-scanner + messages: + - provider_id: builtin + provider_type: inline::builtin responses: - provider_id: builtin provider_type: inline::builtin diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index c99ce08e49..61e969c01b 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -152,6 +152,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: BuildProvider(provider_type="inline::llama-guard"), BuildProvider(provider_type="inline::code-scanner"), ], + "messages": [BuildProvider(provider_type="inline::builtin")], "responses": [BuildProvider(provider_type="inline::builtin")], "eval": [BuildProvider(provider_type="inline::builtin")], 
"datasetio": [ diff --git a/src/llama_stack/log.py b/src/llama_stack/log.py index 54e2afe348..7c37bd1b82 100644 --- a/src/llama_stack/log.py +++ b/src/llama_stack/log.py @@ -56,6 +56,7 @@ class LoggingConfig(BaseModel): "tests", "telemetry", "connectors", + "messages", ] UNCATEGORIZED = "uncategorized" diff --git a/src/llama_stack/providers/inline/messages/__init__.py b/src/llama_stack/providers/inline/messages/__init__.py new file mode 100644 index 0000000000..b292976c87 --- /dev/null +++ b/src/llama_stack/providers/inline/messages/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from llama_stack.core.datatypes import Api + +from .config import MessagesConfig + + +async def get_provider_impl( + config: MessagesConfig, + deps: dict[Api, Any], +): + from .impl import BuiltinMessagesImpl + + impl = BuiltinMessagesImpl(config, deps[Api.inference]) + await impl.initialize() + return impl diff --git a/src/llama_stack/providers/inline/messages/config.py b/src/llama_stack/providers/inline/messages/config.py new file mode 100644 index 0000000000..c17a040607 --- /dev/null +++ b/src/llama_stack/providers/inline/messages/config.py @@ -0,0 +1,17 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any + +from pydantic import BaseModel + + +class MessagesConfig(BaseModel): + """Configuration for the built-in Anthropic Messages API adapter.""" + + @classmethod + def sample_run_config(cls, __distro_dir__: str = "") -> dict[str, Any]: + return {} diff --git a/src/llama_stack/providers/inline/messages/impl.py b/src/llama_stack/providers/inline/messages/impl.py new file mode 100644 index 0000000000..990517502b --- /dev/null +++ b/src/llama_stack/providers/inline/messages/impl.py @@ -0,0 +1,566 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Built-in Anthropic Messages API implementation. + +Translates Anthropic Messages format to/from OpenAI Chat Completions format, +delegating to the inference API for actual model calls. When the underlying +inference provider natively supports the Anthropic Messages API (e.g. Ollama), +requests are forwarded directly without translation. 
+""" + +from __future__ import annotations + +import json +import uuid +from collections.abc import AsyncIterator +from typing import Any + +import httpx + +from llama_stack.log import get_logger +from llama_stack_api import ( + Inference, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChatCompletionRequestWithExtraBody, +) +from llama_stack_api.messages import ( + Messages, +) +from llama_stack_api.messages.models import ( + AnthropicContentBlock, + AnthropicCountTokensRequest, + AnthropicCountTokensResponse, + AnthropicCreateMessageRequest, + AnthropicImageBlock, + AnthropicMessage, + AnthropicMessageResponse, + AnthropicStreamEvent, + AnthropicTextBlock, + AnthropicThinkingBlock, + AnthropicToolDef, + AnthropicToolResultBlock, + AnthropicToolUseBlock, + AnthropicUsage, + ContentBlockDeltaEvent, + ContentBlockStartEvent, + ContentBlockStopEvent, + MessageDeltaEvent, + MessageStartEvent, + MessageStopEvent, + _InputJsonDelta, + _MessageDelta, + _TextDelta, + _ThinkingDelta, +) + +from .config import MessagesConfig + +logger = get_logger(name=__name__, category="messages") + +# Maps Anthropic stop_reason -> OpenAI finish_reason +_STOP_REASON_TO_FINISH = { + "end_turn": "stop", + "stop_sequence": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", +} + +# Maps OpenAI finish_reason -> Anthropic stop_reason +_FINISH_TO_STOP_REASON = { + "stop": "end_turn", + "tool_calls": "tool_use", + "length": "max_tokens", + "content_filter": "end_turn", +} + + +class BuiltinMessagesImpl(Messages): + """Anthropic Messages API adapter that translates to the inference API.""" + + def __init__(self, config: MessagesConfig, inference_api: Inference): + self.config = config + self.inference_api = inference_api + + async def initialize(self) -> None: + self._client = httpx.AsyncClient() + + async def shutdown(self) -> None: + await self._client.aclose() + + async def create_message( + self, + request: AnthropicCreateMessageRequest, + ) -> AnthropicMessageResponse 
| AsyncIterator[AnthropicStreamEvent]: + # Try native passthrough for providers that support /v1/messages directly + passthrough_url = await self._get_passthrough_url(request.model) + if passthrough_url: + return await self._passthrough_request(passthrough_url, request) + + openai_params = self._anthropic_to_openai(request) + + result = await self.inference_api.openai_chat_completion(openai_params) + + if isinstance(result, AsyncIterator): + return self._stream_openai_to_anthropic(result, request.model) + + return self._openai_to_anthropic(result, request.model) + + async def count_message_tokens( + self, + request: AnthropicCountTokensRequest, + ) -> AnthropicCountTokensResponse: + raise NotImplementedError("Token counting is not yet implemented") + + # -- Native passthrough for providers with /v1/messages support -- + + # Module paths of provider impls known to support /v1/messages natively + _NATIVE_MESSAGES_MODULES = {"llama_stack.providers.remote.inference.ollama"} + + async def _get_passthrough_url(self, model: str) -> str | None: + """Check if the model's provider supports /v1/messages natively. + + Returns the base URL for passthrough, or None to use translation. 
+ """ + router = self.inference_api + if not hasattr(router, "routing_table"): + return None + + try: + obj = await router.routing_table.get_object_by_identifier("model", model) + if not obj: + return None + + provider_impl = await router.routing_table.get_provider_impl(obj.identifier) + provider_module = type(provider_impl).__module__ + is_native = any(provider_module.startswith(m) for m in self._NATIVE_MESSAGES_MODULES) + + if is_native and hasattr(provider_impl, "get_base_url"): + base_url = str(provider_impl.get_base_url()).rstrip("/") + # Ollama's /v1/messages sits at the root, not under /v1 + if base_url.endswith("/v1"): + base_url = base_url[:-3] + logger.info("Using native /v1/messages passthrough", model=model, base_url=base_url) + return base_url + except Exception: + logger.debug("Failed to resolve passthrough, falling back to translation", model=model) + + return None + + async def _passthrough_request( + self, + base_url: str, + request: AnthropicCreateMessageRequest, + ) -> AnthropicMessageResponse | AsyncIterator[AnthropicStreamEvent]: + """Forward the request directly to the provider's /v1/messages endpoint.""" + url = f"{base_url}/v1/messages" + # Use the provider_resource_id (model name without provider prefix) + provider_model = request.model + router = self.inference_api + if hasattr(router, "routing_table"): + try: + obj = await router.routing_table.get_object_by_identifier("model", request.model) + if obj: + provider_model = obj.provider_resource_id + except Exception: + pass + + body = request.model_dump(exclude_none=True) + body["model"] = provider_model + headers = { + "content-type": "application/json", + "anthropic-version": "2023-06-01", + "x-api-key": "no-key-required", + } + + if request.stream: + return self._passthrough_stream(url, headers, body) + + resp = await self._client.post(url, json=body, headers=headers, timeout=300) + resp.raise_for_status() + return AnthropicMessageResponse(**resp.json()) + + async def _passthrough_stream( 
+ self, + url: str, + headers: dict[str, str], + body: dict[str, Any], + ) -> AsyncIterator[AnthropicStreamEvent]: + """Stream SSE events directly from the provider.""" + async with self._client.stream("POST", url, json=body, headers=headers, timeout=300) as resp: + resp.raise_for_status() + event_type = None + async for line in resp.aiter_lines(): + line = line.strip() + if line.startswith("event: "): + event_type = line[7:] + elif line.startswith("data: ") and event_type: + data = json.loads(line[6:]) + event = self._parse_sse_event(event_type, data) + if event: + yield event + event_type = None + + def _parse_sse_event(self, event_type: str, data: dict[str, Any]) -> AnthropicStreamEvent | None: + """Parse an Anthropic SSE event from its type and data.""" + if event_type == "message_start": + return MessageStartEvent(message=AnthropicMessageResponse(**data["message"])) + if event_type == "content_block_start": + block_data = data["content_block"] + content_block: AnthropicTextBlock | AnthropicToolUseBlock | AnthropicThinkingBlock + block_type = block_data.get("type") + if block_type == "tool_use": + content_block = AnthropicToolUseBlock(**block_data) + elif block_type == "thinking": + content_block = AnthropicThinkingBlock(**block_data) + else: + content_block = AnthropicTextBlock(**block_data) + return ContentBlockStartEvent(index=data["index"], content_block=content_block) + if event_type == "content_block_delta": + delta_data = data["delta"] + delta_type = delta_data.get("type") + delta: _TextDelta | _InputJsonDelta | _ThinkingDelta + if delta_type == "text_delta": + delta = _TextDelta(text=delta_data["text"]) + elif delta_type == "input_json_delta": + delta = _InputJsonDelta(partial_json=delta_data["partial_json"]) + elif delta_type == "thinking_delta": + delta = _ThinkingDelta(thinking=delta_data["thinking"]) + else: + return None + return ContentBlockDeltaEvent(index=data["index"], delta=delta) + if event_type == "content_block_stop": + return 
ContentBlockStopEvent(index=data["index"]) + if event_type == "message_delta": + return MessageDeltaEvent( + delta=_MessageDelta(stop_reason=data["delta"].get("stop_reason")), + usage=AnthropicUsage(**data.get("usage", {})), + ) + if event_type == "message_stop": + return MessageStopEvent() + return None + + # -- Request translation -- + + def _anthropic_to_openai(self, request: AnthropicCreateMessageRequest) -> OpenAIChatCompletionRequestWithExtraBody: + messages = self._convert_messages_to_openai(request.system, request.messages) + tools = self._convert_tools_to_openai(request.tools) if request.tools else None + tool_choice = self._convert_tool_choice_to_openai(request.tool_choice) if request.tool_choice else None + + extra_body: dict[str, Any] = {} + if request.top_k is not None: + extra_body["top_k"] = request.top_k + # Note: Anthropic's "thinking" parameter has no equivalent in the OpenAI + # chat completions API and is intentionally not forwarded. + + params = OpenAIChatCompletionRequestWithExtraBody( + model=request.model, + messages=messages, # type: ignore[arg-type] + max_tokens=request.max_tokens, + temperature=request.temperature, + top_p=request.top_p, + stop=request.stop_sequences, + tools=tools, + tool_choice=tool_choice, + stream=request.stream or False, + service_tier=request.service_tier, # type: ignore[arg-type] + **(extra_body or {}), + ) + return params + + def _convert_messages_to_openai( + self, + system: str | list[AnthropicTextBlock] | None, + messages: list[AnthropicMessage], + ) -> list[dict[str, Any]]: + openai_messages: list[dict[str, Any]] = [] + + if system is not None: + if isinstance(system, str): + system_text = system + else: + system_text = "\n".join(block.text for block in system) + openai_messages.append({"role": "system", "content": system_text}) + + for msg in messages: + openai_messages.extend(self._convert_single_message(msg)) + + return openai_messages + + def _convert_single_message(self, msg: AnthropicMessage) -> 
list[dict[str, Any]]: + """Convert a single Anthropic message to one or more OpenAI messages. + + A single Anthropic user message with tool_result blocks may need to be + split into multiple OpenAI messages (tool messages). + """ + if isinstance(msg.content, str): + return [{"role": msg.role, "content": msg.content}] + + if msg.role == "assistant": + return [self._convert_assistant_message(msg.content)] + + # User message: may contain text and/or tool_result blocks + result: list[dict[str, Any]] = [] + text_parts: list[dict[str, Any]] = [] + + for block in msg.content: + if isinstance(block, AnthropicToolResultBlock): + # Flush accumulated text first + if text_parts: + if len(text_parts) == 1 and text_parts[0].get("type") == "text": + flush_content: str | list[dict[str, Any]] = text_parts[0]["text"] + else: + flush_content = text_parts + result.append({"role": "user", "content": flush_content}) + text_parts = [] + # Tool results become separate tool messages + tool_content = block.content + if isinstance(tool_content, list): + tool_content = "\n".join(b.text for b in tool_content if isinstance(b, AnthropicTextBlock)) + result.append( + { + "role": "tool", + "tool_call_id": block.tool_use_id, + "content": tool_content, + } + ) + elif isinstance(block, AnthropicTextBlock): + text_parts.append({"type": "text", "text": block.text}) + elif isinstance(block, AnthropicImageBlock): + text_parts.append( + { + "type": "image_url", + "image_url": { + "url": f"data:{block.source.media_type};base64,{block.source.data}", + }, + } + ) + + if text_parts: + # OpenAI content must be a string or a list, never a single dict + if len(text_parts) == 1 and text_parts[0].get("type") == "text": + user_content: str | list[dict[str, Any]] = text_parts[0]["text"] + else: + user_content = text_parts + result.append({"role": "user", "content": user_content}) + + return result if result else [{"role": "user", "content": ""}] + + def _convert_assistant_message(self, content: 
list[AnthropicContentBlock]) -> dict[str, Any]: + """Convert an assistant message with content blocks to OpenAI format.""" + text_parts: list[str] = [] + tool_calls: list[dict[str, Any]] = [] + + for block in content: + if isinstance(block, AnthropicTextBlock): + text_parts.append(block.text) + elif isinstance(block, AnthropicToolUseBlock): + tool_calls.append( + { + "id": block.id, + "type": "function", + "function": { + "name": block.name, + "arguments": json.dumps(block.input), + }, + } + ) + + msg: dict[str, Any] = {"role": "assistant"} + if text_parts: + msg["content"] = "\n".join(text_parts) + if tool_calls: + msg["tool_calls"] = tool_calls + + return msg + + def _convert_tools_to_openai(self, tools: list[AnthropicToolDef]) -> list[dict[str, Any]]: + return [ + { + "type": "function", + "function": { + "name": tool.name, + "description": tool.description or "", + "parameters": tool.input_schema, + }, + } + for tool in tools + ] + + def _convert_tool_choice_to_openai(self, tool_choice: Any) -> Any: + if isinstance(tool_choice, str): + if tool_choice == "any": + return "required" + if tool_choice == "none": + return "none" + return "auto" + + if isinstance(tool_choice, dict): + tc_type = tool_choice.get("type") + if tc_type == "tool": + return {"type": "function", "function": {"name": tool_choice["name"]}} + if tc_type == "any": + return "required" + if tc_type == "none": + return "none" + return "auto" + + return "auto" + + # -- Response translation -- + + def _openai_to_anthropic(self, response: OpenAIChatCompletion, request_model: str) -> AnthropicMessageResponse: + content: list[AnthropicContentBlock] = [] + + if response.choices: + choice = response.choices[0] + message = choice.message + + if message and message.content: + content.append(AnthropicTextBlock(text=message.content)) + + if message and message.tool_calls: + for tc in message.tool_calls: + if not hasattr(tc, "function") or tc.function is None: + continue + try: + tool_input = 
async def _stream_openai_to_anthropic(
    self,
    openai_stream: AsyncIterator[OpenAIChatCompletionChunk],
    request_model: str,
) -> AsyncIterator[AnthropicStreamEvent]:
    """Translate OpenAI streaming chunks to Anthropic streaming events.

    Event order produced: one ``MessageStartEvent``, then content block
    start/delta events as text and tool-call deltas arrive, then stop events
    for every block still open when the source stream ends, then a
    ``MessageDeltaEvent`` carrying the stop reason and token usage, and
    finally ``MessageStopEvent``.

    Args:
        openai_stream: Async iterator of OpenAI chat completion chunks.
        request_model: Model name echoed back in the ``message_start`` event.
    """

    # Emit message_start
    # Usage is reported as zeros here; the counts seen on the stream are sent
    # in the final message_delta event.
    yield MessageStartEvent(
        message=AnthropicMessageResponse(
            id=f"msg_{uuid.uuid4().hex[:24]}",
            content=[],
            model=request_model,
            stop_reason=None,
            usage=AnthropicUsage(input_tokens=0, output_tokens=0),
        ),
    )

    # Index of the currently open text block, or of the next block to open.
    content_block_index = 0
    in_text_block = False
    in_tool_blocks: dict[int, bool] = {}  # tool_call_index -> started
    # Maps an OpenAI tool_call index to the Anthropic content block index it was given.
    tool_call_index_to_block_index: dict[int, int] = {}
    output_tokens = 0
    input_tokens = 0
    stop_reason = "end_turn"

    async for chunk in openai_stream:
        if not chunk.choices:
            # Usage-only chunk
            if chunk.usage:
                input_tokens = chunk.usage.prompt_tokens or 0
                output_tokens = chunk.usage.completion_tokens or 0
            continue

        # Only the first choice is translated.
        choice = chunk.choices[0]
        delta = choice.delta

        if delta and delta.content:
            # Lazily open a text block on the first text delta.
            if not in_text_block:
                yield ContentBlockStartEvent(
                    index=content_block_index,
                    content_block=AnthropicTextBlock(text=""),
                )
                in_text_block = True

            yield ContentBlockDeltaEvent(
                index=content_block_index,
                delta=_TextDelta(text=delta.content),
            )

        if delta and delta.tool_calls:
            for tc_delta in delta.tool_calls:
                # A missing index is treated as tool call 0.
                tc_idx = tc_delta.index if tc_delta.index is not None else 0

                if tc_idx not in in_tool_blocks:
                    # Close text block if open
                    if in_text_block:
                        yield ContentBlockStopEvent(index=content_block_index)
                        content_block_index += 1
                        in_text_block = False

                    # Start new tool_use block
                    in_tool_blocks[tc_idx] = True
                    tool_call_index_to_block_index[tc_idx] = content_block_index

                    # A tool-use id is synthesized when the chunk does not carry one.
                    yield ContentBlockStartEvent(
                        index=content_block_index,
                        content_block=AnthropicToolUseBlock(
                            id=tc_delta.id or f"toolu_{uuid.uuid4().hex[:24]}",
                            name=tc_delta.function.name if tc_delta.function and tc_delta.function.name else "",
                            input={},
                        ),
                    )
                    content_block_index += 1

                # Argument fragments are forwarded as-is as partial JSON.
                if tc_delta.function and tc_delta.function.arguments:
                    block_idx = tool_call_index_to_block_index[tc_idx]
                    yield ContentBlockDeltaEvent(
                        index=block_idx,
                        delta=_InputJsonDelta(partial_json=tc_delta.function.arguments),
                    )

        if choice.finish_reason:
            # Unknown finish reasons fall back to "end_turn".
            stop_reason = _FINISH_TO_STOP_REASON.get(choice.finish_reason, "end_turn")

        if chunk.usage:
            input_tokens = chunk.usage.prompt_tokens or 0
            output_tokens = chunk.usage.completion_tokens or 0

    # Close any open blocks
    if in_text_block:
        yield ContentBlockStopEvent(index=content_block_index)

    # Tool-use blocks are never closed mid-stream; all of them are stopped here.
    for _tc_idx, block_idx in tool_call_index_to_block_index.items():
        yield ContentBlockStopEvent(index=block_idx)

    # Final events
    yield MessageDeltaEvent(
        delta=_MessageDelta(stop_reason=stop_reason),
        usage=AnthropicUsage(input_tokens=input_tokens, output_tokens=output_tokens),
    )
    yield MessageStopEvent()
def available_providers() -> list[ProviderSpec]:
    """Return the provider specifications registered for the Messages API.

    The list has a single entry: the inline ``inline::builtin`` adapter,
    which declares the inference API as its only API dependency.
    """
    builtin_messages = InlineProviderSpec(
        api=Api.messages,
        provider_type="inline::builtin",
        pip_packages=[],
        module="llama_stack.providers.inline.messages",
        config_class="llama_stack.providers.inline.messages.config.MessagesConfig",
        api_dependencies=[Api.inference],
        description="Anthropic Messages API adapter that translates to the inference API.",
    )
    return [builtin_messages]
def _patched_httpx_async_stream(original_stream, self, method, url, **kwargs):
    """Patched version of httpx.AsyncClient.stream for recording/replay of streaming Messages API passthrough.

    Intercepts streaming requests to /v1/messages endpoints. Returns an async context manager
    that either replays recorded SSE events or records live ones.

    Args:
        original_stream: The unpatched ``httpx.AsyncClient.stream`` callable.
        self: The httpx client instance the call was made on.
        method: HTTP method passed by the caller (forwarded to ``original_stream``).
        url: Request URL; only URLs containing ``/v1/messages`` are intercepted.
        **kwargs: Forwarded unchanged to ``original_stream``; ``json`` is read
            to compute the request hash.

    Raises:
        RuntimeError: in REPLAY mode when no recording matches the request.
        AssertionError: when the current mode is none of the handled modes.
    """
    global _current_mode, _current_storage

    url_str = str(url)
    is_messages = "/v1/messages" in url_str

    # Pass through untouched for unrelated URLs, LIVE mode, or when no storage is configured.
    if not is_messages or _current_mode == APIRecordingMode.LIVE or _current_storage is None:
        return original_stream(self, method, url, **kwargs)

    json_payload = kwargs.get("json", {})
    # NOTE: the hash is computed with a literal "POST", not the `method` argument.
    request_hash = normalize_http_request(url_str, "POST", json_payload)

    class _ReplayStreamContext:
        """Async context manager that replays recorded SSE events as a mock httpx response."""

        def __init__(self, sse_lines: list[str]):
            self._sse_lines = sse_lines

        async def __aenter__(self):
            import httpx as _httpx

            # Minimal stand-in exposing only status_code, headers,
            # raise_for_status() and aiter_lines().
            class _MockStreamResponse:
                def __init__(self, lines):
                    # Replayed streams always report 200; no status is stored
                    # with streaming recordings.
                    self.status_code = 200
                    self.headers = _httpx.Headers(
                        {"content-type": "text/event-stream", "anthropic-version": "2023-06-01"}
                    )
                    self._lines = lines

                def raise_for_status(self):
                    pass

                async def aiter_lines(self):
                    for line in self._lines:
                        yield line

            return _MockStreamResponse(self._sse_lines)

        async def __aexit__(self, *args):
            pass

    # Recording is done by _RecordCtx (defined further down, in the RECORD branch),
    # which wraps the live response in _RecordingStreamResponse.

    class _RecordingStreamResponse:
        """Wraps a real httpx streaming response to capture SSE lines for recording."""

        def __init__(self, response, url_str, json_payload, request_hash):
            self._response = response
            self._url = url_str
            self._payload = json_payload
            self._hash = request_hash
            self._recorded_lines: list[str] = []
            self.status_code = response.status_code
            self.headers = response.headers

        def raise_for_status(self):
            self._response.raise_for_status()

        async def aiter_lines(self):
            async for line in self._response.aiter_lines():
                self._recorded_lines.append(line)
                yield line

            # After the stream is exhausted, store the recording
            # (nothing is stored if the consumer stops iterating before the end).
            request_data = {
                "test_id": get_test_context(),
                "url": self._url,
                "method": "POST",
                "payload": self._payload,
            }
            response_data = {
                "body": self._recorded_lines,
                "is_streaming": True,
            }
            if _current_storage:
                _current_storage.store_recording(self._hash, request_data, response_data)

    if _current_mode in (APIRecordingMode.REPLAY, APIRecordingMode.RECORD_IF_MISSING):
        recording = _current_storage.find_recording(request_hash)
        if recording:
            return _ReplayStreamContext(recording["response"]["body"])
        elif _current_mode == APIRecordingMode.REPLAY:
            raise RuntimeError(
                f"Recording not found for httpx stream POST {url_str}\n"
                f"\n"
                f"Run './scripts/integration-tests.sh --inference-mode record-if-missing' with required API keys to generate."
            )
        # RECORD_IF_MISSING with no recording falls through to the recording branch.

    if _current_mode in (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING):
        # Capture the httpx client instance before defining the inner class
        httpx_client = self

        class _RecordCtx:
            """Async context manager that opens the real stream and wraps its response for recording."""

            async def __aenter__(self):
                self._cm = original_stream(httpx_client, method, url, **kwargs)
                resp = await self._cm.__aenter__()
                self._wrapper = _RecordingStreamResponse(resp, url_str, json_payload, request_hash)
                return self._wrapper

            async def __aexit__(self, *args):
                return await self._cm.__aexit__(*args)

        return _RecordCtx()

    raise AssertionError(f"Invalid mode: {_current_mode}")
Store original methods for OpenAI, Ollama clients, tool runtimes, file processors, and aiohttp + # Store original methods for OpenAI, Ollama clients, tool runtimes, file processors, aiohttp, and httpx _original_methods = { "chat_completions_create": AsyncChatCompletions.create, "completions_create": AsyncCompletions.create, @@ -1144,6 +1316,8 @@ def patch_inference_clients(): "tavily_invoke_tool": TavilySearchToolRuntimeImpl.invoke_tool, "pypdf_process_file": PyPDFFileProcessorAdapter.process_file, "aiohttp_post": aiohttp.ClientSession.post, + "httpx_async_post": httpx.AsyncClient.post, + "httpx_async_stream": httpx.AsyncClient.stream, } # Create patched methods for OpenAI client @@ -1249,6 +1423,17 @@ def patched_aiohttp_session_post(self, url, **kwargs): # Apply aiohttp patch aiohttp.ClientSession.post = patched_aiohttp_session_post + # Create patched methods for httpx AsyncClient (Messages API passthrough) + async def patched_httpx_async_post(self, url, **kwargs): + return await _patched_httpx_async_post(_original_methods["httpx_async_post"], self, url, **kwargs) + + def patched_httpx_async_stream(self, method, url, **kwargs): + return _patched_httpx_async_stream(_original_methods["httpx_async_stream"], self, method, url, **kwargs) + + # Apply httpx patches + httpx.AsyncClient.post = patched_httpx_async_post + httpx.AsyncClient.stream = patched_httpx_async_stream + def unpatch_inference_clients(): """Remove monkey patches and restore original OpenAI, Ollama client, tool runtime, and aiohttp methods.""" @@ -1259,6 +1444,7 @@ def unpatch_inference_clients(): # Import here to avoid circular imports import aiohttp + import httpx from ollama import AsyncClient as OllamaAsyncClient from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions from openai.resources.completions import AsyncCompletions @@ -1293,6 +1479,10 @@ def unpatch_inference_clients(): # Restore aiohttp method aiohttp.ClientSession.post = _original_methods["aiohttp_post"] + 
# Restore httpx methods + httpx.AsyncClient.post = _original_methods["httpx_async_post"] + httpx.AsyncClient.stream = _original_methods["httpx_async_stream"] + _original_methods.clear() diff --git a/src/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py index 90c209598f..04d814dd9b 100644 --- a/src/llama_stack_api/__init__.py +++ b/src/llama_stack_api/__init__.py @@ -325,6 +325,25 @@ UserMessage, ) from .inspect_api import Inspect +from .messages import ( + Messages, + AnthropicContentBlock, + AnthropicCountTokensRequest, + AnthropicCountTokensResponse, + AnthropicCreateMessageRequest, + AnthropicErrorResponse, + AnthropicImageBlock, + AnthropicImageSource, + AnthropicMessage, + AnthropicMessageResponse, + AnthropicTextBlock, + AnthropicThinkingBlock, + AnthropicThinkingConfig, + AnthropicToolDef, + AnthropicToolResultBlock, + AnthropicToolUseBlock, + AnthropicUsage, +) from .models import ( CommonModelFields, GetModelRequest, @@ -1122,6 +1141,24 @@ "ViolationLevel", "WebSearchToolTypes", "WeightedRanker", + # Messages API + "Messages", + "AnthropicContentBlock", + "AnthropicCountTokensRequest", + "AnthropicCountTokensResponse", + "AnthropicCreateMessageRequest", + "AnthropicErrorResponse", + "AnthropicImageBlock", + "AnthropicImageSource", + "AnthropicMessage", + "AnthropicMessageResponse", + "AnthropicTextBlock", + "AnthropicThinkingBlock", + "AnthropicThinkingConfig", + "AnthropicToolDef", + "AnthropicToolResultBlock", + "AnthropicToolUseBlock", + "AnthropicUsage", # Validators "validate_embeddings_input_is_text", # helpers diff --git a/src/llama_stack_api/datatypes.py b/src/llama_stack_api/datatypes.py index 95b3a0983c..900529bac2 100644 --- a/src/llama_stack_api/datatypes.py +++ b/src/llama_stack_api/datatypes.py @@ -115,6 +115,7 @@ class Api(Enum, metaclass=DynamicApiMeta): :cvar file_processors: File parsing and processing operations :cvar prompts: Prompt versions and management :cvar connectors: External connector management (e.g., MCP servers) + 
:cvar messages: Anthropic Messages API compatibility layer :cvar inspect: Built-in system inspection and introspection """ @@ -141,6 +142,7 @@ class Api(Enum, metaclass=DynamicApiMeta): prompts = "prompts" conversations = "conversations" connectors = "connectors" + messages = "messages" # built-in API inspect = "inspect" diff --git a/src/llama_stack_api/messages/__init__.py b/src/llama_stack_api/messages/__init__.py new file mode 100644 index 0000000000..e6ffc09f53 --- /dev/null +++ b/src/llama_stack_api/messages/__init__.py @@ -0,0 +1,66 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Messages API protocol and models. + +This module contains the Messages protocol definition for the Anthropic Messages API. +Pydantic models are defined in llama_stack_api.messages.models. +The FastAPI router is defined in llama_stack_api.messages.fastapi_routes. +""" + +from . 
import fastapi_routes +from .api import Messages +from .models import ( + AnthropicContentBlock, + AnthropicCountTokensRequest, + AnthropicCountTokensResponse, + AnthropicCreateMessageRequest, + AnthropicErrorResponse, + AnthropicImageBlock, + AnthropicImageSource, + AnthropicMessage, + AnthropicMessageResponse, + AnthropicTextBlock, + AnthropicThinkingBlock, + AnthropicThinkingConfig, + AnthropicToolDef, + AnthropicToolResultBlock, + AnthropicToolUseBlock, + AnthropicUsage, + ContentBlockDeltaEvent, + ContentBlockStartEvent, + ContentBlockStopEvent, + MessageDeltaEvent, + MessageStartEvent, + MessageStopEvent, +) + +__all__ = [ + "Messages", + "AnthropicContentBlock", + "AnthropicCountTokensRequest", + "AnthropicCountTokensResponse", + "AnthropicCreateMessageRequest", + "AnthropicErrorResponse", + "AnthropicImageBlock", + "AnthropicImageSource", + "AnthropicMessage", + "AnthropicMessageResponse", + "AnthropicTextBlock", + "AnthropicThinkingBlock", + "AnthropicThinkingConfig", + "AnthropicToolDef", + "AnthropicToolResultBlock", + "AnthropicToolUseBlock", + "AnthropicUsage", + "ContentBlockDeltaEvent", + "ContentBlockStartEvent", + "ContentBlockStopEvent", + "MessageDeltaEvent", + "MessageStartEvent", + "MessageStopEvent", + "fastapi_routes", +] diff --git a/src/llama_stack_api/messages/api.py b/src/llama_stack_api/messages/api.py new file mode 100644 index 0000000000..3b42e684c6 --- /dev/null +++ b/src/llama_stack_api/messages/api.py @@ -0,0 +1,31 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
@runtime_checkable
class Messages(Protocol):
    """Protocol for the Anthropic Messages API.

    Implementations provide message creation (streaming or non-streaming)
    and token counting using the Anthropic request/response models.
    """

    async def create_message(
        self,
        request: AnthropicCreateMessageRequest,
    ) -> AnthropicMessageResponse | AsyncIterator[AnthropicStreamEvent]:
        """Create a message.

        Returns either a complete ``AnthropicMessageResponse`` or an async
        iterator of ``AnthropicStreamEvent`` objects.
        """
        ...

    async def count_message_tokens(
        self,
        request: AnthropicCountTokensRequest,
    ) -> AnthropicCountTokensResponse:
        """Count the tokens in a message request and return the result."""
        ...
+""" + +import asyncio +import contextvars +import json +import logging # allow-direct-logging +from collections.abc import AsyncIterator +from typing import Annotated, Any + +from fastapi import APIRouter, Body, HTTPException, Request, Response +from fastapi.responses import JSONResponse, StreamingResponse +from pydantic import BaseModel + +from llama_stack_api.common.errors import ModelNotFoundError +from llama_stack_api.router_utils import standard_responses +from llama_stack_api.version import LLAMA_STACK_API_V1 + +from .api import Messages +from .models import ( + AnthropicCountTokensRequest, + AnthropicCountTokensResponse, + AnthropicCreateMessageRequest, + AnthropicErrorResponse, + AnthropicMessageResponse, + _AnthropicErrorDetail, +) + +logger = logging.LoggerAdapter(logging.getLogger(__name__), {"category": "messages"}) + +# Anthropic API version we are compatible with +_ANTHROPIC_VERSION = "2023-06-01" + + +def _create_anthropic_sse_event(event_type: str, data: Any) -> str: + """Create an Anthropic-format SSE event with named event type. 
async def _anthropic_sse_generator(event_gen: AsyncIterator) -> AsyncIterator[str]:
    """Serialize Anthropic stream events into SSE text frames.

    Each event is emitted under its ``type`` attribute ("unknown" when the
    event has none). On cancellation the source generator is closed, if it
    supports ``aclose``, and the cancellation is re-raised. Any other
    exception is logged and reported to the client as a final ``error`` frame.
    """
    try:
        async for stream_event in event_gen:
            frame_name = getattr(stream_event, "type", "unknown")
            yield _create_anthropic_sse_event(frame_name, stream_event)
    except asyncio.CancelledError:
        if hasattr(event_gen, "aclose"):
            await event_gen.aclose()
        raise
    except Exception as exc:
        logger.exception("Error in Anthropic SSE generator")
        # NOTE(review): the raw exception text is sent to the client here, whereas the
        # non-streaming route replies with a generic message — confirm this is intended.
        detail = _AnthropicErrorDetail(type="api_error", message=str(exc))
        yield _create_anthropic_sse_event("error", AnthropicErrorResponse(error=detail))
def _anthropic_error_response(status_code: int, message: str) -> JSONResponse:
    """Create an Anthropic-format error JSONResponse.

    Args:
        status_code: HTTP status for the response. It also selects the
            Anthropic error ``type``; statuses without a mapping use
            ``"api_error"``.
        message: Human-readable error message. Values that are not already
            strings are converted with ``str()`` before the error model is built.

    Returns:
        A ``JSONResponse`` whose body is the dumped ``AnthropicErrorResponse``.
    """
    error_type_map = {
        400: "invalid_request_error",
        401: "authentication_error",
        403: "permission_error",
        404: "not_found_error",
        429: "rate_limit_error",
    }
    error_type = error_type_map.get(status_code, "api_error")

    # The create_message route forwards HTTPException.detail here, which is not
    # guaranteed to be a string; coerce so building the error body cannot itself fail.
    # NOTE(review): assumes _AnthropicErrorDetail.message is a str field — confirm in models.
    text = message if isinstance(message, str) else str(message)

    body = AnthropicErrorResponse(
        error=_AnthropicErrorDetail(type=error_type, message=text),
    )
    return JSONResponse(status_code=status_code, content=body.model_dump())
+ + Args: + impl: The Messages implementation instance + + Returns: + APIRouter configured for the Messages API + """ + router = APIRouter( + prefix=f"/{LLAMA_STACK_API_V1}", + tags=["Messages"], + responses=standard_responses, + ) + + @router.post( + "/messages", + summary="Create a message.", + description="Create a message using the Anthropic Messages API format.", + status_code=200, + response_model=AnthropicMessageResponse, + responses={ + 200: { + "description": "An AnthropicMessageResponse or a stream of Anthropic SSE events.", + "content": { + "text/event-stream": {}, + }, + }, + }, + ) + async def create_message( + raw_request: Request, + params: Annotated[AnthropicCreateMessageRequest, Body(...)], + ) -> Response: + try: + result = await impl.create_message(params) + except NotImplementedError as e: + return _anthropic_error_response(501, str(e)) + except ModelNotFoundError as e: + return _anthropic_error_response(404, str(e)) + except ValueError as e: + return _anthropic_error_response(400, str(e)) + except HTTPException as e: + return _anthropic_error_response(e.status_code, e.detail) + except Exception: + logger.exception("Failed to create message") + return _anthropic_error_response(500, "Internal server error") + + response_headers = {"anthropic-version": _ANTHROPIC_VERSION} + + if isinstance(result, AsyncIterator): + return StreamingResponse( + _preserve_context_for_sse(_anthropic_sse_generator(result)), + media_type="text/event-stream", + headers=response_headers, + ) + + return JSONResponse( + content=result.model_dump(exclude_none=True), + headers=response_headers, + ) + + @router.post( + "/messages/count_tokens", + response_model=AnthropicCountTokensResponse, + summary="Count tokens in a message.", + description="Count the number of tokens in a message request.", + responses={ + 200: {"description": "Token count for the request."}, + }, + ) + async def count_message_tokens( + params: Annotated[AnthropicCountTokensRequest, Body(...)], + ) -> 
Response: + try: + result = await impl.count_message_tokens(params) + except NotImplementedError as e: + return _anthropic_error_response(501, str(e)) + except Exception: + logger.exception("Failed to count message tokens") + return _anthropic_error_response(500, "Internal server error") + + return JSONResponse( + content=result.model_dump(), + headers={"anthropic-version": _ANTHROPIC_VERSION}, + ) + + return router diff --git a/src/llama_stack_api/messages/models.py b/src/llama_stack_api/messages/models.py new file mode 100644 index 0000000000..bd39fa4bbf --- /dev/null +++ b/src/llama_stack_api/messages/models.py @@ -0,0 +1,278 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Pydantic models for the Anthropic Messages API. + +These models define the request and response shapes for the /v1/messages endpoint, +following the Anthropic Messages API specification. +""" + +from __future__ import annotations + +from typing import Annotated, Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + +# -- Content blocks -- + + +class AnthropicTextBlock(BaseModel): + """A text content block.""" + + type: Literal["text"] = "text" + text: str + + +class AnthropicImageSource(BaseModel): + """Source for an image content block.""" + + type: Literal["base64"] = "base64" + media_type: str = Field(..., description="MIME type of the image (e.g. 
image/png).") + data: str = Field(..., description="Base64-encoded image data.") + + +class AnthropicImageBlock(BaseModel): + """An image content block.""" + + type: Literal["image"] = "image" + source: AnthropicImageSource + + +class AnthropicToolUseBlock(BaseModel): + """A tool use content block in an assistant message.""" + + type: Literal["tool_use"] = "tool_use" + id: str = Field(..., description="Unique ID for this tool invocation.") + name: str = Field(..., description="Name of the tool being called.") + input: dict[str, Any] = Field(..., description="Tool input arguments.") + + +class AnthropicToolResultBlock(BaseModel): + """A tool result content block in a user message.""" + + type: Literal["tool_result"] = "tool_result" + tool_use_id: str = Field(..., description="The ID of the tool_use block this result corresponds to.") + content: str | list[AnthropicTextBlock | AnthropicImageBlock] = Field( + default="", + description="The result content.", + ) + is_error: bool | None = Field(default=None, description="Whether the tool call resulted in an error.") + + +class AnthropicThinkingBlock(BaseModel): + """A thinking content block (extended thinking).""" + + type: Literal["thinking"] = "thinking" + thinking: str = Field(..., description="The model's thinking text.") + signature: str | None = Field(default=None, description="Signature for the thinking block.") + + +AnthropicContentBlock = Annotated[ + AnthropicTextBlock + | AnthropicImageBlock + | AnthropicToolUseBlock + | AnthropicToolResultBlock + | AnthropicThinkingBlock, + Field(discriminator="type"), +] + +# -- Messages -- + + +class AnthropicMessage(BaseModel): + """A message in the conversation.""" + + role: Literal["user", "assistant"] + content: str | list[AnthropicContentBlock] = Field( + ..., + description="Message content: a string for simple text, or a list of content blocks.", + ) + + +# -- Tool definitions -- + + +class AnthropicToolDef(BaseModel): + """Definition of a tool available to the 
model.""" + + name: str + description: str | None = None + input_schema: dict[str, Any] = Field(..., description="JSON Schema for the tool's input.") + + +# -- Thinking config -- + + +class AnthropicThinkingConfig(BaseModel): + """Configuration for extended thinking.""" + + type: Literal["enabled", "disabled", "adaptive"] = "enabled" + budget_tokens: int | None = Field(default=None, ge=1, description="Maximum tokens for thinking.") + + +# -- Request models -- + + +class AnthropicCreateMessageRequest(BaseModel): + """Request body for POST /v1/messages.""" + + model_config = ConfigDict(extra="allow") + + model: str = Field(..., description="The model to use for generation.") + messages: list[AnthropicMessage] = Field(..., description="The messages in the conversation.") + max_tokens: int = Field(..., ge=1, description="The maximum number of tokens to generate.") + system: str | list[AnthropicTextBlock] | None = Field( + default=None, + description="System prompt. A string or list of text blocks.", + ) + tools: list[AnthropicToolDef] | None = Field(default=None, description="Tools available to the model.") + tool_choice: Any | None = Field( + default=None, + description="How the model should select tools. 
One of: 'auto', 'any', 'none', or {type: 'tool', name: '...'}.", + ) + stream: bool | None = Field(default=False, description="Whether to stream the response.") + temperature: float | None = Field(default=None, ge=0.0, le=1.0, description="Sampling temperature.") + top_p: float | None = Field(default=None, ge=0.0, le=1.0, description="Nucleus sampling parameter.") + top_k: int | None = Field(default=None, ge=1, description="Top-k sampling parameter.") + stop_sequences: list[str] | None = Field(default=None, description="Custom stop sequences.") + metadata: dict[str, str] | None = Field(default=None, description="Request metadata.") + thinking: AnthropicThinkingConfig | None = Field(default=None, description="Extended thinking configuration.") + service_tier: str | None = Field(default=None, description="Service tier to use.") + + +class AnthropicCountTokensRequest(BaseModel): + """Request body for POST /v1/messages/count_tokens.""" + + model: str = Field(..., description="The model to use for token counting.") + messages: list[AnthropicMessage] = Field(..., description="The messages to count tokens for.") + system: str | list[AnthropicTextBlock] | None = Field(default=None, description="System prompt.") + tools: list[AnthropicToolDef] | None = Field(default=None, description="Tools to include in token count.") + + +# -- Response models -- + + +class AnthropicUsage(BaseModel): + """Token usage statistics.""" + + input_tokens: int = 0 + output_tokens: int = 0 + cache_creation_input_tokens: int | None = None + cache_read_input_tokens: int | None = None + + +class AnthropicMessageResponse(BaseModel): + """Response from POST /v1/messages (non-streaming).""" + + id: str = Field(..., description="Unique message ID (msg_ prefix).") + type: Literal["message"] = "message" + role: Literal["assistant"] = "assistant" + content: list[AnthropicContentBlock] = Field(..., description="Response content blocks.") + model: str + stop_reason: str | None = Field( + default=None, + 
description="Why the model stopped: end_turn, stop_sequence, tool_use, or max_tokens.", + ) + stop_sequence: str | None = None + usage: AnthropicUsage = Field(default_factory=AnthropicUsage) + + +class AnthropicCountTokensResponse(BaseModel): + """Response from POST /v1/messages/count_tokens.""" + + input_tokens: int + + +# -- Streaming event models -- + + +class MessageStartEvent(BaseModel): + """First event in a streaming response.""" + + type: Literal["message_start"] = "message_start" + message: AnthropicMessageResponse + + +class ContentBlockStartEvent(BaseModel): + """Signals the start of a new content block.""" + + type: Literal["content_block_start"] = "content_block_start" + index: int + content_block: AnthropicContentBlock + + +class _TextDelta(BaseModel): + type: Literal["text_delta"] = "text_delta" + text: str + + +class _InputJsonDelta(BaseModel): + type: Literal["input_json_delta"] = "input_json_delta" + partial_json: str + + +class _ThinkingDelta(BaseModel): + type: Literal["thinking_delta"] = "thinking_delta" + thinking: str + + +class ContentBlockDeltaEvent(BaseModel): + """A delta within a content block.""" + + type: Literal["content_block_delta"] = "content_block_delta" + index: int + delta: _TextDelta | _InputJsonDelta | _ThinkingDelta + + +class ContentBlockStopEvent(BaseModel): + """Signals the end of a content block.""" + + type: Literal["content_block_stop"] = "content_block_stop" + index: int + + +class _MessageDelta(BaseModel): + stop_reason: str | None = None + stop_sequence: str | None = None + + +class MessageDeltaEvent(BaseModel): + """Final metadata update before the message ends.""" + + type: Literal["message_delta"] = "message_delta" + delta: _MessageDelta + usage: AnthropicUsage | None = None + + +class MessageStopEvent(BaseModel): + """Final event in a streaming response.""" + + type: Literal["message_stop"] = "message_stop" + + +AnthropicStreamEvent = ( + MessageStartEvent + | ContentBlockStartEvent + | ContentBlockDeltaEvent + | 
ContentBlockStopEvent + | MessageDeltaEvent + | MessageStopEvent +) + + +# -- Error response -- + + +class _AnthropicErrorDetail(BaseModel): + type: str + message: str + + +class AnthropicErrorResponse(BaseModel): + """Anthropic-format error response.""" + + type: Literal["error"] = "error" + error: _AnthropicErrorDetail diff --git a/src/llama_stack_api/pyproject.toml b/src/llama_stack_api/pyproject.toml index c2232f5a7f..c8e2f40b35 100644 --- a/src/llama_stack_api/pyproject.toml +++ b/src/llama_stack_api/pyproject.toml @@ -57,6 +57,7 @@ packages = [ "llama_stack_api.inspect_api", "llama_stack_api.inference", "llama_stack_api.internal", + "llama_stack_api.messages", "llama_stack_api.models", "llama_stack_api.providers", diff --git a/tests/integration/ci_matrix.json b/tests/integration/ci_matrix.json index f0a6ab53d6..ff57e4a9dc 100644 --- a/tests/integration/ci_matrix.json +++ b/tests/integration/ci_matrix.json @@ -11,7 +11,8 @@ {"suite": "bedrock-responses", "setup": "bedrock"}, {"suite": "base-vllm-subset", "setup": "vllm"}, {"suite": "vllm-reasoning", "setup": "vllm"}, - {"suite": "ollama-reasoning", "setup": "ollama-reasoning"} + {"suite": "ollama-reasoning", "setup": "ollama-reasoning"}, + {"suite": "messages", "setup": "ollama-reasoning"} ], "stainless": [ {"suite": "base", "setup": "ollama", "inference_mode": "record-if-missing"} diff --git a/tests/integration/common/recordings/cf0be7f9e2ebfc78903aa4ada30204585952f77bc5acb3ff7702bd7878d44b44.json b/tests/integration/common/recordings/cf0be7f9e2ebfc78903aa4ada30204585952f77bc5acb3ff7702bd7878d44b44.json new file mode 100644 index 0000000000..cfd287568c --- /dev/null +++ b/tests/integration/common/recordings/cf0be7f9e2ebfc78903aa4ada30204585952f77bc5acb3ff7702bd7878d44b44.json @@ -0,0 +1,27 @@ +{ + "test_id": null, + "request": { + "test_id": null, + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [], + "max_tokens": 64, + "stream": false + 
} + }, + "response": { + "status": 400, + "body": { + "type": "error", + "error": { + "type": "invalid_request_error", + "message": "messages is required" + }, + "request_id": "req_b662960dd608e745f612e3a2" + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/__init__.py b/tests/integration/messages/__init__.py new file mode 100644 index 0000000000..756f351d88 --- /dev/null +++ b/tests/integration/messages/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/integration/messages/conftest.py b/tests/integration/messages/conftest.py new file mode 100644 index 0000000000..fae505dea4 --- /dev/null +++ b/tests/integration/messages/conftest.py @@ -0,0 +1,124 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import json +import os +from typing import Any + +import httpx +import pytest + +from llama_stack.core.library_client import LlamaStackAsLibraryClient +from llama_stack.core.testing_context import get_test_context + +# Import fixtures from common module to make them available in this test directory +from tests.integration.fixtures.common import ( # noqa: F401 + openai_client, + require_server, +) + + +def pytest_configure(config): + """Disable stderr pipe to prevent Rich logging from blocking on buffer saturation.""" + os.environ["LLAMA_STACK_TEST_LOG_STDERR"] = "0" + + +@pytest.fixture(scope="session") +def messages_base_url(llama_stack_client): + """Provide the base URL for the Messages API, skipping library client mode.""" + if isinstance(llama_stack_client, LlamaStackAsLibraryClient): + pytest.skip("Messages API tests are not supported in library client mode") + return llama_stack_client.base_url + + +@pytest.fixture +def messages_client(messages_base_url): + """Provide an httpx client configured for Anthropic Messages API calls.""" + client = httpx.Client(base_url=messages_base_url, timeout=60.0) + yield client + client.close() + + +def _build_messages_body( + *, + model: str, + messages: list[dict], + max_tokens: int = 256, + stream: bool = False, + system: str | None = None, + tools: list[dict] | None = None, + tool_choice: dict | str | None = None, + temperature: float | None = None, + stop_sequences: list[str] | None = None, +) -> dict[str, Any]: + body: dict[str, Any] = { + "model": model, + "messages": messages, + "max_tokens": max_tokens, + "stream": stream, + } + if system is not None: + body["system"] = system + if tools is not None: + body["tools"] = tools + if tool_choice is not None: + body["tool_choice"] = tool_choice + if temperature is not None: + body["temperature"] = temperature + if stop_sequences is not None: + body["stop_sequences"] = stop_sequences + return body + + +def _build_headers() -> dict[str, str]: + headers = { + 
"content-type": "application/json", + "anthropic-version": "2023-06-01", + } + test_id = get_test_context() + if test_id: + provider_data = {"__test_id": test_id} + headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data) + return headers + + +def make_messages_request( + client: httpx.Client, + **kwargs: Any, +) -> httpx.Response: + """Make a non-streaming POST request to /v1/messages.""" + body = _build_messages_body(**kwargs) + return client.post("/v1/messages", headers=_build_headers(), json=body) + + +def make_streaming_messages_request( + client: httpx.Client, + **kwargs: Any, +) -> list[dict]: + """Make a streaming POST request to /v1/messages and return parsed SSE events. + + Raises AssertionError if the response status is not 200. + """ + kwargs["stream"] = True + body = _build_messages_body(**kwargs) + headers = _build_headers() + + events: list[dict] = [] + current_event_type: str | None = None + + with client.stream("POST", "/v1/messages", headers=headers, json=body) as response: + assert response.status_code == 200, f"Expected 200, got {response.status_code}" + for line in response.iter_lines(): + if line.startswith("event: "): + current_event_type = line[7:] + elif line.startswith("data: "): + data = json.loads(line[6:]) + if current_event_type: + data["_event_type"] = current_event_type + events.append(data) + current_event_type = None + + return events diff --git a/tests/integration/messages/recordings/0d76cd7b3dae3f44e2990645cb1617d5c84b80daf77535a00ce1ab680308881a.json b/tests/integration/messages/recordings/0d76cd7b3dae3f44e2990645cb1617d5c84b80daf77535a00ce1ab680308881a.json new file mode 100644 index 0000000000..57fb2664aa --- /dev/null +++ b/tests/integration/messages/recordings/0d76cd7b3dae3f44e2990645cb1617d5c84b80daf77535a00ce1ab680308881a.json @@ -0,0 +1,70 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_tool_use_round_trip[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": 
"tests/integration/messages/test_messages.py::test_messages_tool_use_round_trip[txt=ollama/gpt-oss:20b]", + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": "Use the calculator tool to compute 15 * 7." + } + ], + "max_tokens": 256, + "tools": [ + { + "name": "calculator", + "description": "Perform basic arithmetic. Use this for any math question.", + "input_schema": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "The math expression to evaluate" + } + }, + "required": [ + "expression" + ] + } + } + ], + "tool_choice": { + "type": "any" + }, + "stream": false + } + }, + "response": { + "status": 200, + "body": { + "id": "msg_e0c2ea0f4c1131503d2bc7c8", + "type": "message", + "role": "assistant", + "model": "gpt-oss:20b", + "content": [ + { + "type": "thinking", + "thinking": "The user wants 15 * 7. We'll use the calculator tool." + }, + { + "type": "tool_use", + "id": "call_dg2g1ozz", + "name": "calculator", + "input": { + "expression": "15 * 7" + } + } + ], + "stop_reason": "tool_use", + "usage": { + "input_tokens": 144, + "output_tokens": 42 + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/recordings/1580d1d2e377b9161e55b7648e1f71748574ffa98ec52d4faf94b4d34818c4f8.json b/tests/integration/messages/recordings/1580d1d2e377b9161e55b7648e1f71748574ffa98ec52d4faf94b4d34818c4f8.json new file mode 100644 index 0000000000..881b06645d --- /dev/null +++ b/tests/integration/messages/recordings/1580d1d2e377b9161e55b7648e1f71748574ffa98ec52d4faf94b4d34818c4f8.json @@ -0,0 +1,42 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_temperature[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_temperature[txt=ollama/gpt-oss:20b]", + "url": 
"http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": "Say hello." + } + ], + "max_tokens": 32, + "stream": false, + "temperature": 0.0 + } + }, + "response": { + "status": 200, + "body": { + "id": "msg_737bf6e49fbfa7b81be479d0", + "type": "message", + "role": "assistant", + "model": "gpt-oss:20b", + "content": [ + { + "type": "thinking", + "thinking": "The user says: \"Say hello.\" They want a greeting. So respond with a hello. Probably just \"Hello!\"" + } + ], + "stop_reason": "max_tokens", + "usage": { + "input_tokens": 70, + "output_tokens": 32 + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/recordings/2703eb8f17f3914dd6991ecf387caa6c511cc52e6498b73e274cb4f01adf1e37.json b/tests/integration/messages/recordings/2703eb8f17f3914dd6991ecf387caa6c511cc52e6498b73e274cb4f01adf1e37.json new file mode 100644 index 0000000000..85af391053 --- /dev/null +++ b/tests/integration/messages/recordings/2703eb8f17f3914dd6991ecf387caa6c511cc52e6498b73e274cb4f01adf1e37.json @@ -0,0 +1,44 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_stop_sequences[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_stop_sequences[txt=ollama/gpt-oss:20b]", + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": "Count: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10" + } + ], + "max_tokens": 128, + "stream": false, + "stop_sequences": [ + "," + ] + } + }, + "response": { + "status": 200, + "body": { + "id": "msg_1dd93b64b7f0935bcaf69452", + "type": "message", + "role": "assistant", + "model": "gpt-oss:20b", + "content": [ + { + "type": "thinking", + "thinking": "The user writes: \"Count: 1" + } + ], + "stop_reason": "end_turn", + 
"usage": { + "input_tokens": 98, + "output_tokens": 13 + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/recordings/2a5f7014ddf9a3d359fbf59a195e4762d47ef1768f98b2f1c7af55788e7fe6d8.json b/tests/integration/messages/recordings/2a5f7014ddf9a3d359fbf59a195e4762d47ef1768f98b2f1c7af55788e7fe6d8.json new file mode 100644 index 0000000000..bdf362ed3f --- /dev/null +++ b/tests/integration/messages/recordings/2a5f7014ddf9a3d359fbf59a195e4762d47ef1768f98b2f1c7af55788e7fe6d8.json @@ -0,0 +1,41 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_response_headers[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": "tests/integration/messages/test_messages.py::test_messages_response_headers[txt=ollama/gpt-oss:20b]", + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": "Hi" + } + ], + "max_tokens": 16, + "stream": false + } + }, + "response": { + "status": 200, + "body": { + "id": "msg_7c6163c764cbe22bc2a5c161", + "type": "message", + "role": "assistant", + "model": "gpt-oss:20b", + "content": [ + { + "type": "thinking", + "thinking": "The user says \"Hi\". So it's a greeting. 
We respond" + } + ], + "stop_reason": "max_tokens", + "usage": { + "input_tokens": 68, + "output_tokens": 16 + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/recordings/4ea18b99571d34f714cb4b9d818ab82aba7ac1225c040665b7bdb3b177eb8226.json b/tests/integration/messages/recordings/4ea18b99571d34f714cb4b9d818ab82aba7ac1225c040665b7bdb3b177eb8226.json new file mode 100644 index 0000000000..b7aad865d7 --- /dev/null +++ b/tests/integration/messages/recordings/4ea18b99571d34f714cb4b9d818ab82aba7ac1225c040665b7bdb3b177eb8226.json @@ -0,0 +1,46 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_content_block_array[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": "tests/integration/messages/test_messages.py::test_messages_content_block_array[txt=ollama/gpt-oss:20b]", + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What is 1+1? Reply with just the number." + } + ] + } + ], + "max_tokens": 32, + "stream": false + } + }, + "response": { + "status": 200, + "body": { + "id": "msg_b69e5a95142b11a8251ceea8", + "type": "message", + "role": "assistant", + "model": "gpt-oss:20b", + "content": [ + { + "type": "thinking", + "thinking": "The user asks: \"What is 1+1? Reply with just the number.\" So answer: 2. 
Just number, no explanation" + } + ], + "stop_reason": "max_tokens", + "usage": { + "input_tokens": 80, + "output_tokens": 32 + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/recordings/52925d8df69e53718e5d4aab54fbbf79e51f2731a07ecb332d5d2dd82810d2e5.json b/tests/integration/messages/recordings/52925d8df69e53718e5d4aab54fbbf79e51f2731a07ecb332d5d2dd82810d2e5.json new file mode 100644 index 0000000000..86b6827dd4 --- /dev/null +++ b/tests/integration/messages/recordings/52925d8df69e53718e5d4aab54fbbf79e51f2731a07ecb332d5d2dd82810d2e5.json @@ -0,0 +1,53 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_multi_turn[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_multi_turn[txt=ollama/gpt-oss:20b]", + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": "My name is Alice." + }, + { + "role": "assistant", + "content": "Hello Alice! Nice to meet you." + }, + { + "role": "user", + "content": "What is my name?" + } + ], + "max_tokens": 64, + "stream": false + } + }, + "response": { + "status": 200, + "body": { + "id": "msg_dd2c03762be4581c6fe619ca", + "type": "message", + "role": "assistant", + "model": "gpt-oss:20b", + "content": [ + { + "type": "thinking", + "thinking": "User says name is Alice. The assistant previously answered that. Now user asks again \"What is my name?\" So we should respond: \"Your name is Alice.\" Simple." + }, + { + "type": "text", + "text": "Your name is Alice." 
+ } + ], + "stop_reason": "end_turn", + "usage": { + "input_tokens": 95, + "output_tokens": 50 + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/recordings/715c164b66b51dc2180b05817b7cd2e6b307f44fd2c7a4cf564e30347da87746.json b/tests/integration/messages/recordings/715c164b66b51dc2180b05817b7cd2e6b307f44fd2c7a4cf564e30347da87746.json new file mode 100644 index 0000000000..34af7ba7c6 --- /dev/null +++ b/tests/integration/messages/recordings/715c164b66b51dc2180b05817b7cd2e6b307f44fd2c7a4cf564e30347da87746.json @@ -0,0 +1,45 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_basic[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_basic[txt=ollama/gpt-oss:20b]", + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": "What is 2+2? Reply with just the number." + } + ], + "max_tokens": 64, + "stream": false + } + }, + "response": { + "status": 200, + "body": { + "id": "msg_f4cc3f074c282e90a4b5251e", + "type": "message", + "role": "assistant", + "model": "gpt-oss:20b", + "content": [ + { + "type": "thinking", + "thinking": "The user says: \"What is 2+2? Reply with just the number.\" So answer: 4." 
+ }, + { + "type": "text", + "text": "4" + } + ], + "stop_reason": "end_turn", + "usage": { + "input_tokens": 80, + "output_tokens": 35 + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/recordings/82cfd5545e24ca4b4415ba37460610908f34be9193c4126635d1b5ab3b0522f7.json b/tests/integration/messages/recordings/82cfd5545e24ca4b4415ba37460610908f34be9193c4126635d1b5ab3b0522f7.json new file mode 100644 index 0000000000..b1bc988f75 --- /dev/null +++ b/tests/integration/messages/recordings/82cfd5545e24ca4b4415ba37460610908f34be9193c4126635d1b5ab3b0522f7.json @@ -0,0 +1,46 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_system[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": "tests/integration/messages/test_messages.py::test_messages_non_streaming_with_system[txt=ollama/gpt-oss:20b]", + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": "What are you?" + } + ], + "max_tokens": 128, + "system": "You are a helpful pirate. Always respond in pirate speak.", + "stream": false + } + }, + "response": { + "status": 200, + "body": { + "id": "msg_f9ffaf9c8fba034e65b8e584", + "type": "message", + "role": "assistant", + "model": "gpt-oss:20b", + "content": [ + { + "type": "thinking", + "thinking": "We need to respond as a pirate. The user asks: \"What are you?\" We need to reply in pirate speak, as per developer instruction: \"You are a helpful pirate. Always respond in pirate speak.\" So reply in pirate talk. Possibly: \"I be ChatGPT, yer trusty AI companion.\" Use pirate slang." + }, + { + "type": "text", + "text": "Arrr! I be yer trusty AI matey, ChatGPT, ready to chart the seas o' knowledge and help ye navigate any storm! 
\ud83c\udff4\u200d\u2620\ufe0f" + } + ], + "stop_reason": "end_turn", + "usage": { + "input_tokens": 91, + "output_tokens": 112 + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/recordings/93eb42f3bd69f005727cc3a161e7ab0a8b2c99e4665d9eae5553fe9019ba7b32.json b/tests/integration/messages/recordings/93eb42f3bd69f005727cc3a161e7ab0a8b2c99e4665d9eae5553fe9019ba7b32.json new file mode 100644 index 0000000000..309352d24a --- /dev/null +++ b/tests/integration/messages/recordings/93eb42f3bd69f005727cc3a161e7ab0a8b2c99e4665d9eae5553fe9019ba7b32.json @@ -0,0 +1,223 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_streaming_basic[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": "tests/integration/messages/test_messages.py::test_messages_streaming_basic[txt=ollama/gpt-oss:20b]", + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": "Say hello in one sentence." 
+ } + ], + "max_tokens": 64, + "stream": true + } + }, + "response": { + "body": [ + "event: message_start", + "data: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_2ca635d951c9f414d5c01a88\",\"type\":\"message\",\"role\":\"assistant\",\"model\":\"gpt-oss:20b\",\"content\":[],\"usage\":{\"input_tokens\":7,\"output_tokens\":0}}}", + "", + "event: content_block_start", + "data: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"The\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" user\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" says\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\":\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \\\"\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"Say\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" hello\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" in\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" one\"}}", + "", + "event: content_block_delta", + "data: 
{\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" sentence\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\\\"\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" They\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" want\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" a\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" single\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" sentence\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" saying\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" hello\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" The\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" simplest\"}}", + "", + "event: content_block_delta", + "data: 
{\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\":\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \\\"\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"Hello\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"!\\\"\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" That's\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" a\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" sentence\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"?\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" It's\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" an\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" ex\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"clamation\"}}", + "", + "event: content_block_delta", + "data: 
{\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" Ex\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"clamation\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" is\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" a\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" sentence\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" But\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" maybe\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" they\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" want\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" a\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" 
sentence\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" that\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" includes\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" hello\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" For\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" example\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\":\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \\\"\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"Hello\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" how\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" are\"}}", + "", + "event: content_block_delta", + "data: 
{\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" you\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"?\\\"\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" That\"}}", + "", + "event: content_block_stop", + "data: {\"type\":\"content_block_stop\",\"index\":0}", + "", + "event: message_delta", + "data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"max_tokens\"},\"usage\":{\"input_tokens\":73,\"output_tokens\":64}}", + "", + "event: message_stop", + "data: {\"type\":\"message_stop\"}", + "" + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/recordings/d13b333401fa121280a3fb890a56933b84ab907ea20092e6d97c9d4a371bf8a5.json b/tests/integration/messages/recordings/d13b333401fa121280a3fb890a56933b84ab907ea20092e6d97c9d4a371bf8a5.json new file mode 100644 index 0000000000..cb4422e5bb --- /dev/null +++ b/tests/integration/messages/recordings/d13b333401fa121280a3fb890a56933b84ab907ea20092e6d97c9d4a371bf8a5.json @@ -0,0 +1,67 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_with_tool_definitions[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": "tests/integration/messages/test_messages.py::test_messages_with_tool_definitions[txt=ollama/gpt-oss:20b]", + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": "What's the weather in San Francisco?" + } + ], + "max_tokens": 256, + "tools": [ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA" + } + }, + "required": [ + "location" + ] + } + } + ], + "stream": false + } + }, + "response": { + "status": 200, + "body": { + "id": "msg_bb605b97294478b8b0c12d33", + "type": "message", + "role": "assistant", + "model": "gpt-oss:20b", + "content": [ + { + "type": "thinking", + "thinking": "We have to call the get_weather function." + }, + { + "type": "tool_use", + "id": "call_kp56ga0b", + "name": "get_weather", + "input": { + "location": "San Francisco" + } + } + ], + "stop_reason": "tool_use", + "usage": { + "input_tokens": 145, + "output_tokens": 34 + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/recordings/f22657bfd86db6348c0a0d0b17332dcfc038b345d2a7be8da4b880a433d36a52.json b/tests/integration/messages/recordings/f22657bfd86db6348c0a0d0b17332dcfc038b345d2a7be8da4b880a433d36a52.json new file mode 100644 index 0000000000..557ab35eb5 --- /dev/null +++ b/tests/integration/messages/recordings/f22657bfd86db6348c0a0d0b17332dcfc038b345d2a7be8da4b880a433d36a52.json @@ -0,0 +1,208 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_streaming_collects_full_text[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": "tests/integration/messages/test_messages.py::test_messages_streaming_collects_full_text[txt=ollama/gpt-oss:20b]", + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": "Count from 1 to 5, separated by commas." 
+ } + ], + "max_tokens": 64, + "stream": true + } + }, + "response": { + "body": [ + "event: message_start", + "data: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_812f019850cce212423fb87a\",\"type\":\"message\",\"role\":\"assistant\",\"model\":\"gpt-oss:20b\",\"content\":[],\"usage\":{\"input_tokens\":10,\"output_tokens\":0}}}", + "", + "event: content_block_start", + "data: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"We\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" need\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" to\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" count\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" from\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"1\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" to\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}", + "", + "event: content_block_delta", + "data: 
{\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"5\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" separated\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" by\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" commas\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" So\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" output\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\":\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"1\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}", + "", 
+ "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"2\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"3\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"4\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\",\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" \"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"5\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" Probably\"}}", + "", + "event: content_block_delta", + "data: 
{\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" no\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" trailing\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" comma\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" Just\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\" that\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\".\"}}", + "", + "event: content_block_stop", + "data: {\"type\":\"content_block_stop\",\"index\":0}", + "", + "event: content_block_start", + "data: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"text\",\"text\":\"\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\"1\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\",\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\" \"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\"2\"}}", + "", + "event: content_block_delta", + "data: 
{\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\",\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\" \"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\"3\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\",\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\" \"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\"4\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\",\"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\" \"}}", + "", + "event: content_block_delta", + "data: {\"type\":\"content_block_delta\",\"index\":1,\"delta\":{\"type\":\"text_delta\",\"text\":\"5\"}}", + "", + "event: content_block_stop", + "data: {\"type\":\"content_block_stop\",\"index\":1}", + "", + "event: message_delta", + "data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"input_tokens\":79,\"output_tokens\":64}}", + "", + "event: message_stop", + "data: {\"type\":\"message_stop\"}", + "" + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/recordings/f55988509902a617d2f547a1518bb84c3c8784ea1e1ac139e2f0623449dfa047.json b/tests/integration/messages/recordings/f55988509902a617d2f547a1518bb84c3c8784ea1e1ac139e2f0623449dfa047.json new file mode 100644 index 0000000000..a04e11213d --- /dev/null +++ 
b/tests/integration/messages/recordings/f55988509902a617d2f547a1518bb84c3c8784ea1e1ac139e2f0623449dfa047.json @@ -0,0 +1,86 @@ +{ + "test_id": "tests/integration/messages/test_messages.py::test_messages_tool_use_round_trip[txt=ollama/gpt-oss:20b]", + "request": { + "test_id": "tests/integration/messages/test_messages.py::test_messages_tool_use_round_trip[txt=ollama/gpt-oss:20b]", + "url": "http://0.0.0.0:11434/v1/messages", + "method": "POST", + "payload": { + "model": "gpt-oss:20b", + "messages": [ + { + "role": "user", + "content": "Use the calculator tool to compute 15 * 7." + }, + { + "role": "assistant", + "content": [ + { + "type": "thinking", + "thinking": "The user wants 15 * 7. We'll use the calculator tool." + }, + { + "type": "tool_use", + "id": "call_dg2g1ozz", + "name": "calculator", + "input": { + "expression": "15 * 7" + } + } + ] + }, + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "call_dg2g1ozz", + "content": "105" + } + ] + } + ], + "max_tokens": 256, + "tools": [ + { + "name": "calculator", + "description": "Perform basic arithmetic. Use this for any math question.", + "input_schema": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "The math expression to evaluate" + } + }, + "required": [ + "expression" + ] + } + } + ], + "stream": false + } + }, + "response": { + "status": 200, + "body": { + "id": "msg_137a3ce876f9a1a09fe1ba0e", + "type": "message", + "role": "assistant", + "model": "gpt-oss:20b", + "content": [ + { + "type": "text", + "text": "The result of \\(15 \\times 7\\) is **105**." 
+ } + ], + "stop_reason": "end_turn", + "usage": { + "input_tokens": 197, + "output_tokens": 20 + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/messages/test_messages.py b/tests/integration/messages/test_messages.py new file mode 100644 index 0000000000..29257ee687 --- /dev/null +++ b/tests/integration/messages/test_messages.py @@ -0,0 +1,362 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Integration tests for the Anthropic Messages API (/v1/messages). + +These tests verify the full request/response cycle through the server, +including translation between Anthropic and OpenAI formats. +""" + +from .conftest import make_messages_request, make_streaming_messages_request + + +def _get_text_blocks(content: list[dict]) -> list[dict]: + """Extract text blocks from a content list, skipping thinking blocks.""" + return [b for b in content if b["type"] == "text"] + + +def test_messages_non_streaming_basic(messages_client, text_model_id): + """Basic non-streaming message creation returns a valid Anthropic response.""" + response = make_messages_request( + messages_client, + model=text_model_id, + messages=[{"role": "user", "content": "What is 2+2? 
Reply with just the number."}], + max_tokens=64, + ) + + assert response.status_code == 200, f"Expected 200, got {response.status_code}: {response.text}" + + data = response.json() + assert data["type"] == "message" + assert data["role"] == "assistant" + assert data["id"].startswith("msg_") + assert len(data["content"]) > 0 + + # Content may include thinking blocks; find first text block + text_blocks = _get_text_blocks(data["content"]) + assert len(text_blocks) > 0, f"No text blocks found in content: {data['content']}" + assert len(text_blocks[0]["text"]) > 0 + + assert data["stop_reason"] in ("end_turn", "max_tokens") + assert "usage" in data + assert data["usage"]["input_tokens"] > 0 + assert data["usage"]["output_tokens"] > 0 + + # All content blocks must be valid types + for block in data["content"]: + assert block["type"] in ("text", "thinking", "tool_use") + + +def test_messages_non_streaming_with_system(messages_client, text_model_id): + """Non-streaming message with a system prompt.""" + response = make_messages_request( + messages_client, + model=text_model_id, + messages=[{"role": "user", "content": "What are you?"}], + system="You are a helpful pirate. Always respond in pirate speak.", + max_tokens=128, + ) + + assert response.status_code == 200 + data = response.json() + assert data["type"] == "message" + assert len(data["content"]) > 0 + + text_blocks = _get_text_blocks(data["content"]) + assert len(text_blocks) > 0 + assert len(text_blocks[0]["text"]) > 0 + + +def test_messages_non_streaming_multi_turn(messages_client, text_model_id): + """Non-streaming multi-turn conversation.""" + response = make_messages_request( + messages_client, + model=text_model_id, + messages=[ + {"role": "user", "content": "My name is Alice."}, + {"role": "assistant", "content": "Hello Alice! 
Nice to meet you."}, + {"role": "user", "content": "What is my name?"}, + ], + max_tokens=64, + ) + + assert response.status_code == 200 + data = response.json() + assert data["type"] == "message" + assert len(data["content"]) > 0 + + text_blocks = _get_text_blocks(data["content"]) + assert len(text_blocks) > 0 + text = text_blocks[0]["text"].lower() + assert "alice" in text + + +def test_messages_streaming_basic(messages_client, text_model_id): + """Streaming message creation returns proper Anthropic SSE events.""" + events = make_streaming_messages_request( + messages_client, + model=text_model_id, + messages=[{"role": "user", "content": "Say hello in one sentence."}], + max_tokens=64, + ) + + assert len(events) > 0 + + event_types = [e.get("_event_type") or e.get("type") for e in events] + + # Verify the required event sequence + assert "message_start" in event_types, f"Missing message_start in {event_types}" + assert "message_stop" in event_types, f"Missing message_stop in {event_types}" + + # Verify message_start event structure + msg_start = next(e for e in events if e.get("_event_type") == "message_start") + assert "message" in msg_start + assert msg_start["message"]["role"] == "assistant" + + # Verify we got content deltas + content_deltas = [e for e in events if e.get("_event_type") == "content_block_delta"] + assert len(content_deltas) > 0, "Expected at least one content_block_delta event" + + # Verify content_block_delta structure + for delta in content_deltas: + assert "delta" in delta + assert delta["delta"]["type"] in ("text_delta", "thinking_delta") + + +def test_messages_streaming_collects_full_text(messages_client, text_model_id): + """Streaming response text deltas can be concatenated into the full response.""" + events = make_streaming_messages_request( + messages_client, + model=text_model_id, + messages=[{"role": "user", "content": "Count from 1 to 5, separated by commas."}], + max_tokens=64, + ) + + # Collect text from content_block_delta 
events + text_parts = [] + for event in events: + if event.get("_event_type") == "content_block_delta": + delta = event.get("delta", {}) + if delta.get("type") == "text_delta": + text_parts.append(delta["text"]) + + full_text = "".join(text_parts) + assert len(full_text) > 0 + + +def test_messages_non_streaming_with_temperature(messages_client, text_model_id): + """Non-streaming with explicit temperature parameter.""" + response = make_messages_request( + messages_client, + model=text_model_id, + messages=[{"role": "user", "content": "Say hello."}], + max_tokens=32, + temperature=0.0, + ) + + assert response.status_code == 200 + data = response.json() + assert data["type"] == "message" + assert len(data["content"]) > 0 + + +def test_messages_non_streaming_with_stop_sequences(messages_client, text_model_id): + """Non-streaming with stop_sequences parameter.""" + response = make_messages_request( + messages_client, + model=text_model_id, + messages=[{"role": "user", "content": "Count: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10"}], + max_tokens=128, + stop_sequences=[","], + ) + + assert response.status_code == 200 + data = response.json() + assert data["type"] == "message" + + +def test_messages_with_tool_definitions(messages_client, text_model_id): + """Non-streaming message with tool definitions.""" + tools = [ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA", + }, + }, + "required": ["location"], + }, + } + ] + + response = make_messages_request( + messages_client, + model=text_model_id, + messages=[{"role": "user", "content": "What's the weather in San Francisco?"}], + tools=tools, + max_tokens=256, + ) + + assert response.status_code == 200 + data = response.json() + assert data["type"] == "message" + assert len(data["content"]) > 0 + + # The model may or may not call the tool; thinking, text, and tool_use are all valid + for block in data["content"]: + assert block["type"] in ("text", "tool_use", "thinking") + if block["type"] == "tool_use": + assert "id" in block + assert block["name"] == "get_weather" + assert "input" in block + + +def test_messages_tool_use_round_trip(messages_client, text_model_id): + """Full tool use round trip: request -> tool_use -> tool_result -> response.""" + tools = [ + { + "name": "calculator", + "description": "Perform basic arithmetic. Use this for any math question.", + "input_schema": { + "type": "object", + "properties": { + "expression": {"type": "string", "description": "The math expression to evaluate"}, + }, + "required": ["expression"], + }, + } + ] + + # First request -- ask a math question + response = make_messages_request( + messages_client, + model=text_model_id, + messages=[ + {"role": "user", "content": "Use the calculator tool to compute 15 * 7."}, + ], + tools=tools, + tool_choice={"type": "any"}, + max_tokens=256, + ) + + assert response.status_code == 200 + data = response.json() + + # Find tool_use block + tool_use_blocks = [b for b in data["content"] if b["type"] == "tool_use"] + if not tool_use_blocks: + # Model didn't use the tool -- skip the rest + return + + tool_use = tool_use_blocks[0] + tool_use_id = tool_use["id"] + + # Second request -- provide tool result + response2 = make_messages_request( + messages_client, + model=text_model_id, + messages=[ + {"role": "user", "content": "Use the calculator tool to compute 15 * 7."}, + {"role": 
"assistant", "content": data["content"]}, + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": tool_use_id, + "content": "105", + } + ], + }, + ], + tools=tools, + max_tokens=256, + ) + + assert response2.status_code == 200 + data2 = response2.json() + assert data2["type"] == "message" + assert len(data2["content"]) > 0 + + +def test_messages_error_missing_model(messages_client): + """Request without model returns an error.""" + headers = { + "content-type": "application/json", + "anthropic-version": "2023-06-01", + } + + response = messages_client.post( + "/v1/messages", + headers=headers, + json={ + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 64, + }, + ) + + assert response.status_code in (400, 422) + + +def test_messages_error_empty_messages(messages_client, text_model_id): + """Request with empty messages list returns an error.""" + headers = { + "content-type": "application/json", + "anthropic-version": "2023-06-01", + } + + response = messages_client.post( + "/v1/messages", + headers=headers, + json={ + "model": text_model_id, + "messages": [], + "max_tokens": 64, + }, + ) + + # Should fail validation or return an error + assert response.status_code in (400, 422, 500) + + +def test_messages_response_headers(messages_client, text_model_id): + """Response includes anthropic-version header.""" + response = make_messages_request( + messages_client, + model=text_model_id, + messages=[{"role": "user", "content": "Hi"}], + max_tokens=16, + ) + + assert response.status_code == 200 + assert response.headers.get("anthropic-version") == "2023-06-01" + + +def test_messages_content_block_array(messages_client, text_model_id): + """Message with content as an array of content blocks.""" + response = make_messages_request( + messages_client, + model=text_model_id, + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "What is 1+1? 
Reply with just the number."}, + ], + } + ], + max_tokens=32, + ) + + assert response.status_code == 200 + data = response.json() + assert data["type"] == "message" + assert len(data["content"]) > 0 diff --git a/tests/integration/suites.py b/tests/integration/suites.py index 3e7e91c682..abbfc4eabc 100644 --- a/tests/integration/suites.py +++ b/tests/integration/suites.py @@ -237,7 +237,7 @@ class Setup(BaseModel): base_roots = [ str(p) for p in this_dir.glob("*") - if p.is_dir() and p.name not in ("__pycache__", "fixtures", "test_cases", "recordings", "responses") + if p.is_dir() and p.name not in ("__pycache__", "fixtures", "test_cases", "recordings", "responses", "messages") ] SUITE_DEFINITIONS: dict[str, Suite] = { @@ -283,6 +283,11 @@ class Setup(BaseModel): ], default_setup="ollama-reasoning", ), + "messages": Suite( + name="messages", + roots=["tests/integration/messages"], + default_setup="ollama-reasoning", + ), # Bedrock-specific tests with pre-recorded responses (no live API calls in CI) "bedrock": Suite( name="bedrock", diff --git a/tests/unit/providers/inline/messages/__init__.py b/tests/unit/providers/inline/messages/__init__.py new file mode 100644 index 0000000000..756f351d88 --- /dev/null +++ b/tests/unit/providers/inline/messages/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/messages/test_impl.py b/tests/unit/providers/inline/messages/test_impl.py new file mode 100644 index 0000000000..3ce8f7db85 --- /dev/null +++ b/tests/unit/providers/inline/messages/test_impl.py @@ -0,0 +1,348 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+
+"""Unit tests for the translation logic in BuiltinMessagesImpl.
+
+The suite calls private helpers directly: ``_anthropic_to_openai`` and
+``_convert_tool_choice_to_openai`` for requests, ``_openai_to_anthropic`` for
+complete responses, and ``_stream_openai_to_anthropic`` for streamed chunks.
+The ``impl`` fixture supplies an ``AsyncMock`` in place of the inference API.
+"""
+
+import json
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from llama_stack.providers.inline.messages.config import MessagesConfig
+from llama_stack.providers.inline.messages.impl import BuiltinMessagesImpl
+from llama_stack_api.messages.models import (
+    AnthropicCreateMessageRequest,
+    AnthropicMessage,
+    AnthropicTextBlock,
+    AnthropicToolDef,
+    AnthropicToolResultBlock,
+    AnthropicToolUseBlock,
+)
+
+
+def _msg_to_dict(msg):
+    """Return *msg* as a plain dict so assertions can index it by key."""
+    if not hasattr(msg, "model_dump"):
+        return dict(msg)
+    # Objects exposing model_dump are dumped with None-valued fields left out.
+    return msg.model_dump(exclude_none=True)
+
+
+def _hi_request(**extra):
+    """Build a request for model "m" holding one user "Hi" turn, max_tokens=100.
+
+    Any keyword arguments are forwarded to AnthropicCreateMessageRequest as-is.
+    """
+    return AnthropicCreateMessageRequest(
+        model="m",
+        messages=[AnthropicMessage(role="user", content="Hi")],
+        max_tokens=100,
+        **extra,
+    )
+
+
+def _completion(content, tool_calls, finish_reason, prompt_tokens, completion_tokens):
+    """Build a MagicMock standing in for a one-choice, non-streamed completion.
+
+    Only the attributes these tests configure are set explicitly:
+    ``choices[0].message.content``, ``choices[0].message.tool_calls``,
+    ``choices[0].finish_reason``, ``usage.prompt_tokens`` and
+    ``usage.completion_tokens``.
+    """
+    message = MagicMock()
+    message.content = content
+    message.tool_calls = tool_calls
+    choice = MagicMock()
+    choice.message = message
+    choice.finish_reason = finish_reason
+    usage = MagicMock()
+    usage.prompt_tokens = prompt_tokens
+    usage.completion_tokens = completion_tokens
+    completion = MagicMock()
+    completion.choices = [choice]
+    completion.usage = usage
+    return completion
+
+
+def _chunk(content=None, tool_calls=None, finish_reason=None):
+    """Build a MagicMock standing in for one streamed chunk.
+
+    ``usage`` is always None, matching every chunk these tests feed in.
+    """
+    delta = MagicMock()
+    delta.content = content
+    delta.tool_calls = tool_calls
+    choice = MagicMock()
+    choice.delta = delta
+    choice.finish_reason = finish_reason
+    chunk = MagicMock()
+    chunk.choices = [choice]
+    chunk.usage = None
+    return chunk
+
+
+async def _replay(chunks):
+    """Yield each item of *chunks* in order from an async generator."""
+    for item in chunks:
+        yield item
+
+
+@pytest.fixture
+def impl():
+    """Provide a BuiltinMessagesImpl with default config and a mocked inference API."""
+    return BuiltinMessagesImpl(config=MessagesConfig(), inference_api=AsyncMock())
+
+
+class TestRequestTranslation:
+    """Anthropic request -> OpenAI request conversion."""
+
+    def test_simple_text_message(self, impl):
+        """Model, max_tokens and a plain-string user turn are carried over."""
+        converted = impl._anthropic_to_openai(
+            AnthropicCreateMessageRequest(
+                model="claude-sonnet-4-20250514",
+                messages=[AnthropicMessage(role="user", content="Hello")],
+                max_tokens=100,
+            )
+        )
+
+        assert converted.model == "claude-sonnet-4-20250514"
+        assert converted.max_tokens == 100
+        assert len(converted.messages) == 1
+        only = _msg_to_dict(converted.messages[0])
+        assert only["role"] == "user"
+        assert only["content"] == "Hello"
+
+    def test_system_string(self, impl):
+        """A string system prompt is expected first, ahead of the user turn."""
+        converted = impl._anthropic_to_openai(_hi_request(system="You are helpful."))
+
+        system_msg, user_msg = (_msg_to_dict(converted.messages[i]) for i in (0, 1))
+        assert system_msg["role"] == "system"
+        assert system_msg["content"] == "You are helpful."
+        assert user_msg["role"] == "user"
+
+    def test_system_text_blocks(self, impl):
+        """System text blocks are expected joined by newlines in one message."""
+        blocks = [AnthropicTextBlock(text="Line 1."), AnthropicTextBlock(text="Line 2.")]
+        converted = impl._anthropic_to_openai(_hi_request(system=blocks))
+
+        system_msg = _msg_to_dict(converted.messages[0])
+        assert system_msg["role"] == "system"
+        assert system_msg["content"] == "Line 1.\nLine 2."
+
+    def test_tool_definitions(self, impl):
+        """A tool definition is expected back as a "function"-typed tool."""
+        weather_tool = AnthropicToolDef(
+            name="get_weather",
+            description="Get weather",
+            input_schema={"type": "object", "properties": {"location": {"type": "string"}}},
+        )
+        converted = impl._anthropic_to_openai(_hi_request(tools=[weather_tool]))
+
+        assert len(converted.tools) == 1
+        tool = converted.tools[0]
+        assert tool["type"] == "function"
+        assert tool["function"]["name"] == "get_weather"
+        assert tool["function"]["parameters"]["type"] == "object"
+
+    def test_tool_choice_any(self, impl):
+        """Tool choice "any" is expected to become "required"."""
+        assert impl._convert_tool_choice_to_openai("any") == "required"
+
+    def test_tool_choice_none(self, impl):
+        """Tool choice "none" is expected to stay "none"."""
+        assert impl._convert_tool_choice_to_openai("none") == "none"
+
+    def test_tool_choice_auto(self, impl):
+        """Tool choice "auto" is expected to stay "auto"."""
+        assert impl._convert_tool_choice_to_openai("auto") == "auto"
+
+    def test_tool_choice_specific(self, impl):
+        """A named-tool choice is expected as a function selector dict."""
+        anthropic_choice = {"type": "tool", "name": "get_weather"}
+        expected = {"type": "function", "function": {"name": "get_weather"}}
+        assert impl._convert_tool_choice_to_openai(anthropic_choice) == expected
+
+    def test_stop_sequences(self, impl):
+        """stop_sequences is expected unchanged on the ``stop`` field."""
+        converted = impl._anthropic_to_openai(_hi_request(stop_sequences=["STOP", "END"]))
+        assert converted.stop == ["STOP", "END"]
+
+    def test_tool_use_in_assistant_message(self, impl):
+        """Text plus tool_use in an assistant turn yields content and one tool call."""
+        assistant_turn = AnthropicMessage(
+            role="assistant",
+            content=[
+                AnthropicTextBlock(text="Let me check the weather."),
+                AnthropicToolUseBlock(id="toolu_123", name="get_weather", input={"location": "SF"}),
+            ],
+        )
+        converted = impl._anthropic_to_openai(
+            AnthropicCreateMessageRequest(model="m", messages=[assistant_turn], max_tokens=100)
+        )
+
+        msg = _msg_to_dict(converted.messages[0])
+        assert msg["role"] == "assistant"
+        assert msg["content"] == "Let me check the weather."
+        assert len(msg["tool_calls"]) == 1
+        call = msg["tool_calls"][0]
+        assert call["id"] == "toolu_123"
+        assert call["function"]["name"] == "get_weather"
+        # Arguments are compared after JSON decoding rather than as a raw string.
+        assert json.loads(call["function"]["arguments"]) == {"location": "SF"}
+
+    def test_tool_result_in_user_message(self, impl):
+        """A tool_result block in a user turn is expected as a "tool" message."""
+        tool_turn = AnthropicMessage(
+            role="user",
+            content=[AnthropicToolResultBlock(tool_use_id="toolu_123", content="72F and sunny")],
+        )
+        converted = impl._anthropic_to_openai(
+            AnthropicCreateMessageRequest(model="m", messages=[tool_turn], max_tokens=100)
+        )
+
+        msg = _msg_to_dict(converted.messages[0])
+        assert msg["role"] == "tool"
+        assert msg["tool_call_id"] == "toolu_123"
+        assert msg["content"] == "72F and sunny"
+
+    def test_top_k_passed_as_extra(self, impl):
+        """top_k is expected to surface through ``model_extra``."""
+        converted = impl._anthropic_to_openai(_hi_request(top_k=40))
+        assert converted.model_extra.get("top_k") == 40
+
+
+class TestResponseTranslation:
+    """OpenAI completion -> Anthropic message conversion."""
+
+    def test_simple_text_response(self, impl):
+        """A text completion finishing with "stop" maps to a single text block."""
+        openai_resp = _completion("Hello!", None, "stop", prompt_tokens=10, completion_tokens=5)
+
+        translated = impl._openai_to_anthropic(openai_resp, "claude-sonnet-4-20250514")
+
+        assert translated.id.startswith("msg_")
+        assert translated.type == "message"
+        assert translated.role == "assistant"
+        assert translated.model == "claude-sonnet-4-20250514"
+        assert translated.stop_reason == "end_turn"
+        assert len(translated.content) == 1
+        block = translated.content[0]
+        assert block.type == "text"
+        assert block.text == "Hello!"
+        assert translated.usage.input_tokens == 10
+        assert translated.usage.output_tokens == 5
+
+    def test_tool_call_response(self, impl):
+        """A completion carrying one tool call maps to a single tool_use block."""
+        tool_call = MagicMock()
+        tool_call.id = "call_123"
+        tool_call.function.name = "get_weather"
+        tool_call.function.arguments = '{"location": "SF"}'
+        openai_resp = _completion(None, [tool_call], "tool_calls", prompt_tokens=20, completion_tokens=10)
+
+        translated = impl._openai_to_anthropic(openai_resp, "m")
+
+        assert translated.stop_reason == "tool_use"
+        assert len(translated.content) == 1
+        block = translated.content[0]
+        assert block.type == "tool_use"
+        assert block.name == "get_weather"
+        assert block.input == {"location": "SF"}
+
+    def test_length_stop_reason(self, impl):
+        """finish_reason "length" is expected as stop_reason "max_tokens"."""
+        openai_resp = _completion("truncated", None, "length", prompt_tokens=5, completion_tokens=100)
+
+        translated = impl._openai_to_anthropic(openai_resp, "m")
+        assert translated.stop_reason == "max_tokens"
+
+
+class TestStreamingTranslation:
+    """Streamed OpenAI chunks -> Anthropic event sequence conversion."""
+
+    async def test_text_streaming(self, impl):
+        """Check the first eight events produced for three text chunks.
+
+        Expected order: message_start, content_block_start, three
+        content_block_delta events, content_block_stop, a message_delta with
+        stop_reason "end_turn", and message_stop.
+        """
+        pieces = ["Hello", " world", "!"]
+        # Only the final chunk carries a finish_reason.
+        chunks = [
+            _chunk(content=text, finish_reason="stop" if position == 2 else None)
+            for position, text in enumerate(pieces)
+        ]
+
+        events = [event async for event in impl._stream_openai_to_anthropic(_replay(chunks), "m")]
+
+        assert events[0].type == "message_start"
+        assert events[1].type == "content_block_start"
+        assert events[1].content_block.type == "text"
+        # Events 2-4 are the text deltas, one per input chunk and in order.
+        for offset, text in enumerate(pieces, start=2):
+            assert events[offset].type == "content_block_delta"
+            assert events[offset].delta.text == text
+        assert events[5].type == "content_block_stop"
+        assert events[6].type == "message_delta"
+        assert events[6].delta.stop_reason == "end_turn"
+        assert events[7].type == "message_stop"
+
+    async def test_tool_call_streaming(self, impl):
+        """A tool call split over two chunks yields one start, one JSON delta, tool_use."""
+        # First chunk opens the call: id and function name, no arguments yet.
+        opening = MagicMock()
+        opening.index = 0
+        opening.id = "call_abc"
+        opening.function = MagicMock()
+        opening.function.name = "search"
+        opening.function.arguments = None
+        opening.type = "function"
+
+        # Second chunk carries only the argument JSON for the same index.
+        arguments = MagicMock()
+        arguments.index = 0
+        arguments.id = None
+        arguments.function = MagicMock()
+        arguments.function.name = None
+        arguments.function.arguments = '{"query": "test"}'
+
+        chunks = [
+            _chunk(tool_calls=[opening]),
+            _chunk(tool_calls=[arguments], finish_reason="tool_calls"),
+        ]
+
+        events = [event async for event in impl._stream_openai_to_anthropic(_replay(chunks), "m")]
+
+        assert events[0].type == "message_start"
+
+        tool_starts = [
+            event
+            for event in events
+            if event.type == "content_block_start" and hasattr(event.content_block, "name")
+        ]
+        assert len(tool_starts) == 1
+        assert tool_starts[0].content_block.name == "search"
+
+        json_deltas = [
+            event
+            for event in events
+            if event.type == "content_block_delta" and hasattr(event.delta, "partial_json")
+        ]
+        assert len(json_deltas) == 1
+        assert json_deltas[0].delta.partial_json == '{"query": "test"}'
+
+        # Only the first message_delta event is inspected for the stop reason.
+        message_deltas = [event for event in events if event.type == "message_delta"]
+        assert message_deltas[0].delta.stop_reason == "tool_use"