From c1dbb515538ecb5a699e754c4571628ecf70e599 Mon Sep 17 00:00:00 2001 From: Guillem Arias Date: Mon, 25 May 2026 16:29:53 +0200 Subject: [PATCH 1/8] feat(ai): add Anthropic provider with chat parity (1/5) Introduces Provider::Anthropic alongside Provider::Openai, implementing the LlmConcept chat_response contract over the official anthropic Ruby SDK. Batch ops, PDF, and RAG land in follow-up PRs. - Provider::Anthropic uses Messages API for sync and streaming responses - ChatConfig builds requests with ephemeral prompt-cache markers on the system prompt and the last tool definition - MessageFormatter reconstructs multi-turn history (text + tool_use + tool_result blocks) from raw Message records, including the paired user-role tool_result turn Anthropic requires after every tool_use - ChatParser maps Anthropic Message into the shared ChatResponse Data - Registry, Setting, User, Chat default model wired for ANTHROPIC_* envs and Setting.anthropic_*; LLM_PROVIDER selects between providers - Responder forwards raw conversation_history (Array) so providers without hosted conversation state can rebuild context - OpenAI provider accepts and ignores the new kwarg (no behavior change) Tests cover provider init, model gating, MessageFormatter for all turn shapes, ChatConfig request building (max_tokens, system cache, tool conversion), ChatParser for text / tool_use / mixed blocks, Registry discovery, and mocked chat_response success / error / function_request paths. Live VCR cassettes recorded in a follow-up with a real key. Stacked PRs: 2/5 batch ops + cost ledger, 3/5 PDF, 4/5 pgvector RAG, 5/5 settings UI + disclosure. 
--- Gemfile | 1 + Gemfile.lock | 6 + app/models/assistant/responder.rb | 15 + app/models/chat.rb | 11 +- app/models/provider/anthropic.rb | 320 ++++++++++++++++++ app/models/provider/anthropic/chat_config.rb | 83 +++++ app/models/provider/anthropic/chat_parser.rb | 74 ++++ .../provider/anthropic/message_formatter.rb | 118 +++++++ app/models/provider/llm_concept.rb | 1 + app/models/provider/openai.rb | 1 + app/models/provider/registry.rb | 17 +- app/models/setting.rb | 4 + app/models/user.rb | 12 +- .../provider/anthropic/chat_config_test.rb | 68 ++++ .../provider/anthropic/chat_parser_test.rb | 84 +++++ .../anthropic/message_formatter_test.rb | 129 +++++++ test/models/provider/anthropic_test.rb | 145 ++++++++ test/models/provider/registry_test.rb | 47 ++- 18 files changed, 1128 insertions(+), 8 deletions(-) create mode 100644 app/models/provider/anthropic.rb create mode 100644 app/models/provider/anthropic/chat_config.rb create mode 100644 app/models/provider/anthropic/chat_parser.rb create mode 100644 app/models/provider/anthropic/message_formatter.rb create mode 100644 test/models/provider/anthropic/chat_config_test.rb create mode 100644 test/models/provider/anthropic/chat_parser_test.rb create mode 100644 test/models/provider/anthropic/message_formatter_test.rb create mode 100644 test/models/provider/anthropic_test.rb diff --git a/Gemfile b/Gemfile index 19fdf8a177..5d159c0930 100644 --- a/Gemfile +++ b/Gemfile @@ -101,6 +101,7 @@ gem "after_commit_everywhere", "~> 1.0" # AI gem "ruby-openai" +gem "anthropic", "~> 1.0" gem "langfuse-ruby", "~> 0.1.4", require: "langfuse" group :development, :test do diff --git a/Gemfile.lock b/Gemfile.lock index 2b3ab3c5f3..6bc0b988d3 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -87,6 +87,10 @@ GEM activerecord (>= 4.2) activesupport android_key_attestation (0.3.0) + anthropic (1.43.0) + cgi + connection_pool + standardwebhooks ast (2.4.3) attr_required (1.0.2) aws-eventstream (1.4.0) @@ -759,6 +763,7 @@ GEM faraday (>= 
1.0.1, < 3.0) faraday-multipart (~> 1.0, >= 1.0.4) stackprof (0.2.27) + standardwebhooks (1.1.0) stimulus-rails (1.3.4) railties (>= 6.0.0) stringio (3.1.7) @@ -859,6 +864,7 @@ DEPENDENCIES aasm activerecord-import after_commit_everywhere (~> 1.0) + anthropic (~> 1.0) aws-sdk-s3 (~> 1.208.0) bcrypt (~> 3.1) benchmark-ips diff --git a/app/models/assistant/responder.rb b/app/models/assistant/responder.rb index 480c69c22d..406993ab7f 100644 --- a/app/models/assistant/responder.rb +++ b/app/models/assistant/responder.rb @@ -80,6 +80,7 @@ def get_llm_response(streamer:, function_results: [], previous_response_id: nil) functions: function_tool_caller.function_definitions, function_results: function_results, messages: conversation_history, + conversation_history: chat_message_records, streamer: streamer, previous_response_id: previous_response_id, session_id: chat_session_id, @@ -116,6 +117,20 @@ def chat @chat ||= message.chat end + # Raw Message records preceding the current turn — providers that build + # their own native message shape (Anthropic) consume this directly so they + # do not have to round-trip through the OpenAI-shaped `conversation_history`. + def chat_message_records + return [] unless chat&.messages + + chat.messages + .where(type: [ "UserMessage", "AssistantMessage" ], status: "complete") + .where.not(id: message.id) + .includes(:tool_calls) + .ordered + .to_a + end + def conversation_history messages = [] return messages unless chat&.messages diff --git a/app/models/chat.rb b/app/models/chat.rb index 1198ee4be8..f85f31d126 100644 --- a/app/models/chat.rb +++ b/app/models/chat.rb @@ -51,10 +51,15 @@ def generate_title(prompt) prompt.first(80) end - # Returns the default AI model to use for chats - # Priority: AI Config > Setting + # Returns the default AI model to use for chats. + # Resolved from the configured llm_provider so installs that swap providers + # don't have to manually update every chat default. 
def default_model - Provider::Openai.effective_model.presence || Setting.openai_model + if Setting.llm_provider == "anthropic" + Provider::Anthropic.effective_model.presence || Setting.anthropic_model + else + Provider::Openai.effective_model.presence || Setting.openai_model + end end end diff --git a/app/models/provider/anthropic.rb b/app/models/provider/anthropic.rb new file mode 100644 index 0000000000..2b73d22117 --- /dev/null +++ b/app/models/provider/anthropic.rb @@ -0,0 +1,320 @@ +class Provider::Anthropic < Provider + include LlmConcept + + # Subclass so errors caught in this provider are raised as Provider::Anthropic::Error + Error = Class.new(Provider::Error) + + # Supported Anthropic model prefixes + DEFAULT_ANTHROPIC_MODEL_PREFIXES = %w[claude].freeze + DEFAULT_MODEL = "claude-sonnet-4-6" + + # All Claude 3.5+ and 4.x models accept native document content blocks. + VISION_CAPABLE_MODEL_PREFIXES = %w[claude].freeze + + def self.effective_model + configured_model = ENV["ANTHROPIC_MODEL"].presence || Setting.anthropic_model # a blank env var falls through to the Setting, same precedence as Provider::Registry#anthropic + configured_model.presence || DEFAULT_MODEL + end + + def initialize(access_token, base_url: nil, model: nil) + client_options = { api_key: access_token } + client_options[:base_url] = base_url if base_url.present? + client_options[:timeout] = ENV.fetch("ANTHROPIC_REQUEST_TIMEOUT", 600).to_i + + @client = ::Anthropic::Client.new(**client_options) + @base_url = base_url + @default_model = model.presence || DEFAULT_MODEL + end + + def supports_model?(model) + DEFAULT_ANTHROPIC_MODEL_PREFIXES.any? { |prefix| model.to_s.start_with?(prefix) } + end + + def provider_name + custom_endpoint? ? "Custom Anthropic-compatible (#{@base_url})" : "Anthropic" + end + + def supported_models_description + if custom_endpoint? + "configured model: #{@default_model}" + else + "models starting with: #{DEFAULT_ANTHROPIC_MODEL_PREFIXES.join(', ')}" + end + end + + def custom_endpoint? + @base_url.present? 
+ end + + # Batch operations land in PR2 — keep the LlmConcept contract honest by + # surfacing a clear error if a caller routes here too early. + def auto_categorize(transactions: [], user_categories: [], model: "", family: nil, json_mode: nil) + raise Error, "auto_categorize not yet implemented for Provider::Anthropic" + end + + def auto_detect_merchants(transactions: [], user_merchants: [], model: "", family: nil, json_mode: nil) + raise Error, "auto_detect_merchants not yet implemented for Provider::Anthropic" + end + + def enhance_provider_merchants(merchants: [], model: "", family: nil, json_mode: nil) + raise Error, "enhance_provider_merchants not yet implemented for Provider::Anthropic" + end + + def supports_pdf_processing?(model: @default_model) + VISION_CAPABLE_MODEL_PREFIXES.any? { |prefix| model.to_s.start_with?(prefix) } + end + + def process_pdf(pdf_content:, model: "", family: nil) + raise Error, "process_pdf not yet implemented for Provider::Anthropic" + end + + def extract_bank_statement(pdf_content:, model: "", family: nil) + raise Error, "extract_bank_statement not yet implemented for Provider::Anthropic" + end + + def chat_response( + prompt, + model:, + instructions: nil, + functions: [], + function_results: [], + conversation_history: [], messages: nil, # messages: OpenAI-shaped history the Responder always passes; accepted for LlmConcept signature parity and ignored here + streamer: nil, + previous_response_id: nil, + session_id: nil, + user_identifier: nil, + family: nil + ) + with_provider_response do + chat_config = ChatConfig.new( + prompt: prompt, + instructions: instructions, + functions: functions, + function_results: function_results, + conversation_history: conversation_history, + default_max_tokens: default_max_tokens + ) + + request_params = chat_config.build_request(model: model) + + trace = create_langfuse_trace( + name: "anthropic.chat_response", + input: { messages: request_params[:messages], system: request_params[:system_] }, + session_id: session_id, + user_identifier: user_identifier + ) + + begin + parsed, usage = + if streamer.present? 
+ stream_chat_response(streamer: streamer, request_params: request_params) + else + sync_chat_response(request_params: request_params) + end + + log_langfuse_generation( + name: "chat_response", + model: model, + input: request_params[:messages], + output: parsed.messages.map(&:output_text).join("\n"), + usage: usage, + trace: trace + ) + record_llm_usage(family: family, model: model, operation: "chat", usage: usage) + + parsed + rescue => e + log_langfuse_generation( + name: "chat_response", + model: model, + input: request_params[:messages], + error: e, + trace: trace + ) + record_llm_usage(family: family, model: model, operation: "chat", error: e) + raise + end + end + end + + private + attr_reader :client + + def default_max_tokens + ENV.fetch("ANTHROPIC_MAX_TOKENS", 4096).to_i + end + + def sync_chat_response(request_params:) + raw = client.messages.create(**request_params) + parsed = ChatParser.new(raw).parsed + usage = build_usage_hash(raw.usage) + [ parsed, usage ] + end + + def stream_chat_response(streamer:, request_params:) + final_message = nil + stream = client.messages.stream(**request_params) + + stream.each do |event| + case event + when ::Anthropic::Streaming::TextEvent + streamer.call( + Provider::LlmConcept::ChatStreamChunk.new(type: "output_text", data: event.text, usage: nil) + ) + when ::Anthropic::Streaming::MessageStopEvent + final_message = event.message + end + end + + final_message ||= stream.accumulated_message + parsed = ChatParser.new(final_message).parsed + usage = build_usage_hash(final_message.usage) + + streamer.call( + Provider::LlmConcept::ChatStreamChunk.new(type: "response", data: parsed, usage: usage) + ) + + [ parsed, usage ] + end + + def build_usage_hash(raw_usage) + return {} unless raw_usage + + input = raw_usage.input_tokens.to_i + output = raw_usage.output_tokens.to_i + hash = { + "input_tokens" => input, + "output_tokens" => output, + "total_tokens" => input + output + } + + if 
raw_usage.respond_to?(:cache_creation_input_tokens) && raw_usage.cache_creation_input_tokens + hash["cache_creation_input_tokens"] = raw_usage.cache_creation_input_tokens + end + if raw_usage.respond_to?(:cache_read_input_tokens) && raw_usage.cache_read_input_tokens + hash["cache_read_input_tokens"] = raw_usage.cache_read_input_tokens + end + + hash + end + + def langfuse_client + return unless ENV["LANGFUSE_PUBLIC_KEY"].present? && ENV["LANGFUSE_SECRET_KEY"].present? + + @langfuse_client ||= Langfuse.new # memoized: the trace, generation and upsert helpers below each call this, so reuse one client per provider instance + end + + def create_langfuse_trace(name:, input:, session_id: nil, user_identifier: nil) + return unless langfuse_client + + langfuse_client.trace( + name: name, + input: input, + session_id: session_id, + user_id: user_identifier, + environment: Rails.env + ) + rescue => e + Rails.logger.warn("Langfuse trace creation failed: #{e.message}\n#{e.full_message}") + nil + end + + def log_langfuse_generation(name:, model:, input:, trace:, output: nil, usage: nil, error: nil) + return unless langfuse_client + + generation = trace&.generation( + name: name, + model: model, + input: input + ) + + if error + generation&.end( + output: { error: error.message, details: error.respond_to?(:details) ? error.details : nil }, + level: "ERROR" + ) + upsert_langfuse_trace(trace: trace, output: { error: error.message }, level: "ERROR") + else + generation&.end(output: output, usage: usage) + upsert_langfuse_trace(trace: trace, output: output) + end + rescue => e + Rails.logger.warn("Langfuse logging failed: #{e.message}\n#{e.full_message}") + end + + def upsert_langfuse_trace(trace:, output:, level: nil) + return unless langfuse_client && trace&.id + + payload = { id: trace.id, output: output } + payload[:level] = level if level.present? 
+ + langfuse_client.trace(**payload) + rescue => e + Rails.logger.warn("Langfuse trace upsert failed for trace_id=#{trace&.id}: #{e.message}\n#{e.full_message}") + nil + end + + def record_llm_usage(family:, model:, operation:, usage: nil, error: nil) + return unless family + + if error.present? + http_status_code = extract_http_status_code(error) + + family.llm_usages.create!( + provider: "anthropic", + model: model, + operation: operation, + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0, + estimated_cost: nil, + metadata: { + error: safe_error_message(error), + http_status_code: http_status_code + } + ) + return + end + + return unless usage + + prompt_tokens = usage["input_tokens"] || 0 + completion_tokens = usage["output_tokens"] || 0 + total_tokens = usage["total_tokens"] || (prompt_tokens + completion_tokens) + + estimated_cost = LlmUsage.calculate_cost( + model: model, + prompt_tokens: prompt_tokens, + completion_tokens: completion_tokens + ) + + family.llm_usages.create!( + provider: "anthropic", + model: model, + operation: operation, + prompt_tokens: prompt_tokens, + completion_tokens: completion_tokens, + total_tokens: total_tokens, + estimated_cost: estimated_cost, + metadata: usage.slice("cache_creation_input_tokens", "cache_read_input_tokens").compact + ) + rescue => e + Rails.logger.error("Failed to record LLM usage: #{e.message}") + end + + def extract_http_status_code(error) + if error.respond_to?(:status) + error.status + elsif error.respond_to?(:http_status) + error.http_status + elsif (status_match = safe_error_message(error).to_s.match(/(?<!\d)([1-5]\d{2})(?!\d)/)) # last resort: a standalone 3-digit run in the 100-599 range, never a slice of a longer number such as "4096" + status_match[1].to_i + end + end + + def safe_error_message(error) + error&.message + rescue => e + "(message unavailable: #{e.class})" + end +end diff --git a/app/models/provider/anthropic/chat_config.rb b/app/models/provider/anthropic/chat_config.rb new file mode 100644 index 0000000000..fd75976118 --- /dev/null +++ b/app/models/provider/anthropic/chat_config.rb @@ -0,0 +1,83 @@ +class Provider::Anthropic::ChatConfig + 
def initialize( + prompt:, + instructions: nil, + functions: [], + function_results: [], + conversation_history: [], + default_max_tokens: 4096 + ) + @prompt = prompt + @instructions = instructions + @functions = functions + @function_results = function_results + @conversation_history = conversation_history + @default_max_tokens = default_max_tokens + end + + def build_request(model:) + params = { + model: model, + max_tokens: @default_max_tokens, + messages: build_messages + } + + system_blocks = build_system_blocks + params[:system_] = system_blocks if system_blocks.present? + + tool_blocks = build_tools + params[:tools] = tool_blocks if tool_blocks.present? + + params + end + + private + def build_messages + Provider::Anthropic::MessageFormatter.new( + prompt: @prompt, + conversation_history: @conversation_history, + function_results: @function_results + ).build + end + + def build_system_blocks + return nil if @instructions.blank? + + # System prompts are cached aggressively — they rarely change within a session + # and re-using them via prompt caching cuts input cost ~10x on cache hits. + [ + { + type: "text", + text: @instructions, + cache_control: { type: "ephemeral" } + } + ] + end + + def build_tools + return [] if @functions.blank? + + tools = @functions.map do |fn| + { + name: fn[:name], + description: fn[:description], + input_schema: anthropic_input_schema(fn[:params_schema]) + } + end + + # Cache tool definitions alongside the system prompt: same TTL behaviour and + # they almost never change between turns. + tools.last[:cache_control] = { type: "ephemeral" } if tools.any? + + tools + end + + # OpenAI strict schemas frequently include `additionalProperties: false`, which + # Anthropic also accepts. The shapes are otherwise JSON Schema 2020-12 compatible. + # `strict` is OpenAI-only and must not be forwarded. 
+ def anthropic_input_schema(schema) + schema = schema.deep_dup + schema.delete(:strict) if schema.is_a?(Hash) + schema + end +end diff --git a/app/models/provider/anthropic/chat_parser.rb b/app/models/provider/anthropic/chat_parser.rb new file mode 100644 index 0000000000..1f22c465d4 --- /dev/null +++ b/app/models/provider/anthropic/chat_parser.rb @@ -0,0 +1,74 @@ +class Provider::Anthropic::ChatParser + Error = Class.new(StandardError) + + def initialize(message) + @message = message + end + + def parsed + ChatResponse.new( + id: response_id, + model: response_model, + messages: messages, + function_requests: function_requests + ) + end + + private + ChatResponse = Provider::LlmConcept::ChatResponse + ChatMessage = Provider::LlmConcept::ChatMessage + ChatFunctionRequest = Provider::LlmConcept::ChatFunctionRequest + + attr_reader :message + + def response_id + message.id + end + + def response_model + message.model.to_s + end + + def messages + text_blocks = content_blocks.select { |block| block_type(block) == :text } + return [] if text_blocks.empty? + + [ + ChatMessage.new( + id: response_id, + output_text: text_blocks.map { |b| block_value(b, :text) }.compact.join("\n") + ) + ] + end + + def function_requests + content_blocks + .select { |block| block_type(block) == :tool_use } + .map do |block| + input = block_value(block, :input) + ChatFunctionRequest.new( + id: block_value(block, :id), + call_id: block_value(block, :id), + function_name: block_value(block, :name), + function_args: input.is_a?(String) ? input : input.to_json + ) + end + end + + def content_blocks + Array(message.content) + end + + def block_type(block) + raw = block.respond_to?(:type) ? 
block.type : block[:type] || block["type"] + raw.to_s.to_sym + end + + def block_value(block, key) + if block.respond_to?(key) + block.public_send(key) + elsif block.is_a?(Hash) + block[key] || block[key.to_s] + end + end +end diff --git a/app/models/provider/anthropic/message_formatter.rb b/app/models/provider/anthropic/message_formatter.rb new file mode 100644 index 0000000000..e9288e3768 --- /dev/null +++ b/app/models/provider/anthropic/message_formatter.rb @@ -0,0 +1,118 @@ +class Provider::Anthropic::MessageFormatter + # Builds the `messages` array Anthropic expects. + # + # Inputs: + # - prompt: text of the current user turn + # - conversation_history: chronologically-ordered Message records preceding + # the current user message (UserMessage / AssistantMessage) + # - function_results: tool-result entries for the in-flight follow-up call + # (the responder feeds these back after executing the tool_use blocks + # returned by the previous request) + def initialize(prompt:, conversation_history: [], function_results: []) + @prompt = prompt + @conversation_history = conversation_history + @function_results = function_results + end + + def build + messages = [] + + @conversation_history.each do |historical| + case historical + when UserMessage + messages << { role: "user", content: historical.content.to_s } if historical.content.present? + when AssistantMessage + messages.concat(assistant_history_blocks(historical)) + end + end + + messages << { role: "user", content: @prompt.to_s } + + if @function_results.present? 
+ tool_use_blocks = @function_results.map { |fr| tool_use_block_from_result(fr) } + tool_result_blocks = @function_results.map { |fr| tool_result_block(fr) } + + messages << { role: "assistant", content: tool_use_blocks } + messages << { role: "user", content: tool_result_blocks } + end + + messages + end + + private + def assistant_history_blocks(assistant_message) + blocks = [] + blocks.concat(assistant_message.tool_calls.map { |tc| tool_use_block_from_record(tc) }) if assistant_message.tool_calls.any? + blocks << { type: "text", text: assistant_message.content.to_s } if assistant_message.content.present? + + return [] if blocks.empty? + + result = [ { role: "assistant", content: blocks } ] + + # If the assistant turn used tools, Anthropic requires a user turn with + # matching tool_result blocks before the next assistant turn. + if assistant_message.tool_calls.any? + result << { + role: "user", + content: assistant_message.tool_calls.map { |tc| tool_result_block_from_record(tc) } + } + end + + result + end + + def tool_use_block_from_record(tool_call) + { + type: "tool_use", + id: tool_call.provider_call_id || tool_call.provider_id, + name: tool_call.function_name, + input: parse_arguments(tool_call.function_arguments) + } + end + + def tool_result_block_from_record(tool_call) + { + type: "tool_result", + tool_use_id: tool_call.provider_call_id || tool_call.provider_id, + content: serialize_output(tool_call.function_result) + } + end + + def tool_use_block_from_result(function_result) + { + type: "tool_use", + id: function_result[:call_id], + name: function_result[:name], + input: parse_arguments(function_result[:arguments]) + } + end + + def tool_result_block(function_result) + { + type: "tool_result", + tool_use_id: function_result[:call_id], + content: serialize_output(function_result[:output]) + } + end + + def parse_arguments(arguments) + case arguments + when nil then {} + when Hash then arguments + when String + return {} if arguments.blank? 
+ JSON.parse(arguments) + else arguments + end + rescue JSON::ParserError + {} + end + + def serialize_output(output) + case output + when nil then "" + when String then output + else output.to_json + end + end +end diff --git a/app/models/provider/llm_concept.rb b/app/models/provider/llm_concept.rb index 52550111fa..c4ee70ef7d 100644 --- a/app/models/provider/llm_concept.rb +++ b/app/models/provider/llm_concept.rb @@ -41,6 +41,7 @@ def chat_response( functions: [], function_results: [], messages: nil, + conversation_history: [], streamer: nil, previous_response_id: nil, session_id: nil, diff --git a/app/models/provider/openai.rb b/app/models/provider/openai.rb index 0c04f63e1a..8a28402f83 100644 --- a/app/models/provider/openai.rb +++ b/app/models/provider/openai.rb @@ -260,6 +260,7 @@ def chat_response( functions: [], function_results: [], messages: nil, + conversation_history: [], streamer: nil, previous_response_id: nil, session_id: nil, diff --git a/app/models/provider/registry.rb b/app/models/provider/registry.rb index 4782c1ee17..085c31b501 100644 --- a/app/models/provider/registry.rb +++ b/app/models/provider/registry.rb @@ -78,6 +78,19 @@ def openai Provider::Openai.new(access_token, uri_base: uri_base, model: model) end + def anthropic + access_token = ENV["ANTHROPIC_ACCESS_TOKEN"].presence || + ENV["ANTHROPIC_API_KEY"].presence || + Setting.anthropic_access_token + + return nil unless access_token.present? 
+ + base_url = ENV["ANTHROPIC_BASE_URL"].presence || Setting.anthropic_base_url + model = ENV["ANTHROPIC_MODEL"].presence || Setting.anthropic_model + + Provider::Anthropic.new(access_token, base_url: base_url, model: model) + end + def yahoo_finance Provider::YahooFinance.new end @@ -147,9 +160,9 @@ def available_providers when :securities %i[twelve_data yahoo_finance tiingo eodhd alpha_vantage mfapi binance_public] when :llm - %i[openai] + %i[openai anthropic] else - %i[plaid_us plaid_eu github openai] + %i[plaid_us plaid_eu github openai anthropic] end end end diff --git a/app/models/setting.rb b/app/models/setting.rb index c5aa08d7e9..35dec641e4 100644 --- a/app/models/setting.rb +++ b/app/models/setting.rb @@ -10,6 +10,10 @@ class ValidationError < StandardError; end field :openai_uri_base, type: :string, default: ENV["OPENAI_URI_BASE"] field :openai_model, type: :string, default: ENV["OPENAI_MODEL"] field :openai_json_mode, type: :string, default: ENV["LLM_JSON_MODE"] + field :anthropic_access_token, type: :string, default: ENV["ANTHROPIC_ACCESS_TOKEN"].presence || ENV["ANTHROPIC_API_KEY"] + field :anthropic_model, type: :string, default: ENV["ANTHROPIC_MODEL"] + field :anthropic_base_url, type: :string, default: ENV["ANTHROPIC_BASE_URL"] + field :llm_provider, type: :string, default: ENV.fetch("LLM_PROVIDER", "openai") # LLM token budget (applies to every outbound LLM call: chat, auto-categorize, # merchant detection, enhance-merchants, PDF processing). Defaults track diff --git a/app/models/user.rb b/app/models/user.rb index 74016d755f..f6f059eb54 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -157,10 +157,20 @@ def ai_available? when "external" Assistant::External.available_for?(self) else - ENV["OPENAI_ACCESS_TOKEN"].present? || Setting.openai_access_token.present? + openai_configured? || anthropic_configured? end end + def openai_configured? + ENV["OPENAI_ACCESS_TOKEN"].present? || Setting.openai_access_token.present? 
+ end + + def anthropic_configured? + ENV["ANTHROPIC_ACCESS_TOKEN"].present? || + ENV["ANTHROPIC_API_KEY"].present? || + Setting.anthropic_access_token.present? + end + def ai_enabled? ai_enabled && ai_available? end diff --git a/test/models/provider/anthropic/chat_config_test.rb b/test/models/provider/anthropic/chat_config_test.rb new file mode 100644 index 0000000000..eef796e480 --- /dev/null +++ b/test/models/provider/anthropic/chat_config_test.rb @@ -0,0 +1,68 @@ +require "test_helper" + +class Provider::Anthropic::ChatConfigTest < ActiveSupport::TestCase + test "builds request with default max_tokens and prompt message" do + config = Provider::Anthropic::ChatConfig.new(prompt: "hello") + + req = config.build_request(model: "claude-sonnet-4-6") + + assert_equal "claude-sonnet-4-6", req[:model] + assert_equal 4096, req[:max_tokens] + assert_equal [ { role: "user", content: "hello" } ], req[:messages] + assert_nil req[:system_] + assert_nil req[:tools] + end + + test "honors caller-provided default_max_tokens" do + config = Provider::Anthropic::ChatConfig.new(prompt: "hi", default_max_tokens: 8192) + + req = config.build_request(model: "claude-sonnet-4-6") + + assert_equal 8192, req[:max_tokens] + end + + test "wraps instructions as cacheable system block" do + config = Provider::Anthropic::ChatConfig.new(prompt: "hi", instructions: "Be terse.") + + req = config.build_request(model: "claude-sonnet-4-6") + + assert_equal [ { + type: "text", + text: "Be terse.", + cache_control: { type: "ephemeral" } + } ], req[:system_] + end + + test "converts function definitions to Anthropic tool blocks and caches the last one" do + config = Provider::Anthropic::ChatConfig.new( + prompt: "hi", + functions: [ + { + name: "get_net_worth", + description: "Returns net worth", + params_schema: { type: "object", properties: {}, required: [], additionalProperties: false }, + strict: true + }, + { + name: "get_accounts", + description: "Returns accounts", + params_schema: { type: 
"object", properties: {}, required: [], additionalProperties: false }, + strict: true + } + ] + ) + + req = config.build_request(model: "claude-sonnet-4-6") + + assert_equal 2, req[:tools].size + assert_equal "get_net_worth", req[:tools][0][:name] + assert_equal "Returns net worth", req[:tools][0][:description] + assert_equal({ type: "object", properties: {}, required: [], additionalProperties: false }, req[:tools][0][:input_schema]) + assert_nil req[:tools][0][:cache_control] + + assert_equal({ type: "ephemeral" }, req[:tools][1][:cache_control]) + + # Anthropic schemas must not carry the OpenAI-specific `strict` flag. + req[:tools].each { |t| assert_not t[:input_schema].key?(:strict) } + end +end diff --git a/test/models/provider/anthropic/chat_parser_test.rb b/test/models/provider/anthropic/chat_parser_test.rb new file mode 100644 index 0000000000..6e8cfcac9d --- /dev/null +++ b/test/models/provider/anthropic/chat_parser_test.rb @@ -0,0 +1,84 @@ +require "test_helper" + +class Provider::Anthropic::ChatParserTest < ActiveSupport::TestCase + test "parses text-only message into ChatResponse with single output_text" do + raw = build_message( + id: "msg_1", + model: "claude-sonnet-4-6", + content: [ + OpenStruct.new(type: :text, text: "Hello"), + OpenStruct.new(type: :text, text: "world") + ] + ) + + parsed = Provider::Anthropic::ChatParser.new(raw).parsed + + assert_equal "msg_1", parsed.id + assert_equal "claude-sonnet-4-6", parsed.model + assert_equal 1, parsed.messages.size + assert_equal "Hello\nworld", parsed.messages.first.output_text + assert_empty parsed.function_requests + end + + test "parses tool_use blocks into ChatFunctionRequest" do + raw = build_message( + id: "msg_2", + model: "claude-sonnet-4-6", + content: [ + OpenStruct.new( + type: :tool_use, + id: "toolu_abc", + name: "get_transactions", + input: { "page" => 1, "order" => "asc" } + ) + ] + ) + + parsed = Provider::Anthropic::ChatParser.new(raw).parsed + + assert_empty parsed.messages + 
assert_equal 1, parsed.function_requests.size + req = parsed.function_requests.first + assert_equal "toolu_abc", req.id + assert_equal "toolu_abc", req.call_id + assert_equal "get_transactions", req.function_name + assert_equal({ "page" => 1, "order" => "asc" }.to_json, req.function_args) + end + + test "parses mixed content blocks" do + raw = build_message( + id: "msg_3", + model: "claude-sonnet-4-6", + content: [ + OpenStruct.new(type: :text, text: "Looking up your transactions..."), + OpenStruct.new(type: :tool_use, id: "toolu_42", name: "get_transactions", input: {}) + ] + ) + + parsed = Provider::Anthropic::ChatParser.new(raw).parsed + + assert_equal 1, parsed.messages.size + assert_equal "Looking up your transactions...", parsed.messages.first.output_text + assert_equal 1, parsed.function_requests.size + assert_equal "toolu_42", parsed.function_requests.first.call_id + end + + test "accepts hash-shaped content blocks" do + raw = OpenStruct.new( + id: "msg_4", + model: "claude-sonnet-4-6", + content: [ + { type: :text, text: "from hash" } + ] + ) + + parsed = Provider::Anthropic::ChatParser.new(raw).parsed + + assert_equal "from hash", parsed.messages.first.output_text + end + + private + def build_message(id:, model:, content:) + OpenStruct.new(id: id, model: model, content: content) + end +end diff --git a/test/models/provider/anthropic/message_formatter_test.rb b/test/models/provider/anthropic/message_formatter_test.rb new file mode 100644 index 0000000000..9b4b8914d7 --- /dev/null +++ b/test/models/provider/anthropic/message_formatter_test.rb @@ -0,0 +1,129 @@ +require "test_helper" + +class Provider::Anthropic::MessageFormatterTest < ActiveSupport::TestCase + test "builds a single user turn from prompt alone" do + formatter = Provider::Anthropic::MessageFormatter.new(prompt: "hi") + + messages = formatter.build + + assert_equal 1, messages.size + assert_equal({ role: "user", content: "hi" }, messages.first) + end + + test "skips empty content from 
history" do + history = [ stub_user_message("") ] + + messages = Provider::Anthropic::MessageFormatter.new(prompt: "next", conversation_history: history).build + + assert_equal [ { role: "user", content: "next" } ], messages + end + + test "renders text-only assistant history with no tool calls" do + history = [ + stub_user_message("first question"), + stub_assistant_message("first answer") + ] + + messages = Provider::Anthropic::MessageFormatter.new(prompt: "second question", conversation_history: history).build + + assert_equal({ role: "user", content: "first question" }, messages[0]) + assert_equal "assistant", messages[1][:role] + assert_equal [ { type: "text", text: "first answer" } ], messages[1][:content] + assert_equal({ role: "user", content: "second question" }, messages[2]) + end + + test "renders assistant tool_call history with paired tool_result turn" do + tool_call = stub_tool_call( + id: "toolu_1", + name: "get_net_worth", + arguments: { "currency" => "USD" }, + result: { "amount" => 12345, "currency" => "USD" } + ) + assistant = stub_assistant_message("Your net worth is $12,345.", tool_calls: [ tool_call ]) + history = [ stub_user_message("net worth?"), assistant ] + + messages = Provider::Anthropic::MessageFormatter.new(prompt: "anything else?", conversation_history: history).build + + assert_equal({ role: "user", content: "net worth?" 
}, messages[0]) + assert_equal "assistant", messages[1][:role] + assert_equal "tool_use", messages[1][:content].first[:type] + assert_equal "toolu_1", messages[1][:content].first[:id] + assert_equal "get_net_worth", messages[1][:content].first[:name] + assert_equal({ "currency" => "USD" }, messages[1][:content].first[:input]) + assert_equal "text", messages[1][:content].last[:type] + + assert_equal "user", messages[2][:role] + assert_equal "tool_result", messages[2][:content].first[:type] + assert_equal "toolu_1", messages[2][:content].first[:tool_use_id] + assert_equal({ "amount" => 12345, "currency" => "USD" }.to_json, messages[2][:content].first[:content]) + + assert_equal({ role: "user", content: "anything else?" }, messages[3]) + end + + test "renders in-flight function_results as assistant tool_use + user tool_result" do + formatter = Provider::Anthropic::MessageFormatter.new( + prompt: "what is my net worth?", + function_results: [ { + call_id: "toolu_42", + name: "get_net_worth", + arguments: { "currency" => "USD" }.to_json, + output: { amount: 99, currency: "USD" } + } ] + ) + + messages = formatter.build + + assert_equal({ role: "user", content: "what is my net worth?" 
}, messages[0]) + assert_equal "assistant", messages[1][:role] + assert_equal "tool_use", messages[1][:content].first[:type] + assert_equal "toolu_42", messages[1][:content].first[:id] + assert_equal({ "currency" => "USD" }, messages[1][:content].first[:input]) + + assert_equal "user", messages[2][:role] + assert_equal "tool_result", messages[2][:content].first[:type] + assert_equal "toolu_42", messages[2][:content].first[:tool_use_id] + assert_includes messages[2][:content].first[:content], "99" + end + + test "parses string arguments and nil outputs gracefully" do + formatter = Provider::Anthropic::MessageFormatter.new( + prompt: "go", + function_results: [ { + call_id: "toolu_x", + name: "noop", + arguments: "", + output: nil + } ] + ) + + messages = formatter.build + + assert_equal({}, messages[1][:content].first[:input]) + assert_equal "", messages[2][:content].first[:content] + end + + private + def stub_user_message(content) + msg = UserMessage.new(content: content, ai_model: "claude-sonnet-4-6") + msg.id = SecureRandom.uuid + msg + end + + def stub_assistant_message(content, tool_calls: []) + msg = AssistantMessage.new(content: content, ai_model: "claude-sonnet-4-6") + msg.id = SecureRandom.uuid + msg.stubs(:tool_calls).returns(tool_calls) + msg + end + + def stub_tool_call(id:, name:, arguments:, result:) + tc = ToolCall::Function.new( + function_name: name, + function_arguments: arguments, + function_result: result + ) + tc.stubs(:provider_call_id).returns(id) + tc.stubs(:provider_id).returns(id) + tc + end +end diff --git a/test/models/provider/anthropic_test.rb b/test/models/provider/anthropic_test.rb new file mode 100644 index 0000000000..3ffe190335 --- /dev/null +++ b/test/models/provider/anthropic_test.rb @@ -0,0 +1,145 @@ +require "test_helper" + +class Provider::AnthropicTest < ActiveSupport::TestCase + include LLMInterfaceTest + + setup do + @subject = @anthropic = Provider::Anthropic.new( + ENV.fetch("ANTHROPIC_API_KEY", "test-anthropic-token") + 
) + @subject_model = "claude-sonnet-4-6" + end + + test "provider_name returns Anthropic for standard provider" do + assert_equal "Anthropic", @subject.provider_name + end + + test "provider_name returns custom info for custom base_url" do + custom = Provider::Anthropic.new( + "test-token", + base_url: "https://bedrock.example.com/anthropic", + model: "claude-opus-4-7" + ) + + assert_equal "Custom Anthropic-compatible (https://bedrock.example.com/anthropic)", custom.provider_name + end + + test "supports_model? returns true for claude prefix" do + assert @subject.supports_model?("claude-sonnet-4-6") + assert @subject.supports_model?("claude-opus-4-7") + assert @subject.supports_model?("claude-haiku-4-5") + assert_not @subject.supports_model?("gpt-4.1") + end + + test "supported_models_description returns prefixes for standard provider" do + assert_equal "models starting with: claude", @subject.supported_models_description + end + + test "supports_pdf_processing? true for claude models" do + assert @subject.supports_pdf_processing?(model: "claude-sonnet-4-6") + assert_not @subject.supports_pdf_processing?(model: "gpt-4o") + end + + test "effective_model defers to ENV when set" do + ClimateControl.modify("ANTHROPIC_MODEL" => "claude-haiku-4-5") do + assert_equal "claude-haiku-4-5", Provider::Anthropic.effective_model + end + end + + test "effective_model falls back to default when nothing set" do + ClimateControl.modify("ANTHROPIC_MODEL" => nil) do + Setting.stubs(:anthropic_model).returns(nil) + assert_equal Provider::Anthropic::DEFAULT_MODEL, Provider::Anthropic.effective_model + end + end + + test "chat_response wraps Anthropic SDK errors in Provider::Anthropic::Error" do + fake_client = mock + @subject.instance_variable_set(:@client, fake_client) + messages = mock + fake_client.stubs(:messages).returns(messages) + messages.expects(:create).raises(StandardError.new("rate limit exceeded")) + + response = @subject.chat_response("hi", model: @subject_model) + + 
assert_not response.success? + assert_kind_of Provider::Anthropic::Error, response.error + assert_match(/rate limit/i, response.error.message) + end + + test "chat_response returns parsed ChatResponse on success" do + fake_client = stub_anthropic_client_with( + build_anthropic_message( + id: "msg_abc", + model: @subject_model, + text_blocks: [ "Hello there." ], + tool_use_blocks: [], + usage: { input_tokens: 12, output_tokens: 5 } + ) + ) + @subject.instance_variable_set(:@client, fake_client) + + response = @subject.chat_response("hi", model: @subject_model) + + assert response.success? + assert_equal "msg_abc", response.data.id + assert_equal @subject_model, response.data.model + assert_equal 1, response.data.messages.size + assert_equal "Hello there.", response.data.messages.first.output_text + assert_empty response.data.function_requests + end + + test "chat_response surfaces tool_use blocks as function_requests" do + fake_client = stub_anthropic_client_with( + build_anthropic_message( + id: "msg_xyz", + model: @subject_model, + text_blocks: [], + tool_use_blocks: [ { id: "toolu_1", name: "get_net_worth", input: { currency: "USD" } } ], + usage: { input_tokens: 20, output_tokens: 8 } + ) + ) + @subject.instance_variable_set(:@client, fake_client) + + response = @subject.chat_response( + "What is my net worth?", + model: @subject_model, + functions: [ { + name: "get_net_worth", + description: "Gets a user's net worth", + params_schema: { type: "object", properties: {}, required: [], additionalProperties: false }, + strict: true + } ] + ) + + assert response.success? 
+ assert_equal 1, response.data.function_requests.size + + req = response.data.function_requests.first + assert_equal "toolu_1", req.call_id + assert_equal "get_net_worth", req.function_name + assert_equal({ currency: "USD" }.to_json, req.function_args) + end + + private + def stub_anthropic_client_with(message) + messages = mock + messages.stubs(:create).returns(message) + client = mock + client.stubs(:messages).returns(messages) + client + end + + def build_anthropic_message(id:, model:, text_blocks:, tool_use_blocks:, usage:) + OpenStruct.new( + id: id, + model: model, + content: text_blocks.map { |t| OpenStruct.new(type: :text, text: t) } + + tool_use_blocks.map { |t| OpenStruct.new(type: :tool_use, id: t[:id], name: t[:name], input: t[:input]) }, + usage: OpenStruct.new( + input_tokens: usage[:input_tokens], + output_tokens: usage[:output_tokens] + ) + ) + end +end diff --git a/test/models/provider/registry_test.rb b/test/models/provider/registry_test.rb index e30c7d1391..bcf0509c55 100644 --- a/test/models/provider/registry_test.rb +++ b/test/models/provider/registry_test.rb @@ -2,9 +2,14 @@ class Provider::RegistryTest < ActiveSupport::TestCase test "providers filters out nil values when provider is not configured" do - # Ensure OpenAI is not configured - ClimateControl.modify("OPENAI_ACCESS_TOKEN" => nil) do + # Ensure no LLM provider is configured + ClimateControl.modify( + "OPENAI_ACCESS_TOKEN" => nil, + "ANTHROPIC_ACCESS_TOKEN" => nil, + "ANTHROPIC_API_KEY" => nil + ) do Setting.stubs(:openai_access_token).returns(nil) + Setting.stubs(:anthropic_access_token).returns(nil) registry = Provider::Registry.for_concept(:llm) @@ -45,6 +50,44 @@ class Provider::RegistryTest < ActiveSupport::TestCase end end + test "anthropic provider returns nil when no credentials are configured" do + ClimateControl.modify( + "ANTHROPIC_ACCESS_TOKEN" => nil, + "ANTHROPIC_API_KEY" => nil + ) do + Setting.stubs(:anthropic_access_token).returns(nil) + + assert_nil 
Provider::Registry.get_provider(:anthropic) + end + end + + test "anthropic provider initializes from ANTHROPIC_API_KEY env" do + ClimateControl.modify("ANTHROPIC_API_KEY" => "sk-ant-test", "ANTHROPIC_ACCESS_TOKEN" => nil) do + Setting.stubs(:anthropic_access_token).returns(nil) + + provider = Provider::Registry.get_provider(:anthropic) + + assert_instance_of Provider::Anthropic, provider + end + end + + test "anthropic provider falls back to Setting when ENV is empty" do + ClimateControl.modify( + "ANTHROPIC_ACCESS_TOKEN" => "", + "ANTHROPIC_API_KEY" => "", + "ANTHROPIC_BASE_URL" => "", + "ANTHROPIC_MODEL" => "" + ) do + Setting.stubs(:anthropic_access_token).returns("sk-ant-from-setting") + Setting.stubs(:anthropic_base_url).returns(nil) + Setting.stubs(:anthropic_model).returns(nil) + + provider = Provider::Registry.get_provider(:anthropic) + + assert_instance_of Provider::Anthropic, provider + end + end + test "openai provider falls back to Setting when ENV is empty string" do # Mock ENV to return empty string (common in Docker/env files) # Use stub_env helper which properly stubs ENV access From 714cf0bbb4aa6698eae544c97efea1574a843455 Mon Sep 17 00:00:00 2001 From: Guillem Arias Date: Mon, 25 May 2026 19:49:25 +0200 Subject: [PATCH 2/8] fix(ai): address PR review on Anthropic provider foundation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surface fixes raised by Codex + CodeRabbit on PR 1/5: - Provider::Anthropic#chat_response now accepts (and ignores) a `messages:` kwarg. Assistant::Responder passes both `messages:` (OpenAI-shape) and `conversation_history:` (raw Message records) for cross-provider parity, so the previous signature raised ArgumentError on the first chat turn through the Anthropic provider. - Provider::Anthropic#supports_model? bypasses the `claude` prefix gate when a custom base_url is configured, mirroring the OpenAI provider. 
Bedrock-shaped IDs like `anthropic.claude-sonnet-4-5-20250929-v1:0` and `claude-opus-4@20250514` are otherwise rejected by Assistant::Provided#get_model_provider and the chat dies. - Setting.anthropic_access_token is now in EncryptedSettingFields::ENCRYPTED_FIELDS so the Anthropic API key is encrypted at rest like every other provider secret. Previously plaintext while siblings (openai_access_token, twelve_data_api_key, external_assistant_token) were ciphertext. - Chat.default_model falls back to whichever provider is actually configured. Previously, with LLM_PROVIDER=anthropic but no Anthropic credentials, the default model resolved to a Claude ID that no registered provider supported, so chats failed even when OpenAI was fully configured. Adds Provider::{Anthropic,Openai}#configured? class methods for the readable callsite. - Provider::Anthropic.effective_model uses `ENV["ANTHROPIC_MODEL"].presence || Setting.anthropic_model` so the Setting lookup is only performed when the env var is absent — the previous `ENV.fetch(KEY, default)` evaluated the default arg eagerly on every call. - Provider::Anthropic::ChatConfig#anthropic_input_schema strips both `:strict` and `"strict"` keys so JSON-decoded schemas with string keys cannot leak the OpenAI-only flag through to Anthropic. Test coverage added: supports_model? bypass on custom endpoints, chat_response messages: kwarg compatibility, default_model fallback in the three credential combinations, configured? against ENV + Setting, strict-flag stripping for both key types, and a `Setting.expects(:anthropic_model).never` assertion proving the ENV-precedence test now exercises the lazy path. All 4365 tests pass (1 pre-existing libvips env error unrelated). 
--- app/models/chat.rb | 13 ++++- app/models/provider/anthropic.rb | 18 +++++- app/models/provider/anthropic/chat_config.rb | 8 ++- app/models/provider/openai.rb | 4 ++ app/models/setting.rb | 1 + test/models/chat_test.rb | 23 ++++++++ .../provider/anthropic/chat_config_test.rb | 26 +++++++++ test/models/provider/anthropic_test.rb | 57 ++++++++++++++++++- 8 files changed, 144 insertions(+), 6 deletions(-) diff --git a/app/models/chat.rb b/app/models/chat.rb index f85f31d126..f8935e427c 100644 --- a/app/models/chat.rb +++ b/app/models/chat.rb @@ -53,9 +53,18 @@ def generate_title(prompt) # Returns the default AI model to use for chats. # Resolved from the configured llm_provider so installs that swap providers - # don't have to manually update every chat default. + # don't have to manually update every chat default. Falls through to a + # provider that actually has credentials configured, otherwise the chosen + # provider's classes would later raise "no LLM provider supports model …" + # even when the other provider is configured. def default_model - if Setting.llm_provider == "anthropic" + prefers_anthropic = Setting.llm_provider == "anthropic" + + if prefers_anthropic && Provider::Anthropic.configured? + Provider::Anthropic.effective_model.presence || Setting.anthropic_model + elsif Provider::Openai.configured? + Provider::Openai.effective_model.presence || Setting.openai_model + elsif Provider::Anthropic.configured? 
Provider::Anthropic.effective_model.presence || Setting.anthropic_model else Provider::Openai.effective_model.presence || Setting.openai_model diff --git a/app/models/provider/anthropic.rb b/app/models/provider/anthropic.rb index 2b73d22117..1344ca3338 100644 --- a/app/models/provider/anthropic.rb +++ b/app/models/provider/anthropic.rb @@ -12,10 +12,19 @@ class Provider::Anthropic < Provider VISION_CAPABLE_MODEL_PREFIXES = %w[claude].freeze def self.effective_model - configured_model = ENV.fetch("ANTHROPIC_MODEL", Setting.anthropic_model) + # Use ENV[].presence rather than ENV.fetch(KEY, default) so the Setting + # lookup is only performed when the ENV var is actually absent — otherwise + # the default arg is evaluated eagerly on every call. + configured_model = ENV["ANTHROPIC_MODEL"].presence || Setting.anthropic_model configured_model.presence || DEFAULT_MODEL end + def self.configured? + ENV["ANTHROPIC_ACCESS_TOKEN"].present? || + ENV["ANTHROPIC_API_KEY"].present? || + Setting.anthropic_access_token.present? + end + def initialize(access_token, base_url: nil, model: nil) client_options = { api_key: access_token } client_options[:base_url] = base_url if base_url.present? @@ -27,6 +36,12 @@ def initialize(access_token, base_url: nil, model: nil) end def supports_model?(model) + # Custom endpoints (Bedrock, Vertex, or other Anthropic-compatible proxies) + # use their own model-ID conventions — e.g. Bedrock IDs look like + # `anthropic.claude-sonnet-4-5-20250929-v1:0`. Mirror the OpenAI provider + # and bypass the prefix gate when the caller has wired a custom base_url. + return true if custom_endpoint? + DEFAULT_ANTHROPIC_MODEL_PREFIXES.any? 
{ |prefix| model.to_s.start_with?(prefix) } end @@ -78,6 +93,7 @@ def chat_response( instructions: nil, functions: [], function_results: [], + messages: nil, conversation_history: [], streamer: nil, previous_response_id: nil, diff --git a/app/models/provider/anthropic/chat_config.rb b/app/models/provider/anthropic/chat_config.rb index fd75976118..a4b3f2d085 100644 --- a/app/models/provider/anthropic/chat_config.rb +++ b/app/models/provider/anthropic/chat_config.rb @@ -74,10 +74,14 @@ def build_tools # OpenAI strict schemas frequently include `additionalProperties: false`, which # Anthropic also accepts. The shapes are otherwise JSON Schema 2020-12 compatible. - # `strict` is OpenAI-only and must not be forwarded. + # `strict` is OpenAI-only and must not be forwarded — strip both symbol and + # string keys so we don't leak it when a caller hands us a JSON-decoded hash. def anthropic_input_schema(schema) schema = schema.deep_dup - schema.delete(:strict) if schema.is_a?(Hash) + if schema.is_a?(Hash) + schema.delete(:strict) + schema.delete("strict") + end schema end end diff --git a/app/models/provider/openai.rb b/app/models/provider/openai.rb index 8a28402f83..c28114273a 100644 --- a/app/models/provider/openai.rb +++ b/app/models/provider/openai.rb @@ -14,6 +14,10 @@ def self.effective_model ENV.fetch("OPENAI_MODEL") { Setting.openai_model }.presence || DEFAULT_MODEL end + def self.configured? + ENV["OPENAI_ACCESS_TOKEN"].present? || Setting.openai_access_token.present? 
+ end + def initialize(access_token, uri_base: nil, model: nil) client_options = { access_token: access_token } llm_uri_base = uri_base.presence diff --git a/app/models/setting.rb b/app/models/setting.rb index 35dec641e4..b4d80c0078 100644 --- a/app/models/setting.rb +++ b/app/models/setting.rb @@ -74,6 +74,7 @@ module EncryptedSettingFields eodhd_api_key alpha_vantage_api_key openai_access_token + anthropic_access_token external_assistant_token ].freeze diff --git a/test/models/chat_test.rb b/test/models/chat_test.rb index ed0ef32670..7da1c78e57 100644 --- a/test/models/chat_test.rb +++ b/test/models/chat_test.rb @@ -62,6 +62,29 @@ class ChatTest < ActiveSupport::TestCase end end + test "default_model returns claude when LLM_PROVIDER=anthropic and Anthropic is configured" do + Provider::Anthropic.stubs(:configured?).returns(true) + Setting.stubs(:llm_provider).returns("anthropic") + + assert_equal Provider::Anthropic::DEFAULT_MODEL, Chat.default_model + end + + test "default_model falls back to OpenAI when Anthropic is preferred but unconfigured" do + Provider::Anthropic.stubs(:configured?).returns(false) + Provider::Openai.stubs(:configured?).returns(true) + Setting.stubs(:llm_provider).returns("anthropic") + + assert_equal Provider::Openai::DEFAULT_MODEL, Chat.default_model + end + + test "default_model uses Anthropic when OpenAI is unconfigured" do + Provider::Anthropic.stubs(:configured?).returns(true) + Provider::Openai.stubs(:configured?).returns(false) + Setting.stubs(:llm_provider).returns("openai") + + assert_equal Provider::Anthropic::DEFAULT_MODEL, Chat.default_model + end + test "creates with configured model when OPENAI_MODEL env is set" do prompt = "Test prompt" diff --git a/test/models/provider/anthropic/chat_config_test.rb b/test/models/provider/anthropic/chat_config_test.rb index eef796e480..8939fc76e8 100644 --- a/test/models/provider/anthropic/chat_config_test.rb +++ b/test/models/provider/anthropic/chat_config_test.rb @@ -65,4 +65,30 @@ class 
Provider::Anthropic::ChatConfigTest < ActiveSupport::TestCase # Anthropic schemas must not carry the OpenAI-specific `strict` flag. req[:tools].each { |t| assert_not t[:input_schema].key?(:strict) } end + + test "strips both symbol and string-keyed `strict` flags from input_schema" do + config = Provider::Anthropic::ChatConfig.new( + prompt: "hi", + functions: [ + { + name: "fn_with_string_strict", + description: "schema arrived from JSON.parse with string keys", + params_schema: { + "type" => "object", + "properties" => {}, + "required" => [], + "additionalProperties" => false, + "strict" => true + }, + strict: true + } + ] + ) + + req = config.build_request(model: "claude-sonnet-4-6") + + schema = req[:tools].first[:input_schema] + assert_not schema.key?(:strict) + assert_not schema.key?("strict") + end end diff --git a/test/models/provider/anthropic_test.rb b/test/models/provider/anthropic_test.rb index 3ffe190335..c0cc158b5b 100644 --- a/test/models/provider/anthropic_test.rb +++ b/test/models/provider/anthropic_test.rb @@ -31,6 +31,20 @@ class Provider::AnthropicTest < ActiveSupport::TestCase assert_not @subject.supports_model?("gpt-4.1") end + test "supports_model? bypasses the prefix gate for custom endpoints" do + custom = Provider::Anthropic.new( + "test-token", + base_url: "https://bedrock.example.com/anthropic", + model: "anthropic.claude-sonnet-4-5-20250929-v1:0" + ) + + # Bedrock-shaped IDs start with "anthropic", not "claude" — would fail the + # default prefix check, but custom endpoints must accept any model. 
+ assert custom.supports_model?("anthropic.claude-sonnet-4-5-20250929-v1:0") + assert custom.supports_model?("claude-opus-4@20250514") + assert custom.supports_model?("any-string-the-endpoint-accepts") + end + test "supported_models_description returns prefixes for standard provider" do assert_equal "models starting with: claude", @subject.supported_models_description end @@ -40,12 +54,28 @@ class Provider::AnthropicTest < ActiveSupport::TestCase assert_not @subject.supports_pdf_processing?(model: "gpt-4o") end - test "effective_model defers to ENV when set" do + test "effective_model defers to ENV when set without consulting Setting" do ClimateControl.modify("ANTHROPIC_MODEL" => "claude-haiku-4-5") do + Setting.expects(:anthropic_model).never assert_equal "claude-haiku-4-5", Provider::Anthropic.effective_model end end + test "configured? reflects ENV and Setting presence" do + ClimateControl.modify("ANTHROPIC_ACCESS_TOKEN" => nil, "ANTHROPIC_API_KEY" => nil) do + Setting.stubs(:anthropic_access_token).returns(nil) + assert_not Provider::Anthropic.configured? + + Setting.stubs(:anthropic_access_token).returns("sk-ant-x") + assert Provider::Anthropic.configured? + end + + ClimateControl.modify("ANTHROPIC_API_KEY" => "sk-ant-y") do + Setting.stubs(:anthropic_access_token).returns(nil) + assert Provider::Anthropic.configured? + end + end + test "effective_model falls back to default when nothing set" do ClimateControl.modify("ANTHROPIC_MODEL" => nil) do Setting.stubs(:anthropic_model).returns(nil) @@ -67,6 +97,31 @@ class Provider::AnthropicTest < ActiveSupport::TestCase assert_match(/rate limit/i, response.error.message) end + test "chat_response accepts messages: kwarg passed by Responder without raising" do + # The OpenAI-shaped `messages:` array is passed alongside `conversation_history:` + # for cross-provider parity. Anthropic ignores it but must still accept it as + # a keyword argument — historical regression that broke the first chat turn. 
+ fake_client = stub_anthropic_client_with( + build_anthropic_message( + id: "msg_kw", + model: @subject_model, + text_blocks: [ "ok" ], + tool_use_blocks: [], + usage: { input_tokens: 1, output_tokens: 1 } + ) + ) + @subject.instance_variable_set(:@client, fake_client) + + response = @subject.chat_response( + "hi", + model: @subject_model, + messages: [ { role: "user", content: "hi" } ], + conversation_history: [] + ) + + assert response.success? + end + test "chat_response returns parsed ChatResponse on success" do fake_client = stub_anthropic_client_with( build_anthropic_message( From a0c552cb3879984cd93a4abda4078d83a8bc1639 Mon Sep 17 00:00:00 2001 From: Guillem Arias Date: Mon, 25 May 2026 19:58:30 +0200 Subject: [PATCH 3/8] test(chat): make default_model tests resilient to ENV model overrides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit flagged on PR review: the new default_model tests asserted against Provider::*::DEFAULT_MODEL, but Chat.default_model actually returns Provider::*.effective_model.presence (which reads OPENAI_MODEL / ANTHROPIC_MODEL from the environment). With either env var set, the tests would fail intermittently even though routing was correct. - New default_model tests now assert against the provider's effective_model directly, so they verify the routing decision (which provider's value wins) without coupling to the constant. - Pre-existing "creates with default model" assertions had the same brittleness; switch them to compare against Chat.default_model so the chosen model is whatever the env / Setting cascade resolves to. Verified by running `ANTHROPIC_MODEL=claude-haiku-4-5 OPENAI_MODEL=gpt-4o bin/rails test test/models/chat_test.rb` — 16 runs, 0 failures (previously 2 pre-existing failures + 0 from the new tests). 
--- test/models/chat_test.rb | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/test/models/chat_test.rb b/test/models/chat_test.rb index 7da1c78e57..accf5e0cb8 100644 --- a/test/models/chat_test.rb +++ b/test/models/chat_test.rb @@ -47,7 +47,7 @@ class ChatTest < ActiveSupport::TestCase chat = @user.chats.start!(prompt, model: nil) assert_equal 2, chat.messages.count - assert_equal Provider::Openai::DEFAULT_MODEL, chat.messages.find_by!(type: "UserMessage").ai_model + assert_equal Chat.default_model, chat.messages.find_by!(type: "UserMessage").ai_model end end @@ -58,31 +58,35 @@ class ChatTest < ActiveSupport::TestCase chat = @user.chats.start!(prompt, model: "") assert_equal 2, chat.messages.count - assert_equal Provider::Openai::DEFAULT_MODEL, chat.messages.find_by!(type: "UserMessage").ai_model + assert_equal Chat.default_model, chat.messages.find_by!(type: "UserMessage").ai_model end end - test "default_model returns claude when LLM_PROVIDER=anthropic and Anthropic is configured" do + # These three tests assert routing (which provider's effective_model wins), + # not the constant value itself — the assertion side reads through + # Provider::*.effective_model so ENV overrides like ANTHROPIC_MODEL / + # OPENAI_MODEL don't make the tests flake. 
+ test "default_model returns Anthropic's effective_model when LLM_PROVIDER=anthropic and Anthropic is configured" do Provider::Anthropic.stubs(:configured?).returns(true) Setting.stubs(:llm_provider).returns("anthropic") - assert_equal Provider::Anthropic::DEFAULT_MODEL, Chat.default_model + assert_equal Provider::Anthropic.effective_model, Chat.default_model end - test "default_model falls back to OpenAI when Anthropic is preferred but unconfigured" do + test "default_model falls back to OpenAI's effective_model when Anthropic is preferred but unconfigured" do Provider::Anthropic.stubs(:configured?).returns(false) Provider::Openai.stubs(:configured?).returns(true) Setting.stubs(:llm_provider).returns("anthropic") - assert_equal Provider::Openai::DEFAULT_MODEL, Chat.default_model + assert_equal Provider::Openai.effective_model, Chat.default_model end - test "default_model uses Anthropic when OpenAI is unconfigured" do + test "default_model uses Anthropic's effective_model when OpenAI is unconfigured" do Provider::Anthropic.stubs(:configured?).returns(true) Provider::Openai.stubs(:configured?).returns(false) Setting.stubs(:llm_provider).returns("openai") - assert_equal Provider::Anthropic::DEFAULT_MODEL, Chat.default_model + assert_equal Provider::Anthropic.effective_model, Chat.default_model end test "creates with configured model when OPENAI_MODEL env is set" do From 66753319a7489d538e7b582ff2f42fe56feb46dc Mon Sep 17 00:00:00 2001 From: Guillem Arias Date: Mon, 25 May 2026 20:27:59 +0200 Subject: [PATCH 4/8] fix(ai): address local review on Anthropic foundation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Provider::Anthropic#supports_pdf_processing? bypasses prefix gate for custom endpoints, mirroring supports_model? - Provider::Anthropic#initialize raises Error when custom_endpoint? 
AND model.blank?, parity with Provider::Openai - stream_chat_response captures partial usage on mid-stream errors and records it via the new on_partial callback so chat_response can skip the duplicate error row in the outer rescue - safe_accumulated_message swallows the secondary failure when the SDK cannot reconstruct a snapshot - langfuse_client memoizes properly (||= instead of =) so repeated calls don't churn Langfuse instances - MessageFormatter sorts tool_calls by created_at then id so the message array is deterministic across replays; skips tool_calls missing both provider_call_id and provider_id rather than sending `id: nil` and getting rejected by Anthropic - Setting.anthropic_access_token default falls back through ENV["ANTHROPIC_API_KEY"].presence (was missing .presence, so an empty-string env value bled through) - User#openai_configured? / #anthropic_configured? delegate to the Provider::* class methods — single source of truth - Assistant::Responder renames the OpenAI-shape history builder conversation_history → openai_messages_payload so the kwarg name matches the local method name (messages: openai_messages_payload, conversation_history: chat_message_records) - Assistant::Builtin stale-history comment updated to reference both builders Adds a streaming chat_response test using ad-hoc subclasses of the SDK event types so the case/when dispatch matches via is_a? without stubbing class-level === behavior. 
--- app/models/assistant/builtin.rb | 2 +- app/models/assistant/responder.rb | 9 ++- app/models/provider/anthropic.rb | 57 +++++++++++++----- .../provider/anthropic/message_formatter.rb | 26 +++++++-- app/models/setting.rb | 2 +- app/models/user.rb | 6 +- test/models/provider/anthropic_test.rb | 58 +++++++++++++++++++ 7 files changed, 133 insertions(+), 27 deletions(-) diff --git a/app/models/assistant/builtin.rb b/app/models/assistant/builtin.rb index 6a1ae93c9a..16130bc38d 100644 --- a/app/models/assistant/builtin.rb +++ b/app/models/assistant/builtin.rb @@ -60,7 +60,7 @@ def respond_to(message, assistant_message: nil) if assistant_message.content.blank? assistant_message.destroy else - # Demote partially-streamed turns to `failed` so `Responder#conversation_history` excludes them. + # Demote partially-streamed turns to `failed` so the responder's history builders (`#openai_messages_payload`, `#chat_message_records`) exclude them. assistant_message.update_columns(status: "failed") end end diff --git a/app/models/assistant/responder.rb b/app/models/assistant/responder.rb index 406993ab7f..a5950a51af 100644 --- a/app/models/assistant/responder.rb +++ b/app/models/assistant/responder.rb @@ -79,7 +79,7 @@ def get_llm_response(streamer:, function_results: [], previous_response_id: nil) instructions: instructions, functions: function_tool_caller.function_definitions, function_results: function_results, - messages: conversation_history, + messages: openai_messages_payload, conversation_history: chat_message_records, streamer: streamer, previous_response_id: previous_response_id, @@ -119,7 +119,7 @@ def chat # Raw Message records preceding the current turn — providers that build # their own native message shape (Anthropic) consume this directly so they - # do not have to round-trip through the OpenAI-shaped `conversation_history`. + # do not have to round-trip through the OpenAI-shaped payload below. 
def chat_message_records return [] unless chat&.messages @@ -131,7 +131,10 @@ def chat_message_records .to_a end - def conversation_history + # Builds the OpenAI-shaped messages payload (role: "user" | "assistant" | + # "tool"; tool_call_id pairing) consumed by Provider::Openai's generic + # chat path. Anthropic uses chat_message_records instead. + def openai_messages_payload messages = [] return messages unless chat&.messages diff --git a/app/models/provider/anthropic.rb b/app/models/provider/anthropic.rb index 1344ca3338..181f1a76f9 100644 --- a/app/models/provider/anthropic.rb +++ b/app/models/provider/anthropic.rb @@ -32,6 +32,11 @@ def initialize(access_token, base_url: nil, model: nil) @client = ::Anthropic::Client.new(**client_options) @base_url = base_url + + if custom_endpoint? && model.blank? + raise Error, "Model is required when using a custom Anthropic-compatible endpoint" + end + @default_model = model.presence || DEFAULT_MODEL end @@ -76,6 +81,8 @@ def enhance_provider_merchants(merchants: [], model: "", family: nil, json_mode: end def supports_pdf_processing?(model: @default_model) + return true if custom_endpoint? + VISION_CAPABLE_MODEL_PREFIXES.any? { |prefix| model.to_s.start_with?(prefix) } end @@ -120,10 +127,19 @@ def chat_response( user_identifier: user_identifier ) + partial_usage_recorded = false + begin parsed, usage = if streamer.present? 
- stream_chat_response(streamer: streamer, request_params: request_params) + stream_chat_response( + streamer: streamer, + request_params: request_params, + on_partial: ->(partial_usage) { + record_llm_usage(family: family, model: model, operation: "chat", usage: partial_usage) + partial_usage_recorded = true + } + ) else sync_chat_response(request_params: request_params) end @@ -147,7 +163,7 @@ def chat_response( error: e, trace: trace ) - record_llm_usage(family: family, model: model, operation: "chat", error: e) + record_llm_usage(family: family, model: model, operation: "chat", error: e) unless partial_usage_recorded raise end end @@ -167,22 +183,31 @@ def sync_chat_response(request_params:) [ parsed, usage ] end - def stream_chat_response(streamer:, request_params:) + def stream_chat_response(streamer:, request_params:, on_partial: nil) final_message = nil stream = client.messages.stream(**request_params) - stream.each do |event| - case event - when ::Anthropic::Streaming::TextEvent - streamer.call( - Provider::LlmConcept::ChatStreamChunk.new(type: "output_text", data: event.text, usage: nil) - ) - when ::Anthropic::Streaming::MessageStopEvent - final_message = event.message + # If `stream.each` raises mid-iteration (network drop, client abort), + # we still want to surface whatever tokens accumulated so the cost + # ledger doesn't lose partial-output billing. 
+ begin + stream.each do |event| + case event + when ::Anthropic::Streaming::TextEvent + streamer.call( + Provider::LlmConcept::ChatStreamChunk.new(type: "output_text", data: event.text, usage: nil) + ) + when ::Anthropic::Streaming::MessageStopEvent + final_message = event.message + end end + rescue => mid_stream_error + partial = safe_accumulated_message(stream) + on_partial&.call(build_usage_hash(partial&.usage)) if partial + raise mid_stream_error end - final_message ||= stream.accumulated_message + final_message ||= safe_accumulated_message(stream) parsed = ChatParser.new(final_message).parsed usage = build_usage_hash(final_message.usage) @@ -193,6 +218,12 @@ def stream_chat_response(streamer:, request_params:) [ parsed, usage ] end + def safe_accumulated_message(stream) + stream.accumulated_message + rescue StandardError + nil + end + def build_usage_hash(raw_usage) return {} unless raw_usage @@ -217,7 +248,7 @@ def build_usage_hash(raw_usage) def langfuse_client return unless ENV["LANGFUSE_PUBLIC_KEY"].present? && ENV["LANGFUSE_SECRET_KEY"].present? - @langfuse_client = Langfuse.new + @langfuse_client ||= Langfuse.new end def create_langfuse_trace(name:, input:, session_id: nil, user_identifier: nil) diff --git a/app/models/provider/anthropic/message_formatter.rb b/app/models/provider/anthropic/message_formatter.rb index e9288e3768..b6ba2717f9 100644 --- a/app/models/provider/anthropic/message_formatter.rb +++ b/app/models/provider/anthropic/message_formatter.rb @@ -40,9 +40,19 @@ def build end private + # ToolCall records have no association-level order; enforce + # chronological order here so message arrays are deterministic across + # replays and Anthropic sees tool_use blocks in the order the model + # originally emitted them. 
+ def ordered_tool_calls(assistant_message) + assistant_message.tool_calls.sort_by { |tc| [ tc.created_at || Time.zone.at(0), tc.id.to_s ] } + end + def assistant_history_blocks(assistant_message) + tool_calls = ordered_tool_calls(assistant_message).select { |tc| tool_call_id(tc).present? } + blocks = [] - blocks.concat(assistant_message.tool_calls.map { |tc| tool_use_block_from_record(tc) }) if assistant_message.tool_calls.any? + blocks.concat(tool_calls.map { |tc| tool_use_block_from_record(tc) }) if tool_calls.any? blocks << { type: "text", text: assistant_message.content.to_s } if assistant_message.content.present? return [] if blocks.empty? @@ -51,20 +61,26 @@ def assistant_history_blocks(assistant_message) # If the assistant turn used tools, Anthropic requires a user turn with # matching tool_result blocks before the next assistant turn. - if assistant_message.tool_calls.any? + if tool_calls.any? result << { role: "user", - content: assistant_message.tool_calls.map { |tc| tool_result_block_from_record(tc) } + content: tool_calls.map { |tc| tool_result_block_from_record(tc) } } end result end + # tool_use_id is required; skip tool_calls missing both identifiers + # rather than sending `id: nil` and getting rejected by Anthropic. 
+ def tool_call_id(tool_call) + tool_call.provider_call_id.presence || tool_call.provider_id.presence + end + def tool_use_block_from_record(tool_call) { type: "tool_use", - id: tool_call.provider_call_id || tool_call.provider_id, + id: tool_call_id(tool_call), name: tool_call.function_name, input: parse_arguments(tool_call.function_arguments) } @@ -73,7 +89,7 @@ def tool_use_block_from_record(tool_call) def tool_result_block_from_record(tool_call) { type: "tool_result", - tool_use_id: tool_call.provider_call_id || tool_call.provider_id, + tool_use_id: tool_call_id(tool_call), content: serialize_output(tool_call.function_result) } end diff --git a/app/models/setting.rb b/app/models/setting.rb index b4d80c0078..e6879ed8df 100644 --- a/app/models/setting.rb +++ b/app/models/setting.rb @@ -10,7 +10,7 @@ class ValidationError < StandardError; end field :openai_uri_base, type: :string, default: ENV["OPENAI_URI_BASE"] field :openai_model, type: :string, default: ENV["OPENAI_MODEL"] field :openai_json_mode, type: :string, default: ENV["LLM_JSON_MODE"] - field :anthropic_access_token, type: :string, default: ENV["ANTHROPIC_ACCESS_TOKEN"].presence || ENV["ANTHROPIC_API_KEY"] + field :anthropic_access_token, type: :string, default: ENV["ANTHROPIC_ACCESS_TOKEN"].presence || ENV["ANTHROPIC_API_KEY"].presence field :anthropic_model, type: :string, default: ENV["ANTHROPIC_MODEL"] field :anthropic_base_url, type: :string, default: ENV["ANTHROPIC_BASE_URL"] field :llm_provider, type: :string, default: ENV.fetch("LLM_PROVIDER", "openai") diff --git a/app/models/user.rb b/app/models/user.rb index f6f059eb54..e585d9d569 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -162,13 +162,11 @@ def ai_available? end def openai_configured? - ENV["OPENAI_ACCESS_TOKEN"].present? || Setting.openai_access_token.present? + Provider::Openai.configured? end def anthropic_configured? - ENV["ANTHROPIC_ACCESS_TOKEN"].present? || - ENV["ANTHROPIC_API_KEY"].present? 
|| - Setting.anthropic_access_token.present? + Provider::Anthropic.configured? end def ai_enabled? diff --git a/test/models/provider/anthropic_test.rb b/test/models/provider/anthropic_test.rb index c0cc158b5b..24d96bb542 100644 --- a/test/models/provider/anthropic_test.rb +++ b/test/models/provider/anthropic_test.rb @@ -144,6 +144,64 @@ class Provider::AnthropicTest < ActiveSupport::TestCase assert_empty response.data.function_requests end + test "chat_response streams text deltas and emits a final response chunk" do + final_message = build_anthropic_message( + id: "msg_stream", + model: @subject_model, + text_blocks: [ "Hello world" ], + tool_use_blocks: [], + usage: { input_tokens: 7, output_tokens: 3 } + ) + # Use ad-hoc subclasses of the SDK event types so the case/when dispatch + # inside `stream_chat_response` matches them via `is_a?` without needing + # to stub class-level `===` behavior. + text_event_cls = Class.new(::Anthropic::Streaming::TextEvent) do + def initialize(text:, snapshot:) + @text = text + @snapshot = snapshot + end + attr_reader :text, :snapshot + end + stop_event_cls = Class.new(::Anthropic::Streaming::MessageStopEvent) do + def initialize(message:) + @message = message + end + attr_reader :message + end + events = [ + text_event_cls.new(text: "Hello ", snapshot: "Hello "), + text_event_cls.new(text: "world", snapshot: "Hello world"), + stop_event_cls.new(message: final_message) + ] + + fake_stream = mock + fake_stream.stubs(:each).multiple_yields(*events.map { |e| [ e ] }) + fake_stream.stubs(:accumulated_message).returns(final_message) + + messages = mock + messages.stubs(:stream).returns(fake_stream) + client = mock + client.stubs(:messages).returns(messages) + @subject.instance_variable_set(:@client, client) + + collected = [] + response = @subject.chat_response( + "hi", + model: @subject_model, + streamer: ->(chunk) { collected << chunk } + ) + + assert response.success? 
+ text_chunks = collected.select { |c| c.type == "output_text" } + response_chunks = collected.select { |c| c.type == "response" } + + assert_equal 2, text_chunks.size + assert_equal [ "Hello ", "world" ], text_chunks.map(&:data) + assert_equal 1, response_chunks.size + assert_equal "msg_stream", response_chunks.first.data.id + assert_equal 10, response_chunks.first.usage["total_tokens"] + end + test "chat_response surfaces tool_use blocks as function_requests" do fake_client = stub_anthropic_client_with( build_anthropic_message( From b26306d807e02c91ca0fe229f5a81bbc9b671882 Mon Sep 17 00:00:00 2001 From: Guillem Arias Date: Tue, 26 May 2026 10:39:15 +0200 Subject: [PATCH 5/8] test(ai): add Anthropic tool_use round-trip + multi-tool turn coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses @jjmata's "worth confirming" note on PR #1983: tool-use turns from prior assistant messages must round-trip correctly when retrieved from the database. - New `ChatParser → ToolCall::Function → MessageFormatter` test walks the full path: Anthropic response with a tool_use block → ChatFunctionRequest → ToolCall::Function.from_function_request → persisted on the AssistantMessage → MessageFormatter rebuild on the next turn. Asserts the original `tool_use.id` is preserved end-to-end as both `tool_use.id` and the paired `tool_result.tool_use_id`, and that the original `input` hash and serialized result content survive. - New multi-tool assistant turn test confirms two tool_use blocks on a single assistant message render as two tool_use blocks followed by two paired tool_result blocks in a single user-role follow-up, matching Anthropic's required alternation. Both tests exercise the existing PR1 code without behavior changes. 
--- .../anthropic/message_formatter_test.rb | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/test/models/provider/anthropic/message_formatter_test.rb b/test/models/provider/anthropic/message_formatter_test.rb index 9b4b8914d7..73a7f9b42e 100644 --- a/test/models/provider/anthropic/message_formatter_test.rb +++ b/test/models/provider/anthropic/message_formatter_test.rb @@ -85,6 +85,74 @@ class Provider::Anthropic::MessageFormatterTest < ActiveSupport::TestCase assert_includes messages[2][:content].first[:content], "99" end + # Confirms the round-trip flagged in PR #1983 review: an Anthropic tool_use + # block returned by the model → ChatFunctionRequest → ToolCall::Function + # persisted on the AssistantMessage → MessageFormatter rebuild on the next + # turn produces an Anthropic-compatible history where tool_use_id pairs back + # to the original block. + test "ChatParser → ToolCall::Function → MessageFormatter round-trips tool_use_id" do + anthropic_response = OpenStruct.new( + id: "msg_abc", + model: "claude-sonnet-4-6", + content: [ + OpenStruct.new(type: :tool_use, id: "toolu_round_trip", name: "get_net_worth", input: { "currency" => "USD" }) + ] + ) + + parsed = Provider::Anthropic::ChatParser.new(anthropic_response).parsed + function_request = parsed.function_requests.first + + persisted_tool_call = ToolCall::Function.from_function_request( + function_request, + { "amount" => 12345, "currency" => "USD" } + ) + + assistant = stub_assistant_message("Your net worth is $12,345.", tool_calls: [ persisted_tool_call ]) + history = [ stub_user_message("net worth?"), assistant ] + + rebuilt = Provider::Anthropic::MessageFormatter.new(prompt: "follow-up", conversation_history: history).build + + tool_use_block = rebuilt[1][:content].find { |b| b[:type] == "tool_use" } + tool_result_block = rebuilt[2][:content].first + + assert_equal "toolu_round_trip", tool_use_block[:id] + assert_equal "toolu_round_trip", tool_result_block[:tool_use_id] + 
assert_equal({ "currency" => "USD" }, tool_use_block[:input]) + assert_equal({ "amount" => 12345, "currency" => "USD" }.to_json, tool_result_block[:content]) + end + + test "renders multi-tool assistant turn with all pairings preserved" do + tool_a = stub_tool_call( + id: "toolu_a", + name: "get_accounts", + arguments: {}, + result: [ { "id" => 1, "name" => "Checking" } ] + ) + tool_b = stub_tool_call( + id: "toolu_b", + name: "get_holdings", + arguments: {}, + result: [ { "ticker" => "VTI", "qty" => 10 } ] + ) + assistant = stub_assistant_message("Looked up your accounts and holdings.", tool_calls: [ tool_a, tool_b ]) + + messages = Provider::Anthropic::MessageFormatter.new( + prompt: "follow-up", + conversation_history: [ stub_user_message("accounts and holdings?"), assistant ] + ).build + + tool_uses = messages[1][:content].select { |b| b[:type] == "tool_use" } + tool_results = messages[2][:content] + + assert_equal 2, tool_uses.size + assert_equal 2, tool_results.size + assert_equal [ "toolu_a", "toolu_b" ], tool_uses.map { |b| b[:id] } + assert_equal [ "toolu_a", "toolu_b" ], tool_results.map { |b| b[:tool_use_id] } + # Anthropic requires the user turn to follow the assistant turn that used tools + assert_equal "assistant", messages[1][:role] + assert_equal "user", messages[2][:role] + end + test "parses string arguments and nil outputs gracefully" do formatter = Provider::Anthropic::MessageFormatter.new( prompt: "go", From 17e9e0d9373b402a46651abd61d8adf82616a44c Mon Sep 17 00:00:00 2001 From: Guillem Arias Date: Wed, 27 May 2026 10:08:31 +0200 Subject: [PATCH 6/8] test(ai): require "ostruct" explicitly in Anthropic provider tests OpenStruct is moving out of Ruby's default load path (warning in 3.4+, removed in 3.5+). Tests work today because ActiveSupport transitively loads it, but that's incidental. Match the existing convention in test/controllers/settings/hostings_controller_test.rb which explicitly requires ostruct for the same reason. 
--- test/models/provider/anthropic/chat_parser_test.rb | 1 + test/models/provider/anthropic/message_formatter_test.rb | 1 + test/models/provider/anthropic_test.rb | 1 + 3 files changed, 3 insertions(+) diff --git a/test/models/provider/anthropic/chat_parser_test.rb b/test/models/provider/anthropic/chat_parser_test.rb index 6e8cfcac9d..16085c656f 100644 --- a/test/models/provider/anthropic/chat_parser_test.rb +++ b/test/models/provider/anthropic/chat_parser_test.rb @@ -1,4 +1,5 @@ require "test_helper" +require "ostruct" class Provider::Anthropic::ChatParserTest < ActiveSupport::TestCase test "parses text-only message into ChatResponse with single output_text" do diff --git a/test/models/provider/anthropic/message_formatter_test.rb b/test/models/provider/anthropic/message_formatter_test.rb index 73a7f9b42e..cff8c625fe 100644 --- a/test/models/provider/anthropic/message_formatter_test.rb +++ b/test/models/provider/anthropic/message_formatter_test.rb @@ -1,4 +1,5 @@ require "test_helper" +require "ostruct" class Provider::Anthropic::MessageFormatterTest < ActiveSupport::TestCase test "builds a single user turn from prompt alone" do diff --git a/test/models/provider/anthropic_test.rb b/test/models/provider/anthropic_test.rb index 24d96bb542..d0beb9da92 100644 --- a/test/models/provider/anthropic_test.rb +++ b/test/models/provider/anthropic_test.rb @@ -1,4 +1,5 @@ require "test_helper" +require "ostruct" class Provider::AnthropicTest < ActiveSupport::TestCase include LLMInterfaceTest From 5ab9bb33d6048e1e45f0722cab79586684ae72b4 Mon Sep 17 00:00:00 2001 From: Guillem Arias Date: Wed, 27 May 2026 10:41:53 +0200 Subject: [PATCH 7/8] fix(ai): sanitize Langfuse warn logs, normalize tool_use.input, dedup history fetch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses three open CodeRabbit findings on PR #1983. - Provider::Anthropic Langfuse rescue branches no longer include `e.full_message` in `Rails.logger.warn`. 
`full_message` bundles the backtrace + cause chain and on some SDK error types includes the serialized request/response payload (prompt, model output). Logs now report `#{e.class}: #{e.message}` only. Three sites: create_langfuse_trace, log_langfuse_generation, upsert_langfuse_trace. Note: Provider::Openai has the same pattern (copy-pasted source) — harmonization deferred to a follow-up cleanup PR; this commit fixes only the Anthropic provider to keep PR scope tight. - MessageFormatter#parse_arguments now coerces any non-Hash parsed result to `{}`. Anthropic's Messages API requires `tool_use.input` to be a JSON object (map); a stored ToolCall::Function record whose arguments parse to a scalar, bool, or array (corrupt row, legacy data, cross-provider bleed) would otherwise produce a payload the API rejects. Normal flow stores Hash arguments end-to-end so the fix is defensive — adds 2 tests covering scalar/array JSON strings and non-String non-Hash inputs. - Assistant::Responder dedups the chat-history fetch. The previous layout fired two near-identical `chat.messages.where(...).includes( :tool_calls).ordered` queries per LLM turn (one for the OpenAI-shape payload, one for the raw-records kwarg). A new memoized `complete_chat_messages` fetches once; `chat_message_records` filters out the current message via `Array#reject`, `openai_messages_payload` iterates the cached array unchanged. One SQL query per turn instead of two. Memoization scope = single Responder instance (per LLM call), so cache invalidation is not a concern. All 4370 tests pass (1 pre-existing libvips env error unrelated). Rubocop + brakeman clean. 
--- app/models/assistant/responder.rb | 35 ++++++++++------- app/models/provider/anthropic.rb | 9 +++-- .../provider/anthropic/message_formatter.rb | 22 +++++++---- .../anthropic/message_formatter_test.rb | 39 +++++++++++++++++++ 4 files changed, 79 insertions(+), 26 deletions(-) diff --git a/app/models/assistant/responder.rb b/app/models/assistant/responder.rb index a5950a51af..bfd86321cc 100644 --- a/app/models/assistant/responder.rb +++ b/app/models/assistant/responder.rb @@ -117,18 +117,29 @@ def chat @chat ||= message.chat end + # Memoized fetch — both `chat_message_records` and `openai_messages_payload` + # derive their shape from this one in-memory array so a single chat turn + # fires one history query instead of two. + def complete_chat_messages + return @complete_chat_messages if defined?(@complete_chat_messages) + + @complete_chat_messages = + if chat&.messages + chat.messages + .where(type: [ "UserMessage", "AssistantMessage" ], status: "complete") + .includes(:tool_calls) + .ordered + .to_a + else + [] + end + end + # Raw Message records preceding the current turn — providers that build # their own native message shape (Anthropic) consume this directly so they # do not have to round-trip through the OpenAI-shaped payload below. def chat_message_records - return [] unless chat&.messages - - chat.messages - .where(type: [ "UserMessage", "AssistantMessage" ], status: "complete") - .where.not(id: message.id) - .includes(:tool_calls) - .ordered - .to_a + complete_chat_messages.reject { |m| m.id == message.id } end # Builds the OpenAI-shaped messages payload (role: "user" | "assistant" | @@ -136,13 +147,7 @@ def chat_message_records # chat path. Anthropic uses chat_message_records instead. 
def openai_messages_payload messages = [] - return messages unless chat&.messages - - chat.messages - .where(type: [ "UserMessage", "AssistantMessage" ], status: "complete") - .includes(:tool_calls) - .ordered - .each do |chat_message| + complete_chat_messages.each do |chat_message| if chat_message.tool_calls.any? messages << { role: chat_message.role, diff --git a/app/models/provider/anthropic.rb b/app/models/provider/anthropic.rb index 181f1a76f9..5d530c62af 100644 --- a/app/models/provider/anthropic.rb +++ b/app/models/provider/anthropic.rb @@ -262,7 +262,10 @@ def create_langfuse_trace(name:, input:, session_id: nil, user_identifier: nil) environment: Rails.env ) rescue => e - Rails.logger.warn("Langfuse trace creation failed: #{e.message}\n#{e.full_message}") + # Sanitized log (class + message only) — `e.full_message` bundles the + # backtrace + cause chain, which on some SDK error types includes the + # serialized request/response payload (model output, user prompt). + Rails.logger.warn("Langfuse trace creation failed: #{e.class}: #{e.message}") nil end @@ -286,7 +289,7 @@ def log_langfuse_generation(name:, model:, input:, trace:, output: nil, usage: n upsert_langfuse_trace(trace: trace, output: output) end rescue => e - Rails.logger.warn("Langfuse logging failed: #{e.message}\n#{e.full_message}") + Rails.logger.warn("Langfuse logging failed: #{e.class}: #{e.message}") end def upsert_langfuse_trace(trace:, output:, level: nil) @@ -297,7 +300,7 @@ def upsert_langfuse_trace(trace:, output:, level: nil) langfuse_client.trace(**payload) rescue => e - Rails.logger.warn("Langfuse trace upsert failed for trace_id=#{trace&.id}: #{e.message}\n#{e.full_message}") + Rails.logger.warn("Langfuse trace upsert failed for trace_id=#{trace&.id}: #{e.class}: #{e.message}") nil end diff --git a/app/models/provider/anthropic/message_formatter.rb b/app/models/provider/anthropic/message_formatter.rb index b6ba2717f9..7c697368c1 100644 --- 
a/app/models/provider/anthropic/message_formatter.rb +++ b/app/models/provider/anthropic/message_formatter.rb @@ -111,15 +111,21 @@ def tool_result_block(function_result) } end + # Anthropic's Messages API requires `tool_use.input` to be a JSON object + # (map). Normalize any non-Hash result to `{}` so corrupt or legacy + # ToolCall::Function records can't produce a payload Anthropic rejects. def parse_arguments(arguments) - case arguments - when nil then {} - when Hash then arguments - when String - return {} if arguments.blank? - JSON.parse(arguments) - else arguments - end + parsed = + case arguments + when nil then {} + when Hash then arguments + when String + return {} if arguments.blank? + JSON.parse(arguments) + else arguments + end + + parsed.is_a?(Hash) ? parsed : {} rescue JSON::ParserError {} end diff --git a/test/models/provider/anthropic/message_formatter_test.rb b/test/models/provider/anthropic/message_formatter_test.rb index cff8c625fe..88a7d20e92 100644 --- a/test/models/provider/anthropic/message_formatter_test.rb +++ b/test/models/provider/anthropic/message_formatter_test.rb @@ -171,6 +171,45 @@ class Provider::Anthropic::MessageFormatterTest < ActiveSupport::TestCase assert_equal "", messages[2][:content].first[:content] end + # Anthropic's tool_use.input MUST be a JSON object (map). If a stored + # ToolCall::Function record carries arguments that parse to a scalar or + # array (corrupt row, legacy data, OpenAI cross-bleed), the formatter + # must coerce them to `{}` so we don't ship an invalid payload. 
+ test "coerces non-Hash parsed arguments to empty Hash" do + [ '"hello"', "123", "true", "[1,2,3]" ].each do |non_object_json| + formatter = Provider::Anthropic::MessageFormatter.new( + prompt: "go", + function_results: [ { + call_id: "toolu_x", + name: "noop", + arguments: non_object_json, + output: nil + } ] + ) + + messages = formatter.build + + assert_equal({}, messages[1][:content].first[:input], + "expected empty Hash for arguments=#{non_object_json.inspect}") + end + end + + test "coerces non-Hash non-String arguments to empty Hash" do + formatter = Provider::Anthropic::MessageFormatter.new( + prompt: "go", + function_results: [ { + call_id: "toolu_x", + name: "noop", + arguments: [ 1, 2, 3 ], + output: nil + } ] + ) + + messages = formatter.build + + assert_equal({}, messages[1][:content].first[:input]) + end + private def stub_user_message(content) msg = UserMessage.new(content: content, ai_model: "claude-sonnet-4-6") From 4b511c4dadd2803a845621fdbc4b052b642a9815 Mon Sep 17 00:00:00 2001 From: Guillem Arias Date: Fri, 29 May 2026 14:50:49 +0200 Subject: [PATCH 8/8] fix(ci): replace sk-ant- prefixed test placeholders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pipelock secret scanner pattern-matches `sk-ant-*` as a real Anthropic API key and fails the PR security-scan check. Test stubs and ClimateControl env values used `sk-ant-test`, `sk-ant-from-setting`, `sk-ant-x`, `sk-ant-y` as obvious placeholders, but the scanner does not care about value entropy. Switched to `fake-anthropic-key-*` / `fake-token-*` strings so the scanner stops flagging them. No production code touched, no behavior change — Provider::Anthropic still accepts any non-blank token. 
--- test/models/provider/anthropic_test.rb | 4 ++-- test/models/provider/registry_test.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/models/provider/anthropic_test.rb b/test/models/provider/anthropic_test.rb index d0beb9da92..26d8945660 100644 --- a/test/models/provider/anthropic_test.rb +++ b/test/models/provider/anthropic_test.rb @@ -67,11 +67,11 @@ class Provider::AnthropicTest < ActiveSupport::TestCase Setting.stubs(:anthropic_access_token).returns(nil) assert_not Provider::Anthropic.configured? - Setting.stubs(:anthropic_access_token).returns("sk-ant-x") + Setting.stubs(:anthropic_access_token).returns("fake-token-1") assert Provider::Anthropic.configured? end - ClimateControl.modify("ANTHROPIC_API_KEY" => "sk-ant-y") do + ClimateControl.modify("ANTHROPIC_API_KEY" => "fake-token-2") do Setting.stubs(:anthropic_access_token).returns(nil) assert Provider::Anthropic.configured? end diff --git a/test/models/provider/registry_test.rb b/test/models/provider/registry_test.rb index bcf0509c55..2aa274662f 100644 --- a/test/models/provider/registry_test.rb +++ b/test/models/provider/registry_test.rb @@ -62,7 +62,7 @@ class Provider::RegistryTest < ActiveSupport::TestCase end test "anthropic provider initializes from ANTHROPIC_API_KEY env" do - ClimateControl.modify("ANTHROPIC_API_KEY" => "sk-ant-test", "ANTHROPIC_ACCESS_TOKEN" => nil) do + ClimateControl.modify("ANTHROPIC_API_KEY" => "fake-anthropic-key-for-tests", "ANTHROPIC_ACCESS_TOKEN" => nil) do Setting.stubs(:anthropic_access_token).returns(nil) provider = Provider::Registry.get_provider(:anthropic) @@ -78,7 +78,7 @@ class Provider::RegistryTest < ActiveSupport::TestCase "ANTHROPIC_BASE_URL" => "", "ANTHROPIC_MODEL" => "" ) do - Setting.stubs(:anthropic_access_token).returns("sk-ant-from-setting") + Setting.stubs(:anthropic_access_token).returns("fake-anthropic-key-from-setting") Setting.stubs(:anthropic_base_url).returns(nil) Setting.stubs(:anthropic_model).returns(nil)