Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions app/controllers/settings/hostings_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ def update
update_encrypted_setting(:tiingo_api_key)
update_encrypted_setting(:eodhd_api_key)
update_encrypted_setting(:alpha_vantage_api_key)
update_encrypted_setting(:gemini_api_key)

if hosting_params.key?(:gemini_model)
Setting.gemini_model = hosting_params[:gemini_model].presence
end

if hosting_params.key?(:syncs_include_pending)
Setting.syncs_include_pending = hosting_params[:syncs_include_pending] == "1"
Expand Down Expand Up @@ -175,7 +180,7 @@ def update
end
parsed = Integer(raw, 10) rescue nil
if parsed.nil? || parsed < minimum
label = t("settings.hostings.openai_settings.#{key}_label")
label = t("settings.hostings.llm_token_budget.#{key}_label")
raise Setting::ValidationError, t(".invalid_llm_budget", field: label, minimum: minimum)
end
Setting.public_send("#{key}=", parsed)
Expand Down Expand Up @@ -223,7 +228,7 @@ def disconnect_external_assistant
private
def hosting_params
return ActionController::Parameters.new unless params.key?(:setting)
params.require(:setting).permit(:onboarding_state, :require_email_confirmation, :invite_only_default_family_id, :brand_fetch_client_id, :brand_fetch_high_res_logos, :twelve_data_api_key, :tiingo_api_key, :eodhd_api_key, :alpha_vantage_api_key, :openai_access_token, :openai_uri_base, :openai_model, :openai_json_mode, :llm_context_window, :llm_max_response_tokens, :llm_max_items_per_call, :exchange_rate_provider, :securities_provider, :syncs_include_pending, :auto_sync_enabled, :auto_sync_time, :external_assistant_url, :external_assistant_token, :external_assistant_agent_id, securities_providers: [])
params.require(:setting).permit(:onboarding_state, :require_email_confirmation, :invite_only_default_family_id, :brand_fetch_client_id, :brand_fetch_high_res_logos, :twelve_data_api_key, :tiingo_api_key, :eodhd_api_key, :alpha_vantage_api_key, :openai_access_token, :openai_uri_base, :openai_model, :openai_json_mode, :gemini_api_key, :gemini_model, :llm_context_window, :llm_max_response_tokens, :llm_max_items_per_call, :exchange_rate_provider, :securities_provider, :syncs_include_pending, :auto_sync_enabled, :auto_sync_time, :external_assistant_url, :external_assistant_token, :external_assistant_agent_id, securities_providers: [])
end

def update_assistant_type
Expand Down
9 changes: 7 additions & 2 deletions app/models/provider/openai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -266,13 +266,18 @@ def chat_response(
user_identifier: nil,
family: nil
)
# For custom providers (Gemini, Ollama, etc.) always use the configured
# model — the caller may pass an OpenAI model name like "gpt-4.1" which
# the custom endpoint won't recognise.
effective_model = custom_provider? ? @default_model : (model.presence || @default_model)

if supports_responses_endpoint?
# Native path uses the Responses API which chains history via
# `previous_response_id`; it does NOT need (and must not receive)
# inline message history in the input payload.
native_chat_response(
prompt: prompt,
model: model,
model: effective_model,
instructions: instructions,
functions: functions,
function_results: function_results,
Expand All @@ -285,7 +290,7 @@ def chat_response(
else
generic_chat_response(
prompt: prompt,
model: model,
model: effective_model,
instructions: instructions,
functions: functions,
function_results: function_results,
Expand Down
22 changes: 20 additions & 2 deletions app/models/provider/registry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ def github
def openai
access_token = ENV["OPENAI_ACCESS_TOKEN"].presence || Setting.openai_access_token

return nil unless access_token.present?
# Fall back to Gemini (via its OpenAI-compatible endpoint) when no
# OpenAI key is configured — all existing call-sites continue to work.
return gemini unless access_token.present?
Comment on lines +68 to +70
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Avoid OpenAI→Gemini fallback for OpenAI-specific model calls

Returning gemini from openai here changes all Provider::Registry.get_provider(:openai) call sites to use Gemini when no OpenAI key is set, but some of those paths still pass OpenAI model names explicitly. For example, Assistant::Function::ImportBankStatement#execute always sends model: openai_model (defaulting to gpt-4.1), and Provider::Openai#extract_bank_statement does not override that model for custom providers, so Gemini-only deployments will send an invalid model and fail extraction requests.

Useful? React with 👍 / 👎.


uri_base = ENV["OPENAI_URI_BASE"].presence || Setting.openai_uri_base
model = ENV["OPENAI_MODEL"].presence || Setting.openai_model
Expand All @@ -78,6 +80,22 @@ def openai
Provider::Openai.new(access_token, uri_base: uri_base, model: model)
end

# Builds the Gemini-backed LLM provider, or returns nil when no API key is
# configured. The key is read from ENV["GEMINI_API_KEY"] first, then from
# Setting.gemini_api_key.
def gemini
api_key = ENV["GEMINI_API_KEY"].presence || Setting.gemini_api_key

return nil unless api_key.present?

# Gemini exposes an OpenAI-compatible endpoint so we reuse Provider::Openai.
# Default model is gemini-2.5-flash; override via the GEMINI_MODEL env var
# (checked first) or Setting.gemini_model.
model = ENV["GEMINI_MODEL"].presence || Setting.gemini_model.presence || "gemini-2.5-flash"

Provider::Openai.new(
api_key,
uri_base: "https://generativelanguage.googleapis.com/v1beta/openai/",
model: model
)
end

# Builds the Yahoo Finance provider. It is constructed with no arguments, so
# no API key or Setting lookup happens here.
def yahoo_finance
Provider::YahooFinance.new
end
Expand Down Expand Up @@ -147,7 +165,7 @@ def available_providers
when :securities
%i[twelve_data yahoo_finance tiingo eodhd alpha_vantage mfapi binance_public]
when :llm
%i[openai]
%i[openai gemini]
else
%i[plaid_us plaid_eu github openai]
end
Expand Down
3 changes: 3 additions & 0 deletions app/models/setting.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ class ValidationError < StandardError; end
field :openai_uri_base, type: :string, default: ENV["OPENAI_URI_BASE"]
field :openai_model, type: :string, default: ENV["OPENAI_MODEL"]
field :openai_json_mode, type: :string, default: ENV["LLM_JSON_MODE"]
field :gemini_api_key, type: :string, default: ENV["GEMINI_API_KEY"]
field :gemini_model, type: :string, default: ENV["GEMINI_MODEL"]
Comment on lines +13 to +14
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major | ⚡ Quick win

Mirror cache invalidation behavior for gemini_model changes.

openai_model= clears AI cache when the model changes, but gemini_model currently has no equivalent hook. This can leave stale cached AI outputs after switching Gemini models.

💡 Suggested parity fix
 class << self
   alias_method :raw_onboarding_state, :onboarding_state
   alias_method :raw_onboarding_state=, :onboarding_state=
   alias_method :raw_openai_model, :openai_model
   alias_method :raw_openai_model=, :openai_model=
+  alias_method :raw_gemini_model, :gemini_model
+  alias_method :raw_gemini_model=, :gemini_model=
@@
   def openai_model=(value)
     old_value = raw_openai_model
     self.raw_openai_model = value

     if old_value != value && old_value.present?
       Rails.logger.info("OpenAI model changed from #{old_value} to #{value}, clearing AI cache for all families")
       Family.find_each do |family|
         ClearAiCacheJob.perform_later(family)
       end
     end
   end
+
+  def gemini_model=(value)
+    old_value = raw_gemini_model
+    self.raw_gemini_model = value
+
+    if old_value != value && old_value.present?
+      Rails.logger.info("Gemini model changed from #{old_value} to #{value}, clearing AI cache for all families")
+      Family.find_each do |family|
+        ClearAiCacheJob.perform_later(family)
+      end
+    end
+  end
 end
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@app/models/setting.rb` around lines 13 - 14, Add a setter for gemini_model
that mirrors openai_model=: implement a gemini_model=(val) method in the Setting
model that compares the new value to the current value, writes the attribute
(same persistence approach used by openai_model=, e.g. write_attribute or
self[:gemini_model]=) and, if the model changed, calls the same cache
invalidation routine used by openai_model= (the method used there — e.g.
clear_ai_cache!, clear_all_ai_cache, or equivalent) so Gemini model changes
flush the AI cache.


# LLM token budget (applies to every outbound LLM call: chat, auto-categorize,
# merchant detection, enhance-merchants, PDF processing). Defaults track
Expand Down Expand Up @@ -70,6 +72,7 @@ module EncryptedSettingFields
eodhd_api_key
alpha_vantage_api_key
openai_access_token
gemini_api_key
external_assistant_token
].freeze

Expand Down
3 changes: 2 additions & 1 deletion app/models/user.rb
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ def ai_available?
when "external"
Assistant::External.available_for?(self)
else
ENV["OPENAI_ACCESS_TOKEN"].present? || Setting.openai_access_token.present?
ENV["OPENAI_ACCESS_TOKEN"].present? || Setting.openai_access_token.present? ||
ENV["GEMINI_API_KEY"].present? || Setting.gemini_api_key.present?
end
end

Expand Down
43 changes: 43 additions & 0 deletions app/views/settings/hostings/_gemini_settings.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<%# Google Gemini provider settings: API key plus an optional model name. %>
<%# The form PATCHes to settings_hosting_path and is wired to the auto-submit-form controller with a "blur" trigger. %>
<div class="space-y-4">
<div>
<h2 class="font-medium mb-1"><%= t(".title") %></h2>
<%# When the key comes from the environment, show a notice instead of the setup description. %>
<% if ENV["GEMINI_API_KEY"].present? %>
<p class="text-sm text-secondary"><%= t(".env_configured_message") %></p>
<% else %>
<p class="text-secondary text-sm mb-4"><%= t(".description") %></p>
<% end %>
</div>

<%= styled_form_with model: Setting.new,
url: settings_hosting_path,
method: :patch,
class: "space-y-4",
data: {
controller: "auto-submit-form",
"auto-submit-form-trigger-event-value": "blur"
} do |form| %>
<%# The stored key is never echoed back: a fixed "********" mask is rendered when one is saved. %>
<%# The input is disabled when GEMINI_API_KEY is set in the environment. %>
<%# NOTE(review): the mask is a real field value and the form auto-submits on blur; confirm the controller ignores the mask so it cannot overwrite the stored key. %>
<%= form.password_field :gemini_api_key,
label: t(".api_key_label"),
placeholder: t(".api_key_placeholder"),
value: (Setting.gemini_api_key.present? ? "********" : nil),
autocomplete: "off",
autocapitalize: "none",
spellcheck: "false",
inputmode: "text",
disabled: ENV["GEMINI_API_KEY"].present?,
data: { "auto-submit-form-target": "auto" } %>
<p class="text-xs text-secondary mt-1"><%= t(".api_key_help_html") %></p>

<%# Optional model override; the input is disabled when GEMINI_MODEL is set in the environment. %>
<%= form.text_field :gemini_model,
label: t(".model_label"),
placeholder: t(".model_placeholder"),
value: Setting.gemini_model,
autocomplete: "off",
autocapitalize: "none",
spellcheck: "false",
inputmode: "text",
disabled: ENV["GEMINI_MODEL"].present?,
data: { "auto-submit-form-target": "auto" } %>
<p class="text-xs text-secondary mt-1"><%= t(".model_help_html") %></p>
<% end %>
</div>
39 changes: 39 additions & 0 deletions app/views/settings/hostings/_llm_token_budget.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<%# LLM token budget settings: context window, max response tokens, and max items per batch. %>
<%# The form PATCHes to settings_hosting_path and is wired to the auto-submit-form controller with a "blur" trigger. %>
<%# Each input is disabled when its matching LLM_* environment variable is set. %>
<div class="space-y-4">
<p class="text-secondary text-sm"><%= t(".description") %></p>

<%= styled_form_with model: Setting.new,
url: settings_hosting_path,
method: :patch,
class: "space-y-4",
data: {
controller: "auto-submit-form",
"auto-submit-form-trigger-event-value": "blur"
} do |form| %>
<%# Context window size; the browser-side minimum is 256. %>
<%= form.number_field :llm_context_window,
label: t(".context_window_label"),
placeholder: "2048",
value: Setting.llm_context_window,
min: 256,
disabled: ENV["LLM_CONTEXT_WINDOW"].present?,
data: { "auto-submit-form-target": "auto" } %>
<p class="text-xs text-secondary mt-1 mb-3"><%= t(".context_window_help") %></p>

<%# Max response tokens; the browser-side minimum is 64. %>
<%= form.number_field :llm_max_response_tokens,
label: t(".max_response_tokens_label"),
placeholder: "512",
value: Setting.llm_max_response_tokens,
min: 64,
disabled: ENV["LLM_MAX_RESPONSE_TOKENS"].present?,
data: { "auto-submit-form-target": "auto" } %>
<p class="text-xs text-secondary mt-1 mb-3"><%= t(".max_response_tokens_help") %></p>

<%# Max items per batch; the browser-side minimum is 1. %>
<%= form.number_field :llm_max_items_per_call,
label: t(".max_items_per_call_label"),
placeholder: "25",
value: Setting.llm_max_items_per_call,
min: 1,
disabled: ENV["LLM_MAX_ITEMS_PER_CALL"].present?,
data: { "auto-submit-form-target": "auto" } %>
<p class="text-xs text-secondary mt-1"><%= t(".max_items_per_call_help") %></p>
<% end %>
</div>
32 changes: 0 additions & 32 deletions app/views/settings/hostings/_openai_settings.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -63,37 +63,5 @@
{ disabled: ENV["LLM_JSON_MODE"].present?,
data: { "auto-submit-form-target": "auto" } } %>
<p class="text-xs text-secondary mt-1"><%= t(".json_mode_help") %></p>

<div class="pt-4 border-t border-secondary">
<h3 class="font-medium mb-1"><%= t(".budget_heading") %></h3>
<p class="text-xs text-secondary mb-3"><%= t(".budget_description") %></p>

<%= form.number_field :llm_context_window,
label: t(".context_window_label"),
placeholder: "2048",
value: Setting.llm_context_window,
min: 256,
disabled: ENV["LLM_CONTEXT_WINDOW"].present?,
data: { "auto-submit-form-target": "auto" } %>
<p class="text-xs text-secondary mt-1 mb-3"><%= t(".context_window_help") %></p>

<%= form.number_field :llm_max_response_tokens,
label: t(".max_response_tokens_label"),
placeholder: "512",
value: Setting.llm_max_response_tokens,
min: 64,
disabled: ENV["LLM_MAX_RESPONSE_TOKENS"].present?,
data: { "auto-submit-form-target": "auto" } %>
<p class="text-xs text-secondary mt-1 mb-3"><%= t(".max_response_tokens_help") %></p>

<%= form.number_field :llm_max_items_per_call,
label: t(".max_items_per_call_label"),
placeholder: "25",
value: Setting.llm_max_items_per_call,
min: 1,
disabled: ENV["LLM_MAX_ITEMS_PER_CALL"].present?,
data: { "auto-submit-form-target": "auto" } %>
<p class="text-xs text-secondary mt-1"><%= t(".max_items_per_call_help") %></p>
</div>
<% end %>
</div>
18 changes: 15 additions & 3 deletions app/views/settings/hostings/show.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,24 @@
<%= settings_section title: t(".ai_assistant") do %>
<%= render "settings/hostings/assistant_settings" %>
<% end %>
<%= settings_section title: t(".general") do %>
<div class="space-y-6">
<%= settings_section title: t(".ai_provider") do %>
<p class="text-sm text-secondary mb-6"><%= t(".ai_provider_description") %></p>
<div class="space-y-6">
<%= render "settings/hostings/openai_settings" %>
<%= render "settings/hostings/brand_fetch_settings" %>
<div class="relative flex items-center py-1">
<div class="flex-grow border-t border-secondary"></div>
<span class="flex-shrink mx-4 text-xs text-secondary uppercase tracking-wider"><%= t(".ai_provider_or") %></span>
<div class="flex-grow border-t border-secondary"></div>
</div>
<%= render "settings/hostings/gemini_settings" %>
</div>
<% end %>
<%= settings_section title: t(".llm_token_budget") do %>
<%= render "settings/hostings/llm_token_budget" %>
<% end %>
<%= settings_section title: t(".general") do %>
<%= render "settings/hostings/brand_fetch_settings" %>
<% end %>
<%= settings_section title: t(".financial_data_providers") do %>
<div class="space-y-6">
<%= render "settings/hostings/provider_selection" %>
Expand Down
2 changes: 1 addition & 1 deletion config/locales/views/chats/en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ en:
ai_consent:
title: "Enable AI Chats"
available_description: "AI chat can answer financial questions and provide insights based on your data. To use this feature you'll need to explicitly enable it."
unavailable_description_html: "To use the AI assistant, you need to set the <code class=\"bg-surface-inset px-1 py-0.5 rounded font-mono text-xs\">OPENAI_ACCESS_TOKEN</code> environment variable or configure it in the Self-Hosting settings of your instance."
unavailable_description_html: "To use the AI assistant, configure an AI provider in the <a href=\"/settings/hosting\" class=\"underline\">Self-Hosting settings</a>. You can use OpenAI (<code class=\"bg-surface-inset px-1 py-0.5 rounded font-mono text-xs\">OPENAI_ACCESS_TOKEN</code>) or Google Gemini (<code class=\"bg-surface-inset px-1 py-0.5 rounded font-mono text-xs\">GEMINI_API_KEY</code>)."
enable_button: "Enable AI Chats"
disable_note: "Disable anytime. All data sent to our LLM providers is anonymized."
assistant_messages:
Expand Down
30 changes: 22 additions & 8 deletions config/locales/views/settings/hostings/en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ en:
show:
general: General Settings
ai_assistant: AI Assistant
ai_provider: AI Provider
ai_provider_description: "You only need one. Configure either OpenAI or Google Gemini — if both are set, OpenAI takes priority."
ai_provider_or: "or"
llm_token_budget: AI Token Budget
financial_data_providers: Financial Data Providers
sync_settings: Sync Settings
invites: Invite Codes
Expand Down Expand Up @@ -90,6 +94,24 @@ en:
title: Brand Fetch Settings
high_res_label: Enable high-resolution logos
high_res_description: When enabled, logos will be retrieved at 120x120 resolution instead of 40x40. This provides sharper images on high-DPI displays.
llm_token_budget:
description: Applies to every LLM call regardless of provider — chat history, auto-categorization, merchant detection, and PDF processing. Defaults are conservative for small-context local models. Raise for cloud models (OpenAI, Gemini) with large context windows.
context_window_label: Context Window (Optional)
context_window_help: "Total tokens the model will accept. Default: 2048 — raise to 8192+ for cloud OpenAI or large-context local models."
max_response_tokens_label: Max Response Tokens (Optional)
max_response_tokens_help: "Tokens reserved for the model's reply. Default: 512. Lower to free up room for longer history."
max_items_per_call_label: Max Items Per Batch (Optional)
max_items_per_call_help: "Upper bound for auto-categorize / merchant detection batches. Default: 25. Larger batches are auto-sliced to fit the context window."
gemini_settings:
title: Google Gemini
description: Enter your Google Gemini API key to use Gemini as your AI provider. When set (and no OpenAI key is configured), all AI features — chat, auto-categorization, and merchant detection — will use Gemini.
env_configured_message: Successfully configured through the GEMINI_API_KEY environment variable.
api_key_label: API Key
api_key_placeholder: Enter your Gemini API key here
api_key_help_html: 'Get your API key from <a href="https://aistudio.google.com/apikey" target="_blank" rel="noopener noreferrer" class="underline">Google AI Studio</a>. The free tier includes generous usage limits. Defaults to <code>gemini-2.5-flash</code>.'
model_label: Model (Optional)
model_placeholder: "gemini-2.5-flash (default)"
model_help_html: 'Leave blank for <code>gemini-2.5-flash</code> (recommended). Smarter: <code>gemini-2.5-pro</code>. Cheaper/faster: <code>gemini-2.5-flash-lite</code>. Thinking: <code>gemini-2.0-flash-thinking-exp-01-21</code>. <a href="https://ai.google.dev/gemini-api/docs/models/gemini" target="_blank" rel="noopener noreferrer" class="underline">All models →</a>'
openai_settings:
description: Enter the access token and optionally configure a custom OpenAI-compatible provider
env_configured_message: Successfully configured through environment variables.
Expand All @@ -105,14 +127,6 @@ en:
json_mode_none: None (best for standard models)
json_mode_json_object: JSON Object
json_mode_help: "Strict mode works best with thinking models (qwen-thinking, deepseek-reasoner). None mode works best with standard models (llama, mistral, gpt-oss)."
budget_heading: Token Budget
budget_description: Applies to every LLM call — chat history, auto-categorization, merchant detection, and PDF processing. Defaults are conservative for small-context local models. Raise for cloud models with larger context windows.
context_window_label: Context Window (Optional)
context_window_help: "Total tokens the model will accept. Default: 2048 — raise to 8192+ for cloud OpenAI or large-context local models."
max_response_tokens_label: Max Response Tokens (Optional)
max_response_tokens_help: "Tokens reserved for the model's reply. Default: 512. Lower to free up room for longer history."
max_items_per_call_label: Max Items Per Batch (Optional)
max_items_per_call_help: "Upper bound for auto-categorize / merchant detection batches. Default: 25. Larger batches are auto-sliced to fit the context window."
title: OpenAI
yahoo_finance_settings:
title: Yahoo Finance
Expand Down
Loading
Loading