diff --git a/FirebaseAI/CHANGELOG.md b/FirebaseAI/CHANGELOG.md
index 6329e05419a..1d2b651e78c 100644
--- a/FirebaseAI/CHANGELOG.md
+++ b/FirebaseAI/CHANGELOG.md
@@ -2,6 +2,8 @@
 - [fixed] Fixed `Sendable` warnings introduced in the Xcode 26 beta. (#14947)
 - [added] Added support for setting `title` in string, number and array `Schema`
   types. (#14971)
+- [added] Added support for configuring the "thinking" budget when using Gemini
+  2.5 series models. (#14909)
 
 # 11.13.0
 - [feature] Initial release of the Firebase AI Logic SDK (`FirebaseAI`). This
diff --git a/FirebaseAI/Sources/GenerateContentResponse.swift b/FirebaseAI/Sources/GenerateContentResponse.swift
index 6d4ba6932ec..cb212e5a616 100644
--- a/FirebaseAI/Sources/GenerateContentResponse.swift
+++ b/FirebaseAI/Sources/GenerateContentResponse.swift
@@ -26,6 +26,16 @@ public struct GenerateContentResponse: Sendable {
     /// The total number of tokens across the generated response candidates.
     public let candidatesTokenCount: Int
 
+    /// The number of tokens used by the model's internal "thinking" process.
+    ///
+    /// For models that support thinking (like Gemini 2.5 Pro and Flash), this represents the actual
+    /// number of tokens consumed for reasoning before the model generated a response. For models
+    /// that do not support thinking, this value will be `0`.
+    ///
+    /// When thinking is used, this count will be less than or equal to the `thinkingBudget` set in
+    /// the ``ThinkingConfig``.
+    public let thoughtsTokenCount: Int
+
     /// The total number of tokens in both the request and response.
     public let totalTokenCount: Int
 
@@ -330,6 +340,7 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
   enum CodingKeys: CodingKey {
     case promptTokenCount
     case candidatesTokenCount
+    case thoughtsTokenCount
     case totalTokenCount
     case promptTokensDetails
     case candidatesTokensDetails
@@ -340,6 +351,7 @@ extension GenerateContentResponse.UsageMetadata: Decodable {
     promptTokenCount = try container.decodeIfPresent(Int.self, forKey: .promptTokenCount) ?? 0
     candidatesTokenCount = try container
       .decodeIfPresent(Int.self, forKey: .candidatesTokenCount) ?? 0
+    thoughtsTokenCount = try container.decodeIfPresent(Int.self, forKey: .thoughtsTokenCount) ?? 0
     totalTokenCount = try container.decodeIfPresent(Int.self, forKey: .totalTokenCount) ?? 0
     promptTokensDetails = try container.decodeIfPresent([ModalityTokenCount].self,
                                                         forKey: .promptTokensDetails) ?? []
diff --git a/FirebaseAI/Sources/GenerationConfig.swift b/FirebaseAI/Sources/GenerationConfig.swift
index 3daebbae692..27c4310f12d 100644
--- a/FirebaseAI/Sources/GenerationConfig.swift
+++ b/FirebaseAI/Sources/GenerationConfig.swift
@@ -51,6 +51,9 @@ public struct GenerationConfig: Sendable {
   /// Supported modalities of the response.
   let responseModalities: [ResponseModality]?
 
+  /// Configuration for controlling the "thinking" behavior of compatible Gemini models.
+  let thinkingConfig: ThinkingConfig?
+
   /// Creates a new `GenerationConfig` value.
   ///
   /// See the
@@ -152,11 +155,14 @@ public struct GenerationConfig: Sendable {
   ///     > Warning: Specifying response modalities is a **Public Preview** feature, which means
   ///     > that it is not subject to any SLA or deprecation policy and could change in
   ///     > backwards-incompatible ways.
+  ///   - thinkingConfig: Configuration for controlling the "thinking" behavior of compatible Gemini
+  ///     models; see ``ThinkingConfig`` for more details.
   public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
               candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
               presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
               stopSequences: [String]? = nil, responseMIMEType: String? = nil,
-              responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil) {
+              responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil,
+              thinkingConfig: ThinkingConfig? = nil) {
     // Explicit init because otherwise if we re-arrange the above variables it changes the API
     // surface.
     self.temperature = temperature
@@ -170,6 +176,7 @@ public struct GenerationConfig: Sendable {
     self.responseMIMEType = responseMIMEType
     self.responseSchema = responseSchema
     self.responseModalities = responseModalities
+    self.thinkingConfig = thinkingConfig
   }
 }
 
@@ -189,5 +196,6 @@ extension GenerationConfig: Encodable {
     case responseMIMEType = "responseMimeType"
     case responseSchema
     case responseModalities
+    case thinkingConfig
   }
 }
diff --git a/FirebaseAI/Sources/Types/Public/ThinkingConfig.swift b/FirebaseAI/Sources/Types/Public/ThinkingConfig.swift
new file mode 100644
index 00000000000..c0e8f31465b
--- /dev/null
+++ b/FirebaseAI/Sources/Types/Public/ThinkingConfig.swift
@@ -0,0 +1,51 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/// Configuration for controlling the "thinking" behavior of compatible Gemini models.
+///
+/// Certain models, like Gemini 2.5 Flash and Pro, utilize a thinking process before generating a
+/// response. This allows them to reason through complex problems and plan a more coherent and
+/// accurate answer.
+public struct ThinkingConfig: Sendable {
+  /// The thinking budget in tokens.
+  ///
+  /// This parameter sets an upper limit on the number of tokens the model can use for its internal
+  /// "thinking" process. A higher budget may result in better quality responses for complex tasks
+  /// but can also increase latency and cost.
+  ///
+  /// If you don't specify a budget (`nil`), the model will automatically determine the appropriate
+  /// amount of thinking based on the complexity of the prompt.
+  ///
+  /// **Model-Specific Behavior:**
+  /// - **Gemini 2.5 Flash:** The budget can range from `0` to `24576`. Setting the budget to `0`
+  ///   disables the thinking process, which prioritizes the lowest latency and cost.
+  /// - **Gemini 2.5 Pro:** The budget must be an integer between `128` and `32768`. Thinking cannot
+  ///   be disabled for this model.
+  ///
+  /// An error will be thrown if you set a thinking budget for a model that does not support this
+  /// feature or if the specified budget is not within the model's supported range.
+  let thinkingBudget: Int?
+
+  /// Initializes a new `ThinkingConfig`.
+  ///
+  /// - Parameters:
+  ///   - thinkingBudget: The maximum number of tokens to be used for the model's thinking process.
+  public init(thinkingBudget: Int? = nil) {
+    self.thinkingBudget = thinkingBudget
+  }
+}
+
+// MARK: - Codable Conformances
+
+extension ThinkingConfig: Encodable {}
diff --git a/FirebaseAI/Tests/TestApp/Sources/Constants.swift b/FirebaseAI/Tests/TestApp/Sources/Constants.swift
index 1010b27cee3..71305646ab3 100644
--- a/FirebaseAI/Tests/TestApp/Sources/Constants.swift
+++ b/FirebaseAI/Tests/TestApp/Sources/Constants.swift
@@ -24,5 +24,7 @@ public enum ModelNames {
   public static let gemini2Flash = "gemini-2.0-flash-001"
   public static let gemini2FlashLite = "gemini-2.0-flash-lite-001"
   public static let gemini2FlashExperimental = "gemini-2.0-flash-exp"
+  public static let gemini2_5_FlashPreview = "gemini-2.5-flash-preview-05-20"
+  public static let gemini2_5_ProPreview = "gemini-2.5-pro-preview-06-05"
   public static let gemma3_4B = "gemma-3-4b-it"
 }
diff --git a/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift b/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift
index 8513ddfa484..962645d7ee3 100644
--- a/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift
+++ b/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift
@@ -76,6 +76,7 @@ struct GenerateContentIntegrationTests {
     let promptTokensDetails = try #require(usageMetadata.promptTokensDetails.first)
     #expect(promptTokensDetails.modality == .text)
     #expect(promptTokensDetails.tokenCount == usageMetadata.promptTokenCount)
+    #expect(usageMetadata.thoughtsTokenCount == 0)
     // The fields `candidatesTokenCount` and `candidatesTokensDetails` are not included when using
     // Gemma models.
     if modelName.hasPrefix("gemma") {
@@ -119,6 +120,7 @@ struct GenerateContentIntegrationTests {
     let usageMetadata = try #require(response.usageMetadata)
     #expect(usageMetadata.promptTokenCount.isEqual(to: 15, accuracy: tokenCountAccuracy))
     #expect(usageMetadata.candidatesTokenCount.isEqual(to: 1, accuracy: tokenCountAccuracy))
+    #expect(usageMetadata.thoughtsTokenCount == 0)
     #expect(usageMetadata.totalTokenCount == usageMetadata.promptTokenCount + usageMetadata
       .candidatesTokenCount)
     #expect(usageMetadata.promptTokensDetails.count == 1)
@@ -131,6 +133,68 @@ struct GenerateContentIntegrationTests {
     #expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
   }
 
+  @Test(arguments: [
+    (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_FlashPreview, 0),
+    (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_FlashPreview, 24576),
+    // TODO: Add Vertex AI Gemini 2.5 Pro tests when available.
+    // (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_ProPreview, 128),
+    // (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2_5_ProPreview, 32768),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashPreview, 0),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashPreview, 24576),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_ProPreview, 128),
+    (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_ProPreview, 32768),
+    (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2_5_FlashPreview, 0),
+    (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2_5_FlashPreview, 24576),
+  ])
+  func generateContentThinking(_ config: InstanceConfig, modelName: String,
+                               thinkingBudget: Int) async throws {
+    let model = FirebaseAI.componentInstance(config).generativeModel(
+      modelName: modelName,
+      generationConfig: GenerationConfig(
+        temperature: 0.0,
+        topP: 0.0,
+        topK: 1,
+        thinkingConfig: ThinkingConfig(thinkingBudget: thinkingBudget)
+      ),
+      safetySettings: safetySettings
+    )
+    let prompt = "Where is Google headquarters located? Answer with the city name only."
+
+    let response = try await model.generateContent(prompt)
+
+    let text = try #require(response.text).trimmingCharacters(in: .whitespacesAndNewlines)
+    #expect(text == "Mountain View")
+
+    let usageMetadata = try #require(response.usageMetadata)
+    #expect(usageMetadata.promptTokenCount.isEqual(to: 13, accuracy: tokenCountAccuracy))
+    #expect(usageMetadata.promptTokensDetails.count == 1)
+    let promptTokensDetails = try #require(usageMetadata.promptTokensDetails.first)
+    #expect(promptTokensDetails.modality == .text)
+    #expect(promptTokensDetails.tokenCount == usageMetadata.promptTokenCount)
+    if thinkingBudget == 0 {
+      #expect(usageMetadata.thoughtsTokenCount == 0)
+    } else {
+      #expect(usageMetadata.thoughtsTokenCount <= thinkingBudget)
+    }
+    #expect(usageMetadata.candidatesTokenCount.isEqual(to: 3, accuracy: tokenCountAccuracy))
+    // The `candidatesTokensDetails` field is erroneously omitted when using the Google AI (Gemini
+    // Developer API) backend.
+    if case .googleAI = config.apiConfig.service {
+      #expect(usageMetadata.candidatesTokensDetails.isEmpty)
+    } else {
+      #expect(usageMetadata.candidatesTokensDetails.count == 1)
+      let candidatesTokensDetails = try #require(usageMetadata.candidatesTokensDetails.first)
+      #expect(candidatesTokensDetails.modality == .text)
+      #expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
+    }
+    #expect(usageMetadata.totalTokenCount > 0)
+    #expect(usageMetadata.totalTokenCount == (
+      usageMetadata.promptTokenCount
+        + usageMetadata.thoughtsTokenCount
+        + usageMetadata.candidatesTokenCount
+    ))
+  }
+
   @Test(arguments: [
     InstanceConfig.vertexAI_v1beta,
     InstanceConfig.googleAI_v1beta,
diff --git a/FirebaseAI/Tests/TestApp/Tests/Utilities/InstanceConfig.swift b/FirebaseAI/Tests/TestApp/Tests/Utilities/InstanceConfig.swift
index 82f345d99fc..fbea0796f26 100644
--- a/FirebaseAI/Tests/TestApp/Tests/Utilities/InstanceConfig.swift
+++ b/FirebaseAI/Tests/TestApp/Tests/Utilities/InstanceConfig.swift
@@ -32,6 +32,10 @@ struct InstanceConfig: Equatable, Encodable {
   static let googleAI_v1beta_staging = InstanceConfig(
     apiConfig: APIConfig(service: .googleAI(endpoint: .firebaseProxyStaging), version: .v1beta)
   )
+  static let googleAI_v1beta_freeTier = InstanceConfig(
+    appName: FirebaseAppNames.spark,
+    apiConfig: APIConfig(service: .googleAI(endpoint: .firebaseProxyProd), version: .v1beta)
+  )
   static let googleAI_v1beta_freeTier_bypassProxy = InstanceConfig(
     appName: FirebaseAppNames.spark,
     apiConfig: APIConfig(service: .googleAI(endpoint: .googleAIBypassProxy), version: .v1beta)
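
For reviewers who want to exercise the new API surface outside the integration tests, here is a minimal usage sketch. Only `ThinkingConfig`, `thinkingBudget`, `thoughtsTokenCount`, and the `gemini-2.5-flash-preview-05-20` model name come from this diff; the `FirebaseAI.firebaseAI(backend:)` entry point is the existing SDK API, the 1024-token budget and the prompt are illustrative, and a configured default Firebase app is assumed.

```swift
import FirebaseAI

// Sketch: configure a thinking budget and inspect the thinking token usage.
// Assumes FirebaseApp.configure() has already been called at app startup.
func generateWithThinkingBudget() async throws {
  // Cap the model's internal "thinking" at 1024 tokens (illustrative value;
  // Gemini 2.5 Flash accepts 0 through 24576, where 0 disables thinking).
  let generationConfig = GenerationConfig(
    thinkingConfig: ThinkingConfig(thinkingBudget: 1024)
  )

  let model = FirebaseAI.firebaseAI(backend: .googleAI()).generativeModel(
    modelName: "gemini-2.5-flash-preview-05-20",
    generationConfig: generationConfig
  )

  let response = try await model.generateContent(
    "Why is the sky blue? Answer in one sentence."
  )
  print(response.text ?? "No text in response.")

  // The new `thoughtsTokenCount` field reports how many tokens were spent on
  // reasoning; it should be 0 when thinking is disabled or unsupported.
  if let usageMetadata = response.usageMetadata {
    print("Thought tokens: \(usageMetadata.thoughtsTokenCount)")
    print("Candidate tokens: \(usageMetadata.candidatesTokenCount)")
  }
}
```

Per the `ThinkingConfig` documentation above, passing `thinkingBudget: 0` disables thinking on Gemini 2.5 Flash, while omitting `thinkingConfig` entirely lets the model choose its own budget.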