diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAIChatCompletionServiceTests.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAIChatCompletionServiceTests.cs index 844501f729fa..4c306fc630a7 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAIChatCompletionServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAIChatCompletionServiceTests.cs @@ -273,6 +273,39 @@ public async Task GetChatMessageContentsHandlesResponseFormatCorrectlyAsync(obje Assert.Equal(expectedResponseType, content.GetProperty("response_format").GetProperty("type").GetString()); } + [Theory] + [InlineData(true, "max_completion_tokens")] + [InlineData(false, "max_tokens")] + public async Task GetChatMessageContentsHandlesMaxTokensCorrectlyAsync(bool useNewMaxTokens, string expectedPropertyName) + { + // Arrange + var service = new AzureOpenAIChatCompletionService("deployment", "https://endpoint", "api-key", "model-id", this._httpClient); + var settings = new AzureOpenAIPromptExecutionSettings + { + SetNewMaxCompletionTokensEnabled = useNewMaxTokens, + MaxTokens = 123 + }; + + using var responseMessage = new HttpResponseMessage(HttpStatusCode.OK) + { + Content = new StringContent(AzureOpenAITestHelper.GetTestResponse("chat_completion_test_response.json")) + }; + this._messageHandlerStub.ResponsesToReturn.Add(responseMessage); + + // Act + var result = await service.GetChatMessageContentsAsync(new ChatHistory("System message"), settings); + + // Assert + var requestContent = this._messageHandlerStub.RequestContents[0]; + + Assert.NotNull(requestContent); + + var content = JsonSerializer.Deserialize<JsonElement>(Encoding.UTF8.GetString(requestContent)); + + Assert.True(content.TryGetProperty(expectedPropertyName, out var propertyValue)); + Assert.Equal(123, propertyValue.GetInt32()); + } + [Theory] + [InlineData(null, null)] + [InlineData("string", "low")] diff --git
a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs index 6b4b16c574af..8f3b9a245634 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Settings/AzureOpenAIPromptExecutionSettingsTests.cs @@ -35,6 +35,7 @@ public void ItCreatesOpenAIExecutionSettingsWithCorrectDefaults() Assert.Null(executionSettings.TopLogprobs); Assert.Null(executionSettings.Logprobs); Assert.Null(executionSettings.AzureChatDataSource); + Assert.False(executionSettings.SetNewMaxCompletionTokensEnabled); Assert.Equal(maxTokensSettings, executionSettings.MaxTokens); Assert.Null(executionSettings.Store); Assert.Null(executionSettings.Metadata); @@ -58,7 +59,8 @@ public void ItUsesExistingOpenAIExecutionSettings() TokenSelectionBiases = new Dictionary<int, int>() { { 1, 2 }, { 3, 4 } }, Seed = 123456, Store = true, - Metadata = new Dictionary<string, string>() { { "foo", "bar" } } + Metadata = new Dictionary<string, string>() { { "foo", "bar" } }, + SetNewMaxCompletionTokensEnabled = true, }; // Act @@ -74,6 +76,7 @@ public void ItUsesExistingOpenAIExecutionSettings() Assert.Equal(actualSettings.Seed, executionSettings.Seed); Assert.Equal(actualSettings.Store, executionSettings.Store); Assert.Equal(actualSettings.Metadata, executionSettings.Metadata); + Assert.Equal(actualSettings.SetNewMaxCompletionTokensEnabled, executionSettings.SetNewMaxCompletionTokensEnabled); } [Fact] @@ -259,6 +262,7 @@ public void PromptExecutionSettingsFreezeWorksAsExpected() Assert.Throws<NotSupportedException>(() => executionSettings.TokenSelectionBiases?.Add(5, 6)); Assert.Throws<InvalidOperationException>(() => executionSettings.Store = false); Assert.Throws<NotSupportedException>(() => executionSettings.Metadata?.Add("bar", "foo")); + Assert.Throws<InvalidOperationException>(() => executionSettings.SetNewMaxCompletionTokensEnabled = true); executionSettings!.Freeze();
// idempotent Assert.True(executionSettings.IsFrozen); diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.ChatCompletion.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.ChatCompletion.cs index 687addf37087..a0068ee2a737 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.ChatCompletion.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.ChatCompletion.cs @@ -1,5 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. +using System; +using System.ClientModel.Primitives; using System.Diagnostics; using Azure.AI.OpenAI.Chat; using Microsoft.SemanticKernel.ChatCompletion; @@ -35,23 +37,29 @@ protected override ChatCompletionOptions CreateChatCompletionOptions( { return base.CreateChatCompletionOptions(executionSettings, chatHistory, toolCallingConfig, kernel); } - - var options = new ChatCompletionOptions - { - MaxOutputTokenCount = executionSettings.MaxTokens, - Temperature = (float?)executionSettings.Temperature, - TopP = (float?)executionSettings.TopP, - FrequencyPenalty = (float?)executionSettings.FrequencyPenalty, - PresencePenalty = (float?)executionSettings.PresencePenalty, + ChatCompletionOptions options = ModelReaderWriter.Read<ChatCompletionOptions>(BinaryData.FromString("{}")!)!; + options.MaxOutputTokenCount = executionSettings.MaxTokens; + options.Temperature = (float?)executionSettings.Temperature; + options.TopP = (float?)executionSettings.TopP; + options.FrequencyPenalty = (float?)executionSettings.FrequencyPenalty; + options.PresencePenalty = (float?)executionSettings.PresencePenalty; #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - Seed = executionSettings.Seed, + + options.Seed = executionSettings.Seed; #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates.
Suppress this diagnostic to proceed. - EndUserId = executionSettings.User, - TopLogProbabilityCount = executionSettings.TopLogprobs, - IncludeLogProbabilities = executionSettings.Logprobs, - StoredOutputEnabled = executionSettings.Store, - ReasoningEffortLevel = GetEffortLevel(executionSettings), - }; + options.EndUserId = executionSettings.User; + options.TopLogProbabilityCount = executionSettings.TopLogprobs; + options.IncludeLogProbabilities = executionSettings.Logprobs; + options.StoredOutputEnabled = executionSettings.Store; + options.ReasoningEffortLevel = GetEffortLevel(executionSettings); + options.ResponseModalities = ChatResponseModalities.Default; + + if (azureSettings.SetNewMaxCompletionTokensEnabled) + { +#pragma warning disable AOAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + options.SetNewMaxCompletionTokensPropertyEnabled(true); +#pragma warning restore AOAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + } var responseFormat = GetResponseFormat(executionSettings); if (responseFormat is not null) diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Settings/AzureOpenAIPromptExecutionSettings.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Settings/AzureOpenAIPromptExecutionSettings.cs index 1d00ba3207f5..8852e5fd35df 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Settings/AzureOpenAIPromptExecutionSettings.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI/Settings/AzureOpenAIPromptExecutionSettings.cs @@ -16,6 +16,26 @@ namespace Microsoft.SemanticKernel.Connectors.AzureOpenAI; [JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)] public sealed class AzureOpenAIPromptExecutionSettings : OpenAIPromptExecutionSettings { + /// <summary> + /// Enabling this property will enforce the new max_completion_tokens parameter to be sent to the Azure OpenAI API.
+ /// </summary> + /// <remarks> + /// This setting is temporary and flags the underlying Azure SDK to use the new max_completion_tokens parameter using the + /// <see href="https://learn.microsoft.com/en-us/dotnet/api/azure.ai.openai.chat"> + /// SetNewMaxCompletionTokensPropertyEnabled</see> extension. + /// </remarks> + [Experimental("SKEXP0010")] + [JsonIgnore] + public bool SetNewMaxCompletionTokensEnabled + { + get => this._setNewMaxCompletionTokensEnabled; + set + { + this.ThrowIfFrozen(); + this._setNewMaxCompletionTokensEnabled = value; + } + } + /// /// An abstraction of additional settings for chat completion, see https://learn.microsoft.com/en-us/dotnet/api/azure.ai.openai.azurechatextensionsoptions. /// This property is compatible only with Azure OpenAI. @@ -38,6 +58,7 @@ public override PromptExecutionSettings Clone() { var settings = base.Clone(); settings.AzureChatDataSource = this.AzureChatDataSource; + settings.SetNewMaxCompletionTokensEnabled = this.SetNewMaxCompletionTokensEnabled; return settings; } @@ -103,6 +124,7 @@ public static AzureOpenAIPromptExecutionSettings FromExecutionSettingsWithData(P #region private ================================================================================ [Experimental("SKEXP0010")] private AzureSearchChatDataSource? _azureChatDataSource; + private bool _setNewMaxCompletionTokensEnabled; #endregion }