From dc89b6b92727ccfd5a5834bdb2c355cc3138326c Mon Sep 17 00:00:00 2001
From: Selena Yang <179177246+selenayang888@users.noreply.github.com>
Date: Fri, 23 Jan 2026 23:38:12 -0800
Subject: [PATCH 1/2] Add multi-language and temperature support to the JS SDK

---
 sdk_v2/js/src/openai/audioClient.ts       | 12 ++--
 sdk_v2/js/test/openai/audioClient.test.ts | 77 ++++++++++++++++++++++-
 2 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/sdk_v2/js/src/openai/audioClient.ts b/sdk_v2/js/src/openai/audioClient.ts
index 6eb2836..14c17a6 100644
--- a/sdk_v2/js/src/openai/audioClient.ts
+++ b/sdk_v2/js/src/openai/audioClient.ts
@@ -18,10 +18,12 @@ export class AudioClient {
    * @param audioFilePath - Path to the audio file to transcribe.
    * @returns The transcription result.
    */
-  public async transcribe(audioFilePath: string): Promise<any> {
+  public async transcribe(audioFilePath: string, language: string | null = null, temperature: number = 0.0): Promise<any> {
     const request = {
       Model: this.modelId,
-      FileName: audioFilePath
+      FileName: audioFilePath,
+      Language: language,
+      Temperature: temperature
     };
 
     const response = this.coreInterop.executeCommand("audio_transcribe", { Params: { OpenAICreateRequest: JSON.stringify(request) } });
@@ -34,10 +36,12 @@ export class AudioClient {
    * @param callback - A callback function that receives each chunk of the streaming response.
    * @returns A promise that resolves when the stream is complete.
    */
-  public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void): Promise<void> {
+  public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void, language: string | null = null, temperature: number = 0.0): Promise<void> {
     const request = {
       Model: this.modelId,
-      FileName: audioFilePath
+      FileName: audioFilePath,
+      Language: language,
+      Temperature: temperature
     };
 
     await this.coreInterop.executeCommandStreaming(
diff --git a/sdk_v2/js/test/openai/audioClient.test.ts b/sdk_v2/js/test/openai/audioClient.test.ts
index e0650cd..7a6d80d 100644
--- a/sdk_v2/js/test/openai/audioClient.test.ts
+++ b/sdk_v2/js/test/openai/audioClient.test.ts
@@ -30,7 +30,42 @@ describe('Audio Client Tests', () => {
       const audioClient = model.createAudioClient();
       expect(audioClient).to.not.be.undefined;
 
-      const response = await audioClient.transcribe(AUDIO_FILE_PATH);
+      const response = await audioClient.transcribe(AUDIO_FILE_PATH, "en");
+
+      expect(response).to.not.be.undefined;
+      expect(response.text).to.not.be.undefined;
+      expect(response.text).to.be.a('string');
+      expect(response.text.length).to.be.greaterThan(0);
+      expect(response.text).to.equal(EXPECTED_TEXT);
+      console.log(`Response: ${response.text}`);
+    } finally {
+      await model.unload();
+    }
+  });
+
+  it('should transcribe audio without streaming with temperature', async function() {
+    this.timeout(30000);
+    const manager = getTestManager();
+    const catalog = manager.catalog;
+
+    const cachedModels = await catalog.getCachedModels();
+    expect(cachedModels.length).to.be.greaterThan(0);
+
+    const cachedVariant = cachedModels.find(m => m.alias === WHISPER_MODEL_ALIAS);
+    expect(cachedVariant, 'whisper-tiny should be cached').to.not.be.undefined;
+
+    const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
+    expect(model).to.not.be.undefined;
+    if (!model || !cachedVariant) return;
+
+    model.selectVariant(cachedVariant.id);
+    await model.load();
+
+    try {
+      const audioClient = model.createAudioClient();
+      expect(audioClient).to.not.be.undefined;
+
+      const response = await audioClient.transcribe(AUDIO_FILE_PATH, "en", 0.1);
 
       expect(response).to.not.be.undefined;
       expect(response.text).to.not.be.undefined;
@@ -72,7 +107,45 @@ describe('Audio Client Tests', () => {
         expect(chunk.text).to.be.a('string');
         expect(chunk.text.length).to.be.greaterThan(0);
         fullResponse += chunk.text;
-      });
+      }, "en");
+
+      console.log(`Full response: ${fullResponse}`);
+      expect(fullResponse).to.equal(EXPECTED_TEXT);
+    } finally {
+      await model.unload();
+    }
+  });
+
+  it('should transcribe audio with streaming with temperature', async function() {
+    this.timeout(30000);
+    const manager = getTestManager();
+    const catalog = manager.catalog;
+
+    const cachedModels = await catalog.getCachedModels();
+    expect(cachedModels.length).to.be.greaterThan(0);
+
+    const cachedVariant = cachedModels.find(m => m.alias === WHISPER_MODEL_ALIAS);
+    expect(cachedVariant, 'whisper-tiny should be cached').to.not.be.undefined;
+
+    const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
+    expect(model).to.not.be.undefined;
+    if (!model || !cachedVariant) return;
+
+    model.selectVariant(cachedVariant.id);
+    await model.load();
+
+    try {
+      const audioClient = model.createAudioClient();
+      expect(audioClient).to.not.be.undefined;
+
+      let fullResponse = '';
+      await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
+        expect(chunk).to.not.be.undefined;
+        expect(chunk.text).to.not.be.undefined;
+        expect(chunk.text).to.be.a('string');
+        expect(chunk.text.length).to.be.greaterThan(0);
+        fullResponse += chunk.text;
+      }, "en", 0.1);
 
       console.log(`Full response: ${fullResponse}`);
       expect(fullResponse).to.equal(EXPECTED_TEXT);

From bfffd226ae14bdeee7def970bf2f1fad68a24709 Mon Sep 17 00:00:00 2001
From: Selena Yang <179177246+selenayang888@users.noreply.github.com>
Date: Wed, 28 Jan 2026 00:37:57 -0800
Subject: [PATCH 2/2] Modify the audio client input to match the chat client input

---
 sdk_v2/js/src/openai/audioClient.ts       | 48 ++++++++++++++++++++---
 sdk_v2/js/test/openai/audioClient.test.ts | 20 ++++++++--
 2 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/sdk_v2/js/src/openai/audioClient.ts b/sdk_v2/js/src/openai/audioClient.ts
index 14c17a6..98a4540 100644
--- a/sdk_v2/js/src/openai/audioClient.ts
+++ b/sdk_v2/js/src/openai/audioClient.ts
@@ -1,5 +1,38 @@
 import { CoreInterop } from '../detail/coreInterop.js';
 
+export class AudioClientSettings {
+  language?: string;
+  temperature?: number;
+
+  /**
+   * Serializes the settings into an OpenAI-compatible request object.
+   * @internal
+   */
+  _serialize() {
+    // Standard OpenAI properties
+    const result: any = {
+      Language: this.language,
+      Temperature: this.temperature,
+    };
+
+    // Foundry specific metadata properties
+    const metadata: Record<string, string> = {};
+    if (this.language !== undefined) {
+      metadata["language"] = this.language;
+    }
+    if (this.temperature !== undefined) {
+      metadata["temperature"] = this.temperature.toString();
+    }
+
+    if (Object.keys(metadata).length > 0) {
+      result.metadata = metadata;
+    }
+
+    // Filter out undefined properties
+    return Object.fromEntries(Object.entries(result).filter(([_, v]) => v !== undefined));
+  }
+}
+
 /**
  * Client for performing audio operations (transcription, translation) with a loaded model.
  * Follows the OpenAI Audio API structure.
@@ -7,6 +40,11 @@
 export class AudioClient {
   private modelId: string;
   private coreInterop: CoreInterop;
+
+  /**
+   * Configuration settings for audio operations.
+   */
+  public settings = new AudioClientSettings();
 
   constructor(modelId: string, coreInterop: CoreInterop) {
     this.modelId = modelId;
@@ -18,12 +56,11 @@ export class AudioClient {
    * @param audioFilePath - Path to the audio file to transcribe.
    * @returns The transcription result.
    */
-  public async transcribe(audioFilePath: string, language: string | null = null, temperature: number = 0.0): Promise<any> {
+  public async transcribe(audioFilePath: string): Promise<any> {
     const request = {
       Model: this.modelId,
       FileName: audioFilePath,
-      Language: language,
-      Temperature: temperature
+      ...this.settings._serialize()
     };
 
     const response = this.coreInterop.executeCommand("audio_transcribe", { Params: { OpenAICreateRequest: JSON.stringify(request) } });
@@ -36,12 +73,11 @@ export class AudioClient {
    * @param callback - A callback function that receives each chunk of the streaming response.
    * @returns A promise that resolves when the stream is complete.
    */
-  public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void, language: string | null = null, temperature: number = 0.0): Promise<void> {
+  public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void): Promise<void> {
     const request = {
       Model: this.modelId,
       FileName: audioFilePath,
-      Language: language,
-      Temperature: temperature
+      ...this.settings._serialize()
     };
 
     await this.coreInterop.executeCommandStreaming(
diff --git a/sdk_v2/js/test/openai/audioClient.test.ts b/sdk_v2/js/test/openai/audioClient.test.ts
index 7a6d80d..ff6a356 100644
--- a/sdk_v2/js/test/openai/audioClient.test.ts
+++ b/sdk_v2/js/test/openai/audioClient.test.ts
@@ -29,8 +29,11 @@ describe('Audio Client Tests', () => {
     try {
       const audioClient = model.createAudioClient();
       expect(audioClient).to.not.be.undefined;
+
+      audioClient.settings.language = 'en';
+      audioClient.settings.temperature = 0.0; // for deterministic results
 
-      const response = await audioClient.transcribe(AUDIO_FILE_PATH, "en");
+      const response = await audioClient.transcribe(AUDIO_FILE_PATH);
 
       expect(response).to.not.be.undefined;
       expect(response.text).to.not.be.undefined;
@@ -65,7 +68,10 @@ describe('Audio Client Tests', () => {
       const audioClient = model.createAudioClient();
       expect(audioClient).to.not.be.undefined;
 
-      const response = await audioClient.transcribe(AUDIO_FILE_PATH, "en", 0.1);
+      audioClient.settings.language = 'en';
+      audioClient.settings.temperature = 0.0; // for deterministic results
+
+      const response = await audioClient.transcribe(AUDIO_FILE_PATH);
 
       expect(response).to.not.be.undefined;
       expect(response.text).to.not.be.undefined;
@@ -100,6 +106,9 @@ describe('Audio Client Tests', () => {
       const audioClient = model.createAudioClient();
       expect(audioClient).to.not.be.undefined;
 
+      audioClient.settings.language = 'en';
+      audioClient.settings.temperature = 0.0; // for deterministic results
+
       let fullResponse = '';
       await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
         expect(chunk).to.not.be.undefined;
@@ -107,7 +116,7 @@ describe('Audio Client Tests', () => {
         expect(chunk.text).to.be.a('string');
         expect(chunk.text.length).to.be.greaterThan(0);
         fullResponse += chunk.text;
-      }, "en");
+      });
 
       console.log(`Full response: ${fullResponse}`);
       expect(fullResponse).to.equal(EXPECTED_TEXT);
@@ -138,6 +147,9 @@ describe('Audio Client Tests', () => {
       const audioClient = model.createAudioClient();
       expect(audioClient).to.not.be.undefined;
 
+      audioClient.settings.language = 'en';
+      audioClient.settings.temperature = 0.0; // for deterministic results
+
       let fullResponse = '';
       await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
         expect(chunk).to.not.be.undefined;
@@ -145,7 +157,7 @@ describe('Audio Client Tests', () => {
         expect(chunk.text).to.be.a('string');
         expect(chunk.text.length).to.be.greaterThan(0);
         fullResponse += chunk.text;
-      }, "en", 0.1);
+      });
 
       console.log(`Full response: ${fullResponse}`);
       expect(fullResponse).to.equal(EXPECTED_TEXT);
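
Note on the resulting request shape (illustrative only, not part of either commit): with language 'en' and temperature 0.0 set, spreading `_serialize()` into the request in PATCH 2/2 means the object handed to `JSON.stringify` for `OpenAICreateRequest` would look roughly like the sketch below. The `Model` and `FileName` values here are hypothetical placeholders.

    // TypeScript sketch of the merged request object (hypothetical values).
    const request = {
      Model: 'whisper-tiny-variant-id',  // hypothetical model id from the loaded variant
      FileName: '/path/to/audio.wav',    // hypothetical audio path
      Language: 'en',                    // top-level OpenAI-style property
      Temperature: 0,                    // 0.0 serializes as 0 in JSON
      metadata: { language: 'en', temperature: '0' }  // Foundry-specific string copies
    };

Note that when a setting is defined it appears twice, once as a top-level property and once as a stringified metadata entry; only undefined properties are filtered out.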
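For completeness, a minimal usage sketch of the settings-based API introduced in PATCH 2/2, mirroring the setup in the tests above (it assumes a `catalog` and the `WHISPER_MODEL_ALIAS` constant as in the tests; the audio path is a placeholder):

    const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
    await model.load();
    try {
      const audioClient = model.createAudioClient();
      // Configure once on the client (matching the chat client pattern);
      // every subsequent transcribe/transcribeStreaming call picks these up.
      audioClient.settings.language = 'en';
      audioClient.settings.temperature = 0.0;

      const result = await audioClient.transcribe('/path/to/audio.wav'); // placeholder path
      console.log(result.text);
    } finally {
      await model.unload();
    }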