From dc89b6b92727ccfd5a5834bdb2c355cc3138326c Mon Sep 17 00:00:00 2001
From: Selena Yang <179177246+selenayang888@users.noreply.github.com>
Date: Fri, 23 Jan 2026 23:38:12 -0800
Subject: [PATCH 1/2] Add multi-language and temperature support to the JS SDK

---
 sdk_v2/js/src/openai/audioClient.ts       | 12 ++--
 sdk_v2/js/test/openai/audioClient.test.ts | 77 ++++++++++++++++++++++-
 2 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/sdk_v2/js/src/openai/audioClient.ts b/sdk_v2/js/src/openai/audioClient.ts
index 6eb2836..14c17a6 100644
--- a/sdk_v2/js/src/openai/audioClient.ts
+++ b/sdk_v2/js/src/openai/audioClient.ts
@@ -18,10 +18,12 @@ export class AudioClient {
    * @param audioFilePath - Path to the audio file to transcribe.
    * @returns The transcription result.
    */
-  public async transcribe(audioFilePath: string): Promise<any> {
+  public async transcribe(audioFilePath: string, language: string | null = null, temperature: number = 0.0): Promise<any> {
     const request = {
       Model: this.modelId,
-      FileName: audioFilePath
+      FileName: audioFilePath,
+      Language: language,
+      Temperature: temperature
     };
 
     const response = this.coreInterop.executeCommand("audio_transcribe", { Params: { OpenAICreateRequest: JSON.stringify(request) } });
@@ -34,10 +36,12 @@ export class AudioClient {
    * @param callback - A callback function that receives each chunk of the streaming response.
    * @returns A promise that resolves when the stream is complete.
    */
-  public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void): Promise<void> {
+  public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void, language: string | null = null, temperature: number = 0.0): Promise<void> {
     const request = {
       Model: this.modelId,
-      FileName: audioFilePath
+      FileName: audioFilePath,
+      Language: language,
+      Temperature: temperature
     };
 
     await this.coreInterop.executeCommandStreaming(
diff --git a/sdk_v2/js/test/openai/audioClient.test.ts b/sdk_v2/js/test/openai/audioClient.test.ts
index e0650cd..7a6d80d 100644
--- a/sdk_v2/js/test/openai/audioClient.test.ts
+++ b/sdk_v2/js/test/openai/audioClient.test.ts
@@ -30,7 +30,42 @@ describe('Audio Client Tests', () => {
       const audioClient = model.createAudioClient();
       expect(audioClient).to.not.be.undefined;
 
-      const response = await audioClient.transcribe(AUDIO_FILE_PATH);
+      const response = await audioClient.transcribe(AUDIO_FILE_PATH, "en");
+
+      expect(response).to.not.be.undefined;
+      expect(response.text).to.not.be.undefined;
+      expect(response.text).to.be.a('string');
+      expect(response.text.length).to.be.greaterThan(0);
+      expect(response.text).to.equal(EXPECTED_TEXT);
+      console.log(`Response: ${response.text}`);
+    } finally {
+      await model.unload();
+    }
+  });
+
+  it('should transcribe audio without streaming with temperature', async function() {
+    this.timeout(30000);
+    const manager = getTestManager();
+    const catalog = manager.catalog;
+
+    const cachedModels = await catalog.getCachedModels();
+    expect(cachedModels.length).to.be.greaterThan(0);
+
+    const cachedVariant = cachedModels.find(m => m.alias === WHISPER_MODEL_ALIAS);
+    expect(cachedVariant, 'whisper-tiny should be cached').to.not.be.undefined;
+
+    const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
+    expect(model).to.not.be.undefined;
+    if (!model || !cachedVariant) return;
+
+    model.selectVariant(cachedVariant.id);
+    await model.load();
+
+    try {
+      const audioClient = model.createAudioClient();
+      expect(audioClient).to.not.be.undefined;
+
+      const response = await audioClient.transcribe(AUDIO_FILE_PATH, "en", 0.1);
 
       expect(response).to.not.be.undefined;
       expect(response.text).to.not.be.undefined;
@@ -72,7 +107,45 @@ describe('Audio Client Tests', () => {
         expect(chunk.text).to.be.a('string');
         expect(chunk.text.length).to.be.greaterThan(0);
         fullResponse += chunk.text;
-      });
+      }, "en");
+
+      console.log(`Full response: ${fullResponse}`);
+      expect(fullResponse).to.equal(EXPECTED_TEXT);
+    } finally {
+      await model.unload();
+    }
+  });
+
+  it('should transcribe audio with streaming with temperature', async function() {
+    this.timeout(30000);
+    const manager = getTestManager();
+    const catalog = manager.catalog;
+
+    const cachedModels = await catalog.getCachedModels();
+    expect(cachedModels.length).to.be.greaterThan(0);
+
+    const cachedVariant = cachedModels.find(m => m.alias === WHISPER_MODEL_ALIAS);
+    expect(cachedVariant, 'whisper-tiny should be cached').to.not.be.undefined;
+
+    const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
+    expect(model).to.not.be.undefined;
+    if (!model || !cachedVariant) return;
+
+    model.selectVariant(cachedVariant.id);
+    await model.load();
+
+    try {
+      const audioClient = model.createAudioClient();
+      expect(audioClient).to.not.be.undefined;
+
+      let fullResponse = '';
+      await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
+        expect(chunk).to.not.be.undefined;
+        expect(chunk.text).to.not.be.undefined;
+        expect(chunk.text).to.be.a('string');
+        expect(chunk.text.length).to.be.greaterThan(0);
+        fullResponse += chunk.text;
+      }, "en", 0.1);
 
       console.log(`Full response: ${fullResponse}`);
       expect(fullResponse).to.equal(EXPECTED_TEXT);

From bfffd226ae14bdeee7def970bf2f1fad68a24709 Mon Sep 17 00:00:00 2001
From: Selena Yang <179177246+selenayang888@users.noreply.github.com>
Date: Wed, 28 Jan 2026 00:37:57 -0800
Subject: [PATCH 2/2] Modify the audio client input to match the chat client input

---
 sdk_v2/js/src/openai/audioClient.ts       | 48 ++++++++++++++++++++---
 sdk_v2/js/test/openai/audioClient.test.ts | 20 ++++++++--
 2 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/sdk_v2/js/src/openai/audioClient.ts b/sdk_v2/js/src/openai/audioClient.ts
index 14c17a6..98a4540 100644
--- a/sdk_v2/js/src/openai/audioClient.ts
+++ b/sdk_v2/js/src/openai/audioClient.ts
@@ -1,5 +1,38 @@
 import { CoreInterop } from '../detail/coreInterop.js';
 
+export class AudioClientSettings {
+  language?: string;
+  temperature?: number;
+
+  /**
+   * Serializes the settings into an OpenAI-compatible request object.
+   * @internal
+   */
+  _serialize() {
+    // Standard OpenAI properties
+    const result: any = {
+      Language: this.language,
+      Temperature: this.temperature,
+    };
+
+    // Foundry specific metadata properties
+    const metadata: Record<string, string> = {};
+    if (this.language !== undefined) {
+      metadata["language"] = this.language;
+    }
+    if (this.temperature !== undefined) {
+      metadata["temperature"] = this.temperature.toString();
+    }
+
+    if (Object.keys(metadata).length > 0) {
+      result.metadata = metadata;
+    }
+
+    // Filter out undefined properties
+    return Object.fromEntries(Object.entries(result).filter(([_, v]) => v !== undefined));
+  }
+}
+
 /**
  * Client for performing audio operations (transcription, translation) with a loaded model.
  * Follows the OpenAI Audio API structure.
@@ -7,6 +40,11 @@
 export class AudioClient {
   private modelId: string;
   private coreInterop: CoreInterop;
+
+  /**
+   * Configuration settings for audio operations.
+   */
+  public settings = new AudioClientSettings();
 
   constructor(modelId: string, coreInterop: CoreInterop) {
     this.modelId = modelId;
@@ -18,12 +56,11 @@ export class AudioClient {
    * @param audioFilePath - Path to the audio file to transcribe.
    * @returns The transcription result.
    */
-  public async transcribe(audioFilePath: string, language: string | null = null, temperature: number = 0.0): Promise<any> {
+  public async transcribe(audioFilePath: string): Promise<any> {
     const request = {
       Model: this.modelId,
       FileName: audioFilePath,
-      Language: language,
-      Temperature: temperature
+      ...this.settings._serialize()
     };
 
     const response = this.coreInterop.executeCommand("audio_transcribe", { Params: { OpenAICreateRequest: JSON.stringify(request) } });
@@ -36,12 +73,11 @@ export class AudioClient {
    * @param callback - A callback function that receives each chunk of the streaming response.
    * @returns A promise that resolves when the stream is complete.
    */
-  public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void, language: string | null = null, temperature: number = 0.0): Promise<void> {
+  public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void): Promise<void> {
     const request = {
       Model: this.modelId,
       FileName: audioFilePath,
-      Language: language,
-      Temperature: temperature
+      ...this.settings._serialize()
     };
 
     await this.coreInterop.executeCommandStreaming(
diff --git a/sdk_v2/js/test/openai/audioClient.test.ts b/sdk_v2/js/test/openai/audioClient.test.ts
index 7a6d80d..ff6a356 100644
--- a/sdk_v2/js/test/openai/audioClient.test.ts
+++ b/sdk_v2/js/test/openai/audioClient.test.ts
@@ -29,8 +29,11 @@ describe('Audio Client Tests', () => {
     try {
       const audioClient = model.createAudioClient();
       expect(audioClient).to.not.be.undefined;
+
+      audioClient.settings.language = 'en';
+      audioClient.settings.temperature = 0.0; // for deterministic results
 
-      const response = await audioClient.transcribe(AUDIO_FILE_PATH, "en");
+      const response = await audioClient.transcribe(AUDIO_FILE_PATH);
 
       expect(response).to.not.be.undefined;
       expect(response.text).to.not.be.undefined;
@@ -65,7 +68,10 @@ describe('Audio Client Tests', () => {
       const audioClient = model.createAudioClient();
       expect(audioClient).to.not.be.undefined;
 
-      const response = await audioClient.transcribe(AUDIO_FILE_PATH, "en", 0.1);
+      audioClient.settings.language = 'en';
+      audioClient.settings.temperature = 0.0; // for deterministic results
+
+      const response = await audioClient.transcribe(AUDIO_FILE_PATH);
 
       expect(response).to.not.be.undefined;
       expect(response.text).to.not.be.undefined;
@@ -100,6 +106,9 @@ describe('Audio Client Tests', () => {
       const audioClient = model.createAudioClient();
       expect(audioClient).to.not.be.undefined;
 
+      audioClient.settings.language = 'en';
+      audioClient.settings.temperature = 0.0; // for deterministic results
+
       let fullResponse = '';
       await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
         expect(chunk).to.not.be.undefined;
@@ -107,7 +116,7 @@ describe('Audio Client Tests', () => {
         expect(chunk.text).to.be.a('string');
         expect(chunk.text.length).to.be.greaterThan(0);
         fullResponse += chunk.text;
-      }, "en");
+      });
 
       console.log(`Full response: ${fullResponse}`);
       expect(fullResponse).to.equal(EXPECTED_TEXT);
@@ -138,6 +147,9 @@ describe('Audio Client Tests', () => {
       const audioClient = model.createAudioClient();
       expect(audioClient).to.not.be.undefined;
 
+      audioClient.settings.language = 'en';
+      audioClient.settings.temperature = 0.0; // for deterministic results
+
       let fullResponse = '';
       await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
         expect(chunk).to.not.be.undefined;
@@ -145,7 +157,7 @@ describe('Audio Client Tests', () => {
         expect(chunk.text).to.be.a('string');
         expect(chunk.text.length).to.be.greaterThan(0);
         fullResponse += chunk.text;
-      }, "en", 0.1);
+      });
 
       console.log(`Full response: ${fullResponse}`);
       expect(fullResponse).to.equal(EXPECTED_TEXT);
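
Note on the resulting request shape (illustrative only, not part of either commit): with language 'en' and temperature 0.0 set, spreading `_serialize()` into the request in PATCH 2/2 means the object handed to `JSON.stringify` for `OpenAICreateRequest` would look roughly like the sketch below. The `Model` and `FileName` values here are hypothetical placeholders.

    // TypeScript sketch of the merged request object (hypothetical values).
    const request = {
      Model: 'whisper-tiny-variant-id',  // hypothetical model id from the loaded variant
      FileName: '/path/to/audio.wav',    // hypothetical audio path
      Language: 'en',                    // top-level OpenAI-style property
      Temperature: 0,                    // 0.0 serializes as 0 in JSON
      metadata: { language: 'en', temperature: '0' }  // Foundry-specific string copies
    };

Note that when a setting is defined it appears twice, once as a top-level property and once as a stringified metadata entry; only undefined properties are filtered out.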
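For completeness, a minimal usage sketch of the settings-based API introduced in PATCH 2/2, mirroring the setup in the tests above (it assumes a `catalog` and the `WHISPER_MODEL_ALIAS` constant as in the tests; the audio path is a placeholder):

    const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
    await model.load();
    try {
      const audioClient = model.createAudioClient();
      // Configure once on the client (matching the chat client pattern);
      // every subsequent transcribe/transcribeStreaming call picks these up.
      audioClient.settings.language = 'en';
      audioClient.settings.temperature = 0.0;

      const result = await audioClient.transcribe('/path/to/audio.wav'); // placeholder path
      console.log(result.text);
    } finally {
      await model.unload();
    }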