diff --git a/sdk_v2/js/src/openai/audioClient.ts b/sdk_v2/js/src/openai/audioClient.ts
index 6eb2836..98a4540 100644
--- a/sdk_v2/js/src/openai/audioClient.ts
+++ b/sdk_v2/js/src/openai/audioClient.ts
@@ -1,5 +1,38 @@
 import { CoreInterop } from '../detail/coreInterop.js';
+export class AudioClientSettings {
+    language?: string;
+    temperature?: number;
+
+    /**
+     * Serializes the settings into an OpenAI-compatible request object.
+     * @internal
+     */
+    _serialize() {
+        // Standard OpenAI properties
+        const result: any = {
+            Language: this.language,
+            Temperature: this.temperature,
+        };
+
+        // Foundry specific metadata properties
+        const metadata: Record<string, string> = {};
+        if (this.language !== undefined) {
+            metadata["language"] = this.language;
+        }
+        if (this.temperature !== undefined) {
+            metadata["temperature"] = this.temperature.toString();
+        }
+
+        if (Object.keys(metadata).length > 0) {
+            result.metadata = metadata;
+        }
+
+        // Filter out undefined properties
+        return Object.fromEntries(Object.entries(result).filter(([_, v]) => v !== undefined));
+    }
+}
+
 /**
  * Client for performing audio operations (transcription, translation) with a loaded model.
  * Follows the OpenAI Audio API structure.
  */
@@ -7,6 +40,11 @@ import { CoreInterop } from '../detail/coreInterop.js';
 export class AudioClient {
     private modelId: string;
     private coreInterop: CoreInterop;
+
+    /**
+     * Configuration settings for audio operations.
+     */
+    public settings = new AudioClientSettings();
 
     constructor(modelId: string, coreInterop: CoreInterop) {
         this.modelId = modelId;
@@ -21,7 +59,8 @@ export class AudioClient {
     public async transcribe(audioFilePath: string): Promise<any> {
         const request = {
             Model: this.modelId,
-            FileName: audioFilePath
+            FileName: audioFilePath,
+            ...this.settings._serialize()
         };
 
         const response = this.coreInterop.executeCommand("audio_transcribe", { Params: { OpenAICreateRequest: JSON.stringify(request) } });
@@ -37,7 +76,8 @@ export class AudioClient {
     public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void): Promise<void> {
         const request = {
             Model: this.modelId,
-            FileName: audioFilePath
+            FileName: audioFilePath,
+            ...this.settings._serialize()
         };
 
         await this.coreInterop.executeCommandStreaming(
diff --git a/sdk_v2/js/test/openai/audioClient.test.ts b/sdk_v2/js/test/openai/audioClient.test.ts
index e0650cd..ff6a356 100644
--- a/sdk_v2/js/test/openai/audioClient.test.ts
+++ b/sdk_v2/js/test/openai/audioClient.test.ts
@@ -29,6 +29,47 @@ describe('Audio Client Tests', () => {
         try {
             const audioClient = model.createAudioClient();
             expect(audioClient).to.not.be.undefined;
+
+            audioClient.settings.language = 'en';
+            audioClient.settings.temperature = 0.0; // for deterministic results
+
+            const response = await audioClient.transcribe(AUDIO_FILE_PATH);
+
+            expect(response).to.not.be.undefined;
+            expect(response.text).to.not.be.undefined;
+            expect(response.text).to.be.a('string');
+            expect(response.text.length).to.be.greaterThan(0);
+            expect(response.text).to.equal(EXPECTED_TEXT);
+            console.log(`Response: ${response.text}`);
+        } finally {
+            await model.unload();
+        }
+    });
+
+    it('should transcribe audio without streaming with temperature', async function() {
+        this.timeout(30000);
+        const manager = getTestManager();
+        const catalog = manager.catalog;
+
+        const cachedModels = await catalog.getCachedModels();
+        expect(cachedModels.length).to.be.greaterThan(0);
+
+        const cachedVariant = cachedModels.find(m => m.alias === WHISPER_MODEL_ALIAS);
+        expect(cachedVariant, 'whisper-tiny should be cached').to.not.be.undefined;
+
+        const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
+        expect(model).to.not.be.undefined;
+        if (!model || !cachedVariant) return;
+
+        model.selectVariant(cachedVariant.id);
+        await model.load();
+
+        try {
+            const audioClient = model.createAudioClient();
+            expect(audioClient).to.not.be.undefined;
+
+            audioClient.settings.language = 'en';
+            audioClient.settings.temperature = 0.0; // for deterministic results
 
             const response = await audioClient.transcribe(AUDIO_FILE_PATH);
 
@@ -65,6 +106,50 @@ describe('Audio Client Tests', () => {
             const audioClient = model.createAudioClient();
             expect(audioClient).to.not.be.undefined;
 
+            audioClient.settings.language = 'en';
+            audioClient.settings.temperature = 0.0; // for deterministic results
+
+            let fullResponse = '';
+            await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
+                expect(chunk).to.not.be.undefined;
+                expect(chunk.text).to.not.be.undefined;
+                expect(chunk.text).to.be.a('string');
+                expect(chunk.text.length).to.be.greaterThan(0);
+                fullResponse += chunk.text;
+            });
+
+            console.log(`Full response: ${fullResponse}`);
+            expect(fullResponse).to.equal(EXPECTED_TEXT);
+        } finally {
+            await model.unload();
+        }
+    });
+
+    it('should transcribe audio with streaming with temperature', async function() {
+        this.timeout(30000);
+        const manager = getTestManager();
+        const catalog = manager.catalog;
+
+        const cachedModels = await catalog.getCachedModels();
+        expect(cachedModels.length).to.be.greaterThan(0);
+
+        const cachedVariant = cachedModels.find(m => m.alias === WHISPER_MODEL_ALIAS);
+        expect(cachedVariant, 'whisper-tiny should be cached').to.not.be.undefined;
+
+        const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
+        expect(model).to.not.be.undefined;
+        if (!model || !cachedVariant) return;
+
+        model.selectVariant(cachedVariant.id);
+        await model.load();
+
+        try {
+            const audioClient = model.createAudioClient();
+            expect(audioClient).to.not.be.undefined;
+
+            audioClient.settings.language = 'en';
+            audioClient.settings.temperature = 0.0; // for deterministic results
+
             let fullResponse = '';
             await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
                 expect(chunk).to.not.be.undefined;
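
Usage sketch for the new settings surface, assuming the `getTestManager()` catalog bootstrap shown in the tests above; the model alias and audio path below are placeholders, not values from the patch:

    // Minimal end-to-end sketch of AudioClientSettings.
    const catalog = getTestManager().catalog;
    const model = await catalog.getModel('whisper-tiny'); // placeholder alias
    await model.load();
    try {
        const audioClient = model.createAudioClient();

        // Both fields are optional; _serialize() filters out anything
        // left undefined before it is spread into the request.
        audioClient.settings.language = 'en';
        audioClient.settings.temperature = 0.0; // deterministic decoding

        const response = await audioClient.transcribe('/path/to/audio.wav'); // placeholder path
        console.log(response.text);
    } finally {
        await model.unload();
    }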