Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions sdk_v2/js/src/openai/audioClient.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,50 @@
import { CoreInterop } from '../detail/coreInterop.js';

/**
 * Configuration settings for audio operations (transcription, translation).
 *
 * Set fields are serialized twice into the outgoing request: once as
 * top-level OpenAI-style properties (`Language`, `Temperature`) and once
 * inside a Foundry-specific `metadata` string map.
 */
export class AudioClientSettings {
  /** Language hint for the audio input (e.g. "en"). Omitted from the request when unset. */
  language?: string;
  /** Sampling temperature; 0 yields deterministic output. Omitted from the request when unset. */
  temperature?: number;

  /**
   * Serializes the settings into an OpenAI-compatible request object.
   *
   * Unset fields are omitted entirely (never sent as `undefined`), and the
   * Foundry-specific `metadata` map is only attached when at least one
   * value is set. Metadata values are strings, so `temperature` is
   * stringified there while staying numeric at the top level.
   *
   * @returns A plain object suitable for spreading into a request payload.
   * @internal
   */
  _serialize(): Record<string, unknown> {
    const result: Record<string, unknown> = {};
    const metadata: Record<string, string> = {};

    // Build both representations conditionally instead of creating
    // undefined-valued keys and filtering them out afterwards.
    if (this.language !== undefined) {
      result.Language = this.language;
      metadata["language"] = this.language;
    }
    if (this.temperature !== undefined) {
      result.Temperature = this.temperature;
      metadata["temperature"] = this.temperature.toString();
    }

    if (Object.keys(metadata).length > 0) {
      result.metadata = metadata;
    }

    return result;
  }
}

/**
* Client for performing audio operations (transcription, translation) with a loaded model.
* Follows the OpenAI Audio API structure.
*/
export class AudioClient {
private modelId: string;
private coreInterop: CoreInterop;

/**
* Configuration settings for audio operations.
*/
public settings = new AudioClientSettings();

constructor(modelId: string, coreInterop: CoreInterop) {
this.modelId = modelId;
Expand All @@ -21,7 +59,8 @@ export class AudioClient {
public async transcribe(audioFilePath: string): Promise<any> {
const request = {
Model: this.modelId,
FileName: audioFilePath
FileName: audioFilePath,
...this.settings._serialize()
};

const response = this.coreInterop.executeCommand("audio_transcribe", { Params: { OpenAICreateRequest: JSON.stringify(request) } });
Expand All @@ -37,7 +76,8 @@ export class AudioClient {
public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void): Promise<void> {
const request = {
Model: this.modelId,
FileName: audioFilePath
FileName: audioFilePath,
...this.settings._serialize()
};

await this.coreInterop.executeCommandStreaming(
Expand Down
85 changes: 85 additions & 0 deletions sdk_v2/js/test/openai/audioClient.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,47 @@ describe('Audio Client Tests', () => {
try {
const audioClient = model.createAudioClient();
expect(audioClient).to.not.be.undefined;

audioClient.settings.language = 'en';
audioClient.settings.temperature = 0.0; // for deterministic results

const response = await audioClient.transcribe(AUDIO_FILE_PATH);

expect(response).to.not.be.undefined;
expect(response.text).to.not.be.undefined;
expect(response.text).to.be.a('string');
expect(response.text.length).to.be.greaterThan(0);
expect(response.text).to.equal(EXPECTED_TEXT);
console.log(`Response: ${response.text}`);
} finally {
await model.unload();
}
});

it('should transcribe audio without streaming with temperature', async function() {
this.timeout(30000);
const manager = getTestManager();
const catalog = manager.catalog;

const cachedModels = await catalog.getCachedModels();
expect(cachedModels.length).to.be.greaterThan(0);

const cachedVariant = cachedModels.find(m => m.alias === WHISPER_MODEL_ALIAS);
expect(cachedVariant, 'whisper-tiny should be cached').to.not.be.undefined;

const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
expect(model).to.not.be.undefined;
if (!model || !cachedVariant) return;

model.selectVariant(cachedVariant.id);
await model.load();

try {
const audioClient = model.createAudioClient();
expect(audioClient).to.not.be.undefined;

audioClient.settings.language = 'en';
audioClient.settings.temperature = 0.0; // for deterministic results

const response = await audioClient.transcribe(AUDIO_FILE_PATH);

Expand Down Expand Up @@ -65,6 +106,50 @@ describe('Audio Client Tests', () => {
const audioClient = model.createAudioClient();
expect(audioClient).to.not.be.undefined;

audioClient.settings.language = 'en';
audioClient.settings.temperature = 0.0; // for deterministic results

let fullResponse = '';
await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
expect(chunk).to.not.be.undefined;
expect(chunk.text).to.not.be.undefined;
expect(chunk.text).to.be.a('string');
expect(chunk.text.length).to.be.greaterThan(0);
fullResponse += chunk.text;
});

console.log(`Full response: ${fullResponse}`);
expect(fullResponse).to.equal(EXPECTED_TEXT);
} finally {
await model.unload();
}
});

it('should transcribe audio with streaming with temperature', async function() {
this.timeout(30000);
const manager = getTestManager();
const catalog = manager.catalog;

const cachedModels = await catalog.getCachedModels();
expect(cachedModels.length).to.be.greaterThan(0);

const cachedVariant = cachedModels.find(m => m.alias === WHISPER_MODEL_ALIAS);
expect(cachedVariant, 'whisper-tiny should be cached').to.not.be.undefined;

const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
expect(model).to.not.be.undefined;
if (!model || !cachedVariant) return;

model.selectVariant(cachedVariant.id);
await model.load();

try {
const audioClient = model.createAudioClient();
expect(audioClient).to.not.be.undefined;

audioClient.settings.language = 'en';
audioClient.settings.temperature = 0.0; // for deterministic results

let fullResponse = '';
await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
expect(chunk).to.not.be.undefined;
Expand Down