feat: add support for whisper on openai #251

Merged 1 commit on Mar 13, 2025
3 changes: 3 additions & 0 deletions README.md
@@ -69,6 +69,9 @@ $embeddings = new Embeddings();
* Embeddings Models
* [OpenAI's Text Embeddings](https://platform.openai.com/docs/guides/embeddings/embedding-models) with [OpenAI](https://platform.openai.com/docs/overview) and [Azure](https://learn.microsoft.com/azure/ai-services/openai/concepts/models) as Platform
* [Voyage's Embeddings](https://docs.voyageai.com/docs/embeddings) with [Voyage](https://www.voyageai.com/) as Platform
* Other Models
* [OpenAI's Dall·E](https://platform.openai.com/docs/guides/image-generation) with [OpenAI](https://platform.openai.com/docs/overview) as Platform
* [OpenAI's Whisper](https://platform.openai.com/docs/guides/speech-to-text) with [OpenAI](https://platform.openai.com/docs/overview) as Platform

See [issue #28](https://github.com/php-llm/llm-chain/issues/28) for planned support of other models and platforms.

22 changes: 22 additions & 0 deletions examples/audio-transcript-whisper.php
@@ -0,0 +1,22 @@
<?php

use PhpLlm\LlmChain\Bridge\OpenAI\PlatformFactory;
use PhpLlm\LlmChain\Bridge\OpenAI\Whisper;
use PhpLlm\LlmChain\Bridge\OpenAI\Whisper\File;
use Symfony\Component\Dotenv\Dotenv;

require_once dirname(__DIR__).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__).'/.env');

if (empty($_ENV['OPENAI_API_KEY'])) {
    echo 'Please set the OPENAI_API_KEY environment variable.'.PHP_EOL;
    exit(1);
}

$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']);
$model = new Whisper();
$file = new File(dirname(__DIR__).'/tests/Fixture/audio.mp3');

$response = $platform->request($model, $file);

echo $response->getContent().PHP_EOL;
4 changes: 4 additions & 0 deletions src/Bridge/OpenAI/PlatformFactory.php
@@ -9,6 +9,7 @@
use PhpLlm\LlmChain\Bridge\OpenAI\Embeddings\ResponseConverter as EmbeddingsResponseConverter;
use PhpLlm\LlmChain\Bridge\OpenAI\GPT\ModelClient as GPTModelClient;
use PhpLlm\LlmChain\Bridge\OpenAI\GPT\ResponseConverter as GPTResponseConverter;
use PhpLlm\LlmChain\Bridge\OpenAI\Whisper\ModelClient as WhisperModelClient;
use PhpLlm\LlmChain\Platform;
use Symfony\Component\HttpClient\EventSourceHttpClient;
use Symfony\Contracts\HttpClient\HttpClientInterface;
@@ -23,17 +24,20 @@ public static function create(
        $httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient);

        $dallEModelClient = new DallEModelClient($httpClient, $apiKey);
        $whisperModelClient = new WhisperModelClient($httpClient, $apiKey);

        return new Platform(
            [
                new GPTModelClient($httpClient, $apiKey),
                new EmbeddingsModelClient($httpClient, $apiKey),
                $dallEModelClient,
                $whisperModelClient,
            ],
            [
                new GPTResponseConverter(),
                new EmbeddingsResponseConverter(),
                $dallEModelClient,
                $whisperModelClient,
            ],
        );
    }
31 changes: 31 additions & 0 deletions src/Bridge/OpenAI/Whisper.php
@@ -0,0 +1,31 @@
<?php

declare(strict_types=1);

namespace PhpLlm\LlmChain\Bridge\OpenAI;

use PhpLlm\LlmChain\Model\Model;

final readonly class Whisper implements Model
{
    public const WHISPER_1 = 'whisper-1';

    /**
     * @param array<string, mixed> $options
     */
    public function __construct(
        private string $version = self::WHISPER_1,
        private array $options = [],
    ) {
    }

    public function getVersion(): string
    {
        return $this->version;
    }

    public function getOptions(): array
    {
        return $this->options;
    }
}
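
For context on the $options array above: ModelClient::request() (further down in this diff) merges it into the multipart request body, so OpenAI transcription parameters can be set when constructing the model. A minimal sketch, assuming illustrative parameter values such as language and response_format:

<?php

use PhpLlm\LlmChain\Bridge\OpenAI\PlatformFactory;
use PhpLlm\LlmChain\Bridge\OpenAI\Whisper;
use PhpLlm\LlmChain\Bridge\OpenAI\Whisper\File;

$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']);

// Illustrative options; they are forwarded as-is to the transcription endpoint.
$model = new Whisper(Whisper::WHISPER_1, [
    'language' => 'en',          // ISO-639-1 language hint
    'response_format' => 'json', // default format; convert() below expects JSON
]);

$response = $platform->request($model, new File(dirname(__DIR__).'/tests/Fixture/audio.mp3'));

echo $response->getContent().PHP_EOL;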
18 changes: 18 additions & 0 deletions src/Bridge/OpenAI/Whisper/File.php
@@ -0,0 +1,18 @@
<?php

declare(strict_types=1);

namespace PhpLlm\LlmChain\Bridge\OpenAI\Whisper;

use PhpLlm\LlmChain\Exception\InvalidArgumentException;

final readonly class File
{
    public function __construct(
        public string $path,
    ) {
        if (!is_readable($path) || false === file_get_contents($path)) {
            throw new InvalidArgumentException(sprintf('The file "%s" does not exist or is not readable.', $path));
        }
    }
}
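
For context, the constructor above validates the path eagerly, so a bad path fails before any API call is made. A minimal sketch using an intentionally invalid, illustrative path:

<?php

use PhpLlm\LlmChain\Bridge\OpenAI\Whisper\File;
use PhpLlm\LlmChain\Exception\InvalidArgumentException;

try {
    $file = new File('/path/that/does/not/exist.mp3'); // illustrative path
} catch (InvalidArgumentException $e) {
    echo $e->getMessage().PHP_EOL; // The file "/path/that/does/not/exist.mp3" does not exist or is not readable.
}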
52 changes: 52 additions & 0 deletions src/Bridge/OpenAI/Whisper/ModelClient.php
@@ -0,0 +1,52 @@
<?php

declare(strict_types=1);

namespace PhpLlm\LlmChain\Bridge\OpenAI\Whisper;

use PhpLlm\LlmChain\Bridge\OpenAI\Whisper;
use PhpLlm\LlmChain\Model\Model;
use PhpLlm\LlmChain\Model\Response\ResponseInterface as LlmResponse;
use PhpLlm\LlmChain\Model\Response\TextResponse;
use PhpLlm\LlmChain\Platform\ModelClient as PlatformResponseFactory;
use PhpLlm\LlmChain\Platform\ResponseConverter as PlatformResponseConverter;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Contracts\HttpClient\ResponseInterface;
use Webmozart\Assert\Assert;

final readonly class ModelClient implements PlatformResponseFactory, PlatformResponseConverter
{
    public function __construct(
        private HttpClientInterface $httpClient,
        #[\SensitiveParameter]
        private string $apiKey,
    ) {
        Assert::stringNotEmpty($apiKey, 'The API key must not be empty.');
    }

    public function supports(Model $model, object|array|string $input): bool
    {
        return $model instanceof Whisper && $input instanceof File;
    }

    public function request(Model $model, object|array|string $input, array $options = []): ResponseInterface
    {
        assert($input instanceof File);

        return $this->httpClient->request('POST', 'https://api.openai.com/v1/audio/transcriptions', [
            'auth_bearer' => $this->apiKey,
            'headers' => ['Content-Type' => 'multipart/form-data'],
            'body' => array_merge($options, $model->getOptions(), [
                'model' => $model->getVersion(),
                'file' => fopen($input->path, 'r'),
            ]),
        ]);
    }

    public function convert(ResponseInterface $response, array $options = []): LlmResponse
    {
        $data = $response->toArray();

        return new TextResponse($data['text']);
    }
}
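
A note on the body construction in request() above: with array_merge(), later arrays take precedence, so per-request $options are overridden by the model's constructor options, and both are overridden by the fixed model and file entries. A minimal sketch with illustrative values:

<?php

// Illustrative values only; demonstrates the precedence of array_merge() as used in request().
$requestOptions = ['language' => 'en', 'temperature' => 0.5];
$modelOptions = ['temperature' => 0.0];

$body = array_merge($requestOptions, $modelOptions, [
    'model' => 'whisper-1',
    'file' => '/path/to/audio.mp3', // in the real request this is an fopen() resource
]);

// ['language' => 'en', 'temperature' => 0.0, 'model' => 'whisper-1', 'file' => '/path/to/audio.mp3']
print_r($body);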