Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,16 @@ jobs:
- name: Install Python dependencies (Ubuntu, <=3.12)
if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.13'
run: |
python -m pip install .[dev,audio,pocketsphinx,google-cloud,whisper-local,faster-whisper,openai,groq,vosk]
python -m pip install .[dev,audio,pocketsphinx,google-cloud,whisper-local,faster-whisper,openai,groq,vosk,cohere-api]
- name: Install Python dependencies (Ubuntu, 3.13)
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
run: |
python -m pip install standard-aifc setuptools
python -m pip install .[dev,audio,pocketsphinx,google-cloud,openai,groq,vosk]
python -m pip install .[dev,audio,pocketsphinx,google-cloud,openai,groq,vosk,cohere-api]
- name: Install Python dependencies (Windows)
if: matrix.os == 'windows-latest'
run: |
python -m pip install .[dev,whisper-local,faster-whisper,google-cloud,openai,groq,vosk]
python -m pip install .[dev,whisper-local,faster-whisper,google-cloud,openai,groq,vosk,cohere-api]
- name: Set up vosk model
run: python -m speech_recognition.cli download vosk
- name: Test with unittest
Expand Down
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ Speech recognition engine/API support:
* `OpenAI Whisper API <https://platform.openai.com/docs/guides/speech-to-text>`__
* OpenAI compatible self-hosted endpoints (e.g. vLLM, Ollama)
* `Groq Whisper API <https://console.groq.com/docs/speech-to-text>`__
* `Cohere Transcribe API <https://docs.cohere.com/docs/transcribe>`__

**Quickstart:** ``pip install SpeechRecognition``. See the "Installing" section for more details.

Expand Down Expand Up @@ -123,6 +124,7 @@ To use all of the functionality of the library, you should have:
* **openai** (required only if you need to use OpenAI Whisper API speech recognition ``recognizer_instance.recognize_openai``)
* includes OpenAI compatible self-hosted endpoints (e.g. vLLM, Ollama)
* **groq** (required only if you need to use Groq Whisper API speech recognition ``recognizer_instance.recognize_groq``)
* **cohere** (required only if you need to use Cohere Transcribe API speech recognition ``recognizer_instance.recognize_cohere_api``; install with ``pip install SpeechRecognition[cohere-api]``. Set ``CO_API_KEY`` as documented by the Cohere SDK.)

The following requirements are optional, but can improve or extend functionality in some situations:

Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ groq = [
"groq",
"httpx < 0.28",
]
cohere-api = [
"cohere>=5.21.0",
]
assemblyai = ["requests"]
vosk = ["vosk"]

Expand Down
5 changes: 5 additions & 0 deletions reference/library-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,11 @@ Raises a ``speech_recognition.UnknownValueError`` exception if the speech is uni

.. autofunction:: speech_recognition.recognizers.whisper_api.groq.recognize

``recognizer_instance.recognize_cohere_api(audio_data: AudioData, *, language: str, model = "cohere-transcribe-03-2026")``
--------------------------------------------------------------------------------------------------------------------------

.. autofunction:: speech_recognition.recognizers.cohere_api.recognize

``AudioSource``
---------------

Expand Down
3 changes: 2 additions & 1 deletion speech_recognition/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1281,7 +1281,7 @@ def flush(self, *args, **kwargs):
# At this time, the dependencies are not yet installed, resulting in a ModuleNotFoundError.
# This is a workaround to resolve this issue
try:
from .recognizers import google, google_cloud, pocketsphinx, vosk
from .recognizers import cohere_api, google, google_cloud, pocketsphinx, vosk
from .recognizers.whisper_api import groq, openai
from .recognizers.whisper_local import faster_whisper, whisper
except (ModuleNotFoundError, ImportError):
Expand All @@ -1293,6 +1293,7 @@ def flush(self, *args, **kwargs):
Recognizer.recognize_faster_whisper = faster_whisper.recognize # type: ignore[attr-defined]
Recognizer.recognize_openai = openai.recognize # type: ignore[attr-defined]
Recognizer.recognize_groq = groq.recognize # type: ignore[attr-defined]
Recognizer.recognize_cohere_api = cohere_api.recognize # type: ignore[attr-defined]
Recognizer.recognize_sphinx = pocketsphinx.recognize # type: ignore[attr-defined]
Recognizer.recognize_vosk = vosk.recognize # type: ignore[attr-defined]

Expand Down
53 changes: 53 additions & 0 deletions speech_recognition/recognizers/cohere_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from __future__ import annotations

import logging
from io import BytesIO

from speech_recognition.audio import AudioData
from speech_recognition.exceptions import SetupError

logger = logging.getLogger(__name__)


def recognize(
    recognizer,
    audio_data: AudioData,
    *,
    language: str,
    model: str = "cohere-transcribe-03-2026",
) -> str:
    """Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the `Cohere Transcribe <https://docs.cohere.com/docs/transcribe>`__ API via the official Python SDK.

    Requires the ``cohere`` package (install with ``pip install SpeechRecognition[cohere-api]``).
    Set environment variable ``CO_API_KEY`` as documented by Cohere; this library does not read or override it in code.

    ``recognizer`` is the ``Recognizer`` instance this function is bound to; it is not used by the implementation.

    ``language`` is required by the Cohere transcription API (e.g. ``"en"``, ``"ja"``).

    ``model`` is the name of the Cohere transcription model to use.

    Returns the transcription text reported by the API.

    Raises a ``speech_recognition.exceptions.SetupError`` exception if the ``cohere`` package cannot be imported, and a ``ValueError`` if ``audio_data`` is not an ``AudioData`` instance.

    Detail: https://docs.cohere.com/reference/create-audio-transcription
    """
    # Import lazily so the rest of the library works without the optional
    # ``cohere`` dependency installed.
    try:
        import cohere
    except ImportError as exc:
        # Chain explicitly so the traceback shows the failed import as the
        # direct cause rather than as an error raised "during handling".
        raise SetupError(
            "missing cohere module: ensure that cohere is set up correctly "
            "(e.g. pip install SpeechRecognition[cohere-api])."
        ) from exc

    if not isinstance(audio_data, AudioData):
        raise ValueError("``audio_data`` must be an ``AudioData`` instance")

    wav_data = BytesIO(audio_data.get_wav_data())
    # NOTE(review): presumably the SDK uses ``name`` as the upload filename
    # (and thus to detect the WAV format) -- confirm against the Cohere SDK.
    wav_data.name = "SpeechRecognition_audio.wav"

    # No credentials are passed here; the SDK client is expected to pick up
    # ``CO_API_KEY`` from the environment.
    client = cohere.ClientV2()
    logger.debug(
        "cohere audio.transcriptions.create: model=%r language=%r",
        model,
        language,
    )
    response = client.audio.transcriptions.create(
        model=model,
        file=wav_data,
        language=language,
    )
    return response.text
50 changes: 50 additions & 0 deletions tests/recognizers/test_cohere_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from unittest.mock import MagicMock, patch

from speech_recognition import AudioData, Recognizer
from speech_recognition.recognizers import cohere_api


@patch("cohere.ClientV2")
def test_transcribe_default_model(mock_client_cls):
    """``recognize`` uses the default model and forwards language and file."""
    # Fake SDK client whose transcription call yields a canned response.
    create = MagicMock(
        return_value=MagicMock(text="Transcription by Cohere")
    )
    fake_client = MagicMock()
    fake_client.audio.transcriptions.create = create
    mock_client_cls.return_value = fake_client

    fake_audio = MagicMock(spec=AudioData)
    fake_audio.get_wav_data.return_value = b"fake_wav"

    fake_recognizer = MagicMock(spec=Recognizer)
    transcript = cohere_api.recognize(
        fake_recognizer, fake_audio, language="en"
    )

    assert transcript == "Transcription by Cohere"
    fake_audio.get_wav_data.assert_called_once()
    # The client must be constructed without explicit arguments.
    mock_client_cls.assert_called_once_with()
    create.assert_called_once()

    _, kwargs = create.call_args
    assert kwargs["model"] == "cohere-transcribe-03-2026"
    assert kwargs["language"] == "en"
    assert "file" in kwargs


@patch("cohere.ClientV2")
def test_transcribe_with_language(mock_client_cls):
    """``recognize`` passes a non-English ``language`` through unchanged."""
    # Fake SDK client whose transcription call yields a canned response.
    create = MagicMock(
        return_value=MagicMock(text="Japanese transcription")
    )
    fake_client = MagicMock()
    fake_client.audio.transcriptions.create = create
    mock_client_cls.return_value = fake_client

    fake_audio = MagicMock(spec=AudioData)
    fake_audio.get_wav_data.return_value = b"fake_wav"

    fake_recognizer = MagicMock(spec=Recognizer)
    transcript = cohere_api.recognize(
        fake_recognizer, fake_audio, language="ja"
    )

    assert transcript == "Japanese transcription"
    _, kwargs = create.call_args
    assert kwargs["model"] == "cohere-transcribe-03-2026"
    assert kwargs["language"] == "ja"
Loading