diff --git a/README.rst b/README.rst
index f9bde14e..604bc1a4 100644
--- a/README.rst
+++ b/README.rst
@@ -39,6 +39,7 @@ Speech recognition engine/API support:
* `Tensorflow `__
* `Vosk API `__ (works offline)
* `OpenAI whisper `__ (works offline)
+* `Speechmatics ASR API `__
**Quickstart:** ``pip install SpeechRecognition``. See the "Installing" section for more details.
@@ -95,6 +96,7 @@ To use all of the functionality of the library, you should have:
* **FLAC encoder** (required only if the system is not x86-based Windows/Linux/OS X)
* **Vosk** (required only if you need to use Vosk API speech recognition ``recognizer_instance.recognize_vosk``)
* **Whisper** (required only if you need to use Whisper ``recognizer_instance.recognize_whisper``)
+* **Speechmatics** (required only if you need to use Speechmatics ``recognizer_instance.recognize_speechmatics``)
The following requirements are optional, but can improve or extend functionality in some situations:
@@ -169,6 +171,12 @@ Whisper is **required if and only if you want to use whisper** (``recognizer_ins
You can install it with ``python3 -m pip install git+https://github.com/openai/whisper.git soundfile``.
+Speechmatics (for Speechmatics users)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Speechmatics is **required if and only if you want to use Speechmatics** (``recognizer_instance.recognize_speechmatics``).
+
+You can install it with ``python3 -m pip install speechmatics-python``. You will also need an API key, which you can get by creating an account and signing into the portal at https://portal.speechmatics.com/manage-access/.
+
Troubleshooting
---------------
diff --git a/examples/audio_transcribe.py b/examples/audio_transcribe.py
index 7806023f..0736b02a 100644
--- a/examples/audio_transcribe.py
+++ b/examples/audio_transcribe.py
@@ -13,6 +13,16 @@
with sr.AudioFile(AUDIO_FILE) as source:
audio = r.record(source) # read the entire audio file
+# recognize speech using Speechmatics
+SPEECHMATICS_KEY = "INSERT SPEECHMATICS API KEY HERE"
+try:
+ print("Speechmatics thinks you said " + r.recognize_speechmatics(audio, key=SPEECHMATICS_KEY))
+except sr.UnknownValueError:
+ print("Speechmatics could not understand audio")
+except sr.RequestError as e:
+ print("Could not request results from the Speechmatics service; {0}".format(e))
+
# recognize speech using Sphinx
try:
print("Sphinx thinks you said " + r.recognize_sphinx(audio))
diff --git a/examples/extended_results.py b/examples/extended_results.py
index 599c67f2..c848212b 100644
--- a/examples/extended_results.py
+++ b/examples/extended_results.py
@@ -16,6 +16,16 @@
with sr.AudioFile(AUDIO_FILE) as source:
audio = r.record(source) # read the entire audio file
+# recognize speech using Speechmatics
+SPEECHMATICS_KEY = "INSERT SPEECHMATICS API KEY HERE"
+try:
+ print("Speechmatics results:")
+ pprint(r.recognize_speechmatics(audio, key=SPEECHMATICS_KEY, transcript_format="json-v2"))
+except sr.UnknownValueError:
+ print("Speechmatics could not understand audio")
+except sr.RequestError as e:
+ print("Speechmatics error; {0}".format(e))
+
# recognize speech using Sphinx
try:
print("Sphinx thinks you said " + r.recognize_sphinx(audio))
diff --git a/examples/microphone_recognition.py b/examples/microphone_recognition.py
index 56168b29..863abe87 100644
--- a/examples/microphone_recognition.py
+++ b/examples/microphone_recognition.py
@@ -10,6 +10,15 @@
print("Say something!")
audio = r.listen(source)
+# recognize speech using Speechmatics
+SPEECHMATICS_KEY = "INSERT SPEECHMATICS API KEY HERE"
+try:
+ print("Speechmatics thinks you said " + r.recognize_speechmatics(audio, key=SPEECHMATICS_KEY))
+except sr.UnknownValueError:
+ print("Speechmatics could not understand audio")
+except sr.RequestError as e:
+ print("Could not request results from Speechmatics service; {0}".format(e))
+
# recognize speech using Sphinx
try:
print("Sphinx thinks you said " + r.recognize_sphinx(audio))
diff --git a/reference/library-reference.rst b/reference/library-reference.rst
index 7323bd9b..c6bb2b43 100644
--- a/reference/library-reference.rst
+++ b/reference/library-reference.rst
@@ -314,6 +314,19 @@ You can translate the result to english with Whisper by passing translate=True
Other values are passed directly to whisper. See https://github.com/openai/whisper/blob/main/whisper/transcribe.py for all options
+``recognize_speechmatics(self, audio_data, key=None, language="en", transcript_format="txt")``
+----------------------------------------------------------------------------------------------
+
+Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Speechmatics ASR API.
+
+The ``key`` value is your Speechmatics API key. You can get an API key by creating an account and signing into the portal at https://portal.speechmatics.com/manage-access/.
+
+The recognition language is determined by ``language``, an RFC5646 language tag like "en" or "es". The full list of supported languages can be found at https://docs.speechmatics.com/introduction/supported-languages.
+
+Returns a text representation of the transcript by default. You can also get a JSON representation by setting ``transcript_format="json-v2"``, which includes a range of metadata about each word in the transcript; the full transcript schema is documented at https://docs.speechmatics.com/features. An SRT representation is available by setting ``transcript_format="srt"`` (see the sketch below).
+
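+A minimal sketch of pulling word-level confidences out of the ``json-v2`` output (the ``results`` and ``alternatives`` keys here follow the v2 transcript schema linked above, and this assumes the transcript comes back as parsed JSON, as in ``examples/extended_results.py``)::
+
+    transcript = r.recognize_speechmatics(audio, key=SPEECHMATICS_KEY, transcript_format="json-v2")
+    for item in transcript["results"]:  # one entry per recognized word or punctuation mark
+        best = item["alternatives"][0]  # alternatives are ordered best-first
+        print(best["content"], best["confidence"])
+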
+Raises errors directly from the ``speechmatics-python`` package. Read more at https://speechmatics.github.io/speechmatics-python/exceptions.html.
+
``AudioSource``
---------------
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 66ebc04c..2f3e62bb 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1702,6 +1702,45 @@ def recognize_vosk(self, audio_data, language='en'):
return finalRecognition
+ def recognize_speechmatics(self, audio_data, key=None, language="en", transcript_format="txt"):
+ """
+        Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Speechmatics ASR API.
+
+        The ``key`` value is your Speechmatics API key. You can get an API key by creating an account and signing into the portal at https://portal.speechmatics.com/manage-access/.
+
+ The recognition language is determined by ``language``, an RFC5646 language tag like "en" or "es". The full list of supported languages can be found at https://docs.speechmatics.com/introduction/supported-languages.
+
+        Returns a text representation of the transcript by default. You can also get a JSON representation by setting ``transcript_format="json-v2"``, which includes a range of metadata about each word in the transcript; the full transcript schema is documented at https://docs.speechmatics.com/features. An SRT representation is available by setting ``transcript_format="srt"``.
+
+        Raises errors directly from the ``speechmatics-python`` package. Read more at https://speechmatics.github.io/speechmatics-python/exceptions.html.
+ """
+ assert isinstance(audio_data, AudioData), "Data must be audio data"
+ assert isinstance(key, str), "``key`` must be a string"
+
+ try:
+ from speechmatics.models import ConnectionSettings, BatchTranscriptionConfig
+ from speechmatics.batch_client import BatchClient
+ from speechmatics.constants import BATCH_SELF_SERVICE_URL
+        except ImportError:
+            raise RequestError("missing speechmatics-python module: install using `pip install speechmatics-python`")
+
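+        # package the captured audio as an in-memory WAV file (filename, bytes) for upload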
+ wav_data = audio_data.get_wav_data()
+ audio_input = ("audio_file.wav", wav_data)
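+        # connect to the Speechmatics self-service batch endpoint with the caller's API key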
+ settings = ConnectionSettings(
+ url=BATCH_SELF_SERVICE_URL,
+ auth_token=key,
+ )
+ conf = BatchTranscriptionConfig(
+ language=language,
+ )
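+        # submit the audio as a batch transcription job and block until it completes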
+ with BatchClient(settings) as client:
+ job_id = client.submit_job(
+ audio=audio_input,
+ transcription_config=conf,
+ )
+ transcript = client.wait_for_completion(job_id, transcription_format=transcript_format)
+ return transcript
+
def get_flac_converter():
"""Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""
flac_converter = shutil_which("flac") # check for installed version first
diff --git a/tests/test_recognition.py b/tests/test_recognition.py
index 5759d657..96fade84 100644
--- a/tests/test_recognition.py
+++ b/tests/test_recognition.py
@@ -34,6 +34,24 @@ def test_google_chinese(self):
with sr.AudioFile(self.AUDIO_FILE_ZH) as source: audio = r.record(source)
self.assertEqual(r.recognize_google(audio, language="zh-CN"), u"砸自己的脚")
+ @unittest.skipUnless("SPEECHMATICS_KEY" in os.environ, "requires Speechmatics key to be specified in SPEECHMATICS_KEY environment variable")
+ def test_speechmatics_english(self):
+ r = sr.Recognizer()
+ with sr.AudioFile(self.AUDIO_FILE_EN) as source: audio = r.record(source)
+ self.assertEqual(r.recognize_speechmatics(audio, key=os.environ["SPEECHMATICS_KEY"]), "One, two, three.")
+
+ @unittest.skipUnless("SPEECHMATICS_KEY" in os.environ, "requires Speechmatics key to be specified in SPEECHMATICS_KEY environment variable")
+ def test_speechmatics_french(self):
+ r = sr.Recognizer()
+ with sr.AudioFile(self.AUDIO_FILE_FR) as source: audio = r.record(source)
+ self.assertEqual(r.recognize_speechmatics(audio, key=os.environ["SPEECHMATICS_KEY"], language="fr"), u"C'est la dictée numéro un.")
+
+ @unittest.skipUnless("SPEECHMATICS_KEY" in os.environ, "requires Speechmatics key to be specified in SPEECHMATICS_KEY environment variable")
+ def test_speechmatics_mandarin(self):
+ r = sr.Recognizer()
+ with sr.AudioFile(self.AUDIO_FILE_ZH) as source: audio = r.record(source)
+ self.assertEqual(r.recognize_speechmatics(audio, key=os.environ["SPEECHMATICS_KEY"], language="cmn"), u"砸自己的脚。")
+
@unittest.skipUnless("WIT_AI_KEY" in os.environ, "requires Wit.ai key to be specified in WIT_AI_KEY environment variable")
def test_wit_english(self):
r = sr.Recognizer()