Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,23 @@ You can install it with ``python3 -m pip install SpeechRecognition[groq]``.

Please set the environment variable ``GROQ_API_KEY`` before calling ``recognizer_instance.recognize_groq``.

Proxy Support
~~~~~~~~~~~~~

All cloud-based recognizers support proxying via the ``recognizer_instance.proxy_url`` attribute:

.. code:: python

    import speech_recognition as sr
    r = sr.Recognizer()
    r.proxy_url = "http://proxy.example.com:8080"  # HTTP proxy
    # r.proxy_url = "socks5://proxy.example.com:1080"  # SOCKS5 proxy (requires PySocks)
    # r.proxy_url = ""  # explicitly disable proxies

By default ``proxy_url`` is ``None``, which preserves existing behaviour (system/environment proxy settings are used).

SOCKS proxy support requires the ``PySocks`` package: ``pip install PySocks``.

Troubleshooting
---------------

Expand Down
18 changes: 18 additions & 0 deletions reference/library-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,24 @@ Represents the timeout (in seconds) for internal operations, such as API request

Setting this to a reasonable value ensures that these operations will never block indefinitely, though good values depend on your network speed and the expected length of the audio to recognize.

``recognizer_instance.proxy_url = None # type: Union[str, None]``
------------------------------------------------------------------

Configures an HTTP or SOCKS proxy for all cloud-based API requests. Can be changed.

* ``None`` (default) -- use system/environment proxy settings (backward compatible).
* ``""`` (empty string) -- explicitly disable all proxies.
* ``"http://host:port"`` -- use an HTTP proxy.
* ``"socks5://host:port"`` -- use a SOCKS5 proxy (requires the ``PySocks`` package: ``pip install PySocks``).

Example:

.. code:: python

    import speech_recognition as sr
    r = sr.Recognizer()
    r.proxy_url = "http://proxy.example.com:8080"

``recognizer_instance.record(source: AudioSource, duration: Union[float, None] = None, offset: Union[float, None] = None) -> AudioData``
----------------------------------------------------------------------------------------------------------------------------------------

Expand Down
45 changes: 29 additions & 16 deletions speech_recognition/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from collections.abc import Iterable
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import Request, urlopen
from urllib.request import Request

from .audio import AudioData, get_flac_converter
from .exceptions import (
Expand All @@ -34,6 +34,11 @@
UnknownValueError,
WaitTimeoutError,
)
from .proxy import (
build_boto3_proxy_config,
build_requests_proxies,
urlopen_with_proxy,
)

__author__ = "Anthony Zhang (Uberi)"
__version__ = "3.14.5"
Expand Down Expand Up @@ -327,6 +332,7 @@ def __init__(self):
self.dynamic_energy_ratio = 1.5
self.pause_threshold = 0.8 # seconds of non-speaking audio before a phrase is considered complete
self.operation_timeout = None # seconds after an internal operation (e.g., an API request) starts before it times out, or ``None`` for no timeout
self.proxy_url = None # proxy URL for API requests: ``None`` uses system/env defaults, ``""`` disables proxies, or a URL like ``"http://host:port"`` or ``"socks5://host:port"``

self.phrase_threshold = 0.3 # minimum seconds of speaking audio before we consider the speaking audio a phrase - values below this are ignored (for filtering out clicks and pops)
self.non_speaking_duration = 0.5 # seconds of non-speaking audio to keep on both sides of the recording
Expand Down Expand Up @@ -625,7 +631,7 @@ def recognize_wit(self, audio_data, key, show_all=False):
url = "https://api.wit.ai/speech?v=20170307"
request = Request(url, data=wav_data, headers={"Authorization": "Bearer {}".format(key), "Content-Type": "audio/wav"})
try:
response = urlopen(request, timeout=self.operation_timeout)
response = urlopen_with_proxy(request, timeout=self.operation_timeout, proxy_url=self.proxy_url)
except HTTPError as e:
raise RequestError("recognition request failed: {}".format(e.reason))
except URLError as e:
Expand Down Expand Up @@ -680,7 +686,7 @@ def recognize_azure(self, audio_data, key, language="en-US", profanity="masked",
start_time = monotonic()

try:
credential_response = urlopen(credential_request, timeout=60) # credential response can take longer, use longer timeout instead of default one
credential_response = urlopen_with_proxy(credential_request, timeout=60, proxy_url=self.proxy_url) # credential response can take longer, use longer timeout instead of default one
except HTTPError as e:
raise RequestError("credential request failed: {}".format(e.reason))
except URLError as e:
Expand Down Expand Up @@ -719,7 +725,7 @@ def recognize_azure(self, audio_data, key, language="en-US", profanity="masked",
})

try:
response = urlopen(request, timeout=self.operation_timeout)
response = urlopen_with_proxy(request, timeout=self.operation_timeout, proxy_url=self.proxy_url)
except HTTPError as e:
raise RequestError("recognition request failed: {}".format(e.reason))
except URLError as e:
Expand Down Expand Up @@ -774,7 +780,7 @@ def recognize_bing(self, audio_data, key, language="en-US", show_all=False):
start_time = monotonic()

try:
credential_response = urlopen(credential_request, timeout=60) # credential response can take longer, use longer timeout instead of default one
credential_response = urlopen_with_proxy(credential_request, timeout=60, proxy_url=self.proxy_url) # credential response can take longer, use longer timeout instead of default one
except HTTPError as e:
raise RequestError("credential request failed: {}".format(e.reason))
except URLError as e:
Expand Down Expand Up @@ -813,7 +819,7 @@ def recognize_bing(self, audio_data, key, language="en-US", show_all=False):
})

try:
response = urlopen(request, timeout=self.operation_timeout)
response = urlopen_with_proxy(request, timeout=self.operation_timeout, proxy_url=self.proxy_url)
except HTTPError as e:
raise RequestError("recognition request failed: {}".format(e.reason))
except URLError as e:
Expand Down Expand Up @@ -849,7 +855,8 @@ def recognize_lex(self, audio_data, bot_name, bot_alias, user_id, content_type="

client = boto3.client('lex-runtime', aws_access_key_id=access_key_id,
aws_secret_access_key=secret_access_key,
region_name=region)
region_name=region,
config=build_boto3_proxy_config(self.proxy_url))

raw_data = audio_data.get_raw_data(
convert_rate=16000, convert_width=2
Expand Down Expand Up @@ -899,7 +906,7 @@ def recognize_houndify(self, audio_data, client_id, client_key, show_all=False):
"Hound-Client-Authentication": "{};{};{}".format(client_id, request_time, request_signature)
})
try:
response = urlopen(request, timeout=self.operation_timeout)
response = urlopen_with_proxy(request, timeout=self.operation_timeout, proxy_url=self.proxy_url)
except HTTPError as e:
raise RequestError("recognition request failed: {}".format(e.reason))
except URLError as e:
Expand Down Expand Up @@ -944,13 +951,15 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec
'transcribe',
aws_access_key_id=access_key_id,
aws_secret_access_key=secret_access_key,
region_name=region)
region_name=region,
config=build_boto3_proxy_config(self.proxy_url))

s3 = boto3.client(
's3',
aws_access_key_id=access_key_id,
aws_secret_access_key=secret_access_key,
region_name=region)
region_name=region,
config=build_boto3_proxy_config(self.proxy_url))

session = boto3.Session(
aws_access_key_id=access_key_id,
Expand Down Expand Up @@ -1004,7 +1013,8 @@ def recognize_amazon(self, audio_data, bucket_name=None, access_key_id=None, sec
transcript_uri = job['Transcript']['TranscriptFileUri']
import json
import urllib.request
with urllib.request.urlopen(transcript_uri) as json_data:
transcript_request = urllib.request.Request(transcript_uri)
with urlopen_with_proxy(transcript_request, timeout=self.operation_timeout, proxy_url=self.proxy_url) as json_data:
d = json.load(json_data)
confidences = []
for item in d['results']['items']:
Expand Down Expand Up @@ -1093,6 +1103,8 @@ def read_file(filename, chunk_size=5242880):

import requests

proxies = build_requests_proxies(self.proxy_url)

check_existing = audio_data is None and job_name
if check_existing:
# Query status.
Expand All @@ -1101,7 +1113,7 @@ def read_file(filename, chunk_size=5242880):
headers = {
"authorization": api_token,
}
response = requests.get(endpoint, headers=headers)
response = requests.get(endpoint, headers=headers, proxies=proxies)
data = response.json()
status = data['status']

Expand All @@ -1128,7 +1140,8 @@ def read_file(filename, chunk_size=5242880):
headers = {'authorization': api_token}
response = requests.post('https://api.assemblyai.com/v2/upload',
headers=headers,
data=read_file(audio_data))
data=read_file(audio_data),
proxies=proxies)
upload_url = response.json()['upload_url']

# Queue file for transcription.
Expand All @@ -1138,7 +1151,7 @@ def read_file(filename, chunk_size=5242880):
"authorization": api_token,
"content-type": "application/json"
}
response = requests.post(endpoint, json=json, headers=headers)
response = requests.post(endpoint, json=json, headers=headers, proxies=proxies)
data = response.json()
transciption_id = data['id']
exc = TranscriptionNotReady()
Expand Down Expand Up @@ -1175,7 +1188,7 @@ def recognize_ibm(self, audio_data, key, language="en-US", show_all=False):
authorization_value = base64.standard_b64encode("{}:{}".format(username, password).encode("utf-8")).decode("utf-8")
request.add_header("Authorization", "Basic {}".format(authorization_value))
try:
response = urlopen(request, timeout=self.operation_timeout)
response = urlopen_with_proxy(request, timeout=self.operation_timeout, proxy_url=self.proxy_url)
except HTTPError as e:
raise RequestError("recognition request failed: {}".format(e.reason))
except URLError as e:
Expand Down Expand Up @@ -1312,7 +1325,7 @@ def recognize_api(self, audio_data, client_access_token, language="en", session_
if session_id is None: session_id = uuid.uuid4().hex
data = b"--" + boundary.encode("utf-8") + b"\r\n" + b"Content-Disposition: form-data; name=\"request\"\r\n" + b"Content-Type: application/json\r\n" + b"\r\n" + b"{\"v\": \"20150910\", \"sessionId\": \"" + session_id.encode("utf-8") + b"\", \"lang\": \"" + language.encode("utf-8") + b"\"}\r\n" + b"--" + boundary.encode("utf-8") + b"\r\n" + b"Content-Disposition: form-data; name=\"voiceData\"; filename=\"audio.wav\"\r\n" + b"Content-Type: audio/wav\r\n" + b"\r\n" + wav_data + b"\r\n" + b"--" + boundary.encode("utf-8") + b"--\r\n"
request = Request(url, data=data, headers={"Authorization": "Bearer {}".format(client_access_token), "Content-Length": str(len(data)), "Expect": "100-continue", "Content-Type": "multipart/form-data; boundary={}".format(boundary)})
try: response = urlopen(request, timeout=10)
try: response = urlopen_with_proxy(request, timeout=10, proxy_url=getattr(self, 'proxy_url', None))
except HTTPError as e: raise RequestError("recognition request failed: {}".format(e.reason))
except URLError as e: raise RequestError("recognition connection failed: {}".format(e.reason))
response_text = response.read().decode("utf-8")
Expand Down
173 changes: 173 additions & 0 deletions speech_recognition/proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
"""Centralized proxy utilities for SpeechRecognition.

Provides helper functions that build proxy-aware clients for each HTTP
library used in the project (urllib, httpx, requests, boto3, gRPC).

Proxy URL semantics:
None - use system/env proxy settings (default, backward compatible)
"" - explicitly disable proxies
"http://host:port" - use that HTTP proxy
"socks5://host:port" - SOCKS proxy (requires PySocks)
"""

from __future__ import annotations

import contextlib
import os
from urllib.request import (
OpenerDirector,
ProxyHandler,
Request,
build_opener,
urlopen,
)

from speech_recognition.exceptions import SetupError


def build_urllib_opener(proxy_url: str | None) -> OpenerDirector | None:
"""Return an ``OpenerDirector`` configured with *proxy_url*.

Returns ``None`` when *proxy_url* is ``None`` (use default behaviour).
An empty string disables proxying; a ``socks5://`` URL requires
``PySocks`` + ``sockshandler``.
"""
if proxy_url is None:
return None

if proxy_url == "":
return build_opener(ProxyHandler({}))

if proxy_url.startswith("socks"):
try:
from sockshandler import SocksiPyHandler
except ImportError:
raise SetupError(
"SOCKS proxy support requires the PySocks package. "
"Install it with: pip install PySocks"
)

from urllib.parse import urlparse

parsed = urlparse(proxy_url)
import socks

socks_type_map = {
"socks4": socks.SOCKS4,
"socks5": socks.SOCKS5,
"socks5h": socks.SOCKS5,
}
socks_type = socks_type_map.get(parsed.scheme)
if socks_type is None:
raise SetupError(
f"Unsupported SOCKS scheme: {parsed.scheme!r}. "
"Use socks4, socks5, or socks5h."
)

return build_opener(
SocksiPyHandler(
socks_type,
parsed.hostname,
parsed.port or 1080,
username=parsed.username,
password=parsed.password,
)
)

# HTTP/HTTPS proxy
return build_opener(
ProxyHandler({"http": proxy_url, "https": proxy_url})
)


def urlopen_with_proxy(
    request: Request,
    timeout: float | None = None,
    proxy_url: str | None = None,
):
    """Open *request* like ``urlopen()`` while respecting *proxy_url*.

    Args:
        request: the prepared ``urllib.request.Request`` to send.
        timeout: timeout in seconds (fractional values are accepted).  When
            ``None`` the argument is not forwarded at all, so the underlying
            call keeps its own default.
        proxy_url: ``None`` to use plain ``urlopen()``; any other value is
            turned into an opener by ``build_urllib_opener``.

    Returns:
        The response object returned by ``urlopen()`` or ``opener.open()``.
    """
    opener = build_urllib_opener(proxy_url)
    open_request = urlopen if opener is None else opener.open

    # Only forward ``timeout`` when one was given so that ``None`` means
    # "use the default" rather than being passed through explicitly.
    if timeout is None:
        return open_request(request)
    return open_request(request, timeout=timeout)


def build_httpx_client(proxy_url: str | None):
"""Return an ``httpx.Client`` configured with *proxy_url*.

Returns ``None`` when *proxy_url* is ``None`` (caller should use
the SDK default client).
"""
if proxy_url is None:
return None

import httpx

if proxy_url == "":
return httpx.Client(proxy=None)

return httpx.Client(proxy=proxy_url)
Comment on lines +107 to +109
Copy link

Copilot AI Feb 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For proxy_url == "" the intent is to disable proxies, but httpx.Client(proxy=None) is also the default and can still honor environment proxies when trust_env=True (httpx default). Consider constructing the client with trust_env=False (and optionally set trust_env=False when an explicit proxy_url is provided) so the documented semantics are actually enforced.

Suggested change
return httpx.Client(proxy=None)
return httpx.Client(proxy=proxy_url)
# Explicitly disable proxies: ignore environment proxies as well.
return httpx.Client(proxy=None, trust_env=False)
# Use the explicit proxy URL and ignore environment proxy settings.
return httpx.Client(proxy=proxy_url, trust_env=False)

Copilot uses AI. Check for mistakes.


def build_requests_proxies(proxy_url: str | None) -> dict | None:
"""Return a *proxies* dict suitable for ``requests.get/post(proxies=...)``.

Returns ``None`` when *proxy_url* is ``None`` (use default behaviour).
"""
if proxy_url is None:
return None

if proxy_url == "":
return {"http": None, "https": None}

return {"http": proxy_url, "https": proxy_url}


def build_boto3_proxy_config(proxy_url: str | None):
"""Return a ``botocore.config.Config`` with proxy settings.

Returns ``None`` when *proxy_url* is ``None`` (use default behaviour).
"""
if proxy_url is None:
return None

from botocore.config import Config

if proxy_url == "":
return Config(proxies={})

return Config(proxies={"http": proxy_url, "https": proxy_url})


@contextlib.contextmanager
def grpc_proxy_env(proxy_url: str | None):
"""Context manager that temporarily sets gRPC-compatible env vars.

gRPC reads ``http_proxy`` / ``https_proxy`` from the environment.
This sets them for the duration of the ``with`` block, then restores
the previous values.

.. warning:: This is NOT thread-safe.
"""
if proxy_url is None:
yield
return

env_keys = ("http_proxy", "https_proxy",
"HTTP_PROXY", "HTTPS_PROXY")
saved = {k: os.environ.get(k) for k in env_keys}

try:
if proxy_url == "":
for k in env_keys:
os.environ.pop(k, None)
else:
for k in env_keys:
os.environ[k] = proxy_url
yield
finally:
for k, v in saved.items():
if v is None:
os.environ.pop(k, None)
else:
os.environ[k] = v
Loading
Loading