diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b89668c8..83dc8254 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,3 +6,12 @@ repos:
         entry: bash -c 'make format && make check-sort-imports && make check-types'
         language: system
         pass_filenames: false
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.6
+    hooks:
+      - id: codespell
+        name: Check spelling
+        args:
+          - --write-changes
+          - --skip=*.pyc,*.pyo,*.lock,*.git,*.mypy_cache,__pycache__,*.egg-info,.pytest_cache,docs/_build,env,venv,.venv
+          - --ignore-words-list=enginee
diff --git a/docs/user_guide/02_hybrid_queries.ipynb b/docs/user_guide/02_hybrid_queries.ipynb
index 02ec6c5a..9414c07d 100644
--- a/docs/user_guide/02_hybrid_queries.ipynb
+++ b/docs/user_guide/02_hybrid_queries.ipynb
@@ -1090,7 +1090,7 @@
    "source": [
     "## Non-vector Queries\n",
     "\n",
-    "In some cases, you may not want to run a vector query, but just use a ``FilterExpression`` similar to a SQL query. The ``FilterQuery`` class enable this functionality. It is similar to the ``VectorQuery`` class but soley takes a ``FilterExpression``."
+    "In some cases, you may not want to run a vector query, but just use a ``FilterExpression`` similar to a SQL query. The ``FilterQuery`` class enables this functionality. It is similar to the ``VectorQuery`` class but solely takes a ``FilterExpression``."
    ]
   },
   {
@@ -1448,7 +1448,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.2"
+   "version": "3.12.8"
   },
   "orig_nbformat": 4
  },
diff --git a/docs/user_guide/03_llmcache.ipynb b/docs/user_guide/03_llmcache.ipynb
index be604f3a..cd60298b 100644
--- a/docs/user_guide/03_llmcache.ipynb
+++ b/docs/user_guide/03_llmcache.ipynb
@@ -1,16 +1,5 @@
 {
  "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Semantic Caching for LLMs\n",
-    "\n",
-    "RedisVL provides a ``SemanticCache`` interface to utilize Redis' built-in caching capabilities AND vector search in order to store responses from previously-answered questions. This reduces the number of requests and tokens sent to the Large Language Models (LLM) service, decreasing costs and enhancing application throughput (by reducing the time taken to generate responses).\n",
-    "\n",
-    "This notebook will go over how to use Redis as a Semantic Cache for your applications"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -110,7 +99,7 @@
     "    name=\"llmcache\",                     # underlying search index name\n",
     "    redis_url=\"redis://localhost:6379\",  # redis connection url string\n",
     "    distance_threshold=0.1,               # semantic cache distance threshold\n",
-    "    vectorizer=HFTextVectorizer(\"redis/langcache-embed-v1\"), # embdding model\n",
+    "    vectorizer=HFTextVectorizer(\"redis/langcache-embed-v1\"), # embedding model\n",
     ")"
    ]
   },
   {
@@ -315,12 +304,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Customize the Distance Threshhold\n",
+    "## Customize the Distance Threshold\n",
     "\n",
-    "For most use cases, the right semantic similarity threshhold is not a fixed quantity. Depending on the choice of embedding model,\n",
-    "the properties of the input query, and even business use case -- the threshhold might need to change. \n",
+    "For most use cases, the right semantic similarity threshold is not a fixed quantity. Depending on the choice of embedding model,\n",
+    "the properties of the input query, and even business use case -- the threshold might need to change. \n",
\n", "\n", - "Fortunately, you can seamlessly adjust the threshhold at any point like below:" + "Fortunately, you can seamlessly adjust the threshold at any point like below:" ] }, { @@ -930,7 +919,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.2" + "version": "3.12.8" }, "orig_nbformat": 4 }, diff --git a/docs/user_guide/04_vectorizers.ipynb b/docs/user_guide/04_vectorizers.ipynb index b139bdda..03097fad 100644 --- a/docs/user_guide/04_vectorizers.ipynb +++ b/docs/user_guide/04_vectorizers.ipynb @@ -175,7 +175,7 @@ } ], "source": [ - "# openai also supports asyncronous requests, which we can use to speed up the vectorization process.\n", + "# openai also supports asynchronous requests, which we can use to speed up the vectorization process.\n", "embeddings = await oai.aembed_many(sentences)\n", "print(\"Number of Embeddings:\", len(embeddings))\n" ] @@ -495,7 +495,7 @@ "\n", "mistral = MistralAITextVectorizer()\n", "\n", - "# embed a sentence using their asyncronous method\n", + "# embed a sentence using their asynchronous method\n", "test = await mistral.aembed(\"This is a test sentence.\")\n", "print(\"Vector dimensions: \", len(test))\n", "print(test[:10])" diff --git a/docs/user_guide/05_hash_vs_json.ipynb b/docs/user_guide/05_hash_vs_json.ipynb index e88c5e7b..550bec55 100644 --- a/docs/user_guide/05_hash_vs_json.ipynb +++ b/docs/user_guide/05_hash_vs_json.ipynb @@ -282,7 +282,7 @@ "from redisvl.query import VectorQuery\n", "from redisvl.query.filter import Tag, Text, Num\n", "\n", - "t = (Tag(\"credit_score\") == \"high\") & (Text(\"job\") % \"enginee*\") & (Num(\"age\") > 17)\n", + "t = (Tag(\"credit_score\") == \"high\") & (Text(\"job\") % \"enginee*\") & (Num(\"age\") > 17) # codespell:ignore enginee\n", "\n", "v = VectorQuery(\n", " vector=[0.1, 0.1, 0.5],\n", diff --git a/docs/user_guide/07_message_history.ipynb b/docs/user_guide/07_message_history.ipynb index bfb5a3b3..ff3195e0 100644 --- a/docs/user_guide/07_message_history.ipynb +++ b/docs/user_guide/07_message_history.ipynb @@ -11,7 +11,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Large Language Models are inherently stateless and have no knowledge of previous interactions with a user, or even of previous parts of the current conversation. While this may not be noticable when asking simple questions, it becomes a hinderance when engaging in long running conversations that rely on conversational context.\n", + "Large Language Models are inherently stateless and have no knowledge of previous interactions with a user, or even of previous parts of the current conversation. While this may not be noticeable when asking simple questions, it becomes a hindrance when engaging in long running conversations that rely on conversational context.\n", "\n", "The solution to this problem is to append the previous conversation history to each subsequent call to the LLM.\n", "\n", @@ -276,7 +276,7 @@ "source": [ "You can adjust the degree of semantic similarity needed to be included in your context.\n", "\n", - "Setting a distance threshold close to 0.0 will require an exact semantic match, while a distance threshold of 1.0 will include everthing." + "Setting a distance threshold close to 0.0 will require an exact semantic match, while a distance threshold of 1.0 will include everything." 
    ]
   },
   {
diff --git a/pyproject.toml b/pyproject.toml
index 61cc03c8..0c68803f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,6 +75,7 @@ dev = [
     "types-pyopenssl",
     "testcontainers>=4.3.1,<5",
     "cryptography>=44.0.1 ; python_version > '3.9.1'",
+    "codespell>=2.4.1,<3",
 ]
 docs = [
     "sphinx>=4.4.0",
@@ -118,3 +119,5 @@ asyncio_mode = "auto"
 [tool.mypy]
 warn_unused_configs = true
 ignore_missing_imports = true
+exclude = ["env", "venv", ".venv"]
+
diff --git a/redisvl/extensions/cache/llm/semantic.py b/redisvl/extensions/cache/llm/semantic.py
index 8d0a4f40..d90a130e 100644
--- a/redisvl/extensions/cache/llm/semantic.py
+++ b/redisvl/extensions/cache/llm/semantic.py
@@ -385,7 +385,7 @@ def check(
         .. code-block:: python

             response = cache.check(
-                prompt="What is the captial city of France?"
+                prompt="What is the capital city of France?"
             )
         """
         if not any([prompt, vector]):
@@ -476,7 +476,7 @@ async def acheck(
         .. code-block:: python

             response = await cache.acheck(
-                prompt="What is the captial city of France?"
+                prompt="What is the capital city of France?"
             )
         """
         aindex = await self._get_async_index()
@@ -588,7 +588,7 @@ def store(
         .. code-block:: python

             key = cache.store(
-                prompt="What is the captial city of France?",
+                prompt="What is the capital city of France?",
                 response="Paris",
                 metadata={"city": "Paris", "country": "France"}
             )
@@ -656,7 +656,7 @@ async def astore(
         .. code-block:: python

             key = await cache.astore(
-                prompt="What is the captial city of France?",
+                prompt="What is the capital city of France?",
                 response="Paris",
                 metadata={"city": "Paris", "country": "France"}
             )
diff --git a/redisvl/extensions/constants.py b/redisvl/extensions/constants.py
index d6d7945a..a7c78a18 100644
--- a/redisvl/extensions/constants.py
+++ b/redisvl/extensions/constants.py
@@ -1,7 +1,7 @@
 """
 Constants used within the extension classes SemanticCache, BaseMessageHistory,
 MessageHistory, SemanticMessageHistory and SemanticRouter.
-These constants are also used within theses classes corresponding schema.
+These constants are also used within these classes' corresponding schemas.
 """

 # BaseMessageHistory
diff --git a/redisvl/extensions/message_history/base_history.py b/redisvl/extensions/message_history/base_history.py
index 72825877..0b40be44 100644
--- a/redisvl/extensions/message_history/base_history.py
+++ b/redisvl/extensions/message_history/base_history.py
@@ -60,7 +60,7 @@ def get_recent(
         raw: bool = False,
         session_tag: Optional[str] = None,
     ) -> Union[List[str], List[Dict[str, str]]]:
-        """Retreive the recent conversation history in sequential order.
+        """Retrieve the recent conversation history in sequential order.

         Args:
             top_k (int): The number of previous exchanges to return. Default is 5.
diff --git a/redisvl/extensions/message_history/semantic_history.py b/redisvl/extensions/message_history/semantic_history.py
index 529a9a86..0c6906d5 100644
--- a/redisvl/extensions/message_history/semantic_history.py
+++ b/redisvl/extensions/message_history/semantic_history.py
@@ -248,7 +248,7 @@ def get_recent(
         raw: bool = False,
         session_tag: Optional[str] = None,
     ) -> Union[List[str], List[Dict[str, str]]]:
-        """Retreive the recent message history in sequential order.
+        """Retrieve the recent message history in sequential order.

         Args:
             top_k (int): The number of previous exchanges to return. Default is 5.
diff --git a/redisvl/query/filter.py b/redisvl/query/filter.py
index d489d935..3b6d8496 100644
--- a/redisvl/query/filter.py
+++ b/redisvl/query/filter.py
@@ -624,7 +624,7 @@ def __str__(self) -> str:
         if not self._filter and not self._operator:
             raise ValueError("Improperly initialized FilterExpression")

-        # if theres an operator, combine expressions accordingly
+        # if there's an operator, combine expressions accordingly
         if self._operator:
             if not isinstance(self._left, FilterExpression) or not isinstance(
                 self._right, FilterExpression
diff --git a/redisvl/redis/connection.py b/redisvl/redis/connection.py
index 78a4c10c..6f1c2661 100644
--- a/redisvl/redis/connection.py
+++ b/redisvl/redis/connection.py
@@ -402,7 +402,7 @@ def validate_sync_redis(
     # Fall back to a simple log echo
     # For RedisCluster, echo is not available
     if hasattr(redis_client, "echo"):
-        await redis_client.echo(_lib_name)
+        redis_client.echo(_lib_name)

     # Get list of modules
     installed_modules = RedisConnectionFactory.get_modules(redis_client)
diff --git a/redisvl/utils/vectorize/text/custom.py b/redisvl/utils/vectorize/text/custom.py
index a4e80787..0b8f425e 100644
--- a/redisvl/utils/vectorize/text/custom.py
+++ b/redisvl/utils/vectorize/text/custom.py
@@ -41,7 +41,7 @@ class CustomTextVectorizer(BaseVectorizer):
     This vectorizer is designed to accept a provided callable text vectorizer
     and provides a class definition to allow for compatibility with RedisVL.
     The vectorizer may support both synchronous and asynchronous operations which
-    allows for batch processing of texts, but at a minimum only syncronous embedding
+    allows for batch processing of texts, but at a minimum only synchronous embedding
     is required to satisfy the 'embed()' method.

     You can optionally enable caching to improve performance when generating
@@ -94,8 +94,8 @@ def __init__(
         Args:
             embed (Callable): a Callable function that accepts a string object and returns a list of floats.
             embed_many (Optional[Callable]): a Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None.
-            aembed (Optional[Callable]): an asyncronous Callable function that accepts a string object and returns a lists of floats. Defaults to None.
-            aembed_many (Optional[Callable]): an asyncronous Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None.
+            aembed (Optional[Callable]): an asynchronous Callable function that accepts a string object and returns a list of floats. Defaults to None.
+            aembed_many (Optional[Callable]): an asynchronous Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None.
             dtype (str): the default datatype to use when embedding text as byte arrays. Used when setting `as_buffer=True` in calls to embed() and embed_many(). Defaults to 'float32'.
diff --git a/tests/integration/test_llmcache.py b/tests/integration/test_llmcache.py
index 6da425de..dc33f680 100644
--- a/tests/integration/test_llmcache.py
+++ b/tests/integration/test_llmcache.py
@@ -744,7 +744,7 @@ def test_cache_filtering(cache_with_filters):
     )
     assert len(results) == 4

-    # test no results are returned if we pass a nonexistant tag
+    # test no results are returned if we pass a nonexistent tag
     bad_filter = Tag("label") == "bad tag"
     results = cache_with_filters.check(
         "test prompt 1", filter_expression=bad_filter, num_results=5
diff --git a/tests/unit/test_aggregation_types.py b/tests/unit/test_aggregation_types.py
index aaf34d5d..55adcdb8 100644
--- a/tests/unit/test_aggregation_types.py
+++ b/tests/unit/test_aggregation_types.py
@@ -26,7 +26,7 @@ def test_aggregate_hybrid_query():

     assert isinstance(hybrid_query, AggregateRequest)

-    # Check defaut properties
+    # Check default properties
     assert hybrid_query._text == sample_text
     assert hybrid_query._text_field == text_field_name
     assert hybrid_query._vector == sample_vector
diff --git a/tests/unit/test_filter.py b/tests/unit/test_filter.py
index 7b2a1261..b4890028 100644
--- a/tests/unit/test_filter.py
+++ b/tests/unit/test_filter.py
@@ -37,8 +37,8 @@
         ("==", "tag/with/slashes", "@tag_field:{tag\\/with\\/slashes}"),
         (
             "==",
-            ["hypen-tag", "under_score", "dot.tag"],
-            "@tag_field:{hypen\\-tag|under_score|dot\\.tag}",
+            ["hyphen-tag", "under_score", "dot.tag"],
+            "@tag_field:{hyphen\\-tag|under_score|dot\\.tag}",
         ),
         # ...additional unique cases as desired...
     ],
diff --git a/tests/unit/test_query_types.py b/tests/unit/test_query_types.py
index 9e051efc..9254ffe4 100644
--- a/tests/unit/test_query_types.py
+++ b/tests/unit/test_query_types.py
@@ -367,9 +367,9 @@ def test_string_filter_expressions(query):
     assert query.query_string().__contains__("hello world")

     # Optional flag
-    query.set_filter("~(@desciption:(hello | world))")
-    assert query._filter_expression == "~(@desciption:(hello | world))"
-    assert query.query_string().__contains__("~(@desciption:(hello | world))")
+    query.set_filter("~(@description:(hello | world))")
+    assert query._filter_expression == "~(@description:(hello | world))"
+    assert query.query_string().__contains__("~(@description:(hello | world))")


 def test_vector_query_hybrid_policy():
diff --git a/uv.lock b/uv.lock
index acc149f4..e469007a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -545,6 +545,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215 },
 ]

+[[package]]
+name = "codespell"
+version = "2.4.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/e0/709453393c0ea77d007d907dd436b3ee262e28b30995ea1aa36c6ffbccaf/codespell-2.4.1.tar.gz", hash = "sha256:299fcdcb09d23e81e35a671bbe746d5ad7e8385972e65dbb833a2eaac33c01e5", size = 344740 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/20/01/b394922252051e97aab231d416c86da3d8a6d781eeadcdca1082867de64e/codespell-2.4.1-py3-none-any.whl", hash = "sha256:3dadafa67df7e4a3dbf51e0d7315061b80d265f9552ebd699b3dd6834b47e425", size = 344501 },
+]
+
 [[package]]
 name = "cohere"
 version = "5.15.0"
@@ -3631,6 +3640,7 @@ voyageai = [
 [package.dev-dependencies]
 dev = [
     { name = "black" },
+    { name = "codespell" },
     { name = "cryptography", marker = "python_full_version >= '3.10'" },
     { name = "isort" },
{ name = "mypy" }, @@ -3681,6 +3691,7 @@ provides-extras = ["mistralai", "openai", "nltk", "cohere", "voyageai", "sentenc [package.metadata.requires-dev] dev = [ { name = "black", specifier = ">=25.1.0,<26" }, + { name = "codespell", specifier = ">=2.4.1,<3" }, { name = "cryptography", marker = "python_full_version >= '3.10'", specifier = ">=44.0.1" }, { name = "isort", specifier = ">=5.6.4,<6" }, { name = "mypy", specifier = ">=1.11.0,<2" },