diff --git a/content/integrate/redisvl/user_guide/_index.md b/content/integrate/redisvl/user_guide/_index.md index 83e9a976e..61f0ea04a 100644 --- a/content/integrate/redisvl/user_guide/_index.md +++ b/content/integrate/redisvl/user_guide/_index.md @@ -84,7 +84,7 @@ User guides provide helpful resources for using RedisVL and its different compon * [Optimize](threshold_optimization/#optimize) * [Test it out](threshold_optimization/#test-it-out) * [Cleanup](threshold_optimization/#cleanup) -* [Release Guides](release_guide/index/) +* [Release Guides](release_guide/) * [0.5.1 Feature Overview](release_guide/0_5_0_release/) * [HybridQuery class](release_guide/0_5_0_release/#hybridquery-class) * [TextQueries](release_guide/0_5_0_release/#textqueries) diff --git a/content/integrate/redisvl/user_guide/release_guide/0_5_0_release.md b/content/integrate/redisvl/user_guide/release_guide/0_5_0_release.md new file mode 100644 index 000000000..4e5c0bfdd --- /dev/null +++ b/content/integrate/redisvl/user_guide/release_guide/0_5_0_release.md @@ -0,0 +1,545 @@ +--- +linkTitle: 0.5.1 feature overview +title: 0.5.1 Feature Overview +type: integration +--- + + +This notebook provides an overview of what's new with the 0.5.1 release of redisvl. It also highlights changes and potential enhancements for existing usage. + +## What's new? 
+ +- Hybrid query and text query classes +- Threshold optimizer classes +- Schema validation +- Timestamp filters +- Batched queries +- Vector normalization +- Hybrid policy on knn with filters + +## Define and load index for examples + + +```python +from redisvl.utils.vectorize import HFTextVectorizer +from redisvl.index import SearchIndex +import datetime as dt + +import warnings +warnings.filterwarnings("ignore", category=UserWarning, module="redis") + +# Embedding model +emb_model = HFTextVectorizer() + +REDIS_URL = "redis://localhost:6379/0" +NOW = dt.datetime.now() + +job_data = [ + { + "job_title": "Software Engineer", + "job_description": "Develop and maintain web applications using JavaScript, React, and Node.js.", + "posted": (NOW - dt.timedelta(days=1)).timestamp() # day ago + }, + { + "job_title": "Data Analyst", + "job_description": "Analyze large datasets to provide business insights and create data visualizations.", + "posted": (NOW - dt.timedelta(days=7)).timestamp() # week ago + }, + { + "job_title": "Marketing Manager", + "job_description": "Develop and implement marketing strategies to drive brand awareness and customer engagement.", + "posted": (NOW - dt.timedelta(days=30)).timestamp() # month ago + } +] + +job_data = [{**job, "job_embedding": emb_model.embed(job["job_description"], as_buffer=True)} for job in job_data] + + +job_schema = { + "index": { + "name": "jobs", + "prefix": "jobs", + "storage_type": "hash", + }, + "fields": [ + {"name": "job_title", "type": "text"}, + {"name": "job_description", "type": "text"}, + {"name": "posted", "type": "numeric"}, + { + "name": "job_embedding", + "type": "vector", + "attrs": { + "dims": 768, + "distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float32" + } + + } + ], +} + +index = SearchIndex.from_dict(job_schema, redis_url=REDIS_URL) +index.create(overwrite=True, drop=True) +index.load(job_data) +``` + + 12:44:52 redisvl.index.index INFO Index already exists, overwriting. 
+ + + + + + ['jobs:01JR0V1SA29RVD9AAVSTBV9P5H', + 'jobs:01JR0V1SA209KMVHMD7G54P3H5', + 'jobs:01JR0V1SA23ZE7BRERXTZWC33Z'] + + + +# HybridQuery class + +Perform hybrid lexical (BM25) and vector search where results are ranked by: `hybrid_score = (1-alpha)*text_score + alpha*vector_similarity`. + + +```python +from redisvl.query import HybridQuery + +text = "Find a job as a where you develop software" +vec = emb_model.embed(text, as_buffer=True) + +query = HybridQuery( + text=text, + text_field_name="job_description", + vector=vec, + vector_field_name="job_embedding", + alpha=0.7, + num_results=10, + return_fields=["job_title"], +) + +results = index.query(query) +results +``` + + + + + [{'vector_distance': '0.61871612072', + 'job_title': 'Software Engineer', + 'vector_similarity': '0.69064193964', + 'text_score': '49.6242910712', + 'hybrid_score': '15.3707366791'}, + {'vector_distance': '0.937997639179', + 'job_title': 'Marketing Manager', + 'vector_similarity': '0.53100118041', + 'text_score': '49.6242910712', + 'hybrid_score': '15.2589881476'}, + {'vector_distance': '0.859166145325', + 'job_title': 'Data Analyst', + 'vector_similarity': '0.570416927338', + 'text_score': '0', + 'hybrid_score': '0.399291849136'}] + + + +# TextQueries + +TextQueries make it easy to perform pure lexical search with redisvl. + + +```python +from redisvl.query import TextQuery + +text = "Find where you develop software" + +query = TextQuery( + text=text, + text_field_name="job_description", + return_fields=["job_title"], + num_results=10, +) + +results = index.query(query) +results +``` + + + + + [{'id': 'jobs:01JR0V1SA29RVD9AAVSTBV9P5H', + 'score': 49.62429107116745, + 'job_title': 'Software Engineer'}, + {'id': 'jobs:01JR0V1SA23ZE7BRERXTZWC33Z', + 'score': 49.62429107116745, + 'job_title': 'Marketing Manager'}] + + + +# Threshold optimization + +In redisvl 0.5.0, we added the ability to quickly tune the distance threshold of either your semantic cache or semantic router using test data examples. 
+ +For a step by step guide see: [09_threshold_optimization.ipynb](../09_threshold_optimization.ipynb). + +For a more advanced routing example see: [this example](https://github.com/redis-developer/redis-ai-resources/blob/main/python-recipes/semantic-router/01_routing_optimization.ipynb). + + +```python +from redisvl.utils.optimize import CacheThresholdOptimizer +from redisvl.extensions.cache.llm import SemanticCache + +sem_cache = SemanticCache( + name="sem_cache", # underlying search index name + redis_url="redis://localhost:6379", # redis connection url string + distance_threshold=0.5 # semantic cache distance threshold +) + +paris_key = sem_cache.store(prompt="what is the capital of france?", response="paris") +rabat_key = sem_cache.store(prompt="what is the capital of morocco?", response="rabat") + +test_data = [ + { + "query": "What's the capital of Britain?", + "query_match": "" + }, + { + "query": "What's the capital of France??", + "query_match": paris_key + }, + { + "query": "What's the capital city of Morocco?", + "query_match": rabat_key + }, +] + +print(f"\nDistance threshold before: {sem_cache.distance_threshold} \n") +optimizer = CacheThresholdOptimizer(sem_cache, test_data) +optimizer.optimize() +print(f"\nDistance threshold after: {sem_cache.distance_threshold} \n") +``` + + + Distance threshold before: 0.5 + + + Distance threshold after: 0.13050847457627118 + + + +# Schema validation + +This feature makes it easier to make sure your data is in the right format. 
To demo this we will create a new index with the `validate_on_load` flag set to `True` + + +```python +# NBVAL_SKIP +from redisvl.index import SearchIndex + +# sample schema +car_schema = { + "index": { + "name": "cars", + "prefix": "cars", + "storage_type": "json", + }, + "fields": [ + {"name": "make", "type": "text"}, + {"name": "model", "type": "text"}, + {"name": "description", "type": "text"}, + {"name": "mpg", "type": "numeric"}, + { + "name": "car_embedding", + "type": "vector", + "attrs": { + "dims": 3, + "distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float32" + } + + } + ], +} + +sample_data_bad = [ + { + "make": "Toyota", + "model": "Camry", + "description": "A reliable sedan with great fuel economy.", + "mpg": 28, + "car_embedding": [0.1, 0.2, 0.3] + }, + { + "make": "Honda", + "model": "CR-V", + "description": "A practical SUV with advanced technology.", + # incorrect type will throw an error + "mpg": "twenty-two", + "car_embedding": [0.4, 0.5, 0.6] + } +] + +# this should now throw an error +car_index = SearchIndex.from_dict(car_schema, redis_url=REDIS_URL, validate_on_load=True) +car_index.create(overwrite=True) + +try: + car_index.load(sample_data_bad) +except Exception as e: + print(f"Error loading data: {e}") +``` + + 16:20:25 redisvl.index.index ERROR Schema validation error while loading data + Traceback (most recent call last): + File "/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/redisvl/index/storage.py", line 204, in _preprocess_and_validate_objects + processed_obj = self._validate(processed_obj) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/redisvl/index/storage.py", line 160, in _validate + return validate_object(self.index_schema, obj) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/redisvl/schema/validation.py", line 276, in validate_object + 
validated = model_class.model_validate(flat_obj) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/pydantic/main.py", line 627, in model_validate + return cls.__pydantic_validator__.validate_python( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + pydantic_core._pydantic_core.ValidationError: 2 validation errors for cars__PydanticModel + mpg.int + Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='twenty-two', input_type=str] + For further information visit https://errors.pydantic.dev/2.10/v/int_parsing + mpg.float + Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='twenty-two', input_type=str] + For further information visit https://errors.pydantic.dev/2.10/v/float_parsing + + The above exception was the direct cause of the following exception: + + Traceback (most recent call last): + File "/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/redisvl/index/index.py", line 615, in load + return self._storage.write( + ^^^^^^^^^^^^^^^^^^^^ + File "/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/redisvl/index/storage.py", line 265, in write + prepared_objects = self._preprocess_and_validate_objects( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/redisvl/index/storage.py", line 211, in _preprocess_and_validate_objects + raise SchemaValidationError(str(e), index=i) from e + redisvl.exceptions.SchemaValidationError: Validation failed for object at index 1: 2 validation errors for cars__PydanticModel + mpg.int + Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='twenty-two', input_type=str] + For further information visit https://errors.pydantic.dev/2.10/v/int_parsing + mpg.float + Input should be a valid number, unable to parse 
string as a number [type=float_parsing, input_value='twenty-two', input_type=str] + For further information visit https://errors.pydantic.dev/2.10/v/float_parsing + Error loading data: Validation failed for object at index 1: 2 validation errors for cars__PydanticModel + mpg.int + Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='twenty-two', input_type=str] + For further information visit https://errors.pydantic.dev/2.10/v/int_parsing + mpg.float + Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='twenty-two', input_type=str] + For further information visit https://errors.pydantic.dev/2.10/v/float_parsing + + +# Timestamp filters + +In Redis, datetime objects are stored as numeric epoch times. The `Timestamp` filter makes it easier to query these fields by handling the conversion for you. + + +```python +from redisvl.query import FilterQuery +from redisvl.query.filter import Timestamp + +# find all jobs +ts = Timestamp("posted") < NOW # now datetime created above + +filter_query = FilterQuery( + return_fields=["job_title", "job_description", "posted"], + filter_expression=ts, + num_results=10, +) +res = index.query(filter_query) +res +``` + + + + + [{'id': 'jobs:01JQYMYZBA6NM6DX9YW35MCHJZ', + 'job_title': 'Software Engineer', + 'job_description': 'Develop and maintain web applications using JavaScript, React, and Node.js.', + 'posted': '1743625199.9'}, + {'id': 'jobs:01JQYMYZBABXYR96H96SQ99ZPS', + 'job_title': 'Data Analyst', + 'job_description': 'Analyze large datasets to provide business insights and create data visualizations.', + 'posted': '1743106799.9'}, + {'id': 'jobs:01JQYMYZBAGEBDS270EZADQ1TM', + 'job_title': 'Marketing Manager', + 'job_description': 'Develop and implement marketing strategies to drive brand awareness and customer engagement.', + 'posted': '1741123199.9'}] + + + + +```python +# jobs posted in the last 3 days => 1 job +ts = 
Timestamp("posted") > NOW - dt.timedelta(days=3) + +filter_query = FilterQuery( + return_fields=["job_title", "job_description", "posted"], + filter_expression=ts, + num_results=10, +) +res = index.query(filter_query) +res +``` + + + + + [{'id': 'jobs:01JQYMYZBA6NM6DX9YW35MCHJZ', + 'job_title': 'Software Engineer', + 'job_description': 'Develop and maintain web applications using JavaScript, React, and Node.js.', + 'posted': '1743625199.9'}] + + + + +```python +# more than 3 days ago but less than 14 days ago => 1 job +ts = Timestamp("posted").between( + NOW - dt.timedelta(days=14), + NOW - dt.timedelta(days=3), +) + +filter_query = FilterQuery( + return_fields=["job_title", "job_description", "posted"], + filter_expression=ts, + num_results=10, +) + +res = index.query(filter_query) +res +``` + + + + + [{'id': 'jobs:01JQYMYZBABXYR96H96SQ99ZPS', + 'job_title': 'Data Analyst', + 'job_description': 'Analyze large datasets to provide business insights and create data visualizations.', + 'posted': '1743106799.9'}] + + + +# Batch search + +This enhancement allows you to speed up the execution of queries by reducing the impact of network latency. + + +```python +import time +num_queries = 200 + +start = time.time() +for i in range(num_queries): + # run the same filter query + res = index.query(filter_query) +end = time.time() +print(f"Time taken for {num_queries} queries: {end - start:.2f} seconds") +``` + + Time taken for 200 queries: 0.11 seconds + + + +```python +batched_queries = [filter_query] * num_queries + +start = time.time() + +index.batch_search(batched_queries, batch_size=10) + +end = time.time() +print(f"Time taken for {num_queries} batched queries: {end - start:.2f} seconds") +``` + + Time taken for 200 batched queries: 0.03 seconds + + +# Vector normalization + +By default, Redis returns the vector cosine distance when performing a search, which yields a value between 0 and 2, where 0 represents a perfect match. 
However, you may sometimes prefer a similarity score between 0 and 1, where 1 indicates a perfect match. When enabled, the `normalize_vector_distance` flag performs the conversion for you. Additionally, if this flag is set to true for L2 distance, it normalizes the Euclidean distance to a value between 0 and 1 as well. + + + +```python +from redisvl.query import VectorQuery + +query = VectorQuery( + vector=emb_model.embed("Software Engineer", as_buffer=True), + vector_field_name="job_embedding", + return_fields=["job_title", "job_description", "posted"], + normalize_vector_distance=True, +) + +res = index.query(query) +res +``` + + + + + [{'id': 'jobs:01JQYMYZBA6NM6DX9YW35MCHJZ', + 'vector_distance': '0.7090711295605', + 'job_title': 'Software Engineer', + 'job_description': 'Develop and maintain web applications using JavaScript, React, and Node.js.', + 'posted': '1743625199.9'}, + {'id': 'jobs:01JQYMYZBABXYR96H96SQ99ZPS', + 'vector_distance': '0.6049451231955', + 'job_title': 'Data Analyst', + 'job_description': 'Analyze large datasets to provide business insights and create data visualizations.', + 'posted': '1743106799.9'}, + {'id': 'jobs:01JQYMYZBAGEBDS270EZADQ1TM', + 'vector_distance': '0.553376108408', + 'job_title': 'Marketing Manager', + 'job_description': 'Develop and implement marketing strategies to drive brand awareness and customer engagement.', + 'posted': '1741123199.9'}] + + + +# Hybrid policy on knn with filters + +Within the default redis client, you can set the `HYBRID_POLICY`, which specifies the filter mode to use during vector search with filters. It can take the values `BATCHES` or `ADHOC_BF`. Previously, this option was not exposed by redisvl. 
+ + +```python +from redisvl.query.filter import Text + +filter = Text("job_description") % "Develop" + +query = VectorQuery( + vector=emb_model.embed("Software Engineer", as_buffer=True), + vector_field_name="job_embedding", + return_fields=["job_title", "job_description", "posted"], + hybrid_policy="BATCHES" +) + +query.set_filter(filter) + +res = index.query(query) +res +``` + + + + + [{'id': 'jobs:01JQYMYZBA6NM6DX9YW35MCHJZ', + 'vector_distance': '0.581857740879', + 'job_title': 'Software Engineer', + 'job_description': 'Develop and maintain web applications using JavaScript, React, and Node.js.', + 'posted': '1743625199.9'}, + {'id': 'jobs:01JQYMYZBAGEBDS270EZADQ1TM', + 'vector_distance': '0.893247783184', + 'job_title': 'Marketing Manager', + 'job_description': 'Develop and implement marketing strategies to drive brand awareness and customer engagement.', + 'posted': '1741123199.9'}] + + diff --git a/content/integrate/redisvl/user_guide/release_guide/_index.md b/content/integrate/redisvl/user_guide/release_guide/_index.md new file mode 100644 index 000000000..4b8a392d5 --- /dev/null +++ b/content/integrate/redisvl/user_guide/release_guide/_index.md @@ -0,0 +1,23 @@ +--- +linkTitle: Release guides +title: Release Guides +type: integration +hideListLinks: true +--- + + +This section contains guidelines and information for RedisVL releases. 
+ + + +* [0.5.1 Feature Overview](0_5_0_release/) + * [What’s new?](0_5_0_release/#what-s-new) + * [Define and load index for examples](0_5_0_release/#define-and-load-index-for-examples) +* [HybridQuery class](0_5_0_release/#hybridquery-class) +* [TextQueries](0_5_0_release/#textqueries) +* [Threshold optimization](0_5_0_release/#threshold-optimization) +* [Schema validation](0_5_0_release/#schema-validation) +* [Timestamp filters](0_5_0_release/#timestamp-filters) +* [Batch search](0_5_0_release/#batch-search) +* [Vector normalization](0_5_0_release/#vector-normalization) +* [Hybrid policy on knn with filters](0_5_0_release/#hybrid-policy-on-knn-with-filters)