Skip to content

Commit 7523a54

Browse files
committed
fix: improve key separator handling to avoid double separators (#368)
Normalize key prefixes by removing trailing separators when constructing Redis keys to prevent double separator issues (e.g., "user::123" becomes "user:123"). Changes: - Fix BaseStorage._key() to strip trailing separators from prefix - Update SemanticRouter to use index's key_separator instead of hardcoded ':' - Extract route pattern generation into reusable _route_pattern() method - Fix scan pattern generation to respect custom separators
1 parent b32ed95 commit 7523a54

File tree

3 files changed

+317
-10
lines changed

3 files changed

+317
-10
lines changed

redisvl/extensions/router/semantic.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,25 @@ def update_route_thresholds(self, route_thresholds: Dict[str, Optional[float]]):
221221

222222
@staticmethod
223223
def _route_ref_key(index: SearchIndex, route_name: str, reference_hash: str) -> str:
224-
"""Generate the route reference key."""
225-
return f"{index.prefix}:{route_name}:{reference_hash}"
224+
"""Generate the route reference key using the index's key_separator."""
225+
sep = index.key_separator
226+
# Normalize prefix to avoid double separators
227+
prefix = index.prefix.rstrip(sep) if sep and index.prefix else index.prefix
228+
if prefix:
229+
return f"{prefix}{sep}{route_name}{sep}{reference_hash}"
230+
else:
231+
return f"{route_name}{sep}{reference_hash}"
232+
233+
@staticmethod
234+
def _route_pattern(index: SearchIndex, route_name: str) -> str:
235+
"""Generate a search pattern for route references."""
236+
sep = index.key_separator
237+
# Normalize prefix to avoid double separators
238+
prefix = index.prefix.rstrip(sep) if sep and index.prefix else index.prefix
239+
if prefix:
240+
return f"{prefix}{sep}{route_name}{sep}*"
241+
else:
242+
return f"{route_name}{sep}*"
226243

227244
def _add_routes(self, routes: List[Route]):
228245
"""Add routes to the router and index.
@@ -731,12 +748,12 @@ def get_route_references(
731748
queries = self._make_filter_queries(reference_ids)
732749
elif route_name:
733750
if not keys:
734-
keys = scan_by_pattern(
735-
self._index.client, f"{self._index.prefix}:{route_name}:*" # type: ignore
736-
)
751+
pattern = self._route_pattern(self._index, route_name)
752+
keys = scan_by_pattern(self._index.client, pattern) # type: ignore
737753

754+
sep = self._index.key_separator
738755
queries = self._make_filter_queries(
739-
[key.split(":")[-1] for key in convert_bytes(keys)]
756+
[key.split(sep)[-1] for key in convert_bytes(keys)]
740757
)
741758
else:
742759
raise ValueError(
@@ -769,9 +786,8 @@ def delete_route_references(
769786
res = self._index.batch_query(queries)
770787
keys = [r[0]["id"] for r in res if len(r) > 0]
771788
elif not keys:
772-
keys = scan_by_pattern(
773-
self._index.client, f"{self._index.prefix}:{route_name}:*" # type: ignore
774-
)
789+
pattern = self._route_pattern(self._index, route_name)
790+
keys = scan_by_pattern(self._index.client, pattern) # type: ignore
775791

776792
if not keys:
777793
raise ValueError(f"No references found for route {route_name}")

redisvl/index/storage.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,15 @@ def _key(id: str, prefix: str, key_separator: str) -> str:
8181
if not prefix:
8282
return id
8383
else:
84-
return f"{prefix}{key_separator}{id}"
84+
# Normalize prefix by removing trailing separators to avoid doubles
85+
normalized_prefix = (
86+
prefix.rstrip(key_separator) if key_separator else prefix
87+
)
88+
if normalized_prefix:
89+
return f"{normalized_prefix}{key_separator}{id}"
90+
else:
91+
# If prefix was only separators, just return the id
92+
return id
8593

8694
def _create_key(self, obj: Dict[str, Any], id_field: Optional[str] = None) -> str:
8795
"""Construct a Redis key for a given object, optionally using a
Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
"""
2+
Test proper handling of key separators and prefixes.
3+
4+
These tests verify that key separators are handled correctly when:
5+
1. Prefix ends with the separator
6+
2. Custom separators are used
7+
3. Keys are constructed in different components
8+
"""
9+
10+
import pytest
11+
from redis import Redis
12+
from redis.commands.search.index_definition import IndexType
13+
14+
from redisvl.extensions.router import Route, SemanticRouter
15+
from redisvl.index import SearchIndex
16+
from redisvl.index.storage import HashStorage, JsonStorage
17+
from redisvl.schema import IndexSchema
18+
19+
20+
class TestKeySeparatorHandling:
21+
"""Tests for proper key separator handling across the codebase."""
22+
23+
def test_prefix_ending_with_separator_no_double_separator(self):
24+
"""Test that prefix ending with separator doesn't create double separators."""
25+
# Create schema with prefix ending in separator
26+
schema_dict = {
27+
"index": {
28+
"name": "test_index",
29+
"prefix": "user:", # Prefix ends with separator
30+
"key_separator": ":",
31+
"storage_type": "hash",
32+
},
33+
"fields": [{"name": "content", "type": "text"}],
34+
}
35+
schema = IndexSchema.from_dict(schema_dict)
36+
storage = HashStorage(index_schema=schema)
37+
38+
# Create a key
39+
key = storage._key("123", schema.index.prefix, schema.index.key_separator)
40+
41+
# Should not have double separator
42+
assert key == "user:123", f"Expected 'user:123' but got '{key}'"
43+
assert "::" not in key, f"Key has double separator: {key}"
44+
45+
def test_custom_separator_used_consistently(self):
46+
"""Test that custom key_separator is used throughout."""
47+
# Create schema with custom separator
48+
schema_dict = {
49+
"index": {
50+
"name": "test_index",
51+
"prefix": "user",
52+
"key_separator": "-", # Custom separator
53+
"storage_type": "json",
54+
},
55+
"fields": [{"name": "content", "type": "text"}],
56+
}
57+
schema = IndexSchema.from_dict(schema_dict)
58+
storage = JsonStorage(index_schema=schema)
59+
60+
# Create a key with custom separator
61+
key = storage._key("456", schema.index.prefix, schema.index.key_separator)
62+
63+
# Should use custom separator
64+
assert key == "user-456", f"Expected 'user-456' but got '{key}'"
65+
assert ":" not in key, f"Key uses default separator instead of custom: {key}"
66+
67+
def test_empty_prefix_handled_correctly(self):
68+
"""Test that empty prefix is handled correctly."""
69+
schema_dict = {
70+
"index": {
71+
"name": "test_index",
72+
"prefix": "", # Empty prefix
73+
"key_separator": ":",
74+
"storage_type": "hash",
75+
},
76+
"fields": [{"name": "content", "type": "text"}],
77+
}
78+
schema = IndexSchema.from_dict(schema_dict)
79+
storage = HashStorage(index_schema=schema)
80+
81+
# Create a key with empty prefix
82+
key = storage._key("789", schema.index.prefix, schema.index.key_separator)
83+
84+
# Should return just the ID without prefix or separator
85+
assert key == "789", f"Expected '789' but got '{key}'"
86+
87+
def test_semantic_router_uses_index_separator(self, redis_url):
88+
"""Test that SemanticRouter uses the index's key_separator."""
89+
# Create a route
90+
route = Route(
91+
name="test_route", references=["hello", "hi"], distance_threshold=0.5
92+
)
93+
94+
# Create router with routes
95+
router = SemanticRouter(
96+
name="test_router_sep",
97+
routes=[route],
98+
redis_url=redis_url,
99+
overwrite=True,
100+
)
101+
102+
# Modify the index schema to use custom separator
103+
router._index.schema.index.key_separator = "|"
104+
router._index.schema.index.prefix = "router"
105+
106+
# Check that route reference keys use the custom separator
107+
route_key = router._route_ref_key(router._index, "test_route", "ref123")
108+
109+
# Should use custom separator
110+
assert "|" in route_key, f"Route key doesn't use custom separator: {route_key}"
111+
assert (
112+
route_key.count(":") == 0
113+
), f"Route key uses default separator: {route_key}"
114+
assert (
115+
route_key == "router|test_route|ref123"
116+
), f"Unexpected route key: {route_key}"
117+
118+
def test_prefix_with_separator_and_custom_separator(self):
119+
"""Test handling when prefix contains old separator and we use a new one."""
120+
schema_dict = {
121+
"index": {
122+
"name": "test_index",
123+
"prefix": "app:user", # Prefix contains ':'
124+
"key_separator": "-", # But we use '-' as separator
125+
"storage_type": "hash",
126+
},
127+
"fields": [{"name": "content", "type": "text"}],
128+
}
129+
schema = IndexSchema.from_dict(schema_dict)
130+
storage = HashStorage(index_schema=schema)
131+
132+
# Create a key
133+
key = storage._key("999", schema.index.prefix, schema.index.key_separator)
134+
135+
# Should use the key_separator, not the : in prefix
136+
assert key == "app:user-999", f"Expected 'app:user-999' but got '{key}'"
137+
138+
def test_special_characters_in_separator(self):
139+
"""Test that special characters work as separators."""
140+
special_separators = ["_", "::", "->", ".", "/"]
141+
142+
for sep in special_separators:
143+
schema_dict = {
144+
"index": {
145+
"name": "test_index",
146+
"prefix": "data",
147+
"key_separator": sep,
148+
"storage_type": "json",
149+
},
150+
"fields": [{"name": "content", "type": "text"}],
151+
}
152+
schema = IndexSchema.from_dict(schema_dict)
153+
storage = JsonStorage(index_schema=schema)
154+
155+
key = storage._key("id", schema.index.prefix, schema.index.key_separator)
156+
expected = f"data{sep}id"
157+
assert (
158+
key == expected
159+
), f"For separator '{sep}': expected '{expected}' but got '{key}'"
160+
161+
def test_trailing_separator_normalization(self):
162+
"""Test that trailing separators in prefix are normalized."""
163+
test_cases = [
164+
("user:", ":", "123", "user:123"), # Prefix ends with separator
165+
("user::", ":", "456", "user:456"), # Prefix ends with double separator
166+
("user", ":", "789", "user:789"), # Normal case
167+
("user-", "-", "abc", "user-abc"), # Custom separator
168+
]
169+
170+
for prefix, separator, id_val, expected in test_cases:
171+
schema_dict = {
172+
"index": {
173+
"name": "test_index",
174+
"prefix": prefix,
175+
"key_separator": separator,
176+
"storage_type": "hash",
177+
},
178+
"fields": [{"name": "content", "type": "text"}],
179+
}
180+
schema = IndexSchema.from_dict(schema_dict)
181+
storage = HashStorage(index_schema=schema)
182+
183+
key = storage._key(id_val, schema.index.prefix, schema.index.key_separator)
184+
185+
# Check for expected normalization
186+
assert (
187+
key == expected
188+
), f"For prefix='{prefix}', sep='{separator}', id='{id_val}': expected '{expected}' but got '{key}'"
189+
190+
191+
class TestSemanticRouterKeyConstruction:
192+
"""Test SemanticRouter's key construction with separators."""
193+
194+
def test_router_respects_modified_key_separator(self, redis_url):
195+
"""Test that SemanticRouter respects modified key separators."""
196+
route = Route(
197+
name="test_route", references=["hello", "hi"], distance_threshold=0.5
198+
)
199+
200+
router = SemanticRouter(
201+
name="router_sep_test",
202+
routes=[route],
203+
redis_url=redis_url,
204+
overwrite=True,
205+
)
206+
207+
# Test with different separators
208+
for separator in [":", "-", "_", "|"]:
209+
router._index.schema.index.key_separator = separator
210+
router._index.schema.index.prefix = "routes"
211+
212+
# Test internal key generation
213+
route_key = router._route_ref_key(router._index, "route1", "ref1")
214+
215+
# Should use the configured separator
216+
expected = f"routes{separator}route1{separator}ref1"
217+
assert (
218+
route_key == expected
219+
), f"For sep '{separator}': Expected '{expected}' but got '{route_key}'"
220+
221+
def test_router_with_prefix_ending_in_separator(self, redis_url):
222+
"""Test SemanticRouter when prefix ends with separator."""
223+
route = Route(
224+
name="test_route", references=["hello", "hi"], distance_threshold=0.5
225+
)
226+
227+
router = SemanticRouter(
228+
name="router_trailing_test",
229+
routes=[route],
230+
redis_url=redis_url,
231+
overwrite=True,
232+
)
233+
234+
# Modify to have prefix ending with separator
235+
router._index.schema.index.prefix = "routes:"
236+
router._index.schema.index.key_separator = ":"
237+
238+
# Generate a route key
239+
route_key = router._route_ref_key(router._index, "route1", "ref1")
240+
241+
# Should not have double separator
242+
assert "::" not in route_key, f"Route key has double separator: {route_key}"
243+
assert route_key == "routes:route1:ref1", f"Unexpected route key: {route_key}"
244+
245+
246+
class TestSearchIndexKeyConstruction:
247+
"""Test SearchIndex's key construction with separators."""
248+
249+
def test_search_index_key_construction(self, redis_url):
250+
"""Test that SearchIndex properly handles key construction."""
251+
schema_dict = {
252+
"index": {
253+
"name": "search_test",
254+
"prefix": "doc:", # Ends with separator
255+
"key_separator": ":",
256+
"storage_type": "hash",
257+
},
258+
"fields": [
259+
{"name": "text", "type": "text"},
260+
{"name": "tag", "type": "tag"},
261+
],
262+
}
263+
264+
index = SearchIndex(
265+
IndexSchema.from_dict(schema_dict),
266+
redis_url=redis_url,
267+
)
268+
index.create(overwrite=True)
269+
270+
# Add a document
271+
data = [{"id": "123", "text": "test content", "tag": "test"}]
272+
keys = index.load(data, id_field="id")
273+
274+
# Check the generated key
275+
assert len(keys) == 1
276+
key = keys[0]
277+
278+
# Should not have double separator
279+
assert "::" not in key, f"Key has double separator: {key}"
280+
assert key == "doc:123", f"Expected 'doc:123' but got '{key}'"
281+
282+
# Clean up
283+
index.delete(drop=True)

0 commit comments

Comments
 (0)