Skip to content

Commit b9549d5

Browse files
authored
Add filter-based deletion to Cassandra Vector Store (#2143)
Implement filter-based deletion for CassandraVectorStore to support more flexible document removal based on metadata filters.

Key changes:
- Add doDelete(Filter.Expression) implementation
- Implement workaround for Cassandra's limitations in direct filtered deletion
- Fetch matching documents first, then delete by IDs
- Add integration tests for various filter deletion scenarios
- Support simple and complex filter expressions
- Includes tests for:
  * Deleting by simple equality filter
  * Deleting by string-based filter expression
  * Deleting by complex AND filter expression

Provides consistent deletion capabilities across vector store implementations while addressing Cassandra-specific constraints.

Signed-off-by: Soby Chacko <[email protected]>
1 parent bd52786 commit b9549d5

File tree

2 files changed

+148
-1
lines changed

2 files changed

+148
-1
lines changed

vector-stores/spring-ai-cassandra-store/src/main/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStore.java

+40
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import java.util.concurrent.Executor;
3232
import java.util.concurrent.Executors;
3333
import java.util.function.Function;
34+
import java.util.stream.Collectors;
3435
import java.util.stream.Stream;
3536

3637
import com.datastax.oss.driver.api.core.CqlSession;
@@ -73,6 +74,7 @@
7374
import org.springframework.ai.vectorstore.AbstractVectorStoreBuilder;
7475
import org.springframework.ai.vectorstore.SearchRequest;
7576
import org.springframework.ai.vectorstore.VectorStore;
77+
import org.springframework.ai.vectorstore.filter.Filter;
7678
import org.springframework.ai.vectorstore.filter.FilterExpressionConverter;
7779
import org.springframework.ai.vectorstore.observation.AbstractObservationVectorStore;
7880
import org.springframework.ai.vectorstore.observation.VectorStoreObservationContext;
@@ -315,6 +317,44 @@ public Optional<Boolean> doDelete(List<String> idList) {
315317
return Optional.of(Boolean.TRUE);
316318
}
317319

320+
@Override
321+
protected void doDelete(Filter.Expression filterExpression) {
322+
Assert.notNull(filterExpression, "Filter expression must not be null");
323+
324+
try {
325+
// TODO - Investigate why we can't do a direct filter based delete in
326+
// Cassandra
327+
// This SO thread seems to indicate that this is not possible in Cassandra
328+
// https://stackoverflow.com/questions/70953262/unable-to-delete-multiple-rows-getting-some-partition-key-parts-are-missing-i
329+
// Needs more research into this matter.
330+
SearchRequest searchRequest = SearchRequest.builder()
331+
.query("") // empty query since we only want filter matches
332+
.filterExpression(filterExpression)
333+
.topK(1000) // large enough to get all matches
334+
.similarityThresholdAll()
335+
.build();
336+
337+
List<Document> matchingDocs = similaritySearch(searchRequest);
338+
339+
if (!matchingDocs.isEmpty()) {
340+
// Then delete those documents by ID
341+
List<String> idsToDelete = matchingDocs.stream().map(Document::getId).collect(Collectors.toList());
342+
343+
Optional<Boolean> result = delete(idsToDelete);
344+
345+
if (result.isPresent() && !result.get()) {
346+
throw new IllegalStateException("Failed to delete some documents");
347+
}
348+
349+
logger.debug(() -> "Deleted " + idsToDelete.size() + " documents matching filter expression");
350+
}
351+
}
352+
catch (Exception e) {
353+
logger.error(e, () -> "Failed to delete documents by filter");
354+
throw new IllegalStateException("Failed to delete documents by filter", e);
355+
}
356+
}
357+
318358
@Override
319359
public List<Document> doSimilaritySearch(SearchRequest request) {
320360
Preconditions.checkArgument(request.getTopK() <= 1000);

vector-stores/spring-ai-cassandra-store/src/test/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStoreIT.java

+108-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
2222
import java.util.List;
2323
import java.util.Map;
2424
import java.util.UUID;
25+
import java.util.stream.Collectors;
2526

2627
import com.datastax.oss.driver.api.core.CqlSession;
2728
import com.datastax.oss.driver.api.core.CqlSessionBuilder;
@@ -42,6 +43,7 @@
4243
import org.springframework.ai.vectorstore.SearchRequest;
4344
import org.springframework.ai.vectorstore.cassandra.CassandraVectorStore.SchemaColumn;
4445
import org.springframework.ai.vectorstore.cassandra.CassandraVectorStore.SchemaColumnTags;
46+
import org.springframework.ai.vectorstore.filter.Filter;
4547
import org.springframework.boot.SpringBootConfiguration;
4648
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
4749
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
@@ -57,6 +59,7 @@
5759
*
5860
* @author Mick Semb Wever
5961
* @author Thomas Vitale
62+
* @author Soby Chacko
6063
* @since 1.0.0
6164
*/
6265
@Testcontainers
@@ -417,6 +420,110 @@ void searchWithThreshold() {
417420
});
418421
}
419422

423+
@Test
424+
void deleteByFilter() {
425+
this.contextRunner.run(context -> {
426+
try (CassandraVectorStore store = createTestStore(context,
427+
new SchemaColumn("country", DataTypes.TEXT, SchemaColumnTags.INDEXED),
428+
new SchemaColumn("year", DataTypes.SMALLINT, SchemaColumnTags.INDEXED))) {
429+
430+
var bgDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
431+
Map.of("country", "BG", "year", (short) 2020));
432+
var nlDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
433+
Map.of("country", "NL"));
434+
var bgDocument2 = new Document("The World is Big and Salvation Lurks Around the Corner",
435+
Map.of("country", "BG", "year", (short) 2023));
436+
437+
store.add(List.of(bgDocument, nlDocument, bgDocument2));
438+
439+
// Verify initial state
440+
List<Document> results = store
441+
.similaritySearch(SearchRequest.builder().query("The World").topK(5).build());
442+
assertThat(results).hasSize(3);
443+
444+
// Delete documents with country = BG
445+
Filter.Expression filterExpression = new Filter.Expression(Filter.ExpressionType.EQ,
446+
new Filter.Key("country"), new Filter.Value("BG"));
447+
448+
store.delete(filterExpression);
449+
450+
results = store.similaritySearch(
451+
SearchRequest.builder().query("The World").topK(5).similarityThresholdAll().build());
452+
453+
assertThat(results).hasSize(1);
454+
assertThat(results.get(0).getMetadata()).containsEntry("country", "NL");
455+
}
456+
});
457+
}
458+
459+
@Test
460+
void deleteWithStringFilterExpression() {
461+
this.contextRunner.run(context -> {
462+
try (CassandraVectorStore store = createTestStore(context,
463+
new SchemaColumn("country", DataTypes.TEXT, SchemaColumnTags.INDEXED),
464+
new SchemaColumn("year", DataTypes.SMALLINT, SchemaColumnTags.INDEXED))) {
465+
466+
var bgDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
467+
Map.of("country", "BG", "year", (short) 2020));
468+
var nlDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
469+
Map.of("country", "NL"));
470+
var bgDocument2 = new Document("The World is Big and Salvation Lurks Around the Corner",
471+
Map.of("country", "BG", "year", (short) 2023));
472+
473+
store.add(List.of(bgDocument, nlDocument, bgDocument2));
474+
475+
// Verify initial state
476+
List<Document> results = store
477+
.similaritySearch(SearchRequest.builder().query("The World").topK(5).build());
478+
assertThat(results).hasSize(3);
479+
480+
store.delete("country == 'BG'");
481+
482+
results = store.similaritySearch(
483+
SearchRequest.builder().query("The World").topK(5).similarityThresholdAll().build());
484+
485+
assertThat(results).hasSize(1);
486+
assertThat(results.get(0).getMetadata()).containsEntry("country", "NL");
487+
}
488+
});
489+
}
490+
491+
@Test
492+
void deleteWithComplexFilterExpression() {
493+
this.contextRunner.run(context -> {
494+
try (CassandraVectorStore store = createTestStore(context,
495+
new SchemaColumn("type", DataTypes.TEXT, SchemaColumnTags.INDEXED),
496+
new SchemaColumn("priority", DataTypes.SMALLINT, SchemaColumnTags.INDEXED))) {
497+
498+
var doc1 = new Document("Content 1", Map.of("type", "A", "priority", (short) 1));
499+
var doc2 = new Document("Content 2", Map.of("type", "A", "priority", (short) 2));
500+
var doc3 = new Document("Content 3", Map.of("type", "B", "priority", (short) 1));
501+
502+
store.add(List.of(doc1, doc2, doc3));
503+
504+
// Complex filter expression: (type == 'A' AND priority > 1)
505+
Filter.Expression priorityFilter = new Filter.Expression(Filter.ExpressionType.GT,
506+
new Filter.Key("priority"), new Filter.Value((short) 1));
507+
Filter.Expression typeFilter = new Filter.Expression(Filter.ExpressionType.EQ, new Filter.Key("type"),
508+
new Filter.Value("A"));
509+
Filter.Expression complexFilter = new Filter.Expression(Filter.ExpressionType.AND, typeFilter,
510+
priorityFilter);
511+
512+
store.delete(complexFilter);
513+
514+
var results = store.similaritySearch(
515+
SearchRequest.builder().query("Content").topK(5).similarityThresholdAll().build());
516+
517+
assertThat(results).hasSize(2);
518+
assertThat(results.stream().map(doc -> doc.getMetadata().get("type")).collect(Collectors.toList()))
519+
.containsExactlyInAnyOrder("A", "B");
520+
assertThat(results.stream()
521+
.map(doc -> ((Short) doc.getMetadata().get("priority")).intValue())
522+
.collect(Collectors.toList())).containsExactlyInAnyOrder(1, 1);
523+
}
524+
});
525+
}
526+
420527
@SpringBootConfiguration
421528
@EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class })
422529
public static class TestApplication {

0 commit comments

Comments
 (0)