Skip to content

Commit

Permalink
CNDB-12456: Add ANN_OPTIONS to CQL SELECT queries
Browse files Browse the repository at this point in the history
Add new CQL syntax to specify ANN options in ANN queries.

The only available ANN option at the moment is `rerank_k`,
which is the amplified limit for the ANN query to get more accurate results`.

This doesn't add support for ANN options to SAI,
which is planned to be done separately.

The new options are included in the ANN expression of the `RowFilter` of
the `ReadCommand` that represents the query. Thus, they are included in
the serialiazation of the command in the coordiantor and delivered to the
replicas.

The new ANN options require that the messaging version of all the nodes
is at least  VERSION_DS_11.
  • Loading branch information
adelapena committed Jan 31, 2025
1 parent b2107ca commit 601a78b
Show file tree
Hide file tree
Showing 36 changed files with 1,190 additions and 104 deletions.
1 change: 1 addition & 0 deletions doc/cql3/CQL.textile
Original file line number Diff line number Diff line change
Expand Up @@ -1073,6 +1073,7 @@ bc(syntax)..
( PER PARTITION LIMIT <integer> )?
( LIMIT <integer> ( OFFSET <integer> )? )?
( ALLOW FILTERING )?
( WITH ann_options = <map-literal> )?

<select-clause> ::= DISTINCT? <selection-list>

Expand Down
1 change: 1 addition & 0 deletions doc/modules/cassandra/examples/BNF/select_statement.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ select_statement::= SELECT [ JSON | DISTINCT ] ( select_clause | '*' )
[ PER PARTITION LIMIT (`integer` | `bind_marker`) ]
[ LIMIT (`integer` | `bind_marker`) [ OFFSET (`integer` | `bind_marker`) ] ]
[ ALLOW FILTERING ]
[ WITH ann_options = map-literal ]
select_clause::= `selector` [ AS `identifier` ] ( ',' `selector` [ AS `identifier` ] )
selector::== `column_name`
| `term`
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT * FROM embeddings ORDER BY vector ANN OF [1.2, 3.4] LIMIT 100 WITH ann_options = { 'rerank_k': 1000 }
1 change: 1 addition & 0 deletions doc/modules/cassandra/pages/cql/cql_singlefile.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -1625,6 +1625,7 @@ FROM +
( PER PARTITION LIMIT )? +
( LIMIT ( OFFSET )? )? +
( ALLOW FILTERING )?
( WITH ann_options = )?

::= DISTINCT?

Expand Down
10 changes: 10 additions & 0 deletions doc/modules/cassandra/pages/cql/dml.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,16 @@ execute:
include::example$CQL/query_nofail_allow_filtering.cql[]
----

[[ann-options]]
=== ANN options

`SELECT` queries using `ANN` ordering can provide a set of options to control the behavior of the ANN search:

[source,cql]
----
include::example$CQL/query_with_ann_options.cql[]
----

[[insert-statement]]
== INSERT

Expand Down
7 changes: 6 additions & 1 deletion pylib/cqlshlib/cql3handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ def dequote_value(cqlword):
| <boolean>
| <blobLiteral>
| <collectionLiteral>
| <vectorLiteral>
| <functionLiteral> <functionArguments>
| "NULL"
;
Expand Down Expand Up @@ -231,6 +232,9 @@ def dequote_value(cqlword):
;
<mapLiteral> ::= "{" <term> ":" <term> ( "," <term> ":" <term> )* "}"
;
<vectorLiteral> ::= "[" ( <float> ( "," <float> )* )? "]"
;
<anyFunctionName> ::= ( ksname=<cfOrKsName> dot="." )? udfname=<cfOrKsName> ;
Expand Down Expand Up @@ -716,6 +720,7 @@ def working_on_keyspace(ctxt):
( "PER" "PARTITION" "LIMIT" perPartitionLimit=<wholenumber> )?
( "LIMIT" limit=<wholenumber> ( "OFFSET" offset=<wholenumber> )? )?
( "ALLOW" "FILTERING" )?
( "WITH" "ann_options" "=" <mapLiteral> )?
;
<whereClause> ::= <relation> ( "AND" <relation> )*
;
Expand All @@ -741,7 +746,7 @@ def working_on_keyspace(ctxt):
;
<selectionFunctionArguments> ::= "(" ( <selector> ( "," <selector> )* )? ")"
;
<orderByClause> ::= [ordercol]=<cident> ( "ASC" | "DESC" )?
<orderByClause> ::= [ordercol]=<cident> ( "ANN" "OF" <vectorLiteral> )? ( "ASC" | "DESC" )?
;
<groupByClause> ::= [groupcol]=<cident>
;
Expand Down
11 changes: 9 additions & 2 deletions pylib/cqlshlib/test/test_cqlsh_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,14 @@ def test_complete_in_alter_keyspace(self):
def test_complete_in_select_limit_clause(self):
self.trycompletions('SELECT * FROM system.peers LI', immediate='MIT ')
self.trycompletions('SELECT * FROM system.peers LIMIT ', choices=['<wholenumber>'])
self.trycompletions('SELECT * FROM system.peers LIMIT 1 ', choices=[';', 'ALLOW', 'OFFSET'])
self.trycompletions('SELECT * FROM system.peers LIMIT 1 ', choices=[';', 'ALLOW', 'OFFSET', 'WITH'])
self.trycompletions('SELECT * FROM system.peers LIMIT 1 OF', immediate='FSET ')
self.trycompletions('SELECT * FROM system.peers LIMIT 1 OFFSET ', choices=['<wholenumber>'])
self.trycompletions('SELECT * FROM system.peers LIMIT 1 OFFSET 1 ', choices=[';', 'ALLOW'])
self.trycompletions('SELECT * FROM system.peers LIMIT 1 OFFSET 1 ', choices=[';', 'ALLOW', 'WITH'])

def test_complete_in_select_order_by_ann_of(self):
self.trycompletions('SELECT * FROM system.peers ORDER BY vectorcol ', choices=[',', ';', 'ALLOW', 'ANN', 'ASC', 'DESC', 'LIMIT', 'PER', 'WITH'])
self.trycompletions('SELECT * FROM system.peers ORDER BY vectorcol ANN ', immediate='OF [ ')
self.trycompletions('SELECT * FROM system.peers ORDER BY vectorcol ANN OF [ 1.2, ', choices=['<float>'])
self.trycompletions('SELECT * FROM system.peers ORDER BY vectorcol ANN OF [ 1.0, 2.0] LIMIT 100 ', choices=[';', 'ALLOW', 'OFFSET', 'WITH'])
self.trycompletions('SELECT * FROM system.peers ORDER BY vectorcol ANN OF [ 1.0, 2.0] LIMIT 100 WITH ', immediate='ann_options = { ')
4 changes: 3 additions & 1 deletion src/antlr/Parser.g
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ selectStatement returns [SelectStatement.RawStatement expr]
List<ColumnIdentifier> groups = new ArrayList<>();
boolean allowFiltering = false;
boolean isJson = false;
SelectOptions options = new SelectOptions();
}
: K_SELECT
// json is a valid column name. By consequence, we need to resolve the ambiguity for "json - json"
Expand All @@ -271,14 +272,15 @@ selectStatement returns [SelectStatement.RawStatement expr]
( K_PER K_PARTITION K_LIMIT rows=intValue { perPartitionLimit = rows; } )?
( K_LIMIT rows=intValue { limit = rows; } ( K_OFFSET rows=intValue { offset = rows; } )? )?
( K_ALLOW K_FILTERING { allowFiltering = true; } )?
( K_WITH properties[options] )?
{
SelectStatement.Parameters params = new SelectStatement.Parameters(orderings,
groups,
$sclause.isDistinct,
allowFiltering,
isJson);
WhereClause where = wclause == null ? WhereClause.empty() : wclause.build();
$expr = new SelectStatement.RawStatement(cf, params, $sclause.selectors, where, limit, perPartitionLimit, offset);
$expr = new SelectStatement.RawStatement(cf, params, $sclause.selectors, where, limit, perPartitionLimit, offset, options);
}
;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.*;

import org.apache.cassandra.guardrails.Guardrails;
import org.apache.cassandra.db.filter.ANNOptions;
import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.schema.TableMetadata;
import org.apache.cassandra.cql3.QueryOptions;
Expand Down Expand Up @@ -124,7 +125,8 @@ public boolean needFiltering()
@Override
public void addToRowFilter(RowFilter.Builder filter,
IndexRegistry indexRegistry,
QueryOptions options) throws InvalidRequestException
QueryOptions options,
ANNOptions annOptions) throws InvalidRequestException
{
int position = 0;

Expand All @@ -135,7 +137,7 @@ public void addToRowFilter(RowFilter.Builder filter,
// We ignore all the clustering columns that can be handled by slices.
if (handleInFilter(restriction, position) || restriction.hasSupportingIndex(indexRegistry))
{
restriction.addToRowFilter(filter, indexRegistry, options);
restriction.addToRowFilter(filter, indexRegistry, options, annOptions);
continue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.List;
import java.util.Set;

import org.apache.cassandra.db.filter.ANNOptions;
import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.serializers.CollectionSerializer;

Expand Down Expand Up @@ -238,7 +239,10 @@ public MultiClusteringBuilder appendTo(MultiClusteringBuilder builder, QueryOpti
}

@Override
public final void addToRowFilter(RowFilter.Builder filter, IndexRegistry indexRegistry, QueryOptions options)
public final void addToRowFilter(RowFilter.Builder filter,
IndexRegistry indexRegistry,
QueryOptions options,
ANNOptions annOptions)
{
Tuples.Value t = ((Tuples.Value) term.bind(options));
List<ByteBuffer> values = t.getElements();
Expand Down Expand Up @@ -303,7 +307,8 @@ protected boolean isSupportedBy(Index index, ColumnMetadata column)
@Override
public final void addToRowFilter(RowFilter.Builder filter,
IndexRegistry indexRegistry,
QueryOptions options)
QueryOptions options,
ANNOptions annOptions)
{
// If the relation is of the type (c) IN ((x),(y),(z)) then it is equivalent to
// c IN (x, y, z) and we can perform filtering
Expand Down Expand Up @@ -571,7 +576,8 @@ public SingleRestriction doMergeWith(SingleRestriction otherRestriction)
@Override
public final void addToRowFilter(RowFilter.Builder filter,
IndexRegistry indexRegistry,
QueryOptions options)
QueryOptions options,
ANNOptions annOptions)
{
throw invalidRequest("Multi-column slice restrictions cannot be used for filtering.");
}
Expand Down Expand Up @@ -663,7 +669,7 @@ public MultiClusteringBuilder appendTo(MultiClusteringBuilder builder, QueryOpti
}

@Override
public final void addToRowFilter(RowFilter.Builder filter, IndexRegistry indexRegistry, QueryOptions options)
public final void addToRowFilter(RowFilter.Builder filter, IndexRegistry indexRegistry, QueryOptions options, ANNOptions annOptions)
{
throw new UnsupportedOperationException("Secondary indexes do not support IS NOT NULL restrictions");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.*;

import org.apache.cassandra.guardrails.Guardrails;
import org.apache.cassandra.db.filter.ANNOptions;
import org.apache.cassandra.schema.TableMetadata;
import org.apache.cassandra.cql3.QueryOptions;
import org.apache.cassandra.cql3.statements.Bound;
Expand Down Expand Up @@ -124,13 +125,14 @@ public boolean isInclusive(Bound b)
@Override
public void addToRowFilter(RowFilter.Builder filter,
IndexRegistry indexRegistry,
QueryOptions options)
QueryOptions options,
ANNOptions annOptions)
{
List<SingleRestriction> restrictions = restrictions();
for (int i = 0; i < restrictions.size(); i++)
{
SingleRestriction r = restrictions.get(i);
r.addToRowFilter(filter, indexRegistry, options);
r.addToRowFilter(filter, indexRegistry, options, annOptions);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import java.util.List;

import org.apache.cassandra.db.filter.ANNOptions;
import org.apache.cassandra.index.Index;
import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.cql3.QueryOptions;
Expand Down Expand Up @@ -87,8 +88,10 @@ public default boolean isOnToken()
* @param filter the row filter to add expressions to
* @param indexRegistry the index registry
* @param options the query options
* @param annOptions the query ANN options
*/
public void addToRowFilter(RowFilter.Builder filter,
IndexRegistry indexRegistry,
QueryOptions options);
QueryOptions options,
ANNOptions annOptions);
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.cassandra.cql3.restrictions.SingleColumnRestriction.ContainsRestriction;
import org.apache.cassandra.db.filter.RowFilter;
import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.db.filter.ANNOptions;
import org.apache.cassandra.index.Index;
import org.apache.cassandra.index.IndexRegistry;
import org.apache.cassandra.schema.ColumnMetadata;
Expand All @@ -55,8 +56,9 @@ private EmptyRestrictionSet()
}

@Override
public void addToRowFilter(RowFilter.Builder rowFilter, IndexRegistry indexRegistry, QueryOptions options) throws InvalidRequestException
public void addToRowFilter(RowFilter.Builder rowFilter, IndexRegistry indexRegistry, QueryOptions options, ANNOptions annOptions)
{
// nothing to do here, since there are no restrictions
}

@Override
Expand Down Expand Up @@ -225,10 +227,11 @@ else if (!singleRestriction.isEQ())
@Override
public void addToRowFilter(RowFilter.Builder rowFilter,
IndexRegistry indexRegistry,
QueryOptions options) throws InvalidRequestException
QueryOptions options,
ANNOptions annOptions) throws InvalidRequestException
{
for (SingleRestriction restriction : restrictionsMap.values())
rowFilter.addAllAsConjunction(b -> restriction.addToRowFilter(b, indexRegistry, options));
rowFilter.addAllAsConjunction(b -> restriction.addToRowFilter(b, indexRegistry, options, annOptions));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.List;
import java.util.Set;

import org.apache.cassandra.db.filter.ANNOptions;
import org.apache.cassandra.index.Index;
import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.cql3.QueryOptions;
Expand All @@ -46,9 +47,10 @@ class RestrictionSetWrapper implements Restrictions
@Override
public void addToRowFilter(RowFilter.Builder rowFilter,
IndexRegistry indexRegistry,
QueryOptions options)
QueryOptions options,
ANNOptions annOptions)
{
restrictions.addToRowFilter(rowFilter, indexRegistry, options);
restrictions.addToRowFilter(rowFilter, indexRegistry, options, annOptions);
}

@Override
Expand Down
Loading

0 comments on commit 601a78b

Please sign in to comment.