Skip to content

Commit f8e86eb

Browse files
committed
SASI index options validation
patch by xedin; reviewed by beobal for CASSANDRA-11136
1 parent b11fba7 commit f8e86eb

File tree

11 files changed

+251
-110
lines changed

11 files changed

+251
-110
lines changed

CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
3.4
2+
* SASI index options validation (CASSANDRA-11136)
23
* Optimize disk seek using min/max column name meta data when the LIMIT clause is used
34
(CASSANDRA-8180)
45
* Add LIKE support to CQL3 (CASSANDRA-11067)

doc/SASI.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,12 @@ first being the default. The `last_name` index is created with the
7070
mode `CONTAINS` which matches terms on suffixes instead of prefix
7171
only. Examples of this are available below and more detail can be
7272
found in the section on
73-
[OnDiskIndex](https://github.com/xedin/sasi#ondiskindexbuilder).The
73+
[OnDiskIndex](#ondiskindexbuilder).The
7474
`created_at` column is created with its mode set to `SPARSE`, which is
7575
meant to improve performance of querying large, dense number ranges
7676
like timestamps for data inserted every millisecond. Details of the
7777
`SPARSE` implementation can also be found in the section on the
78-
[OnDiskIndex](https://github.com/xedin/sasi#ondiskindexbuilder). The `age`
78+
[OnDiskIndex](#ondiskindexbuilder). The `age`
7979
index is created with the default `PREFIX` mode and no
8080
case-sensitivity or text analysis options are specified since the
8181
field is numeric.
@@ -186,7 +186,7 @@ pitfalls of such a query. With SASI, while the requirement to include
186186
performance pitfalls do not exist because filtering is not
187187
performed. Details on how SASI joins data from multiple predicates is
188188
available below in the
189-
[Implementation Details](https://github.com/xedin/sasi#implementation-details)
189+
[Implementation Details](#implementation-details)
190190
section.
191191

192192
```
@@ -507,7 +507,7 @@ converts from Cassandra's internal representation of
507507
`IndexExpression`s, which has also been modified to support encoding
508508
queries that contain ORs and groupings of expressions using
509509
parentheses (see the
510-
[Cassandra Internal Changes](https://github.com/xedin/sasi#cassandra-internal-changes)
510+
[Cassandra Internal Changes](#cassandra-internal-changes)
511511
section below for more details). This process produces a tree of
512512
[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s, which in turn may contain [`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s, all of which
513513
provide an alternative, more efficient, representation of the query.
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.cassandra.index;
19+
20+
import java.util.regex.Matcher;
21+
import java.util.regex.Pattern;
22+
23+
import org.apache.commons.lang3.StringUtils;
24+
25+
import org.apache.cassandra.config.CFMetaData;
26+
import org.apache.cassandra.config.ColumnDefinition;
27+
import org.apache.cassandra.cql3.ColumnIdentifier;
28+
import org.apache.cassandra.cql3.statements.IndexTarget;
29+
import org.apache.cassandra.exceptions.ConfigurationException;
30+
import org.apache.cassandra.schema.IndexMetadata;
31+
import org.apache.cassandra.utils.Pair;
32+
33+
public class TargetParser
34+
{
35+
private static final Pattern TARGET_REGEX = Pattern.compile("^(keys|entries|values|full)\\((.+)\\)$");
36+
private static final Pattern TWO_QUOTES = Pattern.compile("\"\"");
37+
private static final String QUOTE = "\"";
38+
39+
public static Pair<ColumnDefinition, IndexTarget.Type> parse(CFMetaData cfm, IndexMetadata indexDef)
40+
{
41+
String target = indexDef.options.get("target");
42+
assert target != null : String.format("No target definition found for index %s", indexDef.name);
43+
Pair<ColumnDefinition, IndexTarget.Type> result = parse(cfm, target);
44+
if (result == null)
45+
throw new ConfigurationException(String.format("Unable to parse targets for index %s (%s)", indexDef.name, target));
46+
return result;
47+
}
48+
49+
public static Pair<ColumnDefinition, IndexTarget.Type> parse(CFMetaData cfm, String target)
50+
{
51+
// if the regex matches then the target is in the form "keys(foo)", "entries(bar)" etc
52+
// if not, then it must be a simple column name and implictly its type is VALUES
53+
Matcher matcher = TARGET_REGEX.matcher(target);
54+
String columnName;
55+
IndexTarget.Type targetType;
56+
if (matcher.matches())
57+
{
58+
targetType = IndexTarget.Type.fromString(matcher.group(1));
59+
columnName = matcher.group(2);
60+
}
61+
else
62+
{
63+
columnName = target;
64+
targetType = IndexTarget.Type.VALUES;
65+
}
66+
67+
// in the case of a quoted column name the name in the target string
68+
// will be enclosed in quotes, which we need to unwrap. It may also
69+
// include quote characters internally, escaped like so:
70+
// abc"def -> abc""def.
71+
// Because the target string is stored in a CQL compatible form, we
72+
// need to un-escape any such quotes to get the actual column name
73+
if (columnName.startsWith(QUOTE))
74+
{
75+
columnName = StringUtils.substring(StringUtils.substring(columnName, 1), 0, -1);
76+
columnName = TWO_QUOTES.matcher(columnName).replaceAll(QUOTE);
77+
}
78+
79+
// if it's not a CQL table, we can't assume that the column name is utf8, so
80+
// in that case we have to do a linear scan of the cfm's columns to get the matching one
81+
if (cfm.isCQLTable())
82+
return Pair.create(cfm.getColumnDefinition(new ColumnIdentifier(columnName, true)), targetType);
83+
else
84+
for (ColumnDefinition column : cfm.allColumns())
85+
if (column.name.toString().equals(columnName))
86+
return Pair.create(column, targetType);
87+
88+
return null;
89+
}
90+
}

src/java/org/apache/cassandra/index/internal/CassandraIndex.java

Lines changed: 4 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,17 @@
55
import java.util.concurrent.Callable;
66
import java.util.concurrent.Future;
77
import java.util.function.BiFunction;
8-
import java.util.regex.Matcher;
98
import java.util.regex.Pattern;
109
import java.util.stream.Collectors;
1110
import java.util.stream.StreamSupport;
1211

1312
import com.google.common.collect.ImmutableSet;
14-
import org.apache.commons.lang3.StringUtils;
13+
import org.apache.cassandra.index.TargetParser;
1514
import org.slf4j.Logger;
1615
import org.slf4j.LoggerFactory;
1716

1817
import org.apache.cassandra.config.CFMetaData;
1918
import org.apache.cassandra.config.ColumnDefinition;
20-
import org.apache.cassandra.cql3.ColumnIdentifier;
2119
import org.apache.cassandra.cql3.Operator;
2220
import org.apache.cassandra.cql3.statements.IndexTarget;
2321
import org.apache.cassandra.db.*;
@@ -56,8 +54,6 @@ public abstract class CassandraIndex implements Index
5654
{
5755
private static final Logger logger = LoggerFactory.getLogger(CassandraIndex.class);
5856

59-
public static final Pattern TARGET_REGEX = Pattern.compile("^(keys|entries|values|full)\\((.+)\\)$");
60-
6157
public final ColumnFamilyStore baseCfs;
6258
protected IndexMetadata metadata;
6359
protected ColumnFamilyStore indexCfs;
@@ -195,7 +191,7 @@ public Callable<?> getMetadataReloadTask(IndexMetadata indexDef)
195191
private void setMetadata(IndexMetadata indexDef)
196192
{
197193
metadata = indexDef;
198-
Pair<ColumnDefinition, IndexTarget.Type> target = parseTarget(baseCfs.metadata, indexDef);
194+
Pair<ColumnDefinition, IndexTarget.Type> target = TargetParser.parse(baseCfs.metadata, indexDef);
199195
functions = getFunctions(indexDef, target);
200196
CFMetaData cfm = indexCfsMetadata(baseCfs.metadata, indexDef);
201197
indexCfs = ColumnFamilyStore.createColumnFamilyStore(baseCfs.keyspace,
@@ -708,7 +704,7 @@ private static String getSSTableNames(Collection<SSTableReader> sstables)
708704
*/
709705
public static final CFMetaData indexCfsMetadata(CFMetaData baseCfsMetadata, IndexMetadata indexMetadata)
710706
{
711-
Pair<ColumnDefinition, IndexTarget.Type> target = parseTarget(baseCfsMetadata, indexMetadata);
707+
Pair<ColumnDefinition, IndexTarget.Type> target = TargetParser.parse(baseCfsMetadata, indexMetadata);
712708
CassandraIndexFunctions utils = getFunctions(indexMetadata, target);
713709
ColumnDefinition indexedColumn = target.left;
714710
AbstractType<?> indexedValueType = utils.getIndexedValueType(indexedColumn);
@@ -752,57 +748,7 @@ public static final CFMetaData indexCfsMetadata(CFMetaData baseCfsMetadata, Inde
752748
*/
753749
public static CassandraIndex newIndex(ColumnFamilyStore baseCfs, IndexMetadata indexMetadata)
754750
{
755-
return getFunctions(indexMetadata, parseTarget(baseCfs.metadata, indexMetadata)).newIndexInstance(baseCfs, indexMetadata);
756-
}
757-
758-
private static final Pattern TWO_QUOTES = Pattern.compile("\"\"");
759-
private static final String QUOTE = "\"";
760-
761-
// Public because it's also used to convert index metadata into a thrift-compatible format
762-
public static Pair<ColumnDefinition, IndexTarget.Type> parseTarget(CFMetaData cfm,
763-
IndexMetadata indexDef)
764-
{
765-
String target = indexDef.options.get("target");
766-
assert target != null : String.format("No target definition found for index %s", indexDef.name);
767-
768-
// if the regex matches then the target is in the form "keys(foo)", "entries(bar)" etc
769-
// if not, then it must be a simple column name and implictly its type is VALUES
770-
Matcher matcher = TARGET_REGEX.matcher(target);
771-
String columnName;
772-
IndexTarget.Type targetType;
773-
if (matcher.matches())
774-
{
775-
targetType = IndexTarget.Type.fromString(matcher.group(1));
776-
columnName = matcher.group(2);
777-
}
778-
else
779-
{
780-
columnName = target;
781-
targetType = IndexTarget.Type.VALUES;
782-
}
783-
784-
// in the case of a quoted column name the name in the target string
785-
// will be enclosed in quotes, which we need to unwrap. It may also
786-
// include quote characters internally, escaped like so:
787-
// abc"def -> abc""def.
788-
// Because the target string is stored in a CQL compatible form, we
789-
// need to un-escape any such quotes to get the actual column name
790-
if (columnName.startsWith(QUOTE))
791-
{
792-
columnName = StringUtils.substring(StringUtils.substring(columnName, 1), 0, -1);
793-
columnName = TWO_QUOTES.matcher(columnName).replaceAll(QUOTE);
794-
}
795-
796-
// if it's not a CQL table, we can't assume that the column name is utf8, so
797-
// in that case we have to do a linear scan of the cfm's columns to get the matching one
798-
if (cfm.isCQLTable())
799-
return Pair.create(cfm.getColumnDefinition(new ColumnIdentifier(columnName, true)), targetType);
800-
else
801-
for (ColumnDefinition column : cfm.allColumns())
802-
if (column.name.toString().equals(columnName))
803-
return Pair.create(column, targetType);
804-
805-
throw new RuntimeException(String.format("Unable to parse targets for index %s (%s)", indexDef.name, target));
751+
return getFunctions(indexMetadata, TargetParser.parse(baseCfs.metadata, indexMetadata)).newIndexInstance(baseCfs, indexMetadata);
806752
}
807753

808754
static CassandraIndexFunctions getFunctions(IndexMetadata indexDef,

src/java/org/apache/cassandra/index/sasi/SASIIndex.java

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,10 @@
2222
import java.util.function.BiFunction;
2323

2424
import com.googlecode.concurrenttrees.common.Iterables;
25-
import org.apache.cassandra.config.CFMetaData;
26-
import org.apache.cassandra.config.ColumnDefinition;
27-
import org.apache.cassandra.config.DatabaseDescriptor;
28-
import org.apache.cassandra.config.Schema;
25+
26+
import org.apache.cassandra.config.*;
2927
import org.apache.cassandra.cql3.Operator;
28+
import org.apache.cassandra.cql3.statements.IndexTarget;
3029
import org.apache.cassandra.db.*;
3130
import org.apache.cassandra.db.compaction.CompactionManager;
3231
import org.apache.cassandra.db.compaction.OperationType;
@@ -36,12 +35,15 @@
3635
import org.apache.cassandra.db.partitions.PartitionIterator;
3736
import org.apache.cassandra.db.partitions.PartitionUpdate;
3837
import org.apache.cassandra.db.rows.Row;
38+
import org.apache.cassandra.exceptions.ConfigurationException;
3939
import org.apache.cassandra.exceptions.InvalidRequestException;
4040
import org.apache.cassandra.index.Index;
4141
import org.apache.cassandra.index.IndexRegistry;
4242
import org.apache.cassandra.index.SecondaryIndexBuilder;
43-
import org.apache.cassandra.index.internal.CassandraIndex;
43+
import org.apache.cassandra.index.TargetParser;
4444
import org.apache.cassandra.index.sasi.conf.ColumnIndex;
45+
import org.apache.cassandra.index.sasi.conf.IndexMode;
46+
import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder.Mode;
4547
import org.apache.cassandra.index.sasi.disk.PerSSTableIndexWriter;
4648
import org.apache.cassandra.index.sasi.plan.QueryPlan;
4749
import org.apache.cassandra.index.transactions.IndexTransaction;
@@ -51,6 +53,7 @@
5153
import org.apache.cassandra.notifications.*;
5254
import org.apache.cassandra.schema.IndexMetadata;
5355
import org.apache.cassandra.utils.FBUtilities;
56+
import org.apache.cassandra.utils.Pair;
5457
import org.apache.cassandra.utils.concurrent.OpOrder;
5558

5659
public class SASIIndex implements Index, INotificationConsumer
@@ -95,7 +98,7 @@ public SASIIndex(ColumnFamilyStore baseCfs, IndexMetadata config)
9598
this.baseCfs = baseCfs;
9699
this.config = config;
97100

98-
ColumnDefinition column = CassandraIndex.parseTarget(baseCfs.metadata, config).left;
101+
ColumnDefinition column = TargetParser.parse(baseCfs.metadata, config).left;
99102
this.index = new ColumnIndex(baseCfs.metadata.getKeyValidator(), column, config);
100103

101104
Tracker tracker = baseCfs.getTracker();
@@ -116,8 +119,36 @@ public SASIIndex(ColumnFamilyStore baseCfs, IndexMetadata config)
116119
CompactionManager.instance.submitIndexBuild(new SASIIndexBuilder(baseCfs, toRebuild));
117120
}
118121

119-
public static Map<String, String> validateOptions(Map<String, String> options)
122+
public static Map<String, String> validateOptions(Map<String, String> options, CFMetaData cfm)
120123
{
124+
String targetColumn = options.get("target");
125+
if (targetColumn == null)
126+
throw new ConfigurationException("unknown target column");
127+
128+
Pair<ColumnDefinition, IndexTarget.Type> target = TargetParser.parse(cfm, targetColumn);
129+
if (target == null)
130+
throw new ConfigurationException("failed to retrieve target column for: " + targetColumn);
131+
132+
IndexMode.validateAnalyzer(options);
133+
134+
IndexMode mode = IndexMode.getMode(target.left, options);
135+
if (mode.mode == Mode.SPARSE)
136+
{
137+
if (mode.isLiteral)
138+
throw new ConfigurationException("SPARSE mode is only supported on non-literal columns.");
139+
140+
if (mode.isAnalyzed)
141+
throw new ConfigurationException("SPARSE mode doesn't support analyzers.");
142+
}
143+
144+
ColumnFamilyStore store = Schema.instance.getColumnFamilyStoreIncludingIndexes(Pair.create(cfm.ksName, cfm.cfName));
145+
if (store != null && store.indexManager.listIndexes()
146+
.stream()
147+
.filter((index) -> index.dependsOn(target.left)
148+
&& index.getClass().isAssignableFrom(SASIIndex.class))
149+
.findFirst().isPresent())
150+
throw new ConfigurationException("Index on '" + targetColumn + "' already exists, SASI doesn't support multiple indexes per column.");
151+
121152
return Collections.emptyMap();
122153
}
123154

src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
import org.apache.cassandra.db.marshal.UTF8Type;
3333
import org.apache.cassandra.db.rows.Cell;
3434
import org.apache.cassandra.db.rows.Row;
35-
import org.apache.cassandra.exceptions.ConfigurationException;
3635
import org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer;
3736
import org.apache.cassandra.index.sasi.conf.view.View;
3837
import org.apache.cassandra.index.sasi.disk.Token;
@@ -70,11 +69,6 @@ public ColumnIndex(AbstractType<?> keyValidator, ColumnDefinition column, IndexM
7069
this.component = new Component(Component.Type.SECONDARY_INDEX, String.format(FILE_NAME_FORMAT, getIndexName()));
7170
}
7271

73-
public void validate() throws ConfigurationException
74-
{
75-
mode.validate(config);
76-
}
77-
7872
/**
7973
* Initialize this column index with specific set of SSTables.
8074
*

0 commit comments

Comments
 (0)