Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Redesign of the DatePartitionedQueryPlanner (formerly know as the FederatedQueryPlanner) #2717

Open
wants to merge 20 commits into
base: integration
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
<version.datawave.common-utils>3.0.0</version.datawave.common-utils>
<version.datawave.dictionary-api>4.0.1</version.datawave.dictionary-api>
<version.datawave.mapreduce-query-api>1.0.0</version.datawave.mapreduce-query-api>
<version.datawave.metadata-utils>4.0.8</version.datawave.metadata-utils>
<version.datawave.metadata-utils>4.0.9-2702.3</version.datawave.metadata-utils>
<version.datawave.metrics-reporter>3.0.0</version.datawave.metrics-reporter>
<version.datawave.query-api>1.0.0</version.datawave.query-api>
<version.datawave.query-metric-api>4.0.7</version.datawave.query-metric-api>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
import java.io.Serializable;

/**
* This class represents a hole in the global index. Used by the PushdownMissingIndexRangeNodesVisitor.
* This class represents a hole in the global index for a set of values in a specified date range. Used by the PushdownMissingIndexRangeNodesVisitor.
*/
public class IndexHole implements Serializable, Comparable<IndexHole> {
public class IndexValueHole implements Serializable, Comparable<IndexValueHole> {
private static final long serialVersionUID = -6778479621810682281L;

private String startValue;
private String endValue;
private String startDate;
private String endDate;

public IndexHole() {}
public IndexValueHole() {}

/**
* Create an index with a date range and value range.
Expand All @@ -23,7 +23,7 @@ public IndexHole() {}
* @param valueRange
* the start and end values of the known hole
*/
public IndexHole(String[] dateRange, String[] valueRange) {
public IndexValueHole(String[] dateRange, String[] valueRange) {
setStartValue(valueRange[0]);
setEndValue(valueRange[1]);
setStartDate(dateRange[0]);
Expand Down Expand Up @@ -106,8 +106,8 @@ public String toString() {

@Override
public boolean equals(Object o) {
if (o instanceof IndexHole) {
IndexHole hole = (IndexHole) o;
if (o instanceof IndexValueHole) {
IndexValueHole hole = (IndexValueHole) o;
return startValue.equals(hole.startValue) && endValue.equals(hole.endValue) && startDate.equals(hole.startDate) && endDate.equals(hole.endDate);
}
return false;
Expand All @@ -129,7 +129,7 @@ public int hashCode() {
* the index hole
* @return the comparison
*/
public int compareTo(IndexHole hole) {
public int compareTo(IndexValueHole hole) {
int comparison = startValue.compareTo(hole.startValue);
if (comparison == 0) {
comparison = endValue.compareTo(hole.endValue);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
// Filter results on datatypes. Default to having no filters
private Set<String> datatypeFilter = UniversalSet.instance();
// A set of sorted index holes
private List<IndexHole> indexHoles = new ArrayList<>();
private List<IndexValueHole> indexValueHoles = new ArrayList<>();
// a set of user specified mappings
private Set<String> renameFields = new HashSet<>(0);
// Limit fields returned per event
Expand Down Expand Up @@ -527,7 +527,7 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
* The minimum percentage threshold that the count for an index row must meet compared to the count for the corresponding frequency row in the metadata
* table in order to NOT be considered a field index hole. The value must be between 0.0-1.0, where 1.0 is equivalent to 100%.
*/
private double fieldIndexHoleMinThreshold = 1.0d;
private double indexFieldHoleMinThreshold = 1.0d;

/**
* The set of date types that, if the query's end date is the current date, will NOT result in any date range adjustments or the addition of a
Expand Down Expand Up @@ -646,7 +646,7 @@ public void copyFrom(ShardQueryConfiguration other) {
this.setUnevaluatedFields(null == other.getUnevaluatedFields() ? null : Sets.newHashSet(other.getUnevaluatedFields()));
this.setDatatypeFilter(null == other.getDatatypeFilter() ? null
: (other.getDatatypeFilter() instanceof UniversalSet) ? UniversalSet.instance() : Sets.newHashSet(other.getDatatypeFilter()));
this.setIndexHoles(null == other.getIndexHoles() ? null : Lists.newArrayList(other.getIndexHoles()));
this.setIndexValueHoles(null == other.getIndexValueHoles() ? null : Lists.newArrayList(other.getIndexValueHoles()));
this.setProjectFields(null == other.getProjectFields() ? null : Sets.newHashSet(other.getProjectFields()));
this.setRenameFields(null == other.getRenameFields() ? null : Sets.newHashSet(other.getRenameFields()));
this.setDisallowlistedFields(null == other.getDisallowlistedFields() ? null : Sets.newHashSet(other.getDisallowlistedFields()));
Expand Down Expand Up @@ -783,7 +783,7 @@ public void copyFrom(ShardQueryConfiguration other) {
this.setCardinalityThreshold(other.getCardinalityThreshold());
this.setUseQueryTreeScanHintRules(other.isUseQueryTreeScanHintRules());
this.setQueryTreeScanHintRules(other.getQueryTreeScanHintRules());
this.setFieldIndexHoleMinThreshold(other.getFieldIndexHoleMinThreshold());
this.setIndexFieldHoleMinThreshold(other.getIndexFieldHoleMinThreshold());
this.setNoExpansionIfCurrentDateTypes(
other.getNoExpansionIfCurrentDateTypes() == null ? null : Sets.newHashSet(other.getNoExpansionIfCurrentDateTypes()));
this.setShardsAndDaysHintAllowed(other.isShardsAndDaysHintAllowed());
Expand Down Expand Up @@ -2335,12 +2335,12 @@ public void setAccrueStats(boolean accrueStats) {

}

public List<IndexHole> getIndexHoles() {
return indexHoles;
public List<IndexValueHole> getIndexValueHoles() {
return indexValueHoles;
}

public void setIndexHoles(List<IndexHole> indexHoles) {
this.indexHoles = indexHoles;
public void setIndexValueHoles(List<IndexValueHole> indexValueHoles) {
this.indexValueHoles = indexValueHoles;
}

public boolean getCollectTimingDetails() {
Expand Down Expand Up @@ -2789,12 +2789,12 @@ public void setRebuildDatatypeFilterPerShard(boolean rebuildDatatypeFilterPerSha
this.rebuildDatatypeFilterPerShard = rebuildDatatypeFilterPerShard;
}

public double getFieldIndexHoleMinThreshold() {
return fieldIndexHoleMinThreshold;
public double getIndexFieldHoleMinThreshold() {
return indexFieldHoleMinThreshold;
}

public void setFieldIndexHoleMinThreshold(double fieldIndexHoleMinThreshold) {
this.fieldIndexHoleMinThreshold = fieldIndexHoleMinThreshold;
public void setIndexFieldHoleMinThreshold(double indexFieldHoleMinThreshold) {
this.indexFieldHoleMinThreshold = indexFieldHoleMinThreshold;
}

public boolean getReduceIngestTypes() {
Expand Down Expand Up @@ -3004,7 +3004,7 @@ public boolean equals(Object o) {
Objects.equals(getNonEventKeyPrefixes(), that.getNonEventKeyPrefixes()) &&
Objects.equals(getUnevaluatedFields(), that.getUnevaluatedFields()) &&
Objects.equals(getDatatypeFilter(), that.getDatatypeFilter()) &&
Objects.equals(getIndexHoles(), that.getIndexHoles()) &&
Objects.equals(getIndexValueHoles(), that.getIndexValueHoles()) &&
Objects.equals(getProjectFields(), that.getProjectFields()) &&
Objects.equals(getRenameFields(), that.getRenameFields()) &&
Objects.equals(getDisallowlistedFields(), that.getDisallowlistedFields()) &&
Expand Down Expand Up @@ -3147,7 +3147,7 @@ public int hashCode() {
getNonEventKeyPrefixes(),
getUnevaluatedFields(),
getDatatypeFilter(),
getIndexHoles(),
getIndexValueHoles(),
getProjectFields(),
getRenameFields(),
getDisallowlistedFields(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import org.apache.commons.jexl3.parser.JexlNodes;
import org.apache.log4j.Logger;

import datawave.query.config.IndexHole;
import datawave.query.config.IndexValueHole;
import datawave.query.config.ShardQueryConfiguration;
import datawave.query.exceptions.DatawaveFatalQueryException;
import datawave.query.jexl.JexlASTHelper;
Expand All @@ -41,7 +41,7 @@ public class PushdownMissingIndexRangeNodesVisitor extends RebuildingVisitor {
// datatype filter
protected Set<String> dataTypeFilter;
// the set of holes known to exist in the index
protected SortedSet<IndexHole> indexHoles = new TreeSet<>();
protected SortedSet<IndexValueHole> indexHoles = new TreeSet<>();

/**
* Construct the visitor
Expand All @@ -57,7 +57,7 @@ public PushdownMissingIndexRangeNodesVisitor(ShardQueryConfiguration config, Met
this.beginDate = format.format(config.getBeginDate());
this.endDate = format.format(config.getEndDate());
this.dataTypeFilter = config.getDatatypeFilter();
this.indexHoles.addAll(config.getIndexHoles());
this.indexHoles.addAll(config.getIndexValueHoles());
}

/**
Expand Down Expand Up @@ -164,7 +164,7 @@ private boolean missingIndexRange(ASTEQNode node) {
Object literal = JexlASTHelper.getLiteralValue(node);
if (literal != null) {
String strLiteral = String.valueOf(literal);
for (IndexHole hole : this.indexHoles) {
for (IndexValueHole hole : this.indexHoles) {
if (hole.overlaps(this.beginDate, this.endDate, strLiteral)) {
return true;
} else if (hole.after(strLiteral)) {
Expand All @@ -176,6 +176,8 @@ private boolean missingIndexRange(ASTEQNode node) {
}

private boolean missingIndexRange(ASTERNode node) {
// TODO: need implementation for FieldIndexHole? Need field name, not values...
// why is FieldIndexHole not related to IndexHole?
Object literal = JexlASTHelper.getLiteralValue(node);
if (literal != null) {
String strLiteral = String.valueOf(literal);
Expand All @@ -193,7 +195,8 @@ private boolean missingIndexRange(ASTERNode node) {
endRange.append((char) 0);
}

for (IndexHole hole : indexHoles) {
for (IndexValueHole hole : indexHoles) {
// TODO: add overlaps method to FieldIndexHole...seriously what's up with the values
if (hole.overlaps(this.beginDate, this.endDate, leadingLiteral, endRange.toString())) {
return true;
} else if (hole.after(strLiteral)) {
Expand All @@ -212,7 +215,7 @@ private boolean missingIndexRange(ASTERNode node) {
private boolean missingIndexRange(LiteralRange range) {
String strUpper = String.valueOf(range.getUpper());
String strLower = String.valueOf(range.getLower());
for (IndexHole hole : indexHoles) {
for (IndexValueHole hole : indexHoles) {
if (hole.overlaps(this.beginDate, this.endDate, strLower, strUpper)) {
return true;
} else if (hole.after(strLower)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package datawave.query.jexl.visitors;

import static datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType.BOUNDED_RANGE;
import static datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType.EVALUATION_ONLY;

import java.util.Set;

import org.apache.commons.jexl3.parser.ASTAndNode;
import org.apache.commons.jexl3.parser.ASTEQNode;
import org.apache.commons.jexl3.parser.ASTERNode;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.JexlNodes;
import org.apache.log4j.Logger;

import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.LiteralRange;
import datawave.query.jexl.nodes.QueryPropertyMarker;

/**
* Visitor meant to 'push down' predicates for expressions that are not executable against the global OR field index.
*/
public class PushdownUnindexedFieldsVisitor extends RebuildingVisitor {
ivakegg marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will you be adding any tests for this visitor?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBD


private static final Logger log = Logger.getLogger(PushdownUnindexedFieldsVisitor.class);
ivakegg marked this conversation as resolved.
Show resolved Hide resolved

protected Set<String> unindexedFields;

/**
* Construct the visitor
*
* @param unindexedFields
* the fields being considered unindexed
*/
public PushdownUnindexedFieldsVisitor(Set<String> unindexedFields) {
this.unindexedFields = unindexedFields;
}

/**
* helper method that constructs and applies the visitor.
*
* @param unindexedFields
* the fields considered unindexed
* @param queryTree
* the query tree
* @param <T>
* type of the query tree
* @return a reference to the node
*/
public static <T extends JexlNode> T pushdownPredicates(T queryTree, Set<String> unindexedFields) {
PushdownUnindexedFieldsVisitor visitor = new PushdownUnindexedFieldsVisitor(unindexedFields);
return (T) (queryTree.jjtAccept(visitor, null));
}

@Override
public Object visit(ASTAndNode node, Object data) {
// if not already delayed somehow
if (QueryPropertyMarker.findInstance(node).isType(BOUNDED_RANGE)) {
LiteralRange range = JexlASTHelper.findRange().getRange(node);
ivakegg marked this conversation as resolved.
Show resolved Hide resolved

if (range != null) {
return delayBoundedIndexHole(range, node);
} else {
JexlNode andNode = JexlNodes.newInstanceOfType(node);
JexlNodes.copyIdentifierOrLiteral(node, andNode);
andNode.jjtSetParent(node.jjtGetParent());

// We have no bounded range to replace, just proceed as normal
JexlNodes.ensureCapacity(andNode, node.jjtGetNumChildren());
for (int i = 0; i < node.jjtGetNumChildren(); i++) {
JexlNode newChild = (JexlNode) node.jjtGetChild(i).jjtAccept(this, data);
andNode.jjtAddChild(newChild, i);
newChild.jjtSetParent(andNode);
}
return andNode;
}
} else {
return node;
}
}

/**
* Delay the ranges that overlap holes. The range map is expected to only be indexed ranges.
*
* @param range
* the range
* @param currentNode
* the current node
* @return a jexl node
*/
protected JexlNode delayBoundedIndexHole(LiteralRange range, ASTAndNode currentNode) {

if (missingIndexRange(range)) {
if (log.isDebugEnabled()) {
log.debug("Pushing down unindexed " + range);
}
return QueryPropertyMarker.create(currentNode, EVALUATION_ONLY);
} else {
return currentNode;
}

}

@Override
public Object visit(ASTEQNode node, Object data) {
if (missingIndexRange(node)) {
if (log.isDebugEnabled()) {
log.debug("Pushing down unindexed " + JexlStringBuildingVisitor.buildQuery(node));
}
return QueryPropertyMarker.create(node, EVALUATION_ONLY);
}
return node;
}

@Override
public Object visit(ASTERNode node, Object data) {
if (missingIndexRange(node)) {
if (log.isDebugEnabled()) {
log.debug("Pushing down unindexed " + JexlStringBuildingVisitor.buildQuery(node));
}
return QueryPropertyMarker.create(node, EVALUATION_ONLY);
}
return node;
}

private boolean missingIndexRange(ASTEQNode node) {
return unindexedFields.contains(JexlASTHelper.getIdentifier(node));
}

private boolean missingIndexRange(ASTERNode node) {
return unindexedFields.contains(JexlASTHelper.getIdentifier(node));
}

private boolean missingIndexRange(LiteralRange range) {
return (unindexedFields.contains(range.getFieldName()));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@
import datawave.query.CloseableIterable;

/**
* Implementation of {@link CloseableIterable} intended to be used by {@link FederatedQueryPlanner}. This iterable
* Implementation of {@link CloseableIterable} intended to be used by {@link DatePartitionedQueryPlanner}. This iterable
*/
public class FederatedQueryIterable implements CloseableIterable<QueryData> {
public class DatePartitionedQueryIterable implements CloseableIterable<QueryData> {

private final List<CloseableIterable<QueryData>> iterables = new ArrayList<>();

/**
* Add an iterable to this {@link FederatedQueryIterable}.
* Add an iterable to this {@link DatePartitionedQueryIterable}.
*
* @param iterable
* the iterable to add
Expand All @@ -28,7 +28,7 @@ public void addIterable(CloseableIterable<QueryData> iterable) {
}

/**
* Closes and clears each iterable in this {@link FederatedQueryIterable}.
* Closes and clears each iterable in this {@link DatePartitionedQueryIterable}.
*
* @throws IOException
* if an error occurred when closing an iterable
Expand All @@ -42,7 +42,7 @@ public void close() throws IOException {
}

/**
* Returns an iterator that will iterate over the {@link QueryData} returned by each iterable in this {@link FederatedQueryIterable}.
* Returns an iterator that will iterate over the {@link QueryData} returned by each iterable in this {@link DatePartitionedQueryIterable}.
*
* @return the iterator
*/
Expand Down
Loading
Loading