Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add date capabilities to #GROUPBY #2731

Open
wants to merge 1 commit into
base: integration
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,100 +12,101 @@
import datawave.data.normalizer.DateNormalizer;

/**
* Represents different levels of granularity supported by the {@code #unique()} function. This class is also responsible for providing the functionality to
* transform values such that they conform to the specified granularity.
* Represents different levels of granularity supported by the {@code #unique()} and {@code #groupby} functions. This class is also responsible for providing the
* functionality to transform values such that they conform to the specified granularity.
*/
public enum UniqueGranularity {
public enum TemporalGranularity {

/**
* A {@link UniqueGranularity} implementation that will always return the original value.
* A {@link TemporalGranularity} implementation that will always return the original value.
*/
ALL("ALL", Function.identity()),

/**
* A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the day. Otherwise, the original
* A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the day. Otherwise, the original
* value will be returned.
*/
TRUNCATE_TEMPORAL_TO_DAY("DAY", new DateTimeValueFormatter("yyyy-MM-dd")),

/**
* A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the hour. Otherwise, the original
* A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the hour. Otherwise, the original
* value will be returned.
*/
TRUNCATE_TEMPORAL_TO_HOUR("HOUR", new DateTimeValueFormatter("yyyy-MM-dd'T'HH")),

/**
* A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the month. Otherwise, the original
* A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the month. Otherwise, the original
* value will be returned.
*/
TRUNCATE_TEMPORAL_TO_MONTH("MONTH", new DateTimeValueFormatter("yyyy-MM")),

/**
* A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the year. Otherwise, the original
* A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the year. Otherwise, the original
* value will be returned.
*/
TRUNCATE_TEMPORAL_TO_YEAR("YEAR", new DateTimeValueFormatter("yyyy")),

/**
* A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the second. Otherwise, the original
* value will be returned.
* A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the second. Otherwise, the
* original value will be returned.
*/
TRUNCATE_TEMPORAL_TO_SECOND("SECOND", new DateTimeValueFormatter("yyyy-MM-dd'T'HH:mm:ss")),

/**
* A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the millisecond. Otherwise, the
* A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the millisecond. Otherwise, the
* original value will be returned.
*/
TRUNCATE_TEMPORAL_TO_MILLISECOND("MILLISECOND", new DateTimeValueFormatter("yyyy-MM-dd'T'HH:mm:ss.SSS")),

/**
* A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the tenth of an hour. Otherwise, the
* original value will be returned.
* A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the tenth of an hour. Otherwise,
* the original value will be returned.
*/
TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR("TENTH_OF_HOUR", new DateTimeValueFormatter("yyyy-MM-dd'T'HH:m", true)),

/**
* A {@link UniqueGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the minute. Otherwise, the original
* value will be returned.
* A {@link TemporalGranularity} implementation that, if provided a datetime value, will return the datetime truncated to the minute. Otherwise, the
* original value will be returned.
*/
TRUNCATE_TEMPORAL_TO_MINUTE("MINUTE", new DateTimeValueFormatter("yyyy-MM-dd'T'HH:mm"));

private final String name;
private final Function<String,String> function;

@JsonCreator
public static UniqueGranularity of(String name) {
public static TemporalGranularity of(String name) {
name = name.toUpperCase();
switch (name) {
case "ALL":
return UniqueGranularity.ALL;
return TemporalGranularity.ALL;
case "YEAR":
return UniqueGranularity.TRUNCATE_TEMPORAL_TO_YEAR;
return TemporalGranularity.TRUNCATE_TEMPORAL_TO_YEAR;
case "MONTH":
return UniqueGranularity.TRUNCATE_TEMPORAL_TO_MONTH;
return TemporalGranularity.TRUNCATE_TEMPORAL_TO_MONTH;
case "DAY":
return UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY;
return TemporalGranularity.TRUNCATE_TEMPORAL_TO_DAY;
case "HOUR":
return UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR;
return TemporalGranularity.TRUNCATE_TEMPORAL_TO_HOUR;
case "TENTH_OF_HOUR":
return UniqueGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR;
return TemporalGranularity.TRUNCATE_TEMPORAL_TO_TENTH_OF_HOUR;
case "MINUTE":
return UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE;
return TemporalGranularity.TRUNCATE_TEMPORAL_TO_MINUTE;
case "SECOND":
return UniqueGranularity.TRUNCATE_TEMPORAL_TO_SECOND;
return TemporalGranularity.TRUNCATE_TEMPORAL_TO_SECOND;
case "MILLISECOND":
return UniqueGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND;
return TemporalGranularity.TRUNCATE_TEMPORAL_TO_MILLISECOND;
default:
throw new IllegalArgumentException("No " + UniqueGranularity.class.getSimpleName() + " exists with the name " + name);
throw new IllegalArgumentException("No " + TemporalGranularity.class.getSimpleName() + " exists with the name " + name);
}
}

UniqueGranularity(String name, Function<String,String> function) {
TemporalGranularity(String name, Function<String,String> function) {
this.name = name;
this.function = function;
}

/**
* Return the unique name of this {@link UniqueGranularity}.
* Return the unique name of this {@link TemporalGranularity}.
*
* @return the name
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;

import org.apache.commons.lang.StringUtils;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
Expand All @@ -30,19 +28,19 @@
*/
public class UniqueFields implements Serializable, Cloneable {

private final TreeMultimap<String,UniqueGranularity> fieldMap = TreeMultimap.create();
private static final String MOST_RECENT_UNIQUE = "_MOST_RECENT_";
private final TreeMultimap<String,TemporalGranularity> fieldMap = TreeMultimap.create();
private boolean mostRecent = false;
private static String MOST_RECENT_UNIQUE = "_MOST_RECENT_";

/**
* Returns a new {@link UniqueFields} parsed from this string. The provided string is expected to have the format returned by
* {@link UniqueFields#toString()}. Any fields not specified with a {@link UniqueGranularity} name will be added with the default ALL granularity. All
* {@link UniqueFields#toString()}. Any fields not specified with a {@link TemporalGranularity} name will be added with the default ALL granularity. All
* whitespace will be stripped before parsing. See below for certain edge cases:
* <ul>
* <li>Given null, null will be returned.</li>
* <li>Given an empty or blank string, an empty {@link UniqueFields} will be returned.</li>
* <li>Given {@code field1[],field2[DAY]}, or {@code field1,field2[DAY]}, or {@code field1[ALL],field2[DAY]}, a {@link UniqueFields} will be returned where
* field1 is added with {@link UniqueGranularity#ALL}, and field2 is added with {@link UniqueGranularity#TRUNCATE_TEMPORAL_TO_DAY}.</li>
* field1 is added with {@link TemporalGranularity#ALL}, and field2 is added with {@link TemporalGranularity#TRUNCATE_TEMPORAL_TO_DAY}.</li>
* </ul>
*
* @param string
Expand Down Expand Up @@ -81,7 +79,7 @@ public static UniqueFields from(String string) {
uniqueFields.setMostRecent(true);
} else {
// Add the field only if it's not blank. Ignore cases with consecutive trailing commas like field1[ALL],,
uniqueFields.put(field, UniqueGranularity.ALL);
uniqueFields.put(field, TemporalGranularity.ALL);
}
}
break; // There are no more fields to be parsed.
Expand All @@ -100,7 +98,7 @@ public static UniqueFields from(String string) {
uniqueFields.setMostRecent(true);
} else {
// Add the field only if it's not blank. Ignore cases with consecutive commas like field1,,field2[DAY]
uniqueFields.put(field, UniqueGranularity.ALL);
uniqueFields.put(field, TemporalGranularity.ALL);
}
}
currentIndex = nextComma + 1; // Advance to the start of the next field.
Expand All @@ -119,11 +117,11 @@ public static UniqueFields from(String string) {
String granularityList = string.substring((nextStartBracket + 1), nextEndBracket);
// An empty granularity list, e.g. field[] is equivalent to field[ALL].
if (granularityList.isEmpty()) {
uniqueFields.put(field, UniqueGranularity.ALL);
uniqueFields.put(field, TemporalGranularity.ALL);
} else {
String[] granularities = StringUtils.split(granularityList, Constants.COMMA);
for (String granularity : granularities) {
uniqueFields.put(field, parseGranularity(granularity));
uniqueFields.put(field, TemporalGranularity.of(granularity));
}
}
}
Expand All @@ -135,15 +133,6 @@ public static UniqueFields from(String string) {
return uniqueFields;
}

// Return the parsed granularity instance, or throw an exception if one could not be parsed.
private static UniqueGranularity parseGranularity(String granularity) {
try {
return UniqueGranularity.of(granularity.toUpperCase());
} catch (Exception e) {
throw new IllegalArgumentException("Invalid unique granularity given: " + granularity);
}
}

/**
* Return a clone of this class
*
Expand All @@ -165,7 +154,7 @@ public UniqueFields() {}
* @param fieldMap
* the field map to use
*/
public UniqueFields(SortedSetMultimap<String,UniqueGranularity> fieldMap) {
public UniqueFields(SortedSetMultimap<String,TemporalGranularity> fieldMap) {
putAll(fieldMap);
}

Expand All @@ -182,20 +171,20 @@ public UniqueFields clear() {
*
* @param fields
*/
public UniqueFields set(Multimap<String,UniqueGranularity> fields) {
public UniqueFields set(Multimap<String,TemporalGranularity> fields) {
return clear().putAll(fields);
}

/**
* Put a field-{@link UniqueGranularity} key pair into this {@link UniqueFields}.
* Put a field-{@link TemporalGranularity} key pair into this {@link UniqueFields}.
*
* @param field
* the field
* @param uniqueGranularity
* @param temporalGranularity
* the granularity
*/
public UniqueFields put(String field, UniqueGranularity uniqueGranularity) {
fieldMap.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), uniqueGranularity);
public UniqueFields put(String field, TemporalGranularity temporalGranularity) {
fieldMap.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), temporalGranularity);
return this;
}

Expand All @@ -205,7 +194,7 @@ public UniqueFields put(String field, UniqueGranularity uniqueGranularity) {
* @param fieldMap
* the field map to add entries from
*/
public UniqueFields putAll(Multimap<String,UniqueGranularity> fieldMap) {
public UniqueFields putAll(Multimap<String,TemporalGranularity> fieldMap) {
if (fieldMap != null) {
for (String field : fieldMap.keySet()) {
this.fieldMap.putAll(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), fieldMap.get(field));
Expand All @@ -221,7 +210,7 @@ public UniqueFields putAll(Multimap<String,UniqueGranularity> fieldMap) {
* @param replacement
*/
public void replace(String field, String replacement) {
Collection<UniqueGranularity> value = fieldMap.removeAll(field);
Collection<TemporalGranularity> value = fieldMap.removeAll(field);
if (value != null && !value.isEmpty()) {
fieldMap.putAll(replacement, value);
}
Expand All @@ -241,7 +230,7 @@ public NavigableSet<String> getFields() {
*
* @return the field map
*/
public TreeMultimap<String,UniqueGranularity> getFieldMap() {
public TreeMultimap<String,TemporalGranularity> getFieldMap() {
return fieldMap;
}

Expand All @@ -252,9 +241,9 @@ public TreeMultimap<String,UniqueGranularity> getFieldMap() {
* the model to find mappings from
*/
public void remapFields(Multimap<String,String> model) {
Multimap<String,UniqueGranularity> newFieldMap = TreeMultimap.create(fieldMap);
Multimap<String,TemporalGranularity> newFieldMap = TreeMultimap.create(fieldMap);
for (String field : fieldMap.keySet()) {
Collection<UniqueGranularity> granularities = fieldMap.get(field);
Collection<TemporalGranularity> granularities = fieldMap.get(field);
if (model.containsKey(field)) {
model.get(field).forEach((newField) -> newFieldMap.putAll(newField, granularities));
}
Expand Down Expand Up @@ -282,28 +271,28 @@ public boolean isEmpty() {
* @return a set containing the result of each transformation
*/
public Set<String> transformValues(String field, Collection<String> values) {
Collection<UniqueGranularity> granularities = fieldMap.get(field);
Collection<TemporalGranularity> granularities = fieldMap.get(field);
// If there is no granularity, or only the ALL granularity was specified, return the original values.
if (granularities.isEmpty() || (granularities.size() == 1 && granularities.contains(UniqueGranularity.ALL))) {
if (granularities.isEmpty() || (granularities.size() == 1 && granularities.contains(TemporalGranularity.ALL))) {
return Sets.newHashSet(values);
} else {
Set<String> transformedValues = new HashSet<>();
for (UniqueGranularity granularity : granularities) {
for (TemporalGranularity granularity : granularities) {
values.stream().map(granularity::transform).forEach(transformedValues::add);
}
return transformedValues;
}
}

public String transformValue(String field, String value) {
Collection<UniqueGranularity> granularities = fieldMap.get(field);
Collection<TemporalGranularity> granularities = fieldMap.get(field);
// If there is no granularity, or only the ALL granularity was specified, return the original value.
if (granularities.isEmpty() || (granularities.size() == 1 && granularities.contains(UniqueGranularity.ALL))) {
if (granularities.isEmpty() || (granularities.size() == 1 && granularities.contains(TemporalGranularity.ALL))) {
return value;
} else {
StringBuilder combinedValue = new StringBuilder();
String separator = "";
for (UniqueGranularity granularity : granularities) {
for (TemporalGranularity granularity : granularities) {
combinedValue.append(separator).append(granularity.transform(value));
}
return combinedValue.toString();
Expand Down Expand Up @@ -331,7 +320,7 @@ public String toString() {
String field = fieldIterator.next();
sb.append(field).append(Constants.BRACKET_START);
// Write each granularity for the field.
Iterator<UniqueGranularity> valueIterator = fieldMap.get(field).iterator();
Iterator<TemporalGranularity> valueIterator = fieldMap.get(field).iterator();
while (valueIterator.hasNext()) {
sb.append(valueIterator.next().getName());
if (valueIterator.hasNext()) {
Expand Down
Loading
Loading