Skip to content

Commit 5cea1e2

Browse files
committed
fix(filter): add support for Arrays to JSONFilter (#47)
1 parent 3f05bb9 commit 5cea1e2

File tree

7 files changed

+218
-55
lines changed

7 files changed

+218
-55
lines changed

connect-file-pulse-api/src/main/java/io/streamthoughts/kafka/connect/filepulse/data/TypedField.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,10 @@ public String name() {
6565
public boolean equals(Object o) {
6666
if (this == o) return true;
6767
if (!(o instanceof TypedField)) return false;
68-
TypedField typeField = (TypedField) o;
69-
return index == typeField.index &&
70-
schema == typeField.schema &&
71-
Objects.equals(name, typeField.name);
68+
TypedField that = (TypedField) o;
69+
return index == that.index &&
70+
Objects.equals(schema, that.schema) &&
71+
Objects.equals(name, that.name);
7272
}
7373

7474
/**

connect-file-pulse-filters/src/main/java/io/streamthoughts/kafka/connect/filepulse/config/JSONFilterConfig.java

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,20 @@
2929

3030
public class JSONFilterConfig extends CommonFilterConfig {
3131

32-
public static final String JSON_TARGET_CONFIG = "target";
33-
public static final String JSON_TARGET_DOC = "The target field to put the parsed JSON value (optional)";
32+
public static final String JSON_TARGET_CONFIG = "target";
33+
public static final String JSON_TARGET_DOC = "The target field to put the parsed JSON value (optional)";
34+
35+
public static final String JSON_MERGE_CONFIG = "merge";
36+
public static final String JSON_MERGE_DOC = "A boolean that specifies whether to merge the JSON " +
37+
"object into the top level of the input record (default: false).";
38+
39+
public static final String JSON_EXPLODE_ARRAY_CONFIG = "explode.array";
40+
public static final String JSON_EXPLODE_ARRAY_DOC = "A boolean that specifies whether to explode arrays " +
41+
" into separate records (default: false)";
3442

3543
public static final String JSON_SOURCE_CHARSET_CONFIG = "source.charset";
36-
public static final String JSON_SOURCE_CHARSET_DOC = "The charset to be used for reading the source" +
37-
" field (default: UTF-8)";
44+
public static final String JSON_SOURCE_CHARSET_DOC = "The charset to be used for reading the source " +
45+
" field (default: UTF-8)";
3846

3947
/**
4048
* Creates a new {@link JSONFilterConfig} instance.
@@ -52,6 +60,14 @@ public String target() {
5260
return getString(JSON_TARGET_CONFIG);
5361
}
5462

63+
public boolean explode() {
64+
return getBoolean(JSON_EXPLODE_ARRAY_CONFIG);
65+
}
66+
67+
public boolean merge() {
68+
return getBoolean(JSON_MERGE_CONFIG);
69+
}
70+
5571
public Charset charset() {
5672
String name = getString(JSON_SOURCE_CHARSET_CONFIG);
5773
return name == null ? StandardCharsets.UTF_8 : Charset.forName(name);
@@ -66,7 +82,11 @@ public static ConfigDef configDef() {
6682
.define(JSON_TARGET_CONFIG, ConfigDef.Type.STRING, null,
6783
ConfigDef.Importance.HIGH, JSON_TARGET_DOC)
6884
.define(JSON_SOURCE_CHARSET_CONFIG, ConfigDef.Type.STRING, null,
69-
ConfigDef.Importance.MEDIUM, JSON_SOURCE_CHARSET_DOC);
85+
ConfigDef.Importance.MEDIUM, JSON_SOURCE_CHARSET_DOC)
86+
.define(JSON_EXPLODE_ARRAY_CONFIG, ConfigDef.Type.BOOLEAN, false,
87+
ConfigDef.Importance.MEDIUM, JSON_EXPLODE_ARRAY_DOC)
88+
.define(JSON_MERGE_CONFIG, ConfigDef.Type.BOOLEAN, false,
89+
ConfigDef.Importance.MEDIUM, JSON_MERGE_DOC);
7090
CommonFilterConfig.withOverwrite(def);
7191
CommonFilterConfig.withSource(def);
7292

connect-file-pulse-filters/src/main/java/io/streamthoughts/kafka/connect/filepulse/filter/JSONFilter.java

Lines changed: 68 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,38 +19,34 @@
1919
package io.streamthoughts.kafka.connect.filepulse.filter;
2020

2121
import io.streamthoughts.kafka.connect.filepulse.config.JSONFilterConfig;
22+
import io.streamthoughts.kafka.connect.filepulse.data.ArraySchema;
23+
import io.streamthoughts.kafka.connect.filepulse.data.Type;
2224
import io.streamthoughts.kafka.connect.filepulse.data.TypedStruct;
2325
import io.streamthoughts.kafka.connect.filepulse.data.TypedValue;
2426
import io.streamthoughts.kafka.connect.filepulse.json.DefaultJSONStructConverter;
2527
import io.streamthoughts.kafka.connect.filepulse.reader.RecordsIterable;
2628
import org.apache.kafka.common.config.ConfigDef;
2729

28-
import java.nio.charset.Charset;
30+
import java.util.Collection;
31+
import java.util.Collections;
32+
import java.util.List;
2933
import java.util.Map;
3034
import java.util.Set;
35+
import java.util.stream.Collectors;
3136

3237
public class JSONFilter extends AbstractMergeRecordFilter<JSONFilter> {
3338

3439
private final DefaultJSONStructConverter converter = new DefaultJSONStructConverter();
3540

3641
private JSONFilterConfig configs;
3742

38-
private String source;
39-
40-
private String target;
41-
42-
private Charset charset;
43-
4443
/**
4544
* {@inheritDoc}
4645
*/
4746
@Override
4847
public void configure(final Map<String, ?> props) {
4948
super.configure(props);
5049
configs = new JSONFilterConfig(props);
51-
source = configs.source();
52-
target = configs.target();
53-
charset = configs.charset();
5450
}
5551

5652
/**
@@ -66,44 +62,95 @@ public ConfigDef configDef() {
6662
*/
6763
@Override
6864
protected RecordsIterable<TypedStruct> apply(final FilterContext context, final TypedStruct record) {
69-
final String value = extractJsonField(checkIsNotNull(record.get(source)));
65+
final String value = extractJsonField(checkIsNotNull(record.get(configs.source())));
66+
final TypedValue typedValue;
67+
7068
try {
71-
final TypedStruct json = converter.readJson(value);
72-
if (target != null) {
73-
record.put(target, json);
74-
return RecordsIterable.of(record);
75-
}
76-
return RecordsIterable.of(json);
69+
typedValue = converter.readJson(value);
7770
} catch (Exception e) {
7871
throw new FilterException(e.getLocalizedMessage(), e.getCause());
7972
}
73+
74+
final Type type = typedValue.type();
75+
76+
if (type != Type.ARRAY && type != Type.STRUCT) {
77+
throw new FilterException(
78+
"Cannot process JSON value with unsupported type. Expected Array or Object, was " + type);
79+
}
80+
81+
if (type == Type.STRUCT && configs.merge()) {
82+
return RecordsIterable.of(typedValue.getStruct());
83+
}
84+
85+
if (type == Type.ARRAY) {
86+
if (configs.explode()) {
87+
88+
Collection<?> items = typedValue.getArray();
89+
ArraySchema arraySchema = (ArraySchema)typedValue.schema();
90+
Type arrayValueType = arraySchema.valueSchema().type();
91+
92+
if (configs.merge()) {
93+
if (arrayValueType == Type.STRUCT) {
94+
final List<TypedStruct> records = items
95+
.stream()
96+
.map(it -> TypedValue.any(it).getStruct())
97+
.collect(Collectors.toList());
98+
return new RecordsIterable<>(records);
99+
}
100+
101+
throw new FilterException(
102+
"Unsupported operation. Cannot merge array value of type '"
103+
+ arrayValueType + "' into the top level of the input record");
104+
}
105+
106+
final List<TypedStruct> records = items
107+
.stream()
108+
.map(it -> TypedStruct.create().put(targetField(), TypedValue.of(it, arrayValueType)))
109+
.collect(Collectors.toList());
110+
return new RecordsIterable<>(records);
111+
}
112+
113+
if (configs.merge()) {
114+
throw new FilterException(
115+
"Unsupported operation. Cannot merge JSON Array into the top level of the input record");
116+
}
117+
}
118+
119+
return RecordsIterable.of(TypedStruct.create().put(targetField(), typedValue));
80120
}
81121

82122
private String extractJsonField(final TypedValue value) {
83123
switch (value.type()) {
84124
case STRING:
85125
return value.getString();
86126
case BYTES:
87-
return new String(value.getBytes(), charset);
127+
return new String(value.getBytes(), configs.charset());
88128
default:
89129
throw new FilterException(
90-
"Invalid field '" + source + "', cannot parse JSON field of type '" + value.type() + "'");
130+
"Invalid field '" + configs.source() + "', cannot parse JSON field of type '" + value.type() + "'"
131+
);
91132
}
92133
}
93134

94135
private TypedValue checkIsNotNull(final TypedValue value) {
95136
if (value.isNull()) {
96137
throw new FilterException(
97-
"Invalid field '" + source + "', cannot convert empty value to JSON");
138+
"Invalid field '" + configs.source() + "', cannot convert empty value to JSON");
98139
}
99140
return value;
100141
}
101142

143+
private String targetField() {
144+
return configs.target() != null ? configs.target() : configs.source();
145+
}
146+
102147
/**
103148
* {@inheritDoc}
104149
*/
105150
@Override
106151
protected Set<String> overwrite() {
107-
return configs.overwrite();
152+
return configs.target() == null && !configs.merge() ?
153+
Collections.singleton(configs.source())
154+
: configs.overwrite() ;
108155
}
109156
}

connect-file-pulse-filters/src/main/java/io/streamthoughts/kafka/connect/filepulse/json/DefaultJSONStructConverter.java

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,14 @@
3535
public class DefaultJSONStructConverter implements JSONStructConverter {
3636

3737
private static final Map<ValueType, JsonFieldAccessor<?>> ACCESSORS = new HashMap<>();
38-
private static final ObjectJsonFieldAccessor DEFAULT_ACCESSOR = new ObjectJsonFieldAccessor();
3938

4039
/**
4140
* Creates a new {@link DefaultJSONStructConverter} instance.
4241
*/
4342
public DefaultJSONStructConverter() {
4443
ACCESSORS.put(ValueType.ARRAY, new ArrayJsonFieldAccessor());
4544
ACCESSORS.put(ValueType.STRING, new StringJsonFieldAccessor());
46-
ACCESSORS.put(ValueType.OBJECT, DEFAULT_ACCESSOR);
45+
ACCESSORS.put(ValueType.OBJECT, new ObjectJsonFieldAccessor());
4746
ACCESSORS.put(ValueType.BOOLEAN, new BooleanJsonFieldAccessor());
4847
ACCESSORS.put(ValueType.NUMBER, new NumberJsonFieldAccessor());
4948
}
@@ -65,15 +64,13 @@ private static JsonFieldAccessor<?> getAccessorForType(final ValueType type) {
6564
* {@inheritDoc}
6665
*/
6766
@Override
68-
public TypedStruct readJson(final String data) {
67+
public TypedValue readJson(final String data) {
6968

70-
if (data == null) {
71-
return null;
72-
}
69+
if (data == null) return null;
7370

7471
try {
7572
JsonIterator it = JsonIterator.parse(data);
76-
return DEFAULT_ACCESSOR.read(it).getStruct();
73+
return getAccessorForType(it.whatIsNext()).read(it);
7774

7875
} catch (Exception e) {
7976
throw new ReaderException("Error while reading json value, invalid JSON message.", e);

connect-file-pulse-filters/src/main/java/io/streamthoughts/kafka/connect/filepulse/json/JSONStructConverter.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package io.streamthoughts.kafka.connect.filepulse.json;
2020

2121
import io.streamthoughts.kafka.connect.filepulse.data.TypedStruct;
22+
import io.streamthoughts.kafka.connect.filepulse.data.TypedValue;
2223

2324
/**
2425
* Default interface to manage conversion from input JSON message to {@link TypedStruct} object.
@@ -31,5 +32,5 @@ public interface JSONStructConverter {
3132
* @param data the json message to convert to {@link TypedStruct}.
3233
* @return the new {@link TypedStruct} instance.
3334
*/
34-
TypedStruct readJson(final String data) throws Exception;
35+
TypedValue readJson(final String data) throws Exception;
3536
}

0 commit comments

Comments
 (0)