Skip to content

Commit a8c4f86

Browse files
authored
apacheGH-40339: [Java] StringView Initial Implementation (apache#40340)
### Rationale for this change StringView implementation in Java. This PR only includes the core implementation of StringView. ### What changes are included in this PR? - [X] Adding ViewVarBinaryVector - [X] Adding ViewVarCharVector - [X] Adding corresponding test cases in the given scope - [X] Including required implementation extensions with not-supported warnings - [X] Interface for Holders ### Non-Goals of this PR - [ ] apache#40937 - [ ] apache#40936 - [ ] apache#40932 - [ ] apache#40943 - [ ] apache#40944 - [ ] apache#40942 - [ ] https://github.com/apache/arrow/issues/40945 - [ ] https://github.com/apache/arrow/issues/40941 - [ ] https://github.com/apache/arrow/issues/40946 ### Are these changes tested? Yes. Existing test cases on `VarCharVector` and `VarBinaryVector` are verified with view implementations, and additional test cases have also been added to check view functionality. Explicit tests have also been added to evaluate the view functionality with `ViewVarCharVector`. ### Are there any user-facing changes? Yes, this introduces a new API, and some public methods have been included in an interface so that it can be extended to write custom functionality, as is done for views. * GitHub Issue: apache#40339 Lead-authored-by: Vibhatha Abeykoon <[email protected]> Co-authored-by: vibhatha <[email protected]> Co-authored-by: Vibhatha Lakmal Abeykoon <[email protected]> Signed-off-by: David Li <[email protected]>
1 parent 9090e67 commit a8c4f86

40 files changed

+3898
-20
lines changed

java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java

+10
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ public ColumnBinder visit(ArrowType.Utf8 type) {
148148
new VarCharBinder<>(varChar, jdbcType);
149149
}
150150

151+
@Override
152+
public ColumnBinder visit(ArrowType.Utf8View type) {
153+
throw new UnsupportedOperationException("Column binder implemented for type " + type + " is not supported");
154+
}
155+
151156
@Override
152157
public ColumnBinder visit(ArrowType.LargeUtf8 type) {
153158
LargeVarCharVector varChar = (LargeVarCharVector) vector;
@@ -162,6 +167,11 @@ public ColumnBinder visit(ArrowType.Binary type) {
162167
new VarBinaryBinder<>(varBinary, jdbcType);
163168
}
164169

170+
@Override
171+
public ColumnBinder visit(ArrowType.BinaryView type) {
172+
throw new UnsupportedOperationException("Column binder implemented for type " + type + " is not supported");
173+
}
174+
165175
@Override
166176
public ColumnBinder visit(ArrowType.LargeBinary type) {
167177
LargeVarBinaryVector varBinary = (LargeVarBinaryVector) vector;

java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java

+10
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,11 @@ public List<ArrowBuf> visit(ArrowType.Utf8 type) {
209209
}
210210
}
211211

212+
@Override
213+
public List<ArrowBuf> visit(ArrowType.Utf8View type) {
214+
throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported");
215+
}
216+
212217
@Override
213218
public List<ArrowBuf> visit(ArrowType.LargeUtf8 type) {
214219
try (ArrowBuf offsets = importOffsets(type, LargeVarCharVector.OFFSET_WIDTH)) {
@@ -237,6 +242,11 @@ public List<ArrowBuf> visit(ArrowType.Binary type) {
237242
}
238243
}
239244

245+
@Override
246+
public List<ArrowBuf> visit(ArrowType.BinaryView type) {
247+
throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported");
248+
}
249+
240250
@Override
241251
public List<ArrowBuf> visit(ArrowType.LargeBinary type) {
242252
try (ArrowBuf offsets = importOffsets(type, LargeVarBinaryVector.OFFSET_WIDTH)) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.arrow.driver.jdbc.converter.impl;
19+
20+
import org.apache.arrow.vector.FieldVector;
21+
import org.apache.arrow.vector.types.pojo.ArrowType;
22+
import org.apache.arrow.vector.types.pojo.Field;
23+
import org.apache.calcite.avatica.AvaticaParameter;
24+
import org.apache.calcite.avatica.remote.TypedValue;
25+
26+
/** AvaticaParameterConverter for BinaryView Arrow types. */
27+
public class BinaryViewAvaticaParameterConverter extends BaseAvaticaParameterConverter {
28+
29+
public BinaryViewAvaticaParameterConverter(ArrowType.BinaryView type) {
30+
31+
}
32+
33+
@Override
34+
public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) {
35+
throw new UnsupportedOperationException("Not implemented");
36+
}
37+
38+
@Override
39+
public AvaticaParameter createParameter(Field field) {
40+
return createParameter(field, false);
41+
}
42+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.arrow.driver.jdbc.converter.impl;
19+
20+
import org.apache.arrow.vector.FieldVector;
21+
import org.apache.arrow.vector.types.pojo.ArrowType;
22+
import org.apache.arrow.vector.types.pojo.Field;
23+
import org.apache.calcite.avatica.AvaticaParameter;
24+
import org.apache.calcite.avatica.remote.TypedValue;
25+
26+
/**
27+
* AvaticaParameterConverter for Utf8View Arrow types.
28+
*/
29+
public class Utf8ViewAvaticaParameterConverter extends BaseAvaticaParameterConverter {
30+
31+
public Utf8ViewAvaticaParameterConverter(ArrowType.Utf8View type) {
32+
}
33+
34+
@Override
35+
public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) {
36+
throw new UnsupportedOperationException("Utf8View not supported");
37+
}
38+
39+
@Override
40+
public AvaticaParameter createParameter(Field field) {
41+
return createParameter(field, false);
42+
}
43+
}

java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java

+10
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,11 @@ public Boolean visit(ArrowType.Utf8 type) {
190190
return new Utf8AvaticaParameterConverter(type).bindParameter(vector, typedValue, index);
191191
}
192192

193+
@Override
194+
public Boolean visit(ArrowType.Utf8View type) {
195+
throw new UnsupportedOperationException("Utf8View is unsupported");
196+
}
197+
193198
@Override
194199
public Boolean visit(ArrowType.LargeUtf8 type) {
195200
return new LargeUtf8AvaticaParameterConverter(type).bindParameter(vector, typedValue, index);
@@ -200,6 +205,11 @@ public Boolean visit(ArrowType.Binary type) {
200205
return new BinaryAvaticaParameterConverter(type).bindParameter(vector, typedValue, index);
201206
}
202207

208+
@Override
209+
public Boolean visit(ArrowType.BinaryView type) {
210+
throw new UnsupportedOperationException("BinaryView is unsupported");
211+
}
212+
203213
@Override
204214
public Boolean visit(ArrowType.LargeBinary type) {
205215
return new LargeBinaryAvaticaParameterConverter(type).bindParameter(vector, typedValue, index);

java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java

+12
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.util.stream.Stream;
2424

2525
import org.apache.arrow.driver.jdbc.converter.impl.BinaryAvaticaParameterConverter;
26+
import org.apache.arrow.driver.jdbc.converter.impl.BinaryViewAvaticaParameterConverter;
2627
import org.apache.arrow.driver.jdbc.converter.impl.BoolAvaticaParameterConverter;
2728
import org.apache.arrow.driver.jdbc.converter.impl.DateAvaticaParameterConverter;
2829
import org.apache.arrow.driver.jdbc.converter.impl.DecimalAvaticaParameterConverter;
@@ -43,6 +44,7 @@
4344
import org.apache.arrow.driver.jdbc.converter.impl.TimestampAvaticaParameterConverter;
4445
import org.apache.arrow.driver.jdbc.converter.impl.UnionAvaticaParameterConverter;
4546
import org.apache.arrow.driver.jdbc.converter.impl.Utf8AvaticaParameterConverter;
47+
import org.apache.arrow.driver.jdbc.converter.impl.Utf8ViewAvaticaParameterConverter;
4648
import org.apache.arrow.flight.sql.FlightSqlColumnMetadata;
4749
import org.apache.arrow.vector.types.pojo.ArrowType;
4850
import org.apache.arrow.vector.types.pojo.Field;
@@ -208,6 +210,11 @@ public AvaticaParameter visit(ArrowType.Utf8 type) {
208210
return new Utf8AvaticaParameterConverter(type).createParameter(field);
209211
}
210212

213+
@Override
214+
public AvaticaParameter visit(ArrowType.Utf8View type) {
215+
return new Utf8ViewAvaticaParameterConverter(type).createParameter(field);
216+
}
217+
211218
@Override
212219
public AvaticaParameter visit(ArrowType.LargeUtf8 type) {
213220
return new LargeUtf8AvaticaParameterConverter(type).createParameter(field);
@@ -218,6 +225,11 @@ public AvaticaParameter visit(ArrowType.Binary type) {
218225
return new BinaryAvaticaParameterConverter(type).createParameter(field);
219226
}
220227

228+
@Override
229+
public AvaticaParameter visit(ArrowType.BinaryView type) {
230+
return new BinaryViewAvaticaParameterConverter(type).createParameter(field);
231+
}
232+
221233
@Override
222234
public AvaticaParameter visit(ArrowType.LargeBinary type) {
223235
return new LargeBinaryAvaticaParameterConverter(type).createParameter(field);

java/memory/memory-core/src/main/java/module-info.java

+1
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@
2525
requires jsr305;
2626
requires org.immutables.value;
2727
requires org.slf4j;
28+
requires org.checkerframework.checker.qual;
2829
}

java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java

+2
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,6 @@ public interface ReusableBuffer<T> {
4444
* @param len the number of bytes of the new data
4545
*/
4646
void set(ArrowBuf srcBytes, long start, long len);
47+
48+
void set(byte[] srcBytes, long start, long len);
4749
}

java/vector/src/main/codegen/data/ArrowTypes.tdd

+10
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@
6565
fields: [],
6666
complex: false
6767
},
68+
{
69+
name: "Utf8View",
70+
fields: [],
71+
complex: false
72+
},
6873
{
6974
name: "LargeUtf8",
7075
fields: [],
@@ -75,6 +80,11 @@
7580
fields: [],
7681
complex: false
7782
},
83+
{
84+
name: "BinaryView",
85+
fields: [],
86+
complex: false
87+
},
7888
{
7989
name: "LargeBinary",
8090
fields: [],

java/vector/src/main/codegen/data/ValueVectorTypes.tdd

+3-1
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,9 @@
189189
fields: [{name: "start", type: "int"}, {name: "end", type: "int"}, {name: "buffer", type: "ArrowBuf"}],
190190
minor: [
191191
{ class: "VarBinary" , friendlyType: "byte[]" },
192-
{ class: "VarChar" , friendlyType: "Text" }
192+
{ class: "VarChar" , friendlyType: "Text" },
193+
{ class: "ViewVarBinary" , friendlyType: "byte[]" },
194+
{ class: "ViewVarChar" , friendlyType: "Text" }
193195
]
194196
},
195197
{

java/vector/src/main/codegen/templates/HolderReaderImpl.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,9 @@ public void read(Nullable${name}Holder h) {
109109
</#if>
110110
byte[] value = new byte [length];
111111
holder.buffer.getBytes(holder.start, value, 0, length);
112-
<#if minor.class == "VarBinary" || minor.class == "LargeVarBinary">
112+
<#if minor.class == "VarBinary" || minor.class == "LargeVarBinary" || minor.class == "ViewVarBinary">
113113
return value;
114-
<#elseif minor.class == "VarChar" || minor.class == "LargeVarChar">
114+
<#elseif minor.class == "VarChar" || minor.class == "LargeVarChar" || minor.class == "ViewVarChar">
115115
Text text = new Text();
116116
text.set(value);
117117
return text;

java/vector/src/main/codegen/templates/UnionReader.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
@SuppressWarnings("unused")
4040
public class UnionReader extends AbstractFieldReader {
4141

42-
private static final int NUM_SUPPORTED_TYPES = 46;
42+
private static final int NUM_SUPPORTED_TYPES = 48;
4343

4444
private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES];
4545
public UnionVector data;

java/vector/src/main/codegen/templates/ValueHolders.java

+3-7
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
package org.apache.arrow.vector.holders;
2828

2929
<#include "/@includes/vv_imports.ftl" />
30-
3130
/**
3231
* Source code generated using FreeMarker template ${.template_name}
3332
*/
@@ -40,11 +39,12 @@ public final class ${className} implements ValueHolder{
4039
4140
/** The last index (exclusive) into the Vector. **/
4241
public int end;
43-
42+
4443
/** The Vector holding the actual values. **/
4544
public ${minor.class}Vector vector;
46-
45+
4746
<#else>
47+
4848
public static final int WIDTH = ${type.width};
4949
5050
<#if mode.name == "Optional">public int isSet;
@@ -70,10 +70,6 @@ public String toString(){
7070
throw new UnsupportedOperationException();
7171
}
7272
</#if>
73-
74-
75-
76-
7773
}
7874

7975
</#list>

java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import org.apache.arrow.util.Preconditions;
2121

2222
/**
23-
* Tuple class containing a vector and whether is was created.
23+
* Tuple class containing a vector and whether it was created.
2424
*
2525
* @param <V> The type of vector the result is for.
2626
*/

java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java

+9-2
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@
4242
/**
4343
* BaseLargeVariableWidthVector is a base class providing functionality for large strings/large bytes types.
4444
*/
45-
public abstract class BaseLargeVariableWidthVector extends BaseValueVector
46-
implements VariableWidthVector, FieldVector, VectorDefinitionSetter {
45+
public abstract class BaseLargeVariableWidthVector extends BaseValueVector implements VariableWidthFieldVector {
4746
private static final int DEFAULT_RECORD_BYTE_COUNT = 12;
4847
private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT;
4948
private int lastValueCapacity;
@@ -942,6 +941,7 @@ public void setValueCount(int valueCount) {
942941
*
943942
* @param index target index
944943
*/
944+
@Override
945945
public void fillEmpties(int index) {
946946
handleSafe(index, emptyByteArray.length);
947947
fillHoles(index);
@@ -955,6 +955,7 @@ public void fillEmpties(int index) {
955955
*
956956
* @param value desired index of last non-null element.
957957
*/
958+
@Override
958959
public void setLastSet(int value) {
959960
lastSet = value;
960961
}
@@ -964,6 +965,7 @@ public void setLastSet(int value) {
964965
*
965966
* @return index of the last non-null element
966967
*/
968+
@Override
967969
public int getLastSet() {
968970
return lastSet;
969971
}
@@ -1003,6 +1005,7 @@ public void setValueLengthSafe(int index, int length) {
10031005
* @param index position of element to get
10041006
* @return greater than 0 length for non-null element, 0 otherwise
10051007
*/
1008+
@Override
10061009
public int getValueLength(int index) {
10071010
assert index >= 0;
10081011
if (isSet(index) == 0) {
@@ -1021,6 +1024,7 @@ public int getValueLength(int index) {
10211024
* @param index position of the element to set
10221025
* @param value array of bytes to write
10231026
*/
1027+
@Override
10241028
public void set(int index, byte[] value) {
10251029
assert index >= 0;
10261030
fillHoles(index);
@@ -1037,6 +1041,7 @@ public void set(int index, byte[] value) {
10371041
* @param index position of the element to set
10381042
* @param value array of bytes to write
10391043
*/
1044+
@Override
10401045
public void setSafe(int index, byte[] value) {
10411046
assert index >= 0;
10421047
handleSafe(index, value.length);
@@ -1055,6 +1060,7 @@ public void setSafe(int index, byte[] value) {
10551060
* @param start start index in array of bytes
10561061
* @param length length of data in array of bytes
10571062
*/
1063+
@Override
10581064
public void set(int index, byte[] value, int start, int length) {
10591065
assert index >= 0;
10601066
fillHoles(index);
@@ -1091,6 +1097,7 @@ public void setSafe(int index, byte[] value, int start, int length) {
10911097
* @param start start index in ByteBuffer
10921098
* @param length length of data in ByteBuffer
10931099
*/
1100+
@Override
10941101
public void set(int index, ByteBuffer value, int start, int length) {
10951102
assert index >= 0;
10961103
fillHoles(index);

0 commit comments

Comments
 (0)