From 01a4d5d9a092af19bb12386924b4de270d240f3a Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Wed, 9 Apr 2025 15:19:25 +0300 Subject: [PATCH 01/11] GH-87: [Vector] Add ExtensionWriter (#697) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on changes from https://github.com/apache/arrow/pull/41731. Added writer ExtensionWriter with 3 methods: - write method for writing values from Extension holders; - writeExtension method for writing values (the argument is an Object because the exact type is unknown); - addExtensionTypeWriterFactory method - because the exact vector and value type are unknown, the user should create their own extension type vector, a writer for it, and an ExtensionTypeWriterFactory, which should map the vector to the writer. Closes #87. Co-authored-by: Finn Völkel --- .../templates/AbstractFieldWriter.java | 22 ++++ .../AbstractPromotableFieldWriter.java | 10 ++ .../main/codegen/templates/BaseWriter.java | 31 +++++ .../codegen/templates/PromotableWriter.java | 14 +++ .../main/codegen/templates/StructWriters.java | 26 ++++ .../codegen/templates/UnionListWriter.java | 23 ++++ .../codegen/templates/UnionMapWriter.java | 12 ++ .../main/codegen/templates/UnionWriter.java | 20 +++ .../impl/AbstractExtensionTypeWriter.java | 66 ++++++++++ .../impl/ExtensionTypeWriterFactory.java | 38 ++++++ .../complex/impl/UnionExtensionWriter.java | 79 ++++++++++++ .../vector/complex/writer/FieldWriter.java | 4 +- .../arrow/vector/holders/ExtensionHolder.java | 22 ++++ .../apache/arrow/vector/TestStructVector.java | 37 ++++++ .../org/apache/arrow/vector/UuidVector.java | 114 ++++++++++++++++++ .../complex/impl/TestPromotableWriter.java | 29 +++++ .../complex/impl/UuidWriterFactory.java | 31 +++++ .../vector/complex/impl/UuidWriterImpl.java | 47 ++++++++ .../complex/writer/TestSimpleWriter.java | 20 +++ .../arrow/vector/holder/UuidHolder.java | 23 ++++ .../vector/types/pojo/TestExtensionType.java | 70 +----------
.../arrow/vector/types/pojo/UuidType.java | 60 +++++++++ 22 files changed, 728 insertions(+), 70 deletions(-) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/UuidVector.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java diff --git a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java index 5ebfb6877fc..e6fa06958d9 100644 --- a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -107,6 +107,16 @@ public void endEntry() { throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); } + public void write(ExtensionHolder var1) { + this.fail("ExtensionType"); + } + public void writeExtension(Object var1) { + this.fail("ExtensionType"); + } + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { + this.fail("ExtensionType"); + } + <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#assign friendlyType = 
(minor.friendlyType!minor.boxedType!type.boxedType) /> @@ -241,6 +251,18 @@ public MapWriter map(String name, boolean keysSorted) { fail("Map"); return null; } + + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + fail("Extension"); + return null; + } + + @Override + public ExtensionWriter extension(ArrowType arrowType) { + fail("Extension"); + return null; + } <#list vv.types as type><#list type.minor as minor> <#assign lowerName = minor.class?uncap_first /> <#if lowerName == "int" ><#assign lowerName = "integer" /> diff --git a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java index 06cb235f7dd..951edd5eeef 100644 --- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java @@ -293,6 +293,11 @@ public MapWriter map(boolean keysSorted) { return getWriter(MinorType.MAP, new ArrowType.Map(keysSorted)); } + @Override + public ExtensionWriter extension(ArrowType arrowType) { + return getWriter(MinorType.EXTENSIONTYPE).extension(arrowType); + } + @Override public StructWriter struct(String name) { return getWriter(MinorType.STRUCT).struct(name); @@ -318,6 +323,11 @@ public MapWriter map(String name, boolean keysSorted) { return getWriter(MinorType.STRUCT).map(name, keysSorted); } + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + return getWriter(MinorType.EXTENSIONTYPE).extension(name, arrowType); + } + <#list vv.types as type><#list type.minor as minor> <#assign lowerName = minor.class?uncap_first /> <#if lowerName == "int" ><#assign lowerName = "integer" /> diff --git a/java/vector/src/main/codegen/templates/BaseWriter.java b/java/vector/src/main/codegen/templates/BaseWriter.java index e952d46f1f2..78da7fddc3d 100644 --- a/java/vector/src/main/codegen/templates/BaseWriter.java +++ 
b/java/vector/src/main/codegen/templates/BaseWriter.java @@ -61,6 +61,7 @@ public interface StructWriter extends BaseWriter { void copyReaderToField(String name, FieldReader reader); StructWriter struct(String name); + ExtensionWriter extension(String name, ArrowType arrowType); ListWriter list(String name); ListWriter listView(String name); MapWriter map(String name); @@ -79,6 +80,7 @@ public interface ListWriter extends BaseWriter { ListWriter listView(); MapWriter map(); MapWriter map(boolean keysSorted); + ExtensionWriter extension(ArrowType arrowType); void copyReader(FieldReader reader); <#list vv.types as type><#list type.minor as minor> @@ -101,6 +103,35 @@ public interface MapWriter extends ListWriter { MapWriter value(); } + public interface ExtensionWriter extends BaseWriter { + + /** + * Writes a null value. + */ + void writeNull(); + + /** + * Writes value from the given extension holder. + * + * @param holder the extension holder to write + */ + void write(ExtensionHolder holder); + + /** + * Writes the given extension type value. + * + * @param value the extension type value to write + */ + void writeExtension(Object value); + + /** + * Adds the given extension type factory. This factory allows configuring writer implementations for specific ExtensionTypeVector. 
+ * + * @param factory the extension type factory to add + */ + void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory); + } + public interface ScalarWriter extends <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Writer, BaseWriter {} diff --git a/java/vector/src/main/codegen/templates/PromotableWriter.java b/java/vector/src/main/codegen/templates/PromotableWriter.java index c0e686f3178..8d7d57bb9d2 100644 --- a/java/vector/src/main/codegen/templates/PromotableWriter.java +++ b/java/vector/src/main/codegen/templates/PromotableWriter.java @@ -285,6 +285,9 @@ protected void setWriter(ValueVector v) { case UNION: writer = new UnionWriter((UnionVector) vector, nullableStructWriterFactory); break; + case EXTENSIONTYPE: + writer = new UnionExtensionWriter((ExtensionTypeVector) vector); + break; default: writer = type.getNewFieldWriter(vector); break; @@ -316,6 +319,7 @@ protected boolean requiresArrowType(MinorType type) { || type == MinorType.MAP || type == MinorType.DURATION || type == MinorType.FIXEDSIZEBINARY + || type == MinorType.EXTENSIONTYPE || (type.name().startsWith("TIMESTAMP") && type.name().endsWith("TZ")); } @@ -536,6 +540,16 @@ public void writeLargeVarChar(String value) { getWriter(MinorType.LARGEVARCHAR).writeLargeVarChar(value); } + @Override + public void writeExtension(Object value) { + getWriter(MinorType.EXTENSIONTYPE).writeExtension(value); + } + + @Override + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) { + getWriter(MinorType.EXTENSIONTYPE).addExtensionTypeWriterFactory(factory); + } + @Override public void allocate() { getWriter().allocate(); diff --git a/java/vector/src/main/codegen/templates/StructWriters.java b/java/vector/src/main/codegen/templates/StructWriters.java index 3e6258a0c6c..71bd7a5bc4c 100644 --- a/java/vector/src/main/codegen/templates/StructWriters.java +++ b/java/vector/src/main/codegen/templates/StructWriters.java @@ -83,6 +83,9 
@@ public class ${mode}StructWriter extends AbstractFieldWriter { fields.put(handleCase(child.getName()), writer); break; } + case EXTENSIONTYPE: + extension(child.getName(), child.getType()); + break; case UNION: FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.UNION.getType(), null, null); UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), fieldType, UnionVector.class), getNullableStructWriterFactory()); @@ -159,6 +162,29 @@ public StructWriter struct(String name) { return writer; } + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + String finalName = handleCase(name); + FieldWriter writer = fields.get(finalName); + if(writer == null){ + int vectorCount=container.size(); + FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null); + ExtensionTypeVector vector = container.addOrGet(name, fieldType, ExtensionTypeVector.class); + writer = new PromotableWriter(vector, container, getNullableStructWriterFactory()); + if(vectorCount != container.size()) { + writer.allocate(); + } + writer.setPosition(idx()); + fields.put(finalName, writer); + } else { + if (writer instanceof PromotableWriter) { + // ensure writers are initialized + ((PromotableWriter)writer).getWriter(MinorType.EXTENSIONTYPE, arrowType); + } + } + return (ExtensionWriter) writer; + } + @Override public void close() throws Exception { clear(); diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 3962e1d0731..9424533f290 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -201,6 +201,17 @@ public MapWriter map(String name, boolean keysSorted) { return mapWriter; } + @Override + public ExtensionWriter extension(ArrowType arrowType) { + writer.extension(arrowType); + return writer; + } + @Override + public ExtensionWriter extension(String 
name, ArrowType arrowType) { + ExtensionWriter extensionWriter = writer.extension(name, arrowType); + return extensionWriter; + } + <#if listName == "LargeList"> @Override public void startList() { @@ -323,6 +334,18 @@ public void writeNull() { } } + @Override + public void writeExtension(Object value) { + writer.writeExtension(value); + } + @Override + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { + writer.addExtensionTypeWriterFactory(var1); + } + public void write(ExtensionHolder var1) { + writer.write(var1); + } + <#list vv.types as type> <#list type.minor as minor> <#assign name = minor.class?cap_first /> diff --git a/java/vector/src/main/codegen/templates/UnionMapWriter.java b/java/vector/src/main/codegen/templates/UnionMapWriter.java index 90b55cb65e6..8b2f091215f 100644 --- a/java/vector/src/main/codegen/templates/UnionMapWriter.java +++ b/java/vector/src/main/codegen/templates/UnionMapWriter.java @@ -231,4 +231,16 @@ public MapWriter map() { return super.map(); } } + + @Override + public ExtensionWriter extension(ArrowType type) { + switch (mode) { + case KEY: + return entryWriter.extension(MapVector.KEY_NAME, type); + case VALUE: + return entryWriter.extension(MapVector.VALUE_NAME, type); + default: + return super.extension(type); + } + } } diff --git a/java/vector/src/main/codegen/templates/UnionWriter.java b/java/vector/src/main/codegen/templates/UnionWriter.java index bfe97e27705..272edab17c8 100644 --- a/java/vector/src/main/codegen/templates/UnionWriter.java +++ b/java/vector/src/main/codegen/templates/UnionWriter.java @@ -213,6 +213,10 @@ public MapWriter asMap(ArrowType arrowType) { return getMapWriter(arrowType); } + private ExtensionWriter getExtensionWriter(ArrowType arrowType) { + throw new UnsupportedOperationException("ExtensionTypes are not supported yet."); + } + BaseWriter getWriter(MinorType minorType) { return getWriter(minorType, null); } @@ -227,6 +231,8 @@ BaseWriter getWriter(MinorType minorType, 
ArrowType arrowType) { return getListViewWriter(); case MAP: return getMapWriter(arrowType); + case EXTENSIONTYPE: + return getExtensionWriter(arrowType); <#list vv.types as type> <#list type.minor as minor> <#assign name = minor.class?cap_first /> @@ -460,6 +466,20 @@ public MapWriter map(String name, boolean keysSorted) { return getStructWriter().map(name, keysSorted); } + @Override + public ExtensionWriter extension(ArrowType arrowType) { + data.setType(idx(), MinorType.EXTENSIONTYPE); + getListWriter().setPosition(idx()); + return getListWriter().extension(arrowType); + } + + @Override + public ExtensionWriter extension(String name, ArrowType arrowType) { + data.setType(idx(), MinorType.EXTENSIONTYPE); + getStructWriter().setPosition(idx()); + return getStructWriter().extension(name, arrowType); + } + <#list vv.types as type><#list type.minor as minor> <#assign lowerName = minor.class?uncap_first /> <#if lowerName == "int" ><#assign lowerName = "integer" /> diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java new file mode 100644 index 00000000000..fccff6c21fa --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractExtensionTypeWriter.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.types.pojo.Field; + +/** + * Base {@link AbstractFieldWriter} class for an {@link + * org.apache.arrow.vector.ExtensionTypeVector}. + * + * @param <T> a specific {@link ExtensionTypeVector}. + */ +public class AbstractExtensionTypeWriter<T extends ExtensionTypeVector> + extends AbstractFieldWriter { + protected final T vector; + + public AbstractExtensionTypeWriter(T vector) { + this.vector = vector; + } + + @Override + public Field getField() { + return this.vector.getField(); + } + + @Override + public int getValueCapacity() { + return this.vector.getValueCapacity(); + } + + @Override + public void allocate() { + this.vector.allocateNew(); + } + + @Override + public void close() { + this.vector.close(); + } + + @Override + public void clear() { + this.vector.clear(); + } + + @Override + public void writeNull() { + this.vector.setNull(getPosition()); + this.vector.setValueCount(getPosition() + 1); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java new file mode 100644 index 00000000000..09f0314c5fc --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ExtensionTypeWriterFactory.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.complex.writer.FieldWriter; + +/** + * A factory interface for creating instances of {@link ExtensionTypeWriter}. This factory allows + * configuring writer implementations for specific {@link ExtensionTypeVector}. + * + * @param <T> the type of writer implementation for a specific {@link ExtensionTypeVector}. + */ +public interface ExtensionTypeWriterFactory<T extends FieldWriter> { + + /** + * Returns an instance of the writer implementation for the given {@link ExtensionTypeVector}. + * + * @param vector the {@link ExtensionTypeVector} for which the writer implementation is to be + * returned. + * @return an instance of the writer implementation for the given {@link ExtensionTypeVector}.
+ */ + T getWriterImpl(ExtensionTypeVector vector); +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java new file mode 100644 index 00000000000..d341384bd95 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.types.pojo.Field; + +public class UnionExtensionWriter extends AbstractFieldWriter { + protected ExtensionTypeVector vector; + protected FieldWriter writer; + + public UnionExtensionWriter(ExtensionTypeVector vector) { + this.vector = vector; + } + + @Override + public void allocate() { + vector.allocateNew(); + } + + @Override + public void clear() { + vector.clear(); + } + + @Override + public int getValueCapacity() { + return vector.getValueCapacity(); + } + + @Override + public Field getField() { + return vector.getField(); + } + + @Override + public void close() throws Exception { + vector.close(); + } + + @Override + public void writeExtension(Object var1) { + this.writer.writeExtension(var1); + } + + @Override + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory factory) { + this.writer = factory.getWriterImpl(vector); + this.writer.setPosition(idx()); + } + + public void write(ExtensionHolder holder) { + this.writer.write(holder); + } + + @Override + public void setPosition(int index) { + super.setPosition(index); + if (this.writer != null) { + this.writer.setPosition(index); + } + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java index 949eb35d8eb..51bf1066859 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java @@ -16,6 +16,7 @@ */ package org.apache.arrow.vector.complex.writer; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import 
org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ScalarWriter; @@ -25,7 +26,8 @@ * Composite of all writer types. Writers are convenience classes for incrementally adding values to * {@linkplain org.apache.arrow.vector.ValueVector}s. */ -public interface FieldWriter extends StructWriter, ListWriter, MapWriter, ScalarWriter { +public interface FieldWriter + extends StructWriter, ListWriter, MapWriter, ScalarWriter, ExtensionWriter { void allocate(); void clear(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java b/java/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java new file mode 100644 index 00000000000..fc7ed85878d --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/holders/ExtensionHolder.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.holders; + +/** Base {@link ValueHolder} class for a {@link org.apache.arrow.vector.ExtensionTypeVector}. 
*/ +public abstract class ExtensionHolder implements ValueHolder { + public int isSet; +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index 4ef0fbe2d99..d40af9ae890 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -26,6 +26,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.UUID; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.AbstractStructVector; import org.apache.arrow.vector.complex.ListVector; @@ -37,9 +38,11 @@ import org.apache.arrow.vector.holders.ComplexHolder; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -336,6 +339,40 @@ public void testGetTransferPairWithFieldAndCallBack() { } } + @Test + public void testStructVectorWithExtensionTypes() { + UuidType uuidType = new UuidType(); + Field uuidField = new Field("struct_child", FieldType.nullable(uuidType), null); + Field structField = + new Field("struct", FieldType.nullable(new ArrowType.Struct()), List.of(uuidField)); + StructVector s1 = new StructVector(structField, allocator, null); + StructVector s2 = (StructVector) structField.createVector(allocator); + s1.close(); + s2.close(); + } + + @Test + public void testStructVectorTransferPairWithExtensionType() { + UuidType uuidType = new UuidType(); + Field uuidField = new Field("uuid_child", 
FieldType.nullable(uuidType), null); + Field structField = + new Field("struct", FieldType.nullable(new ArrowType.Struct()), List.of(uuidField)); + + StructVector s1 = (StructVector) structField.createVector(allocator); + UuidVector uuidVector = + s1.addOrGet("uuid_child", FieldType.nullable(uuidType), UuidVector.class); + s1.setValueCount(1); + uuidVector.set(0, new UUID(1, 2)); + s1.setIndexDefined(0); + + TransferPair tp = s1.getTransferPair(structField, allocator); + final StructVector toVector = (StructVector) tp.getTo(); + assertEquals(s1.getField(), toVector.getField()); + + s1.close(); + toVector.close(); + } + private StructVector simpleStructVector(String name, BufferAllocator allocator) { final String INT_COL = "struct_int_child"; final String FLT_COL = "struct_flt_child"; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/UuidVector.java b/java/vector/src/test/java/org/apache/arrow/vector/UuidVector.java new file mode 100644 index 00000000000..5c90d45f60f --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/UuidVector.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector; + +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; +import org.apache.arrow.vector.util.TransferPair; + +public class UuidVector extends ExtensionTypeVector<FixedSizeBinaryVector> + implements ValueIterableVector<UUID> { + private final Field field; + + public UuidVector( + String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { + super(name, allocator, underlyingVector); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); + } + + public UuidVector(String name, BufferAllocator allocator) { + super(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); + this.field = new Field(name, FieldType.nullable(new UuidType()), null); + } + + @Override + public UUID getObject(int index) { + final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); + return new UUID(bb.getLong(), bb.getLong()); + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + return getUnderlyingVector().hashCode(index, hasher); + } + + public void set(int index, UUID uuid) { + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + getUnderlyingVector().set(index, bb.array()); + } + + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + getUnderlyingVector() + .copyFromSafe(fromIndex, thisIndex, ((UuidVector) from).getUnderlyingVector()); + } + + @Override + public Field getField() { + return field; + } + + @Override + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((UuidVector) to); + } + + public
void setSafe(int index, byte[] value) { + getUnderlyingVector().setIndexDefined(index); + getUnderlyingVector().setSafe(index, value); + } + + public class TransferImpl implements TransferPair { + UuidVector to; + ValueVector targetUnderlyingVector; + TransferPair tp; + + public TransferImpl(UuidVector to) { + this.to = to; + targetUnderlyingVector = this.to.getUnderlyingVector(); + tp = getUnderlyingVector().makeTransferPair(targetUnderlyingVector); + } + + public UuidVector getTo() { + return this.to; + } + + public void transfer() { + tp.transfer(); + } + + public void splitAndTransfer(int startIndex, int length) { + tp.splitAndTransfer(startIndex, length); + } + + public void copyValueSafe(int fromIndex, int toIndex) { + tp.copyValueSafe(fromIndex, toIndex); + } + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index a791e551357..1556852c5a1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -26,12 +26,14 @@ import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.util.Objects; +import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.DirtyRootAllocator; import org.apache.arrow.vector.LargeVarBinaryVector; import org.apache.arrow.vector.LargeVarCharVector; +import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.ListVector; @@ -52,6 +54,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.Field; import 
org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; @@ -776,4 +779,30 @@ public void testPromoteToUnionFromDecimal() throws Exception { assertEquals(1, intHolder.value); } } + + @Test + public void testExtensionType() throws Exception { + try (final NonNullableStructVector container = + NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final UuidVector v = + container.addOrGet("uuid", FieldType.nullable(new UuidType()), UuidVector.class); + final PromotableWriter writer = new PromotableWriter(v, container)) { + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + container.allocateNew(); + container.setValueCount(1); + writer.addExtensionTypeWriterFactory(new UuidWriterFactory()); + + writer.setPosition(0); + writer.writeExtension(u1); + writer.setPosition(1); + writer.writeExtension(u2); + + container.setValueCount(2); + + UuidVector uuidVector = (UuidVector) container.getChild("uuid"); + assertEquals(u1, uuidVector.getObject(0)); + assertEquals(u2, uuidVector.getObject(1)); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java new file mode 100644 index 00000000000..1b1bf4e6e48 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterFactory.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.ExtensionTypeVector; +import org.apache.arrow.vector.UuidVector; + +public class UuidWriterFactory implements ExtensionTypeWriterFactory { + + @Override + public AbstractFieldWriter getWriterImpl(ExtensionTypeVector extensionTypeVector) { + if (extensionTypeVector instanceof UuidVector) { + return new UuidWriterImpl((UuidVector) extensionTypeVector); + } + return null; + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java new file mode 100644 index 00000000000..68029b1df55 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import java.nio.ByteBuffer; +import java.util.UUID; +import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.vector.holder.UuidHolder; +import org.apache.arrow.vector.holders.ExtensionHolder; + +public class UuidWriterImpl extends AbstractExtensionTypeWriter { + + public UuidWriterImpl(UuidVector vector) { + super(vector); + } + + @Override + public void writeExtension(Object value) { + UUID uuid = (UUID) value; + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + vector.setSafe(getPosition(), bb.array()); + vector.setValueCount(getPosition() + 1); + } + + @Override + public void write(ExtensionHolder holder) { + UuidHolder uuidHolder = (UuidHolder) holder; + vector.setSafe(getPosition(), uuidHolder.value); + vector.setValueCount(getPosition() + 1); + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java index 5bb59627041..bf1b9b0dfa1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java @@ -20,16 +20,20 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.nio.ByteBuffer; +import java.util.UUID; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.LargeVarBinaryVector; import org.apache.arrow.vector.LargeVarCharVector; +import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.impl.LargeVarBinaryWriterImpl; import 
org.apache.arrow.vector.complex.impl.LargeVarCharWriterImpl; +import org.apache.arrow.vector.complex.impl.UuidWriterImpl; import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.VarCharWriterImpl; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -184,4 +188,20 @@ public void testWriteTextToLargeVarChar() throws Exception { assertEquals(input, result); } } + + @Test + public void testWriteToExtensionVector() throws Exception { + try (UuidVector vector = new UuidVector("test", allocator); + UuidWriterImpl writer = new UuidWriterImpl(vector)) { + UUID uuid = UUID.randomUUID(); + ByteBuffer bb = ByteBuffer.allocate(16); + bb.putLong(uuid.getMostSignificantBits()); + bb.putLong(uuid.getLeastSignificantBits()); + UuidHolder holder = new UuidHolder(); + holder.value = bb.array(); + writer.write(holder); + UUID result = vector.getObject(0); + assertEquals(uuid, result); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java b/java/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java new file mode 100644 index 00000000000..207b0951a70 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/holder/UuidHolder.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.holder; + +import org.apache.arrow.vector.holders.ExtensionHolder; + +public class UuidHolder extends ExtensionHolder { + public byte[] value; +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index 8f54a6e5d74..d24708d66c4 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -41,6 +41,7 @@ import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.FixedSizeBinaryVector; import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.UuidVector; import org.apache.arrow.vector.ValueIterableVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.compare.Range; @@ -295,75 +296,6 @@ public void testVectorCompare() { } } - static class UuidType extends ExtensionType { - - @Override - public ArrowType storageType() { - return new ArrowType.FixedSizeBinary(16); - } - - @Override - public String extensionName() { - return "uuid"; - } - - @Override - public boolean extensionEquals(ExtensionType other) { - return other instanceof UuidType; - } - - @Override - public ArrowType deserialize(ArrowType storageType, String serializedData) { - if (!storageType.equals(storageType())) { - throw new UnsupportedOperationException( - "Cannot construct UuidType from underlying type " + storageType); - } - 
return new UuidType(); - } - - @Override - public String serialize() { - return ""; - } - - @Override - public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { - return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); - } - } - - static class UuidVector extends ExtensionTypeVector - implements ValueIterableVector { - - public UuidVector( - String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { - super(name, allocator, underlyingVector); - } - - @Override - public UUID getObject(int index) { - final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); - return new UUID(bb.getLong(), bb.getLong()); - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); - } - - public void set(int index, UUID uuid) { - ByteBuffer bb = ByteBuffer.allocate(16); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - getUnderlyingVector().set(index, bb.array()); - } - } - static class LocationType extends ExtensionType { @Override diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java new file mode 100644 index 00000000000..5e2bd8881be --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/UuidType.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.types.pojo; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.UuidVector; +import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; + +public class UuidType extends ExtensionType { + + @Override + public ArrowType storageType() { + return new ArrowType.FixedSizeBinary(16); + } + + @Override + public String extensionName() { + return "uuid"; + } + + @Override + public boolean extensionEquals(ExtensionType other) { + return other instanceof UuidType; + } + + @Override + public ArrowType deserialize(ArrowType storageType, String serializedData) { + if (!storageType.equals(storageType())) { + throw new UnsupportedOperationException( + "Cannot construct UuidType from underlying type " + storageType); + } + return new UuidType(); + } + + @Override + public String serialize() { + return ""; + } + + @Override + public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { + return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); + } +} From aacba195b73518ad00abd2bd14042d12872ab317 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Mon, 5 May 2025 15:48:16 +0300 Subject: [PATCH 02/11] just checking --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6c0c1323645..7ca984cce6e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -15,7 
+15,7 @@ # specific language governing permissions and limitations # under the License. -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.5) message(STATUS "Building using CMake version: ${CMAKE_VERSION}") # https://www.cmake.org/cmake/help/latest/policy/CMP0025.html From e07577237c73a9062c0ae5ba9269f0ff834ca45e Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Mon, 5 May 2025 16:00:23 +0300 Subject: [PATCH 03/11] Update Brewfile --- cpp/Brewfile | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/Brewfile b/cpp/Brewfile index 0f552798536..e7b52dba9cc 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -23,7 +23,6 @@ brew "bzip2" brew "c-ares" brew "curl" brew "ccache" -brew "cmake" brew "flatbuffers" brew "git" brew "glog" From ac201434e6a01ea1a0efcd6c279758ca3b6c52f0 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Mon, 5 May 2025 16:01:17 +0300 Subject: [PATCH 04/11] revert --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7ca984cce6e..6c0c1323645 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 3.16) message(STATUS "Building using CMake version: ${CMAKE_VERSION}") # https://www.cmake.org/cmake/help/latest/policy/CMP0025.html From 26cd21ce4b275dd877ec1a36878753a5313fd700 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Mon, 23 Jun 2025 09:24:56 +0300 Subject: [PATCH 05/11] Fix for Cmake 3.xx installation --- dev/tasks/java-jars/github.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index ada81d09bdc..120b4511b8b 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -148,6 +148,18 @@ jobs: # used on test We uninstall Homebrew's Protobuf to ensure using # bundled Protobuf. 
brew uninstall protobuf + + # fix cmake and boost versionsAdd commentMore actions + brew uninstall -f boost || true + brew uninstall -f cmake || true + mkdir -p homebrew-custom/Formula + curl -o homebrew-custom/Formula/cmake.rb https://raw.githubusercontent.com/Homebrew/homebrew-core/f68532bfe5cb87474093df8a839c3818c6aa44dd/Formula/c/cmake.rb + curl -o homebrew-custom/Formula/boost.rb https://raw.githubusercontent.com/Homebrew/homebrew-core/23f9c56c5075dd56b4471e2c93f89f6400b49ddd/Formula/b/boost.rb + brew install -v ./homebrew-custom/Formula/cmake.rb + brew install -v ./homebrew-custom/Formula/boost.rb + brew pin cmake + brew pin boost + # brew bundle --file=arrow/java/Brewfile - name: Build C++ libraries From 133700957ee5592dc4a352d1cbadeaab72f2dbf8 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Mon, 23 Jun 2025 09:36:51 +0300 Subject: [PATCH 06/11] Fix for Cmake 3.xx installation --- cpp/Brewfile | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/Brewfile b/cpp/Brewfile index e7b52dba9cc..0f552798536 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -23,6 +23,7 @@ brew "bzip2" brew "c-ares" brew "curl" brew "ccache" +brew "cmake" brew "flatbuffers" brew "git" brew "glog" From 44e30f5d97f3b8045985dbde9a2754ee415581dc Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Mon, 23 Jun 2025 09:49:12 +0300 Subject: [PATCH 07/11] update thrift --- ci/vcpkg/ports.patch | 8 ++++---- cpp/thirdparty/versions.txt | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/vcpkg/ports.patch b/ci/vcpkg/ports.patch index 67fb2a4a3ea..87e8106c3c0 100644 --- a/ci/vcpkg/ports.patch +++ b/ci/vcpkg/ports.patch @@ -85,7 +85,7 @@ index 2d5a854..9ff49ec 100644 @@ -1,6 +1,7 @@ { "name": "thrift", - "version": "0.20.0", + "version": "0.21.0", + "port-version": 1, "description": "Apache Thrift is a software project spanning a variety of programming languages and use cases. 
Our goal is to make reliable, performant communication and data serialization across languages as efficient and seamless as possible.", "homepage": "https://github.com/apache/thrift", @@ -97,7 +97,7 @@ index c6ce736..9ad1d63 100644 @@ -8622,7 +8622,7 @@ }, "thrift": { - "baseline": "0.20.0", + "baseline": "0.21.0", - "port-version": 0 + "port-version": 1 }, @@ -112,9 +112,9 @@ index 3db38c5..7464bde 100644 "versions": [ + { + "git-tree": "13757a6b05741cf3c9c39e3a1dcc5e5cd685e025", -+ "version": "0.20.0", ++ "version": "0.21.0", + "port-version": 1 + }, { "git-tree": "6855be1ce96497811d4eb0a9879baf6cf1b3610c", - "version": "0.20.0", + "version": "0.21.0", diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 369a49744b4..21c669830d5 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -107,8 +107,8 @@ ARROW_SUBSTRAIT_BUILD_VERSION=v0.44.0 ARROW_SUBSTRAIT_BUILD_SHA256_CHECKSUM=f989a862f694e7dbb695925ddb7c4ce06aa6c51aca945105c075139aed7e55a2 ARROW_S2N_TLS_BUILD_VERSION=v1.3.35 ARROW_S2N_TLS_BUILD_SHA256_CHECKSUM=9d32b26e6bfcc058d98248bf8fc231537e347395dd89cf62bb432b55c5da990d -ARROW_THRIFT_BUILD_VERSION=0.20.0 -ARROW_THRIFT_BUILD_SHA256_CHECKSUM=b5d8311a779470e1502c027f428a1db542f5c051c8e1280ccd2163fa935ff2d6 +ARROW_THRIFT_BUILD_VERSION=0.21.0 +ARROW_THRIFT_BUILD_SHA256_CHECKSUM=9a24f3eba9a4ca493602226c16d8c228037db3b9291c6fc4019bfe3bd39fc67c ARROW_UCX_BUILD_VERSION=1.12.1 ARROW_UCX_BUILD_SHA256_CHECKSUM=9bef31aed0e28bf1973d28d74d9ac4f8926c43ca3b7010bd22a084e164e31b71 ARROW_UTF8PROC_BUILD_VERSION=v2.7.0 From 197ec5eedce0bb1b0208eee4ced9bd7d76e109e3 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Mon, 23 Jun 2025 10:35:40 +0300 Subject: [PATCH 08/11] update thrift url --- ci/vcpkg/ports.patch | 8 ++++---- cpp/thirdparty/versions.txt | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ci/vcpkg/ports.patch b/ci/vcpkg/ports.patch index 87e8106c3c0..67fb2a4a3ea 100644 --- a/ci/vcpkg/ports.patch +++ 
b/ci/vcpkg/ports.patch @@ -85,7 +85,7 @@ index 2d5a854..9ff49ec 100644 @@ -1,6 +1,7 @@ { "name": "thrift", - "version": "0.21.0", + "version": "0.20.0", + "port-version": 1, "description": "Apache Thrift is a software project spanning a variety of programming languages and use cases. Our goal is to make reliable, performant communication and data serialization across languages as efficient and seamless as possible.", "homepage": "https://github.com/apache/thrift", @@ -97,7 +97,7 @@ index c6ce736..9ad1d63 100644 @@ -8622,7 +8622,7 @@ }, "thrift": { - "baseline": "0.21.0", + "baseline": "0.20.0", - "port-version": 0 + "port-version": 1 }, @@ -112,9 +112,9 @@ index 3db38c5..7464bde 100644 "versions": [ + { + "git-tree": "13757a6b05741cf3c9c39e3a1dcc5e5cd685e025", -+ "version": "0.21.0", ++ "version": "0.20.0", + "port-version": 1 + }, { "git-tree": "6855be1ce96497811d4eb0a9879baf6cf1b3610c", - "version": "0.21.0", + "version": "0.20.0", diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 21c669830d5..736c13d4522 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -107,8 +107,8 @@ ARROW_SUBSTRAIT_BUILD_VERSION=v0.44.0 ARROW_SUBSTRAIT_BUILD_SHA256_CHECKSUM=f989a862f694e7dbb695925ddb7c4ce06aa6c51aca945105c075139aed7e55a2 ARROW_S2N_TLS_BUILD_VERSION=v1.3.35 ARROW_S2N_TLS_BUILD_SHA256_CHECKSUM=9d32b26e6bfcc058d98248bf8fc231537e347395dd89cf62bb432b55c5da990d -ARROW_THRIFT_BUILD_VERSION=0.21.0 -ARROW_THRIFT_BUILD_SHA256_CHECKSUM=9a24f3eba9a4ca493602226c16d8c228037db3b9291c6fc4019bfe3bd39fc67c +ARROW_THRIFT_BUILD_VERSION=0.20.0 +ARROW_THRIFT_BUILD_SHA256_CHECKSUM=b5d8311a779470e1502c027f428a1db542f5c051c8e1280ccd2163fa935ff2d6 ARROW_UCX_BUILD_VERSION=1.12.1 ARROW_UCX_BUILD_SHA256_CHECKSUM=9bef31aed0e28bf1973d28d74d9ac4f8926c43ca3b7010bd22a084e164e31b71 ARROW_UTF8PROC_BUILD_VERSION=v2.7.0 @@ -164,7 +164,7 @@ DEPENDENCIES=( "ARROW_RE2_URL re2-${ARROW_RE2_BUILD_VERSION}.tar.gz 
https://github.com/google/re2/archive/${ARROW_RE2_BUILD_VERSION}.tar.gz" "ARROW_S2N_TLS_URL s2n-${ARROW_S2N_TLS_BUILD_VERSION}.tar.gz https://github.com/aws/s2n-tls/archive/${ARROW_S2N_TLS_BUILD_VERSION}.tar.gz" "ARROW_SNAPPY_URL snappy-${ARROW_SNAPPY_BUILD_VERSION}.tar.gz https://github.com/google/snappy/archive/${ARROW_SNAPPY_BUILD_VERSION}.tar.gz" - "ARROW_THRIFT_URL thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz https://dlcdn.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "ARROW_THRIFT_URL thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz https://www.apache.org/dyn/closer.lua/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz?action=download" "ARROW_UCX_URL ucx-${ARROW_UCX_BUILD_VERSION}.tar.gz https://github.com/openucx/ucx/archive/v${ARROW_UCX_BUILD_VERSION}.tar.gz" "ARROW_UTF8PROC_URL utf8proc-${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz https://github.com/JuliaStrings/utf8proc/archive/${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz" "ARROW_XSIMD_URL xsimd-${ARROW_XSIMD_BUILD_VERSION}.tar.gz https://github.com/xtensor-stack/xsimd/archive/${ARROW_XSIMD_BUILD_VERSION}.tar.gz" From d1a6102530eacfac540c0cf4a99cce09c9b5d691 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Mon, 23 Jun 2025 10:47:00 +0300 Subject: [PATCH 09/11] update thrift url --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 35ad4089e7f..9dcf4d2c06f 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -817,6 +817,7 @@ if(DEFINED ENV{ARROW_THRIFT_URL}) set(THRIFT_SOURCE_URL "$ENV{ARROW_THRIFT_URL}") else() set(THRIFT_SOURCE_URL + "https://www.apache.org/dyn/closer.lua/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz?action=download" 
"https://dlcdn.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" ) endif() From a8be8002ab3782767a1d9998f4d7e8353d11002c Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Mon, 23 Jun 2025 16:29:37 +0300 Subject: [PATCH 10/11] Update github.yml --- dev/tasks/java-jars/github.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index 120b4511b8b..ccc93979b34 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -148,7 +148,6 @@ jobs: # used on test We uninstall Homebrew's Protobuf to ensure using # bundled Protobuf. brew uninstall protobuf - # fix cmake and boost versionsAdd commentMore actions brew uninstall -f boost || true brew uninstall -f cmake || true @@ -161,6 +160,7 @@ jobs: brew pin boost # + brew bundle --file=arrow/java/Brewfile - name: Build C++ libraries env: From d2c5febe1ce608bb82f539e7dbe5106ba211478f Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Mon, 23 Jun 2025 17:12:12 +0300 Subject: [PATCH 11/11] fix format --- java/vector/src/main/codegen/templates/StructWriters.java | 4 ++-- java/vector/src/main/codegen/templates/UnionListWriter.java | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/java/vector/src/main/codegen/templates/StructWriters.java b/java/vector/src/main/codegen/templates/StructWriters.java index 71bd7a5bc4c..f1c00bc7490 100644 --- a/java/vector/src/main/codegen/templates/StructWriters.java +++ b/java/vector/src/main/codegen/templates/StructWriters.java @@ -166,12 +166,12 @@ public StructWriter struct(String name) { public ExtensionWriter extension(String name, ArrowType arrowType) { String finalName = handleCase(name); FieldWriter writer = fields.get(finalName); - if(writer == null){ + if (writer == null) { int vectorCount=container.size(); FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null); ExtensionTypeVector vector = 
container.addOrGet(name, fieldType, ExtensionTypeVector.class); writer = new PromotableWriter(vector, container, getNullableStructWriterFactory()); - if(vectorCount != container.size()) { + if (vectorCount != container.size()) { writer.allocate(); } writer.setPosition(idx()); diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 9424533f290..d8a61cf1a60 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -206,6 +206,7 @@ public ExtensionWriter extension(ArrowType arrowType) { writer.extension(arrowType); return writer; } + @Override public ExtensionWriter extension(String name, ArrowType arrowType) { ExtensionWriter extensionWriter = writer.extension(name, arrowType); @@ -338,10 +339,12 @@ public void writeNull() { public void writeExtension(Object value) { writer.writeExtension(value); } + @Override public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { writer.addExtensionTypeWriterFactory(var1); } + public void write(ExtensionHolder var1) { writer.write(var1); }