diff --git a/performance/src/main/java/org/apache/arrow/vector/UuidVectorBenchmarks.java b/performance/src/main/java/org/apache/arrow/vector/UuidVectorBenchmarks.java new file mode 100644 index 0000000000..b5f87e7a75 --- /dev/null +++ b/performance/src/main/java/org/apache/arrow/vector/UuidVectorBenchmarks.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.vector; + +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.impl.UuidWriterImpl; +import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.profile.GCProfiler; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** Benchmarks for {@link UuidVector}. */ +@State(Scope.Benchmark) +public class UuidVectorBenchmarks { + // checkstyle:off: MissingJavadocMethod + + private static final int VECTOR_LENGTH = 10_000; + + private static final int ALLOCATOR_CAPACITY = 1024 * 1024; + + private BufferAllocator allocator; + + private UuidVector vector; + + private UUID[] testUuids; + + @Setup + public void prepare() { + allocator = new RootAllocator(ALLOCATOR_CAPACITY); + vector = new UuidVector("vector", allocator); + vector.allocateNew(VECTOR_LENGTH); + vector.setValueCount(VECTOR_LENGTH); + + // Pre-generate UUIDs for consistent benchmarking + testUuids = new UUID[VECTOR_LENGTH]; + for (int i = 0; i < VECTOR_LENGTH; i++) { + testUuids[i] = new UUID(i, i * 2L); + } + } + + @TearDown + public void tearDown() { + vector.close(); + allocator.close(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setWithHolder() { + NullableUuidHolder holder = new NullableUuidHolder(); + for (int i = 0; i < VECTOR_LENGTH; i++) { + 
vector.get(i, holder); + vector.setSafe(i, holder); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setUuidDirectly() { + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.setSafe(i, testUuids[i]); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setWithWriter() { + UuidWriterImpl writer = new UuidWriterImpl(vector); + for (int i = 0; i < VECTOR_LENGTH; i++) { + writer.writeExtension(testUuids[i]); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void getWithUuidHolder() { + NullableUuidHolder holder = new NullableUuidHolder(); + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.get(i, holder); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void getUuidDirectly() { + for (int i = 0; i < VECTOR_LENGTH; i++) { + UUID uuid = vector.getObject(i); + } + } + + public static void main(String[] args) throws RunnerException { + Options opt = + new OptionsBuilder() + .include(UuidVectorBenchmarks.class.getSimpleName()) + .forks(1) + .addProfiler(GCProfiler.class) + .build(); + + new Runner(opt).run(); + } + // checkstyle:on: MissingJavadocMethod +} diff --git a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java index c662a6e064..a16a29dd36 100644 --- a/vector/src/main/java/org/apache/arrow/vector/UuidVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/UuidVector.java @@ -23,7 +23,9 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.complex.impl.UuidReaderImpl; import 
org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.extension.UuidType; @@ -132,7 +134,8 @@ public int hashCode(int index) { @Override public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); + int start = this.getStartOffset(index); + return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, start + UUID_BYTE_WIDTH); } /** @@ -145,21 +148,6 @@ public int isSet(int index) { return getUnderlyingVector().isSet(index); } - /** - * Gets the UUID value at the given index as an ArrowBuf. - * - * @param index the index to retrieve - * @return a buffer slice containing the 16-byte UUID - * @throws IllegalStateException if the value at the index is null and null checking is enabled - */ - public ArrowBuf get(int index) throws IllegalStateException { - if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } else { - return getBufferSlicePostNullCheck(index); - } - } - /** * Reads the UUID value at the given index into a NullableUuidHolder. * @@ -167,23 +155,24 @@ public ArrowBuf get(int index) throws IllegalStateException { * @param holder the holder to populate with the UUID data */ public void get(int index, NullableUuidHolder holder) { - if (NullCheckingForGet.NULL_CHECKING_ENABLED && this.isSet(index) == 0) { + Preconditions.checkArgument(index >= 0, "Cannot get negative index in UUID vector."); + if (isSet(index) == 0) { holder.isSet = 0; - } else { - holder.isSet = 1; - holder.buffer = getBufferSlicePostNullCheck(index); + return; } + holder.isSet = 1; + holder.buffer = getDataBuffer(); + holder.start = getStartOffset(index); } /** - * Reads the UUID value at the given index into a UuidHolder. + * Calculates the byte offset for a given index in the data buffer. 
* - * @param index the index to read from - * @param holder the holder to populate with the UUID data + * @param index the index of the UUID value + * @return the byte offset in the data buffer */ - public void get(int index, UuidHolder holder) { - holder.isSet = 1; - holder.buffer = getBufferSlicePostNullCheck(index); + public final int getStartOffset(int index) { + return index * UUID_BYTE_WIDTH; } /** @@ -207,7 +196,7 @@ public void set(int index, UUID value) { * @param holder the holder containing the UUID data */ public void set(int index, UuidHolder holder) { - this.set(index, holder.isSet, holder.buffer); + this.set(index, holder.buffer, holder.start); } /** @@ -217,28 +206,11 @@ public void set(int index, UuidHolder holder) { * @param holder the holder containing the UUID data */ public void set(int index, NullableUuidHolder holder) { - this.set(index, holder.isSet, holder.buffer); - } - - /** - * Sets the UUID value at the given index with explicit null flag. - * - * @param index the index to set - * @param isSet 1 if the value is set, 0 if null - * @param buffer the buffer containing the 16-byte UUID data - */ - public void set(int index, int isSet, ArrowBuf buffer) { - getUnderlyingVector().set(index, isSet, buffer); - } - - /** - * Sets the UUID value at the given index from an ArrowBuf. 
- * - * @param index the index to set - * @param value the buffer containing the 16-byte UUID data - */ - public void set(int index, ArrowBuf value) { - getUnderlyingVector().set(index, value); + if (holder.isSet == 0) { + getUnderlyingVector().setNull(index); + } else { + this.set(index, holder.buffer, holder.start); + } } /** @@ -249,10 +221,12 @@ public void set(int index, ArrowBuf value) { * @param sourceOffset the offset in the source buffer where the UUID data starts */ public void set(int index, ArrowBuf source, int sourceOffset) { - // Copy bytes from source buffer to target vector data buffer - ArrowBuf dataBuffer = getUnderlyingVector().getDataBuffer(); - dataBuffer.setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH); - getUnderlyingVector().setIndexDefined(index); + Preconditions.checkNotNull(source, "Cannot set UUID vector, the source buffer is null."); + + BitVectorHelper.setBit(getUnderlyingVector().getValidityBuffer(), index); + getUnderlyingVector() + .getDataBuffer() + .setBytes((long) index * UUID_BYTE_WIDTH, source, sourceOffset, UUID_BYTE_WIDTH); } /** @@ -286,10 +260,10 @@ public void setSafe(int index, UUID value) { * @param holder the holder containing the UUID data, or null to set a null value */ public void setSafe(int index, NullableUuidHolder holder) { - if (holder != null) { - getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer); - } else { + if (holder == null || holder.isSet == 0) { getUnderlyingVector().setNull(index); + } else { + this.setSafe(index, holder.buffer, holder.start); } } @@ -297,14 +271,23 @@ public void setSafe(int index, NullableUuidHolder holder) { * Sets the UUID value at the given index from a UuidHolder, expanding capacity if needed. 
* * @param index the index to set - * @param holder the holder containing the UUID data, or null to set a null value + * @param holder the holder containing the UUID data */ public void setSafe(int index, UuidHolder holder) { - if (holder != null) { - getUnderlyingVector().setSafe(index, holder.isSet, holder.buffer); - } else { - getUnderlyingVector().setNull(index); - } + this.setSafe(index, holder.buffer, holder.start); + } + + /** + * Sets the UUID value at the given index by copying from a source buffer, expanding capacity if + * needed. + * + * @param index the index to set + * @param buffer the source buffer to copy from + * @param start the offset in the source buffer where the UUID data starts + */ + public void setSafe(int index, ArrowBuf buffer, int start) { + getUnderlyingVector().handleSafe(index); + this.set(index, buffer, start); } /** @@ -400,15 +383,9 @@ public TransferPair getTransferPair(BufferAllocator allocator) { return getTransferPair(this.getField().getName(), allocator); } - private ArrowBuf getBufferSlicePostNullCheck(int index) { - return getUnderlyingVector() - .getDataBuffer() - .slice((long) index * UUID_BYTE_WIDTH, UUID_BYTE_WIDTH); - } - @Override public int getTypeWidth() { - return getUnderlyingVector().getTypeWidth(); + return UUID_BYTE_WIDTH; } /** {@link TransferPair} for {@link UuidVector}. */ diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java new file mode 100644 index 0000000000..7a5312f6ed --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableUuidHolderReaderImpl.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.vector.holders.ExtensionHolder; +import org.apache.arrow.vector.holders.NullableUuidHolder; +import org.apache.arrow.vector.holders.UuidHolder; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.util.UuidUtility; + +/** + * Reader implementation for reading UUID values from a {@link NullableUuidHolder}. + * + *

This reader wraps a single UUID holder value and provides methods to read from it. Unlike + * {@link UuidReaderImpl} which reads from a vector, this reader operates on a holder instance. + * + * @see NullableUuidHolder + * @see UuidReaderImpl + */ +public class NullableUuidHolderReaderImpl extends AbstractFieldReader { + private final NullableUuidHolder holder; + + /** + * Constructs a reader for the given UUID holder. + * + * @param holder the UUID holder to read from + */ + public NullableUuidHolderReaderImpl(NullableUuidHolder holder) { + this.holder = holder; + } + + @Override + public int size() { + throw new UnsupportedOperationException( + "size() is not supported on NullableUuidHolderReaderImpl. " + + "This reader wraps a single UUID holder value, not a collection. " + + "Use UuidReaderImpl for vector-based UUID reading."); + } + + @Override + public boolean next() { + throw new UnsupportedOperationException( + "next() is not supported on NullableUuidHolderReaderImpl. " + + "This reader wraps a single UUID holder value, not an iterator. " + + "Use UuidReaderImpl for vector-based UUID reading."); + } + + @Override + public void setPosition(int index) { + throw new UnsupportedOperationException( + "setPosition() is not supported on NullableUuidHolderReaderImpl. " + + "This reader wraps a single UUID holder value, not a vector. 
" + + "Use UuidReaderImpl for vector-based UUID reading."); + } + + @Override + public Types.MinorType getMinorType() { + return Types.MinorType.EXTENSIONTYPE; + } + + @Override + public boolean isSet() { + return holder.isSet == 1; + } + + @Override + public void read(ExtensionHolder h) { + if (h instanceof NullableUuidHolder) { + NullableUuidHolder nullableHolder = (NullableUuidHolder) h; + nullableHolder.buffer = this.holder.buffer; + nullableHolder.isSet = this.holder.isSet; + nullableHolder.start = this.holder.start; + } else if (h instanceof UuidHolder) { + UuidHolder uuidHolder = (UuidHolder) h; + uuidHolder.buffer = this.holder.buffer; + uuidHolder.start = this.holder.start; + } else { + throw new IllegalArgumentException( + "Unsupported holder type: " + + h.getClass().getName() + + ". " + + "Only NullableUuidHolder and UuidHolder are supported for UUID values. " + + "Provided holder type cannot be used to read UUID data."); + } + } + + @Override + public Object readObject() { + if (!isSet()) { + return null; + } + // Convert UUID bytes to Java UUID object + try { + return UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); + } catch (Exception e) { + throw new RuntimeException( + String.format( + "Failed to read UUID from buffer. Invalid Arrow buffer state: " + + "capacity=%d, readableBytes=%d, readerIndex=%d, writerIndex=%d, refCnt=%d. 
" + + "The buffer must contain exactly 16 bytes of valid UUID data.", + holder.buffer.capacity(), + holder.buffer.readableBytes(), + holder.buffer.readerIndex(), + holder.buffer.writerIndex(), + holder.buffer.refCnt()), + e); + } + } +} diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java index bb35b960d3..bb7ae13e5b 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java @@ -63,9 +63,7 @@ public boolean isSet() { @Override public void read(ExtensionHolder holder) { - if (holder instanceof UuidHolder) { - vector.get(idx(), (UuidHolder) holder); - } else if (holder instanceof NullableUuidHolder) { + if (holder instanceof NullableUuidHolder) { vector.get(idx(), (NullableUuidHolder) holder); } else { throw new IllegalArgumentException( @@ -75,9 +73,7 @@ public void read(ExtensionHolder holder) { @Override public void read(int arrayIndex, ExtensionHolder holder) { - if (holder instanceof UuidHolder) { - vector.get(arrayIndex, (UuidHolder) holder); - } else if (holder instanceof NullableUuidHolder) { + if (holder instanceof NullableUuidHolder) { vector.get(arrayIndex, (NullableUuidHolder) holder); } else { throw new IllegalArgumentException( diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java index 8a78add11c..3f60ca9223 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UuidWriterImpl.java @@ -50,8 +50,15 @@ public void writeExtension(Object value) { vector.setSafe(getPosition(), (ArrowBuf) value); } else if (value instanceof java.util.UUID) { vector.setSafe(getPosition(), (java.util.UUID) value); + } else if (value 
instanceof ExtensionHolder) { + write((ExtensionHolder) value); } else { - throw new IllegalArgumentException("Unsupported value type for UUID: " + value.getClass()); + throw new IllegalArgumentException( + "Unsupported value type for UUID: " + + value.getClass().getName() + + ". " + + "Supported types are: byte[] (16 bytes), ArrowBuf (16 bytes), or java.util.UUID. " + + "Convert your value to one of these types before writing."); } vector.setValueCount(getPosition() + 1); } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java index e5398d82cf..ea93b5adf4 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/NullableUuidHolder.java @@ -32,4 +32,7 @@ public class NullableUuidHolder extends ExtensionHolder { /** Buffer containing 16-byte UUID data. */ public ArrowBuf buffer; + + /** Offset in the buffer where the UUID data starts. */ + public int start = 0; } diff --git a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java index 484e05c24b..8bcac90c35 100644 --- a/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java +++ b/vector/src/main/java/org/apache/arrow/vector/holders/UuidHolder.java @@ -31,6 +31,9 @@ public class UuidHolder extends ExtensionHolder { /** Buffer containing 16-byte UUID data. */ public ArrowBuf buffer; + /** Offset in the buffer where the UUID data starts. */ + public int start = 0; + /** Constructs a UuidHolder with isSet = 1. 
*/ public UuidHolder() { this.isSet = 1; diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 41a95a8d11..f10295ac58 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -41,8 +41,8 @@ import org.apache.arrow.vector.extension.UuidType; import org.apache.arrow.vector.holders.DurationHolder; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; +import org.apache.arrow.vector.holders.NullableUuidHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; -import org.apache.arrow.vector.holders.UuidHolder; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -1257,14 +1257,14 @@ public void testListVectorReaderForExtensionType() throws Exception { reader.setPosition(0); reader.next(); FieldReader uuidReader = reader.reader(); - UuidHolder holder = new UuidHolder(); + NullableUuidHolder holder = new NullableUuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u2, actualUuid); } } @@ -1298,14 +1298,14 @@ public void testCopyFromForExtensionType() throws Exception { reader.setPosition(0); reader.next(); FieldReader uuidReader = reader.reader(); - UuidHolder holder = new UuidHolder(); + NullableUuidHolder holder = new NullableUuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = 
UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u1, actualUuid); reader.next(); uuidReader = reader.reader(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index df8f338f45..a1a0d4cdd6 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -43,7 +43,7 @@ import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.extension.UuidType; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; -import org.apache.arrow.vector.holders.UuidHolder; +import org.apache.arrow.vector.holders.NullableUuidHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; @@ -1302,14 +1302,14 @@ public void testMapVectorWithExtensionType() throws Exception { mapReader.setPosition(0); mapReader.next(); FieldReader uuidReader = mapReader.value(); - UuidHolder holder = new UuidHolder(); + NullableUuidHolder holder = new NullableUuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u2, actualUuid); } } @@ -1347,14 +1347,14 @@ public void testCopyFromForExtensionType() throws Exception { mapReader.setPosition(0); mapReader.next(); FieldReader uuidReader = 
mapReader.value(); - UuidHolder holder = new UuidHolder(); + NullableUuidHolder holder = new NullableUuidHolder(); uuidReader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u1, actualUuid); mapReader.next(); uuidReader = mapReader.value(); uuidReader.read(holder); - actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(u2, actualUuid); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java index 9f7c65b82b..bbf5620f32 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestUuidType.java @@ -233,7 +233,8 @@ void testVectorByteArrayOperations() { // Verify the bytes match byte[] actualBytes = new byte[UuidType.UUID_BYTE_WIDTH]; - uuidVector.get(0).getBytes(0, actualBytes); + int offset = uuidVector.getStartOffset(0); + uuidVector.getDataBuffer().getBytes(offset, actualBytes); assertArrayEquals(uuidBytes, actualBytes); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java index 3d70238ece..581626145c 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestUuidVector.java @@ -27,6 +27,7 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.impl.NullableUuidHolderReaderImpl; import org.apache.arrow.vector.complex.impl.UuidReaderImpl; import org.apache.arrow.vector.complex.impl.UuidWriterImpl; import org.apache.arrow.vector.extension.UuidType; @@ -135,8 +136,8 @@ void 
testWriteExtensionWithUnsupportedType() throws Exception { IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> writer.writeExtension("invalid-type")); - assertEquals( - "Unsupported value type for UUID: class java.lang.String", exception.getMessage()); + assertTrue( + exception.getMessage().contains("Unsupported value type for UUID: java.lang.String")); } } @@ -234,9 +235,9 @@ void testReaderCopyAsValueExtensionVector() throws Exception { UuidReaderImpl reader = (UuidReaderImpl) vectorForRead.getReader(); reader.copyAsValue(writer); UuidReaderImpl reader2 = (UuidReaderImpl) vector.getReader(); - UuidHolder holder = new UuidHolder(); + NullableUuidHolder holder = new NullableUuidHolder(); reader2.read(0, holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(uuid, actualUuid); } } @@ -251,10 +252,10 @@ void testReaderReadWithUuidHolder() throws Exception { UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); reader.setPosition(0); - UuidHolder holder = new UuidHolder(); + NullableUuidHolder holder = new NullableUuidHolder(); reader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(uuid, actualUuid); assertEquals(1, holder.isSet); } @@ -273,7 +274,7 @@ void testReaderReadWithNullableUuidHolder() throws Exception { NullableUuidHolder holder = new NullableUuidHolder(); reader.read(holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(uuid, actualUuid); assertEquals(1, holder.isSet); } @@ -309,10 +310,10 @@ void testReaderReadWithArrayIndexUuidHolder() throws Exception { UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - UuidHolder holder = new UuidHolder(); + 
NullableUuidHolder holder = new NullableUuidHolder(); reader.read(1, holder); - UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, 0); + UUID actualUuid = UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start); assertEquals(uuid2, actualUuid); assertEquals(1, holder.isSet); } @@ -333,7 +334,7 @@ void testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { NullableUuidHolder holder1 = new NullableUuidHolder(); reader.read(0, holder1); - assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, 0)); + assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, holder1.start)); assertEquals(1, holder1.isSet); NullableUuidHolder holder2 = new NullableUuidHolder(); @@ -342,7 +343,7 @@ void testReaderReadWithArrayIndexNullableUuidHolder() throws Exception { NullableUuidHolder holder3 = new NullableUuidHolder(); reader.read(2, holder3); - assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder3.buffer, 0)); + assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder3.buffer, holder3.start)); assertEquals(1, holder3.isSet); } } @@ -367,25 +368,6 @@ void testReaderReadWithUnsupportedHolder() throws Exception { } } - @Test - void testReaderReadWithArrayIndexUnsupportedHolder() throws Exception { - try (UuidVector vector = new UuidVector("test", allocator)) { - UUID uuid = UUID.randomUUID(); - vector.setSafe(0, uuid); - vector.setValueCount(1); - - UuidReaderImpl reader = (UuidReaderImpl) vector.getReader(); - - // Create a mock unsupported holder - ExtensionHolder unsupportedHolder = new ExtensionHolder() {}; - - IllegalArgumentException exception = - assertThrows(IllegalArgumentException.class, () -> reader.read(0, unsupportedHolder)); - - assertTrue(exception.getMessage().contains("Unsupported holder type for UuidReader")); - } - } - @Test void testReaderIsSet() throws Exception { try (UuidVector vector = new UuidVector("test", allocator)) { @@ -448,4 +430,290 @@ void testReaderGetField() throws Exception { assertEquals("test", 
reader.getField().getName()); } }
+
+  @Test
+  void testHolderStartOffsetWithMultipleValues() throws Exception {
+    try (UuidVector vector = new UuidVector("test", allocator)) {
+      UUID uuid1 = UUID.randomUUID();
+      UUID uuid2 = UUID.randomUUID();
+      UUID uuid3 = UUID.randomUUID();
+
+      vector.setSafe(0, uuid1);
+      vector.setSafe(1, uuid2);
+      vector.setSafe(2, uuid3);
+      vector.setValueCount(3);
+
+      // Reuse one NullableUuidHolder across indices; start must advance by 16 bytes per slot
+      NullableUuidHolder holder = new NullableUuidHolder();
+      vector.get(0, holder);
+      assertEquals(0, holder.start);
+      assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start));
+
+      vector.get(1, holder);
+      assertEquals(16, holder.start); // UUID_BYTE_WIDTH = 16
+      assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start));
+
+      vector.get(2, holder);
+      assertEquals(32, holder.start); // 2 * UUID_BYTE_WIDTH = 32
+      assertEquals(uuid3, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start));
+    }
+  }
+
+  @Test
+  void testNullableHolderStartOffsetWithMultipleValues() throws Exception {
+    try (UuidVector vector = new UuidVector("test", allocator)) {
+      UUID uuid1 = UUID.randomUUID();
+      UUID uuid2 = UUID.randomUUID();
+
+      vector.setSafe(0, uuid1);
+      vector.setNull(1);
+      vector.setSafe(2, uuid2);
+      vector.setValueCount(3);
+
+      // Use a fresh NullableUuidHolder per index, including the null slot at index 1
+      NullableUuidHolder holder1 = new NullableUuidHolder();
+      vector.get(0, holder1);
+      assertEquals(0, holder1.start);
+      assertEquals(1, holder1.isSet);
+      assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder1.buffer, holder1.start));
+
+      NullableUuidHolder holder2 = new NullableUuidHolder();
+      vector.get(1, holder2);
+      assertEquals(0, holder2.isSet);
+
+      NullableUuidHolder holder3 = new NullableUuidHolder();
+      vector.get(2, holder3);
+      assertEquals(32, holder3.start); // 2 * UUID_BYTE_WIDTH = 32
+      assertEquals(1, holder3.isSet);
+      assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder3.buffer, holder3.start));
+
+      // Verify both non-null holders report the same buffer
+      assertEquals(holder1.buffer, holder3.buffer);
+    }
+  }
+
+  @Test
+  void testSetFromHolderWithStartOffset() throws Exception {
+    try (UuidVector sourceVector = new UuidVector("source", allocator);
+        UuidVector targetVector = new UuidVector("target", allocator)) {
+      UUID uuid1 = UUID.randomUUID();
+      UUID uuid2 = UUID.randomUUID();
+
+      sourceVector.setSafe(0, uuid1);
+      sourceVector.setSafe(1, uuid2);
+      sourceVector.setValueCount(2);
+
+      // Get holder from index 1 (should have start = 16)
+      NullableUuidHolder holder = new NullableUuidHolder();
+      sourceVector.get(1, holder);
+      assertEquals(16, holder.start);
+
+      // Set target vector using holder with non-zero start offset
+      targetVector.setSafe(0, holder);
+      targetVector.setValueCount(1);
+
+      // Verify the value was copied correctly
+      assertEquals(uuid2, targetVector.getObject(0));
+    }
+  }
+
+  @Test
+  void testSetFromNullableHolderWithStartOffset() throws Exception {
+    try (UuidVector sourceVector = new UuidVector("source", allocator);
+        UuidVector targetVector = new UuidVector("target", allocator)) {
+      UUID uuid1 = UUID.randomUUID();
+      UUID uuid2 = UUID.randomUUID();
+
+      sourceVector.setSafe(0, uuid1);
+      sourceVector.setNull(1);
+      sourceVector.setSafe(2, uuid2);
+      sourceVector.setValueCount(3);
+
+      // Get holder from index 2 (should have start = 32)
+      NullableUuidHolder holder = new NullableUuidHolder();
+      sourceVector.get(2, holder);
+      assertEquals(32, holder.start);
+      assertEquals(1, holder.isSet);
+
+      // Set target vector using holder with non-zero start offset
+      targetVector.setSafe(0, holder);
+      targetVector.setValueCount(1);
+
+      // Verify the value was copied correctly
+      assertEquals(uuid2, targetVector.getObject(0));
+
+      // Copy the null slot (index 1) through a holder whose isSet is 0
+      NullableUuidHolder nullHolder = new NullableUuidHolder();
+      sourceVector.get(1, nullHolder);
+      assertEquals(0, nullHolder.isSet);
+
+      targetVector.setSafe(1, nullHolder);
+      targetVector.setValueCount(2);
+      assertTrue(targetVector.isNull(1));
+    }
+  }
+
+  @Test
+  void testGetStartOffset() throws Exception {
+    try (UuidVector vector = new UuidVector("test", allocator)) {
+      vector.allocateNew(10);
+
+      // Test getStartOffset for various indices
+      assertEquals(0, vector.getStartOffset(0));
+      assertEquals(16, vector.getStartOffset(1));
+      assertEquals(32, vector.getStartOffset(2));
+      assertEquals(48, vector.getStartOffset(3));
+      assertEquals(160, vector.getStartOffset(10));
+    }
+  }
+
+  @Test
+  void testReaderWithStartOffsetMultipleReads() throws Exception {
+    try (UuidVector vector = new UuidVector("test", allocator)) {
+      UUID uuid1 = UUID.randomUUID();
+      UUID uuid2 = UUID.randomUUID();
+      UUID uuid3 = UUID.randomUUID();
+
+      vector.setSafe(0, uuid1);
+      vector.setSafe(1, uuid2);
+      vector.setSafe(2, uuid3);
+      vector.setValueCount(3);
+
+      UuidReaderImpl reader = (UuidReaderImpl) vector.getReader();
+      NullableUuidHolder holder = new NullableUuidHolder();
+
+      // Read from different positions and verify start offset
+      reader.read(0, holder);
+      assertEquals(0, holder.start);
+      assertEquals(uuid1, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start));
+
+      reader.read(1, holder);
+      assertEquals(16, holder.start);
+      assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start));
+
+      reader.read(2, holder);
+      assertEquals(32, holder.start);
+      assertEquals(uuid3, UuidUtility.uuidFromArrowBuf(holder.buffer, holder.start));
+    }
+  }
+
+  @Test
+  void testWriterWithExtensionHolder() throws Exception {
+    try (UuidVector sourceVector = new UuidVector("source", allocator);
+        UuidVector targetVector = new UuidVector("target", allocator)) {
+      UUID uuid = UUID.randomUUID();
+      sourceVector.setSafe(0, uuid);
+      sourceVector.setValueCount(1);
+
+      // Get holder from source
+      NullableUuidHolder holder = new NullableUuidHolder();
+      sourceVector.get(0, holder);
+
+      // Write using UuidWriterImpl with ExtensionHolder
+      UuidWriterImpl writer = new UuidWriterImpl(targetVector);
+      writer.setPosition(0);
+      writer.writeExtension(holder);
+
+      assertEquals(uuid, targetVector.getObject(0));
+    }
+  }
+
+  @Test
+  void testNullableUuidHolderReaderImpl() throws Exception {
+    try (UuidVector vector = new UuidVector("test", allocator)) {
+      UUID uuid = UUID.randomUUID();
+      vector.setSafe(0, uuid);
+      vector.setValueCount(1);
+
+      // Get holder from vector
+      NullableUuidHolder sourceHolder = new NullableUuidHolder();
+      vector.get(0, sourceHolder);
+      assertEquals(1, sourceHolder.isSet);
+      assertEquals(0, sourceHolder.start);
+
+      // Create reader from holder
+      NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder);
+      assertTrue(reader.isSet());
+      assertEquals(uuid, reader.readObject());
+
+      // Read into another holder
+      NullableUuidHolder targetHolder = new NullableUuidHolder();
+      reader.read(targetHolder);
+      assertEquals(1, targetHolder.isSet);
+      assertEquals(0, targetHolder.start);
+      assertEquals(uuid, UuidUtility.uuidFromArrowBuf(targetHolder.buffer, targetHolder.start));
+    }
+  }
+
+  @Test
+  void testNullableUuidHolderReaderImplWithNull() throws Exception {
+    try (UuidVector vector = new UuidVector("test", allocator)) {
+      vector.setNull(0);
+      vector.setValueCount(1);
+
+      // Get null holder from vector
+      NullableUuidHolder sourceHolder = new NullableUuidHolder();
+      vector.get(0, sourceHolder);
+      assertEquals(0, sourceHolder.isSet);
+
+      // Create reader from null holder
+      NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder);
+      assertFalse(reader.isSet());
+      assertNull(reader.readObject());
+
+      // Read into another holder
+      NullableUuidHolder targetHolder = new NullableUuidHolder();
+      reader.read(targetHolder);
+      assertEquals(0, targetHolder.isSet);
+    }
+  }
+
+  @Test
+  void testNullableUuidHolderReaderImplReadIntoUuidHolder() throws Exception {
+    try (UuidVector vector = new UuidVector("test", allocator)) {
+      UUID uuid = UUID.randomUUID();
+      vector.setSafe(0, uuid);
+      vector.setValueCount(1);
+
+      // Get holder from vector
+      NullableUuidHolder sourceHolder = new NullableUuidHolder();
+      vector.get(0, sourceHolder);
+
+      // Create reader from holder
+      NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder);
+
+      // Read into UuidHolder (non-nullable)
+      UuidHolder targetHolder = new UuidHolder();
+      reader.read(targetHolder);
+      assertEquals(0, targetHolder.start);
+      assertEquals(uuid, UuidUtility.uuidFromArrowBuf(targetHolder.buffer, targetHolder.start));
+    }
+  }
+
+  @Test
+  void testNullableUuidHolderReaderImplWithNonZeroStart() throws Exception {
+    try (UuidVector vector = new UuidVector("test", allocator)) {
+      UUID uuid1 = UUID.randomUUID();
+      UUID uuid2 = UUID.randomUUID();
+      vector.setSafe(0, uuid1);
+      vector.setSafe(1, uuid2);
+      vector.setValueCount(2);
+
+      // Get holder from index 1 (start = 16)
+      NullableUuidHolder sourceHolder = new NullableUuidHolder();
+      vector.get(1, sourceHolder);
+      assertEquals(1, sourceHolder.isSet);
+      assertEquals(16, sourceHolder.start);
+
+      // Create reader from holder
+      NullableUuidHolderReaderImpl reader = new NullableUuidHolderReaderImpl(sourceHolder);
+      assertEquals(uuid2, reader.readObject());
+
+      // Read into another holder and verify start is preserved
+      NullableUuidHolder targetHolder = new NullableUuidHolder();
+      reader.read(targetHolder);
+      assertEquals(16, targetHolder.start);
+      assertEquals(uuid2, UuidUtility.uuidFromArrowBuf(targetHolder.buffer, targetHolder.start));
+    }
+  }
 }
diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
index 871a3cc461..31292ca513 100644
--- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
+++ b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
@@ -87,8 +87,8 @@
 import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
 import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder;
 import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder;
+import org.apache.arrow.vector.holders.NullableUuidHolder;
 import org.apache.arrow.vector.holders.TimeStampMilliTZHolder;
-import org.apache.arrow.vector.holders.UuidHolder;
 import org.apache.arrow.vector.types.TimeUnit;
 import org.apache.arrow.vector.types.Types.MinorType;
 import org.apache.arrow.vector.types.pojo.ArrowType;
@@ -2520,7 +2520,7 @@ public void extensionWriterReader() throws Exception {
       {
         FieldReader uuidReader = rootReader.reader("uuid1");
         uuidReader.setPosition(0);
-        UuidHolder uuidHolder = new UuidHolder();
+        NullableUuidHolder uuidHolder = new NullableUuidHolder();
         uuidReader.read(uuidHolder);
         UUID actualUuid = UuidUtility.uuidFromArrowBuf(uuidHolder.buffer, 0);
         assertEquals(u1, actualUuid);