Skip to content

Commit 2b45ca4

Browse files
committed
GH-946: Add Variant extension type support
Implements VariantType extension type with VariantVector for storing variant data with metadata and value buffers. Includes reader/writer implementations and comprehensive test coverage.
1 parent b9e40fa commit 2b45ca4

File tree

10 files changed

+1923
-0
lines changed

10 files changed

+1923
-0
lines changed
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.arrow.vector.complex.impl;
18+
19+
import org.apache.arrow.vector.holders.NullableVariantHolder;
20+
import org.apache.arrow.vector.types.Types;
21+
22+
public class NullableVariantHolderReaderImpl extends AbstractFieldReader {
23+
private final NullableVariantHolder holder;
24+
25+
public NullableVariantHolderReaderImpl(NullableVariantHolder holder) {
26+
this.holder = holder;
27+
}
28+
29+
@Override
30+
public int size() {
31+
throw new UnsupportedOperationException("You can't call size on a Holder value reader.");
32+
}
33+
34+
@Override
35+
public boolean next() {
36+
throw new UnsupportedOperationException("You can't call next on a single value reader.");
37+
}
38+
39+
@Override
40+
public void setPosition(int index) {
41+
throw new UnsupportedOperationException("You can't call setPosition on a single value reader.");
42+
}
43+
44+
@Override
45+
public Types.MinorType getMinorType() {
46+
return Types.MinorType.EXTENSIONTYPE;
47+
}
48+
49+
@Override
50+
public boolean isSet() {
51+
return holder.isSet == 1;
52+
}
53+
54+
/**
55+
* Reads the variant holder data into the provided holder.
56+
*
57+
* @param h the holder to read into
58+
*/
59+
public void read(NullableVariantHolder h) {
60+
h.metadataStart = this.holder.metadataStart;
61+
h.metadataEnd = this.holder.metadataEnd;
62+
h.metadataBuffer = this.holder.metadataBuffer;
63+
h.valueStart = this.holder.valueStart;
64+
h.valueEnd = this.holder.valueEnd;
65+
h.valueBuffer = this.holder.valueBuffer;
66+
h.isSet = this.isSet() ? 1 : 0;
67+
}
68+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.arrow.vector.complex.impl;
18+
19+
import org.apache.arrow.vector.extension.VariantVector;
20+
import org.apache.arrow.vector.holders.ExtensionHolder;
21+
import org.apache.arrow.vector.holders.NullableVariantHolder;
22+
import org.apache.arrow.vector.holders.VariantHolder;
23+
import org.apache.arrow.vector.types.Types;
24+
import org.apache.arrow.vector.types.pojo.Field;
25+
26+
public class VariantReaderImpl extends AbstractFieldReader {
27+
private final VariantVector vector;
28+
29+
public VariantReaderImpl(VariantVector vector) {
30+
this.vector = vector;
31+
}
32+
33+
@Override
34+
public Types.MinorType getMinorType() {
35+
return this.vector.getMinorType();
36+
}
37+
38+
@Override
39+
public Field getField() {
40+
return this.vector.getField();
41+
}
42+
43+
@Override
44+
public boolean isSet() {
45+
return !this.vector.isNull(this.idx());
46+
}
47+
48+
@Override
49+
public void read(ExtensionHolder holder) {
50+
if (holder instanceof VariantHolder) {
51+
vector.get(idx(), (VariantHolder) holder);
52+
} else if (holder instanceof NullableVariantHolder) {
53+
vector.get(idx(), (NullableVariantHolder) holder);
54+
} else {
55+
throw new IllegalArgumentException(
56+
"Unsupported holder type for VariantReader: " + holder.getClass());
57+
}
58+
}
59+
60+
public void read(VariantHolder h) {
61+
this.vector.get(this.idx(), h);
62+
}
63+
64+
public void read(NullableVariantHolder h) {
65+
this.vector.get(this.idx(), h);
66+
}
67+
68+
@Override
69+
public Object readObject() {
70+
return this.vector.getObject(this.idx());
71+
}
72+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.arrow.vector.complex.impl;
18+
19+
import org.apache.arrow.vector.ExtensionTypeVector;
20+
import org.apache.arrow.vector.extension.VariantVector;
21+
22+
/**
23+
* Factory for creating {@link VariantWriterImpl} instances.
24+
*
25+
* <p>This factory is used to create writers for Variant extension type vectors.
26+
*
27+
* @see VariantWriterImpl
28+
* @see org.apache.arrow.vector.extension.VariantType
29+
*/
30+
public class VariantWriterFactory implements ExtensionTypeWriterFactory {
31+
32+
/**
33+
* Creates a writer implementation for the given extension type vector.
34+
*
35+
* @param extensionTypeVector the vector to create a writer for
36+
* @return a {@link VariantWriterImpl} if the vector is a {@link VariantVector}, null otherwise
37+
*/
38+
@Override
39+
public AbstractFieldWriter getWriterImpl(ExtensionTypeVector extensionTypeVector) {
40+
if (extensionTypeVector instanceof VariantVector) {
41+
return new VariantWriterImpl((VariantVector) extensionTypeVector);
42+
}
43+
return null;
44+
}
45+
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.arrow.vector.complex.impl;
18+
19+
import org.apache.arrow.vector.extension.VariantVector;
20+
import org.apache.arrow.vector.holders.ExtensionHolder;
21+
import org.apache.arrow.vector.holders.NullableVariantHolder;
22+
import org.apache.arrow.vector.holders.VariantHolder;
23+
24+
/**
25+
* Writer implementation for VARIANT extension type vectors.
26+
*
27+
* <p>This writer handles writing variant data to a {@link VariantVector}. It accepts both {@link
28+
* VariantHolder} and {@link NullableVariantHolder} objects containing metadata and value buffers
29+
* and writes them to the appropriate position in the vector.
30+
*/
31+
public class VariantWriterImpl extends AbstractExtensionTypeWriter<VariantVector> {
32+
33+
private static final String UNSUPPORTED_TYPE_TEMPLATE = "Unsupported type for Variant: %s";
34+
35+
/**
36+
* Constructs a new VariantWriterImpl for the given vector.
37+
*
38+
* @param vector the variant vector to write to
39+
*/
40+
public VariantWriterImpl(VariantVector vector) {
41+
super(vector);
42+
}
43+
44+
/**
45+
* Writes an extension type value to the vector.
46+
*
47+
* <p>This method validates that the object is an {@link ExtensionHolder} and delegates to {@link
48+
* #write(ExtensionHolder)}.
49+
*
50+
* @param object the object to write, must be an {@link ExtensionHolder}
51+
* @throws IllegalArgumentException if the object is not an {@link ExtensionHolder}
52+
*/
53+
@Override
54+
public void writeExtension(Object object) {
55+
if (object instanceof ExtensionHolder) {
56+
write((ExtensionHolder) object);
57+
} else {
58+
throw new IllegalArgumentException(
59+
String.format(UNSUPPORTED_TYPE_TEMPLATE, object.getClass().getName()));
60+
}
61+
}
62+
63+
/**
64+
* Writes a variant holder to the vector at the current position.
65+
*
66+
* <p>The holder can be either a {@link VariantHolder} (non-nullable, always set) or a {@link
67+
* NullableVariantHolder} (nullable, may be null). The data is written using {@link
68+
* VariantVector#setSafe(int, NullableVariantHolder)} which handles buffer allocation and copying.
69+
*
70+
* @param extensionHolder the variant holder to write, must be a {@link VariantHolder} or {@link
71+
* NullableVariantHolder}
72+
* @throws IllegalArgumentException if the holder is neither a {@link VariantHolder} nor a {@link
73+
* NullableVariantHolder}
74+
*/
75+
@Override
76+
public void write(ExtensionHolder extensionHolder) {
77+
if (extensionHolder instanceof VariantHolder) {
78+
vector.setSafe(getPosition(), (VariantHolder) extensionHolder);
79+
} else if (extensionHolder instanceof NullableVariantHolder) {
80+
vector.setSafe(getPosition(), (NullableVariantHolder) extensionHolder);
81+
} else {
82+
throw new IllegalArgumentException(
83+
String.format(UNSUPPORTED_TYPE_TEMPLATE, extensionHolder.getClass().getName()));
84+
}
85+
vector.setValueCount(getPosition() + 1);
86+
}
87+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.arrow.vector.extension;
18+
19+
import org.apache.arrow.memory.BufferAllocator;
20+
import org.apache.arrow.vector.FieldVector;
21+
import org.apache.arrow.vector.types.pojo.ArrowType;
22+
import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
23+
import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry;
24+
import org.apache.arrow.vector.types.pojo.FieldType;
25+
26+
public final class VariantType extends ExtensionType {
27+
28+
public static final VariantType INSTANCE = new VariantType();
29+
30+
public static final String EXTENSION_NAME = "parquet.variant";
31+
32+
static {
33+
ExtensionTypeRegistry.register(INSTANCE);
34+
}
35+
36+
private VariantType() {}
37+
38+
@Override
39+
public ArrowType storageType() {
40+
return ArrowType.Struct.INSTANCE;
41+
}
42+
43+
@Override
44+
public String extensionName() {
45+
return EXTENSION_NAME;
46+
}
47+
48+
@Override
49+
public boolean extensionEquals(ExtensionType other) {
50+
return other instanceof VariantType;
51+
}
52+
53+
@Override
54+
public String serialize() {
55+
return "";
56+
}
57+
58+
@Override
59+
public ArrowType deserialize(ArrowType storageType, String serializedData) {
60+
if (!storageType.equals(this.storageType())) {
61+
throw new UnsupportedOperationException(
62+
"Cannot construct VariantType from underlying type " + storageType);
63+
}
64+
return INSTANCE;
65+
}
66+
67+
@Override
68+
public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
69+
return new VariantVector(name, allocator);
70+
}
71+
72+
@Override
73+
public boolean isComplex() {
74+
// The type itself is not complex meaning we need separate functions to convert/extract
75+
// different types.
76+
// Meanwhile, the containing vector is complex in terms of containing multiple values (metadata
77+
// and value)
78+
return false;
79+
}
80+
}

0 commit comments

Comments
 (0)