Skip to content

Commit 2bff8b2

Browse files
committed
Introduced ColumnDataHolder to replace the List in DataColumnImpl. This interface makes it possible to define how the data of a column is stored. ColumnDataHolderImpl was created as the default implementation; by default it stores data in primitive arrays whenever possible.
1 parent 485f3ba commit 2bff8b2

File tree

8 files changed

+591
-26
lines changed

8 files changed

+591
-26
lines changed
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package org.jetbrains.kotlinx.dataframe
2+
3+
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnDataHolderImpl
4+
import kotlin.reflect.KType
5+
import kotlin.reflect.typeOf
6+
7+
/**
 * Storage abstraction for the values of a column.
 *
 * Implementations decide how the values are kept in memory. Instances of the default implementation
 * are obtained through the `toColumnDataHolder` / `asColumnDataHolder` extension functions declared
 * in this file.
 *
 * @param T type of the stored values.
 */
public interface ColumnDataHolder<T> : Iterable<T> {

    /** Number of stored values. */
    public val size: Int

    /** Returns the distinct stored values as a [Set]. */
    public fun toSet(): Set<T>

    /** Returns the stored values as a [List]. */
    public fun toList(): List<T>

    /**
     * Returns `true` if [value] is one of the stored values.
     *
     * Declared as `operator` (like [get]) so that `value in holder` resolves to this member.
     */
    public operator fun contains(value: T): Boolean

    /** Returns the value stored at [index]. */
    public operator fun get(index: Int): T

    /** Returns the values stored at the indices covered by [range]. */
    public operator fun get(range: IntRange): List<T>

    /** Lazily evaluated set of the distinct stored values. */
    public val distinct: Lazy<Set<T>>

    public companion object
}
25+
26+
/**
 * Creates a [ColumnDataHolder] from the elements of this collection.
 *
 * @param type [KType] of the elements; forwarded to [ColumnDataHolderImpl.of], which uses it to pick the storage.
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public fun <T> Collection<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    return ColumnDataHolderImpl.of(list = this, type = type, distinct = distinct)
}

/**
 * Creates a [ColumnDataHolder] from the elements of this collection, taking the element type from
 * the reified type argument [T].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public inline fun <reified T> Collection<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val elementType = typeOf<T>()
    return toColumnDataHolder(elementType, distinct)
}
31+
32+
/**
 * Creates a [ColumnDataHolder] from the elements of this array.
 *
 * @param type [KType] of the elements; forwarded to [ColumnDataHolderImpl.of], which uses it to pick the storage.
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public fun <T> Array<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    return ColumnDataHolderImpl.of(array = this, type = type, distinct = distinct)
}

/**
 * Creates a [ColumnDataHolder] from the elements of this array, taking the element type from the
 * reified type argument [T].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public inline fun <reified T> Array<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val elementType = typeOf<T>()
    return toColumnDataHolder(elementType, distinct)
}
37+
38+
// Each function below hands the primitive array, together with the matching element KType, to the
// `primitiveArray` overload of ColumnDataHolderImpl.of; `distinct` is forwarded unchanged.

/** Creates a [ColumnDataHolder] of [Boolean] values from this [BooleanArray]. */
public fun BooleanArray.asColumnDataHolder(distinct: Lazy<Set<Boolean>>? = null): ColumnDataHolder<Boolean> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Boolean>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Byte] values from this [ByteArray]. */
public fun ByteArray.asColumnDataHolder(distinct: Lazy<Set<Byte>>? = null): ColumnDataHolder<Byte> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Byte>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Short] values from this [ShortArray]. */
public fun ShortArray.asColumnDataHolder(distinct: Lazy<Set<Short>>? = null): ColumnDataHolder<Short> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Short>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Int] values from this [IntArray]. */
public fun IntArray.asColumnDataHolder(distinct: Lazy<Set<Int>>? = null): ColumnDataHolder<Int> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Int>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Long] values from this [LongArray]. */
public fun LongArray.asColumnDataHolder(distinct: Lazy<Set<Long>>? = null): ColumnDataHolder<Long> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Long>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Float] values from this [FloatArray]. */
public fun FloatArray.asColumnDataHolder(distinct: Lazy<Set<Float>>? = null): ColumnDataHolder<Float> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Float>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Double] values from this [DoubleArray]. */
public fun DoubleArray.asColumnDataHolder(distinct: Lazy<Set<Double>>? = null): ColumnDataHolder<Double> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Double>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Char] values from this [CharArray]. */
public fun CharArray.asColumnDataHolder(distinct: Lazy<Set<Char>>? = null): ColumnDataHolder<Char> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Char>(), distinct = distinct)
}
61+
62+
// Unsigned arrays implement Collection<U*>, so a positional call to ColumnDataHolderImpl.of(this, ...)
// binds to the Collection overload, which copies the data with toU*Array(). Naming the
// `primitiveArray` parameter pins the overload that wraps the array directly, matching what the
// signed-array variants of asColumnDataHolder do.

/**
 * Creates a [ColumnDataHolder] of [UByte] values from this [UByteArray].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun UByteArray.asColumnDataHolder(distinct: Lazy<Set<UByte>>? = null): ColumnDataHolder<UByte> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<UByte>(), distinct = distinct)

/**
 * Creates a [ColumnDataHolder] of [UShort] values from this [UShortArray].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun UShortArray.asColumnDataHolder(distinct: Lazy<Set<UShort>>? = null): ColumnDataHolder<UShort> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<UShort>(), distinct = distinct)

/**
 * Creates a [ColumnDataHolder] of [UInt] values from this [UIntArray].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun UIntArray.asColumnDataHolder(distinct: Lazy<Set<UInt>>? = null): ColumnDataHolder<UInt> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<UInt>(), distinct = distinct)

/**
 * Creates a [ColumnDataHolder] of [ULong] values from this [ULongArray].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun ULongArray.asColumnDataHolder(distinct: Lazy<Set<ULong>>? = null): ColumnDataHolder<ULong> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<ULong>(), distinct = distinct)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
@file:OptIn(ExperimentalUnsignedTypes::class)
2+
3+
package org.jetbrains.kotlinx.dataframe.impl.columns
4+
5+
import org.jetbrains.kotlinx.dataframe.ColumnDataHolder
6+
import org.jetbrains.kotlinx.dataframe.impl.asList
7+
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveArray
8+
import kotlin.reflect.KType
9+
import kotlin.reflect.typeOf
10+
11+
/**
 * Default [ColumnDataHolder] implementation, backed by a [List].
 *
 * Instances are created through the [of] factory functions. For the supported primitive and unsigned
 * element types those functions store the values in the matching primitive array and expose it through
 * an `asList` wrapper; for every other type the input itself is wrapped with `asList`.
 *
 * @param list backing storage of this holder.
 * @param distinct optional precomputed set of distinct values; when `null` it is computed lazily from [list].
 */
internal class ColumnDataHolderImpl<T> private constructor(
    private val list: List<T>,
    distinct: Lazy<Set<T>>?,
) : ColumnDataHolder<T> {

    /** The caller-supplied distinct values or, if none were given, a lazily computed `list.toSet()`. */
    override val distinct: Lazy<Set<T>> = distinct ?: lazy { list.toSet() }

    override val size: Int get() = list.size

    override fun toSet(): Set<T> = distinct.value

    /** Returns the backing list itself. */
    override fun toList(): List<T> = list

    override fun get(index: Int): T = list[index]

    /**
     * Returns the sub-list of elements at the indices in [range] (both ends inclusive).
     *
     * An empty [range] always yields an empty list, even when its bounds lie outside the holder
     * (for instance [IntRange.EMPTY] on an empty holder), mirroring the standard library's `List.slice`.
     */
    override fun get(range: IntRange): List<T> =
        if (range.isEmpty()) emptyList() else list.subList(range.first, range.last + 1)

    override fun contains(value: T): Boolean = list.contains(value)

    override fun iterator(): Iterator<T> = list.iterator()

    companion object {

        /**
         * Constructs a [ColumnDataHolder] from the elements of [list].
         *
         * If [list] already is a [ColumnDataHolder] it is returned as is and [distinct] is ignored.
         * If [type] is one of the supported primitive or unsigned types, the elements are first copied
         * into the matching primitive array, which is then wrapped with [asList]. Otherwise [list] itself
         * is wrapped with [asList].
         *
         * @throws IllegalArgumentException if the conversion fails, e.g. because the elements of [list]
         *   do not match [type]; the original exception is attached as the cause.
         */
        @Suppress("UNCHECKED_CAST")
        internal fun <T> of(list: Collection<T>, type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
            if (list is ColumnDataHolder<*>) return list as ColumnDataHolder<T>

            return try {
                when (type) {
                    BOOLEAN -> ColumnDataHolderImpl((list as Collection<Boolean>).toBooleanArray().asList(), distinct)
                    BYTE -> ColumnDataHolderImpl((list as Collection<Byte>).toByteArray().asList(), distinct)
                    SHORT -> ColumnDataHolderImpl((list as Collection<Short>).toShortArray().asList(), distinct)
                    INT -> ColumnDataHolderImpl((list as Collection<Int>).toIntArray().asList(), distinct)
                    LONG -> ColumnDataHolderImpl((list as Collection<Long>).toLongArray().asList(), distinct)
                    FLOAT -> ColumnDataHolderImpl((list as Collection<Float>).toFloatArray().asList(), distinct)
                    DOUBLE -> ColumnDataHolderImpl((list as Collection<Double>).toDoubleArray().asList(), distinct)
                    CHAR -> ColumnDataHolderImpl((list as Collection<Char>).toCharArray().asList(), distinct)
                    UBYTE -> ColumnDataHolderImpl((list as Collection<UByte>).toUByteArray().asList(), distinct)
                    USHORT -> ColumnDataHolderImpl((list as Collection<UShort>).toUShortArray().asList(), distinct)
                    UINT -> ColumnDataHolderImpl((list as Collection<UInt>).toUIntArray().asList(), distinct)
                    ULONG -> ColumnDataHolderImpl((list as Collection<ULong>).toULongArray().asList(), distinct)
                    else -> ColumnDataHolderImpl(list.asList(), distinct)
                } as ColumnDataHolder<T>
            } catch (e: Exception) {
                throw IllegalArgumentException("Can't create ColumnDataHolder from $list and type $type", e)
            }
        }

        /**
         * Constructs a [ColumnDataHolder] from the elements of [array].
         *
         * If [type] is one of the supported primitive or unsigned types, [array] is first converted to
         * the matching primitive array, which is then wrapped with [asList]. Otherwise [array] itself
         * is wrapped with [asList].
         *
         * @throws IllegalArgumentException if the conversion fails, e.g. because the elements of [array]
         *   do not match [type]; the original exception is attached as the cause.
         */
        @Suppress("UNCHECKED_CAST")
        internal fun <T> of(array: Array<T>, type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> =
            try {
                when (type) {
                    BOOLEAN -> ColumnDataHolderImpl((array as Array<Boolean>).toBooleanArray().asList(), distinct)
                    BYTE -> ColumnDataHolderImpl((array as Array<Byte>).toByteArray().asList(), distinct)
                    SHORT -> ColumnDataHolderImpl((array as Array<Short>).toShortArray().asList(), distinct)
                    INT -> ColumnDataHolderImpl((array as Array<Int>).toIntArray().asList(), distinct)
                    LONG -> ColumnDataHolderImpl((array as Array<Long>).toLongArray().asList(), distinct)
                    FLOAT -> ColumnDataHolderImpl((array as Array<Float>).toFloatArray().asList(), distinct)
                    DOUBLE -> ColumnDataHolderImpl((array as Array<Double>).toDoubleArray().asList(), distinct)
                    CHAR -> ColumnDataHolderImpl((array as Array<Char>).toCharArray().asList(), distinct)
                    UBYTE -> ColumnDataHolderImpl((array as Array<UByte>).toUByteArray().asList(), distinct)
                    USHORT -> ColumnDataHolderImpl((array as Array<UShort>).toUShortArray().asList(), distinct)
                    UINT -> ColumnDataHolderImpl((array as Array<UInt>).toUIntArray().asList(), distinct)
                    ULONG -> ColumnDataHolderImpl((array as Array<ULong>).toULongArray().asList(), distinct)
                    else -> ColumnDataHolderImpl(array.asList(), distinct)
                } as ColumnDataHolder<T>
            } catch (e: Exception) {
                throw IllegalArgumentException(
                    "Can't create ColumnDataHolder from $array and mismatching type $type",
                    e
                )
            }

        /**
         * Constructs a [ColumnDataHolder] using an [asList] wrapper around [primitiveArray]; the array
         * is not converted first.
         *
         * [primitiveArray] must be a primitive (or unsigned) array whose element type matches [type].
         *
         * @throws IllegalArgumentException if [primitiveArray] is not a primitive array, or if it is one
         *   but its element type does not match [type].
         */
        @Suppress("UNCHECKED_CAST")
        internal fun <T> of(primitiveArray: Any, type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> =
            when {
                type == BOOLEAN && primitiveArray is BooleanArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == BYTE && primitiveArray is ByteArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == SHORT && primitiveArray is ShortArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == INT && primitiveArray is IntArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == LONG && primitiveArray is LongArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == FLOAT && primitiveArray is FloatArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == DOUBLE && primitiveArray is DoubleArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == CHAR && primitiveArray is CharArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == UBYTE && primitiveArray is UByteArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == USHORT && primitiveArray is UShortArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == UINT && primitiveArray is UIntArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == ULONG && primitiveArray is ULongArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                // Not a primitive array at all: report that separately from a type mismatch.
                !primitiveArray.isPrimitiveArray ->
                    throw IllegalArgumentException(
                        "Can't create ColumnDataHolder from non primitive array $primitiveArray and type $type"
                    )

                // A primitive array whose element type does not match [type].
                else ->
                    throw IllegalArgumentException(
                        "Can't create ColumnDataHolder from primitive array $primitiveArray and type $type"
                    )
            } as ColumnDataHolder<T>
    }
}
141+
142+
// KType tokens for the element types that ColumnDataHolderImpl matches against when choosing a
// primitive-array storage. Created once here so the factories compare against shared instances.
private val BOOLEAN: KType = typeOf<Boolean>()
private val BYTE: KType = typeOf<Byte>()
private val SHORT: KType = typeOf<Short>()
private val INT: KType = typeOf<Int>()
private val LONG: KType = typeOf<Long>()
private val FLOAT: KType = typeOf<Float>()
private val DOUBLE: KType = typeOf<Double>()
private val CHAR: KType = typeOf<Char>()
private val UBYTE: KType = typeOf<UByte>()
private val USHORT: KType = typeOf<UShort>()
private val UINT: KType = typeOf<UInt>()
private val ULONG: KType = typeOf<ULong>()
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package org.jetbrains.kotlinx.dataframe
2+
3+
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnDataHolderImpl
4+
import kotlin.reflect.KType
5+
import kotlin.reflect.typeOf
6+
7+
/**
 * Storage abstraction for the values of a column.
 *
 * Implementations decide how the values are kept in memory. Instances of the default implementation
 * are obtained through the `toColumnDataHolder` / `asColumnDataHolder` extension functions declared
 * in this file.
 *
 * @param T type of the stored values.
 */
public interface ColumnDataHolder<T> : Iterable<T> {

    /** Number of stored values. */
    public val size: Int

    /** Returns the distinct stored values as a [Set]. */
    public fun toSet(): Set<T>

    /** Returns the stored values as a [List]. */
    public fun toList(): List<T>

    /**
     * Returns `true` if [value] is one of the stored values.
     *
     * Declared as `operator` (like [get]) so that `value in holder` resolves to this member.
     */
    public operator fun contains(value: T): Boolean

    /** Returns the value stored at [index]. */
    public operator fun get(index: Int): T

    /** Returns the values stored at the indices covered by [range]. */
    public operator fun get(range: IntRange): List<T>

    /** Lazily evaluated set of the distinct stored values. */
    public val distinct: Lazy<Set<T>>

    public companion object
}
25+
26+
/**
 * Creates a [ColumnDataHolder] from the elements of this collection.
 *
 * @param type [KType] of the elements; forwarded to [ColumnDataHolderImpl.of], which uses it to pick the storage.
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public fun <T> Collection<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    return ColumnDataHolderImpl.of(list = this, type = type, distinct = distinct)
}

/**
 * Creates a [ColumnDataHolder] from the elements of this collection, taking the element type from
 * the reified type argument [T].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public inline fun <reified T> Collection<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val elementType = typeOf<T>()
    return toColumnDataHolder(elementType, distinct)
}
31+
32+
/**
 * Creates a [ColumnDataHolder] from the elements of this array.
 *
 * @param type [KType] of the elements; forwarded to [ColumnDataHolderImpl.of], which uses it to pick the storage.
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public fun <T> Array<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    return ColumnDataHolderImpl.of(array = this, type = type, distinct = distinct)
}

/**
 * Creates a [ColumnDataHolder] from the elements of this array, taking the element type from the
 * reified type argument [T].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public inline fun <reified T> Array<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val elementType = typeOf<T>()
    return toColumnDataHolder(elementType, distinct)
}
37+
38+
// Each function below hands the primitive array, together with the matching element KType, to the
// `primitiveArray` overload of ColumnDataHolderImpl.of; `distinct` is forwarded unchanged.

/** Creates a [ColumnDataHolder] of [Boolean] values from this [BooleanArray]. */
public fun BooleanArray.asColumnDataHolder(distinct: Lazy<Set<Boolean>>? = null): ColumnDataHolder<Boolean> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Boolean>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Byte] values from this [ByteArray]. */
public fun ByteArray.asColumnDataHolder(distinct: Lazy<Set<Byte>>? = null): ColumnDataHolder<Byte> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Byte>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Short] values from this [ShortArray]. */
public fun ShortArray.asColumnDataHolder(distinct: Lazy<Set<Short>>? = null): ColumnDataHolder<Short> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Short>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Int] values from this [IntArray]. */
public fun IntArray.asColumnDataHolder(distinct: Lazy<Set<Int>>? = null): ColumnDataHolder<Int> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Int>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Long] values from this [LongArray]. */
public fun LongArray.asColumnDataHolder(distinct: Lazy<Set<Long>>? = null): ColumnDataHolder<Long> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Long>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Float] values from this [FloatArray]. */
public fun FloatArray.asColumnDataHolder(distinct: Lazy<Set<Float>>? = null): ColumnDataHolder<Float> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Float>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Double] values from this [DoubleArray]. */
public fun DoubleArray.asColumnDataHolder(distinct: Lazy<Set<Double>>? = null): ColumnDataHolder<Double> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Double>(), distinct = distinct)
}

/** Creates a [ColumnDataHolder] of [Char] values from this [CharArray]. */
public fun CharArray.asColumnDataHolder(distinct: Lazy<Set<Char>>? = null): ColumnDataHolder<Char> {
    return ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Char>(), distinct = distinct)
}
61+
62+
// Unsigned arrays implement Collection<U*>, so a positional call to ColumnDataHolderImpl.of(this, ...)
// binds to the Collection overload, which copies the data with toU*Array(). Naming the
// `primitiveArray` parameter pins the overload that wraps the array directly, matching what the
// signed-array variants of asColumnDataHolder do.

/**
 * Creates a [ColumnDataHolder] of [UByte] values from this [UByteArray].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun UByteArray.asColumnDataHolder(distinct: Lazy<Set<UByte>>? = null): ColumnDataHolder<UByte> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<UByte>(), distinct = distinct)

/**
 * Creates a [ColumnDataHolder] of [UShort] values from this [UShortArray].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun UShortArray.asColumnDataHolder(distinct: Lazy<Set<UShort>>? = null): ColumnDataHolder<UShort> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<UShort>(), distinct = distinct)

/**
 * Creates a [ColumnDataHolder] of [UInt] values from this [UIntArray].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun UIntArray.asColumnDataHolder(distinct: Lazy<Set<UInt>>? = null): ColumnDataHolder<UInt> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<UInt>(), distinct = distinct)

/**
 * Creates a [ColumnDataHolder] of [ULong] values from this [ULongArray].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun ULongArray.asColumnDataHolder(distinct: Lazy<Set<ULong>>? = null): ColumnDataHolder<ULong> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<ULong>(), distinct = distinct)

0 commit comments

Comments
 (0)