Skip to content

Commit bfaaec8

Browse files
committed
speed testing
1 parent 6589165 commit bfaaec8

File tree

2 files changed

+97
-24
lines changed

2 files changed

+97
-24
lines changed

core/build.gradle.kts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ dependencies {
8282
}
8383
testImplementation(libs.kotlin.scriptingJvm)
8484
testImplementation(libs.jsoup)
85+
86+
testImplementation("org.openjdk.jol:jol-core:0.10")
8587
}
8688

8789
val samplesImplementation by configurations.getting {

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnDataHolder.kt

Lines changed: 95 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,142 @@
11
package org.jetbrains.kotlinx.dataframe.columns
22

33
import org.jetbrains.kotlinx.dataframe.DataColumn
4+
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
5+
import org.jetbrains.kotlinx.dataframe.api.DataSchemaEnum
6+
import org.jetbrains.kotlinx.dataframe.api.add
7+
import org.jetbrains.kotlinx.dataframe.api.aggregate
8+
import org.jetbrains.kotlinx.dataframe.api.column
49
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
10+
import org.jetbrains.kotlinx.dataframe.api.fillNulls
511
import org.jetbrains.kotlinx.dataframe.api.filter
12+
import org.jetbrains.kotlinx.dataframe.api.groupBy
13+
import org.jetbrains.kotlinx.dataframe.api.print
14+
import org.jetbrains.kotlinx.dataframe.api.sortBy
15+
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
16+
import org.jetbrains.kotlinx.dataframe.api.with
617
import org.jetbrains.kotlinx.dataframe.columns.ColumnDataHolder.ColumnType.BOXED_ARRAY
18+
import org.jetbrains.kotlinx.dataframe.columns.ColumnDataHolder.ColumnType.BOXED_ARRAY_WITH_NULL
719
import org.jetbrains.kotlinx.dataframe.columns.ColumnDataHolder.ColumnType.DOUBLE_ARRAY
820
import org.jetbrains.kotlinx.dataframe.columns.ColumnDataHolder.ColumnType.LIST
21+
import org.jetbrains.kotlinx.dataframe.columns.ColumnDataHolder.ColumnType.LIST_WITH_NULL
22+
import org.jetbrains.kotlinx.dataframe.math.mean
923
import org.junit.Test
24+
import org.openjdk.jol.info.GraphLayout
1025
import kotlin.random.Random
1126
import kotlin.time.Duration
1227
import kotlin.time.measureTime
28+
import kotlin.time.measureTimedValue
1329

1430
class ColumnDataHolder {
1531

16-
/**
 * Kinds of backing container the benchmark feeds into `DataColumn.createValueColumn`.
 *
 * [value] is the human-readable label exposed through [DataSchemaEnum].
 */
enum class ColumnType(override val value: String) : DataSchemaEnum {
    /** Column built from a `List` of non-null doubles. */
    LIST(value = "list"),

    /** Column built from a `List<Double?>` whose last element is `null`. */
    LIST_WITH_NULL(value = "list with null"),

    /** Column built from a boxed `Array` of non-null doubles. */
    BOXED_ARRAY(value = "boxed array"),

    /** Column built from an `Array<Double?>` whose last element is `null`. */
    BOXED_ARRAY_WITH_NULL(value = "boxed array with null"),

    /** Column built from a primitive `DoubleArray`. */
    DOUBLE_ARRAY(value = "double array"),
}
2139

40+
/**
 * One benchmark measurement for a single data frame built from a given [ColumnType].
 *
 * @property type which backing container was used to create the columns.
 * @property creationTime how long building the data frame took.
 * @property processingTime how long the processing pipeline on that data frame took.
 * @property size total size of the data frame's object graph, as reported by JOL.
 */
@DataSchema
data class Result(
    val type: ColumnType,
    val creationTime: Duration,
    val processingTime: Duration,
    val size: Long,
)
47+
48+
// ⌌-------------------------------------------------------------------⌍
49+
// | | type| creation| processing| size|
50+
// |--|----------------------|-----------|------------|----------------|
51+
// | 0| BOXED_ARRAY_WITH_NULL| 1.668690ms| 40.072489ms| 14500481.813333|
52+
// | 1| LIST_WITH_NULL| 9.142612ms| 41.064332ms| 14509001.813333|
53+
// | 2| LIST| 2.710987ms| 42.268814ms| 11496455.760000|
54+
// | 3| BOXED_ARRAY| 2.415740ms| 42.270087ms| 11502541.520000|
55+
// | 4| DOUBLE_ARRAY| 1.840757ms| 42.354001ms| 11499172.666667|
56+
// ⌎-------------------------------------------------------------------⌏
2257
/**
 * Benchmark (not an assertion-based test): builds three-column data frames from each
 * [ColumnType] 300 times in shuffled order and records, per run,
 *  - the time needed to create the data frame,
 *  - the time needed to run a `fillNulls` -> `filter` -> `add` pipeline on it,
 *  - the total size of the data frame's object graph as measured by JOL.
 *
 * The per-type means are printed as a table sorted by processing time.
 */
@Test
fun `measuring speed of ColumnDataHolder creation`() {
    val size = 50_000
    // The index is irrelevant: every cell is an independent random double.
    val content = { _: Int -> Random.nextDouble() }

    // Shuffle the runs so that JIT warm-up and GC pauses are spread across all column types.
    val tests = buildList {
        repeat(300) {
            add(LIST)
            add(LIST_WITH_NULL)
            add(BOXED_ARRAY)
            add(BOXED_ARRAY_WITH_NULL)
            add(DOUBLE_ARRAY)
        }
    }.shuffled()

    val results = mutableListOf<Result>()

    val a by column<Double>()
    val b by column<Double>()
    val c by column<Double>()
    val d by column<Double>()

    for (test in tests) {
        val (df, creation) = measureTimedValue {
            when (test) {
                LIST -> dataFrameOf(
                    DataColumn.createValueColumn(a.name(), List(size, content)),
                    DataColumn.createValueColumn(b.name(), List(size, content)),
                    DataColumn.createValueColumn(c.name(), List(size, content)),
                )

                // The "with null" variants keep the same row count: size - 1 values plus one trailing null.
                LIST_WITH_NULL -> dataFrameOf(
                    DataColumn.createValueColumn(a.name(), List<Double?>(size - 1, content) + null),
                    DataColumn.createValueColumn(b.name(), List<Double?>(size - 1, content) + null),
                    DataColumn.createValueColumn(c.name(), List<Double?>(size - 1, content) + null),
                )

                BOXED_ARRAY -> dataFrameOf(
                    DataColumn.createValueColumn(a.name(), Array(size, content)),
                    DataColumn.createValueColumn(b.name(), Array(size, content)),
                    DataColumn.createValueColumn(c.name(), Array(size, content)),
                )

                BOXED_ARRAY_WITH_NULL -> dataFrameOf(
                    DataColumn.createValueColumn(a.name(), Array<Double?>(size - 1, content) + null),
                    DataColumn.createValueColumn(b.name(), Array<Double?>(size - 1, content) + null),
                    DataColumn.createValueColumn(c.name(), Array<Double?>(size - 1, content) + null),
                )

                DOUBLE_ARRAY -> dataFrameOf(
                    DataColumn.createValueColumn(a.name(), DoubleArray(size, content)),
                    DataColumn.createValueColumn(b.name(), DoubleArray(size, content)),
                    DataColumn.createValueColumn(c.name(), DoubleArray(size, content)),
                )
            }
        }

        val processing = measureTime {
            df.fillNulls { a and b and c }.with { 0.0 }
                .filter { a() > b() }
                .add(d) { a() + b() + c() }
        }

        // Ask JOL for the total directly instead of parsing the textual footprint table;
        // a distinct name also avoids shadowing the row count `size` above.
        val footprint = GraphLayout.parseInstance(df).totalSize()

        results += Result(test, creation, processing, footprint)
    }

    results.toDataFrame()
        .groupBy { type }
        .aggregate {
            creationTime.toList().mean() into "creation"
            processingTime.toList().mean() into "processing"
            // `this.` is required: the local `size` (row count) would otherwise win over the column accessor.
            this.size.toList().mean() into "size"
        }
        .sortBy { "processing"() }
        .print(borders = true, title = true)
}
70141

71142
/**
 * Arithmetic mean of the durations in this collection.
 *
 * The collection must be non-empty: `reduce` throws on an empty receiver.
 */
fun Collection<Duration>.mean(): Duration {
    val total = reduce { sum, next -> sum + next }
    return total / size
}

0 commit comments

Comments
 (0)