-
Notifications
You must be signed in to change notification settings - Fork 4
/
large_datasets_test.go
41 lines (32 loc) · 1.02 KB
/
large_datasets_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
package test
import (
"fmt"
"query-engine/datatypes"
"query-engine/execution"
. "query-engine/logicalplan"
"query-engine/physicalplan"
"testing"
"time"
)
// Test_execution_on_large_datasets builds a "max fare_amount grouped by
// passenger_count" aggregation over the yellow_tripdata_2019-01.csv file,
// prints the logical and physical plans, then prints every record batch the
// execution context yields along with the total elapsed time.
//
// The test is skipped unconditionally by the t.SkipNow() call on its first
// line, so nothing below that call runs unless the call is removed.
func Test_execution_on_large_datasets(t *testing.T) {
	t.SkipNow()

	engine := execution.NewCtx()
	trips := engine.CSV("./yellow_tripdata_2019-01.csv")

	// Group on passenger_count; aggregate the max of fare_amount cast to float.
	groupBy := []LogicalExpr{NewCol("passenger_count")}
	aggregates := []AggregateExpr{
		NewMax(NewCast(NewCol("fare_amount"), datatypes.FloatType)),
	}
	frame := trips.Aggregate(groupBy, aggregates)

	logicalText := PrettyFormat(frame.LogicalPlan())
	fmt.Printf("Logical Plan:\t%s\n", logicalText)

	// Plan is called before engine.PhysicalPlan is read and printed.
	engine.Plan(frame.LogicalPlan())
	physicalText := physicalplan.PrettyFormat(engine.PhysicalPlan)
	fmt.Printf("Optimized Physical Plan:\t%s\n", physicalText)

	// Only the batch loop is timed; the planning above is excluded.
	began := time.Now()
	for engine.Next() {
		batch := engine.Execute()
		fmt.Printf("Schema: %s\n", batch.Schema)
		fmt.Printf("CSV:\n%s\n", batch.ToCSV())
	}

	// Recorded timings from earlier runs:
	//   Query took 18.084590897s
	//   After removing gob: Query took 6.710639891s
	fmt.Printf("Query took %s\n", time.Since(began))
}