1+ const { DataFrame } = require ( './src/danfojs-node/dist/danfojs-node/src' ) ;
2+
/**
 * Builds a synthetic four-column DataFrame used as input for the groupby
 * benchmarks.
 *
 * Row `i` receives a label derived from `i % numGroups`, so the labels cycle
 * through the group ids in order. The three value columns are independent
 * `Math.random()` draws scaled by 1000, 500 and 100 respectively.
 *
 * @param {number} rows - Number of rows to generate.
 * @param {number} [numGroups=100] - Modulus used to derive each group label.
 * @returns {DataFrame} Frame with columns `group_col`, `value_a`, `value_b`
 *   and `value_c`.
 */
function generateTestData(rows, numGroups = 100) {
  console.log(`Generating ${rows} rows of test data with ~${numGroups} groups...`);

  const columns = ['group_col', 'value_a', 'value_b', 'value_c'];

  // Produces one record; the random draws happen in column order (a, b, c).
  const buildRow = (rowIndex) => {
    // The label literal ends with a single space after the group id.
    const groupLabel = `group_${rowIndex % numGroups} `;
    const valueA = Math.random() * 1000;
    const valueB = Math.random() * 500;
    const valueC = Math.random() * 100;
    return [groupLabel, valueA, valueB, valueC];
  };

  const data = [];
  let rowIndex = 0;
  while (rowIndex < rows) {
    data[rowIndex] = buildRow(rowIndex);
    rowIndex += 1;
  }

  return new DataFrame(data, { columns });
}
20+
/**
 * Runs four groupby benchmarks against `df`, logs each timing, and returns
 * the elapsed time of every stage.
 *
 * Stages, in order:
 *   1. `df.groupby(['group_col'])`
 *   2. `grouped.col(['value_a']).sum()`
 *   3. `grouped.agg(...)` with one operation per value column
 *   4. `grouped.agg(...)` with several operations per column
 *
 * Each stage is bracketed by exactly one pair of `performance.now()` calls and
 * its duration is stored separately, so the returned object reports a distinct
 * measurement per stage rather than repeating the last one.
 *
 * @param {object} df - DataFrame-like object exposing `shape` and
 *   `groupby(columns)`; the benchmarks reference the columns `group_col`,
 *   `value_a`, `value_b` and `value_c`.
 * @param {string} testName - Label printed in the section header.
 * @returns {{construction: number, singleSum: number, multiAgg: number,
 *   complexAgg: number}} Elapsed milliseconds for each stage.
 */
function performanceTest(df, testName) {
  // Times a single synchronous stage and hands back both its result and its
  // own duration, so later stages cannot overwrite earlier measurements.
  const timeStage = (stage) => {
    const startedAt = performance.now();
    const result = stage();
    const elapsedMs = performance.now() - startedAt;
    return { result, elapsedMs };
  };

  console.log(`\n=== ${testName} ===`);
  console.log(`DataFrame shape: ${df.shape[0]} rows, ${df.shape[1]} columns`);

  // Test 1: Basic groupby construction
  console.log('\nTest 1: Group construction...');
  const construction = timeStage(() => df.groupby(['group_col']));
  const grouped = construction.result;
  console.log(`Group construction: ${construction.elapsedMs.toFixed(2)} ms`);
  console.log(`Number of groups: ${grouped.ngroups} `);

  // Test 2: Single column aggregation
  console.log('\nTest 2: Single column sum...');
  const singleSum = timeStage(() => grouped.col(['value_a']).sum());
  console.log(`Single column sum: ${singleSum.elapsedMs.toFixed(2)} ms`);
  console.log(`Result shape: ${singleSum.result.shape[0]} rows`);

  // Test 3: Multiple column aggregation
  console.log('\nTest 3: Multiple column aggregations...');
  const multiAgg = timeStage(() =>
    grouped.agg({
      value_a: 'mean',
      value_b: 'sum',
      value_c: 'count',
    }),
  );
  console.log(`Multiple aggregations: ${multiAgg.elapsedMs.toFixed(2)} ms`);
  console.log(`Result shape: ${multiAgg.result.shape[0]} rows`);

  // Test 4: Complex aggregation (multiple operations per column)
  console.log('\nTest 4: Complex aggregation...');
  const complexAgg = timeStage(() =>
    grouped.agg({
      value_a: ['mean', 'max', 'min'],
      value_b: ['sum', 'count'],
      value_c: 'std',
    }),
  );
  console.log(`Complex aggregation: ${complexAgg.elapsedMs.toFixed(2)} ms`);
  console.log(`Result shape: ${complexAgg.result.shape[0]} rows`);

  return {
    construction: construction.elapsedMs,
    singleSum: singleSum.elapsedMs,
    multiAgg: multiAgg.elapsedMs,
    complexAgg: complexAgg.elapsedMs,
  };
}
72+
/**
 * Entry point: generates three datasets of increasing size and runs the
 * groupby benchmark suite against each of them in turn.
 *
 * Between datasets, `global.gc()` is invoked when it is available (Node only
 * exposes it when started with `--expose-gc`).
 *
 * @returns {Promise<void>} Resolves once every dataset has been benchmarked.
 */
async function main() {
  console.log('DanfoJS GroupBy Performance Test');
  console.log('================================');

  // Dataset sizes to benchmark, smallest first.
  const testSizes = [
    { rows: 1000, groups: 50, name: 'Small Dataset (1K rows)' },
    { rows: 5000, groups: 100, name: 'Medium Dataset (5K rows)' },
    { rows: 20000, groups: 200, name: 'Large Dataset (20K rows)' },
  ];

  for (const { rows, groups, name } of testSizes) {
    performanceTest(generateTestData(rows, groups), name);

    // Collect garbage between runs when the hook is exposed.
    if (global.gc) {
      global.gc();
    }
  }

  [
    '\n=== Performance Test Complete ===',
    'Check the times above - we should see significant improvement!',
    'Target: 20K rows should complete in < 2 seconds total',
  ].forEach((line) => {
    console.log(line);
  });
}
98+
// Kick off the benchmark; any rejection from main() is reported on stderr.
main().catch((error) => {
  console.error(error);
});
0 commit comments