@@ -387,24 +387,31 @@ export class PostgresSyncRulesStorage
     checkpoint: InternalOpId,
     dataBuckets: Map<string, InternalOpId>,
     options?: storage.BucketDataBatchOptions
-  ): AsyncIterable<storage.SyncBucketDataBatch> {
+  ): AsyncIterable<storage.SyncBucketDataChunk> {
     if (dataBuckets.size == 0) {
       return;
     }

+    // Internal naming:
+    // We do a query for one "batch", which may be returned in multiple "chunks".
+    // Each chunk is limited to a single bucket, and is limited in length and size.
+    // There are also overall batch length and size limits.
+    // The rows for each batch query are streamed in separate sets, which may or
+    // may not match up with chunks.
+
     const end = checkpoint ?? BIGINT_MAX;
     const filters = Array.from(dataBuckets.entries()).map(([name, start]) => ({
       bucket_name: name,
       start: start
     }));

-    const rowLimit = options?.limit ?? storage.DEFAULT_DOCUMENT_BATCH_LIMIT;
-    const sizeLimit = options?.chunkLimitBytes ?? storage.DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES;
+    const batchRowLimit = options?.limit ?? storage.DEFAULT_DOCUMENT_BATCH_LIMIT;
+    const chunkSizeLimitBytes = options?.chunkLimitBytes ?? storage.DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES;

-    let batchSize = 0;
-    let currentBatch: utils.SyncBucketData | null = null;
+    let chunkSizeBytes = 0;
+    let currentChunk: utils.SyncBucketData | null = null;
     let targetOp: InternalOpId | null = null;
-    let rowCount = 0;
+    let batchRowCount = 0;

     /**
      * It is possible to perform this query with JSONB join. e.g.
@@ -458,7 +465,7 @@ export class PostgresSyncRulesStorage
       params: [
         { type: 'int4', value: this.group_id },
         { type: 'int8', value: end },
-        { type: 'int4', value: rowLimit + 1 },
+        { type: 'int4', value: batchRowLimit },
         ...filters.flatMap((f) => [
           { type: 'varchar' as const, value: f.bucket_name },
           { type: 'int8' as const, value: f.start } satisfies StatementParam
@@ -469,28 +476,27 @@ export class PostgresSyncRulesStorage

       for (const row of decodedRows) {
         const { bucket_name } = row;
-        const rowSize = row.data ? row.data.length : 0;
-
-        if (
-          currentBatch == null ||
-          currentBatch.bucket != bucket_name ||
-          batchSize >= sizeLimit ||
-          (currentBatch?.data.length && batchSize + rowSize > sizeLimit) ||
-          currentBatch.data.length >= rowLimit
-        ) {
+        const rowSizeBytes = row.data ? row.data.length : 0;
+
+        const sizeExceeded =
+          chunkSizeBytes >= chunkSizeLimitBytes ||
+          (currentChunk?.data.length && chunkSizeBytes + rowSizeBytes > chunkSizeLimitBytes) ||
+          (currentChunk?.data.length ?? 0) >= batchRowLimit;
+
+        if (currentChunk == null || currentChunk.bucket != bucket_name || sizeExceeded) {
           let start: string | undefined = undefined;
-          if (currentBatch != null) {
-            if (currentBatch.bucket == bucket_name) {
-              currentBatch.has_more = true;
+          if (currentChunk != null) {
+            if (currentChunk.bucket == bucket_name) {
+              currentChunk.has_more = true;
+              start = currentChunk.next_after;
             }

-            const yieldBatch = currentBatch;
-            start = currentBatch.after;
-            currentBatch = null;
-            batchSize = 0;
-            yield { batch: yieldBatch, targetOp: targetOp };
+            const yieldChunk = currentChunk;
+            currentChunk = null;
+            chunkSizeBytes = 0;
+            yield { chunkData: yieldChunk, targetOp: targetOp };
             targetOp = null;
-            if (rowCount >= rowLimit) {
+            if (batchRowCount >= batchRowLimit) {
               // We've yielded all the requested rows
               break;
             }
@@ -503,11 +509,13 @@ export class PostgresSyncRulesStorage
             }
             start = internalToExternalOpId(startOpId);
           }
-          currentBatch = {
+          currentChunk = {
             bucket: bucket_name,
             after: start,
+            // this is updated when we yield the chunk
             has_more: false,
             data: [],
+            // this is updated incrementally
             next_after: start
           };
           targetOp = null;
@@ -527,20 +535,25 @@ export class PostgresSyncRulesStorage
           }
         }

-        currentBatch.data.push(entry);
-        currentBatch.next_after = entry.op_id;
+        currentChunk.data.push(entry);
+        currentChunk.next_after = entry.op_id;

-        batchSize += rowSize;
+        chunkSizeBytes += rowSizeBytes;

         // Manually track the total rows yielded
-        rowCount++;
+        batchRowCount++;
       }
     }

-    if (currentBatch != null) {
-      const yieldBatch = currentBatch;
-      currentBatch = null;
-      yield { batch: yieldBatch, targetOp: targetOp };
+    if (currentChunk != null) {
+      const yieldChunk = currentChunk;
+      currentChunk = null;
+      // This is the final chunk in the batch.
+      // There may be more data if and only if the batch we retrieved isn't complete.
+      // If batchRowCount == batchRowLimit, we don't actually know whether there is more data,
+      // but it is safe to return true in that case.
+      yieldChunk.has_more = batchRowCount >= batchRowLimit;
+      yield { chunkData: yieldChunk, targetOp: targetOp };
       targetOp = null;
     }
   }
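
A minimal consumption sketch for the chunked iterator above. This is illustrative only: the method name `getBucketDataBatch`, the concrete option values, and the helper function are assumptions, while the `limit` / `chunkLimitBytes` options, the `chunkData` / `targetOp` yield shape, and the `bucket` / `data` / `has_more` / `next_after` fields are taken from the diff itself. Imports of `PostgresSyncRulesStorage` and `InternalOpId` are omitted.

// Hypothetical consumer sketch; assumes the generator above is exposed as
// `getBucketDataBatch` on the storage class shown in the hunk headers.
async function drainBucketData(
  storage: PostgresSyncRulesStorage,
  checkpoint: InternalOpId,
  dataBuckets: Map<string, InternalOpId>
): Promise<void> {
  const options = {
    limit: 1000, // overall row limit for the batch query (batchRowLimit)
    chunkLimitBytes: 1024 * 1024 // size limit for each yielded chunk (chunkSizeLimitBytes)
  };
  for await (const { chunkData, targetOp } of storage.getBucketDataBatch(checkpoint, dataBuckets, options)) {
    // Each chunk is scoped to a single bucket and carries its own cursor.
    console.log(chunkData.bucket, chunkData.data.length, chunkData.next_after, targetOp);
    if (chunkData.has_more) {
      // More rows may exist for this bucket: either in the next chunk of this
      // batch, or in a follow-up batch that starts from chunkData.next_after.
    }
  }
}

With these example values, 1000 rows of roughly 4 KB each would be split into about four chunks of up to 1 MiB; every chunk before the last is yielded with has_more = true, and the final chunk's has_more reflects whether the batch hit the row limit.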