@@ -315,57 +315,66 @@ class TBlockStorage : public TComputationValue<TBlockStorage> {
315
315
316
316
TBlockStorage (
317
317
TMemoryUsageInfo* memInfo,
318
- const TVector<TType*>& itemTypes,
319
- NUdf::TUnboxedValue stream,
318
+ const TVector<TType*>& types,
319
+ size_t blockLengthIndex,
320
+ NUdf::TUnboxedValue listIter,
320
321
TStringBuf resourceTag,
321
322
arrow::MemoryPool* pool
322
323
)
323
324
: TBase(memInfo)
324
- , InputsDescr_(ToValueDescr(itemTypes))
325
- , Stream_(std::move(stream))
326
- , Inputs_(itemTypes.size())
325
+ , InputsDescr_(ToValueDescr(types))
326
+ , Readers_(types.size())
327
+ , Hashers_(types.size())
328
+ , Comparators_(types.size())
329
+ , Trimmers_(types.size())
330
+ , ListIter_(std::move(listIter))
331
+ , BlockLengthIndex_(blockLengthIndex)
327
332
, ResourceTag_(std::move(resourceTag))
328
333
{
329
334
TBlockTypeHelper helper;
330
- for (size_t i = 0 ; i < itemTypes.size (); i++) {
331
- TType* blockItemType = AS_TYPE (TBlockType, itemTypes[i])->GetItemType ();
332
- Readers_.push_back (MakeBlockReader (TTypeInfoHelper (), blockItemType));
333
- Hashers_.push_back (helper.MakeHasher (blockItemType));
334
- Comparators_.push_back (helper.MakeComparator (blockItemType));
335
- Trimmers_.push_back (MakeBlockTrimmer (TTypeInfoHelper (), blockItemType, pool));
335
+ for (size_t i = 0 ; i < types.size (); i++) {
336
+ if (i == BlockLengthIndex_) {
337
+ continue ;
338
+ }
339
+
340
+ TType* blockItemType = AS_TYPE (TBlockType, types[i])->GetItemType ();
341
+ Readers_[i] = MakeBlockReader (TTypeInfoHelper (), blockItemType);
342
+ Hashers_[i] = helper.MakeHasher (blockItemType);
343
+ Comparators_[i] = helper.MakeComparator (blockItemType);
344
+ Trimmers_[i] = MakeBlockTrimmer (TTypeInfoHelper (), blockItemType, pool);
336
345
}
337
346
}
338
347
339
- NUdf::EFetchStatus FetchStream () {
340
- switch (Stream_.WideFetch (Inputs_.data (), Inputs_.size ())) {
341
- case NUdf::EFetchStatus::Yield:
342
- return NUdf::EFetchStatus::Yield;
343
- case NUdf::EFetchStatus::Finish:
348
+ bool FetchNextBlock () {
349
+ if (!ListIter_.Next (Block_)) {
344
350
IsFinished_ = true ;
345
- return NUdf::EFetchStatus::Finish;
346
- case NUdf::EFetchStatus::Ok:
347
- break ;
351
+ return false ;
348
352
}
353
+ BlockItems_ = Block_.GetElements ();
349
354
350
355
Y_ENSURE (!IsFinished_, " Got data on finished stream" );
351
356
352
- std::vector<arrow::Datum> blockColumns;
353
- for (size_t i = 0 ; i < Inputs_.size () - 1 ; i++) {
354
- auto & datum = TArrowBlock::From (Inputs_[i]).GetDatum ();
357
+ std::vector<arrow::Datum> blockColumns (Readers_.size ());
358
+ for (size_t i = 0 ; i < Readers_.size (); i++) {
359
+ if (i == BlockLengthIndex_) {
360
+ continue ;
361
+ }
362
+
363
+ auto & datum = TArrowBlock::From (BlockItems_[i]).GetDatum ();
355
364
ARROW_DEBUG_CHECK_DATUM_TYPES (InputsDescr_[i], datum.descr ());
356
365
if (datum.is_scalar ()) {
357
- blockColumns. push_back ( datum) ;
366
+ blockColumns[i] = datum;
358
367
} else {
359
368
MKQL_ENSURE (datum.is_array (), " Expecting array" );
360
- blockColumns. push_back ( Trimmers_[i]->Trim (datum.array () ));
369
+ blockColumns[i] = Trimmers_[i]->Trim (datum.array ());
361
370
}
362
371
}
363
372
364
- auto blockSize = ::GetBlockCount (Inputs_[Inputs_. size () - 1 ]);
373
+ auto blockSize = ::GetBlockCount (BlockItems_[BlockLengthIndex_ ]);
365
374
Data_.emplace_back (blockSize, std::move (blockColumns));
366
375
RowCount_ += blockSize;
367
376
368
- return NUdf::EFetchStatus::Ok ;
377
+ return true ;
369
378
}
370
379
371
380
const TBlock& GetBlock (size_t blockOffset) const {
@@ -392,11 +401,11 @@ class TBlockStorage : public TComputationValue<TBlockStorage> {
392
401
}
393
402
394
403
TBlockItem GetItem (TRowEntry entry, ui32 columnIdx) const {
395
- Y_ENSURE (columnIdx < Inputs_.size () - 1 );
396
404
return GetItemFromBlock (GetBlock (entry.BlockOffset ), columnIdx, entry.ItemOffset );
397
405
}
398
406
399
407
TBlockItem GetItemFromBlock (const TBlock& block, ui32 columnIdx, size_t offset) const {
408
+ Y_ENSURE (columnIdx < Readers_.size () && columnIdx != BlockLengthIndex_);
400
409
Y_ENSURE (offset < block.Size );
401
410
const auto & datum = block.Columns [columnIdx];
402
411
if (datum.is_scalar ()) {
@@ -447,8 +456,11 @@ class TBlockStorage : public TComputationValue<TBlockStorage> {
447
456
size_t RowCount_ = 0 ;
448
457
bool IsFinished_ = false ;
449
458
450
- NUdf::TUnboxedValue Stream_;
451
- TUnboxedValueVector Inputs_;
459
+ NUdf::TUnboxedValue ListIter_;
460
+ NUdf::TUnboxedValue Block_;
461
+ const NUdf::TUnboxedValue* BlockItems_ = nullptr ;
462
+
463
+ size_t BlockLengthIndex_ = 0 ;
452
464
453
465
const TStringBuf ResourceTag_;
454
466
};
@@ -459,33 +471,44 @@ class TBlockStorageWrapper : public TMutableComputationNode<TBlockStorageWrapper
459
471
public:
460
472
TBlockStorageWrapper (
461
473
TComputationMutables& mutables,
462
- TVector<TType*>&& itemTypes ,
463
- IComputationNode* stream ,
474
+ TStructType* structType ,
475
+ IComputationNode* list ,
464
476
const TStringBuf& resourceTag
465
477
)
466
478
: TBaseComputation(mutables, EValueRepresentation::Boxed)
467
- , ItemTypes_(std::move(itemTypes))
468
- , Stream_(stream)
479
+ , List_(list)
469
480
, ResourceTag_(resourceTag)
470
- {}
481
+ {
482
+ for (size_t i = 0 ; i < structType->GetMembersCount (); i++) {
483
+ if (structType->GetMemberName (i) == NYql::BlockLengthColumnName) {
484
+ BlockLengthIndex_ = i;
485
+ Types_.push_back (nullptr );
486
+ continue ;
487
+ }
488
+ Types_.push_back (structType->GetMemberType (i));
489
+ }
490
+ }
471
491
472
492
NUdf::TUnboxedValuePod DoCalculate (TComputationContext& ctx) const {
473
493
return ctx.HolderFactory .Create <TBlockStorage>(
474
- ItemTypes_,
475
- std::move (Stream_->GetValue (ctx)),
494
+ Types_,
495
+ BlockLengthIndex_,
496
+ List_->GetValue (ctx).GetListIterator (),
476
497
ResourceTag_,
477
498
&ctx.ArrowMemoryPool
478
499
);
479
500
}
480
501
481
502
private:
482
503
void RegisterDependencies () const final {
483
- DependsOn (Stream_ );
504
+ DependsOn (List_ );
484
505
}
485
506
486
507
private:
487
- const TVector<TType*> ItemTypes_;
488
- IComputationNode* const Stream_;
508
+ TVector<TType*> Types_;
509
+ size_t BlockLengthIndex_ = 0 ;
510
+
511
+ IComputationNode* const List_;
489
512
490
513
const TString ResourceTag_;
491
514
};
@@ -992,17 +1015,13 @@ class TBlockMapJoinCoreWraper : public TMutableComputationNode<TBlockMapJoinCore
992
1015
auto & indexState = *static_cast <TIndexState*>(RightBlockIndex_.GetResource ());
993
1016
auto & storageState = *static_cast <TStorageState*>(indexState.GetBlockStorage ().GetResource ());
994
1017
995
- if (!RightStreamConsumed_) {
996
- auto fetchStatus = NUdf::EFetchStatus::Ok;
997
- while (fetchStatus != NUdf::EFetchStatus::Finish) {
998
- fetchStatus = storageState.FetchStream ();
999
- if (fetchStatus == NUdf::EFetchStatus::Yield) {
1000
- return NUdf::EFetchStatus::Yield;
1001
- }
1018
+ if (!RightInputConsumed_) {
1019
+ while (storageState.FetchNextBlock ()) {
1020
+ // Fetch entire data from the right input
1002
1021
}
1003
1022
1004
1023
indexState.BuildIndex ();
1005
- RightStreamConsumed_ = true ;
1024
+ RightInputConsumed_ = true ;
1006
1025
}
1007
1026
1008
1027
auto * inputFields = joinState.GetRawInputFields ();
@@ -1104,7 +1123,7 @@ class TBlockMapJoinCoreWraper : public TMutableComputationNode<TBlockMapJoinCore
1104
1123
const TVector<ui32>& LeftKeyColumns_;
1105
1124
1106
1125
const TVector<ui32>& RightIOMap_;
1107
- bool RightStreamConsumed_ = false ;
1126
+ bool RightInputConsumed_ = false ;
1108
1127
1109
1128
std::array<typename TIndexState::TIterator, PrefetchBatchSize> LookupBatchIterators_;
1110
1129
ui32 LookupBatchCurrent_ = 0 ;
@@ -1204,16 +1223,12 @@ class TBlockCrossJoinCoreWraper : public TMutableComputationNode<TBlockCrossJoin
1204
1223
auto & joinState = *static_cast <TJoinState*>(JoinState_.AsBoxed ().Get ());
1205
1224
auto & storageState = *static_cast <TStorageState*>(RightBlockStorage_.GetResource ());
1206
1225
1207
- if (!RightStreamConsumed_) {
1208
- auto fetchStatus = NUdf::EFetchStatus::Ok;
1209
- while (fetchStatus != NUdf::EFetchStatus::Finish) {
1210
- fetchStatus = storageState.FetchStream ();
1211
- if (fetchStatus == NUdf::EFetchStatus::Yield) {
1212
- return NUdf::EFetchStatus::Yield;
1213
- }
1226
+ if (!RightInputConsumed_) {
1227
+ while (storageState.FetchNextBlock ()) {
1228
+ // Fetch entire data from the right input
1214
1229
}
1215
1230
1216
- RightStreamConsumed_ = true ;
1231
+ RightInputConsumed_ = true ;
1217
1232
RightRowIterator_ = storageState.GetRowIterator ();
1218
1233
}
1219
1234
@@ -1270,7 +1285,7 @@ class TBlockCrossJoinCoreWraper : public TMutableComputationNode<TBlockCrossJoin
1270
1285
NUdf::TUnboxedValue JoinState_;
1271
1286
1272
1287
const TVector<ui32>& RightIOMap_;
1273
- bool RightStreamConsumed_ = false ;
1288
+ bool RightInputConsumed_ = false ;
1274
1289
1275
1290
TStorageState::TRowIterator RightRowIterator_;
1276
1291
@@ -1310,19 +1325,15 @@ IComputationNode* WrapBlockStorage(TCallable& callable, const TComputationNodeFa
1310
1325
MKQL_ENSURE (resultResourceType->GetTag ().StartsWith (BlockStorageResourcePrefix), " Expected block storage resource" );
1311
1326
1312
1327
const auto inputType = callable.GetInput (0 ).GetStaticType ();
1313
- MKQL_ENSURE (inputType->IsStream (), " Expected WideStream as an input stream" );
1314
- const auto inputStreamType = AS_TYPE (TStreamType, inputType);
1315
- MKQL_ENSURE (inputStreamType->GetItemType ()->IsMulti (),
1316
- " Expected Multi as a left stream item type" );
1317
- const auto inputStreamComponents = GetWideComponents (inputStreamType);
1318
- MKQL_ENSURE (inputStreamComponents.size () > 0 , " Expected at least one column" );
1319
- TVector<TType*> inputStreamItems (inputStreamComponents.cbegin (), inputStreamComponents.cend ());
1328
+ MKQL_ENSURE (inputType->IsList (), " Expected List as an input stream" );
1329
+ const auto inputItemType = AS_TYPE (TListType, inputType)->GetItemType ();;
1330
+ MKQL_ENSURE (inputItemType->IsStruct (), " Expected Struct as a list item type" );
1320
1331
1321
- const auto inputStream = LocateNode (ctx.NodeLocator , callable, 0 );
1332
+ const auto list = LocateNode (ctx.NodeLocator , callable, 0 );
1322
1333
return new TBlockStorageWrapper (
1323
1334
ctx.Mutables ,
1324
- std::move (inputStreamItems ),
1325
- inputStream ,
1335
+ AS_TYPE (TStructType, inputItemType ),
1336
+ list ,
1326
1337
resultResourceType->GetTag ()
1327
1338
);
1328
1339
}
@@ -1341,9 +1352,9 @@ IComputationNode* WrapBlockMapJoinIndex(TCallable& callable, const TComputationN
1341
1352
MKQL_ENSURE (inputResourceType->GetTag ().StartsWith (BlockStorageResourcePrefix), " Expected block storage resource" );
1342
1353
1343
1354
auto origInputItemType = AS_VALUE (TTypeType, callable.GetInput (1 ));
1344
- MKQL_ENSURE (origInputItemType->IsMulti (), " Expected Multi as an input item type" );
1345
- const auto streamComponents = AS_TYPE (TMultiType , origInputItemType)-> GetElements ( );
1346
- MKQL_ENSURE (streamComponents. size () > 0 , " Expected at least one column" );
1355
+ MKQL_ENSURE (origInputItemType->IsStruct (), " Expected Struct as an input item type" );
1356
+ const auto origInputItemStructType = AS_TYPE (TStructType , origInputItemType);
1357
+ MKQL_ENSURE (origInputItemStructType-> GetMembersCount () > 0 , " Expected at least one column" );
1347
1358
1348
1359
const auto keyColumnsLiteral = callable.GetInput (2 );
1349
1360
const auto keyColumnsTuple = AS_VALUE (TTupleLiteral, keyColumnsLiteral);
@@ -1355,7 +1366,7 @@ IComputationNode* WrapBlockMapJoinIndex(TCallable& callable, const TComputationN
1355
1366
}
1356
1367
1357
1368
for (ui32 keyColumn : keyColumns) {
1358
- MKQL_ENSURE (keyColumn < streamComponents. size () - 1 , " Key column out of range" );
1369
+ MKQL_ENSURE (keyColumn < origInputItemStructType-> GetMembersCount () , " Key column out of range" );
1359
1370
}
1360
1371
1361
1372
const auto anyNode = callable.GetInput (3 );
@@ -1408,10 +1419,9 @@ IComputationNode* WrapBlockMapJoinCore(TCallable& callable, const TComputationNo
1408
1419
}
1409
1420
1410
1421
auto origRightItemType = AS_VALUE (TTypeType, callable.GetInput (2 ));
1411
- MKQL_ENSURE (origRightItemType->IsMulti (), " Expected Multi as a right stream item type" );
1412
- const auto rightStreamComponents = AS_TYPE (TMultiType, origRightItemType)->GetElements ();
1413
- MKQL_ENSURE (rightStreamComponents.size () > 0 , " Expected at least one column" );
1414
- const TVector<TType*> rightStreamItems (rightStreamComponents.cbegin (), rightStreamComponents.cend ());
1422
+ MKQL_ENSURE (origRightItemType->IsStruct (), " Expected Struct as a right stream item type" );
1423
+ const auto origRightItemStructType = AS_TYPE (TStructType, origRightItemType);
1424
+ MKQL_ENSURE (origRightItemStructType->GetMembersCount () > 0 , " Expected at least one column" );
1415
1425
1416
1426
const auto leftKeyColumnsLiteral = callable.GetInput (4 );
1417
1427
const auto leftKeyColumnsTuple = AS_VALUE (TTupleLiteral, leftKeyColumnsLiteral);
@@ -1479,8 +1489,8 @@ IComputationNode* WrapBlockMapJoinCore(TCallable& callable, const TComputationNo
1479
1489
// XXX: Mind the last wide item, containing block length.
1480
1490
TVector<ui32> rightIOMap;
1481
1491
if (joinKind == EJoinKind::Inner || joinKind == EJoinKind::Left || joinKind == EJoinKind::Cross) {
1482
- for (size_t i = 0 ; i < rightStreamItems. size () - 1 ; i++) {
1483
- if (rightKeyDrops.contains (i)) {
1492
+ for (size_t i = 0 ; i < origRightItemStructType-> GetMembersCount () ; i++) {
1493
+ if (rightKeyDrops.contains (i) || origRightItemStructType-> GetMemberName (i) == NYql::BlockLengthColumnName ) {
1484
1494
continue ;
1485
1495
}
1486
1496
rightIOMap.push_back (i);
0 commit comments