Skip to content

Commit e21b8d5

Browse files
committed
Make _merge_blocks lazy
1 parent c3bace8 commit e21b8d5

File tree

1 file changed

+11
-20
lines changed

1 file changed

+11
-20
lines changed

pandas/core/internals/managers.py

Lines changed: 11 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1938,7 +1938,7 @@ def _consolidate_check(self) -> None:
19381938

19391939
def _consolidate_inplace(self) -> None:
19401940
if not self.is_consolidated():
1941-
self.blocks = _consolidate(self.blocks)
1941+
self.blocks = tuple(_consolidate(self.blocks))
19421942
self._is_consolidated = True
19431943
self._known_consolidated = True
19441944
self._rebuild_blknos_and_blklocs()
@@ -2430,30 +2430,21 @@ def _stack_arrays(tuples, dtype: np.dtype):
24302430
return stacked, placement
24312431

24322432

2433-
def _consolidate(blocks: tuple[Block, ...]) -> tuple[Block, ...]:
2433+
def _consolidate(blocks: tuple[Block, ...]) -> Generator[Block]:
24342434
"""
24352435
Merge blocks having same dtype, exclude non-consolidating blocks
24362436
"""
24372437
# sort by _can_consolidate, dtype
24382438
gkey = lambda x: x._consolidate_key
24392439
grouper = itertools.groupby(sorted(blocks, key=gkey), gkey)
2440-
2441-
new_blocks: list[Block] = []
2442-
for (_can_consolidate, dtype), group_blocks in grouper:
2443-
merged_blocks, _ = _merge_blocks(
2444-
list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate
2445-
)
2446-
new_blocks = extend_blocks(merged_blocks, new_blocks)
2447-
return tuple(new_blocks)
2440+
for (_can_consolidate, _), group_blocks in grouper:
2441+
yield from _merge_blocks(tuple(group_blocks), can_consolidate=_can_consolidate)
24482442

24492443

2450-
def _merge_blocks(
2451-
blocks: list[Block], dtype: DtypeObj, can_consolidate: bool
2452-
) -> tuple[list[Block], bool]:
2444+
def _merge_blocks(blocks: tuple[Block], can_consolidate: bool) -> Generator[Block]:
24532445
if len(blocks) == 1:
2454-
return blocks, False
2455-
2456-
if can_consolidate:
2446+
yield from blocks
2447+
elif can_consolidate:
24572448
# TODO: optimization potential in case all mgrs contain slices and
24582449
# combination of those slices is a slice, too.
24592450
new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks])
@@ -2476,10 +2467,10 @@ def _merge_blocks(
24762467
new_mgr_locs = new_mgr_locs[argsort]
24772468

24782469
bp = BlockPlacement(new_mgr_locs)
2479-
return [new_block_2d(new_values, placement=bp)], True
2480-
2481-
# can't consolidate --> no merge
2482-
return blocks, False
2470+
yield new_block_2d(new_values, placement=bp)
2471+
else:
2472+
# can't consolidate --> no merge
2473+
yield from blocks
24832474

24842475

24852476
def _preprocess_slice_or_indexer(

0 commit comments

Comments
 (0)