Skip to content

Commit 432cd81

Browse files
sumedhsakdeo and claude
committed
Update tests for new ArrivalOrder batch_size API
- Update benchmark tests to use simplified parameter structure
- Remove separate batch_size parameter from test calls
- Fix concurrent_streams validation error message in unit tests
- Maintain all existing test coverage and functionality

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent d93526e commit 432cd81

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

tests/benchmark/test_read_benchmark.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -85,26 +85,27 @@ def benchmark_table(tmp_path_factory: pytest.TempPathFactory) -> Table:
8585

8686

8787
@pytest.mark.parametrize(
88-
"order,batch_size",
88+
"order",
8989
[
90-
pytest.param(TaskOrder(), None, id="default"),
91-
pytest.param(ArrivalOrder(concurrent_streams=1), None, id="arrival-cf1"),
92-
pytest.param(ArrivalOrder(concurrent_streams=2), None, id="arrival-cf2"),
93-
pytest.param(ArrivalOrder(concurrent_streams=4), None, id="arrival-cf4"),
94-
pytest.param(ArrivalOrder(concurrent_streams=8), None, id="arrival-cf8"),
95-
pytest.param(ArrivalOrder(concurrent_streams=16), None, id="arrival-cf16"),
90+
pytest.param(TaskOrder(), id="default"),
91+
pytest.param(ArrivalOrder(concurrent_streams=1), id="arrival-cf1"),
92+
pytest.param(ArrivalOrder(concurrent_streams=2), id="arrival-cf2"),
93+
pytest.param(ArrivalOrder(concurrent_streams=4), id="arrival-cf4"),
94+
pytest.param(ArrivalOrder(concurrent_streams=8), id="arrival-cf8"),
95+
pytest.param(ArrivalOrder(concurrent_streams=16), id="arrival-cf16"),
9696
],
9797
)
9898
def test_read_throughput(
9999
benchmark_table: Table,
100100
order: ScanOrder,
101-
batch_size: int | None,
102101
) -> None:
103102
"""Measure records/sec, time to first record, and peak Arrow memory for a scan configuration."""
104-
effective_batch_size = batch_size or 131_072 # PyArrow default
103+
# Determine effective batch_size for display and memory calculation
105104
if isinstance(order, ArrivalOrder):
105+
effective_batch_size = order.batch_size or 131_072 # PyArrow default
106106
config_str = f"order=ARRIVAL, concurrent_streams={order.concurrent_streams}, batch_size={effective_batch_size}"
107107
else:
108+
effective_batch_size = 131_072 # PyArrow default (TaskOrder doesn't control batch_size directly)
108109
config_str = f"order=TASK (executor.map, all files parallel), batch_size={effective_batch_size}"
109110
print("\n--- ArrowScan Read Throughput Benchmark ---")
110111
print(f"Config: {config_str}")
@@ -126,7 +127,6 @@ def test_read_throughput(
126127
total_rows = 0
127128
first_batch_time = None
128129
for batch in benchmark_table.scan().to_arrow_batch_reader(
129-
batch_size=batch_size,
130130
order=order,
131131
):
132132
if first_batch_time is None:

tests/io/test_pyarrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3320,7 +3320,7 @@ def test_concurrent_files_invalid_value(tmpdir: str) -> None:
33203320
"""Test that concurrent_files < 1 raises ValueError."""
33213321
scan, tasks = _create_scan_and_tasks(tmpdir, num_files=1, rows_per_file=10)
33223322

3323-
with pytest.raises(ValueError, match="concurrent_files must be >= 1"):
3323+
with pytest.raises(ValueError, match="concurrent_streams must be >= 1"):
33243324
list(scan.to_record_batches(tasks, order=ArrivalOrder(concurrent_streams=0)))
33253325

33263326

0 commit comments

Comments (0)