Skip to content

Commit 2001cca

Browse files
committed
Update memory benchmark to use manifest cache functions
1 parent 07e5008 commit 2001cca

File tree

1 file changed

+12
-7
lines changed

1 file changed

+12
-7
lines changed

tests/benchmark/test_memory_benchmark.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
import pytest
3434

3535
from pyiceberg.catalog.memory import InMemoryCatalog
36-
from pyiceberg.manifest import _manifest_cache
36+
from pyiceberg.manifest import _get_manifest_cache, clear_manifest_cache
3737

3838

3939
def generate_test_dataframe() -> pa.Table:
@@ -64,7 +64,7 @@ def memory_catalog(tmp_path_factory: pytest.TempPathFactory) -> InMemoryCatalog:
6464
@pytest.fixture(autouse=True)
6565
def clear_caches() -> None:
6666
"""Clear caches before each test."""
67-
_manifest_cache.clear()
67+
clear_manifest_cache()
6868
gc.collect()
6969

7070

@@ -95,7 +95,8 @@ def test_manifest_cache_memory_growth(memory_catalog: InMemoryCatalog) -> None:
9595
# Sample memory at intervals
9696
if (i + 1) % 10 == 0:
9797
current, _ = tracemalloc.get_traced_memory()
98-
cache_size = len(_manifest_cache)
98+
cache = _get_manifest_cache()
99+
cache_size = len(cache) if cache is not None else 0
99100

100101
memory_samples.append((i + 1, current, cache_size))
101102
print(f" Iteration {i + 1}: Memory={current / 1024:.1f} KB, Cache entries={cache_size}")
@@ -150,13 +151,14 @@ def test_memory_after_gc_with_cache_cleared(memory_catalog: InMemoryCatalog) ->
150151

151152
gc.collect()
152153
before_clear_memory, _ = tracemalloc.get_traced_memory()
153-
cache_size_before = len(_manifest_cache)
154+
cache = _get_manifest_cache()
155+
cache_size_before = len(cache) if cache is not None else 0
154156
print(f" Memory before clear: {before_clear_memory / 1024:.1f} KB")
155157
print(f" Cache size: {cache_size_before}")
156158

157159
# Phase 2: Clear cache and GC
158160
print("\nPhase 2: Clearing cache and running GC...")
159-
_manifest_cache.clear()
161+
clear_manifest_cache()
160162
gc.collect()
161163
gc.collect() # Multiple GC passes for thorough cleanup
162164

@@ -191,7 +193,9 @@ def test_manifest_cache_deduplication_efficiency() -> None:
191193
FileFormat,
192194
ManifestEntry,
193195
ManifestEntryStatus,
196+
_get_manifest_cache,
194197
_manifests,
198+
clear_manifest_cache,
195199
write_manifest,
196200
write_manifest_list,
197201
)
@@ -245,7 +249,7 @@ def test_manifest_cache_deduplication_efficiency() -> None:
245249
num_lists = 10
246250
print(f"Creating {num_lists} manifest lists with overlapping manifests...")
247251

248-
_manifest_cache.clear()
252+
clear_manifest_cache()
249253

250254
for i in range(num_lists):
251255
list_path = f"{tmp_dir}/manifest-list_{i}.avro"
@@ -265,7 +269,8 @@ def test_manifest_cache_deduplication_efficiency() -> None:
265269
_manifests(io, list_path)
266270

267271
# Analyze cache efficiency
268-
cache_entries = len(_manifest_cache)
272+
cache = _get_manifest_cache()
273+
cache_entries = len(cache) if cache is not None else 0
269274
# List i contains manifests 0..i, so only the first num_lists manifests are actually used
270275
manifests_actually_used = num_lists
271276

0 commit comments

Comments
 (0)