⚡️ Speed up method ModuleDependencyFinder.cached by 17%
#621
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 17% (0.17x) speedup for `ModuleDependencyFinder.cached` in `marimo/_runtime/reload/autoreload.py`

⏱️ Runtime: 930 microseconds → 794 microseconds (best of 193 runs)

📝 Explanation and details
The optimization replaces the expensive `hasattr(module, "__file__") or module.__file__ is None` check with a single `getattr(module, "__file__", None)` call, achieving a 17% speedup.

Key optimizations:

- Eliminates redundant attribute lookups: The original code performs up to 3 attribute accesses on `module.__file__` (hasattr check + None check + final access), while the optimized version does only 1-2 accesses total.
- Uses `getattr()` with default: `getattr(module, "__file__", None)` is more efficient than `hasattr()` + separate attribute access because it combines the existence check and value retrieval in a single operation.
- Caches the attribute value: Storing `module.__file__` in `module_file` eliminates the final redundant lookup in the dictionary membership test.

Performance impact by test case:

- Modules without `__file__`: Minimal impact (~5% faster to 8% slower)

The optimization is particularly effective for modules that have `__file__` attributes (the common case), where it reduces Python's attribute access overhead. The performance gain scales well with workload size, making it valuable in hot paths where module dependency checking occurs frequently.

✅ Correctness verification report:
⚙️ Existing Unit Tests and Runtime
`_runtime/reload/test_autoreload.py::TestModuleDependencyFinder.test_dependencies_cached`

🌀 Generated Regression Tests and Runtime
import types
# imports
import pytest
from marimo._runtime.reload.autoreload import ModuleDependencyFinder
# unit tests
def make_module_with_file(filename):
    """Return a dummy module whose ``__file__`` attribute is ``filename``.

    Args:
        filename: Value to store in the module's ``__file__`` attribute.

    Returns:
        A fresh ``types.ModuleType`` named ``"dummy"``.
    """
    mod = types.ModuleType("dummy")
    # The attribute must be the dunder ``__file__``; a plain ``file``
    # attribute is not what module-path lookups read.
    mod.__file__ = filename
    return mod
def make_module_without_file():
    """Return a dummy module that has no ``__file__`` attribute.

    Returns:
        A fresh ``types.ModuleType`` named ``"dummy"`` for which
        ``hasattr(mod, "__file__")`` is false.
    """
    mod = types.ModuleType("dummy")
    # Defensive: drop the attribute if the new module happens to carry one.
    if hasattr(mod, "__file__"):
        delattr(mod, "__file__")
    return mod
def make_module_with_file_none():
    """Return a dummy module whose ``__file__`` attribute exists but is ``None``.

    Returns:
        A fresh ``types.ModuleType`` named ``"dummy"`` with ``__file__ = None``.
    """
    mod = types.ModuleType("dummy")
    mod.__file__ = None
    return mod
# 1. Basic Test Cases
def test_module_in_cache_returns_true():
    """Query ``cached`` for a module whose path is a key in the cache."""
    finder = ModuleDependencyFinder()
    filename = "/path/to/module.py"
    mod = make_module_with_file(filename)
    # Register the path so the membership lookup can hit.
    finder._module_dependencies[filename] = {"dummy": mod}
    codeflash_output = finder.cached(mod)  # 664ns -> 566ns (17.3% faster)
def test_module_not_in_cache_returns_false():
    """Query ``cached`` for a module whose path was never registered."""
    finder = ModuleDependencyFinder()
    filename = "/path/to/module.py"
    mod = make_module_with_file(filename)
    # Nothing is added to finder._module_dependencies in this test.
    codeflash_output = finder.cached(mod)  # 651ns -> 563ns (15.6% faster)
def test_module_with_no_file_attribute_returns_false():
    """Query ``cached`` for a module that has no ``__file__`` attribute."""
    finder = ModuleDependencyFinder()
    mod = make_module_without_file()
    codeflash_output = finder.cached(mod)  # 1.19μs -> 1.19μs (0.168% slower)
def test_module_with_file_none_returns_false():
    """Query ``cached`` for a module whose ``__file__`` is ``None``."""
    finder = ModuleDependencyFinder()
    mod = make_module_with_file_none()
    codeflash_output = finder.cached(mod)  # 565ns -> 466ns (21.2% faster)
def test_module_with_empty_file_string():
    """Query ``cached`` with an empty-string path, before and after caching it."""
    finder = ModuleDependencyFinder()
    mod = make_module_with_file("")

    # First call: the empty path has not been registered.
    codeflash_output = finder.cached(mod)  # 716ns -> 474ns (51.1% faster)

    # Second call: the empty path is now a key in the cache.
    finder._module_dependencies[""] = {"dummy": mod}
    codeflash_output = finder.cached(mod)  # 352ns -> 189ns (86.2% faster)
# 2. Edge Test Cases
def test_module_with_file_attribute_collision():
    """Query ``cached`` for two distinct modules that share one file path."""
    finder = ModuleDependencyFinder()
    filename = "/path/to/module.py"
    mod1 = make_module_with_file(filename)
    mod2 = make_module_with_file(filename)
    # Only mod1 is stored, but both modules carry the same path key.
    finder._module_dependencies[filename] = {"dummy1": mod1}
    codeflash_output = finder.cached(mod1)  # 633ns -> 497ns (27.4% faster)
    codeflash_output = finder.cached(mod2)  # 325ns -> 270ns (20.4% faster)
def test_module_with_file_attribute_case_sensitivity():
    """Query ``cached`` with paths that differ only by letter case."""
    finder = ModuleDependencyFinder()
    filename_lower = "/path/to/module.py"
    filename_upper = "/PATH/TO/MODULE.PY"
    mod_lower = make_module_with_file(filename_lower)
    mod_upper = make_module_with_file(filename_upper)
    # Only the lower-case path is registered.
    finder._module_dependencies[filename_lower] = {"dummy": mod_lower}
    codeflash_output = finder.cached(mod_lower)  # 605ns -> 441ns (37.2% faster)
    codeflash_output = finder.cached(mod_upper)  # 387ns -> 304ns (27.3% faster)
def test_module_with_file_attribute_special_characters():
    """Query ``cached`` with a path containing special characters."""
    finder = ModuleDependencyFinder()
    filename = "/path/to/module$@!.py"
    mod = make_module_with_file(filename)
    finder._module_dependencies[filename] = {"dummy": mod}
    codeflash_output = finder.cached(mod)  # 601ns -> 471ns (27.6% faster)
def test_module_with_file_attribute_empty_dict():
    """Query ``cached`` when the path key exists but maps to an empty dict."""
    finder = ModuleDependencyFinder()
    filename = "/path/to/module.py"
    mod = make_module_with_file(filename)
    # The key is present even though no dependencies are recorded under it.
    finder._module_dependencies[filename] = {}
    codeflash_output = finder.cached(mod)  # 570ns -> 479ns (19.0% faster)
def test_module_with_file_attribute_none_in_cache():
    """Query ``cached`` for a ``__file__ = None`` module when ``None`` is a key."""
    finder = ModuleDependencyFinder()
    mod = make_module_with_file_none()
    finder._module_dependencies[None] = {"dummy": mod}
    # Should still return False because cached checks that __file__ is not None.
    codeflash_output = finder.cached(mod)  # 514ns -> 422ns (21.8% faster)
def test_many_modules_in_cache():
    """Query ``cached`` for 500 registered modules, then one unregistered module."""
    finder = ModuleDependencyFinder()
    num_modules = 500
    filenames = [f"/path/to/module_{i}.py" for i in range(num_modules)]
    modules = [make_module_with_file(fn) for fn in filenames]
    for fn, mod in zip(filenames, modules):
        finder._module_dependencies[fn] = {"dummy": mod}

    # Every module built above has its path registered.
    for mod in modules:
        codeflash_output = finder.cached(mod)  # 112μs -> 94.2μs (19.1% faster)

    # A module whose path was never registered.
    mod_not_in_cache = make_module_with_file("/path/to/not_in_cache.py")
    codeflash_output = finder.cached(mod_not_in_cache)  # 308ns -> 253ns (21.7% faster)
def test_many_modules_not_in_cache():
    """Query ``cached`` for 500 modules while the cache is left untouched."""
    finder = ModuleDependencyFinder()
    num_modules = 500
    filenames = [f"/path/to/module_{i}.py" for i in range(num_modules)]
    modules = [make_module_with_file(fn) for fn in filenames]
    # Nothing is added to finder._module_dependencies in this test.
    for mod in modules:
        codeflash_output = finder.cached(mod)  # 114μs -> 96.8μs (18.2% faster)
def test_large_cache_with_random_access():
    """Query ``cached`` for 50 pseudo-randomly chosen modules out of 500 registered.

    A locally seeded generator is used so the same indices are sampled on
    every run, keeping the test reproducible.
    """
    import random

    finder = ModuleDependencyFinder()
    num_modules = 500
    filenames = [f"/path/to/module_{i}.py" for i in range(num_modules)]
    modules = [make_module_with_file(fn) for fn in filenames]
    for fn, mod in zip(filenames, modules):
        finder._module_dependencies[fn] = {"dummy": mod}

    # Fixed seed: identical sample each run, without touching global RNG state.
    rng = random.Random(0)
    indices = rng.sample(range(num_modules), 50)
    for idx in indices:
        codeflash_output = finder.cached(modules[idx])  # 12.1μs -> 10.5μs (15.6% faster)
def test_large_cache_with_collisions():
    """Query ``cached`` for 100 modules that all share one registered path."""
    finder = ModuleDependencyFinder()
    filename = "/path/to/collision.py"
    modules = [make_module_with_file(filename) for _ in range(100)]
    # Only the first module is stored; the key is shared by all of them.
    finder._module_dependencies[filename] = {"dummy": modules[0]}
    for mod in modules:
        codeflash_output = finder.cached(mod)  # 21.7μs -> 18.3μs (18.4% faster)
def test_large_cache_with_missing_file_attributes():
    """Query ``cached`` for 500 registered modules plus 50 without ``__file__``."""
    finder = ModuleDependencyFinder()
    num_modules = 500
    filenames = [f"/path/to/module_{i}.py" for i in range(num_modules)]
    modules = [make_module_with_file(fn) for fn in filenames]
    # Append modules that have no file attribute; these are never registered.
    modules += [make_module_without_file() for _ in range(50)]
    for fn, mod in zip(filenames, modules[:num_modules]):
        finder._module_dependencies[fn] = {"dummy": mod}

    # Modules with a registered path.
    for mod in modules[:num_modules]:
        codeflash_output = finder.cached(mod)  # 112μs -> 95.0μs (18.9% faster)

    # Modules without a file attribute.
    for mod in modules[num_modules:]:
        codeflash_output = finder.cached(mod)  # 33.8μs -> 34.1μs (0.901% slower)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
import types
# imports
import pytest # used for our unit tests
from marimo._runtime.reload.autoreload import ModuleDependencyFinder
# unit tests
def make_module(name="dummy", file=None):
    """Return a dummy module, optionally with a ``__file__`` attribute.

    Args:
        name: Name given to the new module.
        file: Value for the module's ``__file__`` attribute. When ``None``
            (the default) the attribute is not set at all.

    Returns:
        A fresh ``types.ModuleType`` instance.
    """
    mod = types.ModuleType(name)
    if file is not None:
        # The attribute must be the dunder ``__file__``, not ``file``.
        mod.__file__ = file
    return mod
# 1. Basic Test Cases
def test_cached_returns_true_for_module_with_file_in_dependencies():
    """Query ``cached`` for a module whose path is registered in the cache."""
    finder = ModuleDependencyFinder()
    file_path = "/path/to/mod1.py"
    mod = make_module("mod1", file_path)
    # Register under the known path rather than reading it back off the module.
    finder._module_dependencies[file_path] = {"mod1": mod}
    codeflash_output = finder.cached(mod)  # 660ns -> 481ns (37.2% faster)
def test_cached_returns_false_for_module_with_file_not_in_dependencies():
    """Query ``cached`` for a module whose path was never registered."""
    finder = ModuleDependencyFinder()
    mod = make_module("mod2", "/path/to/mod2.py")
    # Nothing is added to finder._module_dependencies in this test.
    codeflash_output = finder.cached(mod)  # 678ns -> 512ns (32.4% faster)
def test_cached_returns_false_for_module_without_file_attribute():
    """Query ``cached`` for a module that has no ``__file__`` attribute."""
    finder = ModuleDependencyFinder()
    mod = make_module("mod3")
    # Make sure the attribute is absent, whatever the helper produced.
    if hasattr(mod, "__file__"):
        delattr(mod, "__file__")
    codeflash_output = finder.cached(mod)  # 1.16μs -> 1.10μs (5.34% faster)
def test_cached_returns_false_for_module_with_file_set_to_none():
    """Query ``cached`` for a module built with ``file=None``."""
    finder = ModuleDependencyFinder()
    mod = make_module("mod4", None)
    codeflash_output = finder.cached(mod)  # 1.41μs -> 1.37μs (3.00% faster)
def test_cached_returns_true_for_multiple_modules_with_same_file():
    """Query ``cached`` for two modules registered under one shared path."""
    finder = ModuleDependencyFinder()
    file_path = "/path/to/common.py"
    mod1 = make_module("modA", file_path)
    mod2 = make_module("modB", file_path)
    finder._module_dependencies[file_path] = {"modA": mod1, "modB": mod2}
    codeflash_output = finder.cached(mod1)  # 703ns -> 528ns (33.1% faster)
    codeflash_output = finder.cached(mod2)  # 333ns -> 251ns (32.7% faster)
# 2. Edge Test Cases
def test_cached_with_empty_module_dependencies_dict():
    """Query ``cached`` on a finder whose cache has had nothing added to it."""
    finder = ModuleDependencyFinder()
    mod = make_module("mod5", "/path/to/mod5.py")
    codeflash_output = finder.cached(mod)  # 658ns -> 512ns (28.5% faster)
def test_cached_with_empty_string_file():
    """Query ``cached`` with an empty-string path that is registered as a key."""
    finder = ModuleDependencyFinder()
    mod = make_module("mod6", "")
    finder._module_dependencies[""] = {"mod6": mod}
    codeflash_output = finder.cached(mod)  # 615ns -> 427ns (44.0% faster)
def test_cached_with_file_attribute_set_to_none_and_in_dependencies():
    """Query ``cached`` for a ``file=None`` module while ``None`` is a cache key."""
    # Even if None is a key in _module_dependencies, cached should return False.
    finder = ModuleDependencyFinder()
    mod = make_module("mod12", None)
    finder._module_dependencies[None] = {"mod12": mod}
    codeflash_output = finder.cached(mod)  # 1.48μs -> 1.52μs (2.24% slower)
def test_cached_with_file_attribute_missing_and_in_dependencies():
    """Query ``cached`` for a module without ``__file__`` while ``None`` is a key."""
    # Even if the module has no __file__ and None is a key, should return False.
    finder = ModuleDependencyFinder()
    mod = make_module("mod13")
    # Make sure the attribute is absent, whatever the helper produced.
    if hasattr(mod, "__file__"):
        delattr(mod, "__file__")
    finder._module_dependencies[None] = {"mod13": mod}
    codeflash_output = finder.cached(mod)  # 1.08μs -> 1.18μs (7.99% slower)
def test_cached_with_large_number_of_dependencies():
    """Query ``cached`` for each of 500 modules registered under distinct paths."""
    finder = ModuleDependencyFinder()
    num_mods = 500
    modules = []
    # Build and register 500 modules, one path each.
    for i in range(num_mods):
        file_path = f"/path/to/mod{i}.py"
        mod = make_module(f"mod{i}", file_path)
        finder._module_dependencies[file_path] = {f"mod{i}": mod}
        modules.append(mod)

    for mod in modules:
        codeflash_output = finder.cached(mod)  # 111μs -> 95.5μs (17.2% faster)
def test_cached_with_large_number_of_non_cached_modules():
    """Query ``cached`` for 500 modules, none of which is registered."""
    finder = ModuleDependencyFinder()
    num_mods = 500
    modules = []
    # Build the modules without adding any path to the cache.
    for i in range(num_mods):
        file_path = f"/path/to/mod{i}.py"
        mod = make_module(f"mod{i}", file_path)
        modules.append(mod)

    for mod in modules:
        codeflash_output = finder.cached(mod)  # 114μs -> 96.6μs (18.1% faster)
def test_cached_performance_with_mixed_cached_and_uncached_modules():
    """Query ``cached`` for 500 modules, of which every even-indexed one is registered."""
    finder = ModuleDependencyFinder()
    num_mods = 500
    cached_mods = []
    uncached_mods = []
    # Even indices are registered in the cache; odd indices are not.
    for i in range(num_mods):
        file_path = f"/path/to/mod{i}.py"
        mod = make_module(f"mod{i}", file_path)
        if i % 2 == 0:
            finder._module_dependencies[file_path] = {f"mod{i}": mod}
            cached_mods.append(mod)
        else:
            uncached_mods.append(mod)

    for mod in cached_mods:
        codeflash_output = finder.cached(mod)  # 56.5μs -> 48.0μs (17.7% faster)
    for mod in uncached_mods:
        codeflash_output = finder.cached(mod)  # 59.6μs -> 51.9μs (14.7% faster)
def test_cached_with_large_number_of_modules_with_same_file():
    """Query ``cached`` for 500 modules that all share one registered path."""
    finder = ModuleDependencyFinder()
    file_path = "/path/to/shared.py"
    num_mods = 500
    modules = []
    for i in range(num_mods):
        mod = make_module(f"mod{i}", file_path)
        modules.append(mod)
    # A single cache entry holds every module, keyed by module name.
    finder._module_dependencies[file_path] = {f"mod{i}": mod for i, mod in enumerate(modules)}
    for mod in modules:
        codeflash_output = finder.cached(mod)  # 106μs -> 88.6μs (19.7% faster)
#------------------------------------------------
from marimo._runtime.reload.autoreload import ModuleDependencyFinder
To edit these changes, run `git checkout codeflash/optimize-ModuleDependencyFinder.cached-mhvm44ih` and push.