From 3ca8588d896f09fcca193891f888e6e7b8da61d1 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Tue, 4 Nov 2025 13:59:34 +0000 Subject: [PATCH 1/4] Stricter handling of submodules as attributes --- mypy/build.py | 5 +++++ mypy/nodes.py | 20 +++++++++--------- mypy/semanal.py | 30 ++++++++++++++++++++++++++- test-data/unit/check-incremental.test | 29 ++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 11 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 0b78f879c547..9915360c07db 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -603,6 +603,7 @@ def __init__( self.options = options self.version_id = version_id self.modules: dict[str, MypyFile] = {} + self.import_map: dict[str, set[str]] = {} self.missing_modules: set[str] = set() self.fg_deps_meta: dict[str, FgDepMeta] = {} # fg_deps holds the dependencies of every module that has been @@ -623,6 +624,7 @@ def __init__( self.incomplete_namespaces, self.errors, self.plugin, + self.import_map, ) self.all_types: dict[Expression, Type] = {} # Enabled by export_types self.indirection_detector = TypeIndirectionVisitor() @@ -2898,6 +2900,9 @@ def dispatch(sources: list[BuildSource], manager: BuildManager, stdout: TextIO) manager.cache_enabled = False graph = load_graph(sources, manager) + for id in graph: + manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed) + t1 = time.time() manager.add_stats( graph_size=len(graph), diff --git a/mypy/nodes.py b/mypy/nodes.py index 539995ce9229..13ba011eebc0 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -5002,27 +5002,27 @@ def local_definitions( SYMBOL_TABLE_NODE: Final[Tag] = 61 -def read_symbol(data: Buffer) -> mypy.nodes.SymbolNode: +def read_symbol(data: Buffer) -> SymbolNode: tag = read_tag(data) # The branches here are ordered manually by type "popularity". 
if tag == VAR: - return mypy.nodes.Var.read(data) + return Var.read(data) if tag == FUNC_DEF: - return mypy.nodes.FuncDef.read(data) + return FuncDef.read(data) if tag == DECORATOR: - return mypy.nodes.Decorator.read(data) + return Decorator.read(data) if tag == TYPE_INFO: - return mypy.nodes.TypeInfo.read(data) + return TypeInfo.read(data) if tag == OVERLOADED_FUNC_DEF: - return mypy.nodes.OverloadedFuncDef.read(data) + return OverloadedFuncDef.read(data) if tag == TYPE_VAR_EXPR: - return mypy.nodes.TypeVarExpr.read(data) + return TypeVarExpr.read(data) if tag == TYPE_ALIAS: - return mypy.nodes.TypeAlias.read(data) + return TypeAlias.read(data) if tag == PARAM_SPEC_EXPR: - return mypy.nodes.ParamSpecExpr.read(data) + return ParamSpecExpr.read(data) if tag == TYPE_VAR_TUPLE_EXPR: - return mypy.nodes.TypeVarTupleExpr.read(data) + return TypeVarTupleExpr.read(data) assert False, f"Unknown symbol tag {tag}" diff --git a/mypy/semanal.py b/mypy/semanal.py index e55819b898e9..0925ac9d23c3 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -451,6 +451,7 @@ def __init__( incomplete_namespaces: set[str], errors: Errors, plugin: Plugin, + import_map: dict[str, set[str]], ) -> None: """Construct semantic analyzer. @@ -483,6 +484,7 @@ def __init__( self.loop_depth = [0] self.errors = errors self.modules = modules + self.import_map = import_map self.msg = MessageBuilder(errors, modules) self.missing_modules = missing_modules self.missing_names = [set()] @@ -534,6 +536,9 @@ def __init__( self.type_expression_full_parse_success_count: int = 0 # Successful full parses self.type_expression_full_parse_failure_count: int = 0 # Failed full parses + # Imports of submodules transitively visible from given module. 
+ self.transitive_submodule_imports: dict[str, set[str]] = {} + # mypyc doesn't properly handle implementing an abstractproperty # with a regular attribute so we make them properties @property @@ -6637,7 +6642,7 @@ def get_module_symbol(self, node: MypyFile, name: str) -> SymbolTableNode | None sym = names.get(name) if not sym: fullname = module + "." + name - if fullname in self.modules: + if fullname in self.modules and self.is_visible_import(fullname): sym = SymbolTableNode(GDEF, self.modules[fullname]) elif self.is_incomplete_namespace(module): self.record_incomplete_ref() @@ -6656,6 +6661,29 @@ def get_module_symbol(self, node: MypyFile, name: str) -> SymbolTableNode | None sym = None return sym + def is_visible_import(self, id: str) -> bool: + if self.cur_mod_id not in self.transitive_submodule_imports: + self.add_transitive_submodule_imports(self.cur_mod_id) + return id in self.transitive_submodule_imports[self.cur_mod_id] + + def add_transitive_submodule_imports(self, mod_id: str) -> None: + todo = self.import_map[mod_id] + seen = {mod_id} + result = {mod_id} + while todo: + dep = todo.pop() + if dep in seen: + continue + seen.add(dep) + if "." 
in dep: + result.add(dep) + if dep in self.transitive_submodule_imports: + result |= self.transitive_submodule_imports[dep] + continue + if dep in self.import_map: + todo |= self.import_map[dep] + self.transitive_submodule_imports[mod_id] = result + def is_missing_module(self, module: str) -> bool: return module in self.missing_modules diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 5fbaa4f2c904..59357d7ed762 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -7512,3 +7512,32 @@ tmp/impl.py:31: note: Revealed type is "builtins.object" tmp/impl.py:32: note: Revealed type is "Union[builtins.int, builtins.str, lib.Unrelated]" tmp/impl.py:33: note: Revealed type is "builtins.object" tmp/impl.py:34: note: Revealed type is "builtins.object" + +[case testIncrementalAccessSubmoduleWithoutExplicitImport] +import b +import a + +[file a.py] +import pkg + +pkg.submod.foo() + +[file a.py.2] +import pkg + +pkg.submod.foo() +x = 1 + +[file b.py] +import c + +[file c.py] +from pkg import submod + +[file pkg/__init__.pyi] +[file pkg/submod.pyi] +def foo() -> None: pass +[out] +tmp/a.py:3: error: "object" has no attribute "submod" +[out2] +tmp/a.py:3: error: "object" has no attribute "submod" From e4075315687598ed076e8dcc10ffb74fe1a6592f Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Tue, 4 Nov 2025 18:18:51 +0000 Subject: [PATCH 2/4] Add ancestors --- mypy/build.py | 4 +++- test-data/unit/check-incremental.test | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/mypy/build.py b/mypy/build.py index 9915360c07db..9b2754df108b 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -2901,7 +2901,9 @@ def dispatch(sources: list[BuildSource], manager: BuildManager, stdout: TextIO) graph = load_graph(sources, manager) for id in graph: - manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed) + ancestors = graph[id].ancestors + assert 
ancestors is not None + manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed + ancestors) t1 = time.time() manager.add_stats( diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index 59357d7ed762..e2d74c206da8 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -7541,3 +7541,21 @@ def foo() -> None: pass tmp/a.py:3: error: "object" has no attribute "submod" [out2] tmp/a.py:3: error: "object" has no attribute "submod" + +[case testIncrementalAccessSubmoduleWithoutExplicitImportNested] +import pandas + +pandas.core.dtypes + +[file pandas/__init__.py] +import pandas.core.api + +[file pandas/core/__init__.py] +[file pandas/core/api.py] +from pandas.core.dtypes.dtypes import X + +[file pandas/core/dtypes/__init__.py] +[file pandas/core/dtypes/dtypes.py] +X = 0 +[out] +[out2] From e91bc30d01eaf6df6d975fb4ffd6dd0d6248f04b Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Wed, 5 Nov 2025 00:33:27 +0000 Subject: [PATCH 3/4] Explicitly don't handle import from --- mypy/build.py | 49 +++++++++++++-------------- mypy/semanal.py | 19 +++++++++-- test-data/unit/check-incremental.test | 37 ++++++++++++++++++++ 3 files changed, 77 insertions(+), 28 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 9b2754df108b..e9c50ce6b224 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -744,6 +744,26 @@ def getmtime(self, path: str) -> int: else: return int(self.metastore.getmtime(path)) + def correct_rel_imp(self, file: MypyFile, imp: ImportFrom | ImportAll) -> str: + """Function to correct for relative imports.""" + file_id = file.fullname + rel = imp.relative + if rel == 0: + return imp.id + if os.path.basename(file.path).startswith("__init__."): + rel -= 1 + if rel != 0: + file_id = ".".join(file_id.split(".")[:-rel]) + new_id = file_id + "." 
+ imp.id if imp.id else file_id + + if not new_id: + self.errors.set_file(file.path, file.name, self.options) + self.errors.report( + imp.line, 0, "No parent module -- cannot perform relative import", blocker=True + ) + + return new_id + def all_imported_modules_in_file(self, file: MypyFile) -> list[tuple[int, str, int]]: """Find all reachable import statements in a file. @@ -752,27 +772,6 @@ def all_imported_modules_in_file(self, file: MypyFile) -> list[tuple[int, str, i Can generate blocking errors on bogus relative imports. """ - - def correct_rel_imp(imp: ImportFrom | ImportAll) -> str: - """Function to correct for relative imports.""" - file_id = file.fullname - rel = imp.relative - if rel == 0: - return imp.id - if os.path.basename(file.path).startswith("__init__."): - rel -= 1 - if rel != 0: - file_id = ".".join(file_id.split(".")[:-rel]) - new_id = file_id + "." + imp.id if imp.id else file_id - - if not new_id: - self.errors.set_file(file.path, file.name, self.options) - self.errors.report( - imp.line, 0, "No parent module -- cannot perform relative import", blocker=True - ) - - return new_id - res: list[tuple[int, str, int]] = [] for imp in file.imports: if not imp.is_unreachable: @@ -787,7 +786,7 @@ def correct_rel_imp(imp: ImportFrom | ImportAll) -> str: ancestors.append(part) res.append((ancestor_pri, ".".join(ancestors), imp.line)) elif isinstance(imp, ImportFrom): - cur_id = correct_rel_imp(imp) + cur_id = self.correct_rel_imp(file, imp) all_are_submodules = True # Also add any imported names that are submodules. pri = import_priority(imp, PRI_MED) @@ -807,7 +806,7 @@ def correct_rel_imp(imp: ImportFrom | ImportAll) -> str: res.append((pri, cur_id, imp.line)) elif isinstance(imp, ImportAll): pri = import_priority(imp, PRI_HIGH) - res.append((pri, correct_rel_imp(imp), imp.line)) + res.append((pri, self.correct_rel_imp(file, imp), imp.line)) # Sort such that module (e.g. foo.bar.baz) comes before its ancestors (e.g. 
foo # and foo.bar) so that, if FindModuleCache finds the target module in a @@ -2901,9 +2900,7 @@ def dispatch(sources: list[BuildSource], manager: BuildManager, stdout: TextIO) graph = load_graph(sources, manager) for id in graph: - ancestors = graph[id].ancestors - assert ancestors is not None - manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed + ancestors) + manager.import_map[id] = set(graph[id].dependencies + graph[id].suppressed) t1 = time.time() manager.add_stats( diff --git a/mypy/semanal.py b/mypy/semanal.py index 0925ac9d23c3..cea545a4dd34 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -537,6 +537,13 @@ def __init__( self.type_expression_full_parse_failure_count: int = 0 # Failed full parses # Imports of submodules transitively visible from given module. + # This is needed to support patterns like this + # [a.py] + # import b + # import foo + # foo.bar # <- this should work even if bar is not re-exported in foo + # [b.py] + # import foo.bar self.transitive_submodule_imports: dict[str, set[str]] = {} # mypyc doesn't properly handle implementing an abstractproperty @@ -6642,7 +6649,7 @@ def get_module_symbol(self, node: MypyFile, name: str) -> SymbolTableNode | None sym = names.get(name) if not sym: fullname = module + "." + name - if fullname in self.modules and self.is_visible_import(fullname): + if fullname in self.modules and self.is_visible_import(module, fullname): sym = SymbolTableNode(GDEF, self.modules[fullname]) elif self.is_incomplete_namespace(module): self.record_incomplete_ref() @@ -6661,12 +6668,20 @@ def get_module_symbol(self, node: MypyFile, name: str) -> SymbolTableNode | None sym = None return sym - def is_visible_import(self, id: str) -> bool: + def is_visible_import(self, base_id: str, id: str) -> bool: + if base_id not in self.transitive_submodule_imports: + # This is a performance optimization for a common pattern. 
If one module + in a codebase uses import numpy as np; np.foo.bar, then it is likely that + # other modules use a similar pattern as well. So we pre-compute transitive + # dependencies for np, to avoid possible duplicate work in the future. + self.add_transitive_submodule_imports(base_id) if self.cur_mod_id not in self.transitive_submodule_imports: self.add_transitive_submodule_imports(self.cur_mod_id) return id in self.transitive_submodule_imports[self.cur_mod_id] def add_transitive_submodule_imports(self, mod_id: str) -> None: + if mod_id not in self.import_map: + return todo = self.import_map[mod_id] seen = {mod_id} result = {mod_id} diff --git a/test-data/unit/check-incremental.test b/test-data/unit/check-incremental.test index e2d74c206da8..56c9cef80f34 100644 --- a/test-data/unit/check-incremental.test +++ b/test-data/unit/check-incremental.test @@ -7543,8 +7543,43 @@ tmp/a.py:3: error: "object" has no attribute "submod" tmp/a.py:3: error: "object" has no attribute "submod" [case testIncrementalAccessSubmoduleWithoutExplicitImportNested] +import a + +[file a.py] +import pandas +pandas.core.dtypes + +[file a.py.2] +import pandas +pandas.core.dtypes +# touch + +[file pandas/__init__.py] +import pandas.core.api + +[file pandas/core/__init__.py] +[file pandas/core/api.py] +import pandas.core.dtypes.dtypes + +[file pandas/core/dtypes/__init__.py] +[file pandas/core/dtypes/dtypes.py] +X = 0 +[out] +[out2] + +[case testIncrementalAccessSubmoduleWithoutExplicitImportNestedFrom] +import a + +[file a.py] import pandas +# Although this actually works at runtime, we do not support this, since +# this would cause a major slowdown for a rare edge case. This test verifies +# that we fail consistently on cold and warm runs. 
+pandas.core.dtypes + +[file a.py.2] +import pandas pandas.core.dtypes [file pandas/__init__.py] @@ -7558,4 +7593,6 @@ from pandas.core.dtypes.dtypes import X [file pandas/core/dtypes/dtypes.py] X = 0 [out] +tmp/a.py:6: error: "object" has no attribute "dtypes" [out2] +tmp/a.py:2: error: "object" has no attribute "dtypes" From 96650aba7426d487037bdab8da52f7946a8825e5 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Wed, 5 Nov 2025 17:22:14 +0000 Subject: [PATCH 4/4] Another performance optimization --- mypy/semanal.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mypy/semanal.py b/mypy/semanal.py index cea545a4dd34..41d117ea4d96 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -6669,6 +6669,9 @@ def get_module_symbol(self, node: MypyFile, name: str) -> SymbolTableNode | None return sym def is_visible_import(self, base_id: str, id: str) -> bool: + if id in self.import_map[self.cur_mod_id]: + # Fast path: module is imported locally. + return True if base_id not in self.transitive_submodule_imports: # This is a performance optimization for a common pattern. If one module # in a codebase uses import numpy as np; np.foo.bar, then it is likely that