Skip to content

Commit d24a9dc

Browse files
jklukasgemini-code-assist[bot]rickeylev
authored
feat(runfiles): support for --incompatible_compact_repo_mapping_manifest (#3277)
Fixes #3022. To quote @rickeylev: > Under bzlmod, the repo mapping can become quite large (i.e. tens of megabytes) because its size scales as a factor of the number of repos in the transitive dependencies. > > To address this, the --incompatible_compact_repo_mapping_manifest flag was introduced. This changes the repo mapping formation to use prefixes (instead of exact repo names) for mapping things. > > To make this work with the runfiles library, the code has to be updated to handle these prefixes instead of just exact strings. --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Richard Levasseur <[email protected]>
1 parent 05735c8 commit d24a9dc

File tree

3 files changed

+292
-32
lines changed

3 files changed

+292
-32
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ END_UNRELEASED_TEMPLATE
8585

8686
{#v0-0-0-added}
8787
### Added
88+
* (runfiles) The Python runfiles library now supports Bazel's
89+
`--incompatible_compact_repo_mapping_manifest` flag.
8890
* (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the
8991
{obj}`main_module` attribute.
9092
* (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the

python/runfiles/runfiles.py

Lines changed: 130 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,131 @@
1515
"""Runfiles lookup library for Bazel-built Python binaries and tests.
1616
1717
See @rules_python//python/runfiles/README.md for usage instructions.
18+
19+
:::{versionadded} VERSION_NEXT_FEATURE
20+
Support for Bazel's `--incompatible_compact_repo_mapping_manifest` flag was added.
21+
This enables prefix-based repository mappings to reduce memory usage for large
22+
dependency graphs under bzlmod.
23+
:::
1824
"""
25+
import collections.abc
1926
import inspect
2027
import os
2128
import posixpath
2229
import sys
30+
from collections import defaultdict
2331
from typing import Dict, Optional, Tuple, Union
2432

2533

34+
class _RepositoryMapping:
35+
"""Repository mapping for resolving apparent repository names to canonical ones.
36+
37+
Handles both exact mappings and prefix-based mappings introduced by the
38+
--incompatible_compact_repo_mapping_manifest flag.
39+
"""
40+
41+
def __init__(
42+
self,
43+
exact_mappings: Dict[Tuple[str, str], str],
44+
prefixed_mappings: Dict[Tuple[str, str], str],
45+
) -> None:
46+
"""Initialize repository mapping with exact and prefixed mappings.
47+
48+
Args:
49+
exact_mappings: Dict mapping (source_canonical, target_apparent) -> target_canonical
50+
prefixed_mappings: Dict mapping (source_prefix, target_apparent) -> target_canonical
51+
"""
52+
self._exact_mappings = exact_mappings
53+
54+
# Group prefixed mappings by target_apparent for faster lookups
55+
self._grouped_prefixed_mappings = defaultdict(list)
56+
for (
57+
prefix_source,
58+
target_app,
59+
), target_canonical in prefixed_mappings.items():
60+
self._grouped_prefixed_mappings[target_app].append(
61+
(prefix_source, target_canonical)
62+
)
63+
64+
@staticmethod
65+
def create_from_file(repo_mapping_path: Optional[str]) -> "_RepositoryMapping":
66+
"""Create RepositoryMapping from a repository mapping manifest file.
67+
68+
Args:
69+
repo_mapping_path: Path to the repository mapping file, or None if not available
70+
71+
Returns:
72+
RepositoryMapping instance with parsed mappings
73+
"""
74+
# If the repository mapping file can't be found, that is not an error: We
75+
# might be running without Bzlmod enabled or there may not be any runfiles.
76+
# In this case, just apply empty repo mappings.
77+
if not repo_mapping_path:
78+
return _RepositoryMapping({}, {})
79+
80+
try:
81+
with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f:
82+
content = f.read()
83+
except FileNotFoundError:
84+
return _RepositoryMapping({}, {})
85+
86+
exact_mappings = {}
87+
prefixed_mappings = {}
88+
for line in content.splitlines():
89+
source_canonical, target_apparent, target_canonical = line.split(",")
90+
if source_canonical.endswith("*"):
91+
# This is a prefixed mapping - remove the '*' for prefix matching
92+
prefix = source_canonical[:-1]
93+
prefixed_mappings[(prefix, target_apparent)] = target_canonical
94+
else:
95+
# This is an exact mapping
96+
exact_mappings[(source_canonical, target_apparent)] = target_canonical
97+
98+
return _RepositoryMapping(exact_mappings, prefixed_mappings)
99+
100+
def lookup(self, source_repo: Optional[str], target_apparent: str) -> Optional[str]:
101+
"""Look up repository mapping for the given source and target.
102+
103+
This handles both exact mappings and prefix-based mappings introduced by the
104+
--incompatible_compact_repo_mapping_manifest flag. Exact mappings are tried
105+
first, followed by prefix-based mappings where order matters.
106+
107+
Args:
108+
source_repo: Source canonical repository name
109+
target_apparent: Target apparent repository name
110+
111+
Returns:
112+
target_canonical repository name, or None if no mapping exists
113+
"""
114+
if source_repo is None:
115+
return None
116+
117+
key = (source_repo, target_apparent)
118+
119+
# Try exact mapping first
120+
if key in self._exact_mappings:
121+
return self._exact_mappings[key]
122+
123+
# Try prefixed mapping if no exact match found
124+
if target_apparent in self._grouped_prefixed_mappings:
125+
for prefix_source, target_canonical in self._grouped_prefixed_mappings[
126+
target_apparent
127+
]:
128+
if source_repo.startswith(prefix_source):
129+
return target_canonical
130+
131+
# No mapping found
132+
return None
133+
134+
def is_empty(self) -> bool:
135+
"""Check if this repository mapping is empty (no exact or prefixed mappings).
136+
137+
Returns:
138+
True if there are no mappings, False otherwise
139+
"""
140+
return len(self._exact_mappings) == 0 and len(self._grouped_prefixed_mappings) == 0
141+
142+
26143
class _ManifestBased:
27144
"""`Runfiles` strategy that parses a runfiles-manifest to look up runfiles."""
28145

@@ -130,7 +247,7 @@ class Runfiles:
130247
def __init__(self, strategy: Union[_ManifestBased, _DirectoryBased]) -> None:
131248
self._strategy = strategy
132249
self._python_runfiles_root = _FindPythonRunfilesRoot()
133-
self._repo_mapping = _ParseRepoMapping(
250+
self._repo_mapping = _RepositoryMapping.create_from_file(
134251
strategy.RlocationChecked("_repo_mapping")
135252
)
136253

@@ -179,7 +296,7 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st
179296
if os.path.isabs(path):
180297
return path
181298

182-
if source_repo is None and self._repo_mapping:
299+
if source_repo is None and not self._repo_mapping.is_empty():
183300
# Look up runfiles using the repository mapping of the caller of the
184301
# current method. If the repo mapping is empty, determining this
185302
# name is not necessary.
@@ -188,7 +305,8 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st
188305
# Split off the first path component, which contains the repository
189306
# name (apparent or canonical).
190307
target_repo, _, remainder = path.partition("/")
191-
if not remainder or (source_repo, target_repo) not in self._repo_mapping:
308+
target_canonical = self._repo_mapping.lookup(source_repo, target_repo)
309+
if not remainder or target_canonical is None:
192310
# One of the following is the case:
193311
# - not using Bzlmod, so the repository mapping is empty and
194312
# apparent and canonical repository names are the same
@@ -202,11 +320,15 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st
202320
source_repo is not None
203321
), "BUG: if the `source_repo` is None, we should never go past the `if` statement above"
204322

205-
# target_repo is an apparent repository name. Look up the corresponding
206-
# canonical repository name with respect to the current repository,
207-
# identified by its canonical name.
208-
target_canonical = self._repo_mapping[(source_repo, target_repo)]
209-
return self._strategy.RlocationChecked(target_canonical + "/" + remainder)
323+
# Look up the target repository using the repository mapping
324+
if target_canonical is not None:
325+
return self._strategy.RlocationChecked(
326+
target_canonical + "/" + remainder
327+
)
328+
329+
# No mapping found - assume target_repo is already canonical or
330+
# we're not using Bzlmod
331+
return self._strategy.RlocationChecked(path)
210332

211333
def EnvVars(self) -> Dict[str, str]:
212334
"""Returns environment variables for subprocesses.
@@ -359,30 +481,6 @@ def _FindPythonRunfilesRoot() -> str:
359481
return root
360482

361483

362-
def _ParseRepoMapping(repo_mapping_path: Optional[str]) -> Dict[Tuple[str, str], str]:
363-
"""Parses the repository mapping manifest."""
364-
# If the repository mapping file can't be found, that is not an error: We
365-
# might be running without Bzlmod enabled or there may not be any runfiles.
366-
# In this case, just apply an empty repo mapping.
367-
if not repo_mapping_path:
368-
return {}
369-
try:
370-
with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f:
371-
content = f.read()
372-
except FileNotFoundError:
373-
return {}
374-
375-
repo_mapping = {}
376-
for line in content.split("\n"):
377-
if not line:
378-
# Empty line following the last line break
379-
break
380-
current_canonical, target_local, target_canonical = line.split(",")
381-
repo_mapping[(current_canonical, target_local)] = target_canonical
382-
383-
return repo_mapping
384-
385-
386484
def CreateManifestBased(manifest_path: str) -> Runfiles:
387485
return Runfiles.CreateManifestBased(manifest_path)
388486

tests/runfiles/runfiles_test.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from typing import Any, List, Optional
1919

2020
from python.runfiles import runfiles
21+
from python.runfiles.runfiles import _RepositoryMapping
2122

2223

2324
class RunfilesTest(unittest.TestCase):
@@ -525,6 +526,165 @@ def testDirectoryBasedRlocationWithRepoMappingFromOtherRepo(self) -> None:
525526
r.Rlocation("config.json", "protobuf~3.19.2"), dir + "/config.json"
526527
)
527528

529+
def testDirectoryBasedRlocationWithCompactRepoMappingFromMain(self) -> None:
530+
"""Test repository mapping with prefix-based entries (compact format)."""
531+
with _MockFile(
532+
name="_repo_mapping",
533+
contents=[
534+
# Exact mappings (no asterisk)
535+
"_,config.json,config.json~1.2.3",
536+
",my_module,_main",
537+
",my_workspace,_main",
538+
# Prefixed mappings (with asterisk) - these apply to any repo starting with the prefix
539+
"deps+*,external_dep,external_dep~1.0.0",
540+
"test_deps+*,test_lib,test_lib~2.1.0",
541+
],
542+
) as rm:
543+
dir = os.path.dirname(rm.Path())
544+
r = runfiles.CreateDirectoryBased(dir)
545+
546+
# Test exact mappings still work
547+
self.assertEqual(
548+
r.Rlocation("my_module/bar/runfile", ""), dir + "/_main/bar/runfile"
549+
)
550+
self.assertEqual(
551+
r.Rlocation("my_workspace/bar/runfile", ""), dir + "/_main/bar/runfile"
552+
)
553+
554+
# Test prefixed mappings - should match any repo starting with "deps+"
555+
self.assertEqual(
556+
r.Rlocation("external_dep/foo/file", "deps+dep1"),
557+
dir + "/external_dep~1.0.0/foo/file",
558+
)
559+
self.assertEqual(
560+
r.Rlocation("external_dep/bar/file", "deps+dep2"),
561+
dir + "/external_dep~1.0.0/bar/file",
562+
)
563+
self.assertEqual(
564+
r.Rlocation("external_dep/nested/path/file", "deps+some_long_dep_name"),
565+
dir + "/external_dep~1.0.0/nested/path/file",
566+
)
567+
568+
# Test that prefixed mappings work for test_deps+ prefix too
569+
self.assertEqual(
570+
r.Rlocation("test_lib/test/file", "test_deps+junit"),
571+
dir + "/test_lib~2.1.0/test/file",
572+
)
573+
574+
# Test that non-matching prefixes don't match
575+
self.assertEqual(
576+
r.Rlocation("external_dep/foo/file", "other_prefix"),
577+
dir + "/external_dep/foo/file", # No mapping applied, use as-is
578+
)
579+
580+
def testDirectoryBasedRlocationWithCompactRepoMappingPrecedence(self) -> None:
581+
"""Test that exact mappings take precedence over prefixed mappings."""
582+
with _MockFile(
583+
name="_repo_mapping",
584+
contents=[
585+
# Exact mapping for a specific source repo
586+
"deps+specific_repo,external_dep,external_dep~exact",
587+
# Prefixed mapping for repos starting with "deps+"
588+
"deps+*,external_dep,external_dep~prefix",
589+
# Another prefixed mapping with different prefix
590+
"other+*,external_dep,external_dep~other",
591+
],
592+
) as rm:
593+
dir = os.path.dirname(rm.Path())
594+
r = runfiles.CreateDirectoryBased(dir)
595+
596+
# Exact mapping should take precedence over prefix
597+
self.assertEqual(
598+
r.Rlocation("external_dep/foo/file", "deps+specific_repo"),
599+
dir + "/external_dep~exact/foo/file",
600+
)
601+
602+
# Other repos with deps+ prefix should use the prefixed mapping
603+
self.assertEqual(
604+
r.Rlocation("external_dep/foo/file", "deps+other_repo"),
605+
dir + "/external_dep~prefix/foo/file",
606+
)
607+
608+
# Different prefix should use its own mapping
609+
self.assertEqual(
610+
r.Rlocation("external_dep/foo/file", "other+some_repo"),
611+
dir + "/external_dep~other/foo/file",
612+
)
613+
614+
def testDirectoryBasedRlocationWithCompactRepoMappingOrderMatters(self) -> None:
615+
"""Test that order matters for prefixed mappings (first match wins)."""
616+
with _MockFile(
617+
name="_repo_mapping",
618+
contents=[
619+
# More specific prefix comes first
620+
"deps+specific+*,lib,lib~specific",
621+
# More general prefix comes second
622+
"deps+*,lib,lib~general",
623+
],
624+
) as rm:
625+
dir = os.path.dirname(rm.Path())
626+
r = runfiles.CreateDirectoryBased(dir)
627+
628+
# Should match the more specific prefix first
629+
self.assertEqual(
630+
r.Rlocation("lib/foo/file", "deps+specific+repo"),
631+
dir + "/lib~specific/foo/file",
632+
)
633+
634+
# Should match the general prefix for non-specific repos
635+
self.assertEqual(
636+
r.Rlocation("lib/foo/file", "deps+other_repo"),
637+
dir + "/lib~general/foo/file",
638+
)
639+
640+
def testRepositoryMappingLookup(self) -> None:
641+
"""Test _RepositoryMapping.lookup() method for both exact and prefix-based mappings."""
642+
exact_mappings = {
643+
("", "my_workspace"): "_main",
644+
("", "config_lib"): "config_lib~1.0.0",
645+
("deps+specific_repo", "external_dep"): "external_dep~exact",
646+
}
647+
prefixed_mappings = {
648+
("deps+", "external_dep"): "external_dep~prefix",
649+
("test_deps+", "test_lib"): "test_lib~2.1.0",
650+
}
651+
652+
repo_mapping = _RepositoryMapping(exact_mappings, prefixed_mappings)
653+
654+
# Test exact lookups
655+
self.assertEqual(repo_mapping.lookup("", "my_workspace"), "_main")
656+
self.assertEqual(repo_mapping.lookup("", "config_lib"), "config_lib~1.0.0")
657+
self.assertEqual(
658+
repo_mapping.lookup("deps+specific_repo", "external_dep"),
659+
"external_dep~exact",
660+
)
661+
662+
# Test prefix-based lookups
663+
self.assertEqual(
664+
repo_mapping.lookup("deps+some_repo", "external_dep"), "external_dep~prefix"
665+
)
666+
self.assertEqual(
667+
repo_mapping.lookup("test_deps+another_repo", "test_lib"), "test_lib~2.1.0"
668+
)
669+
670+
# Test that exact takes precedence over prefix
671+
self.assertEqual(
672+
repo_mapping.lookup("deps+specific_repo", "external_dep"),
673+
"external_dep~exact",
674+
)
675+
676+
# Test non-existent mapping
677+
self.assertIsNone(repo_mapping.lookup("nonexistent", "repo"))
678+
self.assertIsNone(repo_mapping.lookup("unknown+repo", "missing"))
679+
680+
# Test empty mapping
681+
empty_mapping = _RepositoryMapping({}, {})
682+
self.assertIsNone(empty_mapping.lookup("any", "repo"))
683+
684+
# Test is_empty() method
685+
self.assertFalse(repo_mapping.is_empty()) # Should have mappings
686+
self.assertTrue(empty_mapping.is_empty()) # Should be empty
687+
528688
def testCurrentRepository(self) -> None:
529689
# Under bzlmod, the current repository name is the empty string instead
530690
# of the name in the workspace file.

0 commit comments

Comments
 (0)