Skip to content

Commit 76bb0c0

Browse files
aivanoufacebook-github-bot
authored andcommitted
Refactor components discovery logic (#88)
Summary: Pull Request resolved: #88 Move component search registration into a separate module and share it across runner and torch_cli Introduce new format for ``[torchx.components]`` : foo = my_project.some_module.bar Where ``my_project.some_module.bar`` is a python module. The components will be recursively found in the dir for ``my_project.some_module.bar.__file__`` file. Reviewed By: kiukchung Differential Revision: D29202878 fbshipit-source-id: af07a9bf4d3cf6038fc6d0f1c704c025176a2da9
1 parent 34219b5 commit 76bb0c0

File tree

8 files changed

+446
-150
lines changed

8 files changed

+446
-150
lines changed

docs/source/configure.rst

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,16 +107,36 @@ when they run
107107
108108
$ torchx builtins
109109
110-
Custom components can be registered via the following endpoint:
110+
Custom components can be registered via the following modification of the ``entry_points.txt``:
111111

112112
::
113113

114114
[torchx.components]
115-
custom_component = my_module.components:my_component
115+
foo = my_project.bar
116116

117+
The line above registers a group ``foo`` that is associated with the module ``my_project.bar``.
118+
Torchx will recursively traverse lowest level dir associated with the ``my_project.bar`` and will find
119+
all defined components.
117120

118-
Custom components can be executed in the following manner:
121+
.. note:: If there are two registry entries, e.g. ``foo = my_project.bar`` and ``test = my_project``
122+
there will be two sets of overlapping components with different aliases.
123+
124+
125+
After registration, torchx cli will display registered components via:
126+
127+
.. code-block:: shell-session
128+
129+
$ torchx builtins
130+
131+
If ``my_project.bar`` had the following directory structure:
132+
133+
::
134+
135+
$PROJECT_ROOT/my_project/bar/
136+
|- baz.py
137+
138+
And `baz.py` defines a component (function) called `trainer`. Then the component can be run as a job in the following manner:
119139

120140
.. code-block:: shell-session
121141
122-
$ torchx run --scheduler local --scheduler_args image_fetcher=...,root_dir=/tmp custom_component -- --name "test app"
142+
$ torchx run foo.baz.trainer -- --name "test app"

torchx/cli/cmd_run.py

Lines changed: 22 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,15 @@
55
# LICENSE file in the root directory of this source tree.
66

77
import argparse
8-
import glob
9-
import importlib
10-
import os
11-
from dataclasses import dataclass
12-
from inspect import getmembers, isfunction
13-
from typing import Dict, List, Optional, Union
8+
from dataclasses import asdict
9+
from pprint import pformat
10+
from typing import Dict, List, Union, cast
1411

1512
import torchx.specs as specs
1613
from pyre_extensions import none_throws
1714
from torchx.cli.cmd_base import SubCommand
1815
from torchx.runner import get_runner
19-
from torchx.specs.file_linter import get_fn_docstring, validate
20-
from torchx.util import entrypoints
21-
from torchx.util.io import COMPONENTS_DIR, get_abspath, read_conf_file
16+
from torchx.specs.finder import get_components, _Component
2217
from torchx.util.types import to_dict
2318

2419

@@ -38,96 +33,19 @@ def _parse_run_config(arg: str) -> specs.RunConfig:
3833
return conf
3934

4035

41-
def _to_module(filepath: str) -> str:
42-
path, _ = os.path.splitext(filepath)
43-
return path.replace(os.path.sep, ".")
44-
45-
46-
def _get_builtin_description(filepath: str, function_name: str) -> Optional[str]:
47-
source = read_conf_file(filepath)
48-
if len(validate(source, torchx_function=function_name)) != 0:
49-
return None
50-
51-
func_definition, _ = none_throws(get_fn_docstring(source, function_name))
52-
return func_definition
53-
54-
55-
@dataclass
56-
class BuiltinComponent:
57-
definition: str
58-
description: str
59-
60-
61-
def _get_component_definition(module: str, function_name: str) -> str:
62-
if module.startswith("torchx.components"):
63-
module = module.split("torchx.components.")[1]
64-
return f"{module}.{function_name}"
65-
66-
67-
def _to_relative(filepath: str) -> str:
68-
if os.path.isabs(filepath):
69-
# make path torchx/components/$suffix out of the abs
70-
rel_path = filepath.split(str(COMPONENTS_DIR))[1]
71-
return f"{str(COMPONENTS_DIR)}{rel_path}"
72-
else:
73-
return os.path.join(str(COMPONENTS_DIR), filepath)
74-
75-
76-
def _get_components_from_file(filepath: str) -> List[BuiltinComponent]:
77-
components_path = _to_relative(filepath)
78-
components_module_path = _to_module(components_path)
79-
module = importlib.import_module(components_module_path)
80-
functions = getmembers(module, isfunction)
81-
buitin_functions = []
82-
for function_name, _ in functions:
83-
# Ignore private functions.
84-
if function_name.startswith("_"):
85-
continue
86-
component_desc = _get_builtin_description(filepath, function_name)
87-
if component_desc:
88-
definition = _get_component_definition(
89-
components_module_path, function_name
90-
)
91-
builtin_component = BuiltinComponent(
92-
definition=definition,
93-
description=component_desc,
94-
)
95-
buitin_functions.append(builtin_component)
96-
return buitin_functions
97-
98-
99-
def _allowed_path(path: str) -> bool:
100-
filename = os.path.basename(path)
101-
if filename.startswith("_"):
102-
return False
103-
return True
104-
105-
106-
def _builtins() -> List[BuiltinComponent]:
107-
components_dir = entrypoints.load(
108-
"torchx.file", "get_dir_path", default=get_abspath
109-
)(COMPONENTS_DIR)
110-
111-
builtins: List[BuiltinComponent] = []
112-
search_pattern = os.path.join(components_dir, "**", "*.py")
113-
for filepath in glob.glob(search_pattern, recursive=True):
114-
if not _allowed_path(filepath):
115-
continue
116-
components = _get_components_from_file(filepath)
117-
builtins += components
118-
return builtins
119-
120-
12136
class CmdBuiltins(SubCommand):
12237
def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
123-
pass # no arguments
38+
pass
39+
40+
def _builtins(self) -> Dict[str, _Component]:
41+
return get_components()
12442

12543
def run(self, args: argparse.Namespace) -> None:
126-
builtin_configs = _builtins()
127-
num_builtins = len(builtin_configs)
44+
builtin_components = self._builtins()
45+
num_builtins = len(builtin_components)
12846
print(f"Found {num_builtins} builtin configs:")
129-
for i, component in enumerate(builtin_configs):
130-
print(f" {i + 1:2d}. {component.definition} - {component.description}")
47+
for i, component in enumerate(builtin_components.values()):
48+
print(f" {i + 1:2d}. {component.name}")
13149

13250

13351
class CmdRun(SubCommand):
@@ -172,15 +90,23 @@ def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
17290
def run(self, args: argparse.Namespace) -> None:
17391
# TODO: T91790598 - remove the if condition when all apps are migrated to pure python
17492
runner = get_runner()
175-
app_handle = runner.run_from_path(
93+
result = runner.run_component(
17694
args.conf_file,
17795
args.conf_args,
17896
args.scheduler,
17997
args.scheduler_args,
18098
dryrun=args.dryrun,
18199
)
182100

183-
if not args.dryrun:
101+
if args.dryrun:
102+
app_dryrun_info = cast(specs.AppDryRunInfo, result)
103+
print("=== APPLICATION ===")
104+
print(pformat(asdict(app_dryrun_info._app), indent=2, width=80))
105+
106+
print("=== SCHEDULER REQUEST ===")
107+
print(app_dryrun_info)
108+
else:
109+
app_handle = cast(specs.AppHandle, result)
184110
if args.scheduler == "local":
185111
runner.wait(app_handle)
186112
else:

torchx/cli/test/cmd_run_test.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from typing import Generator
1616
from unittest.mock import MagicMock, patch
1717

18-
from torchx.cli.cmd_run import CmdBuiltins, CmdRun, _builtins, _parse_run_config
18+
from torchx.cli.cmd_run import CmdBuiltins, CmdRun, _parse_run_config
1919

2020

2121
@contextmanager
@@ -109,7 +109,8 @@ def test_run(self) -> None:
109109
cmd_builtins.run(args)
110110

111111
def test_builtins(self) -> None:
112-
builtins = _builtins()
112+
cmd_builtins = CmdBuiltins()
113+
builtins = cmd_builtins._builtins()
113114
# make sure there's at least one
114115
# there will always be one (example.torchx)
115116
self.assertTrue(len(builtins) > 0)

torchx/components/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
2121
# using via sdk
2222
from torchx.runner import get_runner
23-
get_runner().run_from_path("distributed.ddp", app_args=[], scheduler="local", ...)
23+
get_runner().run_component("distributed.ddp", app_args=[], scheduler="local", ...)
2424
2525
# using via torchx-cli
2626

torchx/runner/api.py

Lines changed: 40 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,11 @@
66
# LICENSE file in the root directory of this source tree.
77

88
import getpass
9-
import importlib
109
import json
10+
import logging
1111
import time
12-
from dataclasses import asdict
1312
from datetime import datetime
14-
from pprint import pformat
15-
from typing import Any, Dict, Iterable, List, Optional, Tuple
13+
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
1614

1715
from pyre_extensions import none_throws
1816
from torchx.runner.events import log_event
@@ -28,16 +26,22 @@
2826
UnknownAppException,
2927
from_file,
3028
from_function,
31-
from_module,
3229
make_app_handle,
3330
parse_app_handle,
3431
runopts,
3532
)
36-
from torchx.util import entrypoints
33+
from torchx.specs.finder import get_component
34+
35+
logger: logging.Logger = logging.getLogger(__name__)
3736

3837

3938
NONE: str = "<NONE>"
4039

40+
# Unique identifier of the component, can be either
41+
# component name, e.g.: utils.echo or path to component function:
42+
# /tmp/foobar.py:component
43+
ComponentId = str
44+
4145

4246
class Runner:
4347
"""
@@ -65,81 +69,75 @@ def __init__(
6569
self._wait_interval = wait_interval
6670
self._apps: Dict[AppHandle, AppDef] = {}
6771

68-
def run_from_path(
72+
def run_component(
6973
self,
70-
component_path: str,
74+
component_name: ComponentId,
7175
app_args: List[str],
7276
scheduler: SchedulerBackend = "default",
7377
cfg: Optional[RunConfig] = None,
7478
dryrun: bool = False,
75-
) -> AppHandle:
79+
# pyre-ignore[24]: Allow generic AppDryRunInfo
80+
) -> Union[AppHandle, AppDryRunInfo]:
7681
"""Resolves and runs the application in the specified mode.
7782
78-
Retrieves application based on ``component_path`` and runs it in the specified mode.
83+
Retrieves application based on ``component_name`` and runs it in the specified mode.
7984
80-
The ``component_path`` has the following resolution order(from high-pri to low-pri):
85+
The ``component_name`` has the following resolution order(from high-pri to low-pri):
8186
* User-registered components. Users can register components via
8287
https://packaging.python.org/specifications/entry-points/. Method looks for
8388
entrypoints in the group ``torchx.components``.
84-
* File-based components in format: ``$FILE_PATH:FUNCTION_NAME``. Both relative and
85-
absolute paths supported.
8689
* Builtin components relative to `torchx.components`. The path to the component should
8790
be module name relative to `torchx.components` and function name in a format:
8891
``$module.$function``.
92+
* File-based components in format: ``$FILE_PATH:FUNCTION_NAME``. Both relative and
93+
absolute paths supported.
8994
9095
Usage:
9196
9297
::
9398
94-
runner.run_from_path("distributed.ddp", ...) - will be resolved to
99+
runner.run_component("distributed.ddp", ...) - will be resolved to
95100
``torchx.components.distributed`` module and ``ddp`` function.
96101
97102
98-
runner.run_from_path("~/home/components.py:my_component", ...) - will be resolved to
103+
runner.run_component("~/home/components.py:my_component", ...) - will be resolved to
99104
``~/home/components.py`` file and ``my_component`` function.
100105
106+
Args:
107+
component_name: The name of the component to lookup
108+
app_args: Arguments of the component function
109+
scheduler: Scheduler to execute component
110+
cfg: Scheduler run configuration
111+
dryrun: If True, the will return ``torchx.specs.AppDryRunInfo``
112+
101113
102114
Returns:
103115
An application handle that is used to call other action APIs on the app, or ``<NONE>``
104116
if it dryrun specified.
105117
106118
Raises:
107119
AppNotReRunnableException: if the session/scheduler does not support re-running attached apps
108-
ValueError: if the ``component_path`` is failed to resolve.
120+
ValueError: if the ``component_name`` is failed to resolve.
109121
"""
110-
app_fn = entrypoints.load("torchx.components", component_path, default=NONE)
111-
if app_fn != NONE:
112-
app = from_function(app_fn, app_args)
113-
elif ":" in component_path:
114-
file_path, function_name = component_path.split(":")
122+
component_def = get_component(component_name)
123+
if component_def:
124+
app = from_function(component_def.fn, app_args)
125+
elif ":" in component_name:
126+
file_path, function_name = component_name.split(":")
115127
if not function_name:
116128
raise ValueError(
117-
f"Incorrect path: {component_path}. Missing function name after `:`"
129+
f"Incorrect path: {component_name}. Missing function name after `:`"
118130
)
119131
app = from_file(file_path, function_name, app_args)
120132
else:
121-
function_name = component_path.split(".")[-1]
122-
component_module = component_path[
123-
0 : len(component_path) - len(function_name) - 1
124-
]
125-
full_module = f"torchx.components.{component_module}"
126-
try:
127-
app_module = importlib.import_module(full_module)
128-
except ModuleNotFoundError:
129-
raise ValueError(
130-
f"Incorrect component path: {component_path}. Valid values: \n"
131-
"* $file_path:function_name, Example: ~/home/file.py:my_component \n"
132-
"* Module and function name to the builtin components, e.g. : distributed.ddp"
133-
)
134-
app = from_module(app_module, function_name, app_args)
133+
raise ValueError(
134+
f"Component `{component_name}` not found. Please make sure it is one of the "
135+
"builtins: `torchx builtins`. Or registered via `[torchx.components]` "
136+
"entry point (see: https://pytorch.org/torchx/latest/configure.html)"
137+
)
135138

136139
if dryrun:
137-
print("=== APPLICATION ===")
138-
print(pformat(asdict(app), indent=2, width=80))
139-
140-
print("=== SCHEDULER REQUEST ===")
141-
print(self.dryrun(app, scheduler, cfg))
142-
return "<NONE>"
140+
return self.dryrun(app, scheduler, cfg)
143141
else:
144142
return self.run(app, scheduler, cfg)
145143

0 commit comments

Comments
 (0)