diff --git a/docs/source/conf.py b/docs/source/conf.py index 136fdb38..b3f747be 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -79,3 +79,27 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "nvidia_sphinx_theme" +html_theme_options = { + "switcher": { + "json_url": "../versions1.json", + "version_match": release, + }, + "extra_head": { + """ + + """ + }, + "extra_footer": { + """ + + """ + }, + "icon_links": [ + { + "name": "GitHub", + "url": "https://github.com/NVIDIA-NeMo/Run", + "icon": "fa-brands fa-github", + } + ], +} +html_extra_path = ["project.json", "versions1.json"] diff --git a/docs/source/guides/index.md b/docs/source/guides/index.md new file mode 100644 index 00000000..912d7ce8 --- /dev/null +++ b/docs/source/guides/index.md @@ -0,0 +1,42 @@ +Guides +================= + +```{toctree} +:maxdepth: 2 +:hidden: + +why-use-nemo-run +configuration +execution +management +ray +cli +``` + +Welcome to the NeMo-Run guides! This section provides comprehensive documentation on how to use NeMo-Run effectively for your machine learning experiments. + +## Get Started + +If you're new to NeMo-Run, we recommend starting with: + +- **[Why Use NeMo-Run?](why-use-nemo-run.md)** - Understand the benefits and philosophy behind NeMo-Run. +- **[Configuration](configuration.md)** - Learn how to configure your ML tasks and experiments. +- **[Execution](execution.md)** - Discover how to run your experiments across different computing environments. +- **[Management](management.md)** - Master experiment tracking, reproducibility, and organization. + +## Advanced Topics + +For more advanced usage: + +- **[Ray Integration](ray.md)** - Learn how to use NeMo-Run with Ray for distributed computing. +- **[CLI Reference](cli.md)** - Explore the command-line interface for NeMo-Run. + +## Core Concepts + +NeMo-Run is built around three core responsibilities: + +1. 
**Configuration** - Define your ML experiments using a flexible, Pythonic configuration system. +2. **Execution** - Run your experiments seamlessly across local machines, Slurm clusters, cloud providers, and more. +3. **Management** - Track, reproduce, and organize your experiments with built-in experiment management. + +Each guide dives deep into these concepts with practical examples and best practices. Choose a guide above to get started! diff --git a/docs/source/guides/index.rst b/docs/source/guides/index.rst deleted file mode 100644 index f2dacf22..00000000 --- a/docs/source/guides/index.rst +++ /dev/null @@ -1,12 +0,0 @@ -Guides -================= - -.. toctree:: - :maxdepth: 2 - - configuration - execution - management - why-use-nemo-run - ray - cli diff --git a/docs/source/guides/management.md b/docs/source/guides/management.md index 5bccbb79..a2e2c147 100644 --- a/docs/source/guides/management.md +++ b/docs/source/guides/management.md @@ -1,8 +1,8 @@ -# Management +# Manage NeMo-Run -The central component for management of tasks in NeMo-Run is the `Experiment` class. It allows you to define, launch, and manage complex workflows consisting of multiple tasks. This guide provides an overview of the `Experiment` class, its methods, and how to use it effectively. +The central component for the management of tasks in NeMo-Run is the `Experiment` class. It allows you to define, launch, and manage complex workflows consisting of multiple tasks. This guide provides an overview of the `Experiment` class, its methods, and how to use it effectively. -## **Creating an Experiment** +## Create an Experiment To create an experiment, you can instantiate the `Experiment` class by passing in a descriptive title: @@ -14,11 +14,11 @@ When executed, it will automatically generate a unique experiment ID for you, wh > [!NOTE] > `Experiment` is a context manager and `Experiment.add` and `Experiment.run` methods can currently only be used after entering the context manager. 
-## **Adding Tasks** +## Add Tasks You can add tasks to an experiment using the `add` method. This method supports tasks of the following kind: -- A single task which is an instance of either `run.Partial` or `run.Script`, along with its executor. +- A single task, which is an instance of either `run.Partial` or `run.Script`, along with its executor. ```python with exp: @@ -50,7 +50,7 @@ with run.Experiment("dag-experiment", log_level="INFO") as exp: ) ``` -## **Launching an Experiment** +## Launch an Experiment Once you have added all tasks to an experiment, you can launch it using the `run` method. This method takes several optional arguments, including `detach`, `sequential`, and `tail_logs` and `direct`: @@ -65,7 +65,7 @@ with exp: exp.run(detach=True, sequential=False, tail_logs=True, direct=False) ``` -## **Experiment Status** +## Check Experiment Status You can check the status of an experiment using the `status` method: @@ -73,7 +73,7 @@ You can check the status of an experiment using the `status` method: exp.status() ``` -This method will display information the status of each task in the experiment. The following is a sample output from the status of experiment in [hello_scripts.py](../../../examples/hello-world/hello_scripts.py): +This method will display information about the status of each task in the experiment. 
The following is a sample output from the status of the experiment in [hello_scripts.py](../../../examples/hello-world/hello_scripts.py): ```bash Experiment Status for experiment_with_scripts_1730761155 @@ -97,7 +97,7 @@ Task 2: simple.add.add_object - Local Directory: /home/your_user/.nemo_run/experiments/experiment_with_scripts/experiment_with_scripts_1730761155/simple.add.add_object ``` -## **Canceling a Task** +## Cancel a Task You can cancel a task using the `cancel` method: @@ -105,7 +105,7 @@ You can cancel a task using the `cancel` method: exp.cancel("task_id") ``` -## **Viewing Logs** +## View Logs You can view the logs of a task using the `logs` method: @@ -113,7 +113,7 @@ You can view the logs of a task using the `logs` method: exp.logs("task_id") ``` -## **Experiment output** +## Review Experiment Output Once an experiment is run, NeMo-Run displays information on ways to inspect and reproduce past experiments. This allows you to check logs, sync artifacts (in the future), cancel running tasks, and rerun an old experiment. diff --git a/docs/source/index.rst b/docs/source/index.rst index 6692d020..714e7823 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,7 +3,7 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -NeMo-Run documentation +NeMo-Run Documentation ====================== NeMo-Run is a powerful tool designed to streamline the configuration, execution and management of Machine Learning experiments across various computing environments. NeMo Run has three core responsibilities: @@ -18,23 +18,25 @@ This is also the typical order Nemo Run users will follow to setup and launch ex ..
toctree:: :maxdepth: 1 - guides/index.rst + guides/index API Reference faqs -Installation ------------- +Install the Project +------------------- To install the project, use the following command: ``pip install git+https://github.com/NVIDIA-NeMo/Run.git`` -To install Skypilot, we have optional features available. +To install Skypilot with optional features, use one of the following commands: -``pip install git+https://github.com/NVIDIA-NeMo/Run.git[skypilot]`` -will install Skypilot w Kubernetes +- To install Skypilot with Kubernetes support: -``pip install git+https://github.com/NVIDIA-NeMo/Run.git[skypilot-all]`` -will install Skypilot w all clouds + ``pip install git+https://github.com/NVIDIA-NeMo/Run.git[skypilot]`` + +- To install Skypilot with support for all cloud platforms: + + ``pip install git+https://github.com/NVIDIA-NeMo/Run.git[skypilot-all]`` You can also manually install Skypilot from https://skypilot.readthedocs.io/en/latest/getting-started/installation.html @@ -50,7 +52,7 @@ Make sure you have `pip` installed and configured properly. Tutorials --------- -The ``hello_world`` tutorial series provides a comprehensive introduction to NeMo Run, demonstrating its capabilities through a simple example. The tutorial covers: +The ``hello_world`` tutorial series provides a comprehensive introduction to NeMo-Run, demonstrating its capabilities through a simple example. The tutorial covers: - Configuring Python functions using ``Partial`` and ``Config`` classes. - Executing configured functions locally and on remote clusters. @@ -59,6 +61,6 @@ The ``hello_world`` tutorial series provides a comprehensive introduction to NeM You can find the tutorial series below: -1. `Part 1 <../../../NeMo-Run/examples/hello-world/hello_world.ipynb>` -2. `Part 2 <../../../NeMo-Run/examples/hello-world/hello_experiments.ipynb>` -3. `Part 3 <../../../NeMo-Run/examples/hello-world/hello_scripts.py>` +1. `Part 1: Hello World <../../../examples/hello-world/hello_world.ipynb>`_ +2. 
`Part 2: Hello Experiments <../../../examples/hello-world/hello_experiments.ipynb>`_ +3. `Part 3: Hello Scripts <../../../examples/hello-world/hello_scripts.py>`_ diff --git a/docs/source/project.json b/docs/source/project.json new file mode 100644 index 00000000..b14a96fb --- /dev/null +++ b/docs/source/project.json @@ -0,0 +1 @@ +{"name": "NeMo-Run", "version": "0.1.0"} diff --git a/docs/source/versions1.json b/docs/source/versions1.json new file mode 100644 index 00000000..604af762 --- /dev/null +++ b/docs/source/versions1.json @@ -0,0 +1,7 @@ +[ + { + "preferred": true, + "version": "0.1.0", + "url": "../0.1.0" + } +]