apoorvkh
diff --git a/‎README.md
Lines changed: 8 additions & 8 deletions b/‎README.md
Lines changed: 8 additions & 8 deletions
diff --git a/‎docs/conf.py
Lines changed: 2 additions & 0 deletions b/‎docs/conf.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/source/api.md
Lines changed: 1 addition & 3 deletions b/‎docs/source/api.md
Lines changed: 1 addition & 3 deletions
diff --git a/‎docs/source/examples/accelerate.md
Lines changed: 1 addition & 1 deletion b/‎docs/source/examples/accelerate.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/source/examples/deepspeed.md
Lines changed: 2 additions & 2 deletions b/‎docs/source/examples/deepspeed.md
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/source/examples/lightning.md
Lines changed: 1 addition & 1 deletion b/‎docs/source/examples/lightning.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/source/examples/accelerate_example.py renamed to ‎docs/source/examples/scripts/accelerate_example.py b/‎docs/source/examples/accelerate_example.py renamed to ‎docs/source/examples/scripts/accelerate_example.py
diff --git a/‎docs/source/examples/deepspeed_config.json renamed to ‎docs/source/examples/scripts/deepspeed_config.json b/‎docs/source/examples/deepspeed_config.json renamed to ‎docs/source/examples/scripts/deepspeed_config.json
diff --git a/‎docs/source/examples/deepspeed_example.py renamed to ‎docs/source/examples/scripts/deepspeed_example.py b/‎docs/source/examples/deepspeed_example.py renamed to ‎docs/source/examples/scripts/deepspeed_example.py
diff --git a/‎docs/source/examples/transformers_example.py renamed to ‎docs/source/examples/scripts/transformers_train.py
Lines changed: 11 additions & 7 deletions b/‎docs/source/examples/transformers_example.py renamed to ‎docs/source/examples/scripts/transformers_train.py
Lines changed: 11 additions & 7 deletions
diff --git a/‎docs/source/examples/transformers.md
Lines changed: 38 additions & 4 deletions b/‎docs/source/examples/transformers.md
Lines changed: 38 additions & 4 deletions
@@ -73,14 +73,14 @@ trained_model: nn.Module = results.rank(0)
 torch.save(trained_model.state_dict(), "output/model.pth")
 ```
 
-**See [training GPT-2 on WikiText](https://torchrunx.readthedocs.io/stable/examples.html#training-gpt-2-on-wikitext) for more examples using the following deep learning libraries:**
-  - Accelerate
-  - HF Transformers
-  - DeepSpeed
-  - PyTorch Lightning
-  - MosaicML Composer
-
-**Refer to our [API](https://torchrunx.readthedocs.io/stable/api.html) and [Advanced Usage Guide](https://torchrunx.readthedocs.io/stable/advanced.html) for many more capabilities!**
+**See examples where we fine-tune LLMs (e.g. GPT-2 on WikiText) using:**
+  - [Accelerate](https://torchrun.xyz/examples/accelerate.html)
+  - [HF Transformers](https://torchrun.xyz/examples/transformers.html)
+  - [DeepSpeed](https://torchrun.xyz/examples/deepspeed.html)
+  - [PyTorch Lightning](https://torchrun.xyz/examples/lightning.html)
+  - [MosaicML Composer](https://torchrun.xyz/examples/composer.html)
+
+**Refer to our [API](https://torchrun.xyz/api.html) and [Advanced Usage Guide](https://torchrun.xyz/advanced.html) for many more capabilities!**
 
 ---
 
 
@@ -6,6 +6,8 @@
 html_theme = "furo"
 language = "en"
 
+html_extra_path = ["source/examples/scripts"]
+
 extensions = [
     "autodoc2",
     "myst_parser",  # support markdown
 
@@ -1,9 +1,7 @@
 # API
 
-## Launching functions
-
 ```{eval-rst}
-.. autofunction:: torchrunx.launch(func: Callable, ...)
+.. autofunction:: torchrunx.launch
 ```
 
 We provide the {mod}`torchrunx.Launcher` class as an alias to {mod}`torchrunx.launch`.
 
@@ -1,5 +1,5 @@
 # Accelerate
 
 ```{eval-rst}
-.. literalinclude:: ./accelerate_example.py
+.. literalinclude:: ./scripts/accelerate_example.py
 ```
@@ -1,6 +1,6 @@
 # DeepSpeed
 
 ```{eval-rst}
-.. literalinclude:: ./deepspeed_example.py
-.. literalinclude:: ./deepspeed_config.json
+.. literalinclude:: ./scripts/deepspeed_example.py
+.. literalinclude:: ./scripts/deepspeed_config.json
 ```
@@ -1,5 +1,5 @@
 # PyTorch Lightning
 
 ```{eval-rst}
-.. literalinclude:: ./lightning_example.py
+.. literalinclude:: ./scripts/lightning_example.py
 ```
@@ -9,6 +9,8 @@
 # ]
 # ///
 
+# [docs:start-after]
+import functools
 import os
 from typing import Annotated
 
@@ -25,7 +27,7 @@
 import torchrunx
 
 
-def build_model(name: str = "gpt2") -> PreTrainedModel:
+def build_model(name: str) -> PreTrainedModel:
     return AutoModelForCausalLM.from_pretrained(name)
 
 
@@ -41,6 +43,12 @@ def load_training_data(
     tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
+    tokenize_fn = functools.partial(
+        tokenizer,
+        max_length=tokenizer.model_max_length,
+        truncation=True,
+        padding="max_length",
+    )
 
     dataset = load_dataset(path, name=name, split=split)
 
@@ -50,12 +58,7 @@ def load_training_data(
     return (
         dataset.select(range(num_samples))
         .map(
-            lambda x: tokenizer(
-                x[text_column_name],
-                max_length=tokenizer.model_max_length,
-                truncation=True,
-                padding="max_length",
-            ),
+            tokenize_fn,
             batched=True,
             input_columns=[text_column_name],
             remove_columns=[text_column_name],
@@ -74,6 +77,7 @@ def train(
     )
     trainer.train()
 
+    # TODO: return checkpoint path
     if int(os.environ["RANK"]) == 0:
         return model
 
 
@@ -1,11 +1,45 @@
 # Transformers
 
+Here's an example script that uses `torchrunx` with [`transformers.Trainer`](https://huggingface.co/docs/transformers/en/main_classes/trainer) to fine-tune any causal language model (from `transformers`) on any text dataset (from `datasets`) with any number of GPUs or nodes: [https://torchrun.xyz/transformers_train.py](https://torchrun.xyz/transformers_train.py).
+
+You can pass command-line arguments to customize:
+  - `--launcher`: [`torchrunx.Launcher`](../api.md#torchrunx.Launcher)
+  - `--model`: [`transformers.AutoModelForCausalLM`](https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoModelForCausalLM)
+  - `--dataset`: [`transformers.AutoTokenizer`](https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoTokenizer) and [`datasets.load_dataset`](https://huggingface.co/docs/datasets/en/package_reference/loading_methods#datasets.load_dataset)
+  - `--trainer`: [`transformers.TrainingArguments`](https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.TrainingArguments)
+
+The following arguments are required: `--model.name`, `--dataset.tokenizer-name`, `--dataset.path`, `--trainer.output-dir`.
+
+<details>
+  <summary><p style="display: inline-block;"><code class="docutils literal notranslate"><span class="pre">python transformers_train.py --help</span></code></p> (expand)</summary>
+
+  ```{eval-rst}
+  .. literalinclude:: ./transformers_help.txt
+  ```
+</details>
+
+Of course, this script is a template: you can also edit the script first, as desired.
+
+### Training GPT-2 on WikiText in One Line
+
+The following one-line command runs our script end-to-end (installing all dependencies, downloading model and data, training, logging to TensorBoard, etc.).
+
+Prerequisites: [uv](https://docs.astral.sh/uv)
+
 ```bash
-uv run torchrun.xyz/torchrunx_transformers.py \
-      --launcher.hostnames localhost --launcher.workers-per-host 2 \
-      --args.output_dir output --args.per-device-train-batch-size 4 --args.report-to tensorboard
+uv run https://torchrun.xyz/transformers_train.py \
+      --model.name gpt2 --dataset.tokenizer-name gpt2 \
+      --dataset.path "Salesforce/wikitext" --dataset.name "wikitext-2-v1" --dataset.split "train" --dataset.num-samples 80 \
+      --trainer.output-dir output --trainer.per-device-train-batch-size 4 --trainer.report-to tensorboard
 ```
 
+We don't need to pass `--launcher` arguments by default. But if you want to do multi-node training (and are not using SLURM), you can also pass e.g. `--launcher.hostnames node1 node2`.
+
+### Script
+
+Note: the [raw source code](https://torchrun.xyz/transformers_train.py) also specifies its dependencies at the top of the file, in [PEP 723](https://peps.python.org/pep-0723) format, which is how `uv` installs them in the command above.
+
 ```{eval-rst}
-.. literalinclude:: ./transformers_example.py
+.. literalinclude:: ./scripts/transformers_train.py
+   :start-after: # [docs:start-after]
 ```