Skip to content

Commit cac9dd4

Browse files
authored
Replaced torch.distributed.launch with torchrun (#206)
* Replaced torch.distributed.launch with torchrun
* Fixed code for torch.distributed.launch
* Changed torch.distributed.run to torchrun
* Reverted some unnecessary changes
* Reverted some more changes
* Updated scripts/run_tests.sh
1 parent ec93e2d commit cac9dd4

File tree

6 files changed

+42
-42
lines changed

6 files changed

+42
-42
lines changed

scripts/run_tests.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ run_launch() {
4949
for dir in $(find ./dist-tests/$1-launch -type d)
5050
do
5151
cd $dir
52-
python -m torch.distributed.launch \
53-
--nproc_per_node 2 --use_env \
52+
torchrun \
53+
--nproc_per_node 2 \
5454
main.py --backend gloo --data_path ~/data \
5555
--train_batch_size 2 \
5656
--eval_batch_size 2 \

src/templates/template-common/README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,42 +3,42 @@
33
#::: if (it.nproc_per_node) { :::#
44
#::: if (it.nnodes > 1 && it.master_addr && it.master_port) { :::#
55

6-
### Multi Node, Multi GPU Training (`torch.distributed.launch`) (recommended)
6+
### Multi Node, Multi GPU Training (`torchrun`) (recommended)
77

88
- Execute on master node
99

1010
```sh
11-
python -m torch.distributed.launch \
11+
torchrun \
1212
--nproc_per_node #:::= it.nproc_per_node :::# \
1313
--nnodes #:::= it.nnodes :::# \
1414
--node_rank 0 \
1515
--master_addr #:::= it.master_addr :::# \
1616
--master_port #:::= it.master_port :::# \
17-
--use_env main.py \
17+
main.py \
1818
--backend #:::= it.backend :::#
1919
```
2020

2121
- Execute on worker nodes
2222

2323
```sh
24-
python -m torch.distributed.launch \
24+
torchrun \
2525
--nproc_per_node #:::= it.nproc_per_node :::# \
2626
--nnodes #:::= it.nnodes :::# \
2727
--node_rank <node_rank> \
2828
--master_addr #:::= it.master_addr :::# \
2929
--master_port #:::= it.master_port :::# \
30-
--use_env main.py \
30+
main.py \
3131
--backend #:::= it.backend :::#
3232
```
3333

3434
#::: } else { :::#
3535

36-
### Multi GPU Training (`torch.distributed.launch`) (recommended)
36+
### Multi GPU Training (`torchrun`) (recommended)
3737

3838
```sh
39-
python -m torch.distributed.launch \
39+
torchrun \
4040
--nproc_per_node #:::= it.nproc_per_node :::# \
41-
--use_env main.py \
41+
main.py \
4242
--backend #:::= it.backend :::#
4343
```
4444

src/templates/template-text-classification/README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,42 +19,42 @@ pip install -r requirements.txt --progress-bar off -U
1919
#::: if (it.nproc_per_node) { :::#
2020
#::: if (it.nnodes > 1 && it.master_addr && it.master_port) { :::#
2121

22-
### Multi Node, Multi GPU Training (`torch.distributed.launch`) (recommended)
22+
### Multi Node, Multi GPU Training (`torchrun`) (recommended)
2323

2424
- Execute on master node
2525

2626
```sh
27-
python -m torch.distributed.launch \
27+
torchrun \
2828
--nproc_per_node #:::= it.nproc_per_node :::# \
2929
--nnodes #:::= it.nnodes :::# \
3030
--node_rank 0 \
3131
--master_addr #:::= it.master_addr :::# \
3232
--master_port #:::= it.master_port :::# \
33-
--use_env main.py \
33+
main.py \
3434
--backend #:::= it.backend :::#
3535
```
3636

3737
- Execute on worker nodes
3838

3939
```sh
40-
python -m torch.distributed.launch \
40+
torchrun \
4141
--nproc_per_node #:::= it.nproc_per_node :::# \
4242
--nnodes #:::= it.nnodes :::# \
4343
--node_rank <node_rank> \
4444
--master_addr #:::= it.master_addr :::# \
4545
--master_port #:::= it.master_port :::# \
46-
--use_env main.py \
46+
main.py \
4747
--backend #:::= it.backend :::#
4848
```
4949

5050
#::: } else { :::#
5151

52-
### Multi GPU Training (`torch.distributed.launch`) (recommended)
52+
### Multi GPU Training (`torchrun`) (recommended)
5353

5454
```sh
55-
python -m torch.distributed.launch \
55+
torchrun \
5656
--nproc_per_node #:::= it.nproc_per_node :::# \
57-
--use_env main.py \
57+
main.py \
5858
--backend #:::= it.backend :::#
5959
```
6060

src/templates/template-vision-classification/README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,42 +19,42 @@ pip install -r requirements.txt --progress-bar off -U
1919
#::: if (it.nproc_per_node) { :::#
2020
#::: if (it.nnodes > 1 && it.master_addr && it.master_port) { :::#
2121

22-
### Multi Node, Multi GPU Training (`torch.distributed.launch`) (recommended)
22+
### Multi Node, Multi GPU Training (`torchrun`) (recommended)
2323

2424
- Execute on master node
2525

2626
```sh
27-
python -m torch.distributed.launch \
27+
torchrun \
2828
--nproc_per_node #:::= it.nproc_per_node :::# \
2929
--nnodes #:::= it.nnodes :::# \
3030
--node_rank 0 \
3131
--master_addr #:::= it.master_addr :::# \
3232
--master_port #:::= it.master_port :::# \
33-
--use_env main.py \
33+
main.py \
3434
--backend #:::= it.backend :::#
3535
```
3636

3737
- Execute on worker nodes
3838

3939
```sh
40-
python -m torch.distributed.launch \
40+
torchrun \
4141
--nproc_per_node #:::= it.nproc_per_node :::# \
4242
--nnodes #:::= it.nnodes :::# \
4343
--node_rank <node_rank> \
4444
--master_addr #:::= it.master_addr :::# \
4545
--master_port #:::= it.master_port :::# \
46-
--use_env main.py \
46+
main.py \
4747
--backend #:::= it.backend :::#
4848
```
4949

5050
#::: } else { :::#
5151

52-
### Multi GPU Training (`torch.distributed.launch`) (recommended)
52+
### Multi GPU Training (`torchrun`) (recommended)
5353

5454
```sh
55-
python -m torch.distributed.launch \
55+
torchrun \
5656
--nproc_per_node #:::= it.nproc_per_node :::# \
57-
--use_env main.py \
57+
main.py \
5858
--backend #:::= it.backend :::#
5959
```
6060

src/templates/template-vision-dcgan/README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,42 +19,42 @@ pip install -r requirements.txt --progress-bar off -U
1919
#::: if (it.nproc_per_node) { :::#
2020
#::: if (it.nnodes > 1 && it.master_addr && it.master_port) { :::#
2121

22-
### Multi Node, Multi GPU Training (`torch.distributed.launch`) (recommended)
22+
### Multi Node, Multi GPU Training (`torchrun`) (recommended)
2323

2424
- Execute on master node
2525

2626
```sh
27-
python -m torch.distributed.launch \
27+
torchrun \
2828
--nproc_per_node #:::= it.nproc_per_node :::# \
2929
--nnodes #:::= it.nnodes :::# \
3030
--node_rank 0 \
3131
--master_addr #:::= it.master_addr :::# \
3232
--master_port #:::= it.master_port :::# \
33-
--use_env main.py \
33+
main.py \
3434
--backend #:::= it.backend :::#
3535
```
3636

3737
- Execute on worker nodes
3838

3939
```sh
40-
python -m torch.distributed.launch \
40+
torchrun \
4141
--nproc_per_node #:::= it.nproc_per_node :::# \
4242
--nnodes #:::= it.nnodes :::# \
4343
--node_rank <node_rank> \
4444
--master_addr #:::= it.master_addr :::# \
4545
--master_port #:::= it.master_port :::# \
46-
--use_env main.py \
46+
main.py \
4747
--backend #:::= it.backend :::#
4848
```
4949

5050
#::: } else { :::#
5151

52-
### Multi GPU Training (`torch.distributed.launch`) (recommended)
52+
### Multi GPU Training (`torchrun`) (recommended)
5353

5454
```sh
55-
python -m torch.distributed.launch \
55+
torchrun \
5656
--nproc_per_node #:::= it.nproc_per_node :::# \
57-
--use_env main.py \
57+
main.py \
5858
--backend #:::= it.backend :::#
5959
```
6060

src/templates/template-vision-segmentation/README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,42 +19,42 @@ pip install -r requirements.txt --progress-bar off -U
1919
#::: if (it.nproc_per_node) { :::#
2020
#::: if (it.nnodes > 1 && it.master_addr && it.master_port) { :::#
2121

22-
### Multi Node, Multi GPU Training (`torch.distributed.launch`) (recommended)
22+
### Multi Node, Multi GPU Training (`torchrun`) (recommended)
2323

2424
- Execute on master node
2525

2626
```sh
27-
python -m torch.distributed.launch \
27+
torchrun \
2828
--nproc_per_node #:::= it.nproc_per_node :::# \
2929
--nnodes #:::= it.nnodes :::# \
3030
--node_rank 0 \
3131
--master_addr #:::= it.master_addr :::# \
3232
--master_port #:::= it.master_port :::# \
33-
--use_env main.py \
33+
main.py \
3434
--backend #:::= it.backend :::#
3535
```
3636

3737
- Execute on worker nodes
3838

3939
```sh
40-
python -m torch.distributed.launch \
40+
torchrun \
4141
--nproc_per_node #:::= it.nproc_per_node :::# \
4242
--nnodes #:::= it.nnodes :::# \
4343
--node_rank <node_rank> \
4444
--master_addr #:::= it.master_addr :::# \
4545
--master_port #:::= it.master_port :::# \
46-
--use_env main.py \
46+
main.py \
4747
--backend #:::= it.backend :::#
4848
```
4949

5050
#::: } else { :::#
5151

52-
### Multi GPU Training (`torch.distributed.launch`) (recommended)
52+
### Multi GPU Training (`torchrun`) (recommended)
5353

5454
```sh
55-
python -m torch.distributed.launch \
55+
torchrun \
5656
--nproc_per_node #:::= it.nproc_per_node :::# \
57-
--use_env main.py \
57+
main.py \
5858
--backend #:::= it.backend :::#
5959
```
6060

0 commit comments

Comments (0)