Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ cuda = [
"mpi4py==4.1.1",
]
rocm = [
"torch==2.10.0+rocm7.1",
"torch==2.10.0+rocm7.2.1.lw.gitb07cec22",
"mpi4py==4.1.1",
]
rocmwci = [
Expand Down
31 changes: 0 additions & 31 deletions scripts/install-rccl.sh

This file was deleted.

9 changes: 5 additions & 4 deletions scripts/install-tuolumne-torchpypi.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
. install-rccl.sh
ml load python/3.11.5 && python3 -m venv .venvs/scaffoldvenv-tuo-pypi && source .venvs/scaffoldvenv-tuo-pypi/bin/activate && pip install --upgrade pip
ml cce/21.0.0 cray-mpich/9.1.0 rocm/7.1.1 rccl/fast-env-slows-mpi
pip install -e .[rocm] --prefix=.venvs/scaffoldvenv-tuo-pypi --extra-index-url https://download.pytorch.org/whl/rocm7.1 2>&1 | tee install.log
ml load python/3.12.2 && python3 -m venv .venvs/scaffoldvenv-tuo-pypi && source .venvs/scaffoldvenv-tuo-pypi/bin/activate && pip install --upgrade pip
ml cce/21.0.0 cray-mpich/9.1.0 rocm/7.2.1 rccl/fast-env-slows-mpi
pip install -e .[rocm] --find-links https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2.1/ 2>&1 | tee install.log
# libmpi.so.12 does not exist on this system (check with: ls /opt/cray/pe/lib64/ | grep libmpi), so retarget mpi4py to libmpi_gnu.so.12
patchelf --replace-needed libmpi.so.12 libmpi_gnu.so.12 .venvs/scaffoldvenv-tuo-pypi/lib/python3.12/site-packages/mpi4py/MPI.mpich.cpython-312-x86_64-linux-gnu.so
2 changes: 1 addition & 1 deletion scripts/install-tuolumne.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ml load python/3.11.5 && python3 -m venv .venvs/scaffoldvenv-tuo && source .venvs/scaffoldvenv-tuo/bin/activate && pip install --upgrade pip
ml cce/21.0.0 cray-mpich/9.1.0 rocm/7.1.1 rccl/fast-env-slows-mpi
pip install -e .[rocmwci] --prefix=.venvs/scaffoldvenv-tuo 2>&1 | tee install.log
pip install -e .[rocmwci] 2>&1 | tee install.log
# Needed until new wheel exists for torch using mpich 9.1.0
TORCH_LIB_DIR=".venvs/scaffoldvenv-tuo/lib/python3.11/site-packages/torch/lib"
OLD="libmpi_gnu_112.so.12"
Expand Down
11 changes: 5 additions & 6 deletions scripts/scaffold-tuolumne-torchpypi.job
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,20 @@
# flux: -g=1
# flux: -t 60m
# flux: -qpdebug
# flux: -B fractale
# flux: -B flask

ml cce/21.0.0 cray-mpich/9.1.0 rocm/7.1.1 rccl/fast-env-slows-mpi
ml cce/21.0.0 cray-mpich/9.1.0 rocm/7.2.1 rccl/fast-env-slows-mpi

. .venvs/scaffoldvenv-tuo-pypi/bin/activate

# Use ccl plugin that we manually built with install-rccl.sh
export NCCL_NET_PLUGIN=../aws-ofi-nccl.git/install/lib/librccl-net.so
export NCCL_NET_PLUGIN=/collab/usr/global/tools/rccl/toss_4_x86_64_ib_cray/rocm-7.2.0/install/lib/librccl-net.so

# Disable direct convolution benchmarking (should speed up warmup by a significant amount; has the same effect as setting the three options below together)
# export MIOPEN_DEBUG_CONV_DIRECT=0
# Disable direct naive convolution benchmarking (naive_conv_ab_nonpacked_fwd_ndhwc_half_double_half.kd)
export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD=0
# Disable naive_conv_ab_nonpacked_bwd_ndhwc_half_double_half.kd
# export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD=0
export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD=0
# Disable naive_conv_ab_nonpacked_wrw_ndhwc_half_double_half.kd
export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW=0

Expand All @@ -29,4 +28,4 @@ torchrun-hpc -N 1 -n 1 $(which scaffold) generate_fractals -c $(pwd)/ScaFFold/co
#export PROFILE_TORCH=ON

torchrun-hpc -N 1 -n 4 --gpus-per-proc 1 $(which scaffold) benchmark -c $(pwd)/ScaFFold/configs/benchmark_default.yml
#torchrun-hpc -N 2 -n 4 --gpus-per-proc 1 $(which scaffold) benchmark -c $(pwd)/ScaFFold/configs/benchmark_default.yml
# torchrun-hpc -N 2 -n 4 --gpus-per-proc 1 $(which scaffold) benchmark -c $(pwd)/ScaFFold/configs/benchmark_default.yml
6 changes: 3 additions & 3 deletions scripts/scaffold-tuolumne.job
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# flux: -g=1
# flux: -t 60m
# flux: -qpdebug
# flux: -B fractale
# flux: -B flask

ml cce/21.0.0 cray-mpich/9.1.0 rocm/7.1.1 rccl/fast-env-slows-mpi

Expand All @@ -20,7 +20,7 @@ export LD_PRELOAD="/opt/rocm-7.1.1/llvm/lib/libomp.so /opt/cray/pe/mpich/9.1.0/o
# Disable direct naive convolution benchmarking (naive_conv_ab_nonpacked_fwd_ndhwc_half_double_half.kd)
export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD=0
# Disable naive_conv_ab_nonpacked_bwd_ndhwc_half_double_half.kd
# export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD=0
export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD=0
# Disable naive_conv_ab_nonpacked_wrw_ndhwc_half_double_half.kd
export MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW=0

Expand All @@ -30,4 +30,4 @@ torchrun-hpc -N 1 -n 1 $(which scaffold) generate_fractals -c $(pwd)/ScaFFold/co
#export PROFILE_TORCH=ON

torchrun-hpc -N 1 -n 4 --gpus-per-proc 1 $(which scaffold) benchmark -c $(pwd)/ScaFFold/configs/benchmark_default.yml
#torchrun-hpc -N 2 -n 4 --gpus-per-proc 1 $(which scaffold) benchmark -c $(pwd)/ScaFFold/configs/benchmark_default.yml
# torchrun-hpc -N 2 -n 4 --gpus-per-proc 1 $(which scaffold) benchmark -c $(pwd)/ScaFFold/configs/benchmark_default.yml
Loading