Skip to content

Frontier scripts for nanogpt #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 12 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 59 additions & 3 deletions AMG2023/README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# AMG2023 README
For more detailed installation parameters, please refer to the [installation document](https://github.com/pssg-int/AMG2023/blob/main/amg-doc.pdf).

## Perlmutter Compilation
Repository: [AMG2023](https://github.com/hpcgroup/AMG2023/)

Repository: [AMG2023](https://github.com/pssg-int/AMG2023)
## Perlmutter Compilation

### Steps to Compile

Expand Down Expand Up @@ -50,5 +50,61 @@ Repository: [AMG2023](https://github.com/pssg-int/AMG2023)
cmake -DHYPRE_PREFIX=/pscratch/sd/c/cunyang/AMG2023 ..
```

## Frontier Installation
## Frontier Compilation

### Steps to Compile

1. Load modules
```sh
module reset

module load cray-mpich/8.1.30
module load craype-accel-amd-gfx90a
module load rocm/6.1.3
export MPICH_GPU_SUPPORT_ENABLED=1

# load compatible cmake version
module load Core/24.07
module load cmake/3.27.9
```
2. Configure hypre (v2.32.0)
- Clone hypre v2.32.0 and navigate to src:
```sh
git clone -b v2.32.0 https://github.com/hypre-space/hypre.git
cd hypre/src
```
- Configure hypre (in hypre/src)
```sh
./configure --with-hip --enable-device-memory-pool --enable-mixedint --with-gpu-arch=gfx90a \
--with-MPI-lib-dirs="${MPICH_DIR}/lib" --with-MPI-libs="mpi" \
--with-MPI-include="${MPICH_DIR}/include" \
CFLAGS="-I${ROCM_PATH}/include/ -I${ROCM_PATH}/llvm/include/ -I${ROCM_PATH}/include/rocsparse/" \
LDFLAGS="-L${ROCM_PATH}/lib/ -L${ROCM_PATH}/llvm/lib/ -lrocsparse"
```
- Compile hypre (in hypre/src)
```sh
# build with make
make
```
3. Configure AMG2023
- Clone repo:
```sh
git clone https://github.com/pssg-int/AMG2023
cd AMG2023
```
- Add mpiP to LD_LIBRARY_PATH
```sh
export LD_LIBRARY_PATH=/ccs/home/keshprad/mpiP:$LD_LIBRARY_PATH
```
- Configure cmake
```sh
mkdir build && cd build

cmake .. -DHYPRE_PREFIX=/ccs/home/keshprad/hypre/src/hypre/ \
-DCMAKE_C_FLAGS="-I${ROCM_PATH}/include/ -I${ROCM_PATH}/llvm/include/ -I${ROCM_PATH}/include/rocsparse/" \
-DCMAKE_EXE_LINKER_FLAGS="-L${ROCM_PATH}/lib/ -L${ROCM_PATH}/llvm/lib/ -lrocsparse -lrocrand"
```
- Compile AMG2023 (in AMG2023/build)
```sh
make install
```
57 changes: 57 additions & 0 deletions AMG2023/run_frontier_16.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/bin/bash
#SBATCH -N 16
#SBATCH -n 128
#SBATCH -q normal
#SBATCH -J amg
#SBATCH --gpu-bind none
#SBATCH -t 00:30:00
#SBATCH -A csc569
#SBATCH --output /lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/16nodes/%x-%j/output-AMG2023.log
#SBATCH --error /lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/16nodes/%x-%j/error-AMG2023.log
#SBATCH --exclusive
# Run like: sbatch run_frontier_16.sh
#
# Runs GPU microbenchmarks (allreduce, gemm) and then an AMG2023 solve on
# 16 Frontier nodes (8 GCDs/node -> 128 tasks), logging under $OUTPUT_DIR.

OUTPUT_DIR=/lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/16nodes/$SLURM_JOB_NAME-$SLURM_JOB_ID
OUTPUT_FILE=$OUTPUT_DIR/output-AMG2023.log
ERROR_FILE=$OUTPUT_DIR/error-AMG2023.log
# Slurm does not create output directories; ensure it exists before srun writes to it.
mkdir -p "$OUTPUT_DIR"

# Run gpu benchmarks
COMM_TYPE=mpi
ROCM_VERSION=6.1.3
PERF_VARIABILITY_ROOT=/ccs/home/keshprad/perf-variability
echo running allreduce benchmark
bash "$PERF_VARIABILITY_ROOT/gpu-benchmarks/allreduce/run_frontier.sh" "$COMM_TYPE" "$ROCM_VERSION" "$SLURM_JOB_NUM_NODES" "$OUTPUT_DIR"
# echo running allgather benchmark
# bash "$PERF_VARIABILITY_ROOT/gpu-benchmarks/allgather/run_frontier.sh" "$COMM_TYPE" "$ROCM_VERSION" "$SLURM_JOB_NUM_NODES" "$OUTPUT_DIR"
echo running gemm benchmark
bash "$PERF_VARIABILITY_ROOT/gpu-benchmarks/gemm/run_frontier.sh" "$ROCM_VERSION" "$SLURM_JOB_NUM_NODES" "$OUTPUT_DIR"

APP_ROOT=/ccs/home/keshprad/AMG2023
cd "$APP_ROOT" || exit 1

# reset modules
echo resetting modules:
module reset
# load modules
echo loading modules:
module load cray-mpich/8.1.30
module load craype-accel-amd-gfx90a
module load rocm/6.1.3

export MPICH_GPU_SUPPORT_ENABLED=1
export CRAY_ACCEL_TARGET=gfx90a
export HYPRE_INSTALL_DIR=/ccs/home/keshprad/hypre/src/hypre/
# mpiP profiler: prepend its lib dir and direct its reports into $OUTPUT_DIR
export LD_LIBRARY_PATH=/ccs/home/keshprad/mpiP:$LD_LIBRARY_PATH
export MPIP="-o -f $OUTPUT_DIR"

# log start date
echo "start AMG2023: $(date)"
# define command as an array so paths with spaces survive word-splitting
cmd=(srun --output "$OUTPUT_FILE" --error "$ERROR_FILE"
  ./build/amg -P 4 4 8 -n 128 64 64 -problem 1 -iter 500)
echo solving:
echo "${cmd[@]}"
"${cmd[@]}"
# log end date
echo "end AMG2023: $(date)"
57 changes: 57 additions & 0 deletions AMG2023/run_frontier_64.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/bin/bash
#SBATCH -N 64
#SBATCH -n 512
#SBATCH -q normal
#SBATCH -J amg
#SBATCH --gpu-bind none
#SBATCH -t 00:30:00
#SBATCH -A csc569
#SBATCH --output /lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/64nodes/%x-%j/output-AMG2023.log
#SBATCH --error /lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/64nodes/%x-%j/error-AMG2023.log
#SBATCH --exclusive
# Run like: sbatch run_frontier_64.sh
#
# Runs GPU microbenchmarks (allreduce, gemm) and then an AMG2023 solve on
# 64 Frontier nodes (8 GCDs/node -> 512 tasks), logging under $OUTPUT_DIR.

OUTPUT_DIR=/lustre/orion/csc569/scratch/keshprad/perfvar/AMG2023_logs/64nodes/$SLURM_JOB_NAME-$SLURM_JOB_ID
OUTPUT_FILE=$OUTPUT_DIR/output-AMG2023.log
ERROR_FILE=$OUTPUT_DIR/error-AMG2023.log
# Slurm does not create output directories; ensure it exists before srun writes to it.
mkdir -p "$OUTPUT_DIR"

# Run gpu benchmarks
COMM_TYPE=mpi
ROCM_VERSION=6.1.3
PERF_VARIABILITY_ROOT=/ccs/home/keshprad/perf-variability
echo running allreduce benchmark
bash "$PERF_VARIABILITY_ROOT/gpu-benchmarks/allreduce/run_frontier.sh" "$COMM_TYPE" "$ROCM_VERSION" "$SLURM_JOB_NUM_NODES" "$OUTPUT_DIR"
# echo running allgather benchmark
# bash "$PERF_VARIABILITY_ROOT/gpu-benchmarks/allgather/run_frontier.sh" "$COMM_TYPE" "$ROCM_VERSION" "$SLURM_JOB_NUM_NODES" "$OUTPUT_DIR"
echo running gemm benchmark
bash "$PERF_VARIABILITY_ROOT/gpu-benchmarks/gemm/run_frontier.sh" "$ROCM_VERSION" "$SLURM_JOB_NUM_NODES" "$OUTPUT_DIR"

APP_ROOT=/ccs/home/keshprad/AMG2023
cd "$APP_ROOT" || exit 1

# reset modules
echo resetting modules:
module reset
# load modules
echo loading modules:
module load cray-mpich/8.1.30
module load craype-accel-amd-gfx90a
module load rocm/6.1.3

export MPICH_GPU_SUPPORT_ENABLED=1
export CRAY_ACCEL_TARGET=gfx90a
export HYPRE_INSTALL_DIR=/ccs/home/keshprad/hypre/src/hypre/
# mpiP profiler: prepend its lib dir and direct its reports into $OUTPUT_DIR
export LD_LIBRARY_PATH=/ccs/home/keshprad/mpiP:$LD_LIBRARY_PATH
export MPIP="-o -f $OUTPUT_DIR"

# log start date
echo "start AMG2023: $(date)"
# define command as an array so paths with spaces survive word-splitting
cmd=(srun --output "$OUTPUT_FILE" --error "$ERROR_FILE"
  ./build/amg -P 8 8 8 -n 128 64 64 -problem 1 -iter 500)
echo solving:
echo "${cmd[@]}"
"${cmd[@]}"
# log end date
echo "end AMG2023: $(date)"
19 changes: 19 additions & 0 deletions AMG2023/run_frontier_crontab.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
# Submit the Frontier AMG2023 batch script for a given node count.
# Intended to be launched from cron, which has a bare environment, so it
# bootstraps lmod and the default module search path itself.
if [ "$#" -ne 1 ]; then
    # usage errors belong on stderr
    echo "Usage: $0 <number_of_nodes>" >&2
    exit 1
fi
# `16` or `64`
NUM_NODES=$1

PERF_VARIABILITY_ROOT=/ccs/home/keshprad/perf-variability

# load lmod
source /usr/share/lmod/lmod/init/bash
# load default LMOD_SYSTEM_DEFAULT_MODULES and MODULEPATH
export LMOD_SYSTEM_DEFAULT_MODULES=craype-x86-trento:craype-network-ofi:perftools-base:xpmem:cray-pmi:PrgEnv-cray:DefApps
export MODULEPATH=/sw/frontier/spack-envs/modules/cce/17.0.0/cray-mpich-8.1.28/cce-17.0.0:/sw/frontier/spack-envs/modules/cce/17.0.0/cce-17.0.0:/sw/frontier/spack-envs/modules/Core/24.07:/opt/cray/pe/lmod/modulefiles/mpi/crayclang/17.0/ofi/1.0/cray-mpich/8.0:/opt/cray/pe/lmod/modulefiles/comnet/crayclang/17.0/ofi/1.0:/opt/cray/pe/lmod/modulefiles/compiler/crayclang/17.0:/opt/cray/pe/lmod/modulefiles/mix_compilers:/opt/cray/pe/lmod/modulefiles/perftools/23.12.0:/opt/cray/pe/lmod/modulefiles/net/ofi/1.0:/opt/cray/pe/lmod/modulefiles/cpu/x86-trento/1.0:/opt/cray/pe/modulefiles/Linux:/opt/cray/pe/modulefiles/Core:/opt/cray/pe/lmod/lmod/modulefiles/Core:/opt/cray/pe/lmod/modulefiles/core:/opt/cray/pe/lmod/modulefiles/craype-targets/default:/sw/frontier/modulefiles:/opt/cray/modulefiles

# run sbatch script (run_frontier_16.sh or run_frontier_64.sh)
script=$PERF_VARIABILITY_ROOT/AMG2023/run_frontier_${NUM_NODES}.sh
sbatch "$script"
14 changes: 14 additions & 0 deletions gpu-benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# gpu-benchmarks README
Code Repository: [gpu-benchmarks](#TODO:)

## Perlmutter Compilation

### Steps to Compile

TODO:

## Frontier Compilation

### Steps to Compile

TODO:
63 changes: 63 additions & 0 deletions gpu-benchmarks/allgather/run_frontier.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/bin/bash
# NOTE: the shebang must be the very first line of the file or it is ignored.
#
# This script assumes it is being run by another sbatch script,
# so does not include portions for SBATCH vars (e.g. account, time, etc.)
#
# run like:
#   bash gpu-benchmarks/allgather/run_frontier.sh <comm_type> <rocm_version> <num_nodes> <output_dir>
if [ "$#" -ne 4 ]; then
    echo "Usage: $0 <communication_type> <rocm_version> <number_of_nodes> <output_dir>"
    exit 1
fi
# `mpi` or `rccl`
COMM_TYPE=$1
# `5.7.1` or `6.1.3`
ROCM_VERSION=$2
# `16` or `64`
NUM_NODES=$3
# output directory
OUTPUT_DIR=$4

# setup cray-mpich version matching the requested ROCm release
if [[ "$ROCM_VERSION" == "6.1.3" ]]; then
    MPICH_VERSION=8.1.30
else
    MPICH_VERSION=8.1.28
fi

OUTPUT_FILE=$OUTPUT_DIR/output-allgather.log

{
    # reset modules
    echo resetting modules:
    module reset
    # load modules
    echo loading modules:
    module load PrgEnv-cray craype-accel-amd-gfx90a cpe/23.05 amd/${ROCM_VERSION}
    module load cray-mpich/${MPICH_VERSION}
    module load rocm/${ROCM_VERSION}
    module list

    GPU_BENCHMARKS_ROOT=/lustre/orion/csc569/scratch/keshprad/gpu-benchmarks
    EXEC=$GPU_BENCHMARKS_ROOT/allgather_${COMM_TYPE}_rocm-${ROCM_VERSION}.x
    NUM_TASKS=$((NUM_NODES * 8))
    MIN_MSG_SIZE=$((1 * 1024))
    MAX_MSG_SIZE=$((1 * 1024 * 1024))
    ITERATIONS=100

    export MPICH_GPU_SUPPORT_ENABLED=1
    export LD_LIBRARY_PATH="${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}"

    echo "start allgather: $(date)"
    # For the MPI benchmark, map one task per GCD explicitly
    # (--gpus-per-node/--gpus-per-task/--ntasks-per-node) and leave GPU
    # binding to the application (--gpu-bind none).
    CMD="srun -N $NUM_NODES -n $NUM_TASKS \
        --gpus-per-node 8 \
        --gpus-per-task 1 \
        --ntasks-per-node 8 \
        --gpu-bind none \
        --output $OUTPUT_FILE \
        $EXEC $NUM_TASKS $MIN_MSG_SIZE $MAX_MSG_SIZE $ITERATIONS"
    echo running:
    echo "$CMD"
    $CMD
    echo "end allgather: $(date)"
} &>> "$OUTPUT_FILE"
58 changes: 58 additions & 0 deletions gpu-benchmarks/allreduce/run_frontier.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash
# NOTE: the shebang must be the very first line of the file or it is ignored.
#
# This script assumes it is being run by another sbatch script,
# so does not include portions for SBATCH vars (e.g. account, time, etc.)
#
# run like:
#   bash gpu-benchmarks/allreduce/run_frontier.sh <comm_type> <rocm_version> <num_nodes> <output_dir>
if [ "$#" -ne 4 ]; then
    echo "Usage: $0 <communication_type> <rocm_version> <number_of_nodes> <output_dir>"
    exit 1
fi
# `mpi` or `rccl`
COMM_TYPE=$1
# `5.7.1` or `6.1.3`
ROCM_VERSION=$2
# `16` or `64`
NUM_NODES=$3
# output directory
OUTPUT_DIR=$4

# setup cray-mpich version matching the requested ROCm release
if [[ "$ROCM_VERSION" == "6.1.3" ]]; then
    MPICH_VERSION=8.1.30
else
    MPICH_VERSION=8.1.28
fi

OUTPUT_FILE=$OUTPUT_DIR/output-allreduce.log

{
    # reset modules
    echo resetting modules:
    module reset
    # load modules
    echo loading modules:
    module load PrgEnv-cray craype-accel-amd-gfx90a cpe/23.05 amd/${ROCM_VERSION}
    module load cray-mpich/${MPICH_VERSION}
    module load rocm/${ROCM_VERSION}
    module list

    GPU_BENCHMARKS_ROOT=/lustre/orion/csc569/scratch/keshprad/gpu-benchmarks
    EXEC=$GPU_BENCHMARKS_ROOT/allreduce_${COMM_TYPE}_rocm-${ROCM_VERSION}.x
    NUM_TASKS=$((NUM_NODES * 8))
    MIN_MSG_SIZE=$((1 * 1024))
    MAX_MSG_SIZE=$((1 * 1024 * 1024))
    ITERATIONS=100

    export MPICH_GPU_SUPPORT_ENABLED=1
    export LD_LIBRARY_PATH="${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}"

    echo "start allreduce: $(date)"
    CMD="srun -N $NUM_NODES -n $NUM_TASKS \
        --output $OUTPUT_FILE \
        $EXEC $NUM_TASKS $MIN_MSG_SIZE $MAX_MSG_SIZE $ITERATIONS"
    echo running:
    echo "$CMD"
    $CMD
    echo "end allreduce: $(date)"
} &>> "$OUTPUT_FILE"
Loading