pretrain/installers/v4-megatron-abci/qsub_setup.sh (3 changes: 2 additions & 1 deletion)
@@ -1,6 +1,6 @@
 #!/bin/bash
 #PBS -P gcg51557
-#PBS -q R10415
+#PBS -q R9920251000
 #PBS -v RTYPE=rt_HF
 #PBS -l select=1
 #PBS -l walltime=01:00:00
@@ -46,6 +46,7 @@ source ${SCRIPT_DIR}/src/install_pytorch.sh
 source ${SCRIPT_DIR}/src/install_requirements.sh
 source ${SCRIPT_DIR}/src/install_apex.sh
 source ${SCRIPT_DIR}/src/install_flash_attention.sh
+# source ${SCRIPT_DIR}/src/install_flash_attention3.sh
 source ${SCRIPT_DIR}/src/install_transformer_engine.sh
 source ${SCRIPT_DIR}/src/install_megatron_lm.sh
 source ${SCRIPT_DIR}/src/install_tokenizer.sh
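The only functional changes here are the new queue in the #PBS header and the commented-out hook for the FlashAttention 3 installer. Submission is unchanged; for reference, a minimal sketch (the project and queue come from the #PBS directives above, and any environment variables the installer expects are omitted):

# Hypothetical submission on the ABCI PBS scheduler; not part of this change.
cd pretrain/installers/v4-megatron-abci
qsub qsub_setup.sh
qstat -u "$USER"   # confirm the setup job is queued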
pretrain/installers/v4-megatron-abci/scripts/environment.sh (8 changes: 6 additions & 2 deletions)
@@ -18,9 +18,13 @@ export PRETRAIN_TORCH_VERSION=2.6.0
 export PRETRAIN_TORCHVISION_VERSION=0.21.0
 export PRETRAIN_APEX_COMMIT=312acb44f9fe05cab8c67bba6daa0e64d3737863
 export PRETRAIN_FLASH_ATTENTION_VERSION=2.5.8
-export PRETRAIN_TRANSFORMER_ENGINE_VERSION=1.13.0
+# export PRETRAIN_FLASH_ATTENTION_VERSION=3.0.0b1
+# export PRETRAIN_FLASH_ATTENTION_COMMIT=0e79d71175346c7151f49ab6287084a052bc9613
+# export PRETRAIN_TRANSFORMER_ENGINE_VERSION=1.13.0
+export PRETRAIN_TRANSFORMER_ENGINE_VERSION=1.9

 export PRETRAIN_MEGATRON_TAG=v4
+# export PRETRAIN_MEGATRON_TAG=v4-old
 # Ensure the appropriate Huggingface tokenizer is included
 # https://github.com/llm-jp/scripts/pull/12#discussion_r1708415209
 export PRETRAIN_TOKENIZER_TAG=v3.0b2
@@ -31,4 +35,4 @@ module load cudnn/${PRETRAIN_CUDNN_VERSION}/${PRETRAIN_CUDNN_VERSION_WITH_PATCH}
 module load hpcx/${PRETRAIN_HPCX_VERSION}
 module load nccl/${PRETRAIN_NCCL_VERSION}/${PRETRAIN_NCCL_VERSION_WITH_PATCH}

-export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH
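The active stack stays on FlashAttention 2.5.8 with TransformerEngine 1.9; the FlashAttention 3 beta and TransformerEngine 1.13.0 remain available as commented-out alternatives. Switching stacks would amount to flipping which lines are commented, roughly as in this sketch (it mirrors the commented lines above; nothing here is enabled by this change):

# Hypothetical environment.sh fragment with the FlashAttention 3 stack enabled.
export PRETRAIN_FLASH_ATTENTION_VERSION=3.0.0b1
export PRETRAIN_FLASH_ATTENTION_COMMIT=0e79d71175346c7151f49ab6287084a052bc9613
export PRETRAIN_TRANSFORMER_ENGINE_VERSION=1.13.0
# ...and in qsub_setup.sh, re-enable the corresponding installer:
# source ${SCRIPT_DIR}/src/install_flash_attention3.sh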
New file (21 additions)
@@ -0,0 +1,21 @@
+# Ref: https://github.com/llm-jp/scripts/blob/exp/tokenizer_test/experiments/v4-hq_tokenizer_test/installer/install_megatron.sh
+
+echo "Installing FlashAttention ${PRETRAIN_FLASH_ATTENTION_VERSION} (commit ${PRETRAIN_FLASH_ATTENTION_COMMIT})"
+source "${TARGET_DIR}/venv/bin/activate"
+pushd "${TARGET_DIR}/src"
+
+git clone https://github.com/Dao-AILab/flash-attention.git
+pushd flash-attention
+git checkout "${PRETRAIN_FLASH_ATTENTION_COMMIT}"
+pushd hopper # cd hopper/
+export TORCH_CUDA_ARCH_LIST="90"
+python setup.py install
+
+python_path=`python -c "import site; print(site.getsitepackages()[0])"`
+mkdir -p $python_path/flash_attn_3
+wget -P $python_path/flash_attn_3 https://raw.githubusercontent.com/Dao-AILab/flash-attention/${PRETRAIN_FLASH_ATTENTION_COMMIT}/hopper/flash_attn_interface.py
+
+popd # flash-attention/hopper
+popd # flash-attention
+popd # ${TARGET_DIR}/src
+deactivate
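The new installer builds the Hopper (sm90) FlashAttention kernels from the pinned commit and copies flash_attn_interface.py into a flash_attn_3 directory under site-packages so the Python interface sits next to the compiled extension. A post-install check could look like this sketch (it assumes the venv layout above, that implicit namespace packages make flash_attn_3 importable, and that the extension built by setup.py is importable under the name the interface file expects):

# Hypothetical smoke test; not part of the installer.
source "${TARGET_DIR}/venv/bin/activate"
python -c "from flash_attn_3 import flash_attn_interface; print(flash_attn_interface.__file__)"
deactivate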