support intermediate_api baichuan test
Function-Samuel committed Feb 19, 2025
1 parent 08183d7 commit bf77f95
Showing 5 changed files with 100 additions and 15 deletions.
@@ -13,9 +13,9 @@
# limitations under the License.

param="model_item=baichuan-inc-baichuan-2-13b_pretrain_dy2st "
param+="run_mode=DP1_MP4_PP2_1F1B_Sharding4_Stage1 "
param+="run_mode=DP1_MP4_PP1_Sharding8_Stage1 "
param+="device_num=N4C32 "
param+="global_batch_size=128 "
param+="global_batch_size=32 "
param+="nnodes=4 "
param+="model_type=baichuan2_13b "

@@ -0,0 +1,25 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

param="model_item=intermediate_api_baichuan-inc-baichuan-2-13b_pretrain_dy2st "
param+="run_mode=DP1_MP4_PP1_Sharding8_Stage1 "
param+="device_num=N4C32 "
param+="global_batch_size=32 "
param+="nnodes=4 "
param+="model_type=baichuan2_13b "

cd ./tests
bash ./test_tipc/static/auto_parallel/baichuan2/benchmark_common/prepare.sh

bash -c "${param} bash ./test_tipc/static/auto_parallel/baichuan2/benchmark_common/run_benchmark.sh"
@@ -24,6 +24,9 @@ function _set_params(){
fp_item="bf16"
MODEL_TYPE=${model_type:-"baichuan2_13b"}

+# for intermediate api
+intermediate_api=${intermediate_api:-""}

ip_lists=($(echo $TRAINER_INSTANCES | tr ',' ' '))
master_ip=${ip_lists[0]}
nnodes=${nnodes:-1}
@@ -170,17 +173,17 @@
train_cmd="python -u -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 \
--nnodes 1 --nproc_per_node 8 \
--log_dir mylog run_pretrain_auto.py \
-./pretrain_config_${MODEL_TYPE}/pretrain-${MODEL_TYPE}.json"
+./pretrain_config_${MODEL_TYPE}/${intermediate_api}pretrain-${MODEL_TYPE}.json"
;;
N4C32) echo "Run with: device_num=${device_num} run_mode=${run_mode}"
train_cmd="python -u -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 \
--log_dir mylog run_pretrain_auto.py \
-./pretrain_config_${MODEL_TYPE}/pretrain-${MODEL_TYPE}.json"
+./pretrain_config_${MODEL_TYPE}/${intermediate_api}pretrain-${MODEL_TYPE}.json"
;;
*) echo "Run with: device_num=${device_num}, run_mode=${run_mode}"
train_cmd="python -u -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 \
--log_dir mylog run_pretrain_auto.py \
-./pretrain_config_${MODEL_TYPE}/pretrain-${MODEL_TYPE}.json"
+./pretrain_config_${MODEL_TYPE}/${intermediate_api}pretrain-${MODEL_TYPE}.json"
;;
esac
cd ../llm/auto_parallel/llama
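Because intermediate_api defaults to an empty string, the existing configs keep resolving to pretrain-${MODEL_TYPE}.json; only a caller that sets the variable switches to the new config. A sketch of the path resolution (the prefix value intermediate_api_ is an assumption inferred from the model_item naming above, since this page does not show the new config's filename):

    MODEL_TYPE=baichuan2_13b
    intermediate_api=""                   # default: the original config
    echo "./pretrain_config_${MODEL_TYPE}/${intermediate_api}pretrain-${MODEL_TYPE}.json"
    intermediate_api="intermediate_api_"  # assumed prefix selecting the config added here
    echo "./pretrain_config_${MODEL_TYPE}/${intermediate_api}pretrain-${MODEL_TYPE}.json"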
@@ -0,0 +1,57 @@
{
"model_name_or_path": "baichuan-inc/Baichuan2-13B-Base",
"tokenizer_name_or_path": "baichuan-inc/Baichuan2-13B-Base",
"input_dir": "./data",
"output_dir": "./checkpoints/baichuan2_13b_ckpts",
"split": "949,50,1",
"to_static": true,
"pipeline_parallel_degree": 1,
"tensor_parallel_degree": 4,
"virtual_pp_degree": 1,
"weight_decay": 0.01,
"warmup_ratio": 0.01,
"max_grad_norm": 1.0,
"learning_rate": 0.00003,
"min_learning_rate": 0.000003,
"max_steps": 200,
"logging_steps": 5,
"eval_steps": 10000,
"save_steps": 1000,
"continue_training": 0,
"do_train": true,
"do_eval": false,
"do_predict": false,
"disable_tqdm": true,
"save_total_limit": 2,
"device": "gpu",
"dataloader_num_workers": 1,
"distributed_dataloader": 0,
"enable_auto_parallel": 1,
"per_device_train_batch_size": 2,
"gradient_accumulation_steps": 2,
"per_device_eval_batch_size": 1,
"recompute": false,
"recompute_use_reentrant": true,
"recompute_granularity": "full",
"pp_recompute_interval": 0,
"bf16": true,
"fp16_opt_level": "O2",
"amp_master_grad": true,
"fuse_attention_ffn": true,
"fuse_attention_qkv": true,
"use_flash_attention": true,
"fused_linear": 1,
"fused_linear_param_grad_add": 1,
"use_fused_rope": true,
"use_fused_rms_norm": true,
"max_seq_length": 4096,
"sequence_parallel": 1,
"sharding": "stage1",
"sharding_parallel_degree": 8,
"sharding_parallel_config": "enable_tensor_fusion enable_overlap",
"tensor_parallel_config": "enable_mp_async_allreduce replace_with_parallel_cross_entropy",
"data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
"pipeline_parallel_config": "enable_send_recv_overlap enable_split_backward",
"model_type": "llama_network",
"use_intermediate_api": true
}
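A consistency check for this config against global_batch_size=32 in the new launch script: with pipeline_parallel_degree 1 and tensor_parallel_degree 4, sharding is the only data-parallel-like dimension, so the global batch works out as

    # per_device_train_batch_size x gradient_accumulation_steps x sharding_parallel_degree
    echo $((2 * 2 * 8))   # 32, matching global_batch_size=32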
@@ -5,17 +5,16 @@
"output_dir": "./checkpoints/baichuan2_13b_ckpts",
"split": "949,50,1",
"to_static": true,
"pipeline_parallel_degree": 2,
"pipeline_parallel_degree": 1,
"tensor_parallel_degree": 4,
"virtual_pp_degree": 2,
"pipeline_schedule_mode": "1F1B",
"virtual_pp_degree": 1,
"weight_decay": 0.01,
"warmup_ratio": 0.01,
"max_grad_norm": 0.0,
"max_grad_norm": 1.0,
"learning_rate": 0.00003,
"min_learning_rate": 0.000003,
"max_steps": 100,
"logging_steps": 1,
"max_steps": 200,
"logging_steps": 5,
"eval_steps": 10000,
"save_steps": 1000,
"continue_training": 0,
@@ -25,11 +24,11 @@
"disable_tqdm": true,
"save_total_limit": 2,
"device": "gpu",
"dataloader_num_workers": 4,
"dataloader_num_workers": 1,
"distributed_dataloader": 0,
"enable_auto_parallel": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 32,
"per_device_train_batch_size": 2,
"gradient_accumulation_steps": 2,
"per_device_eval_batch_size": 1,
"recompute": false,
"recompute_use_reentrant": true,
@@ -46,8 +45,9 @@
"use_fused_rope": true,
"use_fused_rms_norm": true,
"max_seq_length": 4096,
"sequence_parallel": false,
"sequence_parallel": 1,
"sharding": "stage1",
"sharding_parallel_degree": 8,
"sharding_parallel_config": "enable_tensor_fusion enable_overlap",
"tensor_parallel_config": "enable_mp_async_allreduce replace_with_parallel_cross_entropy",
"data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
