diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml index 60537ce8776..0dc97066835 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml index 506d92d6e3e..05117c8f4a0 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml index f965ee1d9ef..01ba8adeccf 100644 --- 
a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml index fc4c836c98a..680c3c69ea7 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml index 8974bc1ea24..c372de7180a 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml +++ 
b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml index 49135684124..4afcb0c9d47 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml index 6c0dc550515..8a776e6bfe5 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml @@ -22,8 
+22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml index e001ea4dc08..15ec6afdebe 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml @@ -22,8 +22,8 @@ MODEL_ARGS: --lr-decay-iters: 990000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence - --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-bert_00_text_sentence + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.0001 diff --git a/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml index af341b0f670..860215bd473 100644 --- a/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_release/model_config.yaml @@ -27,7 +27,7 @@ MODEL_ARGS: --pipeline-model-parallel-size: 8 # Data args --data-path: ${DATA_BLEND} - --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt --split: 949,50,1 --data-cache-path: ${DATA_CACHE_PATH} # EVAL_AND_LOGGING_ARGS 
diff --git a/tests/functional_tests/test_cases/bert/bert_release_sm/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_release_sm/model_config.yaml index 60ef765cf15..1d982960246 100644 --- a/tests/functional_tests/test_cases/bert/bert_release_sm/model_config.yaml +++ b/tests/functional_tests/test_cases/bert/bert_release_sm/model_config.yaml @@ -27,7 +27,7 @@ MODEL_ARGS: --pipeline-model-parallel-size: 8 # Data args --data-path: ${DATA_BLEND} - --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt --split: 949,50,1 --data-cache-path: ${DATA_CACHE_PATH} # EVAL_AND_LOGGING_ARGS diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_disable/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_disable/model_config.yaml index 2026f11ade2..1a82cfcde7e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_disable/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_disable/model_config.yaml @@ -63,9 +63,9 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} --timing-log-level: 0 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_enable/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_enable/model_config.yaml index 41cb6561429..e580c759f81 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_enable/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_enable/model_config.yaml @@ -62,9 +62,9 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} --timing-log-level: 0 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_1/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_1/model_config.yaml index cd590ff1554..c758e13e5a6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_1/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_1/model_config.yaml @@ -62,9 +62,9 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} --timing-log-level: 0 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_2/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_2/model_config.yaml index f902393d049..c16e5c99780 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_2/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_2/model_config.yaml @@ -62,9 +62,9 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} --timing-log-level: 0 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_reshard/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_reshard/model_config.yaml index 2e82cad10a8..bf406294fe0 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_reshard/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_reshard/model_config.yaml @@ -63,9 +63,9 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} 
--timing-log-level: 0 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume/model_config.yaml index 0abd4db698e..9e9857796ab 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume/model_config.yaml @@ -62,9 +62,9 @@ MODEL_ARGS: --load: ${CHECKPOINT_LOAD_PATH} # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} --timing-log-level: 0 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/model_config.yaml index aff4e8a6f3d..8015e1b1767 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/model_config.yaml @@ -90,9 +90,9 @@ BASE_MODEL_ARGS: &BASE_MODEL_ARGS --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2 # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + 
--vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} --timing-log-level: 0 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_transient/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_transient/model_config.yaml index b091c0ff9f6..2533c93415f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_transient/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_transient/model_config.yaml @@ -63,9 +63,9 @@ MODEL_ARGS: --exit-interval: 4 # data settings --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt # logging settings --tensorboard-dir: ${TENSORBOARD_PATH} --timing-log-level: 0 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml index ddc8286573b..df122ec30ff 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/model_config.yaml index 2d5e340fa6d..59aae2c0ce8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/model_config.yaml index fc92d226b6d..9c173e54356 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_mup/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_mup/model_config.yaml index 811b88718f9..e59dc448195 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_mup/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_mup/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/model_config.yaml index 82506115963..36ea2b603f2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/model_config.yaml index 4a5bf3d8fc7..bf62961e06e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: 
${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/model_config.yaml index 8a471f2238b..b715e3cd2c7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/model_config.yaml index a5dbe2157e5..24e3016ea31 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: 
${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/model_config.yaml index 0310dbf2a64..d816bad8b54 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/model_config.yaml index 6d6bf2b5b94..32e29be9923 100644 
--- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/model_config.yaml index 5e4131a43ca..2acf5eae6b2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 
--distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/model_config.yaml index 37b5f5d7471..c4d36557e24 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/model_config.yaml index 7ddf65f4ca8..0f0b09444ae 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/model_config.yaml index b391387f9ff..b5ca9d84a0c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/model_config.yaml index 5415e3de96d..56cb9e83ce8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/model_config.yaml @@ -21,9 +21,9 @@ 
MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/model_config.yaml index 8d372f5539d..87cf4c64172 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/model_config.yaml index 
9a3947b5e71..89b883024df 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/model_config.yaml index 7d069ce9ec6..2f5da8c37d6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/model_config.yaml index ea882318c7e..3cf79eaf7d2 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/model_config.yaml index d67dd6af765..a8bb71dc510 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/model_config.yaml index 1e25f4bd4e1..549b1ffdc99 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/model_config.yaml index 2d734908089..614f9bf3f1a 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/model_config.yaml index 319164782fc..62d3b8fcd00 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/model_config.yaml index ea6f2520553..202ea416823 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/model_config.yaml index ea8f4bb71d0..8425f9ea940 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: 
${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/model_config.yaml index ea869246a7c..3c20b14466e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/model_config.yaml index fff31764409..8b90b2dfe3e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/model_config.yaml index 46ff13cb9a8..bf6775690cb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/model_config.yaml index 5a1b1ce289d..97611acad6a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: 
--lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 31ffc9c8111..af9e6958260 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/model_config.yaml index 0bd25e79735..28248046b44 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml index f6a72754edb..240ed65fff5 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/model_config.yaml index d502c3e1fef..6bcce43a2db 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/model_config.yaml index edc9bc1ff2a..6a718c157fe 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 
320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/model_config.yaml index 1b9c96b3f7d..44a1b7d8b70 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml index ad7854aeacb..46dfa70b40f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index af06fe06903..0906c429865 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml index 035549f8fb6..08e0fb8ad67 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/model_config.yaml index ef758e5639f..43bd4be15fd 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/model_config.yaml index 06545179645..9a68a6ec644 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/model_config.yaml index 8710e92a138..48ba57051cc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/model_config.yaml index d074a823ffd..4933f8bc03d 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: 
${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/model_config.yaml index 5394f9d0070..05779c31263 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/model_config.yaml index 4bd321b43da..92d4ce5c4e7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 
--save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/model_config.yaml index 1229288b9be..88c69a55bd8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml index ee2c093e0ab..cc7211c9967 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml @@ -50,9 +50,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml index 1171dfc454e..d422a2f2a18 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml @@ -37,9 +37,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/model_config.yaml index 57bcdeb7ca6..f9aae842b0c 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/model_config.yaml index 3f427a04f9d..787ee3bab10 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: 
${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/model_config.yaml index d3446e92c2e..a1e4797ec14 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/model_config.yaml index 05b166f0a7b..00131b06531 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/model_config.yaml index 70155c2ff81..143f89b7f60 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/model_config.yaml index 92f4bfb1cdf..0ec24c4a6ac 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 
--save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/model_config.yaml index 088ababb9cb..60f66a93cba 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/model_config.yaml index 880d7fc7ce0..81918bdfb5f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/model_config.yaml index 013569c5882..afc9cd205fd 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: 
${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/model_config.yaml index f45345f9911..dc5b971a0a3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/model_config.yaml index 8866fa67175..1a993aeb9fb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} 
- --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/model_config.yaml index f4649e2d303..b1c98c49497 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/model_config.yaml index a77cd637800..1d6ed4ae398 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/model_config.yaml +++ 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/model_config.yaml index 9f416e74884..af3471d97df 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/model_config.yaml index 2622612205a..954701a18e7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/model_config.yaml index 00f01d3bac0..e44301baaf4 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/model_config.yaml @@ -27,9 +27,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 
--distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/model_config.yaml index d6384e7f604..9391c252630 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/model_config.yaml index d4939a8c2cf..7beb94598c4 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: 
${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/model_config.yaml index af4aa0bf4fc..81695220690 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/model_config.yaml index 9fbe95431e0..e6bd4f0f256 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: 
${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/model_config.yaml index 54d49da6c14..44b7a5e1183 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/model_config.yaml index 
f906e5f8439..94f42e56df6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/model_config.yaml index 487227e5abd..3c554412de7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/model_config.yaml index ea5523e1d2a..3b5f8a4c6bd 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/model_config.yaml index afbc17a0301..ca817f2aacb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} 
- --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/model_config.yaml index bcbfdad6616..a162da61840 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/model_config.yaml index ecc62315f9f..f563037677e 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml index 89c6943100e..9ee581ecfef 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 944dfb0b489..7d0a95ee0c9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/model_config.yaml index 18a7195b436..a7bf80934c3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: 
${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml index fe8e0f493d1..db040619047 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 136c696ef2f..098dbcda03d 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/model_config.yaml index 755e9ba49e9..0de39f358cb 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/model_config.yaml index 4aa0b36a84b..1bfc304a0e7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/model_config.yaml index 620eeaeff46..66cfbd4f0fe 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index b8a79c7a083..a0fd0044165 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml index 4febeeb3aca..8fbce7b088b 100644 --- 
a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml index 8793230c3c9..e1c4376d4cc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml index 4cc6e53b8c8..1db23e02284 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/model_config.yaml index f4c058fb0a0..2b0d8e7fec7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + 
--merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/model_config.yaml index e2a0f1f1f69..9ff97764091 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/model_config.yaml index b9b786ee247..efa0bebdf12 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + 
--vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/model_config.yaml index b4991e3621e..6b3fc86e318 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/model_config.yaml index cc6a76a97d9..06854c3eea6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/model_config.yaml index 7601d0188ae..e818716f210 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/model_config.yaml @@ -25,9 +25,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/model_config.yaml index a365aae9089..32e060bfe58 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/model_config.yaml index c9473f99f96..e61ad0b9ea9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/model_config.yaml index 23b58cdc782..7ea12e55652 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/model_config.yaml @@ -21,9 +21,9 
@@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml index ae8a2368c59..7335879dbe6 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml 
b/tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml index 3963a359ea9..d556654cec1 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml +++ b/tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml index ddb776d0aee..d2e21e5d244 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml @@ -24,9 +24,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: 
${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp2_vpp2_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp2_vpp2_cp1_dgx_a100_1N8G/model_config.yaml index 3f2a25be6b4..bf3af605766 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp2_vpp2_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp2_vpp2_cp1_dgx_a100_1N8G/model_config.yaml @@ -24,9 +24,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml index 8cecc7de2ed..04b80e958a4 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml @@ -24,9 +24,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml index 79b6fd506bd..8c655bc135c 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml +++ b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml @@ -24,9 +24,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml index 7e16a27960f..f7e830040f7 100644 --- a/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml +++ 
b/tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml @@ -24,9 +24,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml index cdabc4b6225..e9d42f5f5f8 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml index 731ff82d8d4..563b7e8d135 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/model_config.yaml index f7fd8b2963d..329639ad0c9 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -19,9 +19,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: 
${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml index 61b5c9339ba..685a2d52350 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml index a3995df9627..c64b6a67891 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml index 8672163186c..c8632f4bee7 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml index 9a827a4ee72..fc50407598c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/model_config.yaml index 8e267b178b4..6a6ded49b95 100644 --- 
a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml index 8525e285ac9..e0568e91845 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: 
${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/model_config.yaml index b84bf45b890..96047c47086 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/model_config.yaml index b5c774d4d3c..187ce1d4614 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: 
${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/model_config.yaml index b84bf45b890..96047c47086 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/model_config.yaml 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/model_config.yaml index 8c75b0a2e76..3092a434fbb 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/model_config.yaml index 978babb72ff..c758ca93ebb 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: 
${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/model_config.yaml index b6a7c223acc..53e0113bfed 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml index 4c991767ca3..8004bb8d8df 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml @@ -38,9 +38,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml index fdb452c65a9..9423229db56 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml @@ -38,9 +38,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml index bd565830970..c9024d10f00 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml @@ -38,9 +38,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml index e7971347f02..0af51d29191 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml @@ -39,9 +39,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 16 diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml index 8874f9cf045..15f971e9ff3 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml index c147b689e71..be754cd03d2 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml index f77c2a41f68..d9db93ccd4f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml index 12e6698a5f4..02f307e3739 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml @@ -21,9 
+21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml index 4d674322a23..01cabb29247 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml 
b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml index 86a05a93562..10bae7aa8a8 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml index 5020d9d9397..bc36fa10caa 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + 
--merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml index c93bd4367f3..5de72de0b7d 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml @@ -21,9 +21,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/model_config.yaml index 1d0ef19232e..5987e1e9750 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml index cd7656d240f..9ee76a37590 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml index 5c395caed56..2f1edeadd1f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml +++ 
b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml index c04d55564a3..60099ac01a3 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml @@ -44,9 +44,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 15 diff --git 
a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml index dbfb29ea48c..b59e4d5a1c6 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml @@ -44,9 +44,9 @@ MODEL_ARGS: # Data args --seq-length: 4096 --data-cache-path: ${DATA_CACHE_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 # Add network size args --num-layers: 15 diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml index fb438f0edda..34ad8a782b5 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml @@ -22,9 +22,9 @@ MODEL_ARGS: --lr-decay-iters: 320000 --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document - --vocab-file: 
${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --split: 949,50,1 --distributed-backend: nccl --lr: 0.00015 diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/model_config.yaml b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/model_config.yaml index fd0d79e0986..0a2f552fbd1 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/model_config.yaml +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/model_config.yaml @@ -77,10 +77,10 @@ MODEL_ARGS: --save-interval: 50 --eval-interval: 1000 --eval-iters: 10 - --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document --data-cache-path: ${DATA_CACHE_PATH} - --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json - --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt --save: ${CHECKPOINT_SAVE_PATH} --load: ${CHECKPOINT_LOAD_PATH} --ckpt-fully-parallel-load: true diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/model_config.yaml index bccce17cef1..8dc5cf5a713 100644 --- a/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: 
--vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml index aa0f67ff311..8fc0b04c8c8 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/model_config.yaml index 59c1d0f280f..277d637cb4d 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: 
${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml index 80a84a26e0c..8463c0ebfa4 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/model_config.yaml index 047280dec39..f4a4e7c3b1c 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document - --vocab-file: 
${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/model_config.yaml index 1611c02251b..5f170ec8f43 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: transformer_engine - --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/model_config.yaml index 12ccecb5883..f8e6db5d685 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --data-path: 
${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml index 8559fd587d1..314a960cc75 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/model_config.yaml index 9c6a835571c..df1bb9d1833 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document + --vocab-file: 
${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/model_config.yaml index dd3896ad88a..33d79798194 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff --git a/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/model_config.yaml index 4c955dd5441..15096f11362 100644 --- a/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/model_config.yaml @@ -29,8 +29,8 @@ MODEL_ARGS: --vocab-extra-ids: 100 --init-method-std: 0.015 --transformer-impl: local - --data-path: ${DATA_PATH}/text/the_pile/t5_shard00/my-t5_00_text_document - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --calculate-per-token-loss: true --split: 99982,9,9 diff 
--git a/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml index a7abdc1bdd4..485b14cbfa2 100644 --- a/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_release/model_config.yaml @@ -37,7 +37,7 @@ MODEL_ARGS: --pipeline-model-parallel-size: 1 # Data args --data-path: ${DATA_BLEND} - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --split: 99982,9,9 --data-cache-path: ${DATA_CACHE_PATH} diff --git a/tests/functional_tests/test_cases/t5/t5_release_sm/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_release_sm/model_config.yaml index 7f748273cd3..6feba73c0a1 100644 --- a/tests/functional_tests/test_cases/t5/t5_release_sm/model_config.yaml +++ b/tests/functional_tests/test_cases/t5/t5_release_sm/model_config.yaml @@ -37,7 +37,7 @@ MODEL_ARGS: --pipeline-model-parallel-size: 1 # Data args --data-path: ${DATA_BLEND} - --vocab-file: ${DATA_PATH}/text/the_pile/t5_shard00/bert-large-cased-vocab.txt + --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt --tokenizer-type: BertWordPieceCase --split: 99982,9,9 --data-cache-path: ${DATA_CACHE_PATH} diff --git a/tools/common_pile_dataset/README.md b/tools/common_pile_dataset/README.md new file mode 100644 index 00000000000..2431d1b01d3 --- /dev/null +++ b/tools/common_pile_dataset/README.md @@ -0,0 +1,223 @@ +# Common Pile CI Dataset + +This directory contains tools to create CI test datasets from the +[Common Pile](https://huggingface.co/datasets/common-pile/comma_v0.1_training_dataset) +filtered dataset, replacing the previous datasets sourced from The Pile. 
+ +## Output + +The scripts produce Megatron-LM indexed binary datasets for three model families: + +``` +<output-dir>/ +├── my-gpt3_00_text_document.{bin,idx} # GPT (~24 GB bin) +├── my-bert_00_text_sentence.{bin,idx} # BERT (~25 GB bin) +├── my-t5_00_text_document.{bin,idx} # T5 (~25 GB bin) +├── bpe/ +│ ├── vocab.json # GPT-2 BPE vocabulary +│ └── merges.txt # GPT-2 BPE merges +├── vocab.txt # BERT WordPiece vocabulary +└── bert-large-cased-vocab.txt # T5 (BERT-large cased) vocabulary +``` + +Current production location on the HPC cluster: + +``` +/lustre/fsw/portfolios/coreai/projects/coreai_dlalgo_mcore/mcore_ci/text/common_pile/v01_filtered_data/ +``` + +## Quick Start (HPC) + +The fastest way to recreate the dataset on the NVIDIA HPC cluster: + +```bash +# 1. Copy both scripts to the remote machine +scp tools/common_pile_dataset/setup_common_pile_dataset.sh tools/common_pile_dataset/create_common_pile_ci_dataset.py \ + <user>@<host>:/tmp/ + +# 2. Launch as a background job (survives SSH disconnection) +ssh <user>@<host> \ + 'nohup bash /tmp/setup_common_pile_dataset.sh > /tmp/dataset_creation.log 2>&1 &' + +# 3. Monitor progress +ssh <user>@<host> 'tail -f /tmp/dataset_creation.log' +``` + +Total runtime is approximately **3-5 hours** (40 min download + ~1 hr per +preprocessing step). + +## What the Setup Script Does + +`setup_common_pile_dataset.sh` is a self-contained wrapper that: + +1. **Finds Python 3.10+** (required by latest Megatron-LM for PEP 604 syntax). +2. **Creates a virtual environment** in `/tmp` to isolate from system packages. +3. **Clones Megatron-LM** (shallow, depth 1) for `preprocess_data.py`. +4. **Patches `megatron/training/__init__.py`** to skip heavy imports (triton, + apex, transformer-engine) that are not needed for preprocessing. +5. **Installs pip dependencies**: `datasets`, `nltk`, `torch` (CPU-only), + `transformers`. +6. **Redirects HuggingFace cache** to lustre (`HF_HOME`) to avoid filling up + the 10 GB `/home` filesystem. +7.
**Runs `create_common_pile_ci_dataset.py`** with production parameters. +8. **Cleans up** the temporary work directory. + +## Running the Python Script Directly + +If you already have a Megatron-LM checkout and the dependencies installed, you +can run the Python script directly: + +```bash +# Small test run (streaming, ~10K docs, a few minutes) +python tools/common_pile_dataset/create_common_pile_ci_dataset.py \ + --output-dir /path/to/output \ + --num-documents 10000 \ + --download-vocab + +# Full production run (~12M docs, matching existing shard00 sizes) +python tools/common_pile_dataset/create_common_pile_ci_dataset.py \ + --output-dir /path/to/output \ + --num-documents 12000000 \ + --keep-jsonl \ + --copy-vocab-from /lustre/.../text/the_pile +``` + +### Key Arguments + +| Argument | Description | +|---|---| +| `--output-dir` | Where to write the output files (required) | +| `--num-documents` | Number of documents to download (default: 10000) | +| `--megatron-dir` | Path to Megatron-LM repo (default: auto-detect from script location) | +| `--copy-vocab-from` | Copy vocab files from existing `the_pile` directory | +| `--download-vocab` | Download vocab files from HuggingFace instead | +| `--existing-jsonl` | Skip download; use a pre-existing JSONL file | +| `--keep-jsonl` | Keep the intermediate JSONL after preprocessing | +| `--bulk-download` | Non-streaming download (faster but requires ~460 GB HF cache) | +| `--workers` | Number of worker processes for preprocessing (default: 4) | + +## Prerequisites + +- **Python 3.10+** (Megatron-LM uses PEP 604 `type | None` syntax) +- **PyTorch** (CPU-only is sufficient for preprocessing) +- **Python packages**: `datasets`, `nltk`, `transformers` +- **Disk space**: + - Output directory: ~80 GB for final files + - Intermediate JSONL: ~43 GB (deleted unless `--keep-jsonl`) + - Sentence-split JSONL: ~43 GB (created during BERT preprocessing) + - HF cache: ~1 GB (streaming mode) or ~460 GB (`--bulk-download` mode) +- 
**RAM**: ~18 GB peak (during BERT index finalization) + +## How It Works + +### Step 1: Vocabulary Files + +Copies tokenizer vocabularies from the existing `the_pile` dataset, or +downloads them from HuggingFace. These are standard GPT-2 BPE and BERT +WordPiece vocabularies. + +### Step 2: Download Raw Text + +Streams documents from `common-pile/comma_v0.1_training_dataset` on +HuggingFace, keeping only documents whose text is longer than 100 characters +after stripping surrounding whitespace. Writes a +JSONL file with `{"text": "..."}` per line. At 12M documents this produces a +~43 GB file. + +Progress is logged with ETA: + +``` +[ 42.0%] 5,040,000/12,000,000 docs | 4,500 docs/s | 20,150.3 MB on disk | ETA: 25.8m +``` + +### Step 3: GPT Preprocessing + +Runs `preprocess_data.py` with `GPT2BPETokenizer` and `--append-eod` to create +`my-gpt3_00_text_document.{bin,idx}`. + +### Step 4: BERT Preprocessing + +BERT requires sentence splitting, which is a **two-pass process** when using +`partitions=1` (the default): + +1. **Pass 1**: Runs with `--split-sentences` to create a sentence-split JSONL + (`common_pile_raw_ss.jsonl`), then returns. +2. **Pass 2**: Detects that the `_ss.jsonl` file exists, skips splitting, and + encodes to binary `my-bert_00_text_sentence.{bin,idx}`. + +The BERT `.idx` file is much larger (~4 GB vs ~229 MB for GPT/T5) because it +indexes individual sentences rather than whole documents. + +### Step 5: T5 Preprocessing + +Runs `preprocess_data.py` with `BertWordPieceCase` tokenizer and `--append-eod` +to create `my-t5_00_text_document.{bin,idx}`. + +### Step 6: Verification + +Checks that all 10 expected output files exist and reports their sizes. + +## Troubleshooting + +### "No space left on device" during download + +The HuggingFace `datasets` library caches data under `~/.cache/huggingface/` by +default. On HPC systems where `/home` is small, set `HF_HOME` to a path with +sufficient space: + +```bash +export HF_HOME=/lustre/path/to/.hf_cache +``` + +The setup script does this automatically.
+ +### "ModuleNotFoundError: No module named 'triton'" (or similar) + +`preprocess_data.py` imports from `megatron.training`, which eagerly loads the +full training stack. The setup script patches `megatron/training/__init__.py` to +comment out the heavy imports. If running manually, apply: + +```bash +sed -i 's/^from \.initialize/#from .initialize/' megatron/training/__init__.py +``` + +### "TypeError: unsupported operand type(s) for |: 'type' and 'NoneType'" + +You need Python 3.10+. The latest Megatron-LM uses PEP 604 union syntax +(`type | None`) which is not supported in Python 3.9. + +### numpy/scipy binary incompatibility + +Use a virtual environment (`python3.10 -m venv venv`) to isolate from system +packages. The setup script creates one automatically. + +### BERT produces no output files + +BERT with `--split-sentences` and `partitions=1` requires two invocations of +`preprocess_data.py` (see Step 4 above). The script handles this automatically. + +## CI Data Path Configuration + +To use this dataset in CI tests, set the following paths in `model_config.yaml`: + +```yaml +# GPT +--data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document +--vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json +--merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt + +# BERT +--data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-bert_00_text_sentence +--vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt + +# T5 +--data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document +--vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt +``` + +## Files + +| File | Description | +|---|---| +| `create_common_pile_ci_dataset.py` | Main Python script that downloads data and runs preprocessing | +| `setup_common_pile_dataset.sh` | Self-contained bash wrapper for HPC deployment | +| `README.md` | This file | diff --git 
a/tools/common_pile_dataset/create_common_pile_ci_dataset.py b/tools/common_pile_dataset/create_common_pile_ci_dataset.py new file mode 100644 index 00000000000..0e8d8c282c6 --- /dev/null +++ b/tools/common_pile_dataset/create_common_pile_ci_dataset.py @@ -0,0 +1,531 @@ +#!/usr/bin/env python3 +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + +""" +Create CI test dataset from Common Pile (filtered). + +Downloads a sample from the Common Pile filtered dataset and preprocesses it +into Megatron-LM's indexed binary format for GPT, BERT, and T5 models. + +Output structure: + / + ├── my-gpt3_00_text_document.{bin,idx} + ├── bpe/ + │ ├── vocab.json + │ └── merges.txt + ├── my-bert_00_text_sentence.{bin,idx} + ├── vocab.txt + ├── my-t5_00_text_document.{bin,idx} + └── bert-large-cased-vocab.txt + +Usage: + # Small test run (streaming): + python tools/common_pile_dataset/create_common_pile_ci_dataset.py \ + --output-dir /path/to/output \ + --num-documents 10000 \ + --copy-vocab-from /path/to/existing/the_pile + + # Large production run (~24GB, matching existing shard00): + python tools/common_pile_dataset/create_common_pile_ci_dataset.py \ + --output-dir /path/to/output \ + --num-documents 12000000 \ + --bulk-download --keep-jsonl \ + --copy-vocab-from /path/to/existing/the_pile + + # With vocab download (no existing dataset needed): + python tools/common_pile_dataset/create_common_pile_ci_dataset.py \ + --output-dir /path/to/output \ + --num-documents 10000 \ + --download-vocab +""" + +import argparse +import json +import os +import subprocess +import sys +import time +import urllib.request + + +def _format_eta(seconds): + """Format seconds into a human-readable ETA string.""" + if seconds < 60: + return f"{seconds:.0f}s" + elif seconds < 3600: + return f"{seconds / 60:.1f}m" + else: + h = int(seconds // 3600) + m = int((seconds % 3600) // 60) + return f"{h}h{m:02d}m" + + +def download_common_pile_sample(output_jsonl, num_documents, dataset_name): + 
"""Download a sample from Common Pile using the datasets library.""" + try: + from datasets import load_dataset + except ImportError: + print("ERROR: 'datasets' library not found. Install with: pip install datasets") + sys.exit(1) + + print(f"Downloading {num_documents} documents from {dataset_name}...") + print(" (Using HF_TOKEN from environment for authentication if set)") + + ds = load_dataset(dataset_name, split="train", streaming=True) + + count = 0 + start_time = time.time() + log_interval = max(1000, num_documents // 100) # Log ~100 times, min every 1000 + + with open(output_jsonl, 'w', encoding='utf-8') as f: + for sample in ds: + text = sample.get("text", "") + # Skip very short documents that wouldn't be useful for CI tests + if text and len(text.strip()) > 100: + f.write(json.dumps({"text": text}) + "\n") + count += 1 + if count >= num_documents: + break + if count % log_interval == 0: + elapsed = time.time() - start_time + rate = count / elapsed if elapsed > 0 else 0 + remaining = (num_documents - count) / rate if rate > 0 else 0 + pct = count / num_documents * 100 + file_size_mb = os.path.getsize(output_jsonl) / (1024 * 1024) + print( + f" [{pct:5.1f}%] {count:,}/{num_documents:,} docs | " + f"{rate:,.0f} docs/s | " + f"{file_size_mb:,.1f} MB on disk | " + f"ETA: {_format_eta(remaining)}" + ) + + elapsed = time.time() - start_time + print(f" Saved {count:,} documents to {output_jsonl} in {_format_eta(elapsed)}") + file_size_mb = os.path.getsize(output_jsonl) / (1024 * 1024) + print(f" JSONL file size: {file_size_mb:,.2f} MB") + return count + + +def download_common_pile_bulk(output_jsonl, num_documents, dataset_name): + """Download from Common Pile using bulk parquet loading for speed. + + This is faster than streaming for large downloads (>100K docs) because + it downloads full parquet files and processes them locally. + """ + try: + from datasets import load_dataset + except ImportError: + print("ERROR: 'datasets' library not found. 
Install with: pip install datasets") + sys.exit(1) + + print(f"Bulk downloading {num_documents:,} documents from {dataset_name}...") + print(" (Using non-streaming mode for faster throughput)") + print(" (Using HF_TOKEN from environment for authentication if set)") + + # Load non-streaming — this downloads parquet shards to the HF cache + print(" Loading dataset (downloading parquet shards)...") + ds = load_dataset(dataset_name, split="train") + total_available = len(ds) + print(f" Dataset loaded: {total_available:,} documents available") + + count = 0 + skipped = 0 + start_time = time.time() + log_interval = max(1000, num_documents // 100) + + with open(output_jsonl, 'w', encoding='utf-8') as f: + for i in range(min(total_available, num_documents + num_documents // 10)): + text = ds[i].get("text", "") + if text and len(text.strip()) > 100: + f.write(json.dumps({"text": text}) + "\n") + count += 1 + if count >= num_documents: + break + if count % log_interval == 0: + elapsed = time.time() - start_time + rate = count / elapsed if elapsed > 0 else 0 + remaining = (num_documents - count) / rate if rate > 0 else 0 + pct = count / num_documents * 100 + file_size_mb = os.path.getsize(output_jsonl) / (1024 * 1024) + print( + f" [{pct:5.1f}%] {count:,}/{num_documents:,} docs | " + f"{rate:,.0f} docs/s | " + f"{file_size_mb:,.1f} MB on disk | " + f"ETA: {_format_eta(remaining)}" + ) + else: + skipped += 1 + + elapsed = time.time() - start_time + print( + f" Saved {count:,} documents to {output_jsonl} in {_format_eta(elapsed)} " + f"({skipped:,} short docs skipped)" + ) + file_size_mb = os.path.getsize(output_jsonl) / (1024 * 1024) + print(f" JSONL file size: {file_size_mb:,.2f} MB") + return count + + +def copy_vocab_files(output_dir, source_base): + """Copy vocabulary files from existing the_pile dataset directories.""" + copies = [ + ( + os.path.join(source_base, "shard00", "bpe", "vocab.json"), + os.path.join(output_dir, "bpe", "vocab.json"), + ), + ( + 
os.path.join(source_base, "shard00", "bpe", "merges.txt"), + os.path.join(output_dir, "bpe", "merges.txt"), + ), + ( + os.path.join(source_base, "bert_shard00", "vocab.txt"), + os.path.join(output_dir, "vocab.txt"), + ), + ( + os.path.join(source_base, "t5_shard00", "bert-large-cased-vocab.txt"), + os.path.join(output_dir, "bert-large-cased-vocab.txt"), + ), + ] + + for src, dst in copies: + os.makedirs(os.path.dirname(dst), exist_ok=True) + if not os.path.exists(src): + print(f" ERROR: Source vocab file not found: {src}") + sys.exit(1) + if os.path.exists(dst): + print(f" Already exists: {dst}") + continue + print(f" Copying {src} -> {dst}") + with open(src, 'rb') as f_in, open(dst, 'wb') as f_out: + f_out.write(f_in.read()) + + +def download_vocab_files(output_dir): + """Download tokenizer vocabulary files from HuggingFace.""" + downloads = [ + ( + "https://huggingface.co/openai-community/gpt2/resolve/main/vocab.json", + os.path.join(output_dir, "bpe", "vocab.json"), + ), + ( + "https://huggingface.co/openai-community/gpt2/resolve/main/merges.txt", + os.path.join(output_dir, "bpe", "merges.txt"), + ), + ( + "https://huggingface.co/google-bert/bert-base-uncased/resolve/main/vocab.txt", + os.path.join(output_dir, "vocab.txt"), + ), + ( + "https://huggingface.co/google-bert/bert-large-cased/resolve/main/vocab.txt", + os.path.join(output_dir, "bert-large-cased-vocab.txt"), + ), + ] + + for url, dst in downloads: + os.makedirs(os.path.dirname(dst), exist_ok=True) + if os.path.exists(dst): + print(f" Already exists: {dst}") + continue + print(f" Downloading {url}") + print(f" -> {dst}") + hf_token = os.environ.get("HF_TOKEN", "") + req = urllib.request.Request(url) + if hf_token: + req.add_header("Authorization", f"Bearer {hf_token}") + with urllib.request.urlopen(req) as response, open(dst, 'wb') as f_out: + f_out.write(response.read()) + + +def run_preprocess(megatron_dir, jsonl_path, output_prefix, tokenizer_type, + vocab_file, merge_file=None, 
split_sentences=False, + append_eod=False, workers=4): + """Run preprocess_data.py to create .bin/.idx files.""" + cmd = [ + sys.executable, + os.path.join(megatron_dir, "tools", "preprocess_data.py"), + "--input", jsonl_path, + "--output-prefix", output_prefix, + "--tokenizer-type", tokenizer_type, + "--vocab-file", vocab_file, + "--workers", str(workers), + ] + + if merge_file: + cmd.extend(["--merge-file", merge_file]) + if split_sentences: + cmd.append("--split-sentences") + if append_eod: + cmd.append("--append-eod") + + print(f"\n Running: {' '.join(cmd)}") + result = subprocess.run(cmd, cwd=megatron_dir) + if result.returncode != 0: + print(f" ERROR: Preprocessing failed with return code {result.returncode}") + sys.exit(1) + + +def verify_output(output_dir): + """Verify all expected output files exist.""" + expected_files = [ + "my-gpt3_00_text_document.bin", + "my-gpt3_00_text_document.idx", + "my-bert_00_text_sentence.bin", + "my-bert_00_text_sentence.idx", + "my-t5_00_text_document.bin", + "my-t5_00_text_document.idx", + "bpe/vocab.json", + "bpe/merges.txt", + "vocab.txt", + "bert-large-cased-vocab.txt", + ] + + all_ok = True + for f in expected_files: + full_path = os.path.join(output_dir, f) + if os.path.exists(full_path): + size_mb = os.path.getsize(full_path) / (1024 * 1024) + print(f" OK: {f} ({size_mb:.2f} MB)") + else: + print(f" MISSING: {f}") + all_ok = False + + return all_ok + + +def main(): + parser = argparse.ArgumentParser( + description="Create CI test dataset from Common Pile" + ) + parser.add_argument( + "--output-dir", type=str, required=True, + help="Output directory for preprocessed data", + ) + parser.add_argument( + "--megatron-dir", type=str, default=None, + help="Path to Megatron-LM repo root (default: auto-detect from script location)", + ) + parser.add_argument( + "--num-documents", type=int, default=10000, + help="Number of documents to download (default: 10000)", + ) + parser.add_argument( + "--dataset-name", type=str, 
default="common-pile/comma_v0.1_training_dataset", + help="HuggingFace dataset name (default: common-pile/comma_v0.1_training_dataset)", + ) + parser.add_argument( + "--copy-vocab-from", type=str, default=None, + help="Copy vocab files from existing the_pile base directory " + "(e.g., /lustre/.../text/the_pile)", + ) + parser.add_argument( + "--download-vocab", action="store_true", + help="Download vocab files from HuggingFace instead of copying", + ) + parser.add_argument( + "--existing-jsonl", type=str, default=None, + help="Path to existing JSONL file (skip download step)", + ) + parser.add_argument( + "--workers", type=int, default=4, + help="Number of worker processes for preprocessing (default: 4)", + ) + parser.add_argument( + "--keep-jsonl", action="store_true", + help="Keep the intermediate JSONL file after preprocessing", + ) + parser.add_argument( + "--bulk-download", action="store_true", + help="Use bulk (non-streaming) download for faster throughput at scale. " + "Downloads full parquet shards to HF cache before writing JSONL. 
" + "Recommended for --num-documents > 100000.", + ) + args = parser.parse_args() + + # Auto-detect Megatron-LM directory + if args.megatron_dir is None: + args.megatron_dir = os.path.abspath( + os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir) + ) + print(f"Megatron-LM directory: {args.megatron_dir}") + print(f"Output directory: {args.output_dir}") + + # Verify megatron dir has preprocess_data.py + preprocess_script = os.path.join(args.megatron_dir, "tools", "preprocess_data.py") + if not os.path.exists(preprocess_script): + print(f"ERROR: preprocess_data.py not found at {preprocess_script}") + sys.exit(1) + + os.makedirs(args.output_dir, exist_ok=True) + + # ================================================================ + # Step 1: Get vocabulary files + # ================================================================ + print("\n" + "=" * 60) + print("Step 1: Setting up vocabulary files...") + print("=" * 60) + + if args.copy_vocab_from: + print(f" Copying from: {args.copy_vocab_from}") + copy_vocab_files(args.output_dir, args.copy_vocab_from) + elif args.download_vocab: + print(" Downloading from HuggingFace...") + download_vocab_files(args.output_dir) + else: + # Default: try to copy from standard CI location, fall back to download + default_source = ( + "/lustre/fsw/portfolios/coreai/projects/coreai_dlalgo_mcore" + "/mcore_ci/text/the_pile" + ) + if os.path.exists(default_source): + print(f" Copying from default location: {default_source}") + copy_vocab_files(args.output_dir, default_source) + else: + print(" Default vocab source not found, downloading from HuggingFace...") + download_vocab_files(args.output_dir) + + # ================================================================ + # Step 2: Get raw text data + # ================================================================ + print("\n" + "=" * 60) + print("Step 2: Preparing raw text data...") + print("=" * 60) + + if args.existing_jsonl: + jsonl_path = args.existing_jsonl + 
print(f" Using existing JSONL: {jsonl_path}") + else: + jsonl_path = os.path.join(args.output_dir, "common_pile_raw.jsonl") + if os.path.exists(jsonl_path): + print(f" JSONL already exists: {jsonl_path}") + print(" (Delete it to re-download)") + elif args.bulk_download: + download_common_pile_bulk( + jsonl_path, args.num_documents, args.dataset_name + ) + else: + download_common_pile_sample( + jsonl_path, args.num_documents, args.dataset_name + ) + + # ================================================================ + # Step 3: Preprocess for GPT (GPT2BPETokenizer) + # ================================================================ + print("\n" + "=" * 60) + print("Step 3: Preprocessing for GPT (GPT2BPETokenizer)...") + print("=" * 60) + + gpt_prefix = os.path.join(args.output_dir, "my-gpt3_00") + gpt_bin = gpt_prefix + "_text_document.bin" + if os.path.exists(gpt_bin): + print(f" GPT data already exists: {gpt_bin}") + else: + run_preprocess( + megatron_dir=args.megatron_dir, + jsonl_path=jsonl_path, + output_prefix=gpt_prefix, + tokenizer_type="GPT2BPETokenizer", + vocab_file=os.path.join(args.output_dir, "bpe", "vocab.json"), + merge_file=os.path.join(args.output_dir, "bpe", "merges.txt"), + append_eod=True, + workers=args.workers, + ) + + # ================================================================ + # Step 4: Preprocess for BERT (BertWordPieceLowerCase + split-sentences) + # ================================================================ + print("\n" + "=" * 60) + print("Step 4: Preprocessing for BERT (BertWordPieceLowerCase)...") + print("=" * 60) + + bert_prefix = os.path.join(args.output_dir, "my-bert_00") + bert_bin = bert_prefix + "_text_sentence.bin" + if os.path.exists(bert_bin): + print(f" BERT data already exists: {bert_bin}") + else: + # BERT with --split-sentences requires two passes when partitions=1: + # Pass 1: splits sentences, creates _ss.jsonl, then returns + # Pass 2: detects _ss.jsonl exists, encodes to binary .bin/.idx + jsonl_base, 
jsonl_ext = os.path.splitext(jsonl_path) + ss_file = jsonl_base + "_ss" + jsonl_ext + if not os.path.exists(ss_file): + print(" Pass 1: Splitting sentences...") + run_preprocess( + megatron_dir=args.megatron_dir, + jsonl_path=jsonl_path, + output_prefix=bert_prefix, + tokenizer_type="BertWordPieceLowerCase", + vocab_file=os.path.join(args.output_dir, "vocab.txt"), + split_sentences=True, + workers=args.workers, + ) + else: + print(f" Sentence-split file already exists: {ss_file}") + print(" Pass 2: Encoding split sentences to binary...") + run_preprocess( + megatron_dir=args.megatron_dir, + jsonl_path=jsonl_path, + output_prefix=bert_prefix, + tokenizer_type="BertWordPieceLowerCase", + vocab_file=os.path.join(args.output_dir, "vocab.txt"), + split_sentences=True, + workers=args.workers, + ) + + # ================================================================ + # Step 5: Preprocess for T5 (BertWordPieceCase) + # ================================================================ + print("\n" + "=" * 60) + print("Step 5: Preprocessing for T5 (BertWordPieceCase)...") + print("=" * 60) + + t5_prefix = os.path.join(args.output_dir, "my-t5_00") + t5_bin = t5_prefix + "_text_document.bin" + if os.path.exists(t5_bin): + print(f" T5 data already exists: {t5_bin}") + else: + run_preprocess( + megatron_dir=args.megatron_dir, + jsonl_path=jsonl_path, + output_prefix=t5_prefix, + tokenizer_type="BertWordPieceCase", + vocab_file=os.path.join(args.output_dir, "bert-large-cased-vocab.txt"), + append_eod=True, + workers=args.workers, + ) + + # ================================================================ + # Step 6: Clean up and verify + # ================================================================ + print("\n" + "=" * 60) + print("Step 6: Verifying output...") + print("=" * 60) + + if not args.keep_jsonl and not args.existing_jsonl: + intermediate = os.path.join(args.output_dir, "common_pile_raw.jsonl") + if os.path.exists(intermediate): + print(f" Removing intermediate 
JSONL: {intermediate}") + os.remove(intermediate) + ss_intermediate = os.path.join(args.output_dir, "common_pile_raw_ss.jsonl") + if os.path.exists(ss_intermediate): + print(f" Removing sentence-split JSONL: {ss_intermediate}") + os.remove(ss_intermediate) + + all_ok = verify_output(args.output_dir) + + if all_ok: + print(f"\nDataset created successfully at: {args.output_dir}") + print("\nTo use in CI tests, update model_config.yaml data paths:") + print(" GPT: --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-gpt3_00_text_document") + print(" --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/vocab.json") + print(" --merge-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bpe/merges.txt") + print(" BERT: --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-bert_00_text_sentence") + print(" --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/vocab.txt") + print(" T5: --data-path: ${DATA_PATH}/text/common_pile/v01_filtered_data/my-t5_00_text_document") + print(" --vocab-file: ${DATA_PATH}/text/common_pile/v01_filtered_data/bert-large-cased-vocab.txt") + else: + print("\nERROR: Some expected files are missing!") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tools/common_pile_dataset/setup_common_pile_dataset.sh b/tools/common_pile_dataset/setup_common_pile_dataset.sh new file mode 100644 index 00000000000..cb869e28368 --- /dev/null +++ b/tools/common_pile_dataset/setup_common_pile_dataset.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# Setup script to create Common Pile CI test dataset on the remote HPC machine. +# +# This script: +# 1. Clones the Megatron-LM repo (if needed) +# 2. Installs Python dependencies (if needed) +# 3. 
#    Runs create_common_pile_ci_dataset.py to download, preprocess, and save data
#
# Usage:
#   scp tools/common_pile_dataset/setup_common_pile_dataset.sh \
#       tools/common_pile_dataset/create_common_pile_ci_dataset.py \
#       <user>@<host>:/tmp/
#   ssh <user>@<host> 'bash /tmp/setup_common_pile_dataset.sh'
#
# The script takes no arguments. The Megatron-LM clone and the Python virtual
# environment live in a private scratch directory that is removed when the
# script exits (successfully or not). The dataset is written to OUTPUT_DIR and
# the HuggingFace cache to HF_HOME; neither is touched by the cleanup.

set -euo pipefail

# Resolve the directory holding this script BEFORE any `cd`: BASH_SOURCE may be
# a relative path, which would otherwise be resolved against the work directory
# and make the sibling create_common_pile_ci_dataset.py impossible to find.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR

# Use Python 3.10+ (required by latest Megatron-LM for PEP 604 type syntax)
PYTHON="/usr/bin/python3.10"
if [[ ! -x "${PYTHON}" ]]; then
    echo "ERROR: Python 3.10+ required but ${PYTHON} not found" >&2
    exit 1
fi
echo "Using Python: ${PYTHON} ($("${PYTHON}" --version))"

readonly OUTPUT_DIR="/lustre/fsw/portfolios/coreai/projects/coreai_dlalgo_mcore/mcore_ci/text/common_pile/v01_filtered_data"
readonly EXISTING_VOCAB="/lustre/fsw/portfolios/coreai/projects/coreai_dlalgo_mcore/mcore_ci/text/the_pile"
readonly NUM_DOCUMENTS=12000000
readonly DATASET_NAME="common-pile/comma_v0.1_training_dataset"

# Redirect HuggingFace cache to lustre so it doesn't fill up /home
export HF_HOME="/lustre/fsw/portfolios/coreai/projects/coreai_dlalgo_mcore/mcore_ci/.hf_cache"
export HF_DATASETS_CACHE="${HF_HOME}/datasets"

# Private scratch directory with an unpredictable name (mktemp creates it).
WORK_DIR="$(mktemp -d /tmp/mcore_dataset_setup_XXXXXX)"
readonly WORK_DIR

# Remove the scratch directory on every exit path, not only after success.
cleanup() {
    echo ""
    echo "Cleaning up work directory: ${WORK_DIR}"
    cd /
    rm -rf -- "${WORK_DIR:?}"
}
trap cleanup EXIT

echo "============================================================"
echo "Common Pile CI Dataset Setup"
echo "============================================================"
echo "Output:      ${OUTPUT_DIR}"
echo "Vocab from:  ${EXISTING_VOCAB}"
echo "Documents:   ${NUM_DOCUMENTS}"
echo "Dataset:     ${DATASET_NAME}"
echo "Work dir:    ${WORK_DIR}"
echo "HF cache:    ${HF_HOME}"
echo "============================================================"

cd "${WORK_DIR}"

# Check if create_common_pile_ci_dataset.py was scp'd alongside this script
if [[ -f "${SCRIPT_DIR}/create_common_pile_ci_dataset.py" ]]; then
    DATASET_SCRIPT="${SCRIPT_DIR}/create_common_pile_ci_dataset.py"
    echo "Found dataset script at: ${DATASET_SCRIPT}"
else
    DATASET_SCRIPT=""
fi

# Clone Megatron-LM — full shallow clone needed because preprocess_data.py
# imports from megatron.core which pulls in tensor_parallel and other submodules
readonly MEGATRON_DIR="${WORK_DIR}/Megatron-LM"
echo ""
echo "Cloning Megatron-LM repository (full shallow clone)..."
git clone --depth 1 https://github.com/NVIDIA/Megatron-LM.git "${MEGATRON_DIR}" 2>&1

# Patch megatron/training/__init__.py to avoid importing the full training stack.
# preprocess_data.py only needs _add_tokenizer_args from arguments.py, but the
# __init__.py eagerly imports initialize_megatron which pulls in triton, apex, etc.
readonly TRAINING_INIT="${MEGATRON_DIR}/megatron/training/__init__.py"
if [[ -f "${TRAINING_INIT}" ]] && grep -q "from .initialize" "${TRAINING_INIT}"; then
    echo "Patching megatron/training/__init__.py to skip heavy imports..."
    sed -i 's/^from \.initialize/#from .initialize/' "${TRAINING_INIT}"
    sed -i 's/^from \.training/#from .training/' "${TRAINING_INIT}"
fi

# Use the script from the repo if we didn't have it locally
if [[ -z "${DATASET_SCRIPT}" ]]; then
    DATASET_SCRIPT="${MEGATRON_DIR}/tools/common_pile_dataset/create_common_pile_ci_dataset.py"
    if [[ ! -f "${DATASET_SCRIPT}" ]]; then
        echo "ERROR: create_common_pile_ci_dataset.py not found" >&2
        exit 1
    fi
fi

# Create a virtual environment to avoid system package conflicts
readonly VENV_DIR="${WORK_DIR}/venv"
echo ""
echo "Creating virtual environment..."
"${PYTHON}" -m venv "${VENV_DIR}"
# shellcheck disable=SC1091 — the activate script only exists at run time
source "${VENV_DIR}/bin/activate"
PYTHON="$(command -v python)"
echo "Using venv Python: ${PYTHON} ($("${PYTHON}" --version))"

#######################################
# Install a Python package into the venv unless its module already imports.
# Globals:   PYTHON (read)
# Arguments: $1 - module name to probe with `import`
#            $2 - message printed before installing
#            $@ - remaining arguments are passed to `pip install`
# Returns:   non-zero if the installation fails
#######################################
ensure_module() {
    local -r module=$1
    local -r message=$2
    shift 2
    if ! "${PYTHON}" -c "import ${module}" 2>/dev/null; then
        echo "${message}"
        # `python -m pip` guarantees the venv interpreter's pip is the one used.
        "${PYTHON}" -m pip install --quiet "$@"
    fi
}

# Install Python dependencies
echo ""
echo "Checking Python dependencies..."
ensure_module datasets "Installing 'datasets' library..." datasets
ensure_module nltk "Installing 'nltk' library..." nltk
ensure_module torch \
    "Installing PyTorch (CPU-only, needed for Megatron preprocessing)..." \
    torch --index-url https://download.pytorch.org/whl/cpu
ensure_module transformers \
    "Installing 'transformers' library (needed for tokenizer in preprocessing)..." \
    transformers

# Download NLTK punkt tokenizer data (needed for BERT sentence splitting).
# Still best-effort, but a failure is reported instead of being hidden so it is
# not first discovered when the BERT preprocessing step runs.
echo "Ensuring NLTK punkt tokenizer data is available..."
if ! "${PYTHON}" -c "import nltk; nltk.download('punkt', quiet=True); nltk.download('punkt_tab', quiet=True)"; then
    echo "WARNING: could not download NLTK punkt data; BERT sentence splitting may fail" >&2
fi

# Create the output directory
mkdir -p "${OUTPUT_DIR}"

# Run the dataset creation script
echo ""
echo "============================================================"
echo "Running dataset creation..."
echo "============================================================"

"${PYTHON}" "${DATASET_SCRIPT}" \
    --output-dir "${OUTPUT_DIR}" \
    --megatron-dir "${MEGATRON_DIR}" \
    --num-documents "${NUM_DOCUMENTS}" \
    --dataset-name "${DATASET_NAME}" \
    --copy-vocab-from "${EXISTING_VOCAB}" \
    --keep-jsonl \
    --workers 4

echo ""
echo "============================================================"
echo "Done! Dataset saved to: ${OUTPUT_DIR}"
echo "============================================================"
echo ""
echo "Final directory listing:"
find "${OUTPUT_DIR}" -type f -exec ls -lh {} \;

# The work directory is removed by the EXIT trap installed above.