#!/usr/bin/bash
#
# Argument handling and shared settings for the job-submission loops below.
#
# Usage: <script> <flavor> [--compile] [--full_ac]
#   <flavor>   value passed as --flavor and used as the results sub-directory
#   --compile  sets COMPILE_FLAG to --enable_compile
#   --full_ac  sets FULL_AC_FLAG to --enable_full_ac

# Check if flavor is provided
if [[ -z "${1:-}" ]]; then
    echo "Usage: $0 <flavor> [--compile] [--full_ac]" >&2
    echo "Example: $0 debugperf_large" >&2
    echo "Example: $0 debugperf_large --compile" >&2
    exit 1
fi

FLAVOR=$1
# Both flags default to empty so they vanish from the command line when unset.
COMPILE_FLAG=""
FULL_AC_FLAG=""

# Check for flags (any position; unrecognised arguments are ignored)
for arg in "$@"; do
    case "$arg" in
        --compile) COMPILE_FLAG="--enable_compile" ;;
        --full_ac) FULL_AC_FLAG="--enable_full_ac" ;;
    esac
done

# Shared model configuration for fair comparison
VOCAB_SIZE=2048
N_LAYERS=6
@@ -20,35 +42,35 @@ model_names=(
# For every torchtitan model: wipe its previous results, generate configs for
# the selected flavor, submit the seed-checkpoint job, wait until its
# status.txt reads "completed", then submit the jobs in the flavor directory.
for model_name in "${tt_model_names[@]}"; do
    # ${model_name:?} aborts rather than deleting all of debug_local_results
    # if an array entry is empty.
    rm -rf -- "debug_local_results/${model_name:?}"

    run_dir="debug_local_results/${model_name}/${FLAVOR}"
    seed_status="${run_dir}/seed_checkpoint/status.txt"

    # COMPILE_FLAG / FULL_AC_FLAG are intentionally unquoted: an empty flag
    # must expand to nothing instead of an empty argument.
    # shellcheck disable=SC2086
    python ./tooling_dev/test_hf_integration.py create_configs \
        --model_name "$model_name" \
        --out_dir debug_local_results \
        --flavor "$FLAVOR" \
        --model_type torchtitan \
        --enable_profiling --profile_freq 5 \
        $COMPILE_FLAG $FULL_AC_FLAG
    python ./tooling_dev/test_hf_integration.py submit_jobs \
        --inp_dir "${run_dir}/seed_checkpoint" --qos high

    # Poll once per second until the status file exists and holds "completed".
    until [[ -f "$seed_status" && "$(<"$seed_status")" == "completed" ]]; do
        echo "Waiting for seed checkpoint from ${model_name} to complete ..."
        sleep 1
    done

    python ./tooling_dev/test_hf_integration.py submit_jobs \
        --inp_dir "$run_dir" --qos high
    echo "================"
done
3254
# For every model run through the transformers modeling backend: wipe its
# previous results, generate configs for the selected flavor, submit the
# seed-checkpoint job, wait until its status.txt reads "completed", then
# submit the jobs in the flavor directory.
for model_name in "${model_names[@]}"; do
    # ${model_name:?} aborts rather than deleting all of debug_local_results
    # if an array entry is empty.
    rm -rf -- "debug_local_results/${model_name:?}"

    run_dir="debug_local_results/${model_name}/${FLAVOR}"
    seed_status="${run_dir}/seed_checkpoint/status.txt"

    # COMPILE_FLAG / FULL_AC_FLAG are intentionally unquoted: an empty flag
    # must expand to nothing instead of an empty argument.
    # shellcheck disable=SC2086
    python ./tooling_dev/test_hf_integration.py create_configs \
        --model_name "$model_name" \
        --out_dir debug_local_results \
        --flavor "$FLAVOR" \
        --model_type transformers_modeling_backend \
        --hf_assets_path "/fsx/ferdinandmom/ferdinand-hf/huggingface/torchtitan/tests/assets/tokenizer" \
        --enable_profiling --profile_freq 5 \
        $COMPILE_FLAG $FULL_AC_FLAG
    python ./tooling_dev/test_hf_integration.py submit_jobs \
        --inp_dir "${run_dir}/seed_checkpoint" --qos high

    # Poll once per second until the status file exists and holds "completed".
    until [[ -f "$seed_status" && "$(<"$seed_status")" == "completed" ]]; do
        echo "Waiting for seed checkpoint from ${model_name} to complete ..."
        sleep 1
    done

    python ./tooling_dev/test_hf_integration.py submit_jobs \
        --inp_dir "$run_dir" --qos high
    echo "================"
done
4567
# for model_name in "${moe_model_names[@]}"; do
#     rm -rf debug_local_results/${model_name}

#     USE_MOE=1 python ./tooling_dev/test_hf_integration.py create_configs --model_name "$model_name" --out_dir debug_local_results --flavor $FLAVOR
#     USE_MOE=1 python ./tooling_dev/test_hf_integration.py submit_jobs --inp_dir debug_local_results/${model_name}/${FLAVOR}/seed_checkpoint --qos high
#     while [ ! -f debug_local_results/${model_name}/${FLAVOR}/seed_checkpoint/status.txt ] || [ "$(cat debug_local_results/${model_name}/${FLAVOR}/seed_checkpoint/status.txt)" != "completed" ]; do
#         echo "Waiting for seed checkpoint from ${model_name} to complete ..."
#         sleep 1
#     done
0 commit comments