-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathvalidate.sh
executable file
·67 lines (62 loc) · 6.18 KB
/
validate.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/bin/bash
#
# Runs ds_comm_bench.py through the deepspeed launcher over a matrix of
# accelerator counts, element counts and dtypes. The combined stdout/stderr
# of every run is shown on the terminal and collected in result.txt; at the
# end, the lines of result.txt containing "diff" are printed.
#
# Usage: validate.sh [extra arguments forwarded to ds_comm_bench.py ...]
#
# Environment read by this script:
#   CONDA_PREFIX  prefix under which lib/libiomp5.so and lib/libtcmalloc.so
#                 are looked up for preloading
#   LD_PRELOAD    existing value (if any) is kept and extended
#
# Strict mode (set -e / pipefail) is intentionally NOT enabled: a failing
# launch must not prevent the remaining configurations from running.

#export KMP_BLOCKTIME=1
#export KMP_SETTINGS=1

# Append to LD_PRELOAD; the ${VAR:+...} form avoids a stray leading ':'
# when LD_PRELOAD is empty or unset on entry.
export LD_PRELOAD="${LD_PRELOAD:+${LD_PRELOAD}:}${CONDA_PREFIX}/lib/libiomp5.so"
export LD_PRELOAD="${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so"

#export CCL_ALLREDUCE=recursive_doubling
export CCL_PROCESS_LAUNCHER=none
export CCL_ATL_TRANSPORT=ofi
export CCL_ATL_SHM=1
#export CCL_ITT_LEVEL=1
export CCL_WORKER_COUNT=1
# If the next line is turned on, use a small iteration count such as 50.
#export CCL_SCHED_PROFILE=1
# For 48 cores * 2; set CCL_WORKER_AFFINITY if necessary.
#export CCL_WORKER_AFFINITY=10,22,34,46,58,70,82,94

# Single node, default core binding:
#deepspeed --bind_cores_to_rank ds_comm_bench.py "$@"

readonly RESULT_FILE=result.txt
readonly ITERATIONS=100

# Test matrix. An empty dtype entry means "do not pass --dtype at all".
dtypes=("" fp16 fp32)
accelerator_counts=(1 2 3 6 11 16 17)
element_counts=(1024 1048576)

# Start from an empty log exactly once; every run below appends to it, so
# no run can wipe the output of an earlier one.
: > "${RESULT_FILE}"

for dtype in "${dtypes[@]}"; do
  dtype_args=()
  if [[ -n "${dtype}" ]]; then
    dtype_args=(--dtype "${dtype}")
  fi

  for accelerators in "${accelerator_counts[@]}"; do
    for elements in "${element_counts[@]}"; do
      # "$@" forwards the caller's arguments word-for-word (no re-splitting
      # or glob expansion, unlike an unquoted $*).
      deepspeed --num_accelerators "${accelerators}" --bind_cores_to_rank \
        ds_comm_bench.py --elements "${elements}" --count "${ITERATIONS}" \
        "${dtype_args[@]}" "$@" 2>&1 \
        | tee -a "${RESULT_FILE}"
    done
  done
done

# Summary: show only the log lines that contain "diff".
grep diff "${RESULT_FILE}"