Skip to content

Commit dc2cf8d

Browse files
committed
scripts that run all affinity tests in one go, and the updated performance results
timing of kernel measured with MPI_Wtime() instead of measuring the entire execution externally
1 parent 72f78be commit dc2cf8d

11 files changed

+193
-52
lines changed

README.md

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,19 @@ Here workers are representing MPI tasks. Data is exchanged between MPI tasks. T
2020
- envMvapich.sh - environment file for Mvapich2-mic
2121
- envMvapich2.sh - environment file for Mvapich2-mic v2.0
2222
- build.sh - build script
23-
- getTimeImpi.sh - run script for Intel MPI
23+
- getTimeImpi.sh - run script for Intel MPI 4 and 5
24+
- getTimeImpi51.sh - run script for Intel MPI 5.1
2425
- getTimeMvapich.sh - run script for Mvapich2-mic
2526
- getTimeMvapich2.sh - run script for Mvapich2-mic v2.0
26-
- runImpi51.sh - run script for Intel MPI 5.1 (beta)
2727
- kernelComm.c - communication kernel
2828
- kernel.h - kernel header
2929
- mpiWork.c - mpi driver
3030
- README.md - this file
3131
- threadWork.c - threaded mpi driver
3232

3333
## build
34-
source env<Impi|Impi5|Mvapich|Mvapich2>.sh
35-
./build.sh <threadsPerCore=1|2|4>
34+
source env<Impi|Impi51|Mvapich|Mvapich2>.sh
35+
for i in 1 2 4; do ./build.sh $i; done
3636

3737
## run
3838
From an interactive idev session:
@@ -67,8 +67,4 @@ run
6767
## Performance Results
6868

6969
The following tests used MPSS 3.3 as installed on Stampede.
70-
71-
![alt text](
72-
https://github.com/cwsmith/commKernel/raw/master/mpiThreadMultiplePerformanceOnPhi.png
73-
"Performance Results")
74-
70+
see performance.csv

build.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ tpc=$1
44
flags="-DTHREADS_PER_CORE=$tpc -mmic -Wall -g -O3 -fno-omit-frame-pointer "
55
src="kernelComm.c"
66
set -x
7-
mpicc $flags threadWork.c $src -o threadWorkComm -lpthread $LDFLAGS $THREAD_MULTIPLE
8-
mpicc $flags mpiWork.c $src -o mpiWork -lpthread $LDFLAGS
7+
mpicc $flags threadWork.c $src -o threadWorkComm.${tpc} -lpthread $LDFLAGS $THREAD_MULTIPLE
8+
mpicc $flags mpiWork.c $src -o mpiWork.${tpc} -lpthread $LDFLAGS
99
mpicc -DTHREAD_MULTIPLE $flags mpiWork.c $src \
10-
-o mpiWorkThreadMult -lpthread $LDFLAGS $THREAD_MULTIPLE
10+
-o mpiWorkThreadMult.${tpc} -lpthread $LDFLAGS $THREAD_MULTIPLE

getTimeImpi.sh

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,42 @@
11
#!/bin/bash -e
22
run() {
3-
/usr/bin/time -f "realTime %e" ibrun.symm -m "$1" &>> work${2}.log
3+
ibrun.symm -m "$1" &>> work${2}.log
44
}
55
threadWorkComm() {
66
export MIC_MY_NSLOTS=1
77
export MIC_PPN=1
8-
run "./threadWorkComm $1" $1
8+
run "./threadWorkComm.${1} $2" $2
99
}
1010
mpiWork() {
1111
export MIC_MY_NSLOTS=$1
1212
export MIC_PPN=$1
13-
run "./mpiWork" $1
13+
run "./mpiWork.${1}" $2
1414
}
1515
mpiWorkThreadMult() {
1616
export MIC_MY_NSLOTS=$1
1717
export MIC_PPN=$1
18-
run "./mpiWorkThreadMult" $1
18+
run "./mpiWorkThreadMult.${1}" $2
1919
}
2020
getAvg() {
21-
awk '/realTime/ {sum+=$2; cnt+=1;} END {print "average " sum/cnt}' work${1}.log
21+
t=$(awk '/realTime/ {sum+=$2; cnt+=1;} END {print sum/cnt}' work${1}.log)
22+
echo -n ", $t "
2223
}
2324

2425
declare -a fns
2526
fns[1]="threadWorkComm"
2627
fns[2]="mpiWork"
2728
fns[3]="mpiWorkThreadMult"
2829

29-
for workers in 2 4 8 16 32; do
30-
for i in "${!fns[@]}"; do
31-
cat /dev/null > work${workers}.log
32-
echo -n "${fns[i]} workers $workers "
33-
for j in {0..4}; do
34-
${fns[$i]} $workers
30+
for fnIdx in "${!fns[@]}"; do
31+
for affinity in 1 2 4; do
32+
echo -n "${fns[$fnIdx]}, $affinity"
33+
for workers in 2 4 8 16 32; do
34+
cat /dev/null > work${workers}.log
35+
for j in {0..4}; do
36+
${fns[$fnIdx]} $affinity $workers
37+
done
38+
getAvg $workers
3539
done
36-
getAvg $workers
40+
echo ""
3741
done
3842
done

getTimeImpi51.sh

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#!/bin/bash -e
2+
3+
setup() {
4+
ulimit -l unlimited
5+
echo "memlock ulimit:" `ulimit -l`
6+
7+
for i in `env | grep I_MPI| cut -f 1 -d '='`; do unset $i; done
8+
for i in `env | grep DAPL | cut -f 1 -d '='`; do unset $i; done
9+
for i in `env | grep TACC | cut -f 1 -d '='`; do unset $i; done
10+
11+
beta=/work/01187/bmatth/intel_tools/2016_beta1
12+
compiler=$beta/compilers_and_libraries_2016.0.042
13+
mpi=$beta/impi/5.1.0.042
14+
export SINK_LD_LIBRARY_PATH=$compiler/linux/compiler/lib/mic/:$mpi/mic/lib/:$SINK_LD_LIBRARY_PATH
15+
export SINK_PATH=$mpi/mic/bin:$SINK_PATH
16+
17+
. $beta/bin/compilervars.sh intel64
18+
. $mpi/bin64/mpivars.sh
19+
export I_MPI_CC=$compiler/linux/bin/intel64_mic/icc
20+
export I_MPI_CXX=$compiler/linux/bin/intel64_mic/icpc
21+
export I_MPI_FC=$compiler/linux/bin/intel64_mic/ifort
22+
}
23+
24+
mpirun() {
25+
#EDIT THESE
26+
hostnp=0 #tasks per host
27+
micnp=1 #tasks per accelerator
28+
n_mics=1 #accelerators per host
29+
app=$1
30+
31+
#The rest of this script should be safe to ignore
32+
#(Give or take performance)
33+
tmpdir=`mktemp -d`
34+
35+
hosts=()
36+
for i in $SLURM_NODELIST; do
37+
for j in `scontrol show hostname $i`; do
38+
hosts+=($j)
39+
done
40+
done
41+
echo "-genv I_MPI_DEBUG 5" > $tmpdir/mpi_conf
42+
echo "-genv I_MPI_FABRICS 'shm:dapl'" >> $tmpdir/mpi_conf
43+
echo "-genv I_MPI_FALLBACK 0" >> $tmpdir/mpi_conf
44+
echo "-genv I_MPI_MIC 1" >> $tmpdir/mpi_conf
45+
echo "-genv I_MPI_MIC_PREFIX ./" >> $tmpdir/mpi_conf
46+
echo "-genv I_MPI_EXTRA_FILE_SYSTEM 1" >> $tmpdir/mpi_conf
47+
echo "-genv I_MPI_EXTRA_FILE_SYSTEM_LLIST lustre" >> $tmpdir/mpi_conf
48+
echo "-genv I_MPI_MIC_PROXY_PATH $mpi/mic/bin" >> $tmpdir/mpi_conf
49+
MIC_ENVS="-genv PATH $SINK_PATH -genv LD_LIBRARY_PATH $SINK_LD_LIBRARY_PATH"
50+
51+
for h in "${hosts[@]}"; do
52+
for i in `seq 0 $(( $n_mics-1 ))`; do
53+
printf -- "$MIC_ENVS -n %d -host %s %s\n" $micnp "$h-mic$i" "$app" >> $tmpdir/mpi_conf
54+
done
55+
printf -- "$HOST_ENVS -n %d -host %s %s\n" $hostnp "$h-br0" "$app" >> $tmpdir/mpi_conf
56+
done
57+
58+
cat $tmpdir/mpi_conf
59+
which mpirun
60+
export I_MPI_MIC=1
61+
export I_MPI_MIC_PROXY_PATH=$mpi/mic/bin
62+
mpiexec.hydra -iface br0 -configfile $tmpdir/mpi_conf
63+
cp $tmpdir/mpi_conf ./
64+
rm -rf $tmpdir
65+
}
66+
67+
run() {
68+
mpirun "$1" &>> work${2}.log
69+
}
70+
threadWorkComm() {
71+
run "./threadWorkComm.${1} $2" $2
72+
}
73+
getAvg() {
74+
t=$(awk '/realTime/ {sum+=$2; cnt+=1;} END {print sum/cnt}' work${1}.log)
75+
echo -n ", $t "
76+
}
77+
78+
declare -a fns
79+
fns[1]="threadWorkComm"
80+
81+
setup
82+
83+
for fnIdx in "${!fns[@]}"; do
84+
for affinity in 1 2 4; do
85+
echo -n "${fns[$fnIdx]}, $affinity"
86+
for workers in 2 4 8 16 32; do
87+
cat /dev/null > work${workers}.log
88+
for j in {0..4}; do
89+
${fns[$fnIdx]} $affinity $workers
90+
done
91+
getAvg $workers
92+
done
93+
echo ""
94+
done
95+
done

getTimeMvapich.sh

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,44 @@
11
#!/bin/bash -e
22
run() {
3-
/usr/bin/time -f "realTime %e" ibrun.symm -m "$1" &>> work${2}.log
3+
ibrun.symm -m "$1" &>> work${2}.log
44
}
55
threadWorkComm() {
66
export MIC_MY_NSLOTS=1
77
export MIC_PPN=1
88
export MV2_ENABLE_AFFINITY=0
9-
run "./threadWorkComm $1" $1
9+
run "./threadWorkComm.${1} $2" $2
1010
}
1111
mpiWork() {
1212
export MIC_MY_NSLOTS=$1
1313
export MIC_PPN=$1
14-
run "./mpiWork" $1
14+
run "./mpiWork.${1}" $2
1515
}
1616
mpiWorkThreadMult() {
1717
export MIC_MY_NSLOTS=$1
1818
export MIC_PPN=$1
1919
export MV2_ENABLE_AFFINITY=0
20-
run "./mpiWorkThreadMult" $1
20+
run "./mpiWorkThreadMult.${1}" $2
2121
}
2222
getAvg() {
23-
awk '/realTime/ {sum+=$2; cnt+=1;} END {print "average " sum/cnt}' work${1}.log
23+
t=$(awk '/realTime/ {sum+=$2; cnt+=1;} END {print sum/cnt}' work${1}.log)
24+
echo -n ", $t "
2425
}
2526

2627
declare -a fns
2728
fns[1]="threadWorkComm"
2829
fns[2]="mpiWork"
2930
fns[3]="mpiWorkThreadMult"
3031

31-
for workers in 2 4 8 16 32; do
32-
for i in "${!fns[@]}"; do
33-
cat /dev/null > work${workers}.log
34-
echo -n "${fns[i]} workers $workers "
35-
for j in {0..4}; do
36-
${fns[$i]} $workers
32+
for fnIdx in "${!fns[@]}"; do
33+
for affinity in 1 2 4; do
34+
echo -n "${fns[$fnIdx]}, $affinity"
35+
for workers in 2 4 8 16 32; do
36+
cat /dev/null > work${workers}.log
37+
for j in {0..4}; do
38+
${fns[$fnIdx]} $affinity $workers
39+
done
40+
getAvg $workers
3741
done
38-
getAvg $workers
42+
echo ""
3943
done
4044
done

getTimeMvapich2.sh

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,35 @@
11
#!/bin/bash -e
22
threadWorkComm() {
3-
workers=$1
3+
affinity=$1
4+
workers=$2
45
echo "mic0:1" > ./hostsThreadWorkComm
56
echo "export MV2_ENABLE_AFFINITY=0" > ./threadWorkCommMvapich2.sh
6-
echo "$PWD/threadWorkComm $workers" >> ./threadWorkCommMvapich2.sh
7+
echo "$PWD/threadWorkComm.${affinity} $workers" >> ./threadWorkCommMvapich2.sh
78
chmod +x ./threadWorkCommMvapich2.sh
89
echo "-n 1 : $PWD/threadWorkCommMvapich2.sh " > ./configThreadWorkComm
9-
/usr/bin/time -f "realTime %e" mpirun_rsh \
10+
mpirun_rsh \
1011
-config ./configThreadWorkComm \
11-
-hostfile ./hostsThreadWorkComm &>> work${1}.log
12+
-hostfile ./hostsThreadWorkComm &>> work${workers}.log
1213
}
1314

1415
getAvg() {
15-
awk '/realTime/ {sum+=$2; cnt+=1;} END {print "average " sum/cnt}' work${1}.log
16+
t=$(awk '/realTime/ {sum+=$2; cnt+=1;} END {print sum/cnt}' work${1}.log)
17+
echo -n ", $t "
1618
}
1719

1820
declare -a fns
1921
fns[1]="threadWorkComm"
2022

21-
for workers in 2 4 8 16 32; do
22-
for i in "${!fns[@]}"; do
23-
cat /dev/null > work${workers}.log
24-
echo -n "${fns[i]} workers $workers "
25-
for j in {0..4}; do
26-
${fns[$i]} $workers
23+
for fnIdx in "${!fns[@]}"; do
24+
for affinity in 1 2 4; do
25+
echo -n "${fns[$fnIdx]}, $affinity"
26+
for workers in 2 4 8 16 32; do
27+
cat /dev/null > work${workers}.log
28+
for j in {0..4}; do
29+
${fns[$fnIdx]} $affinity $workers
30+
done
31+
getAvg $workers
2732
done
28-
getAvg $workers
33+
echo ""
2934
done
3035
done

mpiThreadMultiplePerformanceOnPhi.png

-98.2 KB
Binary file not shown.

mpiWork.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
int main(int argc, char** argv) {
1111
int rank;
1212
int worldSz;
13+
double t0;
1314
#ifdef THREAD_MULTIPLE
1415
int provided;
1516
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
@@ -23,12 +24,15 @@ int main(int argc, char** argv) {
2324
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
2425
MPI_Comm_size(MPI_COMM_WORLD, &worldSz);
2526
thdata* data = (thdata*) calloc(1,sizeof(thdata));
27+
t0 = MPI_Wtime();
2628
data->rank = rank;
2729
data->commsz = worldSz;
2830
data->id = rank;
2931
data->peers = 1;
3032
kernelComm((void*) data);
3133
MPI_Barrier(MPI_COMM_WORLD);
34+
if( !rank )
35+
fprintf(stderr, "realTime %.3f\n", MPI_Wtime()-t0);
3236
free(data);
3337
MPI_Finalize();
3438
return 0;

performance.csv

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
, , , workers
2+
mpi, model, threads-per-core, 2, 4, 8, 16, 32
3+
impi4, threadWorkComm, 1, 1.0942 , 2.6856 , 7.444 , 19.8354 , 44.3168
4+
impi4, threadWorkComm, 2, 1.847 , 2.741 , 7.0572 , 17.7922 , 43.8934
5+
impi4, threadWorkComm, 4, 1.5304 , 2.417 , 6.7752 , 16.9144 , 42.0378
6+
impi4, mpiWork, 1, 0.0444 , 0.0436 , 0.0438 , 0.0436 , 0.0436
7+
impi4, mpiWork, 2, 0.1454 , 0.1438 , 0.1444 , 0.144 , 0.144
8+
impi4, mpiWork, 4, 0.251 , 0.2522 , 0.2532 , 0.2528 , 0.2526
9+
impi4, mpiWorkThreadMult, 1, 0.0742 , 0.0734 , 0.074 , 0.0736 , 0.0738
10+
impi4, mpiWorkThreadMult, 2, 0.1992 , 0.1994 , 0.1998 , 0.202 , 0.1988
11+
impi4, mpiWorkThreadMult, 4, 0.3576 , 0.3556 , 0.3594 , 0.355 , 0.3578
12+
impi5, threadWorkComm, 1, 0.7422 , 2.7226 , 7.1064 , 18.4822 , 42.765
13+
impi5, threadWorkComm, 2, 0.9698 , 2.3846 , 6.4668 , 15.289 , 41.4386
14+
impi5, threadWorkComm, 4, 0.9668 , 2.0686 , 6.0412 , 13.7852 , 38.2496
15+
impi5, mpiWork, 1, 0.0226 , 0.023 , 0.023 , 0.0246 , 0.0254
16+
impi5, mpiWork, 2, 0.0622 , 0.0632 , 0.0622 , 0.0622 , 0.063
17+
impi5, mpiWork, 4, 0.1628 , 0.1592 , 0.156 , 0.157 , 0.1584
18+
impi5, mpiWorkThreadMult, 1, 0.037 , 0.0362 , 0.0364 , 0.0376 , 0.0364
19+
impi5, mpiWorkThreadMult, 2, 0.0902 , 0.0904 , 0.0886 , 0.0896 , 0.0894
20+
impi5, mpiWorkThreadMult, 4, 0.234 , 0.2324 , 0.219 , 0.2206 , 0.2286
21+
impi51, threadWorkComm, 1, 0.7728 , 2.807 , 6.9916 , 18.293 , 42.3842
22+
impi51, threadWorkComm, 2, 1.4986 , 2.3072 , 6.3904 , 15.2192 , 41.3416
23+
impi51, threadWorkComm, 4, 1.2304 , 2.1484 , 6.0166 , 13.7792 , 37.9134
24+
mvapich2-mic, threadWorkComm, 1, 38.3682 , 10.404 , 5.944 , 12.6848 , 28.5394
25+
mvapich2-mic, threadWorkComm, 2, 147.384 , 5.735 , 5.466 , 11.84 , 24.4438
26+
mvapich2-mic, threadWorkComm, 4, >100, >100, 7.9684 , 12.4788 , 25.419
27+
mvapich2-mic2, threadWorkComm, 1, 1.1414 , 6.3 , 5.3218 , 12.2706 , 25.9254
28+
mvapich2-mic2, threadWorkComm, 2, 0.6936 , 2.2848 , 4.9388 , 10.5948 , 22.7568
29+
mvapich2-mic2, threadWorkComm, 4, 0.6854 , 2.7284 , 5.356 , 10.8154 , 23.2128

runImpi51.sh

100644100755
Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ hostnp=0 #tasks per host
4242
#micnp=240
4343
micnp=1 #tasks per accelerator
4444
n_mics=1 #accelerators per host
45-
app="threadWorkComm"
46-
args="16"
45+
app=$1
4746

4847
#The rest of this script should be safe to ignore
4948
#(Give or take performance)
@@ -69,9 +68,9 @@ MIC_ENVS="-genv PATH $SINK_PATH -genv LD_LIBRARY_PATH $SINK_LD_LIBRARY_PATH"
6968

7069
for h in "${hosts[@]}"; do
7170
for i in `seq 0 $(( $n_mics-1 ))`; do
72-
printf -- "$MIC_ENVS -n %d -host %s %s\n" $micnp "$h-mic$i" "$app $args" >> $tmpdir/mpi_conf
71+
printf -- "$MIC_ENVS -n %d -host %s %s\n" $micnp "$h-mic$i" "$app" >> $tmpdir/mpi_conf
7372
done
74-
printf -- "$HOST_ENVS -n %d -host %s %s\n" $hostnp "$h-br0" "$app $args" >> $tmpdir/mpi_conf
73+
printf -- "$HOST_ENVS -n %d -host %s %s\n" $hostnp "$h-br0" "$app" >> $tmpdir/mpi_conf
7574
done
7675

7776
cat $tmpdir/mpi_conf

0 commit comments

Comments
 (0)