Skip to content

Commit 2572837

Browse files
committed
The power-counter accessibility check now lives in remora_init.sh. The binary power collector is no longer called if the power files are not accessible.
1 parent 4ce0bb6 commit 2572837

File tree

4 files changed

+65
-56
lines changed

4 files changed

+65
-56
lines changed

src/aux/extra

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,3 +481,36 @@ capture_environment() {
481481
#Dump environment during run
482482
env > $REMORA_OUTDIR/runtime_env.txt
483483
}
484+
485+
#######################################
# Decide which power/energy source can be used on this node and record the
# decision in the run's environment file.
#
# Order of preference:
#   1. sensors     - when the `sensors` command exists and its output
#                    contains "power1" (original note: assumes KNL and a
#                    single socket).
#   2. intel_rapl  - when the energy_uj file is readable for every socket
#                    reported by lscpu (1 socket if lscpu is unavailable).
#   3. none        - otherwise; power collection is to be ignored.
#
# Globals:
#   REMORA_OUTDIR           (read)    directory that holds remora_env.txt
#   REMORA_POWER_IGNORE     (written) 0 = do not ignore power, 1 = ignore
#   REMORA_POWER_COLLECTOR  (written) sensors | intel_rapl | none
# Arguments:
#   None
# Outputs:
#   Appends "export REMORA_POWER_IGNORE=..." and
#   "export REMORA_POWER_COLLECTOR=..." to $REMORA_OUTDIR/remora_env.txt
#######################################
check_energy_counter_accessibility() {
  local rapl_root=/sys/devices/virtual/powercap/intel-rapl
  local sockets=1   # default when lscpu is missing or its output is unusable
  local detected sensors_out soc_num power_readable

  if command -v lscpu >/dev/null 2>&1; then
    detected=$(lscpu | awk '/Socket\(s\):/ {print $2; exit}')
    # Only accept a positive integer; anything else would break the
    # arithmetic loop over sockets below.
    [[ $detected =~ ^[1-9][0-9]*$ ]] && sockets=$detected
  fi

  # Start from "no usable power source" so both variables are always defined,
  # whichever probes succeed or fail.
  REMORA_POWER_IGNORE=1
  REMORA_POWER_COLLECTOR=none

  # Probe 1: does `sensors` exist and report a "power1" reading?
  # The output is captured rather than piped to grep so nothing is echoed
  # to the user's terminal.
  if command -v sensors >/dev/null 2>&1; then
    sensors_out=$(sensors 2>/dev/null)
    if [[ $sensors_out == *power1* ]]; then
      REMORA_POWER_IGNORE=0
      REMORA_POWER_COLLECTOR=sensors   # assuming KNL & only one socket
    fi
  fi

  # Probe 2: fall back to intel_rapl, which is usable only if the energy
  # counter of every socket is readable by this user.
  if [[ $REMORA_POWER_IGNORE == 1 ]]; then
    power_readable=TRUE
    for ((soc_num = 0; soc_num < sockets; soc_num++)); do
      [[ -r "$rapl_root/intel-rapl:$soc_num/energy_uj" ]] || power_readable=FALSE
    done
    if [[ $power_readable == TRUE ]]; then
      REMORA_POWER_IGNORE=0
      REMORA_POWER_COLLECTOR=intel_rapl
    fi
  fi

  # Persist the decision for scripts that later source remora_env.txt.
  {
    echo "export REMORA_POWER_IGNORE=$REMORA_POWER_IGNORE"
    echo "export REMORA_POWER_COLLECTOR=$REMORA_POWER_COLLECTOR"
  } >> "$REMORA_OUTDIR/remora_env.txt"
}

src/modules/power

Lines changed: 23 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,15 @@ init_module_power()
3939

4040
# Updated 2023-03-28 by Kent
4141
# Can now use intel RAPL to get Energy, Watts=(Energy_uj_now - Energy_uj_prev)/delta_time
42-
# source $REMORA_OUTDIR/remora_env.txt
42+
# source $REMORA_OUTDIR/remora_env.txt
4343

4444
local remora_date_precision="ms"
4545
local remora_date_format='+%s.%3N'
4646

4747
local REMORA_POWER_IGNORE # 0 == don't ignore 1 == ignore
4848
local REMORA_POWER_COLLECTOR # sensors or intel_rapl
4949

50-
local sockets sockets_m1
50+
local sockets
5151
local has_sensors sensors base_values=()
5252
local soc_num
5353

@@ -57,56 +57,22 @@ init_module_power()
5757
sockets=1 #default
5858
hash lscpu 2>/dev/null
5959
[[ $? == 0 ]] && sockets=$(lscpu | grep 'Socket(s):' | awk '{print $2}')
60-
sockets_m1=$(($sockets-1))
61-
62-
#Check if sensors exists. Use hash for this and the result of the command
63-
hash sensors 2>/dev/null
64-
has_sensors=$? # 0/1 yes/no
65-
if [[ has_sensors == 0 ]]; then # test whether sensors provides "power1:"
66-
sensors | grep power1 2>/dev/null
67-
REMORA_POWER_IGNORE=${PIPESTATUS[1]} #if power1 found status=0,set don't ignore
68-
if [[ $REMORA_POWER_IGNORE == 0 ]]; then # sensors have power
69-
REMORA_POWER_COLLECTOR=sensors # assuming KNL & only one socket
70-
71-
#base_values[0]=`date +%s` # W are no longer putting in a base for Power sensors
72-
#base_values[1]=`sensors | grep power1 | awk '{print $2;}'` #TODO: multiple sockets with sensors cmd
73-
fi
74-
else
75-
REMORA_POWER_IGNORE=1 # specify no power utility so far
76-
REMORA_POWER_COLLECTOR=none
77-
fi
78-
79-
if [[ $REMORA_POWER_IGNORE == 1 ]]; then # test for intel_rapl
8060

81-
if [[ $REMORA_POWER_IGNORE == 1 ]]; then # test for intel_rapl
61+
if [[ REMORA_POWER_COLLECTOR == sensors ]]; then
62+
base_values[0]=`date +%s` # No longer in use (was for KNL)
63+
base_values[1]=`sensors | grep power1 | awk '{print $2;}'` #TODO: if revised, get power 4 all sockets
64+
fi
8265

83-
energy_readable=TRUE # Make sure all energy file are readable
84-
for ((soc_num=0; soc_num<$sockets; soc_num++)); do
85-
[[ ! -r /sys/devices/virtual/powercap/intel-rapl/intel-rapl:$soc_num/energy_uj ]] && energy_readable=FALSE
86-
energy_readable=FALSE
87-
66+
if [[ REMORA_POWER_COLLECTOR == intel_rapl ]]; then
67+
base_values[0]=`date $remora_date_format`
68+
for i in $(seq 1 $sockets); do
69+
soc_num=$(( $i - 1 ))
70+
base_values[$i]=$(</sys/devices/virtual/powercap/intel-rapl/intel-rapl:$soc_num/energy_uj)
8871
done
89-
if [[ $energy_readable == TRUE ]]; then
90-
REMORA_POWER_IGNORE=0
91-
REMORA_POWER_COLLECTOR=intel_rapl
92-
# base_values[0]=`date +%s`
93-
base_values[0]=`date $remora_date_format`
94-
for i in $(seq 1 $sockets); do # get base values
95-
soc_num=$(( $i - 1 ))
96-
base_values[$i]=$(</sys/devices/virtual/powercap/intel-rapl/intel-rapl:$soc_num/energy_uj)
97-
done
98-
fi
99-
fi
100-
101-
REMORA_MASTER=`head -n 1 $REMORA_OUTDIR/remora_nodes.txt`
102-
103-
if [[ "$REMORA_NODE" == "$REMORA_MASTER" ]]; then
104-
echo "export REMORA_POWER_IGNORE=$REMORA_POWER_IGNORE" >> $REMORA_OUTDIR/remora_env.txt
105-
echo "export REMORA_POWER_COLLECTOR=$REMORA_POWER_COLLECTOR" >> $REMORA_OUTDIR/remora_env.txt
10672
fi
10773

10874
if [[ "$REMORA_POWER_IGNORE" == 1 ]]; then
109-
[[ "$REMORA_VERBOSE" == 1 ]] && echo " -> power not initialized, sensors not found."
75+
[[ "$REMORA_VERBOSE" == 1 ]] && echo " -> power not initialized, sensors not found."
11076
return 1
11177
fi
11278

@@ -137,6 +103,7 @@ init_module_power()
137103
#either from sensors or energy differences.
138104

139105
#sleep 1 && echo remove sleep in $0 #used for testing, assure at least 1s before data collection
106+
140107
}
141108

142109
collect_data_power()
@@ -152,9 +119,10 @@ collect_data_power()
152119
local POWER_FILE=$REMORA_TMPDIR/power_${REMORA_NODE}.txt
153120
local ENERGY_FILE=$REMORA_TMPDIR/energy_${REMORA_NODE}.txt
154121

122+
echo "HERE collect_data_power $REMORA_POWER_IGNORE" >> /home1/00770/milfeld/R
155123
[[ $REMORA_POWER_IGNORE -ne 0 ]] && return 1
156124

157-
local sockets sockets_m1
125+
local sockets
158126
local time_stamp time_base time_seq power
159127

160128
local prev_line
@@ -167,7 +135,6 @@ collect_data_power()
167135
sockets=1 #default
168136
hash lscpu 2>/dev/null
169137
[[ $? == 0 ]] && sockets=$(lscpu | grep 'Socket(s):' | awk '{print $2}')
170-
sockets_m1=$(($sockets-1))
171138

172139
if [[ $REMORA_POWER_COLLECTOR == sensors ]]; then #for KNL, single socket
173140
time_stamp=`date $remora_date_format`
@@ -184,7 +151,7 @@ collect_data_power()
184151
time_stamp=`date $remora_date_format`
185152
time_str=$(printf "%-17s" "$time_stamp" )
186153
line=$time_str
187-
for soc_num in $(seq 0 $sockets_m1); do
154+
for ((soc_num=0; soc_num<$sockets; soc_num++)); do
188155
energy_uj=$(</sys/devices/virtual/powercap/intel-rapl/intel-rapl:$soc_num/energy_uj)
189156
addon=$(printf "%-17s" "$energy_uj" )
190157
line+=$addon
@@ -200,7 +167,7 @@ collect_data_power()
200167
line=$time_str
201168

202169
prev_line=( "${prev_line[@]:1}" ) #<- acts like the shift used for args.
203-
for soc_num in $(seq 0 $sockets_m1); do
170+
for ((soc_num=0; soc_num<$sockets; soc_num++)); do
204171

205172
delta=$( bc <<< "scale=2; ${energy[$soc_num]} - ${prev_line[$soc_num]}" )
206173
# #If delta is negative, counter wrapped
@@ -266,7 +233,7 @@ plot_data_power()
266233
local POWER_TXT_FILE=$REMORA_TMPDIR/power_${node}.txt
267234
local ENERGY_TXT_FILE=$REMORA_TMPDIR/energy_${node}.txt
268235

269-
local sockets sockets_m1
236+
local sockets
270237

271238
local ln_num iter row word #common to power and energy_uj blocks.
272239
local using_power_data=no
@@ -288,7 +255,6 @@ plot_data_power()
288255
sockets=1 #default
289256
hash lscpu 2>/dev/null
290257
[[ $? == 0 ]] && sockets=$(lscpu | grep 'Socket(s):' | awk '{print $2}')
291-
sockets_m1=$(($sockets-1))
292258

293259
printf "%s \n" "<html>" > $FILE
294260
printf "%s \n" "<head>" >> $FILE
@@ -301,7 +267,7 @@ plot_data_power()
301267
printf "%s \n" "var data = new google.visualization.DataTable();" >> $FILE
302268
printf "%s \n" "data.addColumn('number', 'Execution Time (sec)');">> $FILE
303269

304-
for soc_num in $(seq 0 $sockets_m1); do
270+
for ((soc_num=0; soc_num<$sockets; soc_num++)); do
305271
printf "%s \n" "data.addColumn('number', 'socket_$soc_num');" >> $FILE
306272
done
307273
printf "%s\n" "data.addRows([" >> $FILE
@@ -441,14 +407,15 @@ finalize_module_power()
441407
# Uses: lscpu to determine # of sockets
442408

443409
REMORA_NODE=$1; REMORA_OUTDIR=$2; REMORA_TMPDIR=$3
444-
410+
echo "HERE finalize_module_power $REMORA_POWER_IGNORE" >> /home1/00770/milfeld/R
445411
# RemPltRes must be set, and not equal to zero
446412
source $REMORA_OUTDIR/remora_env.txt
447413
if [[ -z ${REMORA_PLOT_RESULTS} ]] || [[ "$REMORA_PLOT_RESULTS" -eq "0" ]] ; then
448414
return
449415
fi
450416

451-
[[ $REMORA_POWER_IGNORE -ne 0 ]] && return
417+
[[ $REMORA_POWER_IGNORE -ne 0 ]] && return
418+
echo "NEVER HERE AFTER RETURN finalize_module_power $REMORA_POWER_IGNORE" >> /home1/00770/milfeld/R
452419

453420
#NODE PLOT
454421

@@ -529,7 +496,7 @@ finalize_module_power()
529496
printf "%s \n" "var data = new google.visualization.DataTable();" >> $FILE
530497
printf "%s \n" "data.addColumn('number', 'Execution Time (s)');" >> $FILE
531498

532-
for soc_num in $(seq 0 $n_socs_m1); do
499+
for ((soc_num=0; soc_num<$sockets; soc_num++)); do
533500
printf "%s \n" "data.addColumn('number', 'socket_$soc_num');" >> $FILE
534501
done
535502
printf "%s\n" "data.addRows([" >> $FILE

src/scripts/remora_init.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ function remora_init() {
9393
[[ "$REMORA_VERBOSE" == "1" ]] && echo " Checking if parallel file system analysis is required/available."
9494
check_io
9595

96+
# Check if Energy/Power files are accessible
97+
[[ "$REMORA_VERBOSE" == "1" ]] && echo " Checking if $nergy/Power files are accessible."
98+
check_energy_counter_accessibility
99+
96100
## Check TIMER precision (date precision) and if REMORA_DATE_PRECISION is set
97101
#if [[ "$REMORA_VERBOSE" == "1" ]]; then
98102
# echo " Checking date precision (timer) & if REMORA_DATE_PRECISION is set"

src/scripts/remora_report.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@ trap "exit_clean" EXIT
8383
alpha=0.6
8484

8585

86+
#######################################
# Remove one module, matched by its exact name, from REMORA_MODULES.
# Unlike "${REMORA_MODULES[@]/name}", this drops the element entirely
# (no empty-string entry is left behind) and never alters other module
# names that merely contain the same substring.
# Assumes REMORA_MODULES is an array with one module name per element.
# Globals:
#   REMORA_MODULES (read and rewritten)
# Arguments:
#   $1 - name of the module to remove
#######################################
remora_remove_module() {
  local unwanted=$1 mod
  local -a kept=()
  for mod in "${REMORA_MODULES[@]}"; do
    [[ $mod == "$unwanted" ]] || kept+=("$mod")
  done
  # The ${kept[@]+...} form keeps an empty result safe on older bash
  # versions when nounset is enabled.
  REMORA_MODULES=(${kept[@]+"${kept[@]}"})
}

# Remove any collectors that were rejected during remora_init
if [[ $REMORA_POWER_IGNORE -ne 0 ]]; then
  remora_remove_module power
fi
90+
8691
period_no=1
8792
while [[ 1 ]]; do
8893

0 commit comments

Comments
 (0)