diff --git a/memcpy-benchmarks/count-top-funcs.sh b/memcpy-benchmarks/count-top-funcs.sh index 0e5d9a5..f168aac 100755 --- a/memcpy-benchmarks/count-top-funcs.sh +++ b/memcpy-benchmarks/count-top-funcs.sh @@ -11,25 +11,36 @@ set -u usage () { cat <] : Directory with the results. Default - "res-baseline" - [--total|--self] : Select results based on total (self + children) - or just self. Default "total"` - [--md | --csv] : Output results in Markdown (default) or CSV +Usage ./count-top-funcs.sh : Count frequency of most use functions + [--resdir ] : Directory with the results. Default + "res-baseline" + [--total|--self] : Select results based on total (self + + children) or just self. Default "total" + [--types ] : List of result file types, default + "scalar vector-small vector-large" + [--cutoff ] : Cutoff for count to be presented. Default 0 + [--md | --csv | --raw] : Output results in Markdown, CSV or as a raw + string for use as the --funclist argument + of profile-all-funcs.sh The results to be analysed will be in files of the form -"prof--.res", where type is one of "scalar", "vector-small" or -"vector-large", and size, is the size of the data block in bytes copied on +"prof--.res", where type is one of the types listed in the +--types argument, and size, is the size of the data block in bytes copied on each iteration. EOF } -topdir="$(cd $(dirname $(dirname $(dirname $(readlink -f $0)))) ; pwd)" -memcpydir="${topdir}/tooling/memcpy-benchmarks" +# Directories +tooldir="$(cd $(dirname $(dirname $(readlink -f $0))) ; pwd)" +topdir="$(cd $(dirname ${tooldir}) ; pwd)" +memcpydir="${tooldir}/memcpy-benchmarks" + +# Defaults resdir="${memcpydir}/res-baseline" dototal="--total" -format="--md" +types="scalar vector-small vector-large" +cutoff="0" +format="--raw" set +u until @@ -43,7 +54,15 @@ until --total|--self) dototal=$1 ;; - --md|--csv) + --types) + shift + types="$1" + ;; + --cutoff) + shift + cutoff="$1" + ;; + --md|--csv|--raw) format="$1" ;; --help) @@ -66,25 +85,28 @@ set -u # We create a lot of temporaries! tmpdir="$(mktemp -d count-top-funcs-XXXXXX)" -# Find out the sizes -dlens="$(ls -1 ${resdir}/prof-scalar-*.res | \ - sed -e 's/^.*prof-scalar-//' -e 's/\.res$//' | sort -n)" - cd ${memcpydir} -for tp in "scalar" "vector-small" "vector-large" +for tp in ${types} do - echo - echo "${tp}" - echo tmpf1="${tmpdir}/all-${tp}.res" tmpf2="${tmpdir}/table-${tp}.res" rm -f "${tmpf1}" touch "${tmpf1}" + + # Find out the sizes + dlens=$(ls -1 ${resdir}/prof-${tp}-*.res | \ + sed -e "s/^.*prof-${tp}-//" -e 's/\.res$//' | sort -n) + nlens=$(echo "${dlens}" | wc -l) + printf "%s %d results\n" "${tp}" "${nlens}" + + # Extract all the desired data for l in ${dlens} do + echo -n "." ./extract-top-level-funcs.sh --resfile ${resdir}/prof-${tp}-${l}.res \ ${dototal} --omit-empty >> ${tmpf1} done + echo sed -n < ${tmpf1} -e 's/`//gp' | \ sed -e 's/^|[^|]*|[^|]*| //' -e 's/[[:space:]]*|$//' | \ sort | uniq -c | sort -nr > ${tmpf2} @@ -100,9 +122,15 @@ do printf '"%s","%s"\n' "Count" "Function/address" esac + funclist= while IFS='' read -r line do cnt=$(echo "${line}" | sed -e 's/^[[:space:]]\+//' -e 's/ .*$//') + if [[ "${cnt}" -lt "${cutoff}" ]] + then + break + fi + func=$(echo "${line}" | \ sed -e 's/^[[:space:]]\+[[:digit:]]\+[[:space:]]\+//') @@ -114,8 +142,21 @@ do ;; --csv) printf '"%s","%s"\n' "${cnt}" "${func}" + ;; + --raw) + if [[ "x${funclist}" == "x" ]] + then + funclist="${func}" + else + funclist="${funclist} ${func}" + fi esac done < ${tmpf2} + + if [[ "${format}" == "--raw" ]] + then + printf '"%s"\n' "${funclist}" + fi done rm -r ${tmpdir} diff --git a/memcpy-benchmarks/extract-top-level-funcs.sh b/memcpy-benchmarks/extract-top-level-funcs.sh index 42cfb21..b83bd1b 100755 --- a/memcpy-benchmarks/extract-top-level-funcs.sh +++ b/memcpy-benchmarks/extract-top-level-funcs.sh @@ -23,8 +23,9 @@ Usage ./extract-top-level-funcs.sh : Extract list of top level functions EOF } -topdir="$(cd $(dirname $(dirname $(dirname $(readlink -f $0)))) ; pwd)" -memcpydir=${topdir}/tooling/memcpy-benchmarks +tooldir="$(cd $(dirname $(dirname $(readlink -f $0))) ; pwd)" +topdir="$(cd $(dirname ${tooldir}) ; pwd)" +memcpydir="${tooldir}/memcpy-benchmarks" # Default values resfile= @@ -85,17 +86,6 @@ fi # Temporary file, so we can sort the results tmpf=$(mktemp extract-top-level-funcs-XXXXXX) -case "${format}" -in - --md) - printf "| %8s | %8s | %-45s |\n" "Children" "Self" "Function/address" - printf "| %8s | %8s | %-45s |\n" "-------:" "-------:" \ - ":--------------------------------------------" - ;; - --csv) - printf '"%s","%s","%s"\n' "Children" "Self" "Function/address" -esac - while IFS='' read -r line do if (echo "${line}" | grep -q '\[\.\] [^[:space:]]\+$') @@ -139,25 +129,37 @@ do # greater than pctot) if [[ "$(echo "${pctot}" | sed -e 's/\...$//')" -lt ${cutoff} ]] then - # Sort the results if necessary - if ${dototal} - then - cat < ${tmpf} - else - # Need to sort - case "${format}" - in - --md) - sort -nr -t'|' -k3 < ${tmpf} - ;; - --csv) - sort -nr -t'"' -k4 < ${tmpf} - ;; - esac - fi - - rm ${tmpf} - exit 0 + break fi fi done < ${resfile} + +# Print the results, sorting if necessary +case "${format}" +in + --md) + printf "| %8s | %8s | %-45s |\n" "Children" "Self" "Function/address" + printf "| %8s | %8s | %-45s |\n" "-------:" "-------:" \ + ":--------------------------------------------" + ;; + --csv) + printf '"%s","%s","%s"\n' "Children" "Self" "Function/address" +esac + +if ${dototal} +then + cat < ${tmpf} +else + # Need to sort + case "${format}" + in + --md) + sort -nr -t'|' -k3 < ${tmpf} + ;; + --csv) + sort -nr -t'"' -k4 < ${tmpf} + ;; + esac +fi + +rm ${tmpf} diff --git a/memcpy-benchmarks/profile-all-funcs.sh b/memcpy-benchmarks/profile-all-funcs.sh index 2fe1a35..6fea231 100755 --- a/memcpy-benchmarks/profile-all-funcs.sh +++ b/memcpy-benchmarks/profile-all-funcs.sh @@ -24,8 +24,9 @@ Usage ./profile-all-funcs.sh : Extract function performance data EOF } -topdir="$(cd $(dirname $(dirname $(dirname $(readlink -f $0)))) ; pwd)" -memcpydir=${topdir}/tooling/memcpy-benchmarks +tooldir="$(cd $(dirname $(dirname $(readlink -f $0))) ; pwd)" +topdir="$(cd $(dirname ${tooldir}) ; pwd)" +memcpydir="${tooldir}/memcpy-benchmarks" # Default values resdir="${memcpydir}/res-baseline" @@ -85,7 +86,8 @@ set -u # Temporary file for intermediaries tmpf="$(mktemp profile-all-funcs-XXXXXX)" -tmpcsv="$(mktemp profile-all-funcs-XXXXXX.csv)" +tmpcsv1="$(mktemp profile-all-funcs-XXXXXX-1.csv)" +tmpcsv2="$(mktemp profile-all-funcs-XXXXXX-2.csv)" # Find out the sizes cd ${resdir} @@ -105,6 +107,7 @@ cd ${memcpydir} for l in ${dlens} do res_title="${res_title}#${l}" + echo -n . ./extract-top-level-funcs.sh --md \ --resfile ${resdir}/prof-${restype}-${l}.res > ${tmpf} @@ -137,17 +140,21 @@ do reslist[${f}]="${reslist[${f}]}#${res}" done done +echo -# Print it all out -res_title="${res_title}%" -echo "${res_title}" | sed -e 's/%/"/g' -e 's/#/","/g' > ${tmpcsv} +# Print it all out. Prep the body of the CSV, sort it by function name, then +# transpose it. for f in ${funclist} do reslist[${f}]="${reslist[${f}]}%" - echo "${reslist[${f}]}" | sed -e 's/%/"/g' -e 's/#/","/g' >> ${tmpcsv} + echo "${reslist[${f}]}" | sed -e 's/%/"/g' -e 's/#/","/g' >> ${tmpcsv1} done -csvtool transpose ${tmpcsv} +res_title="${res_title}%" +echo "${res_title}" | sed -e 's/%/"/g' -e 's/#/","/g' > ${tmpcsv2} +sort -t, -k1 < ${tmpcsv1} >> ${tmpcsv2} +csvtool transpose ${tmpcsv2} rm -f ${tmpf} -rm -f ${tmpcsv} +rm -f ${tmpcsv1} +rm -f ${tmpcsv2} diff --git a/memcpy-benchmarks/run-all.sh b/memcpy-benchmarks/run-all.sh new file mode 100755 index 0000000..d8d2093 --- /dev/null +++ b/memcpy-benchmarks/run-all.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Copyright (C) 2024 Embecosm Limited +# Contributor Jeremy Bennett + +# SPDX-License-Identifier: GPL-3.0-or-later + +# A script to do multiple performance profiling runs + +tooldir="$(cd $(dirname $(dirname $(readlink -f $0))) ; pwd)" +topdir="$(cd $(dirname ${tooldir}) ; pwd)" +memcpydir="${tooldir}/memcpy-benchmarks" +qemudir=${topdir}/qemu + +logfile=${memcpydir}/run-all.log +rm -f ${logfile} +touch ${logfile} + +ids="ef9e258b94376c5017b4df9fe061abcadc9661f2 \ + 7809b7fafbc24c557751a1845bb1ccc0b9376f90" + +export PATH=${topdir}/install/bin:${PATH} +which qemu-riscv64 2>&1 | tee -a ${logfile} + +# Build all the programs to benchmark +cd ${memcpydir} +make + +# Now do the profiling +for c in ${ids} +do + resfile="${memcpydir}/results-${c}" + echo "Checking out QEMU commit ${c}..." 2>&1 | tee -a ${logfile} + date 2>&1 | tee -a ${logfile} + pushd ${qemudir} > /dev/null 2>&1 + git checkout ${c} 2>&1 | tee -a ${logfile} + popd > /dev/null 2>&1 + + echo "Building QEMU for commit ${c}..." 2>&1 | tee -a ${logfile} + date 2>&1 | tee -a ${logfile} + pushd ${tooldir} > /dev/null 2>&1 + ./build-all.sh --qemu-only --clean-qemu --qemu-cflags "-g" \ + --qemu-configs "--disable-plugins" 2>&1 | tee -a ${logfile} + popd > /dev/null 2>&1 + + echo "Running perf for commit ${c}..." 2>&1 | tee -a ${logfile} + date 2>&1 | tee -a ${logfile} + pushd ${memcpydir} > /dev/null 2>&1 + mkdir -p "${resfile}" + ./run-perf.sh 2>&1 | tee -a ${logfile} + + echo "Putting results in ${resfile}..." 2>&1 | tee -a ${logfile} + mv prof-*.res "${resfile}" + date 2>&1 | tee -a ${logfile} + popd > /dev/null 2>&1 +done diff --git a/memcpy-benchmarks/run-memcpy.sh b/memcpy-benchmarks/run-memcpy.sh index dd0978b..b56ac35 100755 --- a/memcpy-benchmarks/run-memcpy.sh +++ b/memcpy-benchmarks/run-memcpy.sh @@ -182,26 +182,26 @@ if [[ $lmul != "1" ]] then if [[ ${format} == "--md" ]] then - printf "| %5d | %6d | %7.2f | %7.2f | %7.2f | %10.1f | %10.1f | %10.1f | %10.2f | %10.2f | %10.2f |\n" \ - ${vlen} $length $scalar_time $vector1_time $vectorM_time \ - ${scalar_micount} ${vector1_micount} ${vectorM_micount} \ - ${scalar_nspi} ${vector1_nspi} ${vectorM_nspi} + printf "| %10d | %5d | %6d | %7.2f | %7.2f | %7.2f | %10.1f | %10.1f | %10.1f | %10.2f | %10.2f | %10.2f |\n" \ + ${iterations} ${vlen} $length $scalar_time $vector1_time \ + $vectorM_time ${scalar_micount} ${vector1_micount} \ + ${vectorM_micount} ${scalar_nspi} ${vector1_nspi} ${vectorM_nspi} else - printf "\"%d\",\"%d\",\"%.2f\",\"%.2f\",\"%.2f\",\"%.1f\",\"%.1f\",\"%.1f\",\"%.2f\",\"%.2f\",\"%.2f\"\n" \ - ${vlen} $length $scalar_time $vector1_time $vectorM_time \ - ${scalar_micount} ${vector1_micount} ${vectorM_micount} \ - ${scalar_nspi} ${vector1_nspi} ${vectorM_nspi} + printf "\"%d\",\"%d\",\"%d\",\"%.2f\",\"%.2f\",\"%.2f\",\"%.1f\",\"%.1f\",\"%.1f\",\"%.2f\",\"%.2f\",\"%.2f\"\n" \ + ${iterations} ${vlen} $length $scalar_time $vector1_time \ + $vectorM_time ${scalar_micount} ${vector1_micount} \ + ${vectorM_micount} ${scalar_nspi} ${vector1_nspi} ${vectorM_nspi} fi else if [[ ${format} == "--md" ]] then - printf "| %5d | %6d | %7.2f | %7.2f | %10.1f | %10.1f | %10.2f | %10.2f |\n" \ - ${vlen} $length $scalar_time $vector1_time \ + printf "| %10d | %5d | %6d | %7.2f | %7.2f | %10.1f | %10.1f | %10.2f | %10.2f |\n" \ + ${iterations} ${vlen} $length $scalar_time $vector1_time \ ${scalar_micount} ${vector1_micount} \ ${scalar_nspi} ${vector1_nspi} else - printf "\"%d\",\"%d\",\"%.2f\",\"%.2f\",\"%.1f\",\"%.1f\",\"%.2f\",\"%.2f\"\n" \ - ${vlen} $length $scalar_time $vector1_time \ + printf "\"%d\",\"%d\",\"%d\",\"%.2f\",\"%.2f\",\"%.1f\",\"%.1f\",\"%.2f\",\"%.2f\"\n" \ + ${iterations} ${vlen} $length $scalar_time $vector1_time \ ${scalar_micount} ${vector1_micount} \ ${scalar_nspi} ${vector1_nspi} fi diff --git a/memcpy-benchmarks/run-perf.sh b/memcpy-benchmarks/run-perf.sh index 5200821..1ca5b21 100755 --- a/memcpy-benchmarks/run-perf.sh +++ b/memcpy-benchmarks/run-perf.sh @@ -14,8 +14,7 @@ usage () { Usage: ./run-perf.sh : Run Linux perf on memcpy benchmarks [--bytes ] : Total bytes to copy. Default 1,000,000,000 [--resdir ] : Directory in which to place the results. Default - is "res-baseline" in the directory holding this - script. + is the directory holding this script. [--sizes ] : Space separated list of the data sizes to use when creating results. Default list is all the powers of 2, 3, 5 and 7 up to 5^6 @@ -34,11 +33,11 @@ accurate results, but is slow. Expect each iteration to take of the order of EOF } -memcpydir="$(cd $(readlink -f $0) ; pwd)" +memcpydir="$(cd $(dirname $(readlink -f $0)) ; pwd)" # Default values bytes="1000000000" -resdir="${memcpydir}/res-baseline" +resdir="${memcpydir}" data_lens=" 1 \ 2 \ 3 \ @@ -105,6 +104,8 @@ do done set -u +mkdir -p "${resdir}" + for dlen in ${data_lens} do iters=$((bytes / dlen)) diff --git a/memcpy-benchmarks/run-sequence-all.sh b/memcpy-benchmarks/run-sequence-all.sh new file mode 100755 index 0000000..2fb21ef --- /dev/null +++ b/memcpy-benchmarks/run-sequence-all.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Copyright (C) 2024 Embecosm Limited +# Contributor Jeremy Bennett + +# SPDX-License-Identifier: GPL-3.0-or-later + +# A script to do multiple sequence runs + +tooldir="$(cd $(dirname $(dirname $(readlink -f $0))) ; pwd)" +topdir="$(cd $(dirname ${tooldir}) ; pwd)" +memcpydir="${tooldir}/memcpy-benchmarks" +qemudir=${topdir}/qemu + +logfile=${memcpydir}/run-all.log +rm -f ${logfile} +touch ${logfile} + +# Baseline, prev best, new best +ids="7bbadc60b58b742494555f06cd342311ddab9351 \ + ef9e258b94376c5017b4df9fe061abcadc9661f2 \ + 7809b7fafbc24c557751a1845bb1ccc0b9376f90" + +export PATH=${topdir}/install/bin:${PATH} +which qemu-riscv64 2>&1 | tee -a ${logfile} + +# Build all the programs to benchmark +cd ${memcpydir} +make 2>&1 | tee -a ${logfile} + +# Now do the profiling +for c in ${ids} +do + csvfile="${memcpydir}/seq-results-${c}.csv" + echo "Checking out QEMU commit ${c}..." 2>&1 | tee -a ${logfile} + date 2>&1 | tee -a ${logfile} + pushd ${qemudir} > /dev/null 2>&1 + git checkout ${c} >> ${logfile} 2>&1 + popd > /dev/null 2>&1 + + echo "Building QEMU for commit ${c}..." 2>&1 | tee -a ${logfile} + date 2>&1 | tee -a ${logfile} + pushd ${tooldir} > /dev/null 2>&1 + ./build-all.sh --qemu-only --clean-qemu >> ${logfile} 2>&1 + popd > /dev/null 2>&1 + + echo "Running sequence for commit ${c}..." 2>&1 | tee -a ${logfile} + date 2>&1 | tee -a ${logfile} + pushd ${memcpydir} > /dev/null 2>&1 + ./run-sequence.sh --iter "100000000" --csv > ${csvfile} 2>&1 + echo "Results in ${csvfile}" | tee -a ${logfile} + date 2>&1 | tee -a ${logfile} +done diff --git a/memcpy-benchmarks/run-sequence.sh b/memcpy-benchmarks/run-sequence.sh index cfaa850..4d7f018 100755 --- a/memcpy-benchmarks/run-sequence.sh +++ b/memcpy-benchmarks/run-sequence.sh @@ -5,30 +5,59 @@ # SPDX-License-Identifier: GPL-3.0-or-later -iterations=1000000 -length=1 -fullrun="no" +# Defaults +iterations=10000000 +vlens="128 256 512 1024" +data_lens=" 1 \ + 2 \ + 3 \ + 4 \ + 5 \ + 7 \ + 8 \ + 9 \ + 16 \ + 25 \ + 27 \ + 32 \ + 49 \ + 64 \ + 81 \ + 125 \ + 128 \ + 243 \ + 256 \ + 343 \ + 512 \ + 625 \ + 729 \ + 1024 \ + 2048 \ + 2401 \ + 3125 \ + 4096 \ + 6561 \ + 8192 \ + 15625" format="--md" -print_help=false -debug_mode=false -vlen_list=( -128 -256 -512 -1024 -) -vlen_list_arg="" lmul=8 benchmark="memcpy" usage () { cat < : Number of iterations of the tests - [--vlen-list] : Comma separated list of values for the RVV VLEN parameter - [--lmul] : RVV LMUL parameter - [--full | --concise] : How many data sizes to use - [--csv | --md] : Format of table - [--help] : Print this message and exit +Usage ./build-all.sh [--iter ] : Number of iterations of the tests. + Default 1000000 + [--vlens ] : Space separated list of values for the + RVV VLEN parameter. Default "128 256 + 512 1024" + [--lmul ] : RVV LMUL parameter. Default 8 + [--dlens ] : Space separated list of data sizes to + use. + [--csv | --md] : Format of table + [--help] : Print this message and exit + +The default list of data points is 1 and all the powers of 2, 3, 5 and 7 up to +5^6. EOF } @@ -41,14 +70,18 @@ until shift iterations="$1" ;; - --vlen-list) + --vlens) shift - vlen_list_arg="$1" + vlens="$1" ;; --lmul) shift lmul="$1" ;; + --dlens) + shift + data_lens="$1" + ;; --full) fullrun="yes" ;; @@ -58,18 +91,17 @@ until --md|--csv) format="$1" ;; - --debug) - debug_mode=true - ;; --benchmark) shift benchmark="$1" ;; --help) - print_help=true + usage + exit 0 ;; ?*) echo "Unknown argument '$1'" + usage exit 1 ;; *) @@ -81,112 +113,13 @@ do done set -u -if ${print_help} +# Build the binaries once +if ! make > /dev/null 2>&1 then - usage + echo "ERROR: run-sequence.sh: Failed to build binaries" exit 1 fi -if [[ -n $vlen_list_arg ]] -then - IFS=',' read -r -a vlen_list <<< "$vlen_list_arg" -fi - -if [[ "${fullrun}" = "yes" ]] -then - data_lengths=( - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 - 33 - 34 - 35 - 43 - 49 - 50 - 51 - 52 - 53 - 59 - 60 - 61 - 62 - 63 - 64 - 65 - 66 - 67 - 79 - 127 - 128 - 129 - 130 - 131 - 132 - 133 - 197 - 256 - 281 - 512 - 613 - 1024 - 1579 - 2048 - 2897 - 4096 - 5081 - 8192 - 9103 -) -else - data_lengths=( - 1 - 2 - 4 - 8 - 16 - 32 - 64 - 128 - 256 - 512 - 1024 - 2048 -) -fi - -# Build the binaries once -make - rm -rf vmem.check rm -rf smem.check @@ -195,19 +128,17 @@ if [[ "${lmul}" != "1" ]] then if [[ "${format}" == "--md" ]] then - printf "Iterations: %d\n\n" ${iterations} - printf "| %5s | %6s | %7s | %7s | %7s | %10s | %10s | %10s | %10s | %10s | %11s|\n" \ - "VLEN" "length" "s time" "v1 time" "v$lmul time" \ + printf "| %10s | %5s | %6s | %7s | %7s | %7s | %10s | %10s | %10s | %10s | %10s | %11s|\n" \ + "Iterations" "VLEN" "length" "s time" "v1 time" "v$lmul time" \ "s Micount" "v1 Micount" "v$lmul Micount" \ "s ns/inst" "v1 ns/inst" "v$lmul ns/inst" - printf "| %5s | %6s | %7s | %7s | %7s | %10s | %10s | %10s | %10s | %10s | %11s|\n" \ - "----:" "-----:" "------:" "------:" "------:" \ + printf "| %10s | %5s | %6s | %7s | %7s | %7s | %10s | %10s | %10s | %10s | %10s | %11s|\n" \ + "---------:" "----:" "-----:" "------:" "------:" "------:" \ "---------:" "---------:" "---------:" \ "---------:" "---------:" "----------:" else - printf "\"Iterations\",\"%d\"\n" ${iterations} - printf "\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"\n" \ - "VLEN" "length" "s time" "v1 time" "v$lmul time" \ + printf "\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"\n" \ + "Iterations" "VLEN" "length" "s time" "v1 time" "v$lmul time" \ "s Micount" "v1 Micount" "v$lmul Micount" \ "s ns/inst" "v1 ns/inst" "v$lmul ns/inst" fi @@ -215,18 +146,18 @@ else if [[ "${format}" == "--md" ]] then printf "Iterations: %d\n\n" ${iterations} - printf "| %5s | %6s | %7s | %7s | %10s | %10s | %10s | %10s |\n" \ - "VLEN" "length" "s time" "v1 time" \ + printf "| %10s | %5s | %6s | %7s | %7s | %10s | %10s | %10s | %10s |\n" \ + "Iterations" "VLEN" "length" "s time" "v1 time" \ "s Micount" "v1 Micount" \ "s ns/inst" "v1 ns/inst" - printf "| %5s | %6s | %7s | %7s | %10s | %10s | %10s | %10s |\n" \ - "----:" "-----:" "------:" "------:" \ + printf "| %10s | %5s | %6s | %7s | %7s | %10s | %10s | %10s | %10s |\n" \ + "---------:" "----:" "-----:" "------:" "------:" \ "---------:" "---------:" \ "---------:" "---------:" else printf "\"Iterations\",\"%d\"\n" ${iterations} - printf "\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"\n" \ - "VLEN" "length" "s time" "v1 time" \ + printf "\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"\n" \ + "Iterations" "VLEN" "length" "s time" "v1 time" \ "s Micount" "v1 Micount" \ "s ns/inst" "v1 ns/inst" fi @@ -235,11 +166,13 @@ fi # Do all the runs, but the scalar runs only once scalar_flag="--scalar" -for vlen in ${vlen_list[@]} +for vlen in ${vlens} do - for l in ${data_lengths[@]}; do - ./run-${benchmark}.sh ${format} ${scalar_flag} --iter $iterations --len $l \ - --vlen ${vlen} --lmul ${lmul} + for l in ${data_lens} + do + iters=$((iterations / l)) + ./run-${benchmark}.sh ${format} ${scalar_flag} --iter $iters \ + --len $l --vlen ${vlen} --lmul ${lmul} done scalar_flag="--no-scalar" done diff --git a/memcpy-benchmarks/vmemcpy-main.c b/memcpy-benchmarks/vmemcpy-main.c index d7db0a8..9bdc787 100644 --- a/memcpy-benchmarks/vmemcpy-main.c +++ b/memcpy-benchmarks/vmemcpy-main.c @@ -90,4 +90,3 @@ main (int argc, char* argv[]) free(dst); } -