diff --git a/.gitignore b/.gitignore index 36ba3cd..d7af72c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,16 @@ +# Git comparison files +*.diff +*.patch +*.orig +*.rej # Editor backup files *~ +# Generated logs and results files +*.log +*.res +*.csv +# Generated graphics +*.png +bm-graph-all-*/ +# Dump files +*.dump diff --git a/build-all.sh b/build-all.sh index c2afecf..3ddaf3e 100755 --- a/build-all.sh +++ b/build-all.sh @@ -15,9 +15,16 @@ usage () { cat <] : Install path of the tool chain. + Default path is ../install [--arch ] : Target architecture. Default architecture is rv64gc [--abi ] : Target ABI. Default ABI is lp64d @@ -33,6 +40,7 @@ Usage ./build-all.sh : Build riscv64-unknown-linux-gnu [--clean] : Delete build directories in riscv-gnu-toolchain and the install directory before building + [--clean-qemu] : Clean just the QEMU build [--help] : Print this message and exit EOF } @@ -51,9 +59,13 @@ DEFAULTTRIPLE=riscv64-unknown-elf build_linux=true qemu_only=false +qemu_configs="" +qemu_cflags="" +profile_qemu="" build_gdbserver=false build_clang=false clean_build=false +clean_qemu_build=false enable_multilib=true print_help=false print_hashes=false @@ -110,6 +122,17 @@ until --qemu-only) qemu_only=true ;; + --qemu-configs) + shift + qemu_configs="$1" + ;; + --qemu-cflags) + shift + qemu_cflags="$1" + ;; + --profile-qemu) + profile_qemu="--enable-gprof" + ;; --build-gdbserver) build_gdbserver=true ;; @@ -156,6 +179,10 @@ until ;; --clean) clean_build=true + clean_qemu_build=true + ;; + --clean-qemu) + clean_qemu_build=true ;; --help) print_help=true @@ -267,6 +294,15 @@ else EXTRA_OPTS="${EXTRA_OPTS} --disable-multilib" fi echo " build qemu: yes" +echo " qemu_configs: ${qemu_configs}" +echo " qemu_cflags: ${qemu_cflags}" +if ${clean_qemu_build} +then + echo " qemu_clean: yes" +else + echo " qemu_clean: no" +fi + if ${build_gdbserver} then echo " build gdbserver: yes" @@ -283,7 +319,7 @@ fi cd $TOPDIR/riscv-gnu-toolchain 
log_file="${LOGDIR}/clean-toolchain.log" -if ${clean_build} +if ${clean_build} && ! ${qemu_only} then echo echo "Cleaning... logging to ${log_file}" @@ -362,8 +398,14 @@ echo "Building QEMU... logging to ${log_file}" $TOPDIR/qemu/configure --prefix=$INSTALLDIR \ --target-list=riscv64-linux-user,riscv32-linux-user \ --interp-prefix=$INSTALLDIR/sysroot \ - --python=python3 \ - --extra-cflags="-Wno-error" + --python=python3 ${profile_qemu} \ + ${qemu_configs} \ + --extra-cflags="${qemu_cflags}" + if ${clean_build} || ${clean_qemu_build} + then + rm -f ${INSTALLDIR}/bin/qemu-riscv?? + make clean + fi make -j $(nproc) make install ) > ${log_file} 2>&1 diff --git a/memcpy-benchmarks/.gitignore b/memcpy-benchmarks/.gitignore index b8f3d14..99b66b3 100644 --- a/memcpy-benchmarks/.gitignore +++ b/memcpy-benchmarks/.gitignore @@ -3,3 +3,12 @@ *.exe *.icount *.check +# Generated data +*.csv +*.res +perf.data +perf.data.old +gmon.out +# Standard directories for generated data +res-baseline +res-development diff --git a/memcpy-benchmarks/README.md b/memcpy-benchmarks/README.md index 118bf27..6874eca 100644 --- a/memcpy-benchmarks/README.md +++ b/memcpy-benchmarks/README.md @@ -30,3 +30,143 @@ option to see arguments and the comments in the script. The `run-sequence.sh` script will run a large number of benchmarks for different values of VLEN and LMUL and for a range of data sizes. Again use the `--help` option to see arguments and look at the comments in the script. + +## Scripts to help with Linux _perf_ + +### Prerequisites + +The scripts are intended to run under Linux. Prerequisites are Linux _perf_ and +_csvtool_, both of which should be available with standard distributions. + +### `run-perf.sh` + +``` +./run-perf.sh [--bytes <num>] [--resdir <dir>] [--sizes <list>] +``` + +Uses Linux _perf_ to profile different variants of the `memcpy` benchmark. +Arguments are as follows. + +- `--bytes` _num_ : Total bytes to copy (optional). Default 1,000,000,000.
+ +- `--resdir` _dir_ : Directory in which to place the results (optional). + Default is `res-baseline` in the directory holding this script. + +- `--sizes` _list_ : Space separated list of the data sizes to use when + creating results (optional). Default list is all the powers of 2, 3, 5 and + 7 up to 5^6. + +The results will be three sets of files of the form +`prof-`_type_`-`_size_`.res`, where _type_ is one of `scalar`, `vector-small` +or `vector-large`, and _size_ is the size of the data block copied on each +iteration. + +The total number of iterations for each test is determined by the number given +in the `--bytes` argument divided by the size of the data block being used for +the run. + +`perf record` is run using DWARF to determine the call graph. This gives +accurate results, but is slow. Expect each iteration to take of the order of +20 minutes on a decent server. + +### `extract-top-level-funcs.sh` + +``` +./extract-top-level-funcs.sh --resfile <file> [--cutoff <num>] \ + [--total|--self] [--omit-empty] [--md | --csv] +``` + +Extract the main results from a file generated by `run-perf.sh`. Arguments +are as follows. + +- `--resfile` _file_: Target file to extract results from (mandatory) +- `--cutoff` _num_: Percentage below which to stop showing results + (optional). Default value 1 +- `--total`: Cutoff and sorting based on total time (self + children) + (optional). Set by default. +- `--self`: Cutoff and sorting just based on self time (no children) + (optional). Opposite to `--total`, so not set by default. +- `--omit-empty`: Do not show results if self is 0.00 (optional). Only has + any effect in combination with `--total`. +- `--md`: Output results in Markdown format (optional). Set by default +- `--csv`: Output results in CSV format. + +**Note.** Only one of `--total` or `--self` may be specified. Only one of +`--md` or `--csv` may be specified. + +This is the central script for extracting data from the Linux _perf_ results.
+In general using `--self` gives the most useful data for targeting +optimizations. Using `--total` will flag up these functions, but also +functions which are just wrappers for other functions. The `--omit-empty` +option can be helpful when using `--total` to skip functions which are purely +wrapping other functions. + +### `count-top-funcs.sh` + +``` +./count-top-funcs.sh [--resdir <dir>] [--total|--self] [--md | --csv] +``` + +Find the frequency of the most used functions in a set of data. This is a +wrapper for `extract-top-level-funcs.sh`. Arguments are as follows. + +- `--resdir` _dir_: Directory with the results to be analysed (optional). + Default `res-baseline` +- `--total`: Cutoff and sorting based on total time (self + children) + (optional). Set by default. +- `--self`: Cutoff and sorting just based on self time (no children) + (optional). Opposite to `--total`, so not set by default. +- `--md`: Output results in Markdown format (optional). Set by default +- `--csv`: Output results in CSV format. + +**Note.** Only one of `--total` or `--self` may be specified. Only one of +`--md` or `--csv` may be specified. + +The results to be analysed will be in files of the form +`prof-`_type_`-`_size_`.res`, where _type_ is one of `scalar`, `vector-small` +or `vector-large`, and _size_ is the size of the data block in bytes copied +on each iteration. + +### `profile-all-funcs.sh` + +``` +./profile-all-funcs.sh [--resdir <dir>] [--type <str>] [--total|--self] \ + [--funclist <list>] +``` + +Extract data on function usage for different data sizes in a form suitable for +graphical analysis. Arguments are as follows. + +- `--resdir` _dir_: Directory with the results to be analysed (optional). + Default `res-baseline`. +- `--type` _str_: What type of result to look at (optional). Permitted values + are `scalar` (default), `vector-small` or `vector-large`. +- `--total`: Cutoff and sorting based on total time (self + children) + (optional). Set by default.
+- `--self`: Cutoff and sorting just based on self time (no children)
+  (optional). Opposite to `--total`, so not set by default.
+- `--funclist` _list_: Space separated list of functions to profile. Default
+  value `helper_lookup_tb_ptr cpu_get_tb_cpu_state`
+
+**Note.** Only one of `--total` or `--self` may be specified.
+
+This script typically takes a set of functions identified by
+`count-top-funcs.sh`. The output is always CSV format.
+
+### `run-spec-pop2.sh`
+
+```
+./run-spec-pop2.sh [--reportfile <file>] [--specdir <dir>]
+```
+
+**Note.** Because this is not specific to the `memcpy` benchmarks it lives in
+the main `tooling` repository. Arguments are as follows.
+
+- `--reportfile` _file_: Put the results in this file. Default
+  `prof-628.pop2_s.res` in the `tooling` repository
+
+- `--specdir` _dir_: The directory holding the SPEC installation to be used.
+
+This script runs the SPEC CPU 2017 benchmark under QEMU with Linux _perf_.
+The script runs a previously built benchmark. If necessary use
+`runspec-qemu.sh` to create the benchmark binary.
diff --git a/memcpy-benchmarks/count-top-funcs.sh b/memcpy-benchmarks/count-top-funcs.sh
new file mode 100755
index 0000000..0e5d9a5
--- /dev/null
+++ b/memcpy-benchmarks/count-top-funcs.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Embecosm Limited
+# Contributor Jeremy Bennett
+
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# A script to count how often functions are used in profiled memcpy benchmarks
+#
+# For each benchmark type it runs extract-top-level-funcs.sh over every result
+# file in the results directory and counts how many times each function name
+# appears in the combined output.
+
+set -u
+
+# Print a summary of the command line options.
+# NOTE(review): the "Usage" line of this here-document was lost from the text
+# under review and is reconstructed - confirm the wording.
+usage () {
+    cat <<EOF
+Usage ./count-top-funcs.sh [options]
+
+    [--resdir <dir>] : Directory with the results. Default
+                       "res-baseline"
+    [--total|--self] : Select results based on total (self + children)
+                       or just self. Default "total"
+    [--md | --csv]   : Output results in Markdown (default) or CSV
+
+The results to be analysed will be in files of the form
+"prof-<type>-<size>.res", where type is one of "scalar", "vector-small" or
+"vector-large", and size is the size of the data block in bytes copied on
+each iteration.
+EOF
+}
+
+scriptdir="$(dirname "$(readlink -f "$0")")"
+topdir="$(cd "${scriptdir}/../.." && pwd)"
+memcpydir="${topdir}/tooling/memcpy-benchmarks"
+resdir="${memcpydir}/res-baseline"
+dototal="--total"
+format="--md"
+
+# The option loop deliberately reads "$1" after the arguments run out, so
+# unset variable checking is turned off while it runs.
+set +u
+until
+    opt="$1"
+    case "${opt}"
+    in
+        --resdir)
+            shift
+            resdir="$(cd "$(readlink -f "$1")" && pwd)"
+            ;;
+        --total|--self)
+            dototal="$1"
+            ;;
+        --md|--csv)
+            format="$1"
+            ;;
+        --help)
+            usage
+            exit 0
+            ;;
+        ?*)
+            usage
+            exit 1
+            ;;
+        *)
+            ;;
+    esac
+    [ "x${opt}" = "x" ]
+do
+    shift
+done
+set -u
+
+# We create a lot of temporaries!  They go in a directory with an absolute
+# path, because we change directory below, and are removed however we exit.
+tmpdir="$(mktemp -d "${TMPDIR:-/tmp}/count-top-funcs-XXXXXX")" || exit 1
+trap 'rm -rf "${tmpdir}"' EXIT
+
+# Find out the sizes
+dlens="$(ls -1 "${resdir}"/prof-scalar-*.res | \
+             sed -e 's/^.*prof-scalar-//' -e 's/\.res$//' | sort -n)"
+
+cd "${memcpydir}" || exit 1
+for tp in "scalar" "vector-small" "vector-large"
+do
+    echo
+    echo "${tp}"
+    echo
+    tmpf1="${tmpdir}/all-${tp}.res"
+    tmpf2="${tmpdir}/table-${tp}.res"
+    rm -f "${tmpf1}"
+    touch "${tmpf1}"
+    for l in ${dlens}
+    do
+        ./extract-top-level-funcs.sh \
+            --resfile "${resdir}/prof-${tp}-${l}.res" \
+            "${dototal}" --omit-empty >> "${tmpf1}"
+    done
+    # Keep only the table rows (the lines with back-ticked names), reduce each
+    # to the bare function name and count how often each name occurs.
+    sed -n < "${tmpf1}" -e 's/`//gp' | \
+        sed -e 's/^|[^|]*|[^|]*| //' -e 's/[[:space:]]*|$//' | \
+        sort | uniq -c | sort -nr > "${tmpf2}"
+
+    case "${format}"
+    in
+        --md)
+            printf "| %5s | %-45s |\n" "Count" "Function/address"
+            printf "| %5s | %-45s |\n" "----:" \
+                   ":------------------------------------------"
+            ;;
+        --csv)
+            printf '"%s","%s"\n' "Count" "Function/address"
+            ;;
+    esac
+
+    while IFS='' read -r line
+    do
+        # Each line is the output of "uniq -c": a count, then the name
+        cnt=$(echo "${line}" | sed -e 's/^[[:space:]]\+//' -e 's/ .*$//')
+        func=$(echo "${line}" | \
+                   sed -e 's/^[[:space:]]\+[[:digit:]]\+[[:space:]]\+//')
+
+        case "${format}"
+        in
+            --md)
+                func=$(echo "\`${func}\`")
+                printf "| %5s | %-45s |\n" "${cnt}" "${func}"
+                ;;
+            --csv)
+                printf '"%s","%s"\n' "${cnt}" "${func}"
+                ;;
+        esac
+    done < "${tmpf2}"
+done
diff --git a/memcpy-benchmarks/extract-top-level-funcs.sh b/memcpy-benchmarks/extract-top-level-funcs.sh
new file mode 100755
index
0000000..42cfb21
--- /dev/null
+++ b/memcpy-benchmarks/extract-top-level-funcs.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Embecosm Limited
+# Contributor Jeremy Bennett
+
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# A script to extract performance data from memcpy perf runs
+#
+# It reads a results file written by "perf report --stdio", picks out the
+# lines that end in "[.] <symbol>" and prints, as a Markdown or CSV table,
+# the two leading percentages from each such line (children and self) and
+# the symbol.
+
+set -u
+
+# Print a summary of the command line options.
+# NOTE(review): the "Usage" line of this here-document was lost from the text
+# under review and is reconstructed - confirm the wording.
+usage () {
+    cat <<EOF
+Usage ./extract-top-level-funcs.sh --resfile <file> [options]
+
+    --resfile <file>   : Target file to extract
+    [--cutoff <num>]   : Percentage at which to stop showing
+                         results (default 1)
+    [--total|--self]   : Select based on total (self + children)
+                         or just self. Default "total"
+    [--omit-empty]     : Do not show results if self is 0.00
+    [--md | --csv]     : Output results in Markdown (default) or
+                         CSV
+EOF
+}
+
+# Default values
+resfile=
+cutoff=1
+dototal=true
+format="--md"
+omit_empty=false
+
+# The option loop deliberately reads "$1" after the arguments run out, so
+# unset variable checking is turned off while it runs.
+set +u
+until
+    opt="$1"
+    case "${opt}"
+    in
+        --resfile)
+            shift
+            resfile="$(readlink -f "$1")"
+            ;;
+        --cutoff)
+            shift
+            cutoff="$1"
+            ;;
+        --total)
+            dototal=true
+            ;;
+        --self)
+            dototal=false
+            ;;
+        --omit-empty)
+            omit_empty=true
+            ;;
+        --md|--csv)
+            format="$1"
+            ;;
+        --help)
+            usage
+            exit 0
+            ;;
+        ?*)
+            usage
+            exit 1
+            ;;
+        *)
+            ;;
+    esac
+    [ "x${opt}" = "x" ]
+do
+    shift
+done
+set -u
+
+if [[ "x${resfile}" == "x" ]]
+then
+    echo "ERROR: --resfile required."
+    usage
+    exit 1
+fi
+
+# Temporary file, so we can sort the results.  It is removed however we exit.
+tmpf="$(mktemp "${TMPDIR:-/tmp}/extract-top-level-funcs-XXXXXX")" || exit 1
+trap 'rm -f "${tmpf}"' EXIT
+
+case "${format}"
+in
+    --md)
+        printf "| %8s | %8s | %-45s |\n" "Children" "Self" "Function/address"
+        printf "| %8s | %8s | %-45s |\n" "-------:" "-------:" \
+               ":--------------------------------------------"
+        ;;
+    --csv)
+        printf '"%s","%s","%s"\n' "Children" "Self" "Function/address"
+        ;;
+esac
+
+while IFS='' read -r line
+do
+    if (echo "${line}" | grep -q '\[\.\] [^[:space:]]\+$')
+    then
+        # Extract the three fields of interest
+        pctot=$(echo "${line}" | \
+                    sed -e 's/^[[:space:]]\+\([[:digit:]]\+\...\)%.*$/\1/')
+        pcself=$(echo "${line}" | \
+                     sed -e 's/^[[:space:]]\+[[:digit:]]\+\...%[[:space:]]\+\([[:digit:]]\+\...\)%.*$/\1/')
+        func=$(echo "${line}" | sed -e 's/^.*\[\.\] \([^[:space:]]\+\)$/\1/')
+
+        # Print fields of interest
+        if ${dototal}
+        then
+            selector="${pctot}"
+        else
+            selector="${pcself}"
+        fi
+
+        if [[ "$(echo "${selector}" | sed -e 's/\...$//')" -ge ${cutoff} ]]
+        then
+            if ! ${omit_empty} || [[ "${pcself}" != "0.00" ]]
+            then
+                case "${format}"
+                in
+                    --md)
+                        func=$(echo "\`${func}\`")
+                        printf "| %8s | %8s | %-45s |\n" "${pctot}" \
+                               "${pcself}" "${func}" >> "${tmpf}"
+                        ;;
+                    --csv)
+                        printf '"%s","%s","%s"\n' "${pctot}" \
+                               "${pcself}" "${func}" >> "${tmpf}"
+                        ;;
+                esac
+            fi
+        fi
+
+        # If pctot is less than the cutoff, then we definitely cannot have any
+        # more useful data (since we are ordered on pctot, and pcself can be no
+        # greater than pctot)
+        if [[ "$(echo "${pctot}" | sed -e 's/\...$//')" -lt ${cutoff} ]]
+        then
+            break
+        fi
+    fi
+done < "${resfile}" || exit 1
+
+# Show what we collected.  This is done after the loop, so the results are
+# still shown if the file ends before a line below the cutoff is seen.
+if ${dototal}
+then
+    # Already in order of total time
+    cat < "${tmpf}"
+else
+    # Need to sort on the self time column
+    case "${format}"
+    in
+        --md)
+            sort -nr -t'|' -k3 < "${tmpf}"
+            ;;
+        --csv)
+            sort -nr -t'"' -k4 < "${tmpf}"
+            ;;
+    esac
+fi
diff --git a/memcpy-benchmarks/profile-all-funcs.sh b/memcpy-benchmarks/profile-all-funcs.sh
new file mode 100755
index 0000000..2fe1a35
--- /dev/null
+++ b/memcpy-benchmarks/profile-all-funcs.sh
@@ -0,0 +1,153 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Embecosm Limited
+# Contributor Jeremy Bennett
+
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# A script to extract function performance data from memcpy perf runs
+
+set -u
+
+usage () {    # NOTE(review): the "Usage" line below is reconstructed - confirm
+    cat <<EOF
+Usage ./profile-all-funcs.sh [options]
+    [--resdir <dir>]    : Directory with the results. Default
+                          "res-baseline"
+    [--type <str>]      : What type of result to look at:
+                          scalar (default), vector-small or vector-large.
+    [--total|--self]    : Select based on total (self + children) or just
+                          self.
Default "total" + [--funclist ] : Space separated list of functions to profile, + default + "helper_lookup_tb_ptr cpu_get_tb_cpu_state" +EOF +} + +topdir="$(cd $(dirname $(dirname $(dirname $(readlink -f $0)))) ; pwd)" +memcpydir=${topdir}/tooling/memcpy-benchmarks + +# Default values +resdir="${memcpydir}/res-baseline" +restype="scalar" +dototal=true +funclist="helper_lookup_tb_ptr cpu_get_tb_cpu_state" + +set +u +until + opt="$1" + case "${opt}" + in + --resdir) + shift + resdir="$(cd $(readlink -f $1) ; pwd)" + ;; + --type) + shift + case "$1" + in + scalar|vector-small|vector-large) + restype="$1" + ;; + *) + echo "ERROR: Uknown results type: \"$1\"" + usage + exit 1 + ;; + esac + ;; + --total) + dototal=true + ;; + --self) + dototal=false + ;; + --funclist) + shift + funclist="$1" + ;; + --help) + usage + exit 0 + ;; + ?*) + usage + exit 1 + ;; + *) + ;; + esac +[ "x${opt}" = "x" ] +do + shift +done +set -u + +# Temporary file for intermediaries +tmpf="$(mktemp profile-all-funcs-XXXXXX)" +tmpcsv="$(mktemp profile-all-funcs-XXXXXX.csv)" + +# Find out the sizes +cd ${resdir} +dlens=$(ls -1 prof-${restype}-*.res | \ + sed -e "s/^prof-${restype}-//" -e 's/\.res$//' | sort -n) + +# Build up the results in a list +declare -A reslist +for f in ${funclist} +do + reslist[${f}]="%${f}" +done +res_title="%Size" + +# Extract the data +cd ${memcpydir} +for l in ${dlens} +do + res_title="${res_title}#${l}" + ./extract-top-level-funcs.sh --md \ + --resfile ${resdir}/prof-${restype}-${l}.res > ${tmpf} + + for f in ${funclist} + do + # Select which percentage we are reporting + if ${dototal} + then + pc=$(grep ${f} < ${tmpf} | \ + sed -n -e 's/|[^|]\+|[[:space:]]\+\([^[:space:]]\+\).*$/\1/p') + else + pc=$(grep ${f} < ${tmpf} | \ + sed -n -e 's/|[[:space:]]\+\([^[:space:]]\+\).*$/\1/p') + fi + + if [[ "x${pc}" == "x" ]] + then + res="0.0000" + elif [[ ${pc} == "100.0" ]] + then + res="1.0000" + else + intpart=$(echo "${pc}" | \ + sed -e 
's/\([[:digit:]]\+\)\.[[:digit:]]\+$/\1/') + fracpart=$(echo "${pc}" | \ + sed -e 's/[[:digit:]]\+\.\([[:digit:]]\+\)$/\1/') + res=$(printf "0.%02d%2s" "${intpart}" "${fracpart}") + + fi + reslist[${f}]="${reslist[${f}]}#${res}" + done +done + +# Print it all out +res_title="${res_title}%" +echo "${res_title}" | sed -e 's/%/"/g' -e 's/#/","/g' > ${tmpcsv} +for f in ${funclist} +do + reslist[${f}]="${reslist[${f}]}%" + echo "${reslist[${f}]}" | sed -e 's/%/"/g' -e 's/#/","/g' >> ${tmpcsv} +done + +csvtool transpose ${tmpcsv} + +rm -f ${tmpf} +rm -f ${tmpcsv} diff --git a/memcpy-benchmarks/run-perf.sh b/memcpy-benchmarks/run-perf.sh new file mode 100755 index 0000000..5200821 --- /dev/null +++ b/memcpy-benchmarks/run-perf.sh @@ -0,0 +1,136 @@ +#!/bin/bash + +# Copyright (C) 2024 Embecosm Limited +# Contributor Jeremy Bennett + +# SPDX-License-Identifier: GPL-3.0-or-later + +# A script to run Linux perf on lots of memcpy benchmarks + +set -u + +usage () { + cat <] : Total bytes to copy. Default 1,000,000,000 + [--resdir ] : Directory in which to place the results. Default + is "res-baseline" in the directory holding this + script. + [--sizes ] : Space separated list of the data sizes to use when + creating results. Default list is all the powers + of 2, 3, 5 and 7 up to 5^6 + +The results will be three sets of files of the form "prof--.res", +where type is one of "scalar", "vector-small" or "vector-large", and size, is +the size of the data block copied on each iteration. + +The total number of iterations for each test is determined by the number given +in the "--bytes" argument divided by the size of the data block being used for +the run. + +"perf record" is run using DWARF to determine the call graph. This gives +accurate results, but is slow. Expect each iteration to take of the order of +20 minutes on a decent server. 
+EOF +} + +memcpydir="$(cd $(readlink -f $0) ; pwd)" + +# Default values +bytes="1000000000" +resdir="${memcpydir}/res-baseline" +data_lens=" 1 \ + 2 \ + 3 \ + 4 \ + 5 \ + 7 \ + 8 \ + 9 \ + 16 \ + 25 \ + 27 \ + 32 \ + 49 \ + 64 \ + 81 \ + 125 \ + 128 \ + 243 \ + 256 \ + 343 \ + 512 \ + 625 \ + 729 \ + 1024 \ + 2048 \ + 2401 \ + 3125 \ + 4096 \ + 6561 \ + 8192 \ + 15625" + +set +u +until + opt="$1" + case "${opt}" + in + --bytes) + shift + bytes="$1" + ;; + --resdir) + shift + resdir="$(cd $(readlink -f $1) ; pwd)" + ;; + --sizes) + shift + data_lens="$1" + ;; + --help) + usage + exit 0 + ;; + ?*) + usage + exit 1 + ;; + *) + ;; + esac +[ "x${opt}" = "x" ] +do + shift +done +set -u + +for dlen in ${data_lens} +do + iters=$((bytes / dlen)) + echo "=== Data len ${dlen}, iterations ${iters}" + echo " - scalar record" + time perf record -g -m 16M --call-graph dwarf,4096 -- \ + qemu-riscv64 -cpu "rv64,v=true,vlen=128" \ + smemcpy.exe ${dlen} ${iters} > /dev/null + echo " - scalar report" + time perf report --stdio --call-graph "graph,0.1,caller,function" \ + -k /tmp/vmlinux | \ + sed -e 's/[[:space:]]*$//' > ${resdir}/prof-scalar-${dlen}.res + echo " - small vector record" + time perf record -g -m 16M --call-graph dwarf -- \ + qemu-riscv64 -cpu "rv64,v=true,vlen=128" \ + vmemcpy1.exe ${dlen} ${iters} > /dev/null + echo " - small vector report" + time perf report --stdio --call-graph "graph,0.1,caller,function" \ + -k /tmp/vmlinux | \ + sed -e 's/[[:space:]]*$//' > ${resdir}/prof-vector-small-${dlen}.res + echo " - large vector record" + time perf record -g -m 16M --call-graph dwarf,4096 -- \ + qemu-riscv64 -cpu "rv64,v=true,vlen=1024" \ + vmemcpy8.exe ${dlen} ${iters} > /dev/null + echo " - large vector report" + time perf report --stdio --call-graph "graph,0.1,caller,function" \ + -k /tmp/vmlinux | \ + sed -e 's/[[:space:]]*$//' > ${resdir}/prof-vector-large-${dlen}.res +done diff --git a/run-spec-pop2.sh b/run-spec-pop2.sh new file mode 100755 index 0000000..1e7e72b 
--- /dev/null
+++ b/run-spec-pop2.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Embecosm Limited
+# Contributor Jeremy Bennett
+
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# A script to run Linux perf on the SPEC 628.pop2_s benchmark.
+#
+# The script changes into the benchmark's run directory, records a DWARF
+# call-graph profile of the benchmark binary running under qemu-riscv64 and
+# then writes the output of "perf report" to the report file.
+
+set -u
+
+# Print a summary of the command line options.
+# NOTE(review): the opening lines of this here-document (the "Usage" line) were
+# lost from the text under review and are reconstructed - confirm the
+# wording.
+usage () {
+    cat <<EOF
+Usage ./run-spec-pop2.sh [options]
+
+    [--reportfile <file>] : Name of the report file (relative to tooling
+                            directory). Default: prof-628.pop2_s.res
+    [--specdir <dir>]     : Use this as the directory with the SPEC CPU
+                            2017 installation. Default
+                            ${topdir}/install/spec-2024-08-14-08-41-03
+EOF
+}
+
+topdir="$(cd "$(dirname "$(dirname "$(readlink -f "$0")")")" && pwd)"
+tooldir="${topdir}/tooling"
+specdir="${topdir}/install/spec-2024-08-14-08-41-03"
+reportfile=prof-628.pop2_s.res
+
+# The option loop deliberately reads "$1" after the arguments run out, so
+# unset variable checking is turned off while it runs.
+set +u
+until
+    opt="$1"
+    case "${opt}" in
+        --reportfile)
+            shift
+            # Kept exactly as given: relative names are resolved against the
+            # tooling directory once all the options have been read.
+            reportfile="$1"
+            ;;
+        --specdir)
+            shift
+            specdir="$(cd "$(readlink -f "$1")" && pwd)"
+            ;;
+        --help)
+            usage
+            exit 0
+            ;;
+        ?*)
+            usage
+            exit 1
+            ;;
+        *)
+            ;;
+    esac
+    [ "x${opt}" = "x" ]
+do
+    shift
+done
+set -u
+
+# An absolute report file name is used as is; anything else is relative to
+# the tooling directory, as the usage message promises.
+case "${reportfile}" in
+    /*) ;;
+    *)  reportfile="${tooldir}/${reportfile}" ;;
+esac
+
+export PATH="${topdir}/install/bin:${PATH}"
+
+speccpudir="${specdir}/benchspec/CPU"
+rundir="${speccpudir}/628.pop2_s/run/run_base_test_riscv64-qemu-default.0000"
+cd "${rundir}" || exit 1
+
+echo "Recording..."
+time perf record -g -m 16M --call-graph dwarf,4096 -- qemu-riscv64 \
+     -cpu "rv64,zicsr=true,v=true,vext_spec=v1.0,zfh=true,zvfh=true" \
+     ./speed_pop2_base.riscv64-qemu-default \
+     > pop2_s-perf.out 2>> pop2_s-perf.err
+echo "Generating report..."
+time perf report --stdio --call-graph "graph,0.1,caller,function" \
+     -k /tmp/vmlinux | \
+    sed -e 's/[[:space:]]*$//' > "${reportfile}"