-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial framework for benchmarking fault-only-first load
fault-first-load/ * .gitignore: Created. * Makefile: Created. * do-all.sh: Created. * template-ld.S: Created. Signed-off-by: Jeremy Bennett <[email protected]>
- Loading branch information
1 parent
c1251ba
commit 23c21aa
Showing
4 changed files
with
302 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Generated results | ||
*.csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Makefile to generate benchmarks for fault-only-first load | ||
|
||
# Copyright (C) 2025 Embecosm Limited <www.embecosm.com> | ||
# Contributor Jeremy Bennett <[email protected]> | ||
|
||
# SPDX-License-Identifier: GPL-3.0-or-later | ||
|
||
# Parameters that can be set (on the make command line or in the environment)
#   LD_CNT       value passed to the template as -DLD_CNT
#   LD_LOOP_CNT  value passed to the template as -DLD_LOOP_CNT
#   VLDOP        load mnemonic (without ".v"); also names the executable
#   DOMASK       passed as -DDOMASK
#   VMASK        passed as -DVMASK (empty by default)
#   ELEM, LMUL   passed as -DELEM and -DLMUL
LD_CNT ?= 1
LD_LOOP_CNT ?= 1000000
VLDOP ?= vle8ff
DOMASK ?= 0
VMASK ?=
ELEM ?= e8
LMUL ?= m1

# The tools and their flags.  AS is the GCC driver, so the .S template is
# run through the C preprocessor and linked in a single step.
AS=riscv64-unknown-linux-gnu-gcc
ASFLAGS=-march=rv64gcv -DLD_CNT="$(LD_CNT)" -DLD_LOOP_CNT="$(LD_LOOP_CNT)" \
	-DVLDOP="$(VLDOP)" -DDOMASK="$(DOMASK)" -DVMASK="$(VMASK)" \
	-DELEM="$(ELEM)" -DLMUL="$(LMUL)"
CFLAGS=-march=rv64gcv -O0
LDFLAGS=-march=rv64gcv -O0

# The executable depends only on the template, not on the parameter values,
# so run "make clean" before rebuilding with different parameters.
$(VLDOP).exe: template-ld.S
	$(AS) $(ASFLAGS) $^ -o $@

# "clean" is not a file; declare it phony so a stray file of that name
# cannot stop the recipe from running.
.PHONY: clean
clean:
	$(RM) *.exe fault-first-load*.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
#!/usr/bin/env bash
# Run all the fault-only-first load benchmarks

# Copyright (C) 2025 Embecosm Limited <www.embecosm.com>
# Contributor Jeremy Bennett <[email protected]>

# SPDX-License-Identifier: GPL-3.0-or-later

# This script relies on Bash features ([[ ]], local), hence the explicit
# interpreter line above.

# Treat any reference to an unset variable as an error.
set -u
|
||
# Produce help message
# Outputs: the usage text on stdout.
# The here-document delimiter is quoted so that "${qid}" in the text is
# printed literally instead of being expanded (it is a placeholder, not a
# shell variable of this script).
usage () {
    cat <<'EOF'
Usage ./do-all.sh        : Benchmark fault-only-first loads.
    [--nloops <count>]   : Number of iterations of the test
                           program (default 10000000)
    [--nstats <count>]   : Number of times to repeat each test
                           for statistical analysis (default 10).
    [--base <id>]        : Commit of the QEMU version to use as
                           baseline
    [--test <id>]        : Commit of the QEMU version to be
                           tested against the baseline
    [--help]             : Print this message and exit

The QEMU versions are assumed to have been built without plugins enabled,
with the binary installed in ../../install/qemu-${qid}-no-plugin/bin, where
qid is the argument to --base or --test.
EOF
}
|
||
# Build the exe
# $1 Instruction (e.g. vle8ff); the executable is named <instruction>.exe
# $2 Instruction count (passed to make as LD_CNT)
# $3 LMUL setting (e.g. m1)
# Globals: nloops (read) - passed to make as LD_LOOP_CNT
# Returns: the exit status of the build "make"; all make output is discarded.
buildit() {
    local i=$1
    local icount=$2
    local lmul=$3
    # Element width is the mnemonic with "vl" and "ff" removed:
    # vle8ff -> e8
    local elem=${i/vl/}
    elem=${elem/ff/}

    # The Makefile does not track parameter changes, so always start clean.
    make clean > /dev/null 2>&1
    make "LD_CNT=${icount}" "LD_LOOP_CNT=${nloops}" "VLDOP=${i}" \
         DOMASK=0 "ELEM=${elem}" "LMUL=${lmul}" "${i}.exe" > /dev/null 2>&1
}
|
||
# Return the time for doing a single run. Assumes the exe has been built
# $1 VLEN
# $2 Instruction
# $3 QEMU commit
# Globals: tmpf (read) - scratch file that receives the run's output
# Outputs: user + system CPU time in seconds, to three decimal places
# Returns: 0 on success, 1 if the timing line could not be parsed
runone() {
    local vl=$1
    local i=$2
    local qid=$3
    local qemubin="../../install/qemu-${qid}-no-plugin/bin"
    local timing_re='^([0-9]+)[.,]([0-9]{3}) ([0-9]+)[.,]([0-9]{3})$'
    local line="" last="" ms

    # Everything that must not leak (PATH, TIMEFORMAT) is set inside the
    # subshell.  TIMEFORMAT makes "time" report user and system CPU time as
    # plain seconds, so runs of a minute or more are handled correctly.
    (
        PATH="${qemubin}:${PATH}"
        TIMEFORMAT='%3U %3S'
        time qemu-riscv64 -cpu "rv64,v=true,vlen=${vl}" "${i}.exe"
    ) > "${tmpf}" 2>&1

    # The timing report is the last line written to the scratch file.
    while IFS= read -r line || [[ -n "${line}" ]]; do
        last=${line}
    done < "${tmpf}"

    if [[ "${last}" =~ ${timing_re} ]]; then
        # Work in integer milliseconds; 10# stops leading zeros being read
        # as octal.
        ms=$(( 10#${BASH_REMATCH[1]}${BASH_REMATCH[2]} \
               + 10#${BASH_REMATCH[3]}${BASH_REMATCH[4]} ))
        printf '%d.%03d' "$(( ms / 1000 ))" "$(( ms % 1000 ))"
    else
        echo "runone: cannot parse timing output for ${i}.exe" >&2
        return 1
    fi
}
|
||
# Run a single measurement
# $1 VLEN
# $2 LMUL
# $3 Instruction
# Globals: qemubase, qemutest (read) - QEMU commits to compare
#          RUNIT_MAX_TRIES (read, optional) - retry limit, default 10
# Outputs: (base net time / test net time) - 1, to three decimal places,
#          where net time is the 11-instruction run minus the 1-instruction
#          run.  Prints "nan" if no usable measurement was obtained.
# Returns: 0 on success, 1 if every attempt failed
runit() {
    local vl=$1
    local lmul=$2
    local i=$3
    local maxtries=${RUNIT_MAX_TRIES:-10}
    local attempt res
    local tbase1 ttest1 tbase11 ttest11 tdbase tdtest

    # We repeat until we get a meaningful result. This should mean we skip
    # some silly results due to cron jobs.  The number of attempts is
    # bounded so that a broken build or missing QEMU cannot hang the script.
    for (( attempt = 1; attempt <= maxtries; attempt++ )); do
        # single instruction runs for base and test QEMU
        buildit "${i}" 1 "${lmul}"
        tbase1=$(runone "${vl}" "${i}" "${qemubase}")
        ttest1=$(runone "${vl}" "${i}" "${qemutest}")
        # 11 instruction runs for base and test
        buildit "${i}" 11 "${lmul}"
        tbase11=$(runone "${vl}" "${i}" "${qemubase}")
        ttest11=$(runone "${vl}" "${i}" "${qemutest}")
        # Calculate net instructions times for base and test
        tdbase=$(python3 -c "print(${tbase11} - ${tbase1})")
        tdtest=$(python3 -c "print(${ttest11} - ${ttest1})")
        # Only a positive net time on both sides gives a usable ratio;
        # otherwise the result is left empty and we try again.
        res=$(python3 -c "print(${tdbase} / ${tdtest} - 1 if (${tdbase} > 0) and (${tdtest} > 0) else '')")
        if [[ -n "${res}" ]]; then
            printf "%.3f" "${res}"
            return 0
        fi
    done

    echo "runit: no usable result for ${i} (VLEN ${vl}, LMUL ${lmul})" \
         "after ${maxtries} attempts" >&2
    printf "nan"
    return 1
}
|
||
# Instructions, vector lengths and LMUL settings to benchmark.  These are
# whitespace-separated lists that are iterated with word splitting.
ldinstr="vle8ff vle16ff vle32ff vle64ff"

vlenlist="128 256 512 1024"

lmullist="m1 m2 m4 m8"

# Scratch file for capturing the output of each timed run.  It is removed
# on every exit path (including --help and argument errors), not just at
# the end of a successful run.
tmpf=$(mktemp fault-first-load-XXXXXX.txt) || {
    echo "Unable to create temporary file" >&2
    exit 1
}
trap 'rm -f -- "${tmpf}"' EXIT

# CSV file that receives the results
ldresf="ldres.csv"

# Defaults for variables
nloops=10000000
nstats=10
qemubase="6528013b5f"
qemutest="db95037b42"
||
# Parse command line options
# Each of --nloops, --nstats, --base and --test takes one value and
# overrides the corresponding default.  A missing or empty value is an
# error rather than silently replacing the default with an empty string.
# As before, an empty argument ends option processing.
while [[ $# -gt 0 ]]; do
    opt=$1
    case "${opt}" in
        --nloops | --nstats | --base | --test)
            if [[ -z "${2-}" ]]; then
                echo "Missing value for '${opt}'" >&2
                usage >&2
                exit 1
            fi
            case "${opt}" in
                --nloops) nloops=$2 ;;
                --nstats) nstats=$2 ;;
                --base) qemubase=$2 ;;
                --test) qemutest=$2 ;;
            esac
            # Drop the option; its value is dropped by the shift below.
            shift
            ;;
        --help)
            usage
            exit 0
            ;;
        '')
            break
            ;;
        *)
            echo "Unknown argument '${opt}'" >&2
            usage >&2
            exit 1
            ;;
    esac
    shift
done
set -u
|
||
# All the load instructions
echo "Fault only first load instructions"

# CSV header row: three fixed columns followed by one column per repeat.
{
    printf '"vlen","lmul","ldop"'
    for run in $(seq ${nstats}); do
        printf ',"run %d"' ${run}
    done
    printf '\n'
} > ${ldresf}

# One CSV row, and one line of progress dots on stdout, for every
# combination of VLEN, LMUL and load instruction.
for vlen in ${vlenlist}; do
    for lm in ${lmullist}; do
        for op in ${ldinstr}; do
            printf '"%d","%s","%s"' ${vlen} ${lm} ${op} >> ${ldresf}
            printf "VLEN = %4d, LMUL = %2s: %-7s " ${vlen} ${lm} ${op}
            for run in $(seq ${nstats}); do
                ratio=$(runit ${vlen} ${lm} ${op})
                printf ',"%.3f"' ${ratio} >> ${ldresf}
                printf "."
            done
            printf '\n' >> ${ldresf}
            printf "\n"
        done
    done
done

# Tidy up
make clean > /dev/null 2>&1
rm -f ${tmpf}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
// A generic framework for benchmarking fault-only-first load
//
// Copyright (C) 2025 Embecosm Limited
// Contributor: Jeremy Bennett <[email protected]>
//
// SPDX-License-Identifier: GPL-3.0-or-later

// Test LD_CNT loads LD_LOOP_CNT times
//
// Preprocessor parameters (supplied with -D when building):
//   LD_CNT       number of load instructions emitted in the loop body
//   LD_LOOP_CNT  number of times around the loop
//   VLDOP        load mnemonic without the ".v" suffix
//   DOMASK       non-zero to load a mask into v0 and use masked loads
//   ELEM, LMUL   element width and LMUL operands for vsetvli
	.text
	.globl	main
	.align	2
	.type	main,@function
main:
	.cfi_startproc

	csrr	t1, vlenb		// VLENB is used as the requested vector length
#if DOMASK
	la	a0, testmask		// Location of mask to load
	vsetvli	t2, t1, e8, m1, ta, ma
	vle8.v	v0, (a0)
#endif
	la	a0, testlddata		// Location of values to load
	li	t0, LD_LOOP_CNT		// Number of times around the loop

.L0:
	// Fault-only-first loads may change vl, so it is set again on every
	// iteration.
	vsetvli	t2, t1, ELEM, LMUL, ta, ma
	// The destination is v8: a register group must start at a multiple
	// of LMUL, and v8 satisfies that for every LMUL up to m8 without
	// overlapping the mask register v0.
	.rept LD_CNT
#if DOMASK
	VLDOP.v	v8, (a0), v0.t
#else
	VLDOP.v	v8, (a0)
#endif
	.endr
	addi	t0, t0, -1
	bnez	t0, .L0

	mv	a0, zero		// Return 0 from main
	ret

	.cfi_endproc
	.size	main,.-main
|
||
.data | ||
// Mask to set. | ||
#if DOMASK | ||
// Mask if needed. | ||
testmask: | ||
.8byte VMASK | ||
#endif | ||
// Data to load. Maximum is for 8 registers of 1024 bits, so 1Kibytes. | ||
testlddata: | ||
.rept 4 | ||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 | ||
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f | ||
.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 | ||
.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f | ||
.byte 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27 | ||
.byte 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f | ||
.byte 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37 | ||
.byte 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f | ||
.byte 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47 | ||
.byte 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f | ||
.byte 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57 | ||
.byte 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f | ||
.byte 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67 | ||
.byte 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f | ||
.byte 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77 | ||
.byte 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f | ||
.byte 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 | ||
.byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f | ||
.byte 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97 | ||
.byte 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f | ||
.byte 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7 | ||
.byte 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf | ||
.byte 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7 | ||
.byte 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf | ||
.byte 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7 | ||
.byte 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf | ||
.byte 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7 | ||
.byte 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf | ||
.byte 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7 | ||
.byte 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef | ||
.byte 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 | ||
.byte 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff | ||
.endr |