Skip to content

Commit

Permalink
Initial framework for benchmarking fault-only-first load
Browse files Browse the repository at this point in the history
fault-first-load/

	* .gitignore: Created.
	* Makefile: Created.
	* do-all.sh: Created.
	* template-ld.S: Created.

Signed-off-by: Jeremy Bennett <[email protected]>
  • Loading branch information
jeremybennett committed Feb 5, 2025
1 parent c1251ba commit 23c21aa
Show file tree
Hide file tree
Showing 4 changed files with 302 additions and 0 deletions.
2 changes: 2 additions & 0 deletions fault-first-load/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Generated results
*.csv
30 changes: 30 additions & 0 deletions fault-first-load/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Makefile to generate benchmarks for fault-only-first load

# Copyright (C) 2025 Embecosm Limited <www.embecosm.com>
# Contributor Jeremy Bennett <[email protected]>

# SPDX-License-Identifier: GPL-3.0-or-later

# User-tunable parameters.  Override on the command line, for example
# `make LD_CNT=11 VLDOP=vle16ff ELEM=e16`.  Each one is handed to the
# preprocessor as a -D macro of the same name.

# Number of copies of the load instruction in each loop iteration
LD_CNT ?= 1
# Number of loop iterations
LD_LOOP_CNT ?= 1000000
# Mnemonic of the load instruction, without the ".v" suffix
VLDOP ?= vle8ff
# Non-zero to build the masked form of the load
DOMASK ?= 0
# Mask value, only used when DOMASK is non-zero
VMASK ?=
# Element width and register grouping given to vsetvli
ELEM ?= e8
LMUL ?= m1

# The tools and their flags.  The GCC driver is used as the assembler.
AS := riscv64-unknown-linux-gnu-gcc
ASFLAGS := -march=rv64gcv
ASFLAGS += -DLD_CNT="$(LD_CNT)" -DLD_LOOP_CNT="$(LD_LOOP_CNT)"
ASFLAGS += -DVLDOP="$(VLDOP)" -DDOMASK="$(DOMASK)" -DVMASK="$(VMASK)"
ASFLAGS += -DELEM="$(ELEM)" -DLMUL="$(LMUL)"
CFLAGS := -march=rv64gcv -O0
LDFLAGS := -march=rv64gcv -O0

# Build the benchmark executable, named after the load instruction under test.
# The parameter values are not prerequisites, so an existing executable is not
# rebuilt when only a parameter changes: run `make clean` first in that case.
$(VLDOP).exe: template-ld.S
	$(AS) $(ASFLAGS) $< -o $@

# Remove built executables and the timing scratch files.  Declared phony so
# that a file called "clean" can never make this target look up to date.
.PHONY: clean
clean:
	$(RM) *.exe fault-first-load*.txt
185 changes: 185 additions & 0 deletions fault-first-load/do-all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# Run all the fault-only-first load benchmarks

# Copyright (C) 2025 Embecosm Limited <www.embecosm.com>
# Contributor Jeremy Bennett <[email protected]>

# SPDX-License-Identifier: GPL-3.0-or-later

set -u

# Produce help message on standard output.
#
# The here-document is unquoted, so any "$" in it would be expanded by the
# shell.  The install directory is therefore described with a literal <qid>
# placeholder rather than a shell variable (which is not set at this point).
usage () {
    cat <<EOF
Usage ./do-all.sh      : Benchmark fault-only-first load.
    [--nloops <count>] : Number of iterations of the test
                         program (default 10000000)
    [--nstats <count>] : Number of times to repeat each test
                         for statistical analysis (default 10).
    [--base <id>]      : Commit of the QEMU version to use as
                         baseline
    [--test <id>]      : Commit of the QEMU version to be
                         tested against the baseline
    [--help]           : Print this message and exit
The QEMU versions are assumed to have been built without plugins enabled, with
the binary installed in ../../install/qemu-<qid>-no-plugin/bin, where <qid> is
the argument to --base or --test.
EOF
}

# Build the exe from scratch for one instruction.
# $1 Instruction (e.g. vle8ff)
# $2 Instruction count (number of copies of the load per loop iteration)
# $3 LMUL (e.g. m1)
# Uses the global nloops for the loop count.  The exit status is that of the
# final make invocation; all make output is discarded.
buildit() {
    local i=$1
    local icount=$2
    local lmul=$3
    # Derive the element width from the mnemonic by dropping the first "vl"
    # and the first "ff", e.g. vle8ff -> e8.
    local elem=${i/vl/}
    elem=${elem/ff/}
    # Always clean first: the Makefile target does not depend on the parameter
    # values, so an existing exe would otherwise not be rebuilt.
    make clean > /dev/null 2>&1
    make LD_CNT="${icount}" LD_LOOP_CNT="${nloops}" VLDOP="${i}" \
         DOMASK=0 ELEM="${elem}" LMUL="${lmul}" "${i}.exe" > /dev/null 2>&1
}

# Return the time for doing a single run. Assumes the exe has been built
# $1 VLEN
# $2 Instruction
# $3 QEMU commit
# Prints user + system CPU time in seconds, to three decimal places, on
# standard output.  Uses the global tmpf as scratch space for the output of
# the timed command.
runone() {
    local vl=$1
    local i=$2
    local qid=$3
    local u s t

    # Run in a subshell so that the PATH change, which selects the requested
    # QEMU build, cannot leak into the caller.  The report from the "time"
    # keyword is written to the subshell's stderr, so it is captured too.
    (PATH="../../install/qemu-${qid}-no-plugin/bin:${PATH}"
     time qemu-riscv64 -cpu "rv64,v=true,vlen=${vl}" "${i}.exe") \
        > "${tmpf}" 2>&1

    # The report lines look like "user<TAB>1m2.345s".  Turn each into the
    # expression "minutes * 60 + seconds", so runs of a minute or longer are
    # handled as well as short ones.
    u=$(sed -n -e 's/^user[[:space:]]*\([0-9]\+\)m\([0-9.]\+\)s$/\1 * 60 + \2/p' \
            < "${tmpf}")
    s=$(sed -n -e 's/^sys[[:space:]]*\([0-9]\+\)m\([0-9.]\+\)s$/\1 * 60 + \2/p' \
            < "${tmpf}")
    t=$(echo "print((${u}) + (${s}))" | python3)
    # If the times could not be parsed t is empty; report zero in that case.
    printf "%.3f" "${t:-0}"
}

# Run a single measurement and print the result to three decimal places.
# The result is (base net time / test net time) - 1, where each net time is
# the difference between an 11-instruction run and a 1-instruction run.
# $1 VLEN
# $2 LMUL
# $3 Instruction
runit() {
    local vlen=$1
    local group=$2
    local insn=$3
    local ratio=""
    local base1 test1 base11 test11 netbase nettest

    # Keep going until both net times are positive, which gives a meaningful
    # ratio.  This skips silly results, for example those disturbed by cron
    # jobs.
    until [[ "x${ratio}" != "x" ]]
    do
        # One load per iteration, timed on the base and test QEMU
        buildit ${insn} 1 ${group}
        base1=$(runone ${vlen} ${insn} ${qemubase})
        test1=$(runone ${vlen} ${insn} ${qemutest})

        # Eleven loads per iteration, timed on the base and test QEMU
        buildit ${insn} 11 ${group}
        base11=$(runone ${vlen} ${insn} ${qemubase})
        test11=$(runone ${vlen} ${insn} ${qemutest})

        # Net time of the ten extra loads for each QEMU
        netbase=$(python3 <<< "print(${base11} - ${base1})")
        nettest=$(python3 <<< "print(${test11} - ${test1})")

        # Prints an empty line, leaving ratio empty, unless both are positive
        ratio=$(python3 <<< "print(${netbase} / ${nettest} - 1 if (${netbase} > 0) and (${nettest} > 0) else '')")
    done
    printf "%.3f" ${ratio}
}

# The load instructions, vector lengths and register groupings to benchmark
ldinstr="vle8ff vle16ff vle32ff vle64ff"

vlenlist="128 256 512 1024"

lmullist="m1 m2 m4 m8"

# Scratch file holding the output of each timed run.  It is created before
# the command line is parsed, so remove it on every exit path, including
# --help and argument errors.
tmpf=$(mktemp fault-first-load-XXXXXX.txt)
trap 'rm -f "${tmpf}"' EXIT

# CSV file the results are written to
ldresf="ldres.csv"

# Defaults for variables
nloops=10000000
nstats=10
qemubase="6528013b5f"
qemutest="db95037b42"

# Parse command line options.
#
# Every option except --help takes exactly one value.  An option given without
# its value, and any unrecognised argument (including an empty one), is
# reported together with the help text and the script exits with status 1.
# Checking of unset variables stays relaxed while parsing, as before, so that
# printing the help text cannot trip over an unset variable.
set +u
while [ $# -gt 0 ]
do
    opt="$1"
    case "${opt}" in
        --nloops|--nstats|--base|--test)
            if [ $# -lt 2 ]
            then
                echo "Missing value for '${opt}'"
                usage
                exit 1
            fi
            shift
            case "${opt}" in
                --nloops) nloops="$1" ;;
                --nstats) nstats="$1" ;;
                --base)   qemubase="$1" ;;
                --test)   qemutest="$1" ;;
            esac
            ;;
        --help)
            usage
            exit 0
            ;;
        *)
            echo "Unknown argument '$1'"
            usage
            exit 1
            ;;
    esac
    shift
done
set -u

# All the load instructions
echo "Fault only first load instructions"

# CSV header: the three key columns, then one column per repetition.  Writing
# it with ">" starts the results file afresh.
csvhdr='"vlen","lmul","ldop"'
for run in $(seq ${nstats})
do
    csvhdr="${csvhdr}$(printf ',"run %d"' ${run})"
done
printf '%s\n' "${csvhdr}" > "${ldresf}"

# One CSV row per (VLEN, LMUL, instruction) combination.  Progress is shown on
# the terminal as one dot per completed repetition.
for vlen in ${vlenlist}
do
    for group in ${lmullist}
    do
        for insn in ${ldinstr}
        do
            printf '"%d","%s","%s"' ${vlen} ${group} ${insn} >> "${ldresf}"
            printf "VLEN = %4d, LMUL = %2s: %-7s " ${vlen} ${group} ${insn}
            for run in $(seq ${nstats})
            do
                speedup=$(runit ${vlen} ${group} ${insn})
                printf ',"%.3f"' ${speedup} >> "${ldresf}"
                printf "."
            done
            printf '\n' >> "${ldresf}"
            printf "\n"
        done
    done
done

# Tidy up
make clean > /dev/null 2>&1
rm -f "${tmpf}"
85 changes: 85 additions & 0 deletions fault-first-load/template-ld.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// A generic framework for benchmarking fault-only-first load
//
// Copyright (C) 2025 Embecosm Limited
// Contributor: Jeremy Bennett <[email protected]>
//
// SPDX-License-Identifier: GPL-3.0-or-later

// Test LD_CNT loads LD_LOOP_CNT times
//
// Preprocessor parameters:
//   LD_CNT       copies of the load instruction in each loop iteration
//   LD_LOOP_CNT  number of loop iterations
//   VLDOP        load mnemonic without the ".v" suffix
//   DOMASK       non-zero to use the masked form of the load
//   ELEM, LMUL   element width and register grouping given to vsetvli
//
// Always returns 0.
	.text
	.globl	main
	.align	2
	.type	main,@function
main:
	.cfi_startproc

	csrr	t1, vlenb		// Requested vector length for vsetvli
#if DOMASK
	la	a0, testmask		// Location of mask to load
	vsetvli	t2, t1, e8, m1, ta, ma
	vle8.v	v0, (a0)
#endif
	la	a0, testlddata		// Location of values to load
	li	t0, LD_LOOP_CNT		// Number of times around the loop

.L0:
	// vl and vtype are set up afresh on every iteration.
	vsetvli	t2, t1, ELEM, LMUL, ta, ma
	// The destination is v8 because a register group must start at a
	// register number that is a multiple of LMUL.  v8 is valid for m1
	// through m8 and does not overlap the mask register v0.
	.rept	LD_CNT
#if DOMASK
	VLDOP.v	v8, (a0), v0.t
#else
	VLDOP.v	v8, (a0)
#endif
	.endr
	addi	t0, t0, -1
	bnez	t0, .L0

	mv	a0, zero
	ret

	.cfi_endproc
	.size	main,.-main

.data
// Mask to set.
#if DOMASK
// Mask if needed.
testmask:
.8byte VMASK
#endif
// Data to load. Maximum is for 8 registers of 1024 bits, so 1Kibytes.
testlddata:
.rept 4
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
.byte 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
.byte 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f
.byte 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
.byte 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f
.byte 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
.byte 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f
.byte 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
.byte 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f
.byte 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67
.byte 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f
.byte 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77
.byte 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f
.byte 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
.byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
.byte 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
.byte 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
.byte 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7
.byte 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf
.byte 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7
.byte 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf
.byte 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7
.byte 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf
.byte 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7
.byte 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf
.byte 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7
.byte 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef
.byte 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7
.byte 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
.endr

0 comments on commit 23c21aa

Please sign in to comment.