Skip to content
Open
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
5ec0a43
[llvm-exegesis] [AArch64] Use X16 instead of X8
lakshayk-nv Jun 19, 2025
a75c835
[llvm-exegesis] [AArch64] Add helpers to push/pop GPRs and save/resto…
lakshayk-nv Jun 19, 2025
21cd653
[llvm-exegesis] [AArch64] Implement memory management required functions
lakshayk-nv Jun 19, 2025
50c07db
[llvm-exegesis] [AArch64] Implement different register initialization…
lakshayk-nv Jun 19, 2025
01a03a1
[llvm-exegesis] [AArch64] Resolve Merge Conflict coming from reverted…
lakshayk-nv Jun 19, 2025
412c9ca
[llvm-exegesis] [AArch64] Format changes
lakshayk-nv Jun 19, 2025
9d7efdb
[llvm-exegesis] [AArch64] Fix missing closing brace in Target.cpp
lakshayk-nv Jun 19, 2025
be42cd7
[llvm-exegesis] Renamed `ArgumentRegisters` enum to `ReservedRegister…
lakshayk-nv Jul 1, 2025
ce64b47
[llvm-exegesis] [AArch64] Refactor stack push/pop functions for clari…
lakshayk-nv Jul 1, 2025
5133a05
[llvm-exegesis] [AArch64] Removed if-else block brackets
lakshayk-nv Jul 1, 2025
c50890b
[llvm-exegesis] [AArch64] Introduced warnings for unimplemented funct…
lakshayk-nv Jul 1, 2025
1b0f4c7
[llvm-exegesis] [AArch64] refactor configurePerfCounter, errs to dbgs
lakshayk-nv Jul 1, 2025
a53d5b0
[llvm-exegesis] [AArch64] Remove unneccessary AArch64 guard.
lakshayk-nv Aug 14, 2025
4ab9412
[llvm-exegesis] [AArch64] Remove register initial value debug info
lakshayk-nv Aug 14, 2025
13d2a10
[llvm-exegesis] [AArch64] Remove additional register initial value de…
lakshayk-nv Aug 18, 2025
1f66364
[llvm-exegesis] Revert ReservedRegisters enum to ArgumentRegisters in…
lakshayk-nv Sep 1, 2025
eeb6427
[llvm-exegesis] [AArch64] Removed kernal or fixed address option for …
lakshayk-nv Sep 4, 2025
63c199f
[llvm-exegesis] [AArch64] setStackRegisterToAuxMem Implemention
lakshayk-nv Sep 4, 2025
a05f2a9
[llvm-exegesis] [AArch64] Revert setStackRegisterToAuxMem Implementio…
lakshayk-nv Sep 4, 2025
c7d7676
[llvm-exegesis] [AArch64] Remove unused generateRoundToNearestPage fu…
lakshayk-nv Sep 4, 2025
f52e612
[llvm-exegesis] [AArch64] Fix function naming for syscall register h…
lakshayk-nv Sep 4, 2025
a124755
[llvm-exegesis] [AArch64] Header cleanup
lakshayk-nv Sep 4, 2025
c538e9d
Clang Format
lakshayk-nv Sep 4, 2025
c92220d
Merge branch 'main' into llvm-exegesis-segfault-subprocess
lakshayk-nv Sep 4, 2025
9c082ff
[llvm-exegesis] [AArch64] Updated configurePerfCounter to properly lo…
lakshayk-nv Sep 15, 2025
16543e1
[llvm-exegesis] [AArch64] Add warning in generateMmapAuxMem about fd
lakshayk-nv Sep 15, 2025
a9207b4
[llvm-exegesis] [AArch64] Initialize file descriptor handling in auxi…
lakshayk-nv Sep 22, 2025
c7c0130
[llvm-exegesis] [AArch64] configurePerfCounter with improved file des…
lakshayk-nv Sep 22, 2025
a5c1bb4
[llvm-exegesis] [AArch64] Replace getpagesize() with llvm::sys::Proce…
lakshayk-nv Sep 22, 2025
f9e83d5
[llvm-exegesis] [AArch64] Add conditional compilation for performance…
lakshayk-nv Sep 22, 2025
c77b16c
[llvm-exegesis] [AArch64] Refactor conditional compilation for perfor…
lakshayk-nv Sep 22, 2025
871392b
format changes
lakshayk-nv Sep 22, 2025
d6f2371
[llvm-exegesis] [AArch64] Add test for LD1B load instr in subprocess…
lakshayk-nv Sep 23, 2025
819dfc0
[llvm-exegesis] [AArch64] Update lit local config file to expand chec…
lakshayk-nv Sep 26, 2025
9721828
[llvm-exegesis] [AArch64] Conditional compilation of setup and cleanu…
lakshayk-nv Sep 26, 2025
b45e004
[llvm-exegesis] [AArch64] Reverted to manual snippet mmap to use anon…
lakshayk-nv Sep 30, 2025
8e8babe
[llvm-exegesis] [AArch64] Add manual snippet syscall test for memory …
lakshayk-nv Sep 30, 2025
1c3cb5d
[llvm-exegesis] [AArch64] Update latency test to use ADCXr opcode ins…
lakshayk-nv Sep 30, 2025
422124e
[llvm-exegesis] [AArch64] Add newline and formatting
lakshayk-nv Sep 30, 2025
ebb7b16
[llvm-exegesis] [AArch64] Add warning for ioctl syscall failure in co…
lakshayk-nv Sep 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions llvm/test/tools/llvm-exegesis/AArch64/error-resolution.s
Original file line number Diff line number Diff line change
@@ -70,3 +70,15 @@
# UMOVvi16_idx0_throughput-NEXT: instructions:
# UMOVvi16_idx0_throughput-NEXT: UMOVvi16_idx0 [[REG1:W[0-9]+|LR]] [[REG2:Q[0-9]+|LR]] i_0x0
# UMOVvi16_idx0_throughput: ...


// Test for Load instruction execution by --execution-mode=subprocess pathway
// LD1B: ld1b { Zt.b }, Pg/z, [Xn, Xm]
# RUN: llvm-exegesis --mtriple=aarch64 --mcpu=neoverse-v2 --execution-mode=subprocess --benchmark-phase=prepare-and-assemble-snippet --opcode-name=LD1B --mode=inverse_throughput 2>&1 | FileCheck %s --check-prefix=LD1B_throughput

# LD1B_throughput: ---
# LD1B_throughput-NEXT: mode: inverse_throughput
# LD1B_throughput-NEXT: key:
# LD1B_throughput-NEXT: instructions:
# LD1B_throughput-NEXT: - 'LD1B [[ZREG:Z[0-9]+|LR]] [[PREG:P[0-9]+|LR]] [[XREG1:X[0-9]+|LR]] [[XREG2:X[0-9]+|LR]]'
# LD1B_throughput-NOT: error: 'snippet crashed while running: Segmentation fault'
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# RUN: llvm-exegesis -mode=latency -opcode-name=ADDXrr | FileCheck %s
# RUN: llvm-exegesis -mode=latency -opcode-name=ADCXr | FileCheck %s
# REQUIRES: exegesis-can-execute-aarch64, exegesis-can-measure-latency

CHECK: ---
CHECK-NEXT: mode: latency
CHECK-NEXT: key:
CHECK-NEXT: instructions:
CHECK-NEXT: ADDXrr [[REG1:X[0-9]+|LR]] [[REG2:X[0-9]+|LR]] [[REG3:X[0-9]+|LR]]
CHECK-NEXT: - 'ADCXr [[REG1:X[0-9]+|LR]] [[REG2:X[0-9]+|LR]] [[REG3:X[0-9]+|LR]]'
CHECK-NEXT: config: ''
CHECK-NEXT: register_initial_values:
CHECK-DAG: - '[[REG2]]=0x0'
# We don't check REG3 because in the case that REG2=REG3 the check would fail
CHECK-DAG: - '[[REG3]]=0x0'
CHECK-DAG: - 'NZCV=0x0'
CHECK-DAG: ...
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# REQUIRES: aarch64-registered-target, exegesis-can-measure-latency

# LLVM-EXEGESIS-MEM-DEF test_mem 4096 16
# LLVM-EXEGESIS-MEM-MAP test_mem 140737488093184
# LLVM-EXEGESIS-DEFREG X0 65536
# LLVM-EXEGESIS-DEFREG X1 0
.arch armv8-a+sve

# memory location = VAddressSpaceCeiling - Pagesize * var
# Aux memory loc = 0x800000000000 - 0x10000 * 2 = 0x7ffffffe0000
mov x0, 140737488224256
ldr x1, [x0, #0]

# specific mem loc = 0x800000000000 - 0x10000 * 4 = 0x7ffffffc0000
mov x0, 140737488093184
ldr x1, [x0, #0]


# RUN: llvm-exegesis --mtriple=aarch64 --mcpu=neoverse-v2 --execution-mode=subprocess \
# RUN: --mode=inverse_throughput --benchmark-phase=assemble-measured-code \
# RUN: --dump-object-to-disk=%t.o --min-instructions=1 --snippets-file=%s 2>&1

# RUN: llvm-objdump -d %t.o > %t.disasm
# RUN: FileCheck %s --check-prefix=CHECK_SYSCALLS < %t.disasm

# CHECK_SYSCALLS: <foo>:

# Check for aux memory mapping syscall (syscall number 222/0xde)
# CHECK_SYSCALLS: mov x0, #0x7ffffffe0000
# CHECK_SYSCALLS-NEXT: mov x1, #0x1000
# CHECK_SYSCALLS-NEXT: mov x2, #0x3
# CHECK_SYSCALLS-NEXT: mov x3, #0x21
# CHECK_SYSCALLS-NEXT: movk x3, #0x10, lsl #16
# CHECK_SYSCALLS-NEXT: mov x4, #-0x1
# CHECK_SYSCALLS-NEXT: mov x5, #0x0
# CHECK_SYSCALLS-NEXT: mov x8, #0xde
# CHECK_SYSCALLS-NEXT: svc #0

# CHECK_SYSCALLS: str x0, [sp, #-0x10]!

# Check for specific memory mapping syscall
# CHECK_SYSCALLS: mov x0, #0x7ffffffc0000
# CHECK_SYSCALLS-NEXT: mov x1, #0x1000
# CHECK_SYSCALLS-NEXT: mov x2, #0x3
# CHECK_SYSCALLS-NEXT: mov x3, #0x21
# CHECK_SYSCALLS-NEXT: movk x3, #0x10, lsl #16
# CHECK_SYSCALLS-NEXT: mov x4, #-0x1
# CHECK_SYSCALLS-NEXT: mov x5, #0x0
# CHECK_SYSCALLS-NEXT: mov x8, #0xde
# CHECK_SYSCALLS-NEXT: svc #0

# CHECK_SYSCALLS: ldr x0, [sp], #0x10
# CHECK_SYSCALLS: mov x1, #0x0

# Check for performance counter control syscalls (ioctl - syscall number 29/0x1d)
# CHECK_SYSCALLS: str x8, [sp, #-0x10]!
# CHECK_SYSCALLS-NEXT: str x0, [sp, #-0x10]!
# CHECK_SYSCALLS-NEXT: str x1, [sp, #-0x10]!
# CHECK_SYSCALLS-NEXT: str x2, [sp, #-0x10]!
# CHECK_SYSCALLS-NEXT: mov x16, #0x7ffffffe0000
# CHECK_SYSCALLS-NEXT: ldr w0, [x16]
# CHECK_SYSCALLS-NEXT: mov x1, #0x2403
# CHECK_SYSCALLS-NEXT: mov x2, #0x1
# CHECK_SYSCALLS-NEXT: mov x8, #0x1d
# CHECK_SYSCALLS-NEXT: svc #0
# CHECK_SYSCALLS-NEXT: ldr x2, [sp], #0x10
# CHECK_SYSCALLS-NEXT: ldr x1, [sp], #0x10
# CHECK_SYSCALLS-NEXT: ldr x0, [sp], #0x10
# CHECK_SYSCALLS-NEXT: ldr x8, [sp], #0x10

# === Test instruction execution ===
# CHECK_SYSCALLS: mov x0, #0x7ffffffe0000
# CHECK_SYSCALLS-NEXT: ldr x1, [x0]
# CHECK_SYSCALLS-NEXT: mov x0, #0x7ffffffc0000
# CHECK_SYSCALLS-NEXT: ldr x1, [x0]

# === ioctl syscall - stop performance counters ===
# CHECK_SYSCALLS: mov x16, #0x7ffffffe0000
# CHECK_SYSCALLS-NEXT: ldr w0, [x16]
# CHECK_SYSCALLS-NEXT: mov x1, #0x2401
# CHECK_SYSCALLS-NEXT: mov x2, #0x1
# CHECK_SYSCALLS-NEXT: mov x8, #0x1d
# CHECK_SYSCALLS-NEXT: svc #0

# Check for process exit syscall (exit - syscall number 93/0x5d)
# CHECK_SYSCALLS: mov x0, #0x0
# CHECK_SYSCALLS-NEXT: mov x8, #0x5d
# CHECK_SYSCALLS-NEXT: svc #0

# CHECK_SYSCALLS-NEXT: ret
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s
Original file line number Diff line number Diff line change
@@ -70,6 +70,6 @@ RUN: llvm-objdump -d %d > %t.s
RUN: FileCheck %s --check-prefix=FPCR-ASM < %t.s
FPCR-ASM: <foo>:
FPCR-ASM: movi d{{[0-9]+}}, #0000000000000000
FPCR-ASM-NEXT: mov x8, #0x0
FPCR-ASM-NEXT: msr FPCR, x8
FPCR-ASM-NEXT: mov x16, #0x0
FPCR-ASM-NEXT: msr FPCR, x16
FPCR-ASM-NEXT: bfcvt h{{[0-9]+}}, s{{[0-9]+}}
12 changes: 11 additions & 1 deletion llvm/test/tools/llvm-exegesis/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -18,9 +18,19 @@ def can_use_perf_counters(mode, extra_options=[]):
if llvm_exegesis_exe is None:
print("could not find llvm-exegesis")
return False

opcode_name = None
if "x86_64" in config.root.host_triple or "i386" in config.root.host_triple:
opcode_name = "ADD64rr"
elif "aarch64" in config.root.host_triple:
opcode_name = "ADDXrr"
else:
# FIXME: Add opcode_name for other architectures for testing
return False

try:
return_code = subprocess.call(
[llvm_exegesis_exe, "-mode", mode, "-opcode-name=ADD64rr"]
[llvm_exegesis_exe, "-mode", mode, "-opcode-name=" + opcode_name]
+ extra_options,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
283 changes: 282 additions & 1 deletion llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
Original file line number Diff line number Diff line change
@@ -6,11 +6,25 @@
//
//===----------------------------------------------------------------------===//
#include "../Target.h"
#include "../Error.h"
#include "../MmapUtils.h"
#include "../SerialSnippetGenerator.h"
#include "../SnippetGenerator.h"
#include "../SubprocessMemory.h"
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Process.h"
#include <vector>

#if defined(__aarch64__) && defined(__linux__)
#include <sys/mman.h>
#include <sys/syscall.h>
#ifdef HAVE_LIBPFM
#include <perfmon/perf_event.h>
#endif // HAVE_LIBPFM
#include <sys/prctl.h> // For PR_PAC_* constants
#ifndef PR_PAC_APIAKEY
#define PR_PAC_APIAKEY (1UL << 0)
@@ -73,7 +87,7 @@ static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth,
// Generates instructions to load an immediate value into an FPCR register.
static std::vector<MCInst>
loadFPCRImmediate(MCRegister Reg, unsigned RegBitWidth, const APInt &Value) {
MCRegister TempReg = AArch64::X8;
MCRegister TempReg = AArch64::X16;
MCInst LoadImm = MCInstBuilder(AArch64::MOVi64imm).addReg(TempReg).addImm(0);
MCInst MoveToFPCR =
MCInstBuilder(AArch64::MSR).addImm(AArch64SysReg::FPCR).addReg(TempReg);
@@ -106,6 +120,90 @@ static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth,
return Instructions;
}

/// Appends a pre-indexed store that pushes \p RegToPush onto the stack.
///
/// Emits `STR Xt|Wt, [SP, #imm]!`: SP is adjusted by \p imm first and the
/// register is then stored at the updated SP.
///
/// \param RegToPush     A GPR64 or GPR32 register; any other register class is
///                      treated as a programming error.
/// \param GeneratedCode Instruction list the store is appended to.
/// \param imm           Signed byte offset applied to SP (defaults to -16 so
///                      that SP stays 16-byte aligned).
static void generateRegisterStackPush(unsigned int RegToPush,
                                      std::vector<MCInst> &GeneratedCode,
                                      int imm = -16) {
  // The pre-indexed STR form encodes a signed 9-bit byte offset.
  assert(imm >= -256 && imm <= 255 &&
         "Stack push offset does not fit the signed 9-bit immediate");
  // SP must remain 16-byte aligned to stay usable as a base register.
  assert(imm % 16 == 0 && "Stack push offset must keep SP 16-byte aligned");

  // Pick the store width from the register class, then build the instruction
  // once instead of duplicating the builder per class.
  unsigned StoreOpcode;
  if (AArch64::GPR64RegClass.contains(RegToPush))
    StoreOpcode = AArch64::STRXpre;
  else if (AArch64::GPR32RegClass.contains(RegToPush))
    StoreOpcode = AArch64::STRWpre;
  else
    llvm_unreachable("Unsupported register class for stack push");

  // Operands: written-back SP, stored register, base SP, byte offset.
  GeneratedCode.push_back(MCInstBuilder(StoreOpcode)
                              .addReg(AArch64::SP)
                              .addReg(RegToPush)
                              .addReg(AArch64::SP)
                              .addImm(imm));
}

/// Appends a post-indexed load that pops the top of the stack into
/// \p RegToPopTo.
///
/// Emits `LDR Xt|Wt, [SP], #imm`: the register is loaded from the current SP
/// and SP is then adjusted by \p imm.
///
/// \param RegToPopTo    A GPR64 or GPR32 register; any other register class is
///                      treated as a programming error.
/// \param GeneratedCode Instruction list the load is appended to.
/// \param imm           Signed byte offset applied to SP after the load
///                      (defaults to 16).
static void generateRegisterStackPop(unsigned int RegToPopTo,
                                     std::vector<MCInst> &GeneratedCode,
                                     int imm = 16) {
  // The post-indexed LDR form encodes a signed 9-bit byte offset.
  assert(imm >= -256 && imm <= 255 &&
         "Stack pop offset does not fit the signed 9-bit immediate");
  // SP must remain 16-byte aligned to stay usable as a base register.
  assert(imm % 16 == 0 && "Stack pop offset must keep SP 16-byte aligned");

  // Pick the load width from the register class, then build the instruction
  // once instead of duplicating the builder per class.
  unsigned LoadOpcode;
  if (AArch64::GPR64RegClass.contains(RegToPopTo))
    LoadOpcode = AArch64::LDRXpost;
  else if (AArch64::GPR32RegClass.contains(RegToPopTo))
    LoadOpcode = AArch64::LDRWpost;
  else
    llvm_unreachable("Unsupported register class for stack pop");

  // Operands: written-back SP, loaded register, base SP, byte offset.
  GeneratedCode.push_back(MCInstBuilder(LoadOpcode)
                              .addReg(AArch64::SP)
                              .addReg(RegToPopTo)
                              .addReg(AArch64::SP)
                              .addImm(imm));
}

void generateSysCall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
// AArch64 Linux follows the AAPCS (ARM Architecture Procedure Call Standard):
// - X8 register contains the system call number
// - X0-X5 registers contain the first 6 arguments (if any)
// - SVC #0 instruction triggers the system call
// - Return value is placed in X0 register
GeneratedCode.push_back(
loadImmediate(AArch64::X8, 64, APInt(64, SyscallNumber)));
GeneratedCode.push_back(MCInstBuilder(AArch64::SVC).addImm(0));
}

/// Functions to save/restore system call registers
#if defined(__linux__) && defined(HAVE_LIBPFM)
// Registers X0-X7 in ascending order. saveSyscallRegisters() pushes a prefix
// of this table (index 0 first) and restoreSyscallRegisters() pops the same
// prefix in reverse.
constexpr std::array<unsigned, 8> SyscallArgumentRegisters{
AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7,
};

/// Appends code that spills the registers a generated system call clobbers.
///
/// X8 is pushed first, followed by the first \p ArgumentCount entries of
/// SyscallArgumentRegisters (X0, X1, ...). restoreSyscallRegisters() called
/// with the same count pops them again in the opposite order.
///
/// \param GeneratedCode Instruction list the pushes are appended to.
/// \param ArgumentCount Number of argument registers to spill (at most 8).
static void saveSyscallRegisters(std::vector<MCInst> &GeneratedCode,
                                 unsigned ArgumentCount) {
  assert(ArgumentCount <= 8 &&
         "This implementation saves up to 8 argument registers (X0-X7)");

  // X8 is overwritten with the syscall number by generateSysCall().
  generateRegisterStackPush(AArch64::X8, GeneratedCode);

  // Spill the argument registers in ascending order.
  const unsigned *const FirstReg = SyscallArgumentRegisters.data();
  const unsigned *const EndReg = FirstReg + ArgumentCount;
  for (const unsigned *Reg = FirstReg; Reg != EndReg; ++Reg)
    generateRegisterStackPush(*Reg, GeneratedCode);
}

/// Appends code that reloads the registers spilled by saveSyscallRegisters().
///
/// The first \p ArgumentCount entries of SyscallArgumentRegisters are popped
/// from the highest index down to X0, and X8 is popped last, mirroring the
/// push order.
///
/// \param GeneratedCode Instruction list the pops are appended to.
/// \param ArgumentCount Number of argument registers to reload (at most 8).
static void restoreSyscallRegisters(std::vector<MCInst> &GeneratedCode,
                                    unsigned ArgumentCount) {
  assert(ArgumentCount <= 8 &&
         "This implementation restores up to 8 argument registers (X0-X7)");

  // Walk the argument registers from last pushed to first pushed.
  for (unsigned Remaining = ArgumentCount; Remaining != 0; --Remaining)
    generateRegisterStackPop(SyscallArgumentRegisters[Remaining - 1],
                             GeneratedCode);

  // X8 was pushed first, so it comes off the stack last.
  generateRegisterStackPop(AArch64::X8, GeneratedCode);
}
#endif // __linux__ && HAVE_LIBPFM
#include "AArch64GenExegesis.inc"

namespace {
@@ -119,11 +217,43 @@ class ExegesisAArch64Target : public ExegesisTarget {
// Forwards the AArch64 PFM counter table and the AArch64 opcode-availability
// predicate to the ExegesisTarget base class.
ExegesisAArch64Target()
: ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {}

// Registers with a fixed role in the generated subprocess code. All three are
// listed by getRegistersNeedSaving().
enum ArgumentRegisters {
// NOTE(review): presumably carries the snippet code size; it is not read
// anywhere in the code visible here - confirm against the callers.
CodeSize = AArch64::X12,
// NOTE(review): presumably carries the auxiliary memory file descriptor; it
// is not read anywhere in the code visible here - confirm against the callers.
AuxiliaryMemoryFD = AArch64::X13,
// Scratch register; configurePerfCounter() loads the auxiliary memory address
// into it before reading the counter file descriptor.
TempRegister = AArch64::X16,
};

/// Returns the code that pops the top of the stack into \p Reg.
///
/// Only GPR32 and GPR64 registers are handled; for any other register an
/// empty sequence is returned so the caller can fall back to another way of
/// initializing it.
///
/// \param Reg Register to load from the stack.
/// \param imm Byte offset added to SP after the load.
std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
                                              int imm = 0) const override {
  const bool IsGeneralPurpose = AArch64::GPR64RegClass.contains(Reg) ||
                                AArch64::GPR32RegClass.contains(Reg);
  if (!IsGeneralPurpose)
    return {};

  std::vector<MCInst> PopCode;
  generateRegisterStackPop(Reg, PopCode, imm);
  return PopCode;
}

Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var,
MCOperand &AssignedValue,
const BitVector &ForbiddenRegs) const override;

private:
#ifdef __linux__
std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const override;
std::vector<MCInst>
generateMmap(uintptr_t Address, size_t Length,
uintptr_t FileDescriptorAddress) const override;
void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const override;
std::vector<MCInst> generateMemoryInitialSetup() const override;
std::vector<MCInst> setStackRegisterToAuxMem() const override;
uintptr_t getAuxiliaryMemoryStartAddress() const override;
std::vector<MCInst> configurePerfCounter(long Request,
bool SaveRegisters) const override;
std::vector<MCRegister> getArgumentRegisters() const override;
std::vector<MCRegister> getRegistersNeedSaving() const override;
#endif // __linux__

std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
const APInt &Value) const override {
if (AArch64::GPR32RegClass.contains(Reg))
@@ -238,6 +368,157 @@ Error ExegesisAArch64Target::randomizeTargetMCOperand(

} // namespace

#ifdef __linux__
// Upper bound from which getAuxiliaryMemoryStartAddress() derives the
// auxiliary memory address. The value is 2^47.
// NOTE(review): confirm this is a mappable user-space boundary for every
// AArch64 virtual-address configuration the tool is expected to run on.
static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000;

std::vector<MCInst>
ExegesisAArch64Target::generateExitSyscall(unsigned ExitCode) const {
std::vector<MCInst> ExitCallCode;
ExitCallCode.push_back(loadImmediate(AArch64::X0, 64, APInt(64, ExitCode)));
generateSysCall(SYS_exit, ExitCallCode); // SYS_exit is 93
return ExitCallCode;
}

std::vector<MCInst>
ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length,
uintptr_t FileDescriptorAddress) const {
// mmap(address, length, prot, flags, fd, offset=0)
int flags = MAP_SHARED;
int fd = -1;
if (fd == -1) {
dbgs() << "Warning: generateMmap using anonymous mapping\n";
flags |= MAP_ANONYMOUS;
}
if (Address != 0)
flags |= MAP_FIXED_NOREPLACE;
std::vector<MCInst> MmapCode;
MmapCode.push_back(
loadImmediate(AArch64::X0, 64, APInt(64, Address))); // map adr
MmapCode.push_back(
loadImmediate(AArch64::X1, 64, APInt(64, Length))); // length
MmapCode.push_back(loadImmediate(AArch64::X2, 64,
APInt(64, PROT_READ | PROT_WRITE))); // prot
MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags
// FIXME: Loading [FileDescriptorAddress] as fd leds syscall to return error
MmapCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd
MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset
generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222
return MmapCode;
}

void ExegesisAArch64Target::generateMmapAuxMem(
std::vector<MCInst> &GeneratedCode) const {
int fd = -1;
int flags = MAP_SHARED;
uintptr_t address = getAuxiliaryMemoryStartAddress();
if (fd == -1) {
dbgs() << "Warning: generateMmapAuxMem using anonymous mapping\n";
flags |= MAP_ANONYMOUS;
}
if (address != 0)
flags |= MAP_FIXED_NOREPLACE;
int prot = PROT_READ | PROT_WRITE;

GeneratedCode.push_back(
loadImmediate(AArch64::X0, 64, APInt(64, address))); // map adr
GeneratedCode.push_back(loadImmediate(
AArch64::X1, 64,
APInt(64, SubprocessMemory::AuxiliaryMemorySize))); // length
GeneratedCode.push_back(
loadImmediate(AArch64::X2, 64, APInt(64, prot))); // prot
GeneratedCode.push_back(
loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags
GeneratedCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd
GeneratedCode.push_back(
loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset
generateSysCall(SYS_mmap, GeneratedCode); // SYS_mmap is 222
}

/// Builds the code that runs before any other memory setup: it maps the
/// auxiliary memory and pushes the mmap result onto the stack.
///
/// \returns The mmap sequence followed by a push of X0.
std::vector<MCInst> ExegesisAArch64Target::generateMemoryInitialSetup() const {
  std::vector<MCInst> SetupCode;
  generateMmapAuxMem(SetupCode);

  // The mmap syscall leaves the mapped address in X0. Park it on the stack so
  // that code needing the address can still retrieve it after X0 has been
  // reused by intermediate code. This matters when the kernel picks the
  // address (non-fixed mapping).
  // FIXME: Ensure stack pointer remains stable to prevent loss of saved address
  generateRegisterStackPush(AArch64::X0, SetupCode);
  return SetupCode;
}

/// Placeholder: emits no instructions and logs a warning.
///
/// No need for this hook has been found on AArch64 so far.
/// TODO: Implement this, if required.
///
/// \returns An empty instruction sequence.
std::vector<MCInst> ExegesisAArch64Target::setStackRegisterToAuxMem() const {
  dbgs() << "Warning: setStackRegisterToAuxMem called but not required for "
            "AArch64\n";
  return {};
}

/// Returns the address at which the auxiliary memory is mapped.
///
/// The address lies two (estimated) pages below VAddressSpaceCeiling, i.e. at
/// the start of the second to last page, to try and keep it away from memory
/// annotations used by the snippet.
uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const {
  // FIXME: Why 2 pages?
  const auto PageSize = llvm::sys::Process::getPageSizeEstimate();
  const auto DistanceFromCeiling = 2 * PageSize;
  return VAddressSpaceCeiling - DistanceFromCeiling;
}

/// Builds the code for an `ioctl(fd, Request, PERF_IOC_FLAG_GROUP)` system
/// call, where fd is read as a 32-bit value from the start of the auxiliary
/// memory.
///
/// Without HAVE_LIBPFM the returned sequence is empty.
///
/// \param Request       ioctl command, loaded into X1.
/// \param SaveRegisters When true, X8 and X0-X2 are pushed before the call and
///                      popped afterwards.
/// \returns The generated instruction sequence.
std::vector<MCInst>
ExegesisAArch64Target::configurePerfCounter(long Request,
bool SaveRegisters) const {
std::vector<MCInst> ConfigurePerfCounterCode;
#ifdef HAVE_LIBPFM
// Spill X8 plus the three argument registers (X0-X2) the ioctl call uses.
if (SaveRegisters)
saveSyscallRegisters(ConfigurePerfCounterCode, 3);

// Load actual file descriptor from auxiliary memory location [address + 0]
// CounterFileDescriptor was stored at AuxiliaryMemoryMapping[0]
dbgs() << "Warning: configurePerfCounter ioctl syscall failing\n";
// FIXME: Ensure file descriptor is correctly populated at auxiliary memory
// address before ioctl syscall to avoid unreliable benchmark results
// NOTE(review): TempRegister (X16) is overwritten here and is not among the
// registers saved above - confirm that no snippet value can live in it.
ConfigurePerfCounterCode.push_back(
loadImmediate(ArgumentRegisters::TempRegister, 64,
APInt(64, getAuxiliaryMemoryStartAddress())));
// W0 = 32-bit load from [TempRegister + 0]: the ioctl file descriptor.
ConfigurePerfCounterCode.push_back(
MCInstBuilder(AArch64::LDRWui)
.addReg(AArch64::W0)
.addReg(ArgumentRegisters::TempRegister)
.addImm(0));
ConfigurePerfCounterCode.push_back(
loadImmediate(AArch64::X1, 64, APInt(64, Request))); // cmd
ConfigurePerfCounterCode.push_back(
loadImmediate(AArch64::X2, 64, APInt(64, PERF_IOC_FLAG_GROUP))); // arg
generateSysCall(SYS_ioctl, ConfigurePerfCounterCode); // SYS_ioctl is 29

// Reload the registers spilled above, in reverse order.
if (SaveRegisters)
restoreSyscallRegisters(ConfigurePerfCounterCode, 3);
#endif
return ConfigurePerfCounterCode;
}

/// Returns the registers X0 and X1.
/// NOTE(review): presumably these are the registers through which arguments
/// reach the generated snippet function - confirm against the callers.
std::vector<MCRegister> ExegesisAArch64Target::getArgumentRegisters() const {
return {AArch64::X0, AArch64::X1};
}

/// Returns the registers this target asks to have saved: X0-X5 and X8, which
/// the generated mmap/ioctl/exit syscall sequences write, plus the three
/// registers named in the ArgumentRegisters enum.
std::vector<MCRegister> ExegesisAArch64Target::getRegistersNeedSaving() const {
return {
AArch64::X0,
AArch64::X1,
AArch64::X2,
AArch64::X3,
AArch64::X4,
AArch64::X5,
AArch64::X8,
ArgumentRegisters::TempRegister,
ArgumentRegisters::CodeSize,
ArgumentRegisters::AuxiliaryMemoryFD,
};
}

#endif // __linux__

static ExegesisTarget *getTheExegesisAArch64Target() {
static ExegesisAArch64Target Target;
return &Target;
9 changes: 9 additions & 0 deletions llvm/tools/llvm-exegesis/lib/Assembler.cpp
Original file line number Diff line number Diff line change
@@ -78,13 +78,22 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
Register StackPointerRegister = BBF.MF.getSubtarget()
.getTargetLowering()
->getStackPointerRegisterToSaveRestore();
// FIXME: Only loading first register with memory address is hacky.
bool isFirstRegister = true;
for (const RegisterValue &RV : Key.RegisterInitialValues) {
if (GenerateMemoryInstructions) {
// If we're generating memory instructions, don't load in the value for
// the register with the stack pointer as it will be used later to finish
// the setup.
if (Register(RV.Register) == StackPointerRegister)
continue;
auto StackLoadInsts = ET._generateRegisterStackPop(RV.Register, 16);
if (!StackLoadInsts.empty() && isFirstRegister) {
for (const auto &Inst : StackLoadInsts)
BBF.addInstruction(Inst);
isFirstRegister = false;
continue;
}
}
// Load a constant in the register.
const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
5 changes: 5 additions & 0 deletions llvm/tools/llvm-exegesis/lib/Target.h
Original file line number Diff line number Diff line change
@@ -308,6 +308,11 @@ class ExegesisTarget {
return std::make_unique<SavedState>();
}

// Returns the instructions that pop a value from the stack into Reg, with imm
// passed through as the stack adjustment. This default implementation returns
// an empty sequence; targets override it to emit actual code.
virtual std::vector<MCInst> _generateRegisterStackPop(MCRegister Reg,
int imm = 0) const {
return {};
}

private:
virtual bool matchesArch(Triple::ArchType Arch) const = 0;