diff --git a/llvm/test/tools/llvm-exegesis/AArch64/error-resolution.s b/llvm/test/tools/llvm-exegesis/AArch64/error-resolution.s index 66ca6fb31d78f..022747cdbaad9 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/error-resolution.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/error-resolution.s @@ -70,3 +70,15 @@ # UMOVvi16_idx0_throughput-NEXT: instructions: # UMOVvi16_idx0_throughput-NEXT: UMOVvi16_idx0 [[REG1:W[0-9]+|LR]] [[REG2:Q[0-9]+|LR]] i_0x0 # UMOVvi16_idx0_throughput: ... + + +// Test for Load instruction execution by --execution-mode=subprocess pathway +// LD1B: ld1b { Zt.b }, Pg/z, [Xn, Xm] +# RUN: llvm-exegesis --mtriple=aarch64 --mcpu=neoverse-v2 --execution-mode=subprocess --benchmark-phase=prepare-and-assemble-snippet --opcode-name=LD1B --mode=inverse_throughput 2>&1 | FileCheck %s --check-prefix=LD1B_throughput + +# LD1B_throughput: --- +# LD1B_throughput-NEXT: mode: inverse_throughput +# LD1B_throughput-NEXT: key: +# LD1B_throughput-NEXT: instructions: +# LD1B_throughput-NEXT: - 'LD1B [[ZREG:Z[0-9]+|LR]] [[PREG:P[0-9]+|LR]] [[XREG1:X[0-9]+|LR]] [[XREG2:X[0-9]+|LR]]' +# LD1B_throughput-NOT: error: 'snippet crashed while running: Segmentation fault' diff --git a/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s b/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s index 1db28a84e2ff6..f1aa95b245c5b 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s @@ -1,13 +1,14 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=ADDXrr | FileCheck %s +# RUN: llvm-exegesis -mode=latency -opcode-name=ADCXr | FileCheck %s # REQUIRES: exegesis-can-execute-aarch64, exegesis-can-measure-latency CHECK: --- CHECK-NEXT: mode: latency CHECK-NEXT: key: CHECK-NEXT: instructions: -CHECK-NEXT: ADDXrr [[REG1:X[0-9]+|LR]] [[REG2:X[0-9]+|LR]] [[REG3:X[0-9]+|LR]] +CHECK-NEXT: - 'ADCXr [[REG1:X[0-9]+|LR]] [[REG2:X[0-9]+|LR]] [[REG3:X[0-9]+|LR]]' CHECK-NEXT: config: 
'' CHECK-NEXT: register_initial_values: CHECK-DAG: - '[[REG2]]=0x0' -# We don't check REG3 because in the case that REG2=REG3 the check would fail +CHECK-DAG: - '[[REG3]]=0x0' +CHECK-DAG: - 'NZCV=0x0' CHECK-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s b/llvm/test/tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s new file mode 100644 index 0000000000000..1e49378035d03 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s @@ -0,0 +1,90 @@ +# REQUIRES: aarch64-registered-target, exegesis-can-measure-latency + +# LLVM-EXEGESIS-MEM-DEF test_mem 4096 16 +# LLVM-EXEGESIS-MEM-MAP test_mem 140737488093184 +# LLVM-EXEGESIS-DEFREG X0 65536 +# LLVM-EXEGESIS-DEFREG X1 0 +.arch armv8-a+sve + +# memory location = VAddressSpaceCeiling - Pagesize * var +# Aux memory loc = 0x800000000000 - 0x10000 * 2 = 0x7ffffffe0000 +mov x0, 140737488224256 +ldr x1, [x0, #0] + +# specific mem loc = 0x800000000000 - 0x10000 * 4 = 0x7ffffffc0000 +mov x0, 140737488093184 +ldr x1, [x0, #0] + + +# RUN: llvm-exegesis --mtriple=aarch64 --mcpu=neoverse-v2 --execution-mode=subprocess \ +# RUN: --mode=inverse_throughput --benchmark-phase=assemble-measured-code \ +# RUN: --dump-object-to-disk=%t.o --min-instructions=1 --snippets-file=%s 2>&1 + +# RUN: llvm-objdump -d %t.o > %t.disasm +# RUN: FileCheck %s --check-prefix=CHECK_SYSCALLS < %t.disasm + +# CHECK_SYSCALLS: : + +# Check for aux memory mapping syscall (syscall number 222/0xde) +# CHECK_SYSCALLS: mov x0, #0x7ffffffe0000 +# CHECK_SYSCALLS-NEXT: mov x1, #0x1000 +# CHECK_SYSCALLS-NEXT: mov x2, #0x3 +# CHECK_SYSCALLS-NEXT: mov x3, #0x21 +# CHECK_SYSCALLS-NEXT: movk x3, #0x10, lsl #16 +# CHECK_SYSCALLS-NEXT: mov x4, #-0x1 +# CHECK_SYSCALLS-NEXT: mov x5, #0x0 +# CHECK_SYSCALLS-NEXT: mov x8, #0xde +# CHECK_SYSCALLS-NEXT: svc #0 + +# CHECK_SYSCALLS: str x0, [sp, #-0x10]! 
+ +# Check for specific memory mapping syscall +# CHECK_SYSCALLS: mov x0, #0x7ffffffc0000 +# CHECK_SYSCALLS-NEXT: mov x1, #0x1000 +# CHECK_SYSCALLS-NEXT: mov x2, #0x3 +# CHECK_SYSCALLS-NEXT: mov x3, #0x21 +# CHECK_SYSCALLS-NEXT: movk x3, #0x10, lsl #16 +# CHECK_SYSCALLS-NEXT: mov x4, #-0x1 +# CHECK_SYSCALLS-NEXT: mov x5, #0x0 +# CHECK_SYSCALLS-NEXT: mov x8, #0xde +# CHECK_SYSCALLS-NEXT: svc #0 + +# CHECK_SYSCALLS: ldr x0, [sp], #0x10 +# CHECK_SYSCALLS: mov x1, #0x0 + +# Check for performance counter control syscalls (ioctl - syscall number 29/0x1d) +# CHECK_SYSCALLS: str x8, [sp, #-0x10]! +# CHECK_SYSCALLS-NEXT: str x0, [sp, #-0x10]! +# CHECK_SYSCALLS-NEXT: str x1, [sp, #-0x10]! +# CHECK_SYSCALLS-NEXT: str x2, [sp, #-0x10]! +# CHECK_SYSCALLS-NEXT: mov x16, #0x7ffffffe0000 +# CHECK_SYSCALLS-NEXT: ldr w0, [x16] +# CHECK_SYSCALLS-NEXT: mov x1, #0x2403 +# CHECK_SYSCALLS-NEXT: mov x2, #0x1 +# CHECK_SYSCALLS-NEXT: mov x8, #0x1d +# CHECK_SYSCALLS-NEXT: svc #0 +# CHECK_SYSCALLS-NEXT: ldr x2, [sp], #0x10 +# CHECK_SYSCALLS-NEXT: ldr x1, [sp], #0x10 +# CHECK_SYSCALLS-NEXT: ldr x0, [sp], #0x10 +# CHECK_SYSCALLS-NEXT: ldr x8, [sp], #0x10 + +# === Test instruction execution === +# CHECK_SYSCALLS: mov x0, #0x7ffffffe0000 +# CHECK_SYSCALLS-NEXT: ldr x1, [x0] +# CHECK_SYSCALLS-NEXT: mov x0, #0x7ffffffc0000 +# CHECK_SYSCALLS-NEXT: ldr x1, [x0] + +# === ioctl syscall - stop performance counters === +# CHECK_SYSCALLS: mov x16, #0x7ffffffe0000 +# CHECK_SYSCALLS-NEXT: ldr w0, [x16] +# CHECK_SYSCALLS-NEXT: mov x1, #0x2401 +# CHECK_SYSCALLS-NEXT: mov x2, #0x1 +# CHECK_SYSCALLS-NEXT: mov x8, #0x1d +# CHECK_SYSCALLS-NEXT: svc #0 + +# Check for process exit syscall (exit - syscall number 93/0x5d) +# CHECK_SYSCALLS: mov x0, #0x0 +# CHECK_SYSCALLS-NEXT: mov x8, #0x5d +# CHECK_SYSCALLS-NEXT: svc #0 + +# CHECK_SYSCALLS-NEXT: ret diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s index 3ef664f899551..bcd7792f17fd8 
100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s @@ -70,6 +70,6 @@ RUN: llvm-objdump -d %d > %t.s RUN: FileCheck %s --check-prefix=FPCR-ASM < %t.s FPCR-ASM: : FPCR-ASM: movi d{{[0-9]+}}, #0000000000000000 -FPCR-ASM-NEXT: mov x8, #0x0 -FPCR-ASM-NEXT: msr FPCR, x8 +FPCR-ASM-NEXT: mov x16, #0x0 +FPCR-ASM-NEXT: msr FPCR, x16 FPCR-ASM-NEXT: bfcvt h{{[0-9]+}}, s{{[0-9]+}} diff --git a/llvm/test/tools/llvm-exegesis/lit.local.cfg b/llvm/test/tools/llvm-exegesis/lit.local.cfg index 89110ed2816cd..cd16139e2c3c8 100644 --- a/llvm/test/tools/llvm-exegesis/lit.local.cfg +++ b/llvm/test/tools/llvm-exegesis/lit.local.cfg @@ -18,9 +18,19 @@ def can_use_perf_counters(mode, extra_options=[]): if llvm_exegesis_exe is None: print("could not find llvm-exegesis") return False + + opcode_name = None + if "x86_64" in config.root.host_triple or "i386" in config.root.host_triple: + opcode_name = "ADD64rr" + elif "aarch64" in config.root.host_triple: + opcode_name = "ADDXrr" + else: + # FIXME: Add opcode_name for other architectures for testing + return False + try: return_code = subprocess.call( - [llvm_exegesis_exe, "-mode", mode, "-opcode-name=ADD64rr"] + [llvm_exegesis_exe, "-mode", mode, "-opcode-name=" + opcode_name] + extra_options, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 2c13dd514a744..588bb2e7aa4bb 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -6,11 +6,25 @@ // //===----------------------------------------------------------------------===// #include "../Target.h" +#include "../Error.h" +#include "../MmapUtils.h" +#include "../SerialSnippetGenerator.h" +#include "../SnippetGenerator.h" +#include "../SubprocessMemory.h" #include "AArch64.h" #include "AArch64RegisterInfo.h" #include 
"llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Process.h" +#include #if defined(__aarch64__) && defined(__linux__) +#include +#include +#ifdef HAVE_LIBPFM +#include +#endif // HAVE_LIBPFM #include // For PR_PAC_* constants #ifndef PR_PAC_APIAKEY #define PR_PAC_APIAKEY (1UL << 0) @@ -73,7 +87,7 @@ static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth, // Generates instructions to load an immediate value into an FPCR register. static std::vector loadFPCRImmediate(MCRegister Reg, unsigned RegBitWidth, const APInt &Value) { - MCRegister TempReg = AArch64::X8; + MCRegister TempReg = AArch64::X16; MCInst LoadImm = MCInstBuilder(AArch64::MOVi64imm).addReg(TempReg).addImm(0); MCInst MoveToFPCR = MCInstBuilder(AArch64::MSR).addImm(AArch64SysReg::FPCR).addReg(TempReg); @@ -106,6 +120,90 @@ static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth, return Instructions; } +static void generateRegisterStackPush(unsigned int RegToPush, + std::vector &GeneratedCode, + int imm = -16) { + // STR [X|W]t, [SP, #simm]!: SP is decremented by default 16 bytes + // before the store to maintain 16-byte alignment. + if (AArch64::GPR64RegClass.contains(RegToPush)) + GeneratedCode.push_back(MCInstBuilder(AArch64::STRXpre) + .addReg(AArch64::SP) + .addReg(RegToPush) + .addReg(AArch64::SP) + .addImm(imm)); + else if (AArch64::GPR32RegClass.contains(RegToPush)) + GeneratedCode.push_back(MCInstBuilder(AArch64::STRWpre) + .addReg(AArch64::SP) + .addReg(RegToPush) + .addReg(AArch64::SP) + .addImm(imm)); + else + llvm_unreachable("Unsupported register class for stack push"); +} + +static void generateRegisterStackPop(unsigned int RegToPopTo, + std::vector &GeneratedCode, + int imm = 16) { + // LDR Xt, [SP], #simm: SP is incremented by default 16 bytes after the load. 
+ if (AArch64::GPR64RegClass.contains(RegToPopTo)) + GeneratedCode.push_back(MCInstBuilder(AArch64::LDRXpost) + .addReg(AArch64::SP) + .addReg(RegToPopTo) + .addReg(AArch64::SP) + .addImm(imm)); + else if (AArch64::GPR32RegClass.contains(RegToPopTo)) + GeneratedCode.push_back(MCInstBuilder(AArch64::LDRWpost) + .addReg(AArch64::SP) + .addReg(RegToPopTo) + .addReg(AArch64::SP) + .addImm(imm)); + else + llvm_unreachable("Unsupported register class for stack pop"); +} + +void generateSysCall(long SyscallNumber, std::vector &GeneratedCode) { + // AArch64 Linux follows the AAPCS (ARM Architecture Procedure Call Standard): + // - X8 register contains the system call number + // - X0-X5 registers contain the first 6 arguments (if any) + // - SVC #0 instruction triggers the system call + // - Return value is placed in X0 register + GeneratedCode.push_back( + loadImmediate(AArch64::X8, 64, APInt(64, SyscallNumber))); + GeneratedCode.push_back(MCInstBuilder(AArch64::SVC).addImm(0)); +} + +/// Functions to save/restore system call registers +#if defined(__linux__) && defined(HAVE_LIBPFM) +constexpr std::array SyscallArgumentRegisters{ + AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, + AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7, +}; + +static void saveSyscallRegisters(std::vector &GeneratedCode, + unsigned ArgumentCount) { + // AArch64 follows the AAPCS (ARM Architecture Procedure Call Standard): + // X0-X7 registers contain the first 8 arguments. + assert(ArgumentCount <= 8 && + "This implementation saves up to 8 argument registers (X0-X7)"); + // Preserve X8 (used for the syscall number/return value). + generateRegisterStackPush(AArch64::X8, GeneratedCode); + // Preserve the registers used to pass arguments to the system call. 
+ for (unsigned I = 0; I < ArgumentCount; ++I) { + generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode); + } +} + +static void restoreSyscallRegisters(std::vector &GeneratedCode, + unsigned ArgumentCount) { + assert(ArgumentCount <= 8 && + "This implementation restores up to 8 argument registers (X0-X7)"); + // Restore registers in reverse order + for (int I = ArgumentCount - 1; I >= 0; --I) { + generateRegisterStackPop(SyscallArgumentRegisters[I], GeneratedCode); + } + generateRegisterStackPop(AArch64::X8, GeneratedCode); +} +#endif // __linux__ && HAVE_LIBPFM #include "AArch64GenExegesis.inc" namespace { @@ -119,11 +217,43 @@ class ExegesisAArch64Target : public ExegesisTarget { ExegesisAArch64Target() : ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {} + enum ArgumentRegisters { + CodeSize = AArch64::X12, + AuxiliaryMemoryFD = AArch64::X13, + TempRegister = AArch64::X16, + }; + + std::vector _generateRegisterStackPop(MCRegister Reg, + int imm = 0) const override { + std::vector Insts; + if (AArch64::GPR32RegClass.contains(Reg) || + AArch64::GPR64RegClass.contains(Reg)) { + generateRegisterStackPop(Reg, Insts, imm); + return Insts; + } + return {}; + } + Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue, const BitVector &ForbiddenRegs) const override; private: +#ifdef __linux__ + std::vector generateExitSyscall(unsigned ExitCode) const override; + std::vector + generateMmap(uintptr_t Address, size_t Length, + uintptr_t FileDescriptorAddress) const override; + void generateMmapAuxMem(std::vector &GeneratedCode) const override; + std::vector generateMemoryInitialSetup() const override; + std::vector setStackRegisterToAuxMem() const override; + uintptr_t getAuxiliaryMemoryStartAddress() const override; + std::vector configurePerfCounter(long Request, + bool SaveRegisters) const override; + std::vector getArgumentRegisters() const override; + std::vector 
getRegistersNeedSaving() const override; +#endif // __linux__ + std::vector setRegTo(const MCSubtargetInfo &STI, MCRegister Reg, const APInt &Value) const override { if (AArch64::GPR32RegClass.contains(Reg)) @@ -238,6 +368,157 @@ Error ExegesisAArch64Target::randomizeTargetMCOperand( } // namespace +#ifdef __linux__ +static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000; + +std::vector +ExegesisAArch64Target::generateExitSyscall(unsigned ExitCode) const { + std::vector ExitCallCode; + ExitCallCode.push_back(loadImmediate(AArch64::X0, 64, APInt(64, ExitCode))); + generateSysCall(SYS_exit, ExitCallCode); // SYS_exit is 93 + return ExitCallCode; +} + +std::vector +ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, + uintptr_t FileDescriptorAddress) const { + // mmap(address, length, prot, flags, fd, offset=0) + int flags = MAP_SHARED; + int fd = -1; + if (fd == -1) { + dbgs() << "Warning: generateMmap using anonymous mapping\n"; + flags |= MAP_ANONYMOUS; + } + if (Address != 0) + flags |= MAP_FIXED_NOREPLACE; + std::vector MmapCode; + MmapCode.push_back( + loadImmediate(AArch64::X0, 64, APInt(64, Address))); // map adr + MmapCode.push_back( + loadImmediate(AArch64::X1, 64, APInt(64, Length))); // length + MmapCode.push_back(loadImmediate(AArch64::X2, 64, + APInt(64, PROT_READ | PROT_WRITE))); // prot + MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags + // FIXME: Loading [FileDescriptorAddress] as fd leads syscall to return error + MmapCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd + MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset + generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222 + return MmapCode; +} + +void ExegesisAArch64Target::generateMmapAuxMem( + std::vector &GeneratedCode) const { + int fd = -1; + int flags = MAP_SHARED; + uintptr_t address = getAuxiliaryMemoryStartAddress(); + if (fd == -1) { + dbgs() << "Warning: generateMmapAuxMem 
using anonymous mapping\n"; + flags |= MAP_ANONYMOUS; + } + if (address != 0) + flags |= MAP_FIXED_NOREPLACE; + int prot = PROT_READ | PROT_WRITE; + + GeneratedCode.push_back( + loadImmediate(AArch64::X0, 64, APInt(64, address))); // map adr + GeneratedCode.push_back(loadImmediate( + AArch64::X1, 64, + APInt(64, SubprocessMemory::AuxiliaryMemorySize))); // length + GeneratedCode.push_back( + loadImmediate(AArch64::X2, 64, APInt(64, prot))); // prot + GeneratedCode.push_back( + loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags + GeneratedCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd + GeneratedCode.push_back( + loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset + generateSysCall(SYS_mmap, GeneratedCode); // SYS_mmap is 222 +} + +std::vector ExegesisAArch64Target::generateMemoryInitialSetup() const { + std::vector MemoryInitialSetupCode; + generateMmapAuxMem(MemoryInitialSetupCode); + + // If using a fixed address for auxiliary memory, skip this step. + // When using dynamic memory allocation (non-fixed address), we must preserve + // the mmap return value (X0) which contains the allocated memory address. + // This value is saved to the stack to ensure registers requiring memory + // access can retrieve the correct address even if X0 is modified by + // intermediate code. + generateRegisterStackPush(AArch64::X0, MemoryInitialSetupCode); + // FIXME: Ensure stack pointer remains stable to prevent loss of saved address + return MemoryInitialSetupCode; +} + +std::vector ExegesisAArch64Target::setStackRegisterToAuxMem() const { + std::vector instructions; // NOP + // Motivation unclear, found no need for this in AArch64. + // TODO: Implement this, if required. 
+ dbgs() << "Warning: setStackRegisterToAuxMem called but not required for " + "AArch64\n"; + return instructions; +} + +uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const { + // Return the second to last page in the virtual address space to try and + // prevent interference with memory annotations in the snippet + // FIXME: Why 2 pages? + return VAddressSpaceCeiling - (2 * llvm::sys::Process::getPageSizeEstimate()); +} + +std::vector +ExegesisAArch64Target::configurePerfCounter(long Request, + bool SaveRegisters) const { + std::vector ConfigurePerfCounterCode; +#ifdef HAVE_LIBPFM + if (SaveRegisters) + saveSyscallRegisters(ConfigurePerfCounterCode, 3); + + // Load actual file descriptor from auxiliary memory location [address + 0] + // CounterFileDescriptor was stored at AuxiliaryMemoryMapping[0] + dbgs() << "Warning: configurePerfCounter ioctl syscall failing\n"; + // FIXME: Ensure file descriptor is correctly populated at auxiliary memory + // address before ioctl syscall to avoid unreliable benchmark results + ConfigurePerfCounterCode.push_back( + loadImmediate(ArgumentRegisters::TempRegister, 64, + APInt(64, getAuxiliaryMemoryStartAddress()))); + ConfigurePerfCounterCode.push_back( + MCInstBuilder(AArch64::LDRWui) + .addReg(AArch64::W0) + .addReg(ArgumentRegisters::TempRegister) + .addImm(0)); + ConfigurePerfCounterCode.push_back( + loadImmediate(AArch64::X1, 64, APInt(64, Request))); // cmd + ConfigurePerfCounterCode.push_back( + loadImmediate(AArch64::X2, 64, APInt(64, PERF_IOC_FLAG_GROUP))); // arg + generateSysCall(SYS_ioctl, ConfigurePerfCounterCode); // SYS_ioctl is 29 + + if (SaveRegisters) + restoreSyscallRegisters(ConfigurePerfCounterCode, 3); +#endif + return ConfigurePerfCounterCode; +} + +std::vector ExegesisAArch64Target::getArgumentRegisters() const { + return {AArch64::X0, AArch64::X1}; +} + +std::vector ExegesisAArch64Target::getRegistersNeedSaving() const { + return { + AArch64::X0, + AArch64::X1, + AArch64::X2, 
+ AArch64::X3, + AArch64::X4, + AArch64::X5, + AArch64::X8, + ArgumentRegisters::TempRegister, + ArgumentRegisters::CodeSize, + ArgumentRegisters::AuxiliaryMemoryFD, + }; +} + +#endif // __linux__ + static ExegesisTarget *getTheExegesisAArch64Target() { static ExegesisAArch64Target Target; return &Target; diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp index fd7924db08441..9259fd5dd7923 100644 --- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp +++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp @@ -78,6 +78,8 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, Register StackPointerRegister = BBF.MF.getSubtarget() .getTargetLowering() ->getStackPointerRegisterToSaveRestore(); + // FIXME: Only loading first register with memory address is hacky. + bool isFirstRegister = true; for (const RegisterValue &RV : Key.RegisterInitialValues) { if (GenerateMemoryInstructions) { // If we're generating memory instructions, don't load in the value for @@ -85,6 +87,13 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, // the setup. if (Register(RV.Register) == StackPointerRegister) continue; + auto StackLoadInsts = ET._generateRegisterStackPop(RV.Register, 16); + if (!StackLoadInsts.empty() && isFirstRegister) { + for (const auto &Inst : StackLoadInsts) + BBF.addInstruction(Inst); + isFirstRegister = false; + continue; + } } // Load a constant in the register. 
const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value); diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h index 77fbaa6e95412..0304908cbb2b2 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -308,6 +308,11 @@ class ExegesisTarget { return std::make_unique(); } + virtual std::vector _generateRegisterStackPop(MCRegister Reg, + int imm = 0) const { + return {}; + } + private: virtual bool matchesArch(Triple::ArchType Arch) const = 0;