From 5ec0a438512f4932a5313ffc8bceb050b48e9b3b Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 19 Jun 2025 04:49:31 -0700 Subject: [PATCH 01/39] [llvm-exegesis] [AArch64] Use X16 instead of X8 - Switched X16 as temporary register in loadFPCRImmediate instead of X8 which is used by syscalls - Updated Testcase with hardcoded reg number. --- llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s | 4 ++-- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s index 3ef664f899551..bcd7792f17fd8 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/setReg_init_check.s @@ -70,6 +70,6 @@ RUN: llvm-objdump -d %d > %t.s RUN: FileCheck %s --check-prefix=FPCR-ASM < %t.s FPCR-ASM: : FPCR-ASM: movi d{{[0-9]+}}, #0000000000000000 -FPCR-ASM-NEXT: mov x8, #0x0 -FPCR-ASM-NEXT: msr FPCR, x8 +FPCR-ASM-NEXT: mov x16, #0x0 +FPCR-ASM-NEXT: msr FPCR, x16 FPCR-ASM-NEXT: bfcvt h{{[0-9]+}}, s{{[0-9]+}} diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 3a0021e3c132d..b1333dc8081b2 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -72,7 +72,7 @@ static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth, // Generates instructions to load an immediate value into an FPCR register. 
static std::vector loadFPCRImmediate(MCRegister Reg, unsigned RegBitWidth, const APInt &Value) { - MCRegister TempReg = AArch64::X8; + MCRegister TempReg = AArch64::X16; MCInst LoadImm = MCInstBuilder(AArch64::MOVi64imm).addReg(TempReg).addImm(0); MCInst MoveToFPCR = MCInstBuilder(AArch64::MSR).addImm(AArch64SysReg::FPCR).addReg(TempReg); From a75c835be316a7a060d1b0a2dfd79f17eee6ea67 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 19 Jun 2025 04:57:21 -0700 Subject: [PATCH 02/39] [llvm-exegesis] [AArch64] Add helpers to push/pop GPRs and save/restore syscall registers and syscall generator --- .../llvm-exegesis/lib/AArch64/Target.cpp | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index b1333dc8081b2..7576b5f08f9c3 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -105,6 +105,89 @@ static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth, return Instructions; } +static void generateRegisterStackPush(unsigned int RegToPush, + std::vector &GeneratedCode, + int imm = -16) { + // STR [X|W]t, [SP, #simm]!: SP is decremented by default 16 bytes + // before the store to maintain 16-bytes alignment. + if (AArch64::GPR64RegClass.contains(RegToPush)) { + GeneratedCode.push_back(MCInstBuilder(AArch64::STRXpre) + .addReg(AArch64::SP) + .addReg(RegToPush) + .addReg(AArch64::SP) + .addImm(imm)); + } else if (AArch64::GPR32RegClass.contains(RegToPush)) { + GeneratedCode.push_back(MCInstBuilder(AArch64::STRWpre) + .addReg(AArch64::SP) + .addReg(RegToPush) + .addReg(AArch64::SP) + .addImm(imm)); + } else { + llvm_unreachable("Unsupported register class for stack push"); + } +} + +static void generateRegisterStackPop(unsigned int RegToPopTo, + std::vector &GeneratedCode, + int imm = 16) { + // LDR Xt, [SP], #simm: SP is incremented by default 16 bytes after the load. 
+ if (AArch64::GPR64RegClass.contains(RegToPopTo)) { + GeneratedCode.push_back(MCInstBuilder(AArch64::LDRXpost) + .addReg(AArch64::SP) + .addReg(RegToPopTo) + .addReg(AArch64::SP) + .addImm(imm)); + } else if (AArch64::GPR32RegClass.contains(RegToPopTo)) { + GeneratedCode.push_back(MCInstBuilder(AArch64::LDRWpost) + .addReg(AArch64::SP) + .addReg(RegToPopTo) + .addReg(AArch64::SP) + .addImm(imm)); + } else { + llvm_unreachable("Unsupported register class for stack pop"); + } +} + +void generateSysCall(long SyscallNumber, std::vector &GeneratedCode) { + GeneratedCode.push_back( + loadImmediate(AArch64::X8, 64, APInt(64, SyscallNumber))); + GeneratedCode.push_back(MCInstBuilder(AArch64::SVC).addImm(0)); +} + +/// Functions to save/restore system call registers +#ifdef __linux__ +constexpr std::array SyscallArgumentRegisters{ + AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, +}; + +static void saveSysCallRegisters(std::vector &GeneratedCode, + unsigned ArgumentCount) { + // AArch64 Linux typically uses X0-X5 for the first 6 arguments. + // Some syscalls can take up to 8 arguments in X0-X7. + assert(ArgumentCount <= 6 && + "This implementation saves up to 6 argument registers (X0-X5)"); + // generateRegisterStackPush(ArgumentRegisters::TempRegister, GeneratedCode); + // Preserve X8 (used for the syscall number/return value). + generateRegisterStackPush(AArch64::X8, GeneratedCode); + // Preserve the registers used to pass arguments to the system call. + for (unsigned I = 0; I < ArgumentCount; ++I) { + generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode); + } +} + +static void restoreSysCallRegisters(std::vector &GeneratedCode, + unsigned ArgumentCount) { + assert(ArgumentCount <= 6 && + "This implementation restores up to 6 argument registers (X0-X5)"); + // Restore argument registers, in opposite order of the way they are saved. 
+ for (int I = ArgumentCount - 1; I >= 0; --I) { + generateRegisterStackPop(SyscallArgumentRegisters[I], GeneratedCode); + } + generateRegisterStackPop(AArch64::X8, GeneratedCode); + // generateRegisterStackPop(ArgumentRegisters::TempRegister, GeneratedCode); +} +#endif // __linux__ #include "AArch64GenExegesis.inc" namespace { From 21cd6535f5fc4c5b4e6fa35e839abfb91fa3184b Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 19 Jun 2025 05:00:21 -0700 Subject: [PATCH 03/39] [llvm-exegesis] [AArch64] Implement memory management required functions --- .../llvm-exegesis/lib/AArch64/Target.cpp | 207 +++++++++++++++++- llvm/tools/llvm-exegesis/lib/Target.h | 4 + 2 files changed, 210 insertions(+), 1 deletion(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 7576b5f08f9c3..4fdf1a56398e2 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -6,11 +6,34 @@ // //===----------------------------------------------------------------------===// #include "../Target.h" +#include "../Error.h" +#include "../MmapUtils.h" +#include "../SerialSnippetGenerator.h" +#include "../SnippetGenerator.h" +#include "../SubprocessMemory.h" #include "AArch64.h" #include "AArch64RegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCRegisterInfo.h" +#include +#define DEBUG_TYPE "exegesis-aarch64-target" #if defined(__aarch64__) && defined(__linux__) -#include // For PR_PAC_* constants +#include +#include +#include // for getpagesize() +#ifdef HAVE_LIBPFM +#include +#endif // HAVE_LIBPFM +#include // For PR_PAC_* constants +#include +#ifndef PR_PAC_SET_ENABLED_KEYS +#define PR_PAC_SET_ENABLED_KEYS 60 +#endif +#ifndef PR_PAC_GET_ENABLED_KEYS +#define PR_PAC_GET_ENABLED_KEYS 61 +#endif #ifndef PR_PAC_APIAKEY #define PR_PAC_APIAKEY (1UL << 0) #endif @@ -197,7 +220,39 @@ class ExegesisAArch64Target : public 
ExegesisTarget { ExegesisAArch64Target() : ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {} + enum ArgumentRegisters { + CodeSize = AArch64::X12, + AuxiliaryMemoryFD = AArch64::X13, + TempRegister = AArch64::X16, + }; + + std::vector _generateRegisterStackPop(MCRegister Reg, + int imm = 0) const override { + std::vector Insts; + if (AArch64::GPR32RegClass.contains(Reg) || + AArch64::GPR64RegClass.contains(Reg)) { + generateRegisterStackPop(Reg, Insts, imm); + return Insts; + } + return {}; + } + private: +#ifdef __linux__ + std::vector generateExitSyscall(unsigned ExitCode) const override; + std::vector + generateMmap(uintptr_t Address, size_t Length, + uintptr_t FileDescriptorAddress) const override; + void generateMmapAuxMem(std::vector &GeneratedCode) const override; + std::vector generateMemoryInitialSetup() const override; + std::vector setStackRegisterToAuxMem() const override; + uintptr_t getAuxiliaryMemoryStartAddress() const override; + std::vector configurePerfCounter(long Request, + bool SaveRegisters) const override; + std::vector getArgumentRegisters() const override; + std::vector getRegistersNeedSaving() const override; +#endif // __linux__ + std::vector setRegTo(const MCSubtargetInfo &STI, MCRegister Reg, const APInt &Value) const override { if (AArch64::GPR32RegClass.contains(Reg)) @@ -237,6 +292,156 @@ class ExegesisAArch64Target : public ExegesisTarget { } // namespace +#ifdef __linux__ +// true : let use of fixed address to Virtual Address Space Ceiling +// false: let kernel choose the address of the auxiliary memory +bool UseFixedAddress = true; + +static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000; + +static void generateRoundToNearestPage(unsigned int TargetRegister, + std::vector &GeneratedCode) { + int PageSizeShift = static_cast(round(log2(getpagesize()))); + // Round down to the nearest page by getting rid of the least significant bits + // representing location in the page. 
+ + // Single instruction using AND with inverted mask (effectively BIC) + uint64_t BitsToClearMask = (1ULL << PageSizeShift) - 1; // 0xFFF + uint64_t AndMask = ~BitsToClearMask; // ...FFFFFFFFFFFF000 + GeneratedCode.push_back(MCInstBuilder(AArch64::ANDXri) + .addReg(TargetRegister) // Xd + .addReg(TargetRegister) // Xn + .addImm(AndMask) // imm bitmask + ); +} + +std::vector +ExegesisAArch64Target::generateExitSyscall(unsigned ExitCode) const { + std::vector ExitCallCode; + ExitCallCode.push_back(loadImmediate(AArch64::X0, 64, APInt(64, ExitCode))); + generateSysCall(SYS_exit, ExitCallCode); // SYS_exit is 93 + return ExitCallCode; +} + +std::vector +ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, + uintptr_t FileDescriptorAddress) const { + // mmap(address, length, prot, flags, fd, offset=0) + int flags = MAP_SHARED; + if (Address != 0) { + flags |= MAP_FIXED_NOREPLACE; + } + std::vector MmapCode; + MmapCode.push_back( + loadImmediate(AArch64::X0, 64, APInt(64, Address))); // map adr + MmapCode.push_back( + loadImmediate(AArch64::X1, 64, APInt(64, Length))); // length + MmapCode.push_back(loadImmediate(AArch64::X2, 64, + APInt(64, PROT_READ | PROT_WRITE))); // prot + MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags + // FIXME: File descriptor address is not initialized. + // Copy file descriptor location from aux memory into X4 + MmapCode.push_back( + loadImmediate(AArch64::X4, 64, APInt(64, FileDescriptorAddress))); // fd + // Dereference file descriptor into FD argument register + // MmapCode.push_back(MCInstBuilder(AArch64::LDRWui) + // .addReg(AArch64::W4) // Destination register + // .addReg(AArch64::X4) // Base register (address) + // .addImm(0)); // Offset (-byte words) + // FIXME: This is not correct. 
+ MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset + generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222 + return MmapCode; +} + +void ExegesisAArch64Target::generateMmapAuxMem( + std::vector &GeneratedCode) const { + int fd = -1; + int flags = MAP_SHARED; + uintptr_t address = getAuxiliaryMemoryStartAddress(); + if (fd == -1) + flags |= MAP_ANONYMOUS; + if (address != 0) + flags |= MAP_FIXED_NOREPLACE; + int prot = PROT_READ | PROT_WRITE; + + GeneratedCode.push_back( + loadImmediate(AArch64::X0, 64, APInt(64, address))); // map adr + GeneratedCode.push_back(loadImmediate( + AArch64::X1, 64, + APInt(64, SubprocessMemory::AuxiliaryMemorySize))); // length + GeneratedCode.push_back( + loadImmediate(AArch64::X2, 64, APInt(64, prot))); // prot + GeneratedCode.push_back( + loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags + GeneratedCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd + GeneratedCode.push_back( + loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset + generateSysCall(SYS_mmap, GeneratedCode); // SYS_mmap is 222 +} + +std::vector ExegesisAArch64Target::generateMemoryInitialSetup() const { + std::vector MemoryInitialSetupCode; + generateMmapAuxMem(MemoryInitialSetupCode); // FIXME: Uninit file descriptor + + // If using fixed address for auxiliary memory skip this step, + // When using dynamic memory allocation (non-fixed address), we must preserve + // the mmap return value (X0) which contains the allocated memory address. + // This value is saved to the stack to ensure registers requiring memory + // access can retrieve the correct address even if X0 is modified by + // intermediate code. 
+ generateRegisterStackPush(AArch64::X0, MemoryInitialSetupCode); + // FIXME: Ensure stack pointer remains stable to prevent loss of saved address + return MemoryInitialSetupCode; +} + +std::vector ExegesisAArch64Target::setStackRegisterToAuxMem() const { + std::vector instructions; // NOP + // TODO: Implement this, Found no need for this in AArch64. + return instructions; +} + +uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const { + if (!UseFixedAddress) + // Allow kernel to select an appropriate memory address + return 0; + // Return the second to last page in the virtual address space + // to try and prevent interference with memory annotations in the snippet + // VAddressSpaceCeiling = 0x0000800000000000 + // FIXME: Why 2 pages? + return VAddressSpaceCeiling - (2 * getpagesize()); +} + +std::vector +ExegesisAArch64Target::configurePerfCounter(long Request, + bool SaveRegisters) const { + std::vector ConfigurePerfCounterCode; // NOP + // FIXME: SYSCALL exits with EBADF error - file descriptor is invalid + // No file is opened previously to add as file descriptor + return ConfigurePerfCounterCode; +} + +std::vector ExegesisAArch64Target::getArgumentRegisters() const { + return {AArch64::X0, AArch64::X1}; +} + +std::vector ExegesisAArch64Target::getRegistersNeedSaving() const { + return { + AArch64::X0, + AArch64::X1, + AArch64::X2, + AArch64::X3, + AArch64::X4, + AArch64::X5, + AArch64::X8, + ArgumentRegisters::TempRegister, + ArgumentRegisters::CodeSize, + ArgumentRegisters::AuxiliaryMemoryFD, + }; +} + +#endif // __linux__ + static ExegesisTarget *getTheExegesisAArch64Target() { static ExegesisAArch64Target Target; return &Target; diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h index 77fbaa6e95412..736c9d9ff6c23 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -308,6 +308,10 @@ class ExegesisTarget { return std::make_unique(); } + virtual std::vector 
_generateRegisterStackPop(MCRegister Reg, int imm = 0) const { + return {}; + } + private: virtual bool matchesArch(Triple::ArchType Arch) const = 0; From 50c07dbaec5f2db052ee2f2c2bd0dfa8db6a6464 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 19 Jun 2025 06:16:21 -0700 Subject: [PATCH 04/39] [llvm-exegesis] [AArch64] Implement different register initialization for subprocess execution mode --- llvm/tools/llvm-exegesis/lib/Assembler.cpp | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp index fd7924db08441..a73eaf76a46d7 100644 --- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp +++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp @@ -66,6 +66,8 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, assert(MM.Address % getpagesize() == 0 && "Memory mappings need to be aligned to page boundaries."); #endif + // FIXME: file descriptor for aux memory seems not initialized. + // TODO: Invoke openat syscall to get correct fd for aux memory const MemoryValue &MemVal = Key.MemoryValues.at(MM.MemoryValueName); BBF.addInstructions(ET.generateMmap( MM.Address, MemVal.SizeBytes, @@ -78,15 +80,47 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, Register StackPointerRegister = BBF.MF.getSubtarget() .getTargetLowering() ->getStackPointerRegisterToSaveRestore(); +#define DEBUG_TYPE "register-initial-values" + // FIXME: Only loading first register with memory address is hacky. 
+ bool isFirstRegister = true; for (const RegisterValue &RV : Key.RegisterInitialValues) { + // Debug: register name and class name and value from BenchmarkKey + const MCRegisterInfo *RegInfo = BBF.MF.getTarget().getMCRegisterInfo(); + const char *RegName = RegInfo->getName(RV.Register); + const char *regClassName = "Unknown"; + for (unsigned i = 0, e = RegInfo->getNumRegClasses(); i < e; ++i) { + const MCRegisterClass &RC = RegInfo->getRegClass(i); + if (RC.contains(RV.Register)) { + regClassName = RegInfo->getRegClassName(&RC); + break; + } + } + LLVM_DEBUG( + dbgs() << "Setting register (Class: " << regClassName << ") " << RegName + << std::string( + std::max(0, 3 - static_cast(strlen(RegName))), ' ')); + if (GenerateMemoryInstructions) { // If we're generating memory instructions, don't load in the value for // the register with the stack pointer as it will be used later to finish // the setup. if (Register(RV.Register) == StackPointerRegister) continue; +#if defined(__aarch64__) + auto StackLoadInsts = ET._generateRegisterStackPop(RV.Register, 16); + if (!StackLoadInsts.empty() && isFirstRegister) { + for (const auto &Inst : StackLoadInsts) + BBF.addInstruction(Inst); + isFirstRegister = false; + LLVM_DEBUG(dbgs() << "from stack with post-increment offset of " << 16 + << " bytes\n"); + continue; + } +#endif } // Load a constant in the register. 
+ LLVM_DEBUG(dbgs() << " to " << RV.Value << "\n"); +#undef DEBUG_TYPE const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value); if (SetRegisterCode.empty()) IsSnippetSetupComplete = false; From 01a03a1c1ca78a41e00d408bea4fcbe197c3dfba Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 19 Jun 2025 06:23:19 -0700 Subject: [PATCH 05/39] [llvm-exegesis] [AArch64] Resolve Merge Conflict coming from reverted #136868 --- .../llvm-exegesis/lib/AArch64/Target.cpp | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 4fdf1a56398e2..ec1e667b1f629 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -28,12 +28,6 @@ #endif // HAVE_LIBPFM #include // For PR_PAC_* constants #include -#ifndef PR_PAC_SET_ENABLED_KEYS -#define PR_PAC_SET_ENABLED_KEYS 60 -#endif -#ifndef PR_PAC_GET_ENABLED_KEYS -#define PR_PAC_GET_ENABLED_KEYS 61 -#endif #ifndef PR_PAC_APIAKEY #define PR_PAC_APIAKEY (1UL << 0) #endif @@ -288,6 +282,35 @@ class ExegesisAArch64Target : public ExegesisTarget { // Function return is a pseudo-instruction that needs to be expanded PM.add(createAArch64ExpandPseudoPass()); } + + const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State, + unsigned Opcode) const override { + if (const char *Reason = + ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode)) + return Reason; + + if (isPointerAuth(Opcode)) { +#if defined(__aarch64__) && defined(__linux__) + // Disable all PAC keys. Note that while we expect the measurements to + // be the same with PAC keys disabled, they could potentially be lower + // since authentication checks are bypassed. 
+ if (prctl(PR_PAC_SET_ENABLED_KEYS, + PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | + PR_PAC_APDBKEY, // all keys + 0, // disable all + 0, 0) < 0) { + return "Failed to disable PAC keys"; + } +#else + return "Unsupported opcode: isPointerAuth"; +#endif + } + + if (isLoadTagMultiple(Opcode)) + return "Unsupported opcode: load tag multiple"; + + return nullptr; + } }; } // namespace From 412c9caa86d8586d4419c4334646a7186bd81f7a Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 19 Jun 2025 06:58:45 -0700 Subject: [PATCH 06/39] [llvm-exegesis] [AArch64] Format changes --- .../llvm-exegesis/lib/AArch64/Target.cpp | 32 +------------------ llvm/tools/llvm-exegesis/lib/Target.h | 3 +- 2 files changed, 3 insertions(+), 32 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index ec1e667b1f629..c8613f360b376 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -223,7 +223,7 @@ class ExegesisAArch64Target : public ExegesisTarget { std::vector _generateRegisterStackPop(MCRegister Reg, int imm = 0) const override { std::vector Insts; - if (AArch64::GPR32RegClass.contains(Reg) || + if (AArch64::GPR32RegClass.contains(Reg) || AArch64::GPR64RegClass.contains(Reg)) { generateRegisterStackPop(Reg, Insts, imm); return Insts; @@ -283,36 +283,6 @@ class ExegesisAArch64Target : public ExegesisTarget { PM.add(createAArch64ExpandPseudoPass()); } - const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State, - unsigned Opcode) const override { - if (const char *Reason = - ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode)) - return Reason; - - if (isPointerAuth(Opcode)) { -#if defined(__aarch64__) && defined(__linux__) - // Disable all PAC keys. Note that while we expect the measurements to - // be the same with PAC keys disabled, they could potentially be lower - // since authentication checks are bypassed. 
- if (prctl(PR_PAC_SET_ENABLED_KEYS, - PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | - PR_PAC_APDBKEY, // all keys - 0, // disable all - 0, 0) < 0) { - return "Failed to disable PAC keys"; - } -#else - return "Unsupported opcode: isPointerAuth"; -#endif - } - - if (isLoadTagMultiple(Opcode)) - return "Unsupported opcode: load tag multiple"; - - return nullptr; - } -}; - } // namespace #ifdef __linux__ diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h index 736c9d9ff6c23..0304908cbb2b2 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -308,7 +308,8 @@ class ExegesisTarget { return std::make_unique(); } - virtual std::vector _generateRegisterStackPop(MCRegister Reg, int imm = 0) const { + virtual std::vector _generateRegisterStackPop(MCRegister Reg, + int imm = 0) const { return {}; } From 9d7efdbc155eab21b84bd7c481481b5aff6c6002 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 19 Jun 2025 08:45:38 -0700 Subject: [PATCH 07/39] [llvm-exegesis] [AArch64] Fix missing closing brace in Target.cpp --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index c8613f360b376..c3bd18c3a8440 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -282,6 +282,7 @@ class ExegesisAArch64Target : public ExegesisTarget { // Function return is a pseudo-instruction that needs to be expanded PM.add(createAArch64ExpandPseudoPass()); } +}; } // namespace From be42cd7a92f8e5068b43b7d9b87eafa2219740a8 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Tue, 1 Jul 2025 09:42:50 -0700 Subject: [PATCH 08/39] [llvm-exegesis] Renamed `ArgumentRegisters` enum to `ReservedRegisters` for clarity. 
--- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 8 ++++---- llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index c3bd18c3a8440..8cfce2d7af682 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -214,7 +214,7 @@ class ExegesisAArch64Target : public ExegesisTarget { ExegesisAArch64Target() : ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {} - enum ArgumentRegisters { + enum ReservedRegisters { CodeSize = AArch64::X12, AuxiliaryMemoryFD = AArch64::X13, TempRegister = AArch64::X16, @@ -428,9 +428,9 @@ std::vector ExegesisAArch64Target::getRegistersNeedSaving() const { AArch64::X4, AArch64::X5, AArch64::X8, - ArgumentRegisters::TempRegister, - ArgumentRegisters::CodeSize, - ArgumentRegisters::AuxiliaryMemoryFD, + ReservedRegisters::TempRegister, + ReservedRegisters::CodeSize, + ReservedRegisters::AuxiliaryMemoryFD, }; } diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index 5dae6c0a25fab..a13bcc595efc6 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -725,7 +725,7 @@ class ExegesisX86Target : public ExegesisTarget { ProcessID); } - enum ArgumentRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 }; + enum ReservedRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 }; private: void addTargetSpecificPasses(PassManagerBase &PM) const override; @@ -1166,7 +1166,7 @@ void ExegesisX86Target::generateUpperMunmap( // Load in the size of the snippet to RDI from from the argument register. 
GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr) .addReg(X86::RDI) - .addReg(ArgumentRegisters::CodeSize)); + .addReg(ReservedRegisters::CodeSize)); // Add the length of the snippet (in %RDI) to the current instruction pointer // (%R8) to get the address where we should start unmapping at. GeneratedCode.push_back(MCInstBuilder(X86::ADD64rr) @@ -1236,7 +1236,7 @@ void ExegesisX86Target::generateMmapAuxMem( loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE))); GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr) .addReg(X86::R8) - .addReg(ArgumentRegisters::AuxiliaryMemoryFD)); + .addReg(ReservedRegisters::AuxiliaryMemoryFD)); GeneratedCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0))); generateSyscall(SYS_mmap, GeneratedCode); } @@ -1244,10 +1244,10 @@ void ExegesisX86Target::generateMmapAuxMem( void ExegesisX86Target::moveArgumentRegisters( std::vector &GeneratedCode) const { GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr) - .addReg(ArgumentRegisters::CodeSize) + .addReg(ReservedRegisters::CodeSize) .addReg(X86::RDI)); GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr) - .addReg(ArgumentRegisters::AuxiliaryMemoryFD) + .addReg(ReservedRegisters::AuxiliaryMemoryFD) .addReg(X86::RSI)); } From ce64b4732550f681d0a25649f9f799f6cbe97f65 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Tue, 1 Jul 2025 09:49:48 -0700 Subject: [PATCH 09/39] [llvm-exegesis] [AArch64] Refactor stack push/pop functions for clarity and update syscall argument handling and descriptive comments --- .../llvm-exegesis/lib/AArch64/Target.cpp | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 8cfce2d7af682..bb09f8adfa10e 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -166,6 +166,11 @@ static void generateRegisterStackPop(unsigned int RegToPopTo, } void 
generateSysCall(long SyscallNumber, std::vector &GeneratedCode) { + // AArch64 Linux follows the AAPCS (ARM Architecture Procedure Call Standard): + // - X8 register contains the system call number + // - X0-X5 registers contain the first 6 arguments (if any) + // - SVC #0 instruction triggers the system call + // - Return value is placed in X0 register GeneratedCode.push_back( loadImmediate(AArch64::X8, 64, APInt(64, SyscallNumber))); GeneratedCode.push_back(MCInstBuilder(AArch64::SVC).addImm(0)); @@ -173,18 +178,17 @@ void generateSysCall(long SyscallNumber, std::vector &GeneratedCode) { /// Functions to save/restore system call registers #ifdef __linux__ -constexpr std::array SyscallArgumentRegisters{ - AArch64::X0, AArch64::X1, AArch64::X2, - AArch64::X3, AArch64::X4, AArch64::X5, +constexpr std::array SyscallArgumentRegisters{ + AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, + AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7, }; static void saveSysCallRegisters(std::vector &GeneratedCode, unsigned ArgumentCount) { - // AArch64 Linux typically uses X0-X5 for the first 6 arguments. - // Some syscalls can take up to 8 arguments in X0-X7. - assert(ArgumentCount <= 6 && - "This implementation saves up to 6 argument registers (X0-X5)"); - // generateRegisterStackPush(ArgumentRegisters::TempRegister, GeneratedCode); + // AArch64 follows the AAPCS (ARM Architecture Procedure Call Standard): + // X0-X7 registers contain the first 8 arguments. + assert(ArgumentCount <= 8 && + "This implementation saves up to 8 argument registers (X0-X7)"); // Preserve X8 (used for the syscall number/return value). generateRegisterStackPush(AArch64::X8, GeneratedCode); // Preserve the registers used to pass arguments to the system call. 
@@ -195,14 +199,13 @@ static void saveSysCallRegisters(std::vector &GeneratedCode, static void restoreSysCallRegisters(std::vector &GeneratedCode, unsigned ArgumentCount) { - assert(ArgumentCount <= 6 && - "This implementation restores up to 6 argument registers (X0-X5)"); - // Restore argument registers, in opposite order of the way they are saved. + assert(ArgumentCount <= 8 && + "This implementation restores up to 8 argument registers (X0-X7)"); + // Restore registers in reverse order for (int I = ArgumentCount - 1; I >= 0; --I) { generateRegisterStackPop(SyscallArgumentRegisters[I], GeneratedCode); } generateRegisterStackPop(AArch64::X8, GeneratedCode); - // generateRegisterStackPop(ArgumentRegisters::TempRegister, GeneratedCode); } #endif // __linux__ #include "AArch64GenExegesis.inc" From 5133a050a4e986f87deeb4d637243f69ca224b66 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Tue, 1 Jul 2025 09:51:46 -0700 Subject: [PATCH 10/39] [llvm-exegesis] [AArch64] Removed if-else block brackets --- .../llvm-exegesis/lib/AArch64/Target.cpp | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index bb09f8adfa10e..50018831c1923 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -127,42 +127,40 @@ static void generateRegisterStackPush(unsigned int RegToPush, int imm = -16) { // STR [X|W]t, [SP, #simm]!: SP is decremented by default 16 bytes // before the store to maintain 16-bytes alignment. 
- if (AArch64::GPR64RegClass.contains(RegToPush)) { + if (AArch64::GPR64RegClass.contains(RegToPush)) GeneratedCode.push_back(MCInstBuilder(AArch64::STRXpre) .addReg(AArch64::SP) .addReg(RegToPush) .addReg(AArch64::SP) .addImm(imm)); - } else if (AArch64::GPR32RegClass.contains(RegToPush)) { + else if (AArch64::GPR32RegClass.contains(RegToPush)) GeneratedCode.push_back(MCInstBuilder(AArch64::STRWpre) .addReg(AArch64::SP) .addReg(RegToPush) .addReg(AArch64::SP) .addImm(imm)); - } else { + else llvm_unreachable("Unsupported register class for stack push"); - } } static void generateRegisterStackPop(unsigned int RegToPopTo, std::vector &GeneratedCode, int imm = 16) { // LDR Xt, [SP], #simm: SP is incremented by default 16 bytes after the load. - if (AArch64::GPR64RegClass.contains(RegToPopTo)) { + if (AArch64::GPR64RegClass.contains(RegToPopTo)) GeneratedCode.push_back(MCInstBuilder(AArch64::LDRXpost) .addReg(AArch64::SP) .addReg(RegToPopTo) .addReg(AArch64::SP) .addImm(imm)); - } else if (AArch64::GPR32RegClass.contains(RegToPopTo)) { + else if (AArch64::GPR32RegClass.contains(RegToPopTo)) GeneratedCode.push_back(MCInstBuilder(AArch64::LDRWpost) .addReg(AArch64::SP) .addReg(RegToPopTo) .addReg(AArch64::SP) .addImm(imm)); - } else { + else llvm_unreachable("Unsupported register class for stack pop"); - } } void generateSysCall(long SyscallNumber, std::vector &GeneratedCode) { @@ -340,12 +338,6 @@ ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, // Copy file descriptor location from aux memory into X4 MmapCode.push_back( loadImmediate(AArch64::X4, 64, APInt(64, FileDescriptorAddress))); // fd - // Dereference file descriptor into FD argument register - // MmapCode.push_back(MCInstBuilder(AArch64::LDRWui) - // .addReg(AArch64::W4) // Destination register - // .addReg(AArch64::X4) // Base register (address) - // .addImm(0)); // Offset (-byte words) - // FIXME: This is not correct. 
MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222 return MmapCode; From c50890b9a984a07060b3077e3cabdc32debb935c Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Tue, 1 Jul 2025 09:52:33 -0700 Subject: [PATCH 11/39] [llvm-exegesis] [AArch64] Introduced warnings for unimplemented functions using dbgs --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 50018831c1923..3b7bfce1ae138 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -386,7 +386,10 @@ std::vector ExegesisAArch64Target::generateMemoryInitialSetup() const { std::vector ExegesisAArch64Target::setStackRegisterToAuxMem() const { std::vector instructions; // NOP - // TODO: Implement this, Found no need for this in AArch64. + // Motivation unclear, found no need for this in AArch64. + // TODO: Implement this, if required. 
+ dbgs() << "Warning: setStackRegisterToAuxMem called but not required for " "AArch64\n"; return instructions; } @@ -407,6 +410,8 @@ ExegesisAArch64Target::configurePerfCounter(long Request, std::vector ConfigurePerfCounterCode; // NOP // FIXME: SYSCALL exits with EBADF error - file descriptor is invalid // No file is opened previosly to add as file descriptor + errs() << "Warning: configurePerfCounter not implemented, measurements will " + "be unreliable\n"; return ConfigurePerfCounterCode; } From 1b0f4c7723d7aad37d95d049ee0f724179fc03fc Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Tue, 1 Jul 2025 10:12:10 -0700 Subject: [PATCH 12/39] [llvm-exegesis] [AArch64] refactor configurePerfCounter, errs to dbgs --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 3b7bfce1ae138..f7273f70ccf2a 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -410,7 +410,7 @@ ExegesisAArch64Target::configurePerfCounter(long Request, std::vector ConfigurePerfCounterCode; // NOP // FIXME: SYSCALL exits with EBADF error - file descriptor is invalid // No file is opened previosly to add as file descriptor - errs() << "Warning: configurePerfCounter not implemented, measurements will " + dbgs() << "Warning: configurePerfCounter not implemented, measurements will " "be unreliable\n"; return ConfigurePerfCounterCode; } From a53d5b08e1aeaa6f52fa8d293677721b7cf932a5 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 14 Aug 2025 04:16:37 -0700 Subject: [PATCH 13/39] [llvm-exegesis] [AArch64] Remove unnecessary AArch64 guard.
--- llvm/tools/llvm-exegesis/lib/Assembler.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp index a73eaf76a46d7..67d78e82f411b 100644 --- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp +++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp @@ -106,7 +106,6 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, // the setup. if (Register(RV.Register) == StackPointerRegister) continue; -#if defined(__aarch64__) auto StackLoadInsts = ET._generateRegisterStackPop(RV.Register, 16); if (!StackLoadInsts.empty() && isFirstRegister) { for (const auto &Inst : StackLoadInsts) @@ -116,7 +115,6 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, << " bytes\n"); continue; } -#endif } // Load a constant in the register. LLVM_DEBUG(dbgs() << " to " << RV.Value << "\n"); From 4ab9412aa373985a225453cd785236034c69c8a9 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 14 Aug 2025 04:17:48 -0700 Subject: [PATCH 14/39] [llvm-exegesis] [AArch64] Remove register initial value debug info --- llvm/tools/llvm-exegesis/lib/Assembler.cpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp index 67d78e82f411b..6a9103198e2de 100644 --- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp +++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp @@ -80,26 +80,9 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, Register StackPointerRegister = BBF.MF.getSubtarget() .getTargetLowering() ->getStackPointerRegisterToSaveRestore(); -#define DEBUG_TYPE "register-initial-values" // FIXME: Only loading first register with memory address is hacky. 
bool isFirstRegister = true; for (const RegisterValue &RV : Key.RegisterInitialValues) { - // Debug: register name and class name and value from BenchmarkKey - const MCRegisterInfo *RegInfo = BBF.MF.getTarget().getMCRegisterInfo(); - const char *RegName = RegInfo->getName(RV.Register); - const char *regClassName = "Unknown"; - for (unsigned i = 0, e = RegInfo->getNumRegClasses(); i < e; ++i) { - const MCRegisterClass &RC = RegInfo->getRegClass(i); - if (RC.contains(RV.Register)) { - regClassName = RegInfo->getRegClassName(&RC); - break; - } - } - LLVM_DEBUG( - dbgs() << "Setting register (Class: " << regClassName << ") " << RegName - << std::string( - std::max(0, 3 - static_cast(strlen(RegName))), ' ')); - if (GenerateMemoryInstructions) { // If we're generating memory instructions, don't load in the value for // the register with the stack pointer as it will be used later to finish @@ -117,8 +100,6 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, } } // Load a constant in the register. 
- LLVM_DEBUG(dbgs() << " to " << RV.Value << "\n"); -#undef DEBUG_TYPE const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value); if (SetRegisterCode.empty()) IsSnippetSetupComplete = false; From 13d2a10a01bacf72b2c423e10a3b8839da9df9d3 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Sun, 17 Aug 2025 22:24:35 -0700 Subject: [PATCH 15/39] [llvm-exegesis] [AArch64] Remove additional register initial value debug info --- llvm/tools/llvm-exegesis/lib/Assembler.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp index 6a9103198e2de..a9d6125ca98a5 100644 --- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp +++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp @@ -94,8 +94,6 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, for (const auto &Inst : StackLoadInsts) BBF.addInstruction(Inst); isFirstRegister = false; - LLVM_DEBUG(dbgs() << "from stack with post-increment offset of " << 16 - << " bytes\n"); continue; } } From 1f66364b251ec41ab7a040b5d59f3ec94459309c Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Sun, 31 Aug 2025 22:52:23 -0700 Subject: [PATCH 16/39] [llvm-exegesis] Revert ReservedRegisters enum to ArgumentRegisters in AArch64 and X86 targets --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 8 ++++---- llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index f7273f70ccf2a..1138d04f84740 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -215,7 +215,7 @@ class ExegesisAArch64Target : public ExegesisTarget { ExegesisAArch64Target() : ExegesisTarget(AArch64CpuPfmCounters, AArch64_MC::isOpcodeAvailable) {} - enum ReservedRegisters { + enum ArgumentRegisters { CodeSize = AArch64::X12, AuxiliaryMemoryFD = AArch64::X13, 
TempRegister = AArch64::X16, @@ -428,9 +428,9 @@ std::vector ExegesisAArch64Target::getRegistersNeedSaving() const { AArch64::X4, AArch64::X5, AArch64::X8, - ReservedRegisters::TempRegister, - ReservedRegisters::CodeSize, - ReservedRegisters::AuxiliaryMemoryFD, + ArgumentRegisters::TempRegister, + ArgumentRegisters::CodeSize, + ArgumentRegisters::AuxiliaryMemoryFD, }; } diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index a13bcc595efc6..5dae6c0a25fab 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -725,7 +725,7 @@ class ExegesisX86Target : public ExegesisTarget { ProcessID); } - enum ReservedRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 }; + enum ArgumentRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 }; private: void addTargetSpecificPasses(PassManagerBase &PM) const override; @@ -1166,7 +1166,7 @@ void ExegesisX86Target::generateUpperMunmap( // Load in the size of the snippet to RDI from from the argument register. GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr) .addReg(X86::RDI) - .addReg(ReservedRegisters::CodeSize)); + .addReg(ArgumentRegisters::CodeSize)); // Add the length of the snippet (in %RDI) to the current instruction pointer // (%R8) to get the address where we should start unmapping at. 
GeneratedCode.push_back(MCInstBuilder(X86::ADD64rr) @@ -1236,7 +1236,7 @@ void ExegesisX86Target::generateMmapAuxMem( loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE))); GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr) .addReg(X86::R8) - .addReg(ReservedRegisters::AuxiliaryMemoryFD)); + .addReg(ArgumentRegisters::AuxiliaryMemoryFD)); GeneratedCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0))); generateSyscall(SYS_mmap, GeneratedCode); } @@ -1244,10 +1244,10 @@ void ExegesisX86Target::generateMmapAuxMem( void ExegesisX86Target::moveArgumentRegisters( std::vector &GeneratedCode) const { GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr) - .addReg(ReservedRegisters::CodeSize) + .addReg(ArgumentRegisters::CodeSize) .addReg(X86::RDI)); GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr) - .addReg(ReservedRegisters::AuxiliaryMemoryFD) + .addReg(ArgumentRegisters::AuxiliaryMemoryFD) .addReg(X86::RSI)); } From eeb64273b7bbbcc759bc24aa1729cca2522f3bc9 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 4 Sep 2025 01:00:35 -0700 Subject: [PATCH 17/39] [llvm-exegesis] [AArch64] Removed kernel or fixed address option for auxiliary memory to only fixed address.
--- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 1138d04f84740..b25a43fcdbf9f 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -288,10 +288,6 @@ class ExegesisAArch64Target : public ExegesisTarget { } // namespace #ifdef __linux__ -// true : let use of fixed address to Virtual Address Space Ceiling -// false: let kernel choose the address of the auxiliary memory -bool UseFixedAddress = true; - static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000; static void generateRoundToNearestPage(unsigned int TargetRegister, @@ -394,12 +390,8 @@ std::vector ExegesisAArch64Target::setStackRegisterToAuxMem() const { } uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const { - if (!UseFixedAddress) - // Allow kernel to select an appropriate memory address - return 0; - // Return the second to last page in the virtual address space - // to try and prevent interference with memory annotations in the snippet - // VAddressSpaceCeiling = 0x0000800000000000 + // Return the second to last page in the virtual address space to try and + // prevent interference with memory annotations in the snippet // FIXME: Why 2 pages? 
return VAddressSpaceCeiling - (2 * getpagesize()); } From 63c199f5f0469dd6adf11b513953cdb559de2898 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 4 Sep 2025 01:02:22 -0700 Subject: [PATCH 18/39] [llvm-exegesis] [AArch64] setStackRegisterToAuxMem Implementation --- .../llvm-exegesis/lib/AArch64/Target.cpp | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index b25a43fcdbf9f..65c2eb0da7efc 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -386,6 +386,52 @@ std::vector ExegesisAArch64Target::setStackRegisterToAuxMem() const { // TODO: Implement this, if required. dbgs() << "Warning: setStackRegisterToAuxMem called but not required for " "AArch64\n"; + + const uint64_t targetSPVal = + getAuxiliaryMemoryStartAddress() + SubprocessMemory::AuxiliaryMemorySize; + // sub, stack args and local storage + // Use X16 as a temporary register since it's a scratch register + const MCRegister TempReg = AArch64::X16; + + // Load the 64-bit immediate into TempReg using MOVZ/MOVK sequence + // MOVZ Xd, #imm16, LSL #(shift_val * 16) + // MOVK Xd, #imm16, LSL #(shift_val * 16) (* 3 times for 64-bit immediate) + + // 1. MOVZ TmpReg, #(targetSPVal & 0xFFFF), LSL #0 + instructions.push_back(MCInstBuilder(AArch64::MOVZXi) + .addReg(TempReg) + .addImm(targetSPVal & 0xFFFF) // imm16 + .addImm(0)); // hw(shift/16) = 0 + // 2. MOVK TmpReg, #((targetSPVal >> 16) & 0xFFFF), LSL #16 + if (((targetSPVal >> 16) & 0xFFFF) != 0 || (targetSPVal > 0xFFFF)) { + instructions.push_back(MCInstBuilder(AArch64::MOVKXi) + .addReg(TempReg) + .addReg(TempReg) + .addImm((targetSPVal >> 16) & 0xFFFF) // imm16 + .addImm(1)); // hw(shift/16) = 1 + } + // 3.
MOVK TmpReg, #((targetSPVal >> 32) & 0xFFFF), LSL #32 + if (((targetSPVal >> 32) & 0xFFFF) != 0 || (targetSPVal > 0xFFFFFFFF)) { + instructions.push_back(MCInstBuilder(AArch64::MOVKXi) + .addReg(TempReg) + .addReg(TempReg) + .addImm((targetSPVal >> 32) & 0xFFFF) // imm16 + .addImm(2)); // hw(shift/16) = 2 + } + // 4. MOVK TmpReg, #((targetSPVal >> 48) & 0xFFFF), LSL #48 + if (((targetSPVal >> 48) & 0xFFFF) != 0 || (targetSPVal > 0xFFFFFFFFFFFF)) { + instructions.push_back(MCInstBuilder(AArch64::MOVKXi) + .addReg(TempReg) + .addReg(TempReg) + .addImm((targetSPVal >> 48) & 0xFFFF) // imm16 + .addImm(3)); // hw(shift/16) = 3 + } + // Finally, move the value from TempReg to SP + instructions.push_back(MCInstBuilder(AArch64::ADDXri) // ADD SP, TempReg, #0 + .addReg(AArch64::SP) + .addReg(TempReg) + .addImm(0) // imm = 0 + .addImm(0)); // shift = 0 return instructions; } From a05f2a9323e424fb00f9a4a2424edf908deb34fc Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 4 Sep 2025 01:03:21 -0700 Subject: [PATCH 19/39] [llvm-exegesis] [AArch64] Revert setStackRegisterToAuxMem Implementation. Introduces Bus error. --- .../llvm-exegesis/lib/AArch64/Target.cpp | 46 ------------------- 1 file changed, 46 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 65c2eb0da7efc..b25a43fcdbf9f 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -386,52 +386,6 @@ std::vector ExegesisAArch64Target::setStackRegisterToAuxMem() const { // TODO: Implement this, if required.
dbgs() << "Warning: setStackRegisterToAuxMem called but not required for " "AArch64\n"; - - const uint64_t targetSPVal = - getAuxiliaryMemoryStartAddress() + SubprocessMemory::AuxiliaryMemorySize; - // sub, stack args and local storage - // Use X16 as a temporary register since it's a scratch register - const MCRegister TempReg = AArch64::X16; - - // Load the 64-bit immediate into TempReg using MOVZ/MOVK sequence - // MOVZ Xd, #imm16, LSL #(shift_val * 16) - // MOVK Xd, #imm16, LSL #(shift_val * 16) (* 3 times for 64-bit immediate) - - // 1. MOVZ TmpReg, #(targetSPVal & 0xFFFF), LSL #0 - instructions.push_back(MCInstBuilder(AArch64::MOVZXi) - .addReg(TempReg) - .addImm(targetSPVal & 0xFFFF) // imm16 - .addImm(0)); // hw(shift/16) = 0 - // 2. MOVK TmpReg, #((targetSPVal >> 16) & 0xFFFF), LSL #16 - if (((targetSPVal >> 16) & 0xFFFF) != 0 || (targetSPVal > 0xFFFF)) { - instructions.push_back(MCInstBuilder(AArch64::MOVKXi) - .addReg(TempReg) - .addReg(TempReg) - .addImm((targetSPVal >> 16) & 0xFFFF) // imm16 - .addImm(1)); // hw(shift/16) = 1 - } - // 3. MOVK TmpReg, #((targetSPVal >> 32) & 0xFFFF), LSL #32 - if (((targetSPVal >> 32) & 0xFFFF) != 0 || (targetSPVal > 0xFFFFFFFF)) { - instructions.push_back(MCInstBuilder(AArch64::MOVKXi) - .addReg(TempReg) - .addReg(TempReg) - .addImm((targetSPVal >> 32) & 0xFFFF) // imm16 - .addImm(2)); // hw(shift/16) = 2 - } - // 4. 
MOVK TmpReg, #((targetSPVal >> 48) & 0xFFFF), LSL #48 - if (((targetSPVal >> 48) & 0xFFFF) != 0 || (targetSPVal > 0xFFFFFFFFFFFF)) { - instructions.push_back(MCInstBuilder(AArch64::MOVKXi) - .addReg(TempReg) - .addReg(TempReg) - .addImm((targetSPVal >> 48) & 0xFFFF) // imm16 - .addImm(3)); // hw(shift/16) = 3 - } - // Finally, move the value from TempReg to SP - instructions.push_back(MCInstBuilder(AArch64::ADDXri) // ADD SP, TempReg, #0 - .addReg(AArch64::SP) - .addReg(TempReg) - .addImm(0) // imm = 0 - .addImm(0)); // shift = 0 return instructions; } From c7d7676f7f91e06c5a5188aaaabc59b1204e166f Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 4 Sep 2025 01:04:48 -0700 Subject: [PATCH 20/39] [llvm-exegesis] [AArch64] Remove unused generateRoundToNearestPage function, called by unmap func. (Motivation for unmap unclear for AArch64) --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index b25a43fcdbf9f..056ec00a2b150 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -290,22 +290,6 @@ class ExegesisAArch64Target : public ExegesisTarget { #ifdef __linux__ static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000; -static void generateRoundToNearestPage(unsigned int TargetRegister, - std::vector &GeneratedCode) { - int PageSizeShift = static_cast(round(log2(getpagesize()))); - // Round down to the nearest page by getting rid of the least significant bits - // representing location in the page. 
- - // Single instruction using AND with inverted mask (effectively BIC) - uint64_t BitsToClearMask = (1ULL << PageSizeShift) - 1; // 0xFFF - uint64_t AndMask = ~BitsToClearMask; // ...FFFFFFFFFFFF000 - GeneratedCode.push_back(MCInstBuilder(AArch64::ANDXri) - .addReg(TargetRegister) // Xd - .addReg(TargetRegister) // Xn - .addImm(AndMask) // imm bitmask - ); -} - std::vector ExegesisAArch64Target::generateExitSyscall(unsigned ExitCode) const { std::vector ExitCallCode; From f52e612e6b3b10f5a1902d6bbbf61c676a6af845 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 4 Sep 2025 01:07:25 -0700 Subject: [PATCH 21/39] [llvm-exegesis] [AArch64] Fix function naming for syscall register handling and utilized in prologue epilogue regs in configurePerfCounter. --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 056ec00a2b150..9da5596ad89e0 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -181,7 +181,7 @@ constexpr std::array SyscallArgumentRegisters{ AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7, }; -static void saveSysCallRegisters(std::vector &GeneratedCode, +static void saveSyscallRegisters(std::vector &GeneratedCode, unsigned ArgumentCount) { // AArch64 follows the AAPCS (ARM Architecture Procedure Call Standard): // X0-X7 registers contain the first 8 arguments. 
@@ -195,7 +195,7 @@ static void saveSysCallRegisters(std::vector &GeneratedCode, } } -static void restoreSysCallRegisters(std::vector &GeneratedCode, +static void restoreSyscallRegisters(std::vector &GeneratedCode, unsigned ArgumentCount) { assert(ArgumentCount <= 8 && "This implementation restores up to 8 argument registers (X0-X7)"); @@ -383,11 +383,18 @@ uintptr_t ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const { std::vector ExegesisAArch64Target::configurePerfCounter(long Request, bool SaveRegisters) const { - std::vector ConfigurePerfCounterCode; // NOP + std::vector ConfigurePerfCounterCode; + if (SaveRegisters) + saveSyscallRegisters(ConfigurePerfCounterCode, 3); + // FIXME: SYSCALL exits with EBADF error - file descriptor is invalid // No file is opened previosly to add as file descriptor dbgs() << "Warning: configurePerfCounter not implemented, measurements will " "be unreliable\n"; + + if (SaveRegisters) + restoreSyscallRegisters(ConfigurePerfCounterCode, 3); + return ConfigurePerfCounterCode; } From a1247555d166d0f1591b222a84d56aeee613ec4c Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 4 Sep 2025 02:01:12 -0700 Subject: [PATCH 22/39] [llvm-exegesis] [AArch64] Header cleanup --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 9da5596ad89e0..651d7e9867ba3 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -17,7 +17,6 @@ #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCRegisterInfo.h" #include -#define DEBUG_TYPE "exegesis-aarch64-target" #if defined(__aarch64__) && defined(__linux__) #include @@ -26,8 +25,7 @@ #ifdef HAVE_LIBPFM #include #endif // HAVE_LIBPFM -#include // For PR_PAC_* constants -#include +#include // For PR_PAC_* constants #ifndef PR_PAC_APIAKEY #define PR_PAC_APIAKEY (1UL << 
0) #endif From c538e9d8d076c2de4371ba3db45680daf74c0103 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Thu, 4 Sep 2025 02:40:00 -0700 Subject: [PATCH 23/39] Clang Format --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 651d7e9867ba3..b81700a96f950 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -24,7 +24,7 @@ #include // for getpagesize() #ifdef HAVE_LIBPFM #include -#endif // HAVE_LIBPFM +#endif // HAVE_LIBPFM #include // For PR_PAC_* constants #ifndef PR_PAC_APIAKEY #define PR_PAC_APIAKEY (1UL << 0) From 9c082ff4838617741a01535435d3a719f7bf35b5 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Mon, 15 Sep 2025 04:21:47 -0700 Subject: [PATCH 24/39] [llvm-exegesis] [AArch64] Updated configurePerfCounter to properly load syscall params and generate the SYS_ioctl call. 
--- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 238abbde31477..4dde7fc1f19ec 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -468,10 +468,13 @@ ExegesisAArch64Target::configurePerfCounter(long Request, if (SaveRegisters) saveSyscallRegisters(ConfigurePerfCounterCode, 3); - // FIXME: SYSCALL exits with EBADF error - file descriptor is invalid - // No file is opened previosly to add as file descriptor - dbgs() << "Warning: configurePerfCounter not implemented, measurements will " - "be unreliable\n"; + ConfigurePerfCounterCode.push_back(loadImmediate( + AArch64::X0, 64, APInt(64, getAuxiliaryMemoryStartAddress()))); // fd + ConfigurePerfCounterCode.push_back( + loadImmediate(AArch64::X1, 64, APInt(64, Request))); // cmd + ConfigurePerfCounterCode.push_back( + loadImmediate(AArch64::X2, 64, APInt(64, PERF_IOC_FLAG_GROUP))); // arg + generateSysCall(SYS_ioctl, ConfigurePerfCounterCode); // SYS_ioctl is 29 if (SaveRegisters) restoreSyscallRegisters(ConfigurePerfCounterCode, 3); From 16543e1b1ca6ce2c856b86b70d1d0dc4d83171c0 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Mon, 15 Sep 2025 04:23:04 -0700 Subject: [PATCH 25/39] [llvm-exegesis] [AArch64] Add warning in generateMmapAuxMem about fd --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 4dde7fc1f19ec..3e8f89b42d1e6 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -406,6 +406,7 @@ ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, void ExegesisAArch64Target::generateMmapAuxMem( std::vector &GeneratedCode) 
const { + dbgs() << "Warning: generateMmapAuxMem using anonymous mapping\n"; int fd = -1; int flags = MAP_SHARED; uintptr_t address = getAuxiliaryMemoryStartAddress(); @@ -432,7 +433,7 @@ void ExegesisAArch64Target::generateMmapAuxMem( std::vector ExegesisAArch64Target::generateMemoryInitialSetup() const { std::vector MemoryInitialSetupCode; - generateMmapAuxMem(MemoryInitialSetupCode); // FIXME: Uninit file descriptor + generateMmapAuxMem(MemoryInitialSetupCode); // If using fixed address for auxiliary memory skip this step, // When using dynamic memory allocation (non-fixed address), we must preserve From a9207b44452737b48e2dc2130050587467176481 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Mon, 22 Sep 2025 06:05:42 -0700 Subject: [PATCH 26/39] [llvm-exegesis] [AArch64] Initialize file descriptor handling in auxiliary mmap --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 16 ++++++++++------ llvm/tools/llvm-exegesis/lib/Assembler.cpp | 2 -- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 3e8f89b42d1e6..605e6e26cbccd 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -384,9 +384,8 @@ ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, uintptr_t FileDescriptorAddress) const { // mmap(address, length, prot, flags, fd, offset=0) int flags = MAP_SHARED; - if (Address != 0) { + if (Address != 0) flags |= MAP_FIXED_NOREPLACE; - } std::vector MmapCode; MmapCode.push_back( loadImmediate(AArch64::X0, 64, APInt(64, Address))); // map adr @@ -395,10 +394,14 @@ ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, MmapCode.push_back(loadImmediate(AArch64::X2, 64, APInt(64, PROT_READ | PROT_WRITE))); // prot MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags - // FIXME: File descriptor address is not initialized.
// Copy file descriptor location from aux memory into X4 MmapCode.push_back( - loadImmediate(AArch64::X4, 64, APInt(64, FileDescriptorAddress))); // fd + loadImmediate(ArgumentRegisters::TempRegister, 64, APInt(64, FileDescriptorAddress))); + // Dereference file descriptor into X4 (32-bit load from [X16]) + MmapCode.push_back(MCInstBuilder(AArch64::LDRWui) + .addReg(AArch64::W4) // destination: W4 (X4 lower 32 bits) + .addReg(ArgumentRegisters::TempRegister) // base address: X16 + .addImm(0)); // offset: 0 MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222 return MmapCode; @@ -406,12 +409,13 @@ ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, void ExegesisAArch64Target::generateMmapAuxMem( std::vector &GeneratedCode) const { - dbgs() << "Warning: generateMmapAuxMem using anonymous mapping\n"; int fd = -1; int flags = MAP_SHARED; uintptr_t address = getAuxiliaryMemoryStartAddress(); - if (fd == -1) + if (fd == -1){ + dbgs() << "Warning: generateMmapAuxMem using anonymous mapping\n"; flags |= MAP_ANONYMOUS; + } if (address != 0) flags |= MAP_FIXED_NOREPLACE; int prot = PROT_READ | PROT_WRITE; diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp index a9d6125ca98a5..9259fd5dd7923 100644 --- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp +++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp @@ -66,8 +66,6 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET, assert(MM.Address % getpagesize() == 0 && "Memory mappings need to be aligned to page boundaries."); #endif - // FIXME: file descriptor for aux memory seems not initialized. 
- // TODO: Invoke openat syscall to get correct fd for aux memory const MemoryValue &MemVal = Key.MemoryValues.at(MM.MemoryValueName); BBF.addInstructions(ET.generateMmap( MM.Address, MemVal.SizeBytes, From c7c01300db23a0bfcff98fb7a6f93413ac11aa48 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Mon, 22 Sep 2025 06:15:46 -0700 Subject: [PATCH 27/39] [llvm-exegesis] [AArch64] configurePerfCounter with improved file descriptor handling and clang formatter --- .../llvm-exegesis/lib/AArch64/Target.cpp | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 605e6e26cbccd..07b74648f9d7c 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -395,13 +395,13 @@ ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, APInt(64, PROT_READ | PROT_WRITE))); // prot MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags // Copy file descriptor location from aux memory into X4 - MmapCode.push_back( - loadImmediate(ArgumentRegisters::TempRegister, 64, APInt(64, FileDescriptorAddress))); + MmapCode.push_back(loadImmediate(ArgumentRegisters::TempRegister, 64, + APInt(64, FileDescriptorAddress))); // Dereference file descriptor into X4 (32-bit load from [X16]) MmapCode.push_back(MCInstBuilder(AArch64::LDRWui) - .addReg(AArch64::W4) // destination: W4 (X4 lower 32 bits) - .addReg(ArgumentRegisters::TempRegister) // base address: X16 - .addImm(0)); // offset: 0 + .addReg(AArch64::W4) + .addReg(ArgumentRegisters::TempRegister) + .addImm(0)); MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222 return MmapCode; @@ -412,7 +412,7 @@ void ExegesisAArch64Target::generateMmapAuxMem( int fd = -1; int flags = MAP_SHARED; uintptr_t address = getAuxiliaryMemoryStartAddress(); - if (fd == 
-1){ + if (fd == -1) { dbgs() << "Warning: generateMmapAuxMem using anonymous mapping\n"; flags |= MAP_ANONYMOUS; } @@ -473,8 +473,16 @@ ExegesisAArch64Target::configurePerfCounter(long Request, if (SaveRegisters) saveSyscallRegisters(ConfigurePerfCounterCode, 3); - ConfigurePerfCounterCode.push_back(loadImmediate( - AArch64::X0, 64, APInt(64, getAuxiliaryMemoryStartAddress()))); // fd + // Load actual file descriptor from auxiliary memory location [address + 0] + // CounterFileDescriptor was stored at AuxiliaryMemoryMapping[0] + ConfigurePerfCounterCode.push_back( + loadImmediate(ArgumentRegisters::TempRegister, 64, + APInt(64, getAuxiliaryMemoryStartAddress()))); + ConfigurePerfCounterCode.push_back( + MCInstBuilder(AArch64::LDRWui) + .addReg(AArch64::W0) + .addReg(ArgumentRegisters::TempRegister) + .addImm(0)); ConfigurePerfCounterCode.push_back( loadImmediate(AArch64::X1, 64, APInt(64, Request))); // cmd ConfigurePerfCounterCode.push_back( From a5c1bb4ff62a6968f37f3279a59dc49bd24ad28a Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Mon, 22 Sep 2025 09:36:18 -0700 Subject: [PATCH 28/39] [llvm-exegesis] [AArch64] Replace getpagesize() with llvm::sys::Process::getPageSizeEstimate in auxiliary memory address calculation --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 07b74648f9d7c..188f53c1a52f2 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -16,12 +16,12 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Process.h" #include #if defined(__aarch64__) && defined(__linux__) #include #include -#include // for getpagesize() #ifdef HAVE_LIBPFM #include #endif // HAVE_LIBPFM @@ -463,7 +463,7 @@ uintptr_t 
ExegesisAArch64Target::getAuxiliaryMemoryStartAddress() const { // Return the second to last page in the virtual address space to try and // prevent interference with memory annotations in the snippet // FIXME: Why 2 pages? - return VAddressSpaceCeiling - (2 * getpagesize()); + return VAddressSpaceCeiling - (2 * llvm::sys::Process::getPageSizeEstimate()); } std::vector From f9e83d5f6a70c83197213b10a8dbaeb7acd55506 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Mon, 22 Sep 2025 09:36:44 -0700 Subject: [PATCH 29/39] [llvm-exegesis] [AArch64] Add conditional compilation for performance counter configuration with libpfm support --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 188f53c1a52f2..eb2a576b129a3 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -470,6 +470,7 @@ std::vector ExegesisAArch64Target::configurePerfCounter(long Request, bool SaveRegisters) const { std::vector ConfigurePerfCounterCode; +#ifdef HAVE_LIBPFM if (SaveRegisters) saveSyscallRegisters(ConfigurePerfCounterCode, 3); @@ -491,7 +492,7 @@ ExegesisAArch64Target::configurePerfCounter(long Request, if (SaveRegisters) restoreSyscallRegisters(ConfigurePerfCounterCode, 3); - +#endif // HAVE_LIBPFM return ConfigurePerfCounterCode; } From c77b16cb4a33be65dacdc49d5a5f8f45dab8fd13 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Mon, 22 Sep 2025 10:48:06 -0700 Subject: [PATCH 30/39] [llvm-exegesis] [AArch64] Refactor conditional compilation for performance counter config --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index eb2a576b129a3..344f326340043 100644 --- 
a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -470,10 +470,10 @@ std::vector ExegesisAArch64Target::configurePerfCounter(long Request, bool SaveRegisters) const { std::vector ConfigurePerfCounterCode; -#ifdef HAVE_LIBPFM if (SaveRegisters) saveSyscallRegisters(ConfigurePerfCounterCode, 3); +#ifdef HAVE_LIBPFM“ // Load actual file descriptor from auxiliary memory location [address + 0] // CounterFileDescriptor was stored at AuxiliaryMemoryMapping[0] ConfigurePerfCounterCode.push_back( @@ -489,10 +489,10 @@ ExegesisAArch64Target::configurePerfCounter(long Request, ConfigurePerfCounterCode.push_back( loadImmediate(AArch64::X2, 64, APInt(64, PERF_IOC_FLAG_GROUP))); // arg generateSysCall(SYS_ioctl, ConfigurePerfCounterCode); // SYS_ioctl is 29 +#endif // HAVE_LIBPFM if (SaveRegisters) restoreSyscallRegisters(ConfigurePerfCounterCode, 3); -#endif // HAVE_LIBPFM return ConfigurePerfCounterCode; } From 871392b23d104a7153013e892c22fad4119a4156 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Mon, 22 Sep 2025 11:12:28 -0700 Subject: [PATCH 31/39] format changes --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 344f326340043..73c689b2aa115 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -473,7 +473,7 @@ ExegesisAArch64Target::configurePerfCounter(long Request, if (SaveRegisters) saveSyscallRegisters(ConfigurePerfCounterCode, 3); -#ifdef HAVE_LIBPFM“ +#ifdef HAVE_LIBPFM // Load actual file descriptor from auxiliary memory location [address + 0] // CounterFileDescriptor was stored at AuxiliaryMemoryMapping[0] ConfigurePerfCounterCode.push_back( @@ -489,7 +489,7 @@ ExegesisAArch64Target::configurePerfCounter(long Request, ConfigurePerfCounterCode.push_back( 
loadImmediate(AArch64::X2, 64, APInt(64, PERF_IOC_FLAG_GROUP))); // arg generateSysCall(SYS_ioctl, ConfigurePerfCounterCode); // SYS_ioctl is 29 -#endif // HAVE_LIBPFM +#endif if (SaveRegisters) restoreSyscallRegisters(ConfigurePerfCounterCode, 3); From d6f237107d552f0326d6fc5812b83b4772d71977 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Tue, 23 Sep 2025 07:55:57 -0700 Subject: [PATCH 32/39] [llvm-exegesis] [AArch64] Add test for LD1B load instr in subprocess execution mode --- .../tools/llvm-exegesis/AArch64/error-resolution.s | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/llvm/test/tools/llvm-exegesis/AArch64/error-resolution.s b/llvm/test/tools/llvm-exegesis/AArch64/error-resolution.s index 66ca6fb31d78f..022747cdbaad9 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/error-resolution.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/error-resolution.s @@ -70,3 +70,15 @@ # UMOVvi16_idx0_throughput-NEXT: instructions: # UMOVvi16_idx0_throughput-NEXT: UMOVvi16_idx0 [[REG1:W[0-9]+|LR]] [[REG2:Q[0-9]+|LR]] i_0x0 # UMOVvi16_idx0_throughput: ... 
+ + +// Test for Load instruction execution by --execution-mode=subprocess pathway +// LD1B: ld1b { Zt.b }, Pg/z, [Xn, Xm] +# RUN: llvm-exegesis --mtriple=aarch64 --mcpu=neoverse-v2 --execution-mode=subprocess --benchmark-phase=prepare-and-assemble-snippet --opcode-name=LD1B --mode=inverse_throughput 2>&1 | FileCheck %s --check-prefix=LD1B_throughput + +# LD1B_throughput: --- +# LD1B_throughput-NEXT: mode: inverse_throughput +# LD1B_throughput-NEXT: key: +# LD1B_throughput-NEXT: instructions: +# LD1B_throughput-NEXT: - 'LD1B [[ZREG:Z[0-9]+|LR]] [[PREG:P[0-9]+|LR]] [[XREG1:X[0-9]+|LR]] [[XREG2:X[0-9]+|LR]]' +# LD1B_throughput-NOT: error: 'snippet crashed while running: Segmentation fault' From 819dfc00931a14856127f23fc505887c1021c1f9 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Fri, 26 Sep 2025 01:15:26 -0700 Subject: [PATCH 33/39] [llvm-exegesis] [AArch64] Update lit local config file to expand check for AArch64 for execution with perf --- llvm/test/tools/llvm-exegesis/lit.local.cfg | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/llvm/test/tools/llvm-exegesis/lit.local.cfg b/llvm/test/tools/llvm-exegesis/lit.local.cfg index 89110ed2816cd..cd16139e2c3c8 100644 --- a/llvm/test/tools/llvm-exegesis/lit.local.cfg +++ b/llvm/test/tools/llvm-exegesis/lit.local.cfg @@ -18,9 +18,19 @@ def can_use_perf_counters(mode, extra_options=[]): if llvm_exegesis_exe is None: print("could not find llvm-exegesis") return False + + opcode_name = None + if "x86_64" in config.root.host_triple or "i386" in config.root.host_triple: + opcode_name = "ADD64rr" + elif "aarch64" in config.root.host_triple: + opcode_name = "ADDXrr" + else: + # FIXME: Add opcode_name for other architectures for testing + return False + try: return_code = subprocess.call( - [llvm_exegesis_exe, "-mode", mode, "-opcode-name=ADD64rr"] + [llvm_exegesis_exe, "-mode", mode, "-opcode-name=" + opcode_name] + extra_options, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, From 
97218286ecadd2b54b2c306ab4ae8e83250c92fc Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Fri, 26 Sep 2025 02:03:32 -0700 Subject: [PATCH 34/39] [llvm-exegesis] [AArch64] Conditional compilation of setup and cleanup asm in configurePerfCounter --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 73c689b2aa115..0c2d00199c8da 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -173,7 +173,7 @@ void generateSysCall(long SyscallNumber, std::vector &GeneratedCode) { } /// Functions to save/restore system call registers -#ifdef __linux__ +#if defined(__linux__) && defined(HAVE_LIBPFM) constexpr std::array SyscallArgumentRegisters{ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7, @@ -203,7 +203,7 @@ static void restoreSyscallRegisters(std::vector &GeneratedCode, } generateRegisterStackPop(AArch64::X8, GeneratedCode); } -#endif // __linux__ +#endif // __linux__ && HAVE_LIBPFM #include "AArch64GenExegesis.inc" namespace { @@ -470,10 +470,10 @@ std::vector ExegesisAArch64Target::configurePerfCounter(long Request, bool SaveRegisters) const { std::vector ConfigurePerfCounterCode; +#ifdef HAVE_LIBPFM if (SaveRegisters) saveSyscallRegisters(ConfigurePerfCounterCode, 3); -#ifdef HAVE_LIBPFM // Load actual file descriptor from auxiliary memory location [address + 0] // CounterFileDescriptor was stored at AuxiliaryMemoryMapping[0] ConfigurePerfCounterCode.push_back( @@ -489,10 +489,10 @@ ExegesisAArch64Target::configurePerfCounter(long Request, ConfigurePerfCounterCode.push_back( loadImmediate(AArch64::X2, 64, APInt(64, PERF_IOC_FLAG_GROUP))); // arg generateSysCall(SYS_ioctl, ConfigurePerfCounterCode); // SYS_ioctl is 29 -#endif if (SaveRegisters) 
restoreSyscallRegisters(ConfigurePerfCounterCode, 3); +#endif return ConfigurePerfCounterCode; } From b45e004c162bb9d42d5c79f7c7888f7cf7e79a1d Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Mon, 29 Sep 2025 20:54:55 -0700 Subject: [PATCH 35/39] [llvm-exegesis] [AArch64] Reverted to manual snippet mmap to use anonymous mapping --- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index 0c2d00199c8da..c34e85d69d90e 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -384,6 +384,11 @@ ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, uintptr_t FileDescriptorAddress) const { // mmap(address, length, prot, flags, fd, offset=0) int flags = MAP_SHARED; + int fd = -1; + if (fd == -1) { + dbgs() << "Warning: generateMmap using anonymous mapping\n"; + flags |= MAP_ANONYMOUS; + } if (Address != 0) flags |= MAP_FIXED_NOREPLACE; std::vector MmapCode; @@ -394,14 +399,8 @@ ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, MmapCode.push_back(loadImmediate(AArch64::X2, 64, APInt(64, PROT_READ | PROT_WRITE))); // prot MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags - // Copy file descriptor location from aux memory into X4 - MmapCode.push_back(loadImmediate(ArgumentRegisters::TempRegister, 64, - APInt(64, FileDescriptorAddress))); - // Dereference file descriptor into X4 (32-bit load from [X16]) - MmapCode.push_back(MCInstBuilder(AArch64::LDRWui) - .addReg(AArch64::W4) - .addReg(ArgumentRegisters::TempRegister) - .addImm(0)); + // FIXME: Loading [FileDescriptorAddress] as fd leds syscall to return error + MmapCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset 
generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222 return MmapCode; From 8e8babe4a9ad6234f889f97b8f413f2b869de1e3 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Mon, 29 Sep 2025 21:38:36 -0700 Subject: [PATCH 36/39] [llvm-exegesis] [AArch64] Add manual snippet syscall test for memory mapping and performance counters --- .../AArch64/manual-snippet-syscall-test.s | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 llvm/test/tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s diff --git a/llvm/test/tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s b/llvm/test/tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s new file mode 100644 index 0000000000000..3fb60dd9094cd --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s @@ -0,0 +1,90 @@ +# REQUIRES: aarch64-registered-target, exegesis-can-measure-latency + +# LLVM-EXEGESIS-MEM-DEF test_mem 4096 16 +# LLVM-EXEGESIS-MEM-MAP test_mem 140737488093184 +# LLVM-EXEGESIS-DEFREG X0 65536 +# LLVM-EXEGESIS-DEFREG X1 0 +.arch armv8-a+sve + +# memory location = VAddressSpaceCeiling - Pagesize * var +# Aux memory loc = 0x800000000000 - 0x10000 * 2 = 0x7ffffffe0000 +mov x0, 140737488224256 +ldr x1, [x0, #0] + +# specific mem loc = 0x800000000000 - 0x10000 * 4 = 0x7ffffffc0000 +mov x0, 140737488093184 +ldr x1, [x0, #0] + + +# RUN: llvm-exegesis --mtriple=aarch64 --mcpu=neoverse-v2 --execution-mode=subprocess \ +# RUN: --mode=inverse_throughput --benchmark-phase=assemble-measured-code \ +# RUN: --dump-object-to-disk=%t.o --min-instructions=1 --snippets-file=%s 2>&1 + +# RUN: llvm-objdump -d %t.o > %t.disasm +# RUN: FileCheck %s --check-prefix=CHECK_SYSCALLS < %t.disasm + +# CHECK_SYSCALLS: : + +# Check for aux memory mapping syscall (syscall number 222/0xde) +# CHECK_SYSCALLS: mov x0, #0x7ffffffe0000 +# CHECK_SYSCALLS-NEXT: mov x1, #0x1000 +# CHECK_SYSCALLS-NEXT: mov x2, #0x3 +# CHECK_SYSCALLS-NEXT: mov x3, #0x21 +# CHECK_SYSCALLS-NEXT: movk x3, 
#0x10, lsl #16 +# CHECK_SYSCALLS-NEXT: mov x4, #-0x1 +# CHECK_SYSCALLS-NEXT: mov x5, #0x0 +# CHECK_SYSCALLS-NEXT: mov x8, #0xde +# CHECK_SYSCALLS-NEXT: svc #0 + +# CHECK_SYSCALLS: str x0, [sp, #-0x10]! + +# Check for specific memory mapping syscall +# CHECK_SYSCALLS: mov x0, #0x7ffffffc0000 +# CHECK_SYSCALLS-NEXT: mov x1, #0x1000 +# CHECK_SYSCALLS-NEXT: mov x2, #0x3 +# CHECK_SYSCALLS-NEXT: mov x3, #0x21 +# CHECK_SYSCALLS-NEXT: movk x3, #0x10, lsl #16 +# CHECK_SYSCALLS-NEXT: mov x4, #-0x1 +# CHECK_SYSCALLS-NEXT: mov x5, #0x0 +# CHECK_SYSCALLS-NEXT: mov x8, #0xde +# CHECK_SYSCALLS-NEXT: svc #0 + +# CHECK_SYSCALLS: ldr x0, [sp], #0x10 +# CHECK_SYSCALLS: mov x1, #0x0 + +# Check for performance counter control syscalls (ioctl - syscall number 29/0x1d) +# CHECK_SYSCALLS: str x8, [sp, #-0x10]! +# CHECK_SYSCALLS-NEXT: str x0, [sp, #-0x10]! +# CHECK_SYSCALLS-NEXT: str x1, [sp, #-0x10]! +# CHECK_SYSCALLS-NEXT: str x2, [sp, #-0x10]! +# CHECK_SYSCALLS-NEXT: mov x16, #0x7ffffffe0000 +# CHECK_SYSCALLS-NEXT: ldr w0, [x16] +# CHECK_SYSCALLS-NEXT: mov x1, #0x2403 +# CHECK_SYSCALLS-NEXT: mov x2, #0x1 +# CHECK_SYSCALLS-NEXT: mov x8, #0x1d +# CHECK_SYSCALLS-NEXT: svc #0 +# CHECK_SYSCALLS-NEXT: ldr x2, [sp], #0x10 +# CHECK_SYSCALLS-NEXT: ldr x1, [sp], #0x10 +# CHECK_SYSCALLS-NEXT: ldr x0, [sp], #0x10 +# CHECK_SYSCALLS-NEXT: ldr x8, [sp], #0x10 + +# === Test instruction execution === +# CHECK_SYSCALLS: mov x0, #0x7ffffffe0000 +# CHECK_SYSCALLS-NEXT: ldr x1, [x0] +# CHECK_SYSCALLS-NEXT: mov x0, #0x7ffffffc0000 +# CHECK_SYSCALLS-NEXT: ldr x1, [x0] + +# === ioctl syscall - stop performance counters === +# CHECK_SYSCALLS: mov x16, #0x7ffffffe0000 +# CHECK_SYSCALLS-NEXT: ldr w0, [x16] +# CHECK_SYSCALLS-NEXT: mov x1, #0x2401 +# CHECK_SYSCALLS-NEXT: mov x2, #0x1 +# CHECK_SYSCALLS-NEXT: mov x8, #0x1d +# CHECK_SYSCALLS-NEXT: svc #0 + +# Check for process exit syscall (exit - syscall number 93/0x5d) +# CHECK_SYSCALLS: mov x0, #0x0 +# CHECK_SYSCALLS-NEXT: mov x8, #0x5d +# CHECK_SYSCALLS-NEXT: svc 
#0 + +# CHECK_SYSCALLS-NEXT: ret \ No newline at end of file From 1c3cb5d9d9a7271e4d52de738eb044e8232a1d24 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Mon, 29 Sep 2025 23:15:58 -0700 Subject: [PATCH 37/39] [llvm-exegesis] [AArch64] Update latency test to use ADCXr opcode instead of ADDXrr --- .../tools/llvm-exegesis/AArch64/latency-by-opcode-name.s | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s b/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s index 1db28a84e2ff6..f1aa95b245c5b 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/latency-by-opcode-name.s @@ -1,13 +1,14 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=ADDXrr | FileCheck %s +# RUN: llvm-exegesis -mode=latency -opcode-name=ADCXr | FileCheck %s # REQUIRES: exegesis-can-execute-aarch64, exegesis-can-measure-latency CHECK: --- CHECK-NEXT: mode: latency CHECK-NEXT: key: CHECK-NEXT: instructions: -CHECK-NEXT: ADDXrr [[REG1:X[0-9]+|LR]] [[REG2:X[0-9]+|LR]] [[REG3:X[0-9]+|LR]] +CHECK-NEXT: - 'ADCXr [[REG1:X[0-9]+|LR]] [[REG2:X[0-9]+|LR]] [[REG3:X[0-9]+|LR]]' CHECK-NEXT: config: '' CHECK-NEXT: register_initial_values: CHECK-DAG: - '[[REG2]]=0x0' -# We don't check REG3 because in the case that REG2=REG3 the check would fail +CHECK-DAG: - '[[REG3]]=0x0' +CHECK-DAG: - 'NZCV=0x0' CHECK-DAG: ... 
From 422124ea5876042b9cc16d64b7b2802faeb098fc Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Tue, 30 Sep 2025 01:20:01 -0700 Subject: [PATCH 38/39] [llvm-exegesis] [AArch64] Add newline and formatting --- .../tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s | 2 +- llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s b/llvm/test/tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s index 3fb60dd9094cd..1e49378035d03 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/manual-snippet-syscall-test.s @@ -87,4 +87,4 @@ ldr x1, [x0, #0] # CHECK_SYSCALLS-NEXT: mov x8, #0x5d # CHECK_SYSCALLS-NEXT: svc #0 -# CHECK_SYSCALLS-NEXT: ret \ No newline at end of file +# CHECK_SYSCALLS-NEXT: ret diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index c34e85d69d90e..c950c0abbadc8 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -401,7 +401,7 @@ ExegesisAArch64Target::generateMmap(uintptr_t Address, size_t Length, MmapCode.push_back(loadImmediate(AArch64::X3, 64, APInt(64, flags))); // flags // FIXME: Loading [FileDescriptorAddress] as fd leds syscall to return error MmapCode.push_back(loadImmediate(AArch64::X4, 64, APInt(64, fd))); // fd - MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset + MmapCode.push_back(loadImmediate(AArch64::X5, 64, APInt(64, 0))); // offset generateSysCall(SYS_mmap, MmapCode); // SYS_mmap is 222 return MmapCode; } From ebb7b167b3d6b48b6e2fe744c6b623dc005e6910 Mon Sep 17 00:00:00 2001 From: lakshayk-nv Date: Tue, 30 Sep 2025 03:12:46 -0700 Subject: [PATCH 39/39] [llvm-exegesis] [AArch64] Add warning for ioctl syscall failure in configurePerfCounter --- 
llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp index c950c0abbadc8..588bb2e7aa4bb 100644 --- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -475,6 +475,9 @@ ExegesisAArch64Target::configurePerfCounter(long Request, // Load actual file descriptor from auxiliary memory location [address + 0] // CounterFileDescriptor was stored at AuxiliaryMemoryMapping[0] + dbgs() << "Warning: configurePerfCounter ioctl syscall failing\n"; + // FIXME: Ensure file descriptor is correctly populated at auxiliary memory + // address before ioctl syscall to avoid unreliable benchmark results ConfigurePerfCounterCode.push_back( loadImmediate(ArgumentRegisters::TempRegister, 64, APInt(64, getAuxiliaryMemoryStartAddress())));