From 05be8f612ea5a0fbaba007a97627c838fbdcf078 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 23 Jun 2025 10:24:37 +0200 Subject: [PATCH 1/3] [AArch64] Mark neon.stN intrinsics as writeonly I found this peculiar comment in EarlyCSE: https://github.com/llvm/llvm-project/blob/1c78d8d9d7bcb4b20910047ad7db35f177a17c8c/llvm/lib/Transforms/Scalar/EarlyCSE.cpp#L1620-L1624 Looking back over history, this seems to be referring to the aarch64.neon.stN intrinsics, which are indeed not marked writeonly (though the ldN intrinsics are readonly). Unless I'm missing something, these do not read memory. --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 0ec5f5163118e..3606bbe29eb93 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -612,7 +612,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_1Vec_Store_Lane_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<ArgIndex<2>>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<2>>]>; class AdvSIMD_2Vec_Load_Intrinsic : DefaultAttrsIntrinsic<[LLVMMatchType<0>, llvm_anyvector_ty], @@ -626,11 +626,11 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
class AdvSIMD_2Vec_Store_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<ArgIndex<2>>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<2>>]>; class AdvSIMD_2Vec_Store_Lane_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<ArgIndex<3>>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<3>>]>; class AdvSIMD_3Vec_Load_Intrinsic : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], @@ -644,12 +644,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_3Vec_Store_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<ArgIndex<3>>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<3>>]>; class AdvSIMD_3Vec_Store_Lane_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<ArgIndex<4>>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<4>>]>; class AdvSIMD_4Vec_Load_Intrinsic : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, @@ -667,12 +667,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<ArgIndex<4>>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<4>>]>; class AdvSIMD_4Vec_Store_Lane_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<ArgIndex<5>>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<5>>]>; } // Memory ops From 05f72a2306ac89d80bf6f63599df175901324d8b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 Jun 2025 12:13:40 +0200 Subject: [PATCH 2/3] add arm intrinsics --- llvm/include/llvm/IR/IntrinsicsARM.td | 21 ++++++++++---------- llvm/test/Analysis/BasicAA/cs-cs-arm.ll | 2 +- llvm/test/Analysis/BasicAA/intrinsics-arm.ll | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index e543f64e6c0db..b5145fcf29353 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -743,32 +743,33 @@ def int_arm_neon_vld4dup : DefaultAttrsIntrinsic< // Interleaving vector stores from N-element structures. // Source operands are: the address, the N vectors, and the alignment. 
def int_arm_neon_vst1 : DefaultAttrsIntrinsic< - [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty], [IntrArgMemOnly]>; + [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst2 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst3 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst4 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst1x2 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; def int_arm_neon_vst1x3 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; def int_arm_neon_vst1x4 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; // Vector store N-element structure from one lane. 
// Source operands are: the address, the N vectors, the lane number, and @@ -777,17 +778,17 @@ def int_arm_neon_vst2lane : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst3lane : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst4lane : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; // Vector bitwise select. def int_arm_neon_vbsl : DefaultAttrsIntrinsic< diff --git a/llvm/test/Analysis/BasicAA/cs-cs-arm.ll b/llvm/test/Analysis/BasicAA/cs-cs-arm.ll index 43e7be2ee20ee..ac5e0db001110 100644 --- a/llvm/test/Analysis/BasicAA/cs-cs-arm.ll +++ b/llvm/test/Analysis/BasicAA/cs-cs-arm.ll @@ -24,7 +24,7 @@ entry: ; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16) -; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16) +; CHECK: Just Mod: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16) ; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) ; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16) diff --git a/llvm/test/Analysis/BasicAA/intrinsics-arm.ll 
b/llvm/test/Analysis/BasicAA/intrinsics-arm.ll index 7772cca2777f0..b7c64f18cdfab 100644 --- a/llvm/test/Analysis/BasicAA/intrinsics-arm.ll +++ b/llvm/test/Analysis/BasicAA/intrinsics-arm.ll @@ -27,5 +27,5 @@ declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr, i32) nounwind readonly declare void @llvm.arm.neon.vst1.p0.v8i16(ptr, <8 x i16>, i32) nounwind ; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } ; CHECK: attributes [[ATTR]] = { nounwind } From 25bc66cb6217c41cd2ef984c6599d3acca1f50f5 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 Jun 2025 12:37:57 +0200 Subject: [PATCH 3/3] Add some more tests --- llvm/test/Assembler/aarch64-intrinsics-attributes.ll | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/llvm/test/Assembler/aarch64-intrinsics-attributes.ll b/llvm/test/Assembler/aarch64-intrinsics-attributes.ll index 31b7101fba116..33f2758a4b18c 100644 --- a/llvm/test/Assembler/aarch64-intrinsics-attributes.ll +++ b/llvm/test/Assembler/aarch64-intrinsics-attributes.ll @@ -22,5 +22,15 @@ declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>) ; CHECK: declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]] declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32) +; CHECK: declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN:#[0-9]+]] +declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) + +; CHECK: declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN:#[0-9]+]] +declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) + 
+; CHECK: declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN:#[0-9]+]] +declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) + ; CHECK: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { nofree nounwind willreturn } ; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }