Skip to content

[AArch64] Mark neon.stN intrinsics as writeonly #145289

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_1Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;

class AdvSIMD_2Vec_Load_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, llvm_anyvector_ty],
Expand All @@ -626,11 +626,11 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_2Vec_Store_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
class AdvSIMD_2Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;

class AdvSIMD_3Vec_Load_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
Expand All @@ -644,12 +644,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_3Vec_Store_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
class AdvSIMD_3Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;

class AdvSIMD_4Vec_Load_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
Expand All @@ -667,12 +667,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
class AdvSIMD_4Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<5>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<5>>]>;
}

// Memory ops
Expand Down
21 changes: 11 additions & 10 deletions llvm/include/llvm/IR/IntrinsicsARM.td
Original file line number Diff line number Diff line change
Expand Up @@ -743,32 +743,33 @@ def int_arm_neon_vld4dup : DefaultAttrsIntrinsic<
// Interleaving vector stores from N-element structures.
// Source operands are: the address, the N vectors, and the alignment.
def int_arm_neon_vst1 : DefaultAttrsIntrinsic<
[], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty], [IntrArgMemOnly]>;
[], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_arm_neon_vst2 : DefaultAttrsIntrinsic<
[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;
def int_arm_neon_vst3 : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;
def int_arm_neon_vst4 : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
LLVMMatchType<1>, llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;

def int_arm_neon_vst1x2 : DefaultAttrsIntrinsic<
[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
def int_arm_neon_vst1x3 : DefaultAttrsIntrinsic<
[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
def int_arm_neon_vst1x4 : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
LLVMMatchType<1>],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;

// Vector store N-element structure from one lane.
// Source operands are: the address, the N vectors, the lane number, and
Expand All @@ -777,17 +778,17 @@ def int_arm_neon_vst2lane : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty,
llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;
def int_arm_neon_vst3lane : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
llvm_i32_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;
def int_arm_neon_vst4lane : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
LLVMMatchType<1>, llvm_i32_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;

// Vector bitwise select.
def int_arm_neon_vbsl : DefaultAttrsIntrinsic<
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/BasicAA/cs-cs-arm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ entry:
; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16)
; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16)
; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16)
; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16)
; CHECK: Just Mod: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16)
; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16)
; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16)
; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/BasicAA/intrinsics-arm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,5 @@ declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr, i32) nounwind readonly
declare void @llvm.arm.neon.vst1.p0.v8i16(ptr, <8 x i16>, i32) nounwind

; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }
; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
; CHECK: attributes [[ATTR]] = { nounwind }
10 changes: 10 additions & 0 deletions llvm/test/Assembler/aarch64-intrinsics-attributes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,15 @@ declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>)
; CHECK: declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]]
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)

; NEON structured-store intrinsics: each declaration below must print with the
; same function attribute group. The first directive binds the FileCheck
; variable; the following two only use it, so a declaration that ended up with
; a different attribute group would make the test fail instead of silently
; rebinding the variable.
; CHECK: declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN:#[0-9]+]]
declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)

; CHECK: declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN]]
declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr)

; CHECK: declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN]]
declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)

; CHECK: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { nofree nounwind willreturn }
; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
Loading