Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2713,6 +2713,10 @@ unsigned RISCVTTIImpl::getMinTripCountTailFoldingThreshold() const {
return RVVMinTripCount;
}

/// TTI hook answering whether alternate-opcode vectorization is preferred.
/// The decision is delegated entirely to the subtarget: the result is
/// whatever ST->enableUnalignedVectorMem() reports.
bool RISCVTTIImpl::preferAlternateOpcodeVectorization() const {
  const bool UnalignedVectorMemEnabled = ST->enableUnalignedVectorMem();
  return UnalignedVectorMemEnabled;
}

TTI::AddressingModeKind
RISCVTTIImpl::getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {

unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override;

bool preferAlternateOpcodeVectorization() const override { return false; }
bool preferAlternateOpcodeVectorization() const override;

bool preferEpilogueVectorization() const override {
// Epilogue vectorization is usually unprofitable - tail folding or
Expand Down
82 changes: 82 additions & 0 deletions llvm/test/Transforms/SLPVectorizer/RISCV/alt-opc-vectorization.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=riscv64 -mattr=+v,+unaligned-vector-mem \
; RUN: -passes=slp-vectorizer -S \
; RUN: < %s | FileCheck %s --check-prefixes=UNALIGNED_VEC_MEM

; RUN: opt -mtriple=riscv64 -mattr=+v \
; RUN: -passes=slp-vectorizer -S \
; RUN: < %s | FileCheck %s --check-prefixes=NO_UNALIGNED_VEC_MEM

; Test input: eight scalar i8 loads from %pl, each combined with %x (four adds
; followed by four subs), with the results stored to eight consecutive bytes
; at %ps. The add/sub mix gives the SLP vectorizer an alternate-opcode bundle.
define void @alternate_opcodes(ptr %pl, ptr %ps, i8 %x) {
; UNALIGNED_VEC_MEM-LABEL: define void @alternate_opcodes(
; UNALIGNED_VEC_MEM-SAME: ptr [[PL:%.*]], ptr [[PS:%.*]], i8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; UNALIGNED_VEC_MEM-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
; UNALIGNED_VEC_MEM-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
; UNALIGNED_VEC_MEM-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr align 1 [[GEP_L0]], i64 20, <8 x i1> splat (i1 true), i32 8)
; UNALIGNED_VEC_MEM-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[X]], i32 0
; UNALIGNED_VEC_MEM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> poison, <8 x i32> zeroinitializer
; UNALIGNED_VEC_MEM-NEXT: [[TMP4:%.*]] = add <8 x i8> [[TMP1]], [[TMP3]]
; UNALIGNED_VEC_MEM-NEXT: [[TMP5:%.*]] = sub <8 x i8> [[TMP1]], [[TMP3]]
; UNALIGNED_VEC_MEM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; UNALIGNED_VEC_MEM-NEXT: store <8 x i8> [[TMP2]], ptr [[GEP_S0]], align 1
; UNALIGNED_VEC_MEM-NEXT: ret void
;
; NO_UNALIGNED_VEC_MEM-LABEL: define void @alternate_opcodes(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the difference? I may miss it…

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No difference. I am trying to trigger the split vectorization path, but I can' t yet come up with simple IR to do that

; NO_UNALIGNED_VEC_MEM-SAME: ptr [[PL:%.*]], ptr [[PS:%.*]], i8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; NO_UNALIGNED_VEC_MEM-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
; NO_UNALIGNED_VEC_MEM-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr align 1 [[GEP_L0]], i64 20, <8 x i1> splat (i1 true), i32 8)
; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[X]], i32 0
; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> poison, <8 x i32> zeroinitializer
; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP4:%.*]] = add <8 x i8> [[TMP1]], [[TMP3]]
; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP5:%.*]] = sub <8 x i8> [[TMP1]], [[TMP3]]
; NO_UNALIGNED_VEC_MEM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; NO_UNALIGNED_VEC_MEM-NEXT: store <8 x i8> [[TMP2]], ptr [[GEP_S0]], align 1
; NO_UNALIGNED_VEC_MEM-NEXT: ret void
;
; Source addresses: eight byte pointers into %pl at offsets 0, 20, ..., 140,
; i.e. a constant 20-byte stride between consecutive elements.
%gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
%gep_l1 = getelementptr inbounds i8, ptr %pl, i64 20
%gep_l2 = getelementptr inbounds i8, ptr %pl, i64 40
%gep_l3 = getelementptr inbounds i8, ptr %pl, i64 60
%gep_l4 = getelementptr inbounds i8, ptr %pl, i64 80
%gep_l5 = getelementptr inbounds i8, ptr %pl, i64 100
%gep_l6 = getelementptr inbounds i8, ptr %pl, i64 120
%gep_l7 = getelementptr inbounds i8, ptr %pl, i64 140

; One scalar i8 load per source address.
%load0 = load i8, ptr %gep_l0
%load1 = load i8, ptr %gep_l1
%load2 = load i8, ptr %gep_l2
%load3 = load i8, ptr %gep_l3
%load4 = load i8, ptr %gep_l4
%load5 = load i8, ptr %gep_l5
%load6 = load i8, ptr %gep_l6
%load7 = load i8, ptr %gep_l7

; Elements 0-3 add %x and elements 4-7 subtract %x, so the eight operations
; use two different opcodes over the same operands.
%add0 = add i8 %load0, %x
%add1 = add i8 %load1, %x
%add2 = add i8 %load2, %x
%add3 = add i8 %load3, %x
%sub0 = sub i8 %load4, %x
%sub1 = sub i8 %load5, %x
%sub2 = sub i8 %load6, %x
%sub3 = sub i8 %load7, %x

; Destination addresses: eight consecutive bytes starting at %ps.
%gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
%gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
%gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
%gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
%gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4
%gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5
%gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6
%gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7

; Store the four sums followed by the four differences, in element order.
store i8 %add0, ptr %gep_s0
store i8 %add1, ptr %gep_s1
store i8 %add2, ptr %gep_s2
store i8 %add3, ptr %gep_s3
store i8 %sub0, ptr %gep_s4
store i8 %sub1, ptr %gep_s5
store i8 %sub2, ptr %gep_s6
store i8 %sub3, ptr %gep_s7
ret void
}