81 changes: 68 additions & 13 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4513,41 +4513,96 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,

const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;

// General case: splat the first operand and slide the other operands down one
// by one to form a vector. Alternatively, if the last operand is an
// extraction from element 0 of a vector, we can use that vector as the start
// value and slide up instead of down, so that we can avoid the splat.
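// For example, for (e0, e1, e2, extract_elt(v, 0)) the slide-down sequence
// would be: splat e0, vslide1down e1, vslide1down e2, and finally a scalar
// extract of v[0] fed into one more vslide1down. Starting from v instead only
// needs vslide1up e2, vslide1up e1, vslide1up e0 - no splat and no scalar
// extract of the source vector.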
SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
SDValue EVec;
bool SlideUp = false;
auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
if (SlideUp)
return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
Mask, VL, Policy);
return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
Mask, VL, Policy);
};

// Find the first non-undef operand from the tail.
auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(),
[](SDValue V) { return !V.isUndef(); });
if (ItLastNonUndef != Operands.rend()) {
using namespace SDPatternMatch;
// Check if the last non-undef operand was an extraction.
SlideUp = sd_match(*ItLastNonUndef, m_ExtractElt(m_Value(EVec), m_Zero()));
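// Only element 0 of EVec can be reused this way: each remaining operand (and
// each run of undefs) shifts the start vector up by one more lane, so EVec's
// element 0 ends up in the last lane of the result.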
}

if (SlideUp) {
MVT EVecContainerVT = EVec.getSimpleValueType();
// Make sure the original vector has a scalable vector type.
if (EVecContainerVT.isFixedLengthVector()) {
EVecContainerVT =
getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
}

// Adapt EVec's type into ContainerVT.
if (EVecContainerVT.getVectorMinNumElements() <
ContainerVT.getVectorMinNumElements())
EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
else
EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);

// Reverse the elements as we're going to slide up from the last element.
std::reverse(Operands.begin(), Operands.end());
}

SDValue Vec;
UndefCount = 0;
- for (SDValue V : Op->ops()) {
+ for (SDValue V : Operands) {
if (V.isUndef()) {
UndefCount++;
continue;
}

- // Start our sequence with a TA splat in the hopes that hardware is able to
- // recognize there's no dependency on the prior value of our temporary
- // register.
+ // Start our sequence with either a TA splat or extract source in the
+ // hopes that hardware is able to recognize there's no dependency on the
+ // prior value of our temporary register.
if (!Vec) {
- Vec = DAG.getSplatVector(VT, DL, V);
- Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ if (SlideUp) {
+ Vec = EVec;
+ } else {
+ Vec = DAG.getSplatVector(VT, DL, V);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ }

UndefCount = 0;
continue;
}

if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
- Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
- Vec, Offset, Mask, VL, Policy);
+ Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
+ VL);
UndefCount = 0;
}
- auto OpCode =
- VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;

+ unsigned Opcode;
+ if (VT.isFloatingPoint())
+ Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
+ else
+ Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;

if (!VT.isFloatingPoint())
V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
- Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+ Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
V, Mask, VL);
}
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
- Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
- Vec, Offset, Mask, VL, Policy);
+ Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
+ VL);
}
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
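The slide-up path applies when the last operand of the BUILD_VECTOR is an extract of element 0 of some other vector, which is the shape the tests below exercise. A minimal IR sketch of the pattern (value names are illustrative only):

  %lane = extractelement <4 x double> %v, i64 0
  %b0 = insertelement <4 x double> poison, double %e0, i64 0
  %b1 = insertelement <4 x double> %b0, double %e1, i64 1
  %b2 = insertelement <4 x double> %b1, double %e2, i64 2
  %b3 = insertelement <4 x double> %b2, double %lane, i64 3

Rather than splatting %e0 and sliding the remaining elements down (which would also need a vfmv.f.s to read %lane back as a scalar), the lowering can start from %v and issue three vfslide1up steps with %e2, %e1, and %e0.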
171 changes: 171 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1828,3 +1828,174 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
%v7 = insertelement <8 x double> %v6, double %e7, i64 7
ret <8 x double> %v7
}

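; The source vector %v arrives in v8 with the value destined for lane 7 already
; in its lane 0, so the whole result is built with vfslide1up steps - no splat
; of the first scalar and no vfmv.f.s to read the element back.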
define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6) vscale_range(4, 128) {
; CHECK-LABEL: buildvec_slideup:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma
; CHECK-NEXT: vfslide1up.vf v10, v8, fa6
; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
; CHECK-NEXT: vfslide1up.vf v10, v8, fa4
; CHECK-NEXT: vfslide1up.vf v8, v10, fa3
; CHECK-NEXT: vfslide1up.vf v10, v8, fa2
; CHECK-NEXT: vfslide1up.vf v12, v10, fa1
; CHECK-NEXT: vfslide1up.vf v8, v12, fa0
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
%v1 = insertelement <8 x double> %v0, double %e1, i64 1
%v2 = insertelement <8 x double> %v1, double %e2, i64 2
%v3 = insertelement <8 x double> %v2, double %e3, i64 3
%v4 = insertelement <8 x double> %v3, double %e4, i64 4
%v5 = insertelement <8 x double> %v4, double %e5, i64 5
%v6 = insertelement <8 x double> %v5, double %e6, i64 6
%e7 = extractelement <4 x double> %v, i64 0
%v7 = insertelement <8 x double> %v6, double %e7, i64 7
ret <8 x double> %v7
}

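; In this test and the next one, a run of undef lanes is folded into a single
; vslideup.vi by the run length (here "vslideup.vi v8, v10, 2") instead of
; extra slide1up steps.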
define <8 x double> @buildvec_slideup_leading_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) {
; CHECK-LABEL: buildvec_slideup_leading_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma
; CHECK-NEXT: vfslide1up.vf v10, v8, fa4
; CHECK-NEXT: vfslide1up.vf v8, v10, fa3
; CHECK-NEXT: vfslide1up.vf v10, v8, fa2
; CHECK-NEXT: vfslide1up.vf v8, v10, fa1
; CHECK-NEXT: vfslide1up.vf v10, v8, fa0
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: ret
%v2 = insertelement <8 x double> poison, double %e0, i64 2
%v3 = insertelement <8 x double> %v2, double %e1, i64 3
%v4 = insertelement <8 x double> %v3, double %e2, i64 4
%v5 = insertelement <8 x double> %v4, double %e3, i64 5
%v6 = insertelement <8 x double> %v5, double %e4, i64 6
%e5 = extractelement <4 x double> %v, i64 0
%v7 = insertelement <8 x double> %v6, double %e5, i64 7
ret <8 x double> %v7
}

define <8 x double> @buildvec_slideup_mid_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) {
; CHECK-LABEL: buildvec_slideup_mid_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma
; CHECK-NEXT: vfslide1up.vf v10, v8, fa4
; CHECK-NEXT: vfslide1up.vf v8, v10, fa3
; CHECK-NEXT: vfslide1up.vf v10, v8, fa2
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vfslide1up.vf v10, v8, fa1
; CHECK-NEXT: vfslide1up.vf v8, v10, fa0
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
%v1 = insertelement <8 x double> %v0, double %e1, i64 1
%v4 = insertelement <8 x double> %v1, double %e2, i64 4
%v5 = insertelement <8 x double> %v4, double %e3, i64 5
%v6 = insertelement <8 x double> %v5, double %e4, i64 6
%e5 = extractelement <4 x double> %v, i64 0
%v7 = insertelement <8 x double> %v6, double %e5, i64 7
ret <8 x double> %v7
}

define <8 x double> @buildvec_slideup_trailing_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) {
; CHECK-LABEL: buildvec_slideup_trailing_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma
; CHECK-NEXT: vfslide1up.vf v10, v8, fa4
; CHECK-NEXT: vfslide1up.vf v8, v10, fa3
; CHECK-NEXT: vfslide1up.vf v10, v8, fa2
; CHECK-NEXT: vfslide1up.vf v12, v10, fa1
; CHECK-NEXT: vfslide1up.vf v8, v12, fa0
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
%v1 = insertelement <8 x double> %v0, double %e1, i64 1
%v2 = insertelement <8 x double> %v1, double %e2, i64 2
%v3 = insertelement <8 x double> %v2, double %e3, i64 3
%v4 = insertelement <8 x double> %v3, double %e4, i64 4
%e5 = extractelement <4 x double> %v, i64 0
%v5 = insertelement <8 x double> %v4, double %e5, i64 5
%v6 = insertelement <8 x double> %v5, double poison, i64 6
%v7 = insertelement <8 x double> %v6, double poison, i64 7
ret <8 x double> %v7
}

; Negative test for slideup lowering where the extract_element is not the build_vector's last operand.
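; The lowering falls back to the splat + vfslide1down form, so a vfmv.f.s is
; needed to read element 0 of %v back into a scalar register.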
define <8 x double> @buildvec_slideup_not_last_element(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e7) vscale_range(4, 128) {
; CHECK-LABEL: buildvec_slideup_not_last_element:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma
; CHECK-NEXT: vfmv.f.s ft0, v8
; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, ft0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
%v1 = insertelement <8 x double> %v0, double %e1, i64 1
%v2 = insertelement <8 x double> %v1, double %e2, i64 2
%v3 = insertelement <8 x double> %v2, double %e3, i64 3
%v4 = insertelement <8 x double> %v3, double %e4, i64 4
%v5 = insertelement <8 x double> %v4, double %e5, i64 5
%e6 = extractelement <4 x double> %v, i64 0
%v6 = insertelement <8 x double> %v5, double %e6, i64 6
%v7 = insertelement <8 x double> %v6, double %e7, i64 7
ret <8 x double> %v7
}

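; The last reduction result is still in lane 0 of a vector register, so the
; build_vector is formed by sliding the other three sums up in front of it
; rather than splatting the first element.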
define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
; CHECK-LABEL: buildvec_vfredusum:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v16
; CHECK-NEXT: vfredusum.vs v9, v10, v16
; CHECK-NEXT: vfredusum.vs v10, v12, v16
; CHECK-NEXT: vfmv.f.s fa5, v8
; CHECK-NEXT: vfmv.f.s fa4, v9
; CHECK-NEXT: vfmv.f.s fa3, v10
; CHECK-NEXT: vfredusum.vs v8, v14, v16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vfslide1up.vf v9, v8, fa3
; CHECK-NEXT: vfslide1up.vf v10, v9, fa4
; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
; CHECK-NEXT: ret
%247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%248 = insertelement <4 x float> poison, float %247, i64 0
%250 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
%251 = insertelement <4 x float> %248, float %250, i64 1
%252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
%253 = insertelement <4 x float> %251, float %252, i64 2
%254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
%255 = insertelement <4 x float> %253, float %254, i64 3
ret <4 x float> %255
}

define <4 x float> @buildvec_vfredosum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
; CHECK-LABEL: buildvec_vfredosum:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v16, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v16
; CHECK-NEXT: vfredosum.vs v9, v10, v16
; CHECK-NEXT: vfredosum.vs v10, v12, v16
; CHECK-NEXT: vfmv.f.s fa5, v8
; CHECK-NEXT: vfmv.f.s fa4, v9
; CHECK-NEXT: vfmv.f.s fa3, v10
; CHECK-NEXT: vfredosum.vs v8, v14, v16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vfslide1up.vf v9, v8, fa3
; CHECK-NEXT: vfslide1up.vf v10, v9, fa4
; CHECK-NEXT: vfslide1up.vf v8, v10, fa5
; CHECK-NEXT: ret
%247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
%248 = insertelement <4 x float> poison, float %247, i64 0
%250 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
%251 = insertelement <4 x float> %248, float %250, i64 1
%252 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
%253 = insertelement <4 x float> %251, float %252, i64 2
%254 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
%255 = insertelement <4 x float> %253, float %254, i64 3
ret <4 x float> %255
}