Skip to content

Commit e1007fa

Browse files
author
Martien de Jong
committed
[AIE] Supply vector implementation for scalar FADD and FMUL
1 parent e89d756 commit e1007fa

File tree

14 files changed

+447
-159
lines changed

14 files changed

+447
-159
lines changed

llvm/lib/Target/AIE/AIE2LegalizerInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,9 @@ AIE2LegalizerInfo::AIE2LegalizerInfo(const AIE2Subtarget &ST) : AIEHelper(ST) {
205205

206206
getActionDefinitionsBuilder({G_FADD, G_FSUB})
207207
.legalFor({V16S32})
208-
.customFor({S16})
209-
.libcallFor({S32, S64});
208+
.clampScalar(0, S32, S64)
209+
.customFor({S32})
210+
.libcallFor({S64});
210211

211212
getActionDefinitionsBuilder({G_FMUL, G_FDIV, G_FREM})
212213
.clampScalar(0, S32, S64)

llvm/lib/Target/AIE/AIECombine.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,12 @@ def combine_narrow_zext_s20 : GICombineRule<
195195
[{ return matchNarrowZext(*${root}, MRI, Observer, ${matchinfo}); }]),
196196
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
197197

198+
// Combine rule rooted at G_FMUL. matchWidenFMul decides whether the rule
// applies and, if so, stores a build function in $matchinfo; the apply step
// hands that build function to Helper.applyBuildFn.
def combine_widen_fmul : GICombineRule<
199+
(defs root:$root, build_fn_matchinfo:$matchinfo),
200+
(match (wip_match_opcode G_FMUL): $root,
201+
[{ return matchWidenFMul(*${root}, MRI, Observer, ${matchinfo}); }]),
202+
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
203+
198204
def concat_unmerge_matchdata : GIDefMatchData<"AIEConcatUnmergeCombineMatchData">;
199205
def combine_concat_unmerge_phis : GICombineRule <
200206
(defs root:$root, concat_unmerge_matchdata:$matchinfo),
@@ -299,6 +305,7 @@ def aie2p_additional_combines : GICombineGroup<[
299305
combine_vector_shuffle_to_extract_insert_elt,
300306
combine_vector_shuffle_concat_extracted_subvectors,
301307
combine_paired_extracts,
308+
combine_widen_fmul,
302309
combine_vector_shuffle_to_extract_insert_elt_to_broadcast,
303310
combine_bitcast_unmerge_swap,
304311
combine_phi_bitcast_swap

llvm/lib/Target/AIE/AIECombinerHelper.cpp

Lines changed: 98 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@ cl::opt<bool> MemsetOptimizations(
6868

6969
namespace {
7070

71+
static constexpr const LLT S8 = LLT::scalar(8);
72+
static constexpr const LLT S16 = LLT::scalar(16);
73+
static constexpr const LLT S32 = LLT::scalar(32);
74+
75+
// Returns the instruction info of builder B as an AIEBaseInstrInfo.
// The conversion is an unchecked static_cast, so B is presumably always
// building for an AIE target when this is called.
const llvm::AIEBaseInstrInfo &getAIETII(MachineIRBuilder &B) {
76+
return static_cast<const AIEBaseInstrInfo &>(B.getTII());
77+
}
78+
7179
bool isGenericExtractOpcode(unsigned Opc, const AIEBaseInstrInfo &TII) {
7280
// Check if it's either SEXT or ZEXT extract
7381
const unsigned ExtractSextOpc = TII.getGenericExtractVectorEltOpcode(true);
@@ -78,10 +86,22 @@ bool isGenericExtractOpcode(unsigned Opc, const AIEBaseInstrInfo &TII) {
7886
return Opc == ExtractZextOpc;
7987
}
8088

81-
} // namespace
89+
// Emits instructions that place scalar Src into element 0 of an otherwise
// undefined vector of type VecTy, and returns the register holding that
// vector. The insertion uses the target's generic insert-vector-element
// opcode with an s32 index constant of 0.
Register buildScalarAsVector(MachineIRBuilder &B, Register Src, LLT VecTy) {
90+
auto *MRI = B.getMRI();
91+
// Any source that is not already s32 is any-extended to s32 first.
// NOTE(review): this presumes Src is narrower than 32 bits - confirm callers.
if (MRI->getType(Src) != S32) {
92+
Src = B.buildAnyExt(S32, Src).getReg(0);
93+
}
94+
const AIEBaseInstrInfo &TII = getAIETII(B);
95+
const Register IdxReg = B.buildConstant(S32, 0).getReg(0);
96+
const Register UndefVec = B.buildUndef(VecTy).getReg(0);
97+
const unsigned InsertEltOpc = TII.getGenericInsertVectorEltOpcode();
98+
Register Vector =
99+
B.buildInstr(InsertEltOpc, {VecTy}, {UndefVec, Src, IdxReg}).getReg(0);
100+
101+
return Vector;
102+
}
82103

83-
static unsigned getNumMaskUndefs(const ArrayRef<int> &Mask,
84-
unsigned StartIndex) {
104+
unsigned getNumMaskUndefs(const ArrayRef<int> &Mask, unsigned StartIndex) {
85105
unsigned Count = 0;
86106
for (unsigned I = StartIndex; I < Mask.size(); ++I) {
87107
if (Mask[I] == -1) {
@@ -91,6 +111,8 @@ static unsigned getNumMaskUndefs(const ArrayRef<int> &Mask,
91111
return Count;
92112
}
93113

114+
} // namespace
115+
94116
bool MaskMatch::isValidMask(const ArrayRef<int> Mask) const {
95117
for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
96118
if (Mask[Idx] == -1)
@@ -1069,8 +1091,6 @@ bool llvm::matchExtractVecEltAndExt(
10691091
assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
10701092
"Expected a extract_vector_elt");
10711093
Register DstReg = MI.getOperand(0).getReg();
1072-
const LLT S8 = LLT::scalar(8);
1073-
const LLT S16 = LLT::scalar(16);
10741094
LLT SrcVecTy = MRI.getType(MI.getOperand(1).getReg());
10751095
// Extracts from vectors <= 64-bits are lowered to bit-arithmetic in
10761096
// legalization
@@ -3534,6 +3554,79 @@ bool llvm::matchNarrowZext(MachineInstr &MI, MachineRegisterInfo &MRI,
35343554
return false;
35353555
}
35363556

3557+
namespace {
3558+
// We match widenings from 16 bit, with possible negations on top.
3559+
// Negations commute with conversions and multiplications. We keep track of the
3560+
// total number of negations modulo two.
3561+
// Result type of matchExtend: the register found underneath the matched
// extension, together with a flag recording whether an odd number of
// negations was seen on the way down.
class ExtendOperand {
3562+
public:
3563+
// Register that feeds the matched extension; stays default-constructed when
// nothing was matched.
Register Source{};
3564+
// True when the matched value must be negated.
bool Negate = false;
3565+
// Yields the same source with the negation flag toggled.
ExtendOperand operator-() { return {Source, !Negate}; }
3566+
// Used as the "did it match" test: the result is Source converted to bool.
operator bool() { return Source; }
3567+
};
3568+
3569+
// Looks through the definition of SrcReg for a G_FPEXT from a 16-bit value,
// optionally wrapped in any number of G_FNEG instructions.
// Returns the 16-bit source register and the parity of the negations, or a
// default-constructed (non-matching) ExtendOperand when the chain has any
// other shape.
ExtendOperand matchExtend(Register SrcReg, MachineRegisterInfo &MRI) {
3570+
// NOTE(review): SrcMI is dereferenced without a null check - confirm that
// every register reaching this point has a defining instruction.
const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
3571+
if (SrcMI->getOpcode() == TargetOpcode::G_FPEXT) {
3572+
const Register HalfOp = SrcMI->getOperand(1).getReg();
3573+
// Only extensions whose input is 16 bits wide are accepted.
if (MRI.getType(HalfOp) != S16) {
3574+
return {};
3575+
}
3576+
return {HalfOp, false};
3577+
}
3578+
if (SrcMI->getOpcode() == TargetOpcode::G_FNEG) {
3579+
// Recurse through the negation and flip the flag of whatever is found.
// A failed inner match keeps an empty Source, so it still tests as false.
return -matchExtend(SrcMI->getOperand(1).getReg(), MRI);
3580+
}
3581+
return {};
3582+
}
3583+
} // namespace
3584+
3585+
// Matches a G_FMUL whose two operands are both 16-bit values extended with
// G_FPEXT (possibly under G_FNEG), on AIE2P only. On success MatchInfo is set
// to a build function that performs the multiplication with the
// aie2p_I512_I512_ACC1024_bf_mul_conf intrinsic on vectors and extracts
// element 0 of the result into the G_FMUL's destination register.
// Returns true when the pattern matched and MatchInfo was set.
// Observer is not used in this function.
// NOTE(review): the type of the destination register is not checked here -
// confirm it is always s32 when this combine runs.
bool llvm::matchWidenFMul(MachineInstr &FMul, MachineRegisterInfo &MRI,
3586+
GISelChangeObserver &Observer, BuildFnTy &MatchInfo) {
3587+
// The rewrite is restricted to the AIE2P target triple.
if (!FMul.getMF()->getTarget().getTargetTriple().isAIE2P()) {
3588+
return false;
3589+
}
3590+
3591+
ExtendOperand Lft = matchExtend(FMul.getOperand(1).getReg(), MRI);
3592+
if (!Lft) {
3593+
return false;
3594+
}
3595+
ExtendOperand Rgt = matchExtend(FMul.getOperand(2).getReg(), MRI);
3596+
if (!Rgt) {
3597+
return false;
3598+
}
3599+
3600+
const LLT VecTy = LLT::fixed_vector(32, S16);
3601+
const LLT AccTy = LLT::fixed_vector(32, S32);
3602+
const Register DstReg = FMul.getOperand(0).getReg();
3603+
// Negations on both operands cancel; only an odd total needs a G_FNEG.
bool Negate = Lft.Negate ^ Rgt.Negate;
3604+
3605+
// We build extract(mul(insert(undef, Lft, 0), insert(undef, Rgt, 0)), 0),
// negating the left operand first when Negate is set.
3606+
MatchInfo = [=, &MRI](MachineIRBuilder &B) {
3607+
auto &TII = getAIETII(B);
3608+
Register LSrc = Lft.Source;
3609+
if (Negate) {
3610+
// Apply the single remaining negation to the 16-bit left source.
LSrc = MRI.createGenericVirtualRegister(S16);
3611+
B.buildInstr(TargetOpcode::G_FNEG, {LSrc}, {Lft.Source});
3612+
}
3613+
const Register VLhs = buildScalarAsVector(B, LSrc, VecTy);
3614+
const Register VRhs = buildScalarAsVector(B, Rgt.Source, VecTy);
3615+
const Register Acc = MRI.createGenericVirtualRegister(AccTy);
3616+
// NOTE(review): 60 is passed as the last operand of the intrinsic; its
// meaning is not explained here - consider a named constant.
const Register Mode = B.buildConstant(S32, 60).getReg(0);
3617+
B.buildIntrinsic(Intrinsic::aie2p_I512_I512_ACC1024_bf_mul_conf, Acc, true,
3618+
false)
3619+
.addUse(VLhs)
3620+
.addUse(VRhs)
3621+
.addUse(Mode);
3622+
// Element 0 of the result vector is written straight to the destination.
const Register Index = B.buildConstant(S32, 0).getReg(0);
3623+
B.buildInstr(TII.getGenericExtractVectorEltOpcode(/*SignExt*/ true),
3624+
{DstReg}, {Acc, Index});
3625+
};
3626+
3627+
return true;
3628+
}
3629+
35373630
// Fold G_TRUNC (G_[ANY|S|Z]EXT x) -> X or (G_[ANY|S|Z]EXT x) or (G_TRUNC x).
35383631
bool llvm::matchCombineExtAndTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
35393632
BuildFnTy &MatchInfo) {

llvm/lib/Target/AIE/AIECombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
namespace llvm {
1919

2020
struct AIEBaseInstrInfo;
21+
class MachineIRBuilder;
2122

2223
struct ShuffleMaskValidity {
2324
bool IsValid;
@@ -272,6 +273,9 @@ bool matchNarrowTruncConstant(MachineInstr &MI, MachineRegisterInfo &MRI,
272273
bool matchNarrowZext(MachineInstr &MI, MachineRegisterInfo &MRI,
273274
GISelChangeObserver &Observer, BuildFnTy &MatchInfo);
274275

276+
bool matchWidenFMul(MachineInstr &MI, MachineRegisterInfo &MRI,
277+
GISelChangeObserver &Observer, BuildFnTy &MatchInfo);
278+
275279
bool matchCombineExtAndTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
276280
BuildFnTy &MatchInfo);
277281

llvm/lib/Target/AIE/AIELegalizerHelper.cpp

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/CodeGen/MachineInstrBuilder.h"
2424
#include "llvm/CodeGen/MachineRegisterInfo.h"
2525
#include "llvm/CodeGen/TargetOpcodes.h"
26+
#include "llvm/CodeGenTypes/MachineValueType.h"
2627
#include "llvm/IR/IntrinsicsAIE2.h"
2728
#include "llvm/IR/IntrinsicsAIE2P.h"
2829
#include "llvm/IR/RuntimeLibcalls.h"
@@ -1405,7 +1406,7 @@ bool AIELegalizerHelper::legalizeG_FMUL(LegalizerHelper &Helper,
14051406
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
14061407

14071408
const Register DstReg = MI.getOperand(0).getReg();
1408-
assert(MRI.getType(DstReg) == LLT::scalar(16) &&
1409+
assert(MRI.getType(DstReg) == S16 &&
14091410
"Expected bfloat16 type in custom legalization.");
14101411

14111412
Register SrcLHS = MI.getOperand(1).getReg();
@@ -1441,19 +1442,19 @@ bool AIELegalizerHelper::legalizeG_FMUL(LegalizerHelper &Helper,
14411442

14421443
bool AIELegalizerHelper::legalizeG_FADD_G_FSUB(LegalizerHelper &Helper,
14431444
MachineInstr &MI) const {
1445+
14441446
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
14451447
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1446-
14471448
const Register DstReg = MI.getOperand(0).getReg();
1449+
14481450
Register SrcLHS = MI.getOperand(1).getReg();
14491451
Register SrcRHS = MI.getOperand(2).getReg();
14501452

1451-
assert(MRI.getType(DstReg) == LLT::scalar(16) &&
1452-
"Expected bfloat16 type in custom legalization.");
1453+
assert(MRI.getType(DstReg) == S32 &&
1454+
"Expected float32 type in custom legalization.");
14531455

14541456
const LLT InsertVecLLT = V16FP32;
1455-
SrcLHS = MIRBuilder.buildFPExt(S32, SrcLHS).getReg(0);
1456-
SrcRHS = MIRBuilder.buildFPExt(S32, SrcRHS).getReg(0);
1457+
14571458
const Register IdxReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
14581459
const Register UndefVec = MIRBuilder.buildUndef(InsertVecLLT).getReg(0);
14591460

@@ -1479,29 +1480,16 @@ bool AIELegalizerHelper::legalizeG_FADD_G_FSUB(LegalizerHelper &Helper,
14791480
.getReg(0);
14801481

14811482
if (ST.isAIE2()) {
1482-
Res = MIRBuilder.buildBitcast(V8ACC64, Res).getReg(0);
1483+
if (MRI.getType(Res) != V16S32) {
1484+
Res = MIRBuilder.buildBitcast(V16S32, Res).getReg(0);
1485+
}
14831486
} else if (ST.isAIE2P()) {
1484-
Res = MIRBuilder.buildUnmerge(V32ACC32, Res).getReg(0);
1485-
}
1486-
1487-
const int VecSize = MRI.getType(Res).getSizeInBits();
1488-
const LLT DstLLT = ST.isAIE2P() ? V32BF16 : V16BF16;
1489-
Res = MIRBuilder
1490-
.buildIntrinsic(getFpTrunc32ToBF16IntrID(ST, VecSize), {DstLLT},
1491-
true, false)
1492-
.addUse(Res)
1493-
.getReg(0);
1494-
1495-
if (ST.isAIE2()) {
1496-
Res = emitPadUndefVector(MRI, MIRBuilder, V32BF16, Res);
1487+
Res = MIRBuilder.buildUnmerge(V16S32, Res).getReg(0);
14971488
}
14981489

14991490
const unsigned ExtractEltOpc =
15001491
ST.getInstrInfo()->getGenericExtractVectorEltOpcode(/*SignExt*/ true);
1501-
Res = MIRBuilder.buildInstr(ExtractEltOpc, {S32}, {Res, IdxReg}).getReg(0);
1502-
Res = MIRBuilder.buildAssertInstr(TargetOpcode::G_ASSERT_SEXT, {S32}, Res, 16)
1503-
.getReg(0);
1504-
MIRBuilder.buildTrunc(DstReg, Res);
1492+
MIRBuilder.buildInstr(ExtractEltOpc, {DstReg}, {Res, IdxReg});
15051493

15061494
MI.eraseFromParent();
15071495
return true;

llvm/lib/Target/AIE/AIELegalizerHelper.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class GICmp;
2727

2828
class AIELegalizerHelper {
2929
const AIEBaseSubtarget &ST;
30-
30+
const LLT S16 = LLT::scalar(16);
3131
const LLT S32 = LLT::scalar(32);
3232
const LLT V2S16 = LLT::fixed_vector(2, 16);
3333
const LLT V2S32 = LLT::fixed_vector(2, 32);

llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,9 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
246246

247247
getActionDefinitionsBuilder({G_FADD, G_FSUB})
248248
.legalFor({AccV64S32})
249-
.customFor({S16})
250-
.libcallFor({S32, S64});
249+
.clampScalar(0, S32, S64)
250+
.customFor({S32})
251+
.libcallFor({S64});
251252

252253
getActionDefinitionsBuilder({G_FDIV, G_FREM})
253254
.clampScalar(0, S32, S64)

llvm/test/CodeGen/AIE/GlobalISel/legalize-float-binop.mir

Lines changed: 48 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -296,27 +296,37 @@ body: |
296296
; AIE2-NEXT: {{ $}}
297297
; AIE2-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r6
298298
; AIE2-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r7
299-
; AIE2-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
300-
; AIE2-NEXT: $r1 = COPY [[COPY]](s32)
301-
; AIE2-NEXT: $r2 = COPY [[COPY1]](s32)
302-
; AIE2-NEXT: PseudoJL &__addsf3, csr_aie2, implicit-def $lr, implicit $r1, implicit $r2, implicit-def $r0
303-
; AIE2-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $r0
304-
; AIE2-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
305-
; AIE2-NEXT: $r0 = COPY [[COPY2]](s32)
299+
; AIE2-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
300+
; AIE2-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
301+
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
302+
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY1]](s32), [[C]](s32)
303+
; AIE2-NEXT: [[FADD:%[0-9]+]]:_(<16 x s32>) = G_FADD [[AIE_INSERT_VECTOR_ELT]], [[AIE_INSERT_VECTOR_ELT1]]
304+
; AIE2-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[FADD]](<16 x s32>), [[C]](s32)
305+
; AIE2-NEXT: $r0 = COPY [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
306306
; AIE2-NEXT: PseudoRET implicit $lr, implicit $r0
307307
;
308308
; AIE2P-LABEL: name: fadd_float
309309
; AIE2P: liveins: $r6, $r7
310310
; AIE2P-NEXT: {{ $}}
311311
; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r6
312312
; AIE2P-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r7
313-
; AIE2P-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
314-
; AIE2P-NEXT: $r1 = COPY [[COPY]](s32)
315-
; AIE2P-NEXT: $r2 = COPY [[COPY1]](s32)
316-
; AIE2P-NEXT: PseudoJL &__addsf3, csr_aie2p, implicit-def $lr, implicit $r1, implicit $r2, implicit-def $r0
317-
; AIE2P-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $r0
318-
; AIE2P-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
319-
; AIE2P-NEXT: $r0 = COPY [[COPY2]](s32)
313+
; AIE2P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
314+
; AIE2P-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
315+
; AIE2P-NEXT: [[AIE_INSERT_VECTOR_ELT:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
316+
; AIE2P-NEXT: [[AIE_INSERT_VECTOR_ELT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY1]](s32), [[C]](s32)
317+
; AIE2P-NEXT: [[COPY2:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
318+
; AIE2P-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_INSERT_VECTOR_ELT]](<16 x s32>), [[COPY2]](<16 x s32>)
319+
; AIE2P-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[COPY2]](<16 x s32>), [[COPY2]](<16 x s32>)
320+
; AIE2P-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[CONCAT_VECTORS]](<32 x s32>), [[CONCAT_VECTORS1]](<32 x s32>)
321+
; AIE2P-NEXT: [[COPY3:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
322+
; AIE2P-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_INSERT_VECTOR_ELT1]](<16 x s32>), [[COPY3]](<16 x s32>)
323+
; AIE2P-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[COPY3]](<16 x s32>), [[COPY3]](<16 x s32>)
324+
; AIE2P-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[CONCAT_VECTORS3]](<32 x s32>), [[CONCAT_VECTORS4]](<32 x s32>)
325+
; AIE2P-NEXT: [[FADD:%[0-9]+]]:_(<64 x s32>) = G_FADD [[CONCAT_VECTORS2]], [[CONCAT_VECTORS5]]
326+
; AIE2P-NEXT: [[UV:%[0-9]+]]:_(<32 x s32>), [[UV1:%[0-9]+]]:_(<32 x s32>) = G_UNMERGE_VALUES [[FADD]](<64 x s32>)
327+
; AIE2P-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[UV]](<32 x s32>)
328+
; AIE2P-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV2]](<16 x s32>), [[C]](s32)
329+
; AIE2P-NEXT: $r0 = COPY [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
320330
; AIE2P-NEXT: PseudoRET implicit $lr, implicit $r0
321331
%1:_(s32) = COPY $r6
322332
%2:_(s32) = COPY $r7
@@ -427,27 +437,37 @@ body: |
427437
; AIE2-NEXT: {{ $}}
428438
; AIE2-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r6
429439
; AIE2-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r7
430-
; AIE2-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
431-
; AIE2-NEXT: $r1 = COPY [[COPY]](s32)
432-
; AIE2-NEXT: $r2 = COPY [[COPY1]](s32)
433-
; AIE2-NEXT: PseudoJL &__subsf3, csr_aie2, implicit-def $lr, implicit $r1, implicit $r2, implicit-def $r0
434-
; AIE2-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $r0
435-
; AIE2-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
436-
; AIE2-NEXT: $r0 = COPY [[COPY2]](s32)
440+
; AIE2-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
441+
; AIE2-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
442+
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
443+
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY1]](s32), [[C]](s32)
444+
; AIE2-NEXT: [[FSUB:%[0-9]+]]:_(<16 x s32>) = G_FSUB [[AIE_INSERT_VECTOR_ELT]], [[AIE_INSERT_VECTOR_ELT1]]
445+
; AIE2-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[FSUB]](<16 x s32>), [[C]](s32)
446+
; AIE2-NEXT: $r0 = COPY [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
437447
; AIE2-NEXT: PseudoRET implicit $lr, implicit $r0
438448
;
439449
; AIE2P-LABEL: name: fsub_float
440450
; AIE2P: liveins: $r6, $r7
441451
; AIE2P-NEXT: {{ $}}
442452
; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r6
443453
; AIE2P-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r7
444-
; AIE2P-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
445-
; AIE2P-NEXT: $r1 = COPY [[COPY]](s32)
446-
; AIE2P-NEXT: $r2 = COPY [[COPY1]](s32)
447-
; AIE2P-NEXT: PseudoJL &__subsf3, csr_aie2p, implicit-def $lr, implicit $r1, implicit $r2, implicit-def $r0
448-
; AIE2P-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $r0
449-
; AIE2P-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
450-
; AIE2P-NEXT: $r0 = COPY [[COPY2]](s32)
454+
; AIE2P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
455+
; AIE2P-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
456+
; AIE2P-NEXT: [[AIE_INSERT_VECTOR_ELT:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
457+
; AIE2P-NEXT: [[AIE_INSERT_VECTOR_ELT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY1]](s32), [[C]](s32)
458+
; AIE2P-NEXT: [[COPY2:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
459+
; AIE2P-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_INSERT_VECTOR_ELT]](<16 x s32>), [[COPY2]](<16 x s32>)
460+
; AIE2P-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[COPY2]](<16 x s32>), [[COPY2]](<16 x s32>)
461+
; AIE2P-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[CONCAT_VECTORS]](<32 x s32>), [[CONCAT_VECTORS1]](<32 x s32>)
462+
; AIE2P-NEXT: [[COPY3:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
463+
; AIE2P-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_INSERT_VECTOR_ELT1]](<16 x s32>), [[COPY3]](<16 x s32>)
464+
; AIE2P-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[COPY3]](<16 x s32>), [[COPY3]](<16 x s32>)
465+
; AIE2P-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[CONCAT_VECTORS3]](<32 x s32>), [[CONCAT_VECTORS4]](<32 x s32>)
466+
; AIE2P-NEXT: [[FSUB:%[0-9]+]]:_(<64 x s32>) = G_FSUB [[CONCAT_VECTORS2]], [[CONCAT_VECTORS5]]
467+
; AIE2P-NEXT: [[UV:%[0-9]+]]:_(<32 x s32>), [[UV1:%[0-9]+]]:_(<32 x s32>) = G_UNMERGE_VALUES [[FSUB]](<64 x s32>)
468+
; AIE2P-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[UV]](<32 x s32>)
469+
; AIE2P-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV2]](<16 x s32>), [[C]](s32)
470+
; AIE2P-NEXT: $r0 = COPY [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
451471
; AIE2P-NEXT: PseudoRET implicit $lr, implicit $r0
452472
%1:_(s32) = COPY $r6
453473
%2:_(s32) = COPY $r7

0 commit comments

Comments
 (0)