Skip to content

Commit 784e0fa

Browse files
author
Martien de Jong
committed
[AIE][Legalizer] Legalize Scalar S32 FADD/FSUB. Clamp S16 FADD/FSUB to S32
We legalize a scalar to one element in a vector. We lose some IEEE compliance, but we get more equivalence between vectors and scalars in return.
1 parent 8c7eff7 commit 784e0fa

File tree

10 files changed

+188
-153
lines changed

10 files changed

+188
-153
lines changed

llvm/lib/Target/AIE/AIE2LegalizerInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,9 @@ AIE2LegalizerInfo::AIE2LegalizerInfo(const AIE2Subtarget &ST) : AIEHelper(ST) {
205205

206206
getActionDefinitionsBuilder({G_FADD, G_FSUB})
207207
.legalFor({V16S32})
208-
.customFor({S16})
209-
.libcallFor({S32, S64});
208+
.clampScalar(0, S32, S64)
209+
.customFor({S32})
210+
.libcallFor({S64});
210211

211212
getActionDefinitionsBuilder({G_FMUL, G_FDIV, G_FREM})
212213
.clampScalar(0, S32, S64)

llvm/lib/Target/AIE/AIELegalizerHelper.cpp

Lines changed: 10 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,19 +1441,19 @@ bool AIELegalizerHelper::legalizeG_FMUL(LegalizerHelper &Helper,
14411441

14421442
bool AIELegalizerHelper::legalizeG_FADD_G_FSUB(LegalizerHelper &Helper,
14431443
MachineInstr &MI) const {
1444+
14441445
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
14451446
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1446-
14471447
const Register DstReg = MI.getOperand(0).getReg();
1448+
14481449
Register SrcLHS = MI.getOperand(1).getReg();
14491450
Register SrcRHS = MI.getOperand(2).getReg();
14501451

1451-
assert(MRI.getType(DstReg) == LLT::scalar(16) &&
1452-
"Expected bfloat16 type in custom legalization.");
1452+
assert(MRI.getType(DstReg) == S32 &&
1453+
"Expected float32 type in custom legalization.");
14531454

14541455
const LLT InsertVecLLT = V16FP32;
1455-
SrcLHS = MIRBuilder.buildFPExt(S32, SrcLHS).getReg(0);
1456-
SrcRHS = MIRBuilder.buildFPExt(S32, SrcRHS).getReg(0);
1456+
14571457
const Register IdxReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
14581458
const Register UndefVec = MIRBuilder.buildUndef(InsertVecLLT).getReg(0);
14591459

@@ -1479,29 +1479,16 @@ bool AIELegalizerHelper::legalizeG_FADD_G_FSUB(LegalizerHelper &Helper,
14791479
.getReg(0);
14801480

14811481
if (ST.isAIE2()) {
1482-
Res = MIRBuilder.buildBitcast(V8ACC64, Res).getReg(0);
1482+
if (MRI.getType(Res) != V16S32) {
1483+
Res = MIRBuilder.buildBitcast(V16S32, Res).getReg(0);
1484+
}
14831485
} else if (ST.isAIE2P()) {
1484-
Res = MIRBuilder.buildUnmerge(V32ACC32, Res).getReg(0);
1485-
}
1486-
1487-
const int VecSize = MRI.getType(Res).getSizeInBits();
1488-
const LLT DstLLT = ST.isAIE2P() ? V32BF16 : V16BF16;
1489-
Res = MIRBuilder
1490-
.buildIntrinsic(getFpTrunc32ToBF16IntrID(ST, VecSize), {DstLLT},
1491-
true, false)
1492-
.addUse(Res)
1493-
.getReg(0);
1494-
1495-
if (ST.isAIE2()) {
1496-
Res = emitPadUndefVector(MRI, MIRBuilder, V32BF16, Res);
1486+
Res = MIRBuilder.buildUnmerge(V16S32, Res).getReg(0);
14971487
}
14981488

14991489
const unsigned ExtractEltOpc =
15001490
ST.getInstrInfo()->getGenericExtractVectorEltOpcode(/*SignExt*/ true);
1501-
Res = MIRBuilder.buildInstr(ExtractEltOpc, {S32}, {Res, IdxReg}).getReg(0);
1502-
Res = MIRBuilder.buildAssertInstr(TargetOpcode::G_ASSERT_SEXT, {S32}, Res, 16)
1503-
.getReg(0);
1504-
MIRBuilder.buildTrunc(DstReg, Res);
1491+
MIRBuilder.buildInstr(ExtractEltOpc, {DstReg}, {Res, IdxReg});
15051492

15061493
MI.eraseFromParent();
15071494
return true;

llvm/lib/Target/AIE/AIELegalizerHelper.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class GICmp;
2727

2828
class AIELegalizerHelper {
2929
const AIEBaseSubtarget &ST;
30-
30+
const LLT S16 = LLT::scalar(16);
3131
const LLT S32 = LLT::scalar(32);
3232
const LLT V2S16 = LLT::fixed_vector(2, 16);
3333
const LLT V2S32 = LLT::fixed_vector(2, 32);

llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,9 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
246246

247247
getActionDefinitionsBuilder({G_FADD, G_FSUB})
248248
.legalFor({AccV64S32})
249-
.customFor({S16})
250-
.libcallFor({S32, S64});
249+
.clampScalar(0, S32, S64)
250+
.customFor({S32})
251+
.libcallFor({S64});
251252

252253
getActionDefinitionsBuilder({G_FDIV, G_FREM})
253254
.clampScalar(0, S32, S64)

llvm/test/CodeGen/AIE/GlobalISel/legalize-float-binop.mir

Lines changed: 48 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -296,27 +296,37 @@ body: |
296296
; AIE2-NEXT: {{ $}}
297297
; AIE2-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r6
298298
; AIE2-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r7
299-
; AIE2-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
300-
; AIE2-NEXT: $r1 = COPY [[COPY]](s32)
301-
; AIE2-NEXT: $r2 = COPY [[COPY1]](s32)
302-
; AIE2-NEXT: PseudoJL &__addsf3, csr_aie2, implicit-def $lr, implicit $r1, implicit $r2, implicit-def $r0
303-
; AIE2-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $r0
304-
; AIE2-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
305-
; AIE2-NEXT: $r0 = COPY [[COPY2]](s32)
299+
; AIE2-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
300+
; AIE2-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
301+
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
302+
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY1]](s32), [[C]](s32)
303+
; AIE2-NEXT: [[FADD:%[0-9]+]]:_(<16 x s32>) = G_FADD [[AIE_INSERT_VECTOR_ELT]], [[AIE_INSERT_VECTOR_ELT1]]
304+
; AIE2-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[FADD]](<16 x s32>), [[C]](s32)
305+
; AIE2-NEXT: $r0 = COPY [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
306306
; AIE2-NEXT: PseudoRET implicit $lr, implicit $r0
307307
;
308308
; AIE2P-LABEL: name: fadd_float
309309
; AIE2P: liveins: $r6, $r7
310310
; AIE2P-NEXT: {{ $}}
311311
; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r6
312312
; AIE2P-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r7
313-
; AIE2P-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
314-
; AIE2P-NEXT: $r1 = COPY [[COPY]](s32)
315-
; AIE2P-NEXT: $r2 = COPY [[COPY1]](s32)
316-
; AIE2P-NEXT: PseudoJL &__addsf3, csr_aie2p, implicit-def $lr, implicit $r1, implicit $r2, implicit-def $r0
317-
; AIE2P-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $r0
318-
; AIE2P-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
319-
; AIE2P-NEXT: $r0 = COPY [[COPY2]](s32)
313+
; AIE2P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
314+
; AIE2P-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
315+
; AIE2P-NEXT: [[AIE_INSERT_VECTOR_ELT:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
316+
; AIE2P-NEXT: [[AIE_INSERT_VECTOR_ELT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY1]](s32), [[C]](s32)
317+
; AIE2P-NEXT: [[COPY2:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
318+
; AIE2P-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_INSERT_VECTOR_ELT]](<16 x s32>), [[COPY2]](<16 x s32>)
319+
; AIE2P-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[COPY2]](<16 x s32>), [[COPY2]](<16 x s32>)
320+
; AIE2P-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[CONCAT_VECTORS]](<32 x s32>), [[CONCAT_VECTORS1]](<32 x s32>)
321+
; AIE2P-NEXT: [[COPY3:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
322+
; AIE2P-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_INSERT_VECTOR_ELT1]](<16 x s32>), [[COPY3]](<16 x s32>)
323+
; AIE2P-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[COPY3]](<16 x s32>), [[COPY3]](<16 x s32>)
324+
; AIE2P-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[CONCAT_VECTORS3]](<32 x s32>), [[CONCAT_VECTORS4]](<32 x s32>)
325+
; AIE2P-NEXT: [[FADD:%[0-9]+]]:_(<64 x s32>) = G_FADD [[CONCAT_VECTORS2]], [[CONCAT_VECTORS5]]
326+
; AIE2P-NEXT: [[UV:%[0-9]+]]:_(<32 x s32>), [[UV1:%[0-9]+]]:_(<32 x s32>) = G_UNMERGE_VALUES [[FADD]](<64 x s32>)
327+
; AIE2P-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[UV]](<32 x s32>)
328+
; AIE2P-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV2]](<16 x s32>), [[C]](s32)
329+
; AIE2P-NEXT: $r0 = COPY [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
320330
; AIE2P-NEXT: PseudoRET implicit $lr, implicit $r0
321331
%1:_(s32) = COPY $r6
322332
%2:_(s32) = COPY $r7
@@ -427,27 +437,37 @@ body: |
427437
; AIE2-NEXT: {{ $}}
428438
; AIE2-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r6
429439
; AIE2-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r7
430-
; AIE2-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
431-
; AIE2-NEXT: $r1 = COPY [[COPY]](s32)
432-
; AIE2-NEXT: $r2 = COPY [[COPY1]](s32)
433-
; AIE2-NEXT: PseudoJL &__subsf3, csr_aie2, implicit-def $lr, implicit $r1, implicit $r2, implicit-def $r0
434-
; AIE2-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $r0
435-
; AIE2-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
436-
; AIE2-NEXT: $r0 = COPY [[COPY2]](s32)
440+
; AIE2-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
441+
; AIE2-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
442+
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
443+
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY1]](s32), [[C]](s32)
444+
; AIE2-NEXT: [[FSUB:%[0-9]+]]:_(<16 x s32>) = G_FSUB [[AIE_INSERT_VECTOR_ELT]], [[AIE_INSERT_VECTOR_ELT1]]
445+
; AIE2-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[FSUB]](<16 x s32>), [[C]](s32)
446+
; AIE2-NEXT: $r0 = COPY [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
437447
; AIE2-NEXT: PseudoRET implicit $lr, implicit $r0
438448
;
439449
; AIE2P-LABEL: name: fsub_float
440450
; AIE2P: liveins: $r6, $r7
441451
; AIE2P-NEXT: {{ $}}
442452
; AIE2P-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r6
443453
; AIE2P-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r7
444-
; AIE2P-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
445-
; AIE2P-NEXT: $r1 = COPY [[COPY]](s32)
446-
; AIE2P-NEXT: $r2 = COPY [[COPY1]](s32)
447-
; AIE2P-NEXT: PseudoJL &__subsf3, csr_aie2p, implicit-def $lr, implicit $r1, implicit $r2, implicit-def $r0
448-
; AIE2P-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $r0
449-
; AIE2P-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
450-
; AIE2P-NEXT: $r0 = COPY [[COPY2]](s32)
454+
; AIE2P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
455+
; AIE2P-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
456+
; AIE2P-NEXT: [[AIE_INSERT_VECTOR_ELT:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
457+
; AIE2P-NEXT: [[AIE_INSERT_VECTOR_ELT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[COPY1]](s32), [[C]](s32)
458+
; AIE2P-NEXT: [[COPY2:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
459+
; AIE2P-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_INSERT_VECTOR_ELT]](<16 x s32>), [[COPY2]](<16 x s32>)
460+
; AIE2P-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[COPY2]](<16 x s32>), [[COPY2]](<16 x s32>)
461+
; AIE2P-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[CONCAT_VECTORS]](<32 x s32>), [[CONCAT_VECTORS1]](<32 x s32>)
462+
; AIE2P-NEXT: [[COPY3:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
463+
; AIE2P-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_INSERT_VECTOR_ELT1]](<16 x s32>), [[COPY3]](<16 x s32>)
464+
; AIE2P-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[COPY3]](<16 x s32>), [[COPY3]](<16 x s32>)
465+
; AIE2P-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[CONCAT_VECTORS3]](<32 x s32>), [[CONCAT_VECTORS4]](<32 x s32>)
466+
; AIE2P-NEXT: [[FSUB:%[0-9]+]]:_(<64 x s32>) = G_FSUB [[CONCAT_VECTORS2]], [[CONCAT_VECTORS5]]
467+
; AIE2P-NEXT: [[UV:%[0-9]+]]:_(<32 x s32>), [[UV1:%[0-9]+]]:_(<32 x s32>) = G_UNMERGE_VALUES [[FSUB]](<64 x s32>)
468+
; AIE2P-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[UV]](<32 x s32>)
469+
; AIE2P-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV2]](<16 x s32>), [[C]](s32)
470+
; AIE2P-NEXT: $r0 = COPY [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
451471
; AIE2P-NEXT: PseudoRET implicit $lr, implicit $r0
452472
%1:_(s32) = COPY $r6
453473
%2:_(s32) = COPY $r7

llvm/test/CodeGen/AIE/GlobalISel/legalize-scalar-fadd.mir

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,16 @@ body: |
2626
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[SHL]](s32), [[C1]](s32)
2727
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[SHL1]](s32), [[C1]](s32)
2828
; AIE2-NEXT: [[FADD:%[0-9]+]]:_(<16 x s32>) = G_FADD [[AIE_INSERT_VECTOR_ELT]], [[AIE_INSERT_VECTOR_ELT1]]
29-
; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[FADD]](<16 x s32>)
29+
; AIE2-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[FADD]](<16 x s32>), [[C1]](s32)
30+
; AIE2-NEXT: [[COPY2:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
31+
; AIE2-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
32+
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT2:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[COPY2]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32), [[COPY3]](s32)
33+
; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[AIE_INSERT_VECTOR_ELT2]](<16 x s32>)
3034
; AIE2-NEXT: [[INT:%[0-9]+]]:_(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.v16accfloat.to.v16bf16), [[BITCAST]](<8 x s64>)
3135
; AIE2-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s16>) = G_IMPLICIT_DEF
3236
; AIE2-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[INT]](<16 x s16>), [[DEF1]](<16 x s16>)
33-
; AIE2-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<32 x s16>), [[C1]](s32)
34-
; AIE2-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 16
37+
; AIE2-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<32 x s16>), [[COPY3]](s32)
38+
; AIE2-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT1]], 16
3539
; AIE2-NEXT: $r0 = COPY [[ASSERT_SEXT]](s32)
3640
; AIE2-NEXT: PseudoRET implicit $lr, implicit $r0
3741
;
@@ -57,9 +61,16 @@ body: |
5761
; AIE2P-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[CONCAT_VECTORS3]](<32 x s32>), [[CONCAT_VECTORS4]](<32 x s32>)
5862
; AIE2P-NEXT: [[FADD:%[0-9]+]]:_(<64 x s32>) = G_FADD [[CONCAT_VECTORS2]], [[CONCAT_VECTORS5]]
5963
; AIE2P-NEXT: [[UV:%[0-9]+]]:_(<32 x s32>), [[UV1:%[0-9]+]]:_(<32 x s32>) = G_UNMERGE_VALUES [[FADD]](<64 x s32>)
60-
; AIE2P-NEXT: [[INT:%[0-9]+]]:_(<32 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v32accfloat.to.v32bf16), [[UV]](<32 x s32>)
61-
; AIE2P-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[INT]](<32 x s16>), [[C1]](s32)
62-
; AIE2P-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 16
64+
; AIE2P-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[UV]](<32 x s32>)
65+
; AIE2P-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV2]](<16 x s32>), [[C1]](s32)
66+
; AIE2P-NEXT: [[COPY4:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
67+
; AIE2P-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
68+
; AIE2P-NEXT: [[AIE_INSERT_VECTOR_ELT2:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[COPY4]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32), [[COPY5]](s32)
69+
; AIE2P-NEXT: [[INT:%[0-9]+]]:_(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v16accfloat.to.v16bf16), [[AIE_INSERT_VECTOR_ELT2]](<16 x s32>)
70+
; AIE2P-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s16>) = G_IMPLICIT_DEF
71+
; AIE2P-NEXT: [[CONCAT_VECTORS6:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[INT]](<16 x s16>), [[DEF1]](<16 x s16>)
72+
; AIE2P-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS6]](<32 x s16>), [[COPY5]](s32)
73+
; AIE2P-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT1]], 16
6374
; AIE2P-NEXT: $r0 = COPY [[ASSERT_SEXT]](s32)
6475
; AIE2P-NEXT: PseudoRET implicit $lr, implicit $r0
6576
%0:_(s32) = COPY $r1

llvm/test/CodeGen/AIE/GlobalISel/legalize-scalar-fsub.mir

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,16 @@ body: |
2626
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[SHL]](s32), [[C1]](s32)
2727
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[DEF]], [[SHL1]](s32), [[C1]](s32)
2828
; AIE2-NEXT: [[FSUB:%[0-9]+]]:_(<16 x s32>) = G_FSUB [[AIE_INSERT_VECTOR_ELT]], [[AIE_INSERT_VECTOR_ELT1]]
29-
; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[FSUB]](<16 x s32>)
29+
; AIE2-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[FSUB]](<16 x s32>), [[C1]](s32)
30+
; AIE2-NEXT: [[COPY2:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
31+
; AIE2-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
32+
; AIE2-NEXT: [[AIE_INSERT_VECTOR_ELT2:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[COPY2]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32), [[COPY3]](s32)
33+
; AIE2-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[AIE_INSERT_VECTOR_ELT2]](<16 x s32>)
3034
; AIE2-NEXT: [[INT:%[0-9]+]]:_(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.v16accfloat.to.v16bf16), [[BITCAST]](<8 x s64>)
3135
; AIE2-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s16>) = G_IMPLICIT_DEF
3236
; AIE2-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[INT]](<16 x s16>), [[DEF1]](<16 x s16>)
33-
; AIE2-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<32 x s16>), [[C1]](s32)
34-
; AIE2-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 16
37+
; AIE2-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<32 x s16>), [[COPY3]](s32)
38+
; AIE2-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT1]], 16
3539
; AIE2-NEXT: $r0 = COPY [[ASSERT_SEXT]](s32)
3640
; AIE2-NEXT: PseudoRET implicit $lr, implicit $r0
3741
;
@@ -57,9 +61,16 @@ body: |
5761
; AIE2P-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[CONCAT_VECTORS3]](<32 x s32>), [[CONCAT_VECTORS4]](<32 x s32>)
5862
; AIE2P-NEXT: [[FSUB:%[0-9]+]]:_(<64 x s32>) = G_FSUB [[CONCAT_VECTORS2]], [[CONCAT_VECTORS5]]
5963
; AIE2P-NEXT: [[UV:%[0-9]+]]:_(<32 x s32>), [[UV1:%[0-9]+]]:_(<32 x s32>) = G_UNMERGE_VALUES [[FSUB]](<64 x s32>)
60-
; AIE2P-NEXT: [[INT:%[0-9]+]]:_(<32 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v32accfloat.to.v32bf16), [[UV]](<32 x s32>)
61-
; AIE2P-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[INT]](<32 x s16>), [[C1]](s32)
62-
; AIE2P-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 16
64+
; AIE2P-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[UV]](<32 x s32>)
65+
; AIE2P-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV2]](<16 x s32>), [[C1]](s32)
66+
; AIE2P-NEXT: [[COPY4:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>)
67+
; AIE2P-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
68+
; AIE2P-NEXT: [[AIE_INSERT_VECTOR_ELT2:%[0-9]+]]:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT [[COPY4]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32), [[COPY5]](s32)
69+
; AIE2P-NEXT: [[INT:%[0-9]+]]:_(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v16accfloat.to.v16bf16), [[AIE_INSERT_VECTOR_ELT2]](<16 x s32>)
70+
; AIE2P-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s16>) = G_IMPLICIT_DEF
71+
; AIE2P-NEXT: [[CONCAT_VECTORS6:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[INT]](<16 x s16>), [[DEF1]](<16 x s16>)
72+
; AIE2P-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS6]](<32 x s16>), [[COPY5]](s32)
73+
; AIE2P-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT1]], 16
6374
; AIE2P-NEXT: $r0 = COPY [[ASSERT_SEXT]](s32)
6475
; AIE2P-NEXT: PseudoRET implicit $lr, implicit $r0
6576
%0:_(s32) = COPY $r1

0 commit comments

Comments
 (0)