Skip to content

Commit 78a35a0

Browse files
committed
[AIE2][AIE2P] Add combiner to convert insert(undef, extract(vec, index), index) into copy
Post-legalizer combiner that matches the pattern: `%18:_(<16 x s32>) = COPY $x0`; `%10:_(<16 x s32>) = G_IMPLICIT_DEF`; `%9:_(s32) = G_CONSTANT i32 0`; `%8:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %18(<16 x s32>), %9(s32)`; `%22:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT %10, %8(s32), %9(s32)` — and turns it into: `%22:_(<16 x s32>) = COPY %18`.
1 parent 73636fc commit 78a35a0

File tree

4 files changed

+220
-2
lines changed

4 files changed

+220
-2
lines changed

llvm/lib/Target/AIE/AIECombine.td

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,13 @@ def combine_add_vector_elt_undef : GICombineRule <
361361
(apply [{ applyAddVecEltUndef(*${root}, MRI, B); }] )
362362
>;
363363

364+
// Rooted at G_AIE_INSERT_VECTOR_ELT: when matchInsertExtractVectorEltToCopy
// accepts the instruction (insert into an undef vector of an element that was
// extracted from the same index of a same-typed vector), the build function it
// stores in $matchinfo is run to emit a COPY of the extract's source vector.
def combine_insert_extract_vector_elt_to_copy : GICombineRule<
365+
(defs root:$root, build_fn_matchinfo:$matchinfo),
366+
(match (wip_match_opcode G_AIE_INSERT_VECTOR_ELT): $root,
367+
[{ return matchInsertExtractVectorEltToCopy(*${root}, MRI, (const AIEBaseInstrInfo &)B.getTII(), ${matchinfo}); }]),
368+
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
369+
>;
370+
364371
def combine_load_store_split_matchdata: GIDefMatchData<"unsigned">;
365372
def combine_load_store_split : GICombineRule<
366373
(defs root:$root, combine_load_store_split_matchdata:$matchinfo),
@@ -404,7 +411,8 @@ def AIE2PostLegalizerCustomCombiner
404411
combine_add_vector_elt_undef,
405412
combine_extract_concat,
406413
combine_unmerge_concat,
407-
combine_upd_to_concat,
414+
combine_upd_to_concat,
415+
combine_insert_extract_vector_elt_to_copy
408416
]> {
409417
}
410418

@@ -417,6 +425,6 @@ def AIE2PPostLegalizerCustomCombiner
417425
combine_offset_load_store_ptradd,
418426
combine_offset_load_store_share_ptradd,
419427
combine_add_vector_elt_undef,
428+
combine_insert_extract_vector_elt_to_copy
420429
]> {
421430
}
422-

llvm/lib/Target/AIE/AIECombinerHelper.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4323,3 +4323,74 @@ bool llvm::matchMsbScalar(Register ScalarReg, Register BroadcastReg,
43234323

43244324
return false;
43254325
}
4326+
4327+
/// Match a pattern where:
4328+
/// %18:_(<16 x s32>) = COPY $x0
4329+
/// %10:_(<16 x s32>) = G_IMPLICIT_DEF
4330+
/// %9:_(s32) = G_CONSTANT i32 0
4331+
/// %8:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %18(<16 x s32>), %9(s32)
4332+
/// %22:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT %10, %8(s32), %9(s32)
4333+
///
4334+
/// This can be simplified to:
4335+
/// %22:_(<16 x s32>) = COPY %18
4336+
bool llvm::matchInsertExtractVectorEltToCopy(MachineInstr &MI,
4337+
MachineRegisterInfo &MRI,
4338+
const AIEBaseInstrInfo &TII,
4339+
BuildFnTy &MatchInfo) {
4340+
assert(MI.getOpcode() == TII.getGenericInsertVectorEltOpcode() &&
4341+
"Expected G_AIE_INSERT_VECTOR_ELT");
4342+
4343+
// Get the insert operands
4344+
const Register InsertDstReg = MI.getOperand(0).getReg();
4345+
const Register InsertSrcVecReg = MI.getOperand(1).getReg();
4346+
const Register InsertedEltReg = MI.getOperand(2).getReg();
4347+
const Register InsertIdxReg = MI.getOperand(3).getReg();
4348+
4349+
// Check that the insert source vector is G_IMPLICIT_DEF
4350+
const MachineInstr *InsertSrcMI = MRI.getVRegDef(InsertSrcVecReg);
4351+
if (!InsertSrcMI || InsertSrcMI->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
4352+
return false;
4353+
4354+
// Get the definition of the inserted element
4355+
const MachineInstr *ExtractMI = MRI.getVRegDef(InsertedEltReg);
4356+
if (!ExtractMI)
4357+
return false;
4358+
4359+
// Check if it's either SEXT or ZEXT extract
4360+
const unsigned ExtractSextOpc = TII.getGenericExtractVectorEltOpcode(true);
4361+
const unsigned ExtractZextOpc = TII.getGenericExtractVectorEltOpcode(false);
4362+
4363+
if (ExtractMI->getOpcode() != ExtractSextOpc &&
4364+
ExtractMI->getOpcode() != ExtractZextOpc)
4365+
return false;
4366+
4367+
// Get extract operands
4368+
const Register ExtractSrcVecReg = ExtractMI->getOperand(1).getReg();
4369+
const Register ExtractIdxReg = ExtractMI->getOperand(2).getReg();
4370+
4371+
// Verify that the insert destination vector type matches the extract source
4372+
// vector type
4373+
const LLT InsertDstTy = MRI.getType(InsertDstReg);
4374+
const LLT ExtractSrcTy = MRI.getType(ExtractSrcVecReg);
4375+
4376+
if (InsertDstTy != ExtractSrcTy)
4377+
return false;
4378+
4379+
// Check that insert and extract indices are the same
4380+
// They can be the same register, or both constants with the same value
4381+
if (InsertIdxReg != ExtractIdxReg) {
4382+
auto InsertIdxCst = getIConstantVRegValWithLookThrough(InsertIdxReg, MRI);
4383+
auto ExtractIdxCst = getIConstantVRegValWithLookThrough(ExtractIdxReg, MRI);
4384+
if (!InsertIdxCst || !ExtractIdxCst ||
4385+
InsertIdxCst->Value != ExtractIdxCst->Value)
4386+
return false;
4387+
}
4388+
4389+
// Build the lambda for applying the transformation
4390+
// Copy the extract source vector (the real vector) to the insert destination
4391+
MatchInfo = [=](MachineIRBuilder &B) {
4392+
B.buildCopy(InsertDstReg, ExtractSrcVecReg);
4393+
};
4394+
4395+
return true;
4396+
}

llvm/lib/Target/AIE/AIECombinerHelper.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,11 @@ bool matchExtractVecEltAssertBcst(MachineInstr &MI, MachineRegisterInfo &MRI,
316316
bool matchMsbScalar(Register ScalarReg, Register BroadcastReg,
317317
MachineRegisterInfo &MRI);
318318

319+
/// Match a G_AIE_INSERT_VECTOR_ELT that inserts, into a G_IMPLICIT_DEF vector,
/// an element extracted (sign- or zero-extending) from the same index of a
/// vector with the same type as the insert's result. On success, \p MatchInfo
/// is set to a build function that copies the extract's source vector into the
/// insert's destination register, and true is returned.
bool matchInsertExtractVectorEltToCopy(MachineInstr &MI,
320+
MachineRegisterInfo &MRI,
321+
const AIEBaseInstrInfo &TII,
322+
BuildFnTy &MatchInfo);
323+
319324
} // namespace llvm
320325

321326
#endif
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
#
3+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
8+
# RUN: llc -mtriple=aie2 -run-pass=aie2-postlegalizer-custom-combiner -verify-machineinstrs %s -o - | FileCheck %s
9+
# RUN: llc -mtriple=aie2p -run-pass=aie2p-postlegalizer-custom-combiner -verify-machineinstrs %s -o - | FileCheck %s
10+
11+
---
12+
name: test_insert_extract_s32_vector
13+
tracksRegLiveness: true
14+
body: |
15+
bb.0:
16+
liveins: $x0
17+
18+
; CHECK-LABEL: name: test_insert_extract_s32_vector
19+
; CHECK: liveins: $x0
20+
; CHECK-NEXT: {{ $}}
21+
; CHECK-NEXT: %src_vec:_(<16 x s32>) = COPY $x0
22+
; CHECK-NEXT: %result:_(<16 x s32>) = COPY %src_vec(<16 x s32>)
23+
; CHECK-NEXT: PseudoRET implicit $lr, implicit %result(<16 x s32>)
24+
%src_vec:_(<16 x s32>) = COPY $x0
25+
%idx:_(s32) = G_CONSTANT i32 0
26+
%undef_vec:_(<16 x s32>) = G_IMPLICIT_DEF
27+
%extracted_elt:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %src_vec, %idx
28+
%result:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT %undef_vec, %extracted_elt, %idx
29+
PseudoRET implicit $lr, implicit %result
30+
...
31+
---
32+
name: test_insert_extract_s16_vector
33+
tracksRegLiveness: true
34+
body: |
35+
bb.0:
36+
liveins: $x0
37+
38+
; CHECK-LABEL: name: test_insert_extract_s16_vector
39+
; CHECK: liveins: $x0
40+
; CHECK-NEXT: {{ $}}
41+
; CHECK-NEXT: %src_vec:_(<32 x s16>) = COPY $x0
42+
; CHECK-NEXT: %result:_(<32 x s16>) = COPY %src_vec(<32 x s16>)
43+
; CHECK-NEXT: PseudoRET implicit $lr, implicit %result(<32 x s16>)
44+
%src_vec:_(<32 x s16>) = COPY $x0
45+
%idx:_(s32) = G_CONSTANT i32 0
46+
%undef_vec:_(<32 x s16>) = G_IMPLICIT_DEF
47+
%extracted_elt:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %src_vec, %idx
48+
%result:_(<32 x s16>) = G_AIE_INSERT_VECTOR_ELT %undef_vec, %extracted_elt, %idx
49+
PseudoRET implicit $lr, implicit %result
50+
...
51+
---
52+
name: test_same_constant_non_zero
53+
tracksRegLiveness: true
54+
body: |
55+
bb.0:
56+
liveins: $x0
57+
; CHECK-LABEL: name: test_same_constant_non_zero
58+
; CHECK: liveins: $x0
59+
; CHECK-NEXT: {{ $}}
60+
; CHECK-NEXT: %src_vec:_(<16 x s32>) = COPY $x0
61+
; CHECK-NEXT: %result:_(<16 x s32>) = COPY %src_vec(<16 x s32>)
62+
; CHECK-NEXT: PseudoRET implicit $lr, implicit %result(<16 x s32>)
63+
%src_vec:_(<16 x s32>) = COPY $x0
64+
%idx:_(s32) = G_CONSTANT i32 5
65+
%undef_vec:_(<16 x s32>) = G_IMPLICIT_DEF
66+
%extracted_elt:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %src_vec, %idx
67+
%result:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT %undef_vec, %extracted_elt, %idx
68+
PseudoRET implicit $lr, implicit %result
69+
...
70+
---
71+
name: test_same_register_dynamic_index
72+
tracksRegLiveness: true
73+
body: |
74+
bb.0:
75+
liveins: $r0, $x0
76+
; CHECK-LABEL: name: test_same_register_dynamic_index
77+
; CHECK: liveins: $r0, $x0
78+
; CHECK-NEXT: {{ $}}
79+
; CHECK-NEXT: %src_vec:_(<16 x s32>) = COPY $x0
80+
; CHECK-NEXT: %result:_(<16 x s32>) = COPY %src_vec(<16 x s32>)
81+
; CHECK-NEXT: PseudoRET implicit $lr, implicit %result(<16 x s32>)
82+
%idx:_(s32) = COPY $r0
83+
%src_vec:_(<16 x s32>) = COPY $x0
84+
%undef_vec:_(<16 x s32>) = G_IMPLICIT_DEF
85+
%extracted_elt:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %src_vec, %idx
86+
%result:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT %undef_vec, %extracted_elt, %idx
87+
PseudoRET implicit $lr, implicit %result
88+
...
89+
---
90+
name: test_no_combine_different_constants
91+
tracksRegLiveness: true
92+
body: |
93+
bb.0:
94+
liveins: $x0
95+
; CHECK-LABEL: name: test_no_combine_different_constants
96+
; CHECK: liveins: $x0
97+
; CHECK-NEXT: {{ $}}
98+
; CHECK-NEXT: %src_vec:_(<16 x s32>) = COPY $x0
99+
; CHECK-NEXT: %idx1:_(s32) = G_CONSTANT i32 1
100+
; CHECK-NEXT: %idx2:_(s32) = G_CONSTANT i32 2
101+
; CHECK-NEXT: %undef_vec:_(<16 x s32>) = G_IMPLICIT_DEF
102+
; CHECK-NEXT: %extracted_elt:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %src_vec(<16 x s32>), %idx1(s32)
103+
; CHECK-NEXT: %result:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT %undef_vec, %extracted_elt(s32), %idx2(s32)
104+
; CHECK-NEXT: PseudoRET implicit $lr, implicit %result(<16 x s32>)
105+
%src_vec:_(<16 x s32>) = COPY $x0
106+
%idx1:_(s32) = G_CONSTANT i32 1
107+
%idx2:_(s32) = G_CONSTANT i32 2
108+
%undef_vec:_(<16 x s32>) = G_IMPLICIT_DEF
109+
%extracted_elt:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %src_vec, %idx1
110+
%result:_(<16 x s32>) = G_AIE_INSERT_VECTOR_ELT %undef_vec, %extracted_elt, %idx2
111+
PseudoRET implicit $lr, implicit %result
112+
...
113+
---
114+
name: test_no_combine_type_mismatch
115+
tracksRegLiveness: true
116+
body: |
117+
bb.0:
118+
liveins: $x0
119+
; CHECK-LABEL: name: test_no_combine_type_mismatch
120+
; CHECK: liveins: $x0
121+
; CHECK-NEXT: {{ $}}
122+
; CHECK-NEXT: %src_vec:_(<16 x s32>) = COPY $x0
123+
; CHECK-NEXT: %idx:_(s32) = G_CONSTANT i32 0
124+
; CHECK-NEXT: %undef_vec:_(<32 x s16>) = G_IMPLICIT_DEF
125+
; CHECK-NEXT: %extracted_elt:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %src_vec(<16 x s32>), %idx(s32)
126+
; CHECK-NEXT: %result:_(<32 x s16>) = G_AIE_INSERT_VECTOR_ELT %undef_vec, %extracted_elt(s32), %idx(s32)
127+
; CHECK-NEXT: PseudoRET implicit $lr, implicit %result(<32 x s16>)
128+
%src_vec:_(<16 x s32>) = COPY $x0
129+
%idx:_(s32) = G_CONSTANT i32 0
130+
%undef_vec:_(<32 x s16>) = G_IMPLICIT_DEF
131+
%extracted_elt:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %src_vec, %idx
132+
%result:_(<32 x s16>) = G_AIE_INSERT_VECTOR_ELT %undef_vec, %extracted_elt, %idx
133+
PseudoRET implicit $lr, implicit %result
134+
...

0 commit comments

Comments
 (0)