Skip to content

Commit 049cf96

Browse files
authored
[AMDGPU][WaveTransform] Enable SIOptimizeExecMaskingPreRA pass for new pipeline. (#412)
This patch introduces the SIOptimizeExecMaskingPreRA pass after the AMDGPUWaveTransform pass, just before SGPR allocation, to reduce register pressure in the new pipeline. At the same time, it still acts as a pre-RA pass that optimizes EXEC-mask-related instructions in the legacy pipeline. This is a follow-up that depends on #369.
1 parent 0bba171 commit 049cf96

File tree

3 files changed

+8
-7
lines changed

3 files changed

+8
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1675,9 +1675,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
16751675
// instructions that cause scheduling barriers.
16761676
insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
16771677

1678-
if (!LateWaveTransform && OptExecMaskPreRA)
1679-
insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
1680-
16811678
// This is not an essential optimization and it has a noticeable impact on
16821679
// compilation time, so we only enable it from O2.
16831680
if (TM->getOptLevel() > CodeGenOptLevel::Less)
@@ -1861,6 +1858,9 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
18611858
addPass(&RegisterCoalescerID);
18621859
}
18631860

1861+
if (OptExecMaskPreRA)
1862+
addPass(&SIOptimizeExecMaskingPreRAID);
1863+
18641864
addPass(createSGPRAllocPass(true));
18651865

18661866
// Commit allocated register changes. This is mostly necessary because too

llvm/test/CodeGen/AMDGPU/WaveTransform/wave-transform-llc-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -387,6 +387,7 @@
387387
; GCN-O3-NEXT: Live Interval Analysis
388388
; GCN-O3-NEXT: Machine Natural Loop Construction
389389
; GCN-O3-NEXT: Register Coalescer
390+
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
390391
; GCN-O3-NEXT: Machine Block Frequency Analysis
391392
; GCN-O3-NEXT: Debug Variable Analysis
392393
; GCN-O3-NEXT: Live Stack Slot Analysis

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -352,8 +352,8 @@
352352
; GCN-O1-NEXT: Rewrite Partial Register Uses
353353
; GCN-O1-NEXT: Machine Instruction Scheduler
354354
; GCN-O1-NEXT: SI Whole Quad Mode
355-
; GCN-O1-NEXT: SI optimize exec mask operations pre-RA
356355
; GCN-O1-NEXT: AMDGPU Pre-RA Long Branch Reg
356+
; GCN-O1-NEXT: SI optimize exec mask operations pre-RA
357357
; GCN-O1-NEXT: Machine Natural Loop Construction
358358
; GCN-O1-NEXT: Machine Block Frequency Analysis
359359
; GCN-O1-NEXT: Debug Variable Analysis
@@ -665,8 +665,8 @@
665665
; GCN-O1-OPTS-NEXT: Machine Instruction Scheduler
666666
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations
667667
; GCN-O1-OPTS-NEXT: SI Whole Quad Mode
668-
; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA
669668
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Long Branch Reg
669+
; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA
670670
; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction
671671
; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis
672672
; GCN-O1-OPTS-NEXT: Debug Variable Analysis
@@ -983,9 +983,9 @@
983983
; GCN-O2-NEXT: Machine Instruction Scheduler
984984
; GCN-O2-NEXT: AMDGPU Pre-RA optimizations
985985
; GCN-O2-NEXT: SI Whole Quad Mode
986-
; GCN-O2-NEXT: SI optimize exec mask operations pre-RA
987986
; GCN-O2-NEXT: SI Form memory clauses
988987
; GCN-O2-NEXT: AMDGPU Pre-RA Long Branch Reg
988+
; GCN-O2-NEXT: SI optimize exec mask operations pre-RA
989989
; GCN-O2-NEXT: Machine Natural Loop Construction
990990
; GCN-O2-NEXT: Machine Block Frequency Analysis
991991
; GCN-O2-NEXT: Debug Variable Analysis
@@ -1315,9 +1315,9 @@
13151315
; GCN-O3-NEXT: Machine Instruction Scheduler
13161316
; GCN-O3-NEXT: AMDGPU Pre-RA optimizations
13171317
; GCN-O3-NEXT: SI Whole Quad Mode
1318-
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
13191318
; GCN-O3-NEXT: SI Form memory clauses
13201319
; GCN-O3-NEXT: AMDGPU Pre-RA Long Branch Reg
1320+
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
13211321
; GCN-O3-NEXT: Machine Natural Loop Construction
13221322
; GCN-O3-NEXT: Machine Block Frequency Analysis
13231323
; GCN-O3-NEXT: Debug Variable Analysis

0 commit comments

Comments
 (0)