Skip to content

Commit 30560ae

Browse files
authored
[AMDGPU][WaveTransform] Separate SIOptimizeExecMaskingPreRA pass usage for the two pipelines. (#422)
We enable the SIOptimizeExecMaskingPreRA pass just before SGPR allocation, as planned for the WaveTransform pipeline, while reverting to its original invocation in the default pipeline. This is a continuation of the work in #412.
1 parent 049cf96 commit 30560ae

File tree

3 files changed

+12
-8
lines changed

3 files changed

+12
-8
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1675,6 +1675,9 @@ void GCNPassConfig::addOptimizedRegAlloc() {
16751675
// instructions that cause scheduling barriers.
16761676
insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
16771677

1678+
if (!LateWaveTransform && OptExecMaskPreRA)
1679+
insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
1680+
16781681
// This is not an essential optimization and it has a noticeable impact on
16791682
// compilation time, so we only enable it from O2.
16801683
if (TM->getOptLevel() > CodeGenOptLevel::Less)
@@ -1853,13 +1856,14 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
18531856
// allocations.
18541857
// addPass(&AMDGPUUpdateAllocatedVGPRLiveRangesID);
18551858

1859+
// Optimize EXEC-mask related instructions around SGPR register class.
1860+
if (OptExecMaskPreRA)
1861+
addPass(&SIOptimizeExecMaskingPreRAID);
1862+
18561863
// Now we can perform register-coalescing on remaining copies,
18571864
// mainly sgpr copies and wwm-vgpr copies.
18581865
addPass(&RegisterCoalescerID);
18591866
}
1860-
1861-
if (OptExecMaskPreRA)
1862-
addPass(&SIOptimizeExecMaskingPreRAID);
18631867

18641868
addPass(createSGPRAllocPass(true));
18651869

llvm/test/CodeGen/AMDGPU/WaveTransform/wave-transform-llc-pipeline.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,9 +385,9 @@
385385
; GCN-O3-NEXT: AMDGPU Control Flow Wave Transform
386386
; GCN-O3-NEXT: Slot index numbering
387387
; GCN-O3-NEXT: Live Interval Analysis
388+
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
388389
; GCN-O3-NEXT: Machine Natural Loop Construction
389390
; GCN-O3-NEXT: Register Coalescer
390-
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
391391
; GCN-O3-NEXT: Machine Block Frequency Analysis
392392
; GCN-O3-NEXT: Debug Variable Analysis
393393
; GCN-O3-NEXT: Live Stack Slot Analysis

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,8 @@
352352
; GCN-O1-NEXT: Rewrite Partial Register Uses
353353
; GCN-O1-NEXT: Machine Instruction Scheduler
354354
; GCN-O1-NEXT: SI Whole Quad Mode
355-
; GCN-O1-NEXT: AMDGPU Pre-RA Long Branch Reg
356355
; GCN-O1-NEXT: SI optimize exec mask operations pre-RA
356+
; GCN-O1-NEXT: AMDGPU Pre-RA Long Branch Reg
357357
; GCN-O1-NEXT: Machine Natural Loop Construction
358358
; GCN-O1-NEXT: Machine Block Frequency Analysis
359359
; GCN-O1-NEXT: Debug Variable Analysis
@@ -665,8 +665,8 @@
665665
; GCN-O1-OPTS-NEXT: Machine Instruction Scheduler
666666
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations
667667
; GCN-O1-OPTS-NEXT: SI Whole Quad Mode
668-
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Long Branch Reg
669668
; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA
669+
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Long Branch Reg
670670
; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction
671671
; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis
672672
; GCN-O1-OPTS-NEXT: Debug Variable Analysis
@@ -983,9 +983,9 @@
983983
; GCN-O2-NEXT: Machine Instruction Scheduler
984984
; GCN-O2-NEXT: AMDGPU Pre-RA optimizations
985985
; GCN-O2-NEXT: SI Whole Quad Mode
986+
; GCN-O2-NEXT: SI optimize exec mask operations pre-RA
986987
; GCN-O2-NEXT: SI Form memory clauses
987988
; GCN-O2-NEXT: AMDGPU Pre-RA Long Branch Reg
988-
; GCN-O2-NEXT: SI optimize exec mask operations pre-RA
989989
; GCN-O2-NEXT: Machine Natural Loop Construction
990990
; GCN-O2-NEXT: Machine Block Frequency Analysis
991991
; GCN-O2-NEXT: Debug Variable Analysis
@@ -1315,9 +1315,9 @@
13151315
; GCN-O3-NEXT: Machine Instruction Scheduler
13161316
; GCN-O3-NEXT: AMDGPU Pre-RA optimizations
13171317
; GCN-O3-NEXT: SI Whole Quad Mode
1318+
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
13181319
; GCN-O3-NEXT: SI Form memory clauses
13191320
; GCN-O3-NEXT: AMDGPU Pre-RA Long Branch Reg
1320-
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
13211321
; GCN-O3-NEXT: Machine Natural Loop Construction
13221322
; GCN-O3-NEXT: Machine Block Frequency Analysis
13231323
; GCN-O3-NEXT: Debug Variable Analysis

0 commit comments

Comments
 (0)