Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Consolidate layout passes into one phase #112004

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
0a70d2a
Reorder block list only once after 3-opt
amanasifkhalid Jan 29, 2025
25389c9
Fix block insertion
amanasifkhalid Jan 30, 2025
df25317
Run 3-opt once for all regions
amanasifkhalid Jan 30, 2025
46539d5
Remove some EH checks
amanasifkhalid Jan 30, 2025
84600ec
Remove unused layout methods
amanasifkhalid Jan 30, 2025
289134a
Relax EH invariants; better stress mode
amanasifkhalid Jan 30, 2025
94d4f84
Place cold blocks in RPO
amanasifkhalid Jan 30, 2025
38b3700
Merge from main
amanasifkhalid Feb 5, 2025
a2774e0
Clean up 3-opt driver a bit
amanasifkhalid Feb 5, 2025
1262dd6
Clean up 3-opt driver a bit
amanasifkhalid Feb 6, 2025
918c557
Merge from main
amanasifkhalid Feb 6, 2025
1404f80
Cold code motion working
amanasifkhalid Feb 6, 2025
26359c4
Move try regions
amanasifkhalid Feb 7, 2025
f062f0b
Ensure method entry is considered hot by 3-opt
amanasifkhalid Feb 7, 2025
ce79655
Always invalidate DFS tree after layout
amanasifkhalid Feb 7, 2025
2520f7a
Revert "Always invalidate DFS tree after layout"
amanasifkhalid Feb 7, 2025
e54dbb5
Fix DFS tree invalidation logic
amanasifkhalid Feb 7, 2025
2f5f342
Merge from main
amanasifkhalid Feb 20, 2025
f08a756
Style; use bbPreorderNum for ordinal
amanasifkhalid Feb 20, 2025
447b2dd
Cleanup
amanasifkhalid Feb 20, 2025
6f7ec57
Fix call-finally motion
amanasifkhalid Feb 20, 2025
5d32277
fgFindEHRegionEnds -> fgFindTryRegionEnds
amanasifkhalid Feb 20, 2025
52536be
Style
amanasifkhalid Feb 21, 2025
60303ad
Recompute try region ends only when necessary
amanasifkhalid Feb 21, 2025
2497d6b
Merge branch 'main' into one-layout-phase
amanasifkhalid Feb 21, 2025
c3b78a3
Merge branch 'main' into one-layout-phase
amanasifkhalid Feb 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Reorder block list only once after 3-opt
amanasifkhalid committed Jan 29, 2025
commit 0a70d2a7b0d8613c4b3d14d56a2f444505c13877
25 changes: 1 addition & 24 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
@@ -5233,30 +5233,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
//
if (JitConfig.JitDoReversePostOrderLayout())
{
auto lateLayoutPhase = [this] {
// Skip preliminary reordering passes to create more work for 3-opt layout
if (compStressCompile(STRESS_THREE_OPT_LAYOUT, 10))
{
m_dfsTree = fgComputeDfs</* useProfile */ true>();
}
else
{
fgDoReversePostOrderLayout();
fgMoveColdBlocks();
}

fgSearchImprovedLayout();
fgInvalidateDfsTree();

if (compHndBBtabCount != 0)
{
fgRebuildEHRegions();
}

return PhaseStatus::MODIFIED_EVERYTHING;
};

DoPhase(this, PHASE_OPTIMIZE_LAYOUT, lateLayoutPhase);
DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::fgSearchImprovedLayout);
}

// Now that the flowgraph is finalized, run post-layout optimizations.
10 changes: 5 additions & 5 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
@@ -3048,7 +3048,7 @@ class Compiler

void fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast);

void fgRebuildEHRegions();
void fgFindEHRegionEnds();

void fgSkipRmvdBlocks(EHblkDsc* handlerTab);

@@ -6315,7 +6315,7 @@ class Compiler
bool fgReorderBlocks(bool useProfile);
void fgDoReversePostOrderLayout();
void fgMoveColdBlocks();
void fgSearchImprovedLayout();
PhaseStatus fgSearchImprovedLayout();

class ThreeOptLayout
{
@@ -6341,11 +6341,11 @@ class Compiler
void AddNonFallthroughPreds(unsigned blockPos);
bool RunGreedyThreeOptPass(unsigned startPos, unsigned endPos);

bool RunThreeOptPass(BasicBlock* startBlock, BasicBlock* endBlock);
void RunThreeOptPass(BasicBlock* startBlock, BasicBlock* endBlock);

public:
ThreeOptLayout(Compiler* comp);
void Run();
ThreeOptLayout(Compiler* comp, BasicBlock** hotBlocks, unsigned numHotBlocks);
bool Run();
};

template <bool hasEH>
168 changes: 100 additions & 68 deletions src/coreclr/jit/fgopt.cpp
Original file line number Diff line number Diff line change
@@ -4454,6 +4454,7 @@ bool Compiler::fgReorderBlocks(bool useProfile)
template <bool hasEH>
void Compiler::fgMoveHotJumps()
{
return;
#ifdef DEBUG
if (verbose)
{
@@ -4924,13 +4925,19 @@ void Compiler::fgMoveColdBlocks()
//
// Parameters:
// comp - The Compiler instance
// hotBlocks - An array of the blocks worth reordering
// numHotBlocks - The number of blocks in 'hotBlocks'
//
Compiler::ThreeOptLayout::ThreeOptLayout(Compiler* comp)
// Notes:
// To save an allocation, we will reuse the DFS tree's underlying array for 'tempOrder'.
// This means we will trash the DFS tree.
//
Compiler::ThreeOptLayout::ThreeOptLayout(Compiler* comp, BasicBlock** hotBlocks, unsigned numHotBlocks)
: compiler(comp)
, cutPoints(comp->getAllocator(CMK_FlowEdge), &ThreeOptLayout::EdgeCmp)
, blockOrder(nullptr)
, tempOrder(nullptr)
, numCandidateBlocks(0)
, blockOrder(hotBlocks)
, tempOrder(comp->m_dfsTree->GetPostOrder())
, numCandidateBlocks(numHotBlocks)
, currEHRegion(0)
{
}
@@ -5228,49 +5235,21 @@ void Compiler::ThreeOptLayout::AddNonFallthroughPreds(unsigned blockPos)
// we're interested in reordering.
// We skip reordering handler regions for now, as these are assumed to be cold.
//
void Compiler::ThreeOptLayout::Run()
// Returns:
// True if any blocks were moved
//
bool Compiler::ThreeOptLayout::Run()
{
// Since we moved all cold blocks to the end of the method already,
// we should have a span of hot blocks to consider reordering at the beginning of the method
// (unless none of the blocks are cold relative to the rest of the method,
// in which case we will reorder the whole main method body).
BasicBlock* const finalBlock = (compiler->fgFirstColdBlock != nullptr) ? compiler->fgFirstColdBlock->Prev()
: compiler->fgLastBBInMainFunction();

// Reset cold section pointer, in case we decide to do hot/cold splitting later
compiler->fgFirstColdBlock = nullptr;
assert(numCandidateBlocks > 0);

// We better have an end block for the hot section, and it better not be the start of a call-finally pair.
assert(finalBlock != nullptr);
assert(!finalBlock->isBBCallFinallyPair());

// For methods with fewer than three candidate blocks, we cannot partition anything
if (finalBlock->IsFirst() || finalBlock->Prev()->IsFirst())
// Initialize ordinals, and find EH region ends
for (unsigned i = 0; i < numCandidateBlocks; i++)
{
JITDUMP("Not enough blocks to partition anything. Skipping 3-opt.\n");
return;
}

// Get an upper bound on the number of hot blocks without walking the whole block list.
// We will only consider blocks reachable via normal flow.
const unsigned numBlocksUpperBound = compiler->m_dfsTree->GetPostOrderCount();
assert(numBlocksUpperBound != 0);
blockOrder = new (compiler, CMK_BasicBlock) BasicBlock*[numBlocksUpperBound * 2];
tempOrder = (blockOrder + numBlocksUpperBound);

// Initialize the current block order
for (BasicBlock* const block : compiler->Blocks(compiler->fgFirstBB, finalBlock))
{
if (!compiler->m_dfsTree->Contains(block))
{
continue;
}

assert(numCandidateBlocks < numBlocksUpperBound);
blockOrder[numCandidateBlocks] = tempOrder[numCandidateBlocks] = block;
BasicBlock* const block = blockOrder[i];
tempOrder[i] = block;

// Repurpose 'bbPostorderNum' for the block's ordinal
block->bbPostorderNum = numCandidateBlocks++;
block->bbPostorderNum = i;

// While walking the span of blocks to reorder,
// remember where each try region ends within this span.
@@ -5283,7 +5262,6 @@ void Compiler::ThreeOptLayout::Run()
}

// Reorder try regions first
bool modified = false;
for (EHblkDsc* const HBtab : EHClauses(compiler))
{
// If multiple region indices map to the same region,
@@ -5298,32 +5276,55 @@ void Compiler::ThreeOptLayout::Run()
if ((tryBeg->bbPostorderNum < numCandidateBlocks) && (blockOrder[tryBeg->bbPostorderNum] == tryBeg))
{
JITDUMP("Running 3-opt for try region #%d\n", (currEHRegion - 1));
modified |= RunThreeOptPass(tryBeg, HBtab->ebdTryLast);
RunThreeOptPass(tryBeg, HBtab->ebdTryLast);
}
}

// Finally, reorder the main method body
currEHRegion = 0;
JITDUMP("Running 3-opt for main method body\n");
modified |= RunThreeOptPass(compiler->fgFirstBB, blockOrder[numCandidateBlocks - 1]);
RunThreeOptPass(blockOrder[0], blockOrder[numCandidateBlocks - 1]);

if (modified)
bool modified = false;
for (unsigned i = 1; i < numCandidateBlocks; i++)
{
for (unsigned i = 1; i < numCandidateBlocks; i++)
BasicBlock* const block = blockOrder[i - 1];
BasicBlock* const next = blockOrder[i];

if (block->NextIs(next))
{
BasicBlock* const block = blockOrder[i - 1];
BasicBlock* const next = blockOrder[i];
continue;
}

// Only reorder within EH regions to maintain contiguity.
// TODO: Allow moving blocks in different regions when 'next' is the region entry.
// This would allow us to move entire regions up/down because of the contiguity requirement.
if (!block->NextIs(next) && BasicBlock::sameEHRegion(block, next))
{
compiler->fgUnlinkBlock(next);
compiler->fgInsertBBafter(block, next);
}
// Only reorder within EH regions to maintain contiguity.
if (!BasicBlock::sameEHRegion(block, next))
{
continue;
}

// Don't break up call-finally pairs
if (block->isBBCallFinallyPair() || next->isBBCallFinallyPairTail())
{
continue;
}

modified = true;

// Move call-finally pairs in tandem
if (next->isBBCallFinallyPair())
{
BasicBlock* const callFinallyTail = next->Next();
compiler->fgUnlinkRange(next, callFinallyTail);
compiler->fgMoveBlocksAfter(next, callFinallyTail, block);
}
else
{
compiler->fgUnlinkBlock(next);
compiler->fgInsertBBafter(block, next);
}
}

return modified;
}

//-----------------------------------------------------------------------------
@@ -5535,10 +5536,7 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned
// startBlock - The first block of the range to reorder
// endBlock - The last block (inclusive) of the range to reorder
//
// Returns:
// True if we reordered anything, false otherwise
//
bool Compiler::ThreeOptLayout::RunThreeOptPass(BasicBlock* startBlock, BasicBlock* endBlock)
void Compiler::ThreeOptLayout::RunThreeOptPass(BasicBlock* startBlock, BasicBlock* endBlock)
{
assert(startBlock != nullptr);
assert(endBlock != nullptr);
@@ -5551,7 +5549,7 @@ bool Compiler::ThreeOptLayout::RunThreeOptPass(BasicBlock* startBlock, BasicBloc
if (numBlocks < 3)
{
JITDUMP("Not enough blocks to partition anything. Skipping reordering.\n");
return false;
return;
}

JITDUMP("Initial layout cost: %f\n", GetLayoutCost(startPos, endPos));
@@ -5567,18 +5565,19 @@ bool Compiler::ThreeOptLayout::RunThreeOptPass(BasicBlock* startBlock, BasicBloc
{
JITDUMP("No changes made.\n");
}

return modified;
}

//-----------------------------------------------------------------------------
// fgSearchImprovedLayout: Try to improve upon RPO-based layout with the 3-opt method:
// fgSearchImprovedLayout: Try to improve upon a RPO-based layout with the 3-opt method:
// - Identify a range of hot blocks to reorder within
// - Partition this set into three segments: S1 - S2 - S3
// - Evaluate cost of swapped layout: S1 - S3 - S2
// - If the cost improves, keep this layout
//
void Compiler::fgSearchImprovedLayout()
// Returns:
// Suitable phase status
//
PhaseStatus Compiler::fgSearchImprovedLayout()
{
#ifdef DEBUG
if (verbose)
@@ -5591,8 +5590,41 @@ void Compiler::fgSearchImprovedLayout()
}
#endif // DEBUG

ThreeOptLayout layoutRunner(this);
layoutRunner.Run();
// Before running 3-opt, compute a loop-aware RPO to get a sensible starting layout.
m_dfsTree = fgComputeDfs</* useProfile */ true>();
m_loops = FlowGraphNaturalLoops::Find(m_dfsTree);
BasicBlock** const initialLayout = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()];

// When walking the RPO-based layout, only add hot blocks to the initial layout.
// We don't want to waste time running 3-opt on cold blocks.
unsigned numHotBlocks = 0;
auto addToSequence = [this, initialLayout, &numHotBlocks](BasicBlock* block) {
if (!block->isBBWeightCold(this))
{
initialLayout[numHotBlocks++] = block;
}
};

fgVisitBlocksInLoopAwareRPO(m_dfsTree, m_loops, addToSequence);
bool modified = false;

if (numHotBlocks > 0)
{
ThreeOptLayout layoutRunner(this, initialLayout, numHotBlocks);
modified = layoutRunner.Run();

if (compHndBBtabCount > 0)
{
fgFindEHRegionEnds();
}
}
else
{
JITDUMP("No hot blocks found. Skipping block reordering.\n");
}

fgInvalidateDfsTree();
return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}

//-------------------------------------------------------------
Loading