From 9c456f6bdbfa82a984688bede5f65136cbb53b18 Mon Sep 17 00:00:00 2001
From: Aman Khalid
Date: Sat, 1 Feb 2025 10:50:20 -0800
Subject: [PATCH] JIT: Compute `fgCalledCount` after synthesis (#112041)

Part of #107749. Follow-up to #111971 and #110693.

For methods without profile data, ensure the default call count is available throughout compilation (this had no diffs for me locally). For methods with profile data, compute the call count after synthesis runs to ensure it is available early, and reasonably accurate.

I'm only seeing diffs in OSR methods locally, due to the logic in `fgFixEntryFlowForOSR` (which runs right after profile incorporation) no longer affecting `fgCalledCount`. This method guesses that the loop iterates about 100x the method call count, and scales the method entry block's weight down accordingly. This gives the impression later on that `fgCalledCount` is much lower than what we calculated using `fgEntryBB`.

The actual diffs seem to manifest largely in LSRA, which uses `fgCalledCount` to normalize block weights, though there are a few other phases that use `BasicBlock::getBBWeight` in lieu of the raw weight as well. I think we ought to consolidate our block weight strategy at some point, especially if we have newfound faith in `fgCalledCount`. For example, instead of this check in if conversion:

```
if (m_startBlock->getBBWeight(m_comp) > BB_UNITY_WEIGHT * 1.05)
```

Perhaps we could do:

```
if (m_startBlock->bbWeight > fgCalledCount * 1.05)
```

But that's for another PR.
--- src/coreclr/jit/compiler.h | 6 +++--- src/coreclr/jit/fgprofile.cpp | 10 ---------- src/coreclr/jit/fgprofilesynthesis.cpp | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 61aa9207d05a66..1087c3d9156419 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5400,9 +5400,9 @@ class Compiler // - Rationalization links all nodes into linear form which is kept until // the end of compilation. The first and last nodes are stored in the block. NodeThreading fgNodeThreading = NodeThreading::None; - weight_t fgCalledCount = BB_ZERO_WEIGHT; // count of the number of times this method was called - // This is derived from the profile data - // or is BB_UNITY_WEIGHT when we don't have profile data + weight_t fgCalledCount = BB_UNITY_WEIGHT; // count of the number of times this method was called + // This is derived from the profile data + // or is BB_UNITY_WEIGHT when we don't have profile data bool fgFuncletsCreated = false; // true if the funclet creation phase has been run diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 1dd0cfd27615b2..152e8bb5fd1ad9 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -4222,19 +4222,9 @@ PhaseStatus Compiler::fgComputeBlockWeights() if (fgIsUsingProfileWeights()) { - // Compute fgCalledCount by subtracting any non-entry flow into fgFirstBB from its weight - fgCalledCount = fgFirstBB->bbWeight; - for (FlowEdge* const predEdge : fgFirstBB->PredEdges()) - { - fgCalledCount = max(BB_ZERO_WEIGHT, fgCalledCount - predEdge->getLikelyWeight()); - } - - JITDUMP("We are using the profile weights and fgCalledCount is " FMT_WT "\n", fgCalledCount); return PhaseStatus::MODIFIED_NOTHING; } - JITDUMP(" -- no profile data, so using default called count\n"); - fgCalledCount = BB_UNITY_WEIGHT; return fgComputeMissingBlockWeights() ? 
PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp index 4647bedde594dd..1647925e270f3f 100644 --- a/src/coreclr/jit/fgprofilesynthesis.cpp +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -161,6 +161,20 @@ void ProfileSynthesis::Run(ProfileSynthesisOption option) m_comp->Metrics.ProfileInconsistentInitially++; } + // Derive the method's call count from the entry block's weight + // + if (m_comp->fgIsUsingProfileWeights() && !m_comp->compIsForInlining()) + { + weight_t entryWeight = m_entryBlock->bbWeight; + for (FlowEdge* const predEdge : m_entryBlock->PredEdges()) + { + entryWeight -= predEdge->getLikelyWeight(); + } + + m_comp->fgCalledCount = max(BB_ZERO_WEIGHT, entryWeight); + JITDUMP("fgCalledCount is " FMT_WT "\n", m_comp->fgCalledCount); + } + #ifdef DEBUG // We want to assert that the profile is consistent. // However, we need to defer asserting since invalid IL can