From 3f19af848a431bdf1f39a39903f609c6e8366b0a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Jul 2023 20:37:09 +0200 Subject: [PATCH] JIT: Compute BB weights with higher precision, use tolerance in if-conversion (#88385) In https://github.com/dotnet/runtime/issues/88376#issuecomment-1620371447 I noticed a spurious diff in the System.Tests.Perf_Int32.ToStringHex benchmark that looked like this: ```diff G_M62987_IG13: and edi, esi mov ebx, dword ptr [rbp+88H] - mov ecx, 1 test ebx, ebx - cmovle ebx, ecx + jg SHORT G_M62987_IG15 + ;; size=12 bbWeight=1.00 PerfScore 2.50 +G_M62987_IG14: + mov ebx, 1 + ;; size=5 bbWeight=0.98 PerfScore 0.24 +G_M62987_IG15: ``` Investigating, it turns out to be caused by a check in if-conversion for detecting loops, that is using a very sharp boundary. It turns out that the integral block weights are exactly the same (in both the base and diff), but the floating point calculation ended up with exactly 1 and very close to 1 in the base/diff cases respectively. So two changes to address it: * Switch getBBWeight to multiply by BB_UNITY_WEIGHT after dividing * Switch if-conversion to have a 5% tolerance so that losing just a single count in the wrong place will not cause a decision change. Note that the check here is used as a cheap "is inside loop" check. --- src/coreclr/jit/block.cpp | 4 ++-- src/coreclr/jit/ifconversion.cpp | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index 01c1e52b99613..33dfd017e8505 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -1665,9 +1665,9 @@ weight_t BasicBlock::getBBWeight(Compiler* comp) { weight_t calledCount = getCalledCount(comp); - // Normalize the bbWeights by multiplying by BB_UNITY_WEIGHT and dividing by the calledCount. + // Normalize the bbWeight. // - weight_t fullResult = this->bbWeight * BB_UNITY_WEIGHT / calledCount; + weight_t fullResult = (this->bbWeight / calledCount) * BB_UNITY_WEIGHT; return fullResult; } diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp index c7441458556bc..f51417453225a 100644 --- a/src/coreclr/jit/ifconversion.cpp +++ b/src/coreclr/jit/ifconversion.cpp @@ -656,10 +656,11 @@ bool OptIfConversionDsc::optIfConvert() if (!m_comp->compStressCompile(Compiler::STRESS_IF_CONVERSION_INNER_LOOPS, 25)) { - // Don't optimise the block if it is inside a loop - // When inside a loop, branches are quicker than selects. + // Don't optimise the block if it is inside a loop. Loop-carried + // dependencies can cause significant stalls if if-converted. // Detect via the block weight as that will be high when inside a loop. - if (m_startBlock->getBBWeight(m_comp) > BB_UNITY_WEIGHT) + + if (m_startBlock->getBBWeight(m_comp) > BB_UNITY_WEIGHT * 1.05) { JITDUMP("Skipping if-conversion inside loop (via weight)\n"); return false;