Skip to content

Commit

Permalink
[JSC] Loop Unrolling should not change Branch to Jump
Browse files Browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=285206
rdar://142111593

Reviewed by Justin Michaud.

Until later phase in FTL, we should not change Branch to Jump.
Otherwise, one block's successors get reduced into one block, which
confuses liveness of locals in DFG CPS graph. Instead, we preserve
the graph at this phase by using Branch(True, next, header). Later FTL
phase will convert this to Jump with appropriate liveness preservation
but we do not need to do this analysis here. We can just use this
always-true branch and keep graph.

* JSTests/stress/loop-unrolling-liveness-0.js: Added.
(f1):
* JSTests/stress/loop-unrolling-liveness-1.js: Added.
(f):
* Source/JavaScriptCore/dfg/DFGLoopUnrollingPhase.cpp:
(JSC::DFG::LoopUnrollingPhase::run):
(JSC::DFG::LoopUnrollingPhase::populateCandidateLoops):
(JSC::DFG::LoopUnrollingPhase::unrollLoop):
(JSC::DFG::LoopUnrollingPhase::selectDeepestNestedLoop): Deleted.

Canonical link: https://commits.webkit.org/289279@main
  • Loading branch information
Constellation committed Jan 23, 2025
1 parent 384d445 commit cd234bf
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 44 deletions.
14 changes: 14 additions & 0 deletions JSTests/stress/loop-unrolling-liveness-0.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
//@ runDefault("--useConcurrentJIT=0", "--thresholdForJITAfterWarmUp=10", "--thresholdForOptimizeAfterWarmUp=100", "--thresholdForFTLOptimizeAfterWarmUp=1000")
function f1(a2, a3, a4) {
let v5 = a2 && 99;
let v6 = 0;
let v8 = v5++;
do {
v8 &&= v5;
v6++;
} while (v6 < 4);
return v6;
}
for (let v10 = 0; v10 < 100; v10++) {
f1();
}
14 changes: 14 additions & 0 deletions JSTests/stress/loop-unrolling-liveness-1.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
function f(y) {
let x = 0;
for (let i = 0; i < 2; i++) {
if (y) {
x++;
}
}
}
for (let j = 0; j < 9; j++) {
for (let k = 0; k < 999; k++) {
f(k);
}
}
(function(){})();
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
./dfg/DFGArgumentsEliminationPhase.cpp
./dfg/DFGDesiredWatchpoints.h
./dfg/DFGLazyJSValue.cpp
./dfg/DFGLoopUnrollingPhase.cpp
./dfg/DFGObjectAllocationSinkingPhase.cpp
./dfg/DFGOperations.cpp
./dfg/DFGPhantomInsertionPhase.cpp
Expand Down
94 changes: 50 additions & 44 deletions Source/JavaScriptCore/dfg/DFGLoopUnrollingPhase.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Cloneright (C) 2024 Apple Inc. All rights reserved.
* Copyright (C) 2024 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -88,61 +88,62 @@ class LoopUnrollingPhase : public Phase {

bool run()
{
bool changed = false;

if (UNLIKELY(Options::verboseLoopUnrolling())) {
dataLogLn("Graph before Loop Unrolling Phase:");
m_graph.dump();
}

uint32_t unrolledCount = 0;
while (true) {
const NaturalLoop* loop = selectDeepestNestedLoop();
if (!loop || unrolledCount >= Options::maxLoopUnrollingCount())
break;
if (!tryUnroll(loop))
auto loops = populateCandidateLoops();
if (loops.isEmpty() || unrolledCount >= Options::maxLoopUnrollingCount())
break;

++unrolledCount;
changed = true;
bool unrolled = false;
for (auto [loop, depth] : loops) {
if (!loop)
break;
if (tryUnroll(loop)) {
unrolled = true;
++unrolledCount;
break;
}
}
if (!unrolled)
break;
}

dataLogLnIf(Options::verboseLoopUnrolling(), "Successfully unrolled ", unrolledCount, " loops.");
return changed;
return !!unrolledCount;
}

const NaturalLoop* selectDeepestNestedLoop()
Vector<std::tuple<const NaturalLoop*, int32_t>, 16> populateCandidateLoops()
{
m_graph.ensureCPSNaturalLoops();

const NaturalLoop* target = nullptr;
int32_t maxDepth = INT_MIN;
uint32_t loopCount = m_graph.m_cpsNaturalLoops->numLoops();
IndexMap<const NaturalLoop*, int32_t> depthCache(loopCount, INT_MIN);

Vector<std::tuple<const NaturalLoop*, int32_t>, 16> loops(loopCount, std::tuple { nullptr, INT_MIN });
for (uint32_t loopIndex = loopCount; loopIndex--;) {
const NaturalLoop& loop = m_graph.m_cpsNaturalLoops->loop(loopIndex);
ASSERT(loop.index() == loopIndex && depthCache[loopIndex] == INT_MIN);
ASSERT(loop.index() == loopIndex && std::get<1>(loops[loopIndex]) == INT_MIN);

int32_t depth = 0;
const NaturalLoop* current = &loop;
while (current) {
int32_t cachedDepth = depthCache[current->index()];
int32_t cachedDepth = std::get<1>(loops[current->index()]);
if (cachedDepth != INT_MIN) {
depth += cachedDepth;
break;
}
++depth;
current = m_graph.m_cpsNaturalLoops->innerMostOuterLoop(*current);
}
depthCache[loopIndex] = depth;

if (depth > maxDepth) {
maxDepth = depth;
target = &loop;
}
loops[loopIndex] = std::tuple { &loop, depth };
}
return target;
std::sort(loops.begin(), loops.end(), [&](const auto& lhs, const auto& rhs) {
return std::get<1>(lhs) > std::get<1>(rhs);
});
return loops;
}

bool tryUnroll(const NaturalLoop* loop)
Expand Down Expand Up @@ -459,9 +460,7 @@ class LoopUnrollingPhase : public Phase {
BasicBlock* const header = data.header();
BasicBlock* const tail = data.tail;

const NodeOrigin tailTerminalOrigin = tail->terminal()->origin;
const CodeOrigin tailTerminalOriginSemantic = tailTerminalOrigin.semantic;
dataLogLnIf(Options::verboseLoopUnrolling(), "tailTerminalOriginSemantic ", tailTerminalOriginSemantic);
dataLogLnIf(Options::verboseLoopUnrolling(), "tailTerminalOriginSemantic ", tail->terminal()->origin.semantic);

// Mapping from the origin to the clones.
UncheckedKeyHashMap<BasicBlock*, BasicBlock*> blockClones;
Expand All @@ -477,6 +476,23 @@ class LoopUnrollingPhase : public Phase {
}
};

auto convertTailBranchToNextJump = [&](BasicBlock* tail, BasicBlock* next) {
// Why don't we use Jump instead of Branch? The reason is tail's original terminal was Branch.
// If we change this from Branch to Jump, we need to preserve how variables are used (via GetLocal, MovHint, SetLocal)
// not to confuse these variables liveness, it involves what blocks are used for successors of this tail block.
// Here, we can simplify the problem by using Branch and using the original "header" successors as never-taken branch.
// FTL's subsequent pass will optimize this and convert this Branch to Jump and/or eliminate this Branch and merge
// multiple blocks easily since its condition is constant boolean True. But we do not need to do the complicated analysis
// here. So let's just use Branch.
ASSERT(tail->terminal()->isBranch());
auto* constant = m_graph.addNode(SpecBoolean, JSConstant, tail->terminal()->origin, OpInfo(m_graph.freezeStrong(jsBoolean(true))));
tail->insertBeforeTerminal(constant);
auto* terminal = tail->terminal();
terminal->child1() = Edge(constant, KnownBooleanUse);
terminal->branchData()->taken = BranchTarget(next);
terminal->branchData()->notTaken = BranchTarget(header);
};

#if ASSERT_ENABLED
m_graph.initializeNodeOwners(); // This is only used for the debug assertion in cloneNodeImpl.
#endif
Expand Down Expand Up @@ -507,13 +523,8 @@ class LoopUnrollingPhase : public Phase {
}

// 3. Clone nodes.
for (uint32_t i = 0; i < body->size(); ++i) {
Node* bodyNode = body->at(i);
// Ignore the branch nodes at the end of the tail since we can directly jump to next (See step 5).
if (body == tail && bodyNode->origin.semantic == tailTerminalOriginSemantic)
continue;
cloneNode(nodeClones, clone, bodyNode);
}
for (Node* node : *body)
cloneNode(nodeClones, clone, node);

// 4. Clone variables and tail and head.
clone->variablesAtTail = body->variablesAtTail;
Expand All @@ -522,9 +533,10 @@ class LoopUnrollingPhase : public Phase {
replaceOperands(clone->variablesAtHead);

// 5. Clone successors. (predecessors will be fixed in resetReachability below)
if (body == tail)
clone->appendNode(m_graph, SpecNone, Jump, tailTerminalOrigin, OpInfo(next));
else {
if (body == tail) {
ASSERT(tail->terminal()->isBranch());
convertTailBranchToNextJump(clone, next);
} else {
for (uint32_t i = 0; i < body->numSuccessors(); ++i) {
auto& successor = clone->successor(i);
ASSERT(successor == body->successor(i));
Expand All @@ -538,13 +550,7 @@ class LoopUnrollingPhase : public Phase {
}

// 6. Replace the original loop tail branch with a jump to the last header clone.
{
for (Node* node : *tail) {
if (node->origin.semantic == tailTerminalOriginSemantic)
node->removeWithoutChecks();
}
tail->appendNode(m_graph, SpecNone, Jump, tailTerminalOrigin, OpInfo(next));
}
convertTailBranchToNextJump(tail, next);

// Done clone.
if (!m_blockInsertionSet.execute()) {
Expand Down

0 comments on commit cd234bf

Please sign in to comment.