Skip to content

Commit

Permalink
[EVM] Support commutable operations in BP stackification algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
PavelKopyl committed Nov 16, 2024
1 parent 8636cbe commit 5771d35
Show file tree
Hide file tree
Showing 7 changed files with 210 additions and 54 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/EVM/EVMControlFlowGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ struct CFG {

// Operation payload describing a single machine instruction.
// NOTE: the members are aggregate-initialized positionally (instruction,
// commutable flag, terminates/reverts flag), so keep their order stable.
struct BuiltinCall {
// The machine instruction this operation wraps; null by default.
MachineInstr *Builtin = nullptr;
// True if this instruction has commutable operands. In the EVM ISA,
// commutable operands always occupy the top two stack slots.
bool IsCommutable = false;
// NOTE(review): presumably true when the instruction ends execution
// (terminates or reverts) -- confirm against the CFG builder.
bool TerminatesOrReverts = false;
};

Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,9 +258,9 @@ void ControlFlowGraphBuilder::handleMachineInstr(MachineInstr &MI) {
default: {
Stack Input, Output;
collectInstrOperands(MI, &Input, &Output);
CurrentBlock->Operations.emplace_back(
CFG::Operation{std::move(Input), std::move(Output),
CFG::BuiltinCall{&MI, TerminatesOrReverts}});
CurrentBlock->Operations.emplace_back(CFG::Operation{
std::move(Input), std::move(Output),
CFG::BuiltinCall{&MI, MI.isCommutable(), TerminatesOrReverts}});
} break;
}

Expand Down
24 changes: 13 additions & 11 deletions llvm/lib/Target/EVM/EVMInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -270,17 +270,19 @@ defm SDIV : BinaryInst<sdiv, "SDIV", 0x05, 5>;
defm MOD : BinaryInst<urem, "MOD", 0x06, 5>;
defm SMOD : BinaryInst<srem, "SMOD", 0x07, 5>;

defm ADDMOD
: I<(outs GPR:$dst), (ins GPR:$add_op1, GPR:$add_op2, GPR:$denom),
[(set GPR:$dst,
(int_evm_addmod GPR:$add_op1, GPR:$add_op2, GPR:$denom))],
"ADDMOD", " $dst, $add_op1, $add_op2, $denom", 0x08, 8>;

defm MULMOD
: I<(outs GPR:$dst), (ins GPR:$mul_op1, GPR:$mul_op2, GPR:$denom),
[(set GPR:$dst,
(int_evm_mulmod GPR:$mul_op1, GPR:$mul_op2, GPR:$denom))],
"MULMOD", " $dst, $mul_op1, $mul_op2, $denom", 0x09, 8>;
let isCommutable = 1 in {
defm ADDMOD
: I<(outs GPR:$dst), (ins GPR:$add_op1, GPR:$add_op2, GPR:$denom),
[(set GPR:$dst,
(int_evm_addmod GPR:$add_op1, GPR:$add_op2, GPR:$denom))],
"ADDMOD", " $dst, $add_op1, $add_op2, $denom", 0x08, 8>;

defm MULMOD
: I<(outs GPR:$dst), (ins GPR:$mul_op1, GPR:$mul_op2, GPR:$denom),
[(set GPR:$dst,
(int_evm_mulmod GPR:$mul_op1, GPR:$mul_op2, GPR:$denom))],
"MULMOD", " $dst, $mul_op1, $mul_op2, $denom", 0x09, 8>;
}

defm EXP
: I<(outs GPR:$dst), (ins GPR:$base, GPR:$exp),
Expand Down
70 changes: 54 additions & 16 deletions llvm/lib/Target/EVM/EVMOptimizedCodeTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,16 +257,54 @@ void EVMOptimizedCodeTransform::createStackLayout(Stack TargetStack) {
void EVMOptimizedCodeTransform::createOperationEntryLayout(
const CFG::Operation &Op) {
// Create required layout for entering the Operation.
createStackLayout(Layout.operationEntryLayout.at(&Op));
// Check if we can choose cheaper stack shuffling if the Operation is an
// instruction with commutable arguments.
if (const auto *Inst = std::get_if<CFG::BuiltinCall>(&Op.Operation);
Inst && Inst->IsCommutable) {
// Get the stack layout before the instruction.
const Stack &DefaultTargetStack = Layout.operationEntryLayout.at(&Op);
size_t DefaultCost =
EvaluateStackTransform(CurrentStack, DefaultTargetStack);

// Commutable operands always take top two stack slots.
const unsigned OpIdx1 = 0, OpIdx2 = 1;
assert(DefaultTargetStack.size() > 1);

// Swap the commutable stack items and measure the stack shuffling cost
// again.
Stack CommutedTargetStack = DefaultTargetStack;
std::swap(CommutedTargetStack[CommutedTargetStack.size() - OpIdx1 - 1],
CommutedTargetStack[CommutedTargetStack.size() - OpIdx2 - 1]);
size_t CommutedCost =
EvaluateStackTransform(CurrentStack, CommutedTargetStack);
// Choose the cheapest transformation.
createStackLayout(CommutedCost < DefaultCost ? CommutedTargetStack
: DefaultTargetStack);
#ifndef NDEBUG
// Assert that we have the inputs of the Operation on stack top.
assert(static_cast<int>(CurrentStack.size()) == Assembly.getStackHeight());
assert(CurrentStack.size() >= Op.Input.size());
Stack StackInput =
EVMUtils::to_vector(EVMUtils::take_last(CurrentStack, Op.Input.size()));
// Adjust the StackInput to match the commuted stack.
if (CommutedCost < DefaultCost) {
std::swap(StackInput[StackInput.size() - OpIdx1 - 1],
StackInput[StackInput.size() - OpIdx2 - 1]);
}
assert(AreLayoutsCompatible(StackInput, Op.Input));
#endif // NDEBUG
} else {
createStackLayout(Layout.operationEntryLayout.at(&Op));

#ifndef NDEBUG
// Assert that we have the inputs of the Operation on stack top.
assert(static_cast<int>(CurrentStack.size()) == Assembly.getStackHeight());
assert(CurrentStack.size() >= Op.Input.size());
const Stack StackInput =
EVMUtils::to_vector(EVMUtils::take_last(CurrentStack, Op.Input.size()));
assert(AreLayoutsCompatible(StackInput, Op.Input));
// Assert that we have the inputs of the Operation on stack top.
assert(static_cast<int>(CurrentStack.size()) == Assembly.getStackHeight());
assert(CurrentStack.size() >= Op.Input.size());
const Stack StackInput =
EVMUtils::to_vector(EVMUtils::take_last(CurrentStack, Op.Input.size()));
assert(AreLayoutsCompatible(StackInput, Op.Input));
#endif // NDEBUG
}
}

void EVMOptimizedCodeTransform::operator()(const CFG::BasicBlock &Block) {
Expand All @@ -280,12 +318,14 @@ void EVMOptimizedCodeTransform::operator()(const CFG::BasicBlock &Block) {

auto const &BlockInfo = Layout.blockInfos.at(&Block);

// Assert that the stack is valid for entering the block.
assert(AreLayoutsCompatible(CurrentStack, BlockInfo.entryLayout));

// Might set some slots to junk, if not required by the block.
CurrentStack = BlockInfo.entryLayout;

// Assert that the stack is valid for entering the block. The entry layout
// of the function entry block is fully determined by the first
// instruction, so we can ignore 'BlockInfo.entryLayout'.
if (&Block != FuncInfo->Entry) {
assert(AreLayoutsCompatible(CurrentStack, BlockInfo.entryLayout));
// Might set some slots to junk, if not required by the block.
CurrentStack = BlockInfo.entryLayout;
}
assert(static_cast<int>(CurrentStack.size()) == Assembly.getStackHeight());

// Emit jumpdest, if required.
Expand Down Expand Up @@ -446,9 +486,7 @@ void EVMOptimizedCodeTransform::operator()() {
Assembly.setStackHeight(static_cast<int>(CurrentStack.size()));
Assembly.appendLabel();

// Create the entry layout of the function body block and visit.
createStackLayout(Layout.blockInfos.at(FuncInfo->Entry).entryLayout);

// Visit the function entry block.
(*this)(*FuncInfo->Entry);

Assembly.finalize();
Expand Down
137 changes: 125 additions & 12 deletions llvm/test/CodeGen/EVM/stack-ops-commutable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ define void @no_manipulations_needed_with_junk(i256 %a1, i256 %a2, i256 %a3) nor
; CHECK-LABEL: no_manipulations_needed_with_junk:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: SWAP1
; CHECK-NEXT: ADD
; CHECK-NEXT: PUSH0
; CHECK-NEXT: REVERT
Expand All @@ -17,6 +16,75 @@ define void @no_manipulations_needed_with_junk(i256 %a1, i256 %a2, i256 %a3) nor
unreachable
}

define void @no_manipulations_needed_with_junk_eq(i256 %a1, i256 %a2, i256 %a3) noreturn {
%cmp = icmp eq i256 %a1, %a2
%x1 = zext i1 %cmp to i256
call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1)
unreachable

; CHECK-LABEL: no_manipulations_needed_with_junk_eq:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: EQ
; CHECK-NEXT: PUSH0
; CHECK-NEXT: REVERT
}

define i256 @no_manipulations_needed_no_junk_addmod(i256 %a1, i256 %a2, i256 %a3) {
; CHECK-LABEL: no_manipulations_needed_no_junk_addmod:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: ADDMOD
; CHECK-NEXT: SWAP1
; CHECK-NEXT: JUMP
%x1 = call i256 @llvm.evm.addmod(i256 %a2, i256 %a1, i256 %a3)
ret i256 %x1
}

define i256 @no_manipulations_needed_no_junk_mulmod(i256 %a1, i256 %a2, i256 %a3) {
; CHECK-LABEL: no_manipulations_needed_no_junk_mulmod:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: MULMOD
; CHECK-NEXT: SWAP1
; CHECK-NEXT: JUMP
%x1 = call i256 @llvm.evm.mulmod(i256 %a2, i256 %a1, i256 %a3)
ret i256 %x1
}

define i256 @no_manipulations_needed_no_junk_and(i256 %a1, i256 %a2) {
; CHECK-LABEL: no_manipulations_needed_no_junk_and:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: AND
; CHECK-NEXT: SWAP1
; CHECK-NEXT: JUMP
%x1 = and i256 %a2, %a1
ret i256 %x1
}

define i256 @no_manipulations_needed_no_junk_or(i256 %a1, i256 %a2) {
; CHECK-LABEL: no_manipulations_needed_no_junk_or:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: OR
; CHECK-NEXT: SWAP1
; CHECK-NEXT: JUMP
%x1 = or i256 %a2, %a1
ret i256 %x1
}

define i256 @no_manipulations_needed_no_junk_xor(i256 %a1, i256 %a2) {
; CHECK-LABEL: no_manipulations_needed_no_junk_xor:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: XOR
; CHECK-NEXT: SWAP1
; CHECK-NEXT: JUMP
%x1 = xor i256 %a2, %a1
ret i256 %x1
}

define i256 @no_manipulations_needed_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind {
; CHECK-LABEL: no_manipulations_needed_no_junk:
; CHECK: ; %bb.0:
Expand All @@ -34,7 +102,6 @@ define void @reorder_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn {
; CHECK-LABEL: reorder_with_junk:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: SWAP1
; CHECK-NEXT: ADD
; CHECK-NEXT: PUSH0
; CHECK-NEXT: REVERT
Expand All @@ -61,7 +128,6 @@ define void @swap_first_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn {
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: POP
; CHECK-NEXT: SWAP1
; CHECK-NEXT: ADD
; CHECK-NEXT: PUSH0
; CHECK-NEXT: REVERT
Expand All @@ -70,6 +136,20 @@ define void @swap_first_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn {
unreachable
}

define i256 @two_commutable(i256 %a1, i256 %a2, i256 %a3) {
%x1 = add i256 %a3, %a2
%x2 = add i256 %a1, %x1
ret i256 %x2
; CHECK-LABEL: two_commutable:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: SWAP2
; CHECK-NEXT: ADD
; CHECK-NEXT: ADD
; CHECK-NEXT: SWAP1
; CHECK-NEXT: JUMP
}

define void @swap_second_with_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) noreturn {
; CHECK-LABEL: swap_second_with_junk:
; CHECK: ; %bb.0:
Expand All @@ -87,7 +167,6 @@ define i256 @swap_first_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwind
; CHECK-LABEL: swap_first_no_junk:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: SWAP3
; CHECK-NEXT: SWAP2
; CHECK-NEXT: POP
; CHECK-NEXT: POP
Expand All @@ -102,7 +181,6 @@ define i256 @swap_second_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwin
; CHECK-LABEL: swap_second_no_junk:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: SWAP3
; CHECK-NEXT: SWAP2
; CHECK-NEXT: POP
; CHECK-NEXT: POP
Expand Down Expand Up @@ -179,11 +257,10 @@ define i256 @second_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind {
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: DUP2
; CHECK-NEXT: PUSH1 4
; CHECK-NEXT: SWAP3
; CHECK-NEXT: SWAP4
; CHECK-NEXT: POP
; CHECK-NEXT: SWAP1
; CHECK-NEXT: PUSH1 4
; CHECK-NEXT: SWAP2
; CHECK-NEXT: ADD
; CHECK-NEXT: SWAP2
; CHECK-NEXT: SUB
Expand Down Expand Up @@ -220,10 +297,10 @@ define i256 @both_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind {
; CHECK-LABEL: both_arg_alive_no_junk:
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: SWAP2
; CHECK-NEXT: DUP1
; CHECK-NEXT: SWAP3
; CHECK-NEXT: POP
; CHECK-NEXT: DUP2
; CHECK-NEXT: DUP2
; CHECK-NEXT: DIV
; CHECK-NEXT: SWAP2
; CHECK-NEXT: ADD
Expand All @@ -241,9 +318,9 @@ define i256 @same_arg_dead_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind {
; CHECK: ; %bb.0:
; CHECK-NEXT: JUMPDEST
; CHECK-NEXT: POP
; CHECK-NEXT: SWAP1
; CHECK-NEXT: POP
; CHECK-NEXT: DUP1
; CHECK-NEXT: SWAP2
; CHECK-NEXT: POP
; CHECK-NEXT: ADD
; CHECK-NEXT: SWAP1
; CHECK-NEXT: DUP2
Expand All @@ -255,4 +332,40 @@ define i256 @same_arg_dead_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind {
ret i256 %x1
}

define void @commutable_not_in_function_entry() noreturn {

; CHECK-LABEL: .BB{{[0-9]+}}_3:
; CHECK: JUMPDEST
; CHECK-NEXT: PUSH4 4294967295
; CHECK-NEXT: AND
; CHECK-NEXT: PUSH0

enter:
%offset = inttoptr i256 0 to ptr addrspace(2)
%load = call i256 @llvm.evm.calldataload(ptr addrspace(2) %offset)
%calldata = trunc i256 %load to i32
br label %header

header:
%phi = phi i32 [ %calldata, %enter ], [ %inc, %do ]
%phi2 = phi i32 [ 1, %enter ], [ %mul, %do ]
%cmp = icmp sgt i32 %phi, 0
br i1 %cmp, label %do, label %exit

do:
%mul = mul nsw i32 %phi2, %phi
%inc = add nsw i32 %phi, -1
br label %header

exit:
%res = zext i32 %phi2 to i256
store i256 %res, ptr addrspace(1) null, align 4
call void @llvm.evm.return(ptr addrspace(1) null, i256 32)
unreachable
}

declare i256 @llvm.evm.addmod(i256, i256, i256)
declare i256 @llvm.evm.mulmod(i256, i256, i256)
declare i256 @llvm.evm.calldataload(ptr addrspace(2))
declare void @llvm.evm.return(ptr addrspace(1), i256)
declare void @llvm.evm.revert(ptr addrspace(1), i256)
Loading

0 comments on commit 5771d35

Please sign in to comment.