Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More optimizations for pow of two and pos/neg one const on the right #2870

Merged
merged 30 commits into from
Jun 22, 2020
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
3459b7e
more optimizations for pow of two consts
MaxGraey May 23, 2020
5d4d963
add case for x * -1
MaxGraey May 23, 2020
d0d7813
don't check side effects for cases which save left expr
MaxGraey May 23, 2020
99936bd
add case for x - (-1) => x + 1
MaxGraey May 23, 2020
45b5128
fixes
MaxGraey May 23, 2020
d9c38d2
add case for (unsigned)x > -1 => 0
MaxGraey May 23, 2020
cf91110
switch to template for pow-of-two methods
MaxGraey May 23, 2020
6012b2b
minor refactoring
MaxGraey May 23, 2020
bc8c553
Add cases for x <= -1 for signed and unsigned cases
MaxGraey May 23, 2020
1e246b6
restore commented test
MaxGraey May 23, 2020
10cf654
add tests for 64-bit mul with pow of two const
MaxGraey May 24, 2020
f29ee7b
remove x / -1 transform
MaxGraey May 24, 2020
c7f3f80
add (unsigned)x < -1. Fix comparison
MaxGraey May 24, 2020
6016fa9
fix binaryen_js tests
MaxGraey May 26, 2020
530db36
Merge branch 'master' into opt-pow-of-two-div
MaxGraey May 26, 2020
71f8991
refactor & fix according review
MaxGraey May 28, 2020
d91d496
fix bugs found by fuzzer
MaxGraey May 28, 2020
e944be2
improve comments
MaxGraey May 28, 2020
f7edf1f
fix comment
MaxGraey May 28, 2020
1b9f1b8
simplify
MaxGraey May 28, 2020
3104a2c
fix bug found by fuzzer. Unify tests cases
MaxGraey May 28, 2020
346f7fb
Merge branch 'master' into opt-pow-of-two-div
MaxGraey May 29, 2020
cd4dfe6
add tests for x * -1
MaxGraey Jun 2, 2020
2cc5096
Merge branch 'master' into opt-pow-of-two-div
MaxGraey Jun 2, 2020
fa6c382
update fixtures
MaxGraey Jun 2, 2020
176e5c1
remove x - (-1) transform
MaxGraey Jun 2, 2020
0c5456d
add tests x * -1 for floats
MaxGraey Jun 2, 2020
ea4b9e8
Merge branch 'master' into opt-pow-of-two-div
MaxGraey Jun 16, 2020
7938b01
add (signed)x % 1 -> 0 case
MaxGraey Jun 16, 2020
46c40de
Merge branch 'master' into opt-pow-of-two-div
MaxGraey Jun 18, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions src/ir/abstract.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ enum Op {
// Relational
Eq,
Ne,
LtS,
LtU,
LeS,
LeU,
GtS,
GtU,
GeS,
GeU
};

// Provide a wasm type and an abstract op and get the concrete one. For example,
Expand Down Expand Up @@ -126,6 +134,22 @@ inline BinaryOp getBinary(Type type, Op op) {
return EqInt32;
case Ne:
return NeInt32;
case LtS:
return LtSInt32;
case LtU:
return LtUInt32;
case LeS:
return LeSInt32;
case LeU:
return LeUInt32;
case GtS:
return GtSInt32;
case GtU:
return GtUInt32;
case GeS:
return GeSInt32;
case GeU:
return GeUInt32;
default:
return InvalidBinary;
}
Expand Down Expand Up @@ -163,6 +187,22 @@ inline BinaryOp getBinary(Type type, Op op) {
return EqInt64;
case Ne:
return NeInt64;
case LtS:
return LtSInt64;
case LtU:
return LtUInt64;
case LeS:
return LeSInt64;
case LeU:
return LeUInt64;
case GtS:
return GtSInt64;
case GtU:
return GtUInt64;
case GeS:
return GeSInt64;
case GeU:
return GeUInt64;
default:
return InvalidBinary;
}
Expand Down
98 changes: 84 additions & 14 deletions src/passes/OptimizeInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
//

#include <algorithm>
#include <type_traits>

#include <ir/abstract.h>
#include <ir/cost.h>
Expand Down Expand Up @@ -578,10 +579,30 @@ struct OptimizeInstructions
if (right->type == Type::i32) {
uint32_t c = right->value.geti32();
if (IsPowerOf2(c)) {
if (binary->op == MulInt32) {
return optimizePowerOf2Mul(binary, c);
} else if (binary->op == RemUInt32) {
return optimizePowerOf2URem(binary, c);
switch (binary->op) {
case MulInt32:
return optimizePowerOf2Mul(binary, c);
case RemUInt32:
return optimizePowerOf2URem(binary, c);
case DivUInt32:
return optimizePowerOf2UDiv(binary, c);
default:
break;
}
}
}
if (right->type == Type::i64) {
uint64_t c = right->value.geti64();
if (IsPowerOf2(c)) {
switch (binary->op) {
case MulInt64:
return optimizePowerOf2Mul(binary, c);
case RemUInt64:
return optimizePowerOf2URem(binary, c);
case DivUInt64:
return optimizePowerOf2UDiv(binary, c);
default:
break;
MaxGraey marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
Expand Down Expand Up @@ -1265,22 +1286,37 @@ struct OptimizeInstructions
// but it's still worth doing since
// * Often shifts are more common than muls.
// * The constant is smaller.
Expression* optimizePowerOf2Mul(Binary* binary, uint32_t c) {
uint32_t shifts = CountTrailingZeroes(c);
binary->op = ShlInt32;
binary->right->cast<Const>()->value = Literal(int32_t(shifts));
template<typename T> Expression* optimizePowerOf2Mul(Binary* binary, T c) {
  // Turns a multiply by the power-of-two constant `c` into a left shift.
  // `T` selects the operand width: uint32_t for i32, uint64_t for i64.
  // The same `binary` node is mutated in place and returned.
  constexpr bool is32Bit = std::is_same<T, uint32_t>::value;
  static_assert(is32Bit || std::is_same<T, uint64_t>::value, "type mismatch");
  // The shift amount is the number of trailing zero bits of `c`.
  const T shiftAmount = static_cast<T>(CountTrailingZeroes<T>(c));
  auto* constant = binary->right->cast<Const>();
  constant->value = Literal(shiftAmount);
  binary->op = is32Bit ? ShlInt32 : ShlInt64;
  return binary;
}

// Optimize an unsigned divide by a power of two on the right,
// which can be an AND mask
// Optimize an unsigned divide / remainder by a power of two on the right:
// the divide becomes an unsigned right shift and the remainder an AND mask.
// This doesn't shrink code size, and VMs likely optimize it anyhow,
// but it's still worth doing since
// * Usually shifts and ands are more common than udivs and urems.
// * The constant is slightly smaller.
Expression* optimizePowerOf2URem(Binary* binary, uint32_t c) {
binary->op = AndInt32;
binary->right->cast<Const>()->value = Literal(int32_t(c - 1));
template<typename T> Expression* optimizePowerOf2UDiv(Binary* binary, T c) {
  // Turns an unsigned divide by the power-of-two constant `c` into an
  // unsigned right shift. `T` selects the operand width: uint32_t for i32,
  // uint64_t for i64. The same `binary` node is mutated in place and returned.
  constexpr bool is32Bit = std::is_same<T, uint32_t>::value;
  static_assert(is32Bit || std::is_same<T, uint64_t>::value, "type mismatch");
  // The shift amount is the number of trailing zero bits of `c`.
  const T shiftAmount = static_cast<T>(CountTrailingZeroes<T>(c));
  auto* constant = binary->right->cast<Const>();
  constant->value = Literal(shiftAmount);
  binary->op = is32Bit ? ShrUInt32 : ShrUInt64;
  return binary;
}

template<typename T> Expression* optimizePowerOf2URem(Binary* binary, T c) {
  // Turns an unsigned remainder by the power-of-two constant `c` into a
  // bitwise AND. `T` selects the operand width: uint32_t for i32, uint64_t
  // for i64. The same `binary` node is mutated in place and returned.
  constexpr bool is32Bit = std::is_same<T, uint32_t>::value;
  static_assert(is32Bit || std::is_same<T, uint64_t>::value, "type mismatch");
  // The new right-hand constant is `c - 1`, used as the AND mask.
  const T mask = c - 1;
  auto* constant = binary->right->cast<Const>();
  constant->value = Literal(mask);
  binary->op = is32Bit ? AndInt32 : AndInt64;
  return binary;
}

Expand Down Expand Up @@ -1345,15 +1381,49 @@ struct OptimizeInstructions
}
}
// operations on all 1s
// TODO: shortcut method to create an all-ones?
if (right->value == Literal(int32_t(-1)) ||
right->value == Literal(int64_t(-1))) {
if (binary->op == Abstract::getBinary(type, Abstract::And)) {
// x & -1 ==> x
return binary->left;
} else if (binary->op == Abstract::getBinary(type, Abstract::Or) &&
!EffectAnalyzer(getPassOptions(), features, binary->left)
.hasSideEffects()) {
// x | -1 ==> -1
return binary->right;
} else if (binary->op == Abstract::getBinary(type, Abstract::Sub)) {
// x - (-1) ==> x + 1
MaxGraey marked this conversation as resolved.
Show resolved Hide resolved
binary->op = Abstract::getBinary(type, Abstract::Add);
right->value = Literal::makeFromInt32(1, type);
return binary;
} else if ((binary->op == Abstract::getBinary(type, Abstract::RemS) ||
binary->op == Abstract::getBinary(type, Abstract::GtU)) &&
!EffectAnalyzer(getPassOptions(), features, binary->left)
.hasSideEffects()) {
// (signed)x % -1 ==> 0
// (unsigned)x > -1 ==> 0
right->value = Literal::makeSingleZero(type);
return right;
} else if (binary->op == Abstract::getBinary(type, Abstract::LtU)) {
// (unsigned)x < -1 ==> x != -1
binary->op = Abstract::getBinary(type, Abstract::Ne);
return binary;
MaxGraey marked this conversation as resolved.
Show resolved Hide resolved
} else if (binary->op == Abstract::getBinary(type, Abstract::DivU)) {
// (unsigned)x / -1 ==> x == -1
binary->op = Abstract::getBinary(type, Abstract::Eq);
return binary;
} else if (binary->op == Abstract::getBinary(type, Abstract::Mul)) {
// (signed)x * -1 ==> -x
MaxGraey marked this conversation as resolved.
Show resolved Hide resolved
binary->op = Abstract::getBinary(type, Abstract::Sub);
right->value = Literal::makeSingleZero(type);
std::swap(binary->left, binary->right);
return binary;
} else if (binary->op == Abstract::getBinary(type, Abstract::LeU) &&
!EffectAnalyzer(getPassOptions(), features, binary->left)
.hasSideEffects()) {
// (unsigned)x <= -1 ==> 1
right->value = Literal::makeFromInt32(1, type);
return right;
}
}
// wasm binary encoding uses signed LEBs, which slightly favor negative
Expand Down
4 changes: 2 additions & 2 deletions test/binaryen.js/sieve.js.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ optimized:
(drop
(memory.grow
(i32.sub
(i32.div_u
(i32.shr_u
(i32.add
(local.get $0)
(i32.const 65535)
)
(i32.const 65536)
(i32.const 16)
)
(memory.size)
)
Expand Down
92 changes: 31 additions & 61 deletions test/emcc_hello_world.fromasm
Original file line number Diff line number Diff line change
Expand Up @@ -7118,78 +7118,48 @@
)
(func $_fmt_u (; has Stack IR ;) (param $0 i32) (param $1 i32) (param $2 i32) (result i32)
(local $3 i32)
(local $4 i32)
(if
(i32.or
(i32.and
(i32.eqz
(local.get $1)
)
(i32.gt_u
(local.get $0)
(i32.const -1)
)
)
(i32.gt_u
(local.get $1)
(i32.const 0)
)
(i32.gt_u
(local.get $1)
(i32.const 0)
)
(local.set $0
(loop $while-in (result i32)
(i32.store8
(local.tee $2
(i32.add
(local.get $2)
(i32.const -1)
)
)
(i32.or
(call $___uremdi3
(local.get $0)
(local.get $1)
(i32.const 10)
)
(i32.const 48)
(loop $while-in
(i32.store8
(local.tee $2
(i32.add
(local.get $2)
(i32.const -1)
)
)
(local.set $3
(call $___udivdi3
(i32.or
(call $___uremdi3
(local.get $0)
(local.get $1)
(i32.const 10)
)
(i32.const 48)
)
(local.set $4
(global.get $tempRet0)
)
(local.set $0
(call $___udivdi3
(local.get $0)
(local.get $1)
(i32.const 10)
)
(if (result i32)
(i32.or
(i32.and
(i32.eq
(local.get $1)
(i32.const 9)
)
(i32.gt_u
(local.get $0)
(i32.const -1)
)
)
(i32.gt_u
(local.get $1)
(i32.const 9)
)
)
(block
(local.set $0
(local.get $3)
)
(local.set $1
(local.get $4)
)
(br $while-in)
)
(local.set $3
(global.get $tempRet0)
)
(if
(i32.gt_u
(local.get $1)
(i32.const 9)
)
(block
(local.set $1
(local.get $3)
)
(local.get $3)
(br $while-in)
)
)
)
Expand Down
Loading