diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 876c186d471..20cbc88baae 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -67,7 +67,32 @@ enum class InliningMode { SplitPatternB }; -// Useful into on a function, helping us decide if we can inline it +// Whether a function just calls another function in a way that always shrinks +// when the calling function is inlined. +enum class TrivialCall { + // Function does not just call another function, or it may not shrink when + // inlined. + NotTrivial, + + // Function just calls another function, with `local.get`s as arguments, and + // with each `local` used exactly once, in the order they appear in the + // argument list. + // + // In this case, inlining the function generates smaller code, and it is also + // good for runtime. + Shrinks, + + // Function just calls another function, but maybe with constant arguments, or + // maybe some locals are used more than once. In these cases code size does + // not always shrink: at the call sites, omitted locals can create `drop` + // instructions, a local used multiple times can create new locals, and + // encoding of constants may be larger than just a `local.get` with a small + // index. In these cases we still want to inline with `-O3`, but the code size + // may increase when inlined. + MayNotShrink, +}; + +// Useful info on a function, helping us decide if we can inline it. struct FunctionInfo { std::atomic refs; Index size; @@ -77,16 +102,7 @@ struct FunctionInfo { // Something is used globally if there is a reference to it in a table or // export etc. bool usedGlobally; - // We consider a function to be a trivial call if the body is just a call with - // trivial arguments, like this: - // - // (func $forward (param $x) (param $y) - // (call $target (local.get $x) (local.get $y)) - // ) - // - // Specifically the body must be a call, and the operands to the call must be - // of size 1 (generally, LocalGet or Const). 
- bool isTrivialCall; + TrivialCall trivialCall; InliningMode inliningMode; FunctionInfo() { clear(); } @@ -98,7 +114,7 @@ struct FunctionInfo { hasLoops = false; hasTryDelegate = false; usedGlobally = false; - isTrivialCall = false; + trivialCall = TrivialCall::NotTrivial; inliningMode = InliningMode::Unknown; } @@ -110,7 +126,7 @@ struct FunctionInfo { hasLoops = other.hasLoops; hasTryDelegate = other.hasTryDelegate; usedGlobally = other.usedGlobally; - isTrivialCall = other.isTrivialCall; + trivialCall = other.trivialCall; inliningMode = other.inliningMode; return *this; } @@ -132,6 +148,11 @@ struct FunctionInfo { size <= options.inlining.oneCallerInlineMaxSize) { return true; } + // If the function calls another one in a way that always shrinks when + // inlined, inline it in all optimization and shrink modes. + if (trivialCall == TrivialCall::Shrinks) { + return true; + } // If it's so big that we have no flexible options that could allow it, // do not inline. if (size > options.inlining.flexibleInlineMaxSize) { @@ -143,22 +164,15 @@ struct FunctionInfo { if (options.shrinkLevel > 0 || options.optimizeLevel < 3) { return false; } - if (hasCalls) { - // This has calls. If it is just a trivial call itself then inline, as we - // will save a call that way - basically we skip a trampoline in the - // middle - but if it is something more complex, leave it alone, as we may - // not help much (and with recursion we may end up with a wasteful - // increase in code size). - // - // Note that inlining trivial calls may increase code size, e.g. if they - // use a parameter more than once (forcing us after inlining to save that - // value to a local, etc.), but here we are optimizing for speed and not - // size, so we risk it. - return isTrivialCall; - } - // This doesn't have calls. Inline if loops do not prevent us (normally, a - // loop suggests a lot of work and so inlining is less useful). 
- return !hasLoops || options.inlining.allowFunctionsWithLoops; + // The function just calls another function, but the code size may increase + // when inlined. We only inline it fully with `-O3`. + if (trivialCall == TrivialCall::MayNotShrink) { + return true; + } + // Trivial calls are already handled. Inline if + // 1. The function doesn't have calls, and + // 2. The function doesn't have loops, or we allow inlining with loops. + return !hasCalls && (!hasLoops || options.inlining.allowFunctionsWithLoops); } }; @@ -227,10 +241,35 @@ struct FunctionInfoScanner info.size = Measurer::measure(curr->body); if (auto* call = curr->body->dynCast()) { + // If call arguments are function locals read in order, then the code size + // always shrinks when the call is inlined. Note that we don't allow + // skipping function arguments here, as that can create `drop` + // instructions at the call sites, increasing code size. + bool shrinks = true; + Index nextLocalGetIndex = 0; + for (auto* operand : call->operands) { + if (auto* localGet = operand->dynCast()) { + if (localGet->index == nextLocalGetIndex) { + nextLocalGetIndex += 1; + } else { + shrinks = false; + break; + } + } else { + shrinks = false; + break; + } + } + + if (shrinks) { + info.trivialCall = TrivialCall::Shrinks; + return; + } + if (info.size == call->operands.size() + 1) { // This function body is a call with some trivial (size 1) operands like // LocalGet or Const, so it is a trivial call. - info.isTrivialCall = true; + info.trivialCall = TrivialCall::MayNotShrink; } } } diff --git a/test/lit/passes/inlining-const-args.wat b/test/lit/passes/inlining-const-args.wat new file mode 100644 index 00000000000..023c62eaac6 --- /dev/null +++ b/test/lit/passes/inlining-const-args.wat @@ -0,0 +1,88 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; With `-O3`, we always inline calls to functions that just call other +;; functions with "trivial" arguments. 
+;; +;; A trivial argument for now is just an instruction with size 1. E.g. +;; `local.get`, constants. + +;; RUN: foreach %s %t wasm-opt -all -O3 -S -o - | filecheck %s --check-prefix=O3 +;; RUN: foreach %s %t wasm-opt -all -O2 -S -o - | filecheck %s --check-prefix=O2 +;; RUN: foreach %s %t wasm-opt -all -Os -S -o - | filecheck %s --check-prefix=Os + +(module + ;; O3: (type $0 (func (param i32 i32 i32))) + ;; O2: (type $1 (func)) + + ;; O2: (type $0 (func (param i32 i32 i32))) + ;; Os: (type $1 (func)) + + ;; Os: (type $0 (func (param i32 i32 i32))) + (type $0 (func (param i32 i32 i32))) + + ;; O3: (type $1 (func)) + (type $1 (func)) + + (type $2 (func)) + + ;; O3: (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32))) + ;; O2: (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32))) + ;; Os: (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32))) + (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32))) + + ;; O3: (export "main" (func $main)) + ;; O2: (export "main" (func $main)) + ;; Os: (export "main" (func $main)) + (export "main" (func $main)) + + ;; O2: (func $call-foo (type $1) + ;; O2-NEXT: (call $imported-foo + ;; O2-NEXT: (i32.const 1) + ;; O2-NEXT: (i32.const 2) + ;; O2-NEXT: (i32.const 3) + ;; O2-NEXT: ) + ;; O2-NEXT: ) + ;; Os: (func $call-foo (type $1) + ;; Os-NEXT: (call $imported-foo + ;; Os-NEXT: (i32.const 1) + ;; Os-NEXT: (i32.const 2) + ;; Os-NEXT: (i32.const 3) + ;; Os-NEXT: ) + ;; Os-NEXT: ) + (func $call-foo (type $1) + (call $imported-foo + (i32.const 1) + (i32.const 2) + (i32.const 3))) + + ;; O3: (func $main (type $1) + ;; O3-NEXT: (call $imported-foo + ;; O3-NEXT: (i32.const 1) + ;; O3-NEXT: (i32.const 2) + ;; O3-NEXT: (i32.const 3) + ;; O3-NEXT: ) + ;; O3-NEXT: (call $imported-foo + ;; O3-NEXT: (i32.const 1) + ;; O3-NEXT: (i32.const 2) + ;; O3-NEXT: (i32.const 3) + ;; O3-NEXT: ) + ;; O3-NEXT: (call $imported-foo + ;; O3-NEXT: (i32.const 1) + ;; O3-NEXT: (i32.const 2) + ;; 
O3-NEXT: (i32.const 3) + ;; O3-NEXT: ) + ;; O3-NEXT: ) + ;; O2: (func $main (type $1) + ;; O2-NEXT: (call $call-foo) + ;; O2-NEXT: (call $call-foo) + ;; O2-NEXT: (call $call-foo) + ;; O2-NEXT: ) + ;; Os: (func $main (type $1) + ;; Os-NEXT: (call $call-foo) + ;; Os-NEXT: (call $call-foo) + ;; Os-NEXT: (call $call-foo) + ;; Os-NEXT: ) + (func $main (type $2) + (call $call-foo) + (call $call-foo) + (call $call-foo))) diff --git a/test/lit/passes/inlining-trivial-calls-1.wast b/test/lit/passes/inlining-trivial-calls-1.wast new file mode 100644 index 00000000000..c4962af20fc --- /dev/null +++ b/test/lit/passes/inlining-trivial-calls-1.wast @@ -0,0 +1,164 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; Check that "trivial calls" are always inlined, even when optimizing for +;; size. +;; +;; A trivial call is a function that calls another, using its locals in +;; order, without skipping any locals. +;; +;; These functions can always be inlined because they can't cause a binary size +;; increase at the call sites. 
+ +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=0 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=1 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=2 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=3 -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func (param i32 i32 i32 i32 i32 i32))) + (type $0 (func (param i32 i32 i32 i32 i32 i32))) + ;; CHECK: (type $1 (func)) + (type $1 (func)) + ;; CHECK: (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32 i32 i32 i32))) + (import "env" "foo" (func $imported-foo (type $0) (param i32 i32 i32 i32 i32 i32))) + (func $call-foo (type $0) (param $p1 i32) (param $p2 i32) (param $p3 i32) (param $p4 i32) (param $p5 i32) (param $p6 i32) + (call $imported-foo + (local.get $p1) + (local.get $p2) + (local.get $p3) + (local.get $p4) + (local.get $p5) + (local.get $p6) + ) + ) + ;; CHECK: (func $main (type $1) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local $4 i32) + ;; CHECK-NEXT: (local $5 i32) + ;; CHECK-NEXT: (local $6 i32) + ;; CHECK-NEXT: (local $7 i32) + ;; CHECK-NEXT: (local $8 i32) + ;; CHECK-NEXT: (local $9 i32) + ;; CHECK-NEXT: (local $10 i32) + ;; CHECK-NEXT: (local $11 i32) + ;; CHECK-NEXT: (local $12 i32) + ;; CHECK-NEXT: (local $13 i32) + ;; CHECK-NEXT: (local $14 i32) + ;; CHECK-NEXT: (local $15 i32) + ;; CHECK-NEXT: (local $16 i32) + ;; CHECK-NEXT: (local $17 i32) + ;; CHECK-NEXT: (block $__inlined_func$call-foo + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $4 + ;; 
CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $5 + ;; CHECK-NEXT: (i32.const 6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $imported-foo + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: (local.get $5) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $__inlined_func$call-foo$1 + ;; CHECK-NEXT: (local.set $6 + ;; CHECK-NEXT: (i32.const 7) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $7 + ;; CHECK-NEXT: (i32.const 8) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $8 + ;; CHECK-NEXT: (i32.const 9) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $9 + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $10 + ;; CHECK-NEXT: (i32.const 11) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $11 + ;; CHECK-NEXT: (i32.const 12) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $imported-foo + ;; CHECK-NEXT: (local.get $6) + ;; CHECK-NEXT: (local.get $7) + ;; CHECK-NEXT: (local.get $8) + ;; CHECK-NEXT: (local.get $9) + ;; CHECK-NEXT: (local.get $10) + ;; CHECK-NEXT: (local.get $11) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block $__inlined_func$call-foo$2 + ;; CHECK-NEXT: (local.set $12 + ;; CHECK-NEXT: (i32.const 13) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $13 + ;; CHECK-NEXT: (i32.const 14) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $14 + ;; CHECK-NEXT: (i32.const 15) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $15 + ;; CHECK-NEXT: (i32.const 16) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $16 + ;; CHECK-NEXT: (i32.const 17) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $17 + ;; CHECK-NEXT: (i32.const 18) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $imported-foo + ;; CHECK-NEXT: (local.get $12) + ;; CHECK-NEXT: (local.get $13) + ;; CHECK-NEXT: (local.get $14) + ;; CHECK-NEXT: (local.get $15) + ;; CHECK-NEXT: (local.get $16) + ;; CHECK-NEXT: (local.get $17) + ;; 
CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $main (type $1) + (call $call-foo + (i32.const 1) + (i32.const 2) + (i32.const 3) + (i32.const 4) + (i32.const 5) + (i32.const 6) + ) + (call $call-foo + (i32.const 7) + (i32.const 8) + (i32.const 9) + (i32.const 10) + (i32.const 11) + (i32.const 12) + ) + (call $call-foo + (i32.const 13) + (i32.const 14) + (i32.const 15) + (i32.const 16) + (i32.const 17) + (i32.const 18) + ) + ) +) diff --git a/test/lit/passes/inlining-trivial-calls-2.wast b/test/lit/passes/inlining-trivial-calls-2.wast new file mode 100644 index 00000000000..dfd908c5557 --- /dev/null +++ b/test/lit/passes/inlining-trivial-calls-2.wast @@ -0,0 +1,61 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; Same as inlining-trivial-calls-1, but arguments to the "trivial call" are +;; different than the caller's arguments. +;; +;; This can result in adding locals at the call sites and increase binary sizes. +;; So we don't inline these calls when optimizing for binary sizes. 
+ +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=0 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=1 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=2 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=3 -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func (param i32 i32))) + (type $0 (func (param i32 i32))) + ;; CHECK: (type $1 (func)) + (type $1 (func)) + ;; CHECK: (import "env" "foo" (func $imported-foo (type $0) (param i32 i32))) + (import "env" "foo" (func $imported-foo (type $0) (param i32 i32))) + ;; CHECK: (func $call-foo (type $0) (param $p1 i32) (param $p2 i32) + ;; CHECK-NEXT: (call $imported-foo + ;; CHECK-NEXT: (local.get $p2) + ;; CHECK-NEXT: (local.get $p1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $call-foo (type $0) (param $p1 i32) (param $p2 i32) + (call $imported-foo + (local.get $p2) + (local.get $p1) + ) + ) + ;; CHECK: (func $main (type $1) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: (i32.const 6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $main (type $1) + (call $call-foo + (i32.const 1) + (i32.const 2) + ) + (call $call-foo + (i32.const 3) + (i32.const 4) + ) + (call $call-foo + (i32.const 5) + (i32.const 6) + ) + ) +) diff --git a/test/lit/passes/inlining-trivial-calls-3.wast b/test/lit/passes/inlining-trivial-calls-3.wast new file mode 100644 index 00000000000..b4a71e15571 --- /dev/null +++ b/test/lit/passes/inlining-trivial-calls-3.wast @@ -0,0 +1,70 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. 
+ +;; Same as inlining-trivial-calls-2, but the "trivial call" omits one of its +;; own arguments when calling the target. +;; +;; This can result in `drop` instructions at the call sites and increase binary +;; sizes. So we don't inline these calls when optimizing for binary sizes. + +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=0 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=1 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=2 -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --inlining --shrink-level=3 -S -o - | filecheck %s + +(module + ;; CHECK: (type $1 (func (param i32 i32))) + + ;; CHECK: (type $0 (func (param i32 i32 i32))) + (type $0 (func (param i32 i32 i32))) + (type $1 (func (param i32 i32))) + ;; CHECK: (type $2 (func)) + (type $2 (func)) + ;; CHECK: (import "env" "foo" (func $imported-foo (type $1) (param i32 i32))) + (import "env" "foo" (func $imported-foo (type $1) (param i32 i32))) + ;; CHECK: (func $call-foo (type $0) (param $p1 i32) (param $p2 i32) (param $p3 i32) + ;; CHECK-NEXT: (call $imported-foo + ;; CHECK-NEXT: (local.get $p1) + ;; CHECK-NEXT: (local.get $p3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $call-foo (type $0) (param $p1 i32) (param $p2 i32) (param $p3 i32) + (call $imported-foo + (local.get $p1) + (local.get $p3) + ) + ) + ;; CHECK: (func $main (type $2) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 4) + ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: (i32.const 6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $call-foo + ;; CHECK-NEXT: (i32.const 7) + ;; CHECK-NEXT: (i32.const 8) + ;; CHECK-NEXT: (i32.const 9) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $main (type $2) + (call $call-foo + (i32.const 1) + (i32.const 2) + (i32.const 3) + ) + (call $call-foo + (i32.const 4) + 
(i32.const 5) + (i32.const 6) + ) + (call $call-foo + (i32.const 7) + (i32.const 8) + (i32.const 9) + ) + ) +)