Skip to content
This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit

Permalink
Optimize RoboCaller::AddReceiver() for code size
Browse files Browse the repository at this point in the history
Essentially, instead of having the inlined UntypedFunction::Create(f)
return an UntypedFunction which is then passed as an argument to
non-inlined RoboCallerReceivers::AddReceiverImpl(), we let
UntypedFunction::PrepareArgs(f) return a few different kinds of
trivial structs (depending on what sort of type f has) which are
passed as arguments to non-inlined RoboCallerReceivers::AddReceiver()
(which then converts them to UntypedFunction by calling
UntypedFunction::Create()). These structs are smaller than
UntypedFunction and optimized for argument passing, so many fewer
instructions are needed.

Example code:

  struct Foo {
    void Receive(int, float, int, float);
    void TestAddLambdaReceiver();
    webrtc::RoboCaller<int, float, int, float> rc;
  };

  void Foo::TestAddLambdaReceiver() {
    rc.AddReceiver([this](int a, float b, int c, float d){
        Receive(a, b, c, d);});
  }

On arm32, we get before this CL:

  Foo::TestAddLambdaReceiver():
        push    {r11, lr}
        mov     r11, sp
        sub     sp, sp, #24
        ldr     r1, .LCPI0_0
        mov     r2, #0
        stm     sp, {r0, r2}
        add     r1, pc, r1
        str     r2, [sp, #20]
        str     r1, [sp, #16]
        mov     r1, sp
        bl      RoboCallerReceivers::AddReceiverImpl
        mov     sp, r11
        pop     {r11, pc}
  .LCPI0_0:
        .long   CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>
  CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>:
        ldr     r0, [r0]
        b       Foo::Receive(int, float, int, float)

After this CL:

  Foo::TestAddLambdaReceiver():
        ldr     r3, .LCPI0_0
        mov     r2, r0
        add     r3, pc, r3
        b       RoboCallerReceivers::AddReceiver<1u>
  .LCPI0_0:
        .long   CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>
  CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>:
        ldr     r0, [r0]
        b       Foo::Receive(int, float, int, float)

(Symbol names abbreviated so that they'll fit on one line.)

So a reduction from 64 to 28 bytes. The improvements on arm64 and
x86_64 are similar.

Bug: webrtc:11943
Change-Id: I93fbba083be0235051c3279d3e3f6852a4a9fdad
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/185960
Commit-Queue: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32244}
  • Loading branch information
Karl Wiberg authored and Commit Bot committed Sep 30, 2020
1 parent 79d8df0 commit d2c6967
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 66 deletions.
1 change: 1 addition & 0 deletions rtc_base/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ rtc_source_set("robo_caller") {
":untyped_function",
"../api:function_view",
"system:assume",
"system:inline",
]
}

Expand Down
17 changes: 13 additions & 4 deletions rtc_base/robo_caller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,25 @@ namespace robo_caller_impl {
RoboCallerReceivers::RoboCallerReceivers() = default;
RoboCallerReceivers::~RoboCallerReceivers() = default;

void RoboCallerReceivers::AddReceiverImpl(UntypedFunction* f) {
receivers_.push_back(std::move(*f));
}

void RoboCallerReceivers::Foreach(
rtc::FunctionView<void(UntypedFunction&)> fv) {
for (auto& r : receivers_) {
fv(r);
}
}

template void RoboCallerReceivers::AddReceiver(
UntypedFunction::TrivialUntypedFunctionArgs<1>);
template void RoboCallerReceivers::AddReceiver(
UntypedFunction::TrivialUntypedFunctionArgs<2>);
template void RoboCallerReceivers::AddReceiver(
UntypedFunction::TrivialUntypedFunctionArgs<3>);
template void RoboCallerReceivers::AddReceiver(
UntypedFunction::TrivialUntypedFunctionArgs<4>);
template void RoboCallerReceivers::AddReceiver(
UntypedFunction::NontrivialUntypedFunctionArgs);
template void RoboCallerReceivers::AddReceiver(
UntypedFunction::FunctionPointerUntypedFunctionArgs);

} // namespace robo_caller_impl
} // namespace webrtc
25 changes: 18 additions & 7 deletions rtc_base/robo_caller.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "api/function_view.h"
#include "rtc_base/system/assume.h"
#include "rtc_base/system/inline.h"
#include "rtc_base/untyped_function.h"

namespace webrtc {
Expand All @@ -30,20 +31,30 @@ class RoboCallerReceivers {
RoboCallerReceivers& operator=(RoboCallerReceivers&&) = delete;
~RoboCallerReceivers();

void AddReceiver(UntypedFunction&& f) {
AddReceiverImpl(&f);
// Assume that f was moved from and is now trivially destructible.
// This helps the compiler optimize away the destructor call.
RTC_ASSUME(f.IsTriviallyDestructible());
template <typename UntypedFunctionArgsT>
RTC_NO_INLINE void AddReceiver(UntypedFunctionArgsT args) {
receivers_.push_back(UntypedFunction::Create(args));
}

void Foreach(rtc::FunctionView<void(UntypedFunction&)> fv);

private:
void AddReceiverImpl(UntypedFunction* f);
std::vector<UntypedFunction> receivers_;
};

extern template void RoboCallerReceivers::AddReceiver(
UntypedFunction::TrivialUntypedFunctionArgs<1>);
extern template void RoboCallerReceivers::AddReceiver(
UntypedFunction::TrivialUntypedFunctionArgs<2>);
extern template void RoboCallerReceivers::AddReceiver(
UntypedFunction::TrivialUntypedFunctionArgs<3>);
extern template void RoboCallerReceivers::AddReceiver(
UntypedFunction::TrivialUntypedFunctionArgs<4>);
extern template void RoboCallerReceivers::AddReceiver(
UntypedFunction::NontrivialUntypedFunctionArgs);
extern template void RoboCallerReceivers::AddReceiver(
UntypedFunction::FunctionPointerUntypedFunctionArgs);

} // namespace robo_caller_impl

// A collection of receivers (callable objects) that can be called all at once.
Expand Down Expand Up @@ -71,7 +82,7 @@ class RoboCaller {
template <typename F>
void AddReceiver(F&& f) {
receivers_.AddReceiver(
UntypedFunction::Create<void(ArgT...)>(std::forward<F>(f)));
UntypedFunction::PrepareArgs<void(ArgT...)>(std::forward<F>(f)));
}

// Calls all receivers with the given arguments.
Expand Down
199 changes: 144 additions & 55 deletions rtc_base/untyped_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#ifndef RTC_BASE_UNTYPED_FUNCTION_H_
#define RTC_BASE_UNTYPED_FUNCTION_H_

#include <cstddef>
#include <cstring>
#include <memory>
#include <type_traits>
#include <utility>
Expand All @@ -25,9 +27,17 @@ using FunVoid = void();
union VoidUnion {
void* void_ptr;
FunVoid* fun_ptr;
typename std::aligned_storage<16>::type inline_storage;
typename std::aligned_storage<4 * sizeof(uintptr_t)>::type inline_storage;
};

// Returns the number of elements of the `inline_storage` array required to
// store an object of type T.
template <typename T>
constexpr size_t InlineStorageSize() {
// sizeof(T) / sizeof(uintptr_t), but rounded up.
return (sizeof(T) + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
}

template <typename T>
struct CallHelpers;
template <typename RetT, typename... ArgT>
Expand Down Expand Up @@ -64,76 +74,155 @@ struct CallHelpers<RetT(ArgT...)> {
// size.
class UntypedFunction final {
public:
// Create function for lambdas and other callables; it accepts every type of
// argument except those noted in its enable_if call.
// The *UntypedFunctionArgs structs are used to transfer arguments from
// PrepareArgs() to Create(). They are trivial, but may own heap allocations,
// so make sure to pass them to Create() exactly once!
//
// The point of doing Create(PrepareArgs(foo)) instead of just Create(foo) is
// to separate the code that has to be inlined (PrepareArgs) from the code
// that can be noninlined (Create); the *UntypedFunctionArgs types are
// designed to efficiently carry the required information from one to the
// other.
template <size_t N>
struct TrivialUntypedFunctionArgs {
// We use an uintptr_t array here instead of std::aligned_storage, because
// the former can be efficiently passed in registers when using
// TrivialUntypedFunctionArgs as a function argument. (We can't do the same
// in VoidUnion, because std::aligned_storage but not uintptr_t can be
// legally reinterpret_casted to arbitrary types.
// TrivialUntypedFunctionArgs, on the other hand, only needs to handle
// placement new and memcpy.)
alignas(std::max_align_t) uintptr_t inline_storage[N];
webrtc_function_impl::FunVoid* call;
};
struct NontrivialUntypedFunctionArgs {
void* void_ptr;
webrtc_function_impl::FunVoid* call;
void (*del)(webrtc_function_impl::VoidUnion*);
};
struct FunctionPointerUntypedFunctionArgs {
webrtc_function_impl::FunVoid* fun_ptr;
webrtc_function_impl::FunVoid* call;
};

// Create function for lambdas and other callables that are trivial and small;
// it accepts every type of argument except those noted in its enable_if call.
template <
typename Signature,
typename F,
typename F_deref = typename std::remove_reference<F>::type,
typename std::enable_if<
// Not for function pointers; we have another overload for that below.
!std::is_function<typename std::remove_pointer<
typename std::remove_reference<F>::type>::type>::value &&
!std::is_function<
typename std::remove_pointer<F_deref>::type>::value &&

// Not for nullptr; we have another overload for that below.
// Not for nullptr; we have a constructor for that below.
!std::is_same<std::nullptr_t,
typename std::remove_cv<F>::type>::value &&

// Not for UntypedFunction objects; we have another overload for that.
// Not for UntypedFunction objects; use move construction or
// assignment.
!std::is_same<UntypedFunction,
typename std::remove_cv<typename std::remove_reference<
F>::type>::type>::value>::type* = nullptr>
static UntypedFunction Create(F&& f) {
using F_deref = typename std::remove_reference<F>::type;
// TODO(C++17): Use `constexpr if` here. The compiler appears to do the
// right thing anyway w.r.t. resolving the branch statically and
// eliminating dead code, but it would be good for readability.
if (std::is_trivially_move_constructible<F_deref>::value &&
std::is_trivially_destructible<F_deref>::value &&
sizeof(F_deref) <=
sizeof(webrtc_function_impl::VoidUnion::inline_storage)) {
// The callable is trivial and small enough, so we just store its bytes
// in the inline storage.
webrtc_function_impl::VoidUnion vu;
new (&vu.inline_storage) F_deref(std::forward<F>(f));
return UntypedFunction(
vu,
reinterpret_cast<webrtc_function_impl::FunVoid*>(
webrtc_function_impl::CallHelpers<
Signature>::template CallInlineStorage<F_deref>),
nullptr);
} else {
// The callable is either nontrivial or too large, so we can't keep it
// in the inline storage; use the heap instead.
webrtc_function_impl::VoidUnion vu;
vu.void_ptr = new F_deref(std::forward<F>(f));
return UntypedFunction(
vu,
reinterpret_cast<webrtc_function_impl::FunVoid*>(
webrtc_function_impl::CallHelpers<
Signature>::template CallVoidPtr<F_deref>),
static_cast<void (*)(webrtc_function_impl::VoidUnion*)>(
[](webrtc_function_impl::VoidUnion* vu) {
// Assuming that this pointer isn't null allows the
// compiler to eliminate a null check in the (inlined)
// delete operation.
RTC_ASSUME(vu->void_ptr != nullptr);
delete reinterpret_cast<F_deref*>(vu->void_ptr);
}));
}
typename std::remove_cv<F_deref>::type>::value &&

// Only for trivial callables that will fit in
// VoidUnion::inline_storage.
std::is_trivially_move_constructible<F_deref>::value &&
std::is_trivially_destructible<F_deref>::value &&
sizeof(F_deref) <=
sizeof(webrtc_function_impl::VoidUnion::inline_storage)>::type* =
nullptr,
size_t InlineSize = webrtc_function_impl::InlineStorageSize<F_deref>()>
static TrivialUntypedFunctionArgs<InlineSize> PrepareArgs(F&& f) {
// The callable is trivial and small enough, so we just store its bytes
// in the inline storage.
TrivialUntypedFunctionArgs<InlineSize> args;
new (&args.inline_storage) F_deref(std::forward<F>(f));
args.call = reinterpret_cast<webrtc_function_impl::FunVoid*>(
webrtc_function_impl::CallHelpers<
Signature>::template CallInlineStorage<F_deref>);
return args;
}
template <size_t InlineSize>
static UntypedFunction Create(TrivialUntypedFunctionArgs<InlineSize> args) {
webrtc_function_impl::VoidUnion vu;
std::memcpy(&vu.inline_storage, args.inline_storage,
sizeof(args.inline_storage));
return UntypedFunction(vu, args.call, nullptr);
}

// Create function for lambdas and other callables that are nontrivial or
// large; it accepts every type of argument except those noted in its
// enable_if call.
template <
typename Signature,
typename F,
typename F_deref = typename std::remove_reference<F>::type,
typename std::enable_if<
// Not for function pointers; we have another overload for that below.
!std::is_function<
typename std::remove_pointer<F_deref>::type>::value &&

// Not for nullptr; we have a constructor for that below.
!std::is_same<std::nullptr_t,
typename std::remove_cv<F>::type>::value &&

// Not for UntypedFunction objects; use move construction or
// assignment.
!std::is_same<UntypedFunction,
typename std::remove_cv<F_deref>::type>::value &&

// Only for nontrivial callables, or callables that won't fit in
// VoidUnion::inline_storage.
!(std::is_trivially_move_constructible<F_deref>::value &&
std::is_trivially_destructible<F_deref>::value &&
sizeof(F_deref) <= sizeof(webrtc_function_impl::VoidUnion::
inline_storage))>::type* = nullptr>
static NontrivialUntypedFunctionArgs PrepareArgs(F&& f) {
// The callable is either nontrivial or too large, so we can't keep it
// in the inline storage; use the heap instead.
NontrivialUntypedFunctionArgs args;
args.void_ptr = new F_deref(std::forward<F>(f));
args.call = reinterpret_cast<webrtc_function_impl::FunVoid*>(
webrtc_function_impl::CallHelpers<Signature>::template CallVoidPtr<
F_deref>);
args.del = static_cast<void (*)(webrtc_function_impl::VoidUnion*)>(
[](webrtc_function_impl::VoidUnion* vu) {
// Assuming that this pointer isn't null allows the
// compiler to eliminate a null check in the (inlined)
// delete operation.
RTC_ASSUME(vu->void_ptr != nullptr);
delete reinterpret_cast<F_deref*>(vu->void_ptr);
});
return args;
}
static UntypedFunction Create(NontrivialUntypedFunctionArgs args) {
webrtc_function_impl::VoidUnion vu;
vu.void_ptr = args.void_ptr;
return UntypedFunction(vu, args.call, args.del);
}

// Create function that accepts function pointers. If the argument is null,
// the result is an empty UntypedFunction.
template <typename Signature>
static UntypedFunction Create(Signature* f) {
static FunctionPointerUntypedFunctionArgs PrepareArgs(Signature* f) {
FunctionPointerUntypedFunctionArgs args;
args.fun_ptr = reinterpret_cast<webrtc_function_impl::FunVoid*>(f);
args.call = reinterpret_cast<webrtc_function_impl::FunVoid*>(
webrtc_function_impl::CallHelpers<Signature>::CallFunPtr);
return args;
}
static UntypedFunction Create(FunctionPointerUntypedFunctionArgs args) {
webrtc_function_impl::VoidUnion vu;
vu.fun_ptr = reinterpret_cast<webrtc_function_impl::FunVoid*>(f);
return UntypedFunction(
vu,
f ? reinterpret_cast<webrtc_function_impl::FunVoid*>(
webrtc_function_impl::CallHelpers<Signature>::CallFunPtr)
: nullptr,
nullptr);
vu.fun_ptr = args.fun_ptr;
return UntypedFunction(vu, args.fun_ptr == nullptr ? nullptr : args.call,
nullptr);
}

// Prepares arguments and creates an UntypedFunction in one go.
template <typename Signature, typename F>
static UntypedFunction Create(F&& f) {
return Create(PrepareArgs<Signature>(std::forward<F>(f)));
}

// Default constructor. Creates an empty UntypedFunction.
Expand Down

0 comments on commit d2c6967

Please sign in to comment.