From 3c26ad1ae6246a42234cf19fd6ff451329e95017 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 4 Jan 2024 11:35:29 -0800 Subject: [PATCH 1/6] Tweak the Printer code in runtime for smaller code TL;DR: template expansion meant that we had more replicated code than expected from the inline expansion of code in Printer and friends. Restructured and added NEVER_INLINE to try to make the call sites as small as possible. It's a modest code-size savings but nonzero... e.g., the linux-x86-64 .o output from correct_cross_compilation drops from 164280 bytes to 162936 bytes. --- src/runtime/d3d12compute.cpp | 4 +- src/runtime/posix_error_handler.cpp | 20 ++- src/runtime/printer.h | 226 ++++++++++++++-------------- src/runtime/runtime_internal.h | 2 + src/runtime/tracing.cpp | 2 +- 5 files changed, 126 insertions(+), 128 deletions(-) diff --git a/src/runtime/d3d12compute.cpp b/src/runtime/d3d12compute.cpp index adae690800cc..f4f85180a56e 100644 --- a/src/runtime/d3d12compute.cpp +++ b/src/runtime/d3d12compute.cpp @@ -98,11 +98,11 @@ static constexpr uint64_t trace_buf_size = 4096; WEAK char trace_buf[trace_buf_size] = {}; WEAK int trace_indent = 0; -struct trace : public BasicPrinter { +struct trace : public PrinterBase { ScopedMutexLock lock; explicit trace(void *user_context = nullptr) - : BasicPrinter(user_context, trace_buf), + : PrinterBase(user_context, trace_buf, trace_buf_size), lock(&trace_lock) { for (int i = 0; i < trace_indent; i++) { *this << " "; diff --git a/src/runtime/posix_error_handler.cpp b/src/runtime/posix_error_handler.cpp index d40790fad15d..243e8abad660 100644 --- a/src/runtime/posix_error_handler.cpp +++ b/src/runtime/posix_error_handler.cpp @@ -7,18 +7,16 @@ extern "C" { extern void abort(); WEAK void halide_default_error(void *user_context, const char *msg) { - char buf[4096]; - char *dst = halide_string_to_string(buf, buf + 4094, "Error: "); - dst = halide_string_to_string(dst, dst + 4094, msg); - // We still have one character free. Add a newline if there - // isn't one already. - if (dst[-1] != '\n') { - dst[0] = '\n'; - dst[1] = 0; - dst += 1; + // Can't use StackBasicPrinter here because it limits size to 256 + constexpr int buf_size = 4096; + char buf[buf_size]; + PrinterBase dst(user_context, buf, buf + buf_size); + dst << "Error: " << msg; + const char *d = dst.str(); + if (d && *d && d[strlen(d) - 1] != '\n') { + dst << "\n"; } - (void)halide_msan_annotate_memory_is_initialized(user_context, buf, dst - buf + 1); - halide_print(user_context, buf); + halide_print(user_context, dst.str()); abort(); } } diff --git a/src/runtime/printer.h b/src/runtime/printer.h index be3620020824..b9fe2180f88e 100644 --- a/src/runtime/printer.h +++ b/src/runtime/printer.h @@ -41,179 +41,176 @@ constexpr uint64_t default_printer_buffer_length = 1024; // Then remember the print only happens when the debug object leaves // scope, which may print at a confusing time. -namespace { -template -class Printer { - char *buf, *dst, *end; - void *user_context; - bool own_mem; +class PrinterBase { +protected: + char *dst; + char *const end; + char *const start; + void *const user_context; + + NEVER_INLINE void allocation_error() const { + halide_error(user_context, "Printer buffer allocation failed.\n"); + } public: - explicit Printer(void *ctx, char *mem = nullptr) - : user_context(ctx), own_mem(mem == nullptr) { - if (mem != nullptr) { - buf = mem; + // This class will stream text into the range [start, end). + // It does *not* assume any ownership of the memory; it assumes + // the memory will remain valid for its lifespan, and doesn't + // attempt to free any allocations. It also doesn't do any sanity + // checking of the pointers, so if you pass in a null or bogus value, + // it will attempt to use it. + NEVER_INLINE PrinterBase(void *user_context, char *start, char *end) + : dst(start), end(end), start(start), user_context(user_context) { + if (!start) { + // Pointers equal ensures no writes to buffer via formatting code + end = dst; } else { - buf = (char *)malloc(buffer_length); + // null-terminate ahead of time + *end = 0; } + } + + // Alternate start-and-size ctor that is more convenient for the Printer subclass + PrinterBase(void *user_context, char *start, uint64_t size) + : PrinterBase(user_context, start, start + size) { + } + + NEVER_INLINE const char *str() { + (void)halide_msan_annotate_memory_is_initialized(user_context, start, dst - start + 1); + return start; + } + + uint64_t size() const { + return (uint64_t)(dst - start); + } + + uint64_t capacity() const { + return (uint64_t)(end - start); + } - dst = buf; + NEVER_INLINE void clear() { + dst = start; if (dst) { - end = buf + (buffer_length - 1); - *end = 0; - } else { - // Pointers equal ensures no writes to buffer via formatting code - end = dst; + dst[0] = 0; } + } -#if HALIDE_RUNTIME_PRINTER_LOG_THREADID - uint64_t tid; - pthread_threadid_np(0, &tid); - *this << "(TID:" << tid << ")"; -#endif + NEVER_INLINE void erase(int n) { + if (dst) { + dst -= n; + if (dst < start) { + dst = start; + } + dst[0] = 0; + } } - // Not movable, not copyable - Printer(const Printer ©) = delete; - Printer &operator=(const Printer &) = delete; - Printer(Printer &&) = delete; - Printer &operator=(Printer &&) = delete; + struct Float16Bits { + uint16_t bits; + }; - Printer &operator<<(const char *arg) { + // These are NEVER_INLINE because Clang will aggressively inline + // all of them, but the code size of calling out-of-line here is slightly + // smaller, and we ~always prefer smaller code size when using Printer + // in the runtime (it's a modest but nonzero difference). + NEVER_INLINE PrinterBase &operator<<(const char *arg) { dst = halide_string_to_string(dst, end, arg); return *this; } - Printer &operator<<(int64_t arg) { + NEVER_INLINE PrinterBase &operator<<(int64_t arg) { dst = halide_int64_to_string(dst, end, arg, 1); return *this; } - Printer &operator<<(int32_t arg) { + NEVER_INLINE PrinterBase &operator<<(int32_t arg) { dst = halide_int64_to_string(dst, end, arg, 1); return *this; } - Printer &operator<<(uint64_t arg) { + NEVER_INLINE PrinterBase &operator<<(uint64_t arg) { dst = halide_uint64_to_string(dst, end, arg, 1); return *this; } - Printer &operator<<(uint32_t arg) { + NEVER_INLINE PrinterBase &operator<<(uint32_t arg) { dst = halide_uint64_to_string(dst, end, arg, 1); return *this; } - Printer &operator<<(double arg) { + NEVER_INLINE PrinterBase &operator<<(double arg) { dst = halide_double_to_string(dst, end, arg, 1); return *this; } - Printer &operator<<(float arg) { + NEVER_INLINE PrinterBase &operator<<(float arg) { dst = halide_double_to_string(dst, end, arg, 0); return *this; } - Printer &operator<<(const void *arg) { - dst = halide_pointer_to_string(dst, end, arg); + NEVER_INLINE PrinterBase &operator<<(Float16Bits arg) { + double value = halide_float16_bits_to_double(arg.bits); + dst = halide_double_to_string(dst, end, value, 1); return *this; } - Printer &write_float16_from_bits(const uint16_t arg) { - double value = halide_float16_bits_to_double(arg); - dst = halide_double_to_string(dst, end, value, 1); + NEVER_INLINE PrinterBase &operator<<(const void *arg) { + dst = halide_pointer_to_string(dst, end, arg); return *this; } - Printer &operator<<(const halide_type_t &t) { + NEVER_INLINE PrinterBase &operator<<(const halide_type_t &t) { dst = halide_type_to_string(dst, end, &t); return *this; } - Printer &operator<<(const halide_buffer_t &buf) { + NEVER_INLINE PrinterBase &operator<<(const halide_buffer_t &buf) { dst = halide_buffer_to_string(dst, end, &buf); return *this; } - template - void append(const T &value) { - *this << value; + template + void append(const Args &...args) { + ((*this << args), ...); } - template - void append(const First &first, const Second &second, const Rest &...rest) { - append(first); - append(second, rest...); - } - - // Use it like a stringstream. - const char *str() { - if (buf) { - if (printer_type == StringStreamPrinterType) { - msan_annotate_is_initialized(); - } - return buf; - } else { - return allocation_error(); - } - } - - // Clear it. Useful for reusing a stringstream. - void clear() { - dst = buf; - if (dst) { - dst[0] = 0; - } - } - - // Returns the number of characters in the buffer - uint64_t size() const { - return (uint64_t)(dst - buf); - } + // Not movable, not copyable + PrinterBase(const PrinterBase ©) = delete; + PrinterBase &operator=(const PrinterBase &) = delete; + PrinterBase(PrinterBase &&) = delete; + PrinterBase &operator=(PrinterBase &&) = delete; +}; - uint64_t capacity() const { - return buffer_length; - } +namespace { - // Delete the last N characters - void erase(int n) { - if (dst) { - dst -= n; - if (dst < buf) { - dst = buf; - } - dst[0] = 0; +template +class HeapPrinter : public PrinterBase { +public: + NEVER_INLINE explicit HeapPrinter(void *user_context) + : PrinterBase(user_context, (char *)malloc(buffer_length), buffer_length) { + if (!start) { + allocation_error(); } - } - - const char *allocation_error() { - return "Printer buffer allocation failed.\n"; - } - void msan_annotate_is_initialized() { - (void)halide_msan_annotate_memory_is_initialized(user_context, buf, dst - buf + 1); +#if HALIDE_RUNTIME_PRINTER_LOG_THREADID + uint64_t tid; + pthread_threadid_np(0, &tid); + *this << "(TID:" << tid << ")"; +#endif } - ~Printer() { - if (!buf) { - halide_error(user_context, allocation_error()); + NEVER_INLINE ~HeapPrinter() { + if (printer_type == ErrorPrinterType) { + halide_error(user_context, str()); + } else if (printer_type == BasicPrinterType) { + halide_print(user_context, str()); } else { - msan_annotate_is_initialized(); - if (printer_type == ErrorPrinterType) { - halide_error(user_context, buf); - } else if (printer_type == BasicPrinterType) { - halide_print(user_context, buf); - } else { - // It's a stringstream. Do nothing. - } + // It's a stringstream. Do nothing. } - if (own_mem) { - free(buf); - } + free(start); } }; - // A class that supports << with all the same types as Printer, but // does nothing and should compile to a no-op. class SinkPrinter { @@ -227,13 +224,13 @@ ALWAYS_INLINE SinkPrinter operator<<(const SinkPrinter &s, T) { } template -using BasicPrinter = Printer; +using BasicPrinter = HeapPrinter; template -using ErrorPrinter = Printer; +using ErrorPrinter = HeapPrinter; template -using StringStreamPrinter = Printer; +using StringStreamPrinter = HeapPrinter; using print = BasicPrinter<>; using error = ErrorPrinter<>; @@ -244,17 +241,16 @@ using debug = BasicPrinter<>; #else using debug = SinkPrinter; #endif -} // namespace // A Printer that automatically reserves stack space for the printer buffer, rather than malloc. // Note that this requires an explicit buffer_length, and it (generally) should be <= 256. template -class StackPrinter : public Printer { +class StackPrinter : public PrinterBase { char scratch[buffer_length]; public: - explicit StackPrinter(void *ctx) - : Printer(ctx, scratch) { + explicit StackPrinter(void *user_context) + : PrinterBase(user_context, scratch, buffer_length) { static_assert(buffer_length <= 256, "StackPrinter is meant only for small buffer sizes; you are probably making a mistake."); } }; @@ -268,6 +264,8 @@ using StackErrorPrinter = StackPrinter; template using StackStringStreamPrinter = StackPrinter; +} // namespace + } // namespace Internal } // namespace Runtime } // namespace Halide diff --git a/src/runtime/runtime_internal.h b/src/runtime/runtime_internal.h index 57dfe0b1087a..027ae5c4f500 100644 --- a/src/runtime/runtime_internal.h +++ b/src/runtime/runtime_internal.h @@ -51,6 +51,8 @@ typedef ptrdiff_t ssize_t; #define WEAK __attribute__((weak)) +#define NEVER_INLINE __attribute__((noinline)) + // Note that ALWAYS_INLINE should *always* also be `inline`. #define ALWAYS_INLINE inline __attribute__((always_inline)) diff --git a/src/runtime/tracing.cpp b/src/runtime/tracing.cpp index 8e8769e2ad12..93a12c7d90a4 100644 --- a/src/runtime/tracing.cpp +++ b/src/runtime/tracing.cpp @@ -308,7 +308,7 @@ WEAK int32_t halide_default_trace(void *user_context, const halide_trace_event_t if (print_bits == 32) { ss << ((float *)(e->value))[i]; } else if (print_bits == 16) { - ss.write_float16_from_bits(((uint16_t *)(e->value))[i]); + ss << PrinterBase::Float16Bits{((uint16_t *)(e->value))[i]}; } else { ss << ((double *)(e->value))[i]; } From 581129fe0a09fb156fde6ec972df5cbeaaed7a96 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 4 Jan 2024 16:49:00 -0800 Subject: [PATCH 2/6] Update printer.h --- src/runtime/printer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/runtime/printer.h b/src/runtime/printer.h index b9fe2180f88e..ca0c2de00772 100644 --- a/src/runtime/printer.h +++ b/src/runtime/printer.h @@ -60,7 +60,9 @@ class PrinterBase { // checking of the pointers, so if you pass in a null or bogus value, // it will attempt to use it. NEVER_INLINE PrinterBase(void *user_context, char *start, char *end) - : dst(start), end(end), start(start), user_context(user_context) { + // Note that while the caller is expected to pass one-past-the-final-byte, + // internally we need end to point to the final byte, so decrement by one here. + : dst(start), end(end - 1), start(start), user_context(user_context) { if (!start) { // Pointers equal ensures no writes to buffer via formatting code end = dst; From c5bf3b07f1e697102ed7e9b2b1f558ebf11146cc Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jan 2024 08:57:18 -0600 Subject: [PATCH 3/6] debug --- src/runtime/printer.h | 3 +++ src/runtime/to_string.cpp | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/runtime/printer.h b/src/runtime/printer.h index ca0c2de00772..54a5a5acfe28 100644 --- a/src/runtime/printer.h +++ b/src/runtime/printer.h @@ -63,6 +63,7 @@ class PrinterBase { // Note that while the caller is expected to pass one-past-the-final-byte, // internally we need end to point to the final byte, so decrement by one here. : dst(start), end(end - 1), start(start), user_context(user_context) { + halide_debug_assert(user_context, end >= dst); if (!start) { // Pointers equal ensures no writes to buffer via formatting code end = dst; @@ -83,10 +84,12 @@ class PrinterBase { } uint64_t size() const { + halide_debug_assert(user_context, dst >= start); return (uint64_t)(dst - start); } uint64_t capacity() const { + halide_debug_assert(user_context, end >= start); return (uint64_t)(end - start); } diff --git a/src/runtime/to_string.cpp b/src/runtime/to_string.cpp index 71d537609e83..1200ca5c07d9 100644 --- a/src/runtime/to_string.cpp +++ b/src/runtime/to_string.cpp @@ -1,8 +1,11 @@ #include "HalideRuntime.h" +#include "runtime_internal.h" extern "C" { WEAK char *halide_string_to_string(char *dst, char *end, const char *arg) { + halide_debug_assert(nullptr, dst <= end); + if (dst >= end) { return dst; } @@ -25,6 +28,8 @@ WEAK char *halide_string_to_string(char *dst, char *end, const char *arg) { } WEAK char *halide_uint64_to_string(char *dst, char *end, uint64_t arg, int min_digits) { + halide_debug_assert(nullptr, dst <= end); + // 32 is more than enough chars to contain any 64-bit int. char buf[32]; buf[31] = 0; @@ -43,6 +48,8 @@ WEAK char *halide_uint64_to_string(char *dst, char *end, uint64_t arg, int min_d } WEAK char *halide_int64_to_string(char *dst, char *end, int64_t arg, int min_digits) { + halide_debug_assert(nullptr, dst <= end); + if (arg < 0 && dst < end) { *dst++ = '-'; arg = -arg; @@ -51,6 +58,8 @@ WEAK char *halide_int64_to_string(char *dst, char *end, int64_t arg, int min_dig } WEAK char *halide_double_to_string(char *dst, char *end, double arg, int scientific) { + halide_debug_assert(nullptr, dst <= end); + uint64_t bits = 0; memcpy(&bits, &arg, sizeof(double)); @@ -234,6 +243,8 @@ WEAK char *halide_double_to_string(char *dst, char *end, double arg, int scienti } WEAK char *halide_pointer_to_string(char *dst, char *end, const void *arg) { + halide_debug_assert(nullptr, dst <= end); + const char *hex_digits = "0123456789abcdef"; char buf[20] = {0}; char *buf_ptr = buf + 18; @@ -251,6 +262,8 @@ WEAK char *halide_pointer_to_string(char *dst, char *end, const void *arg) { } WEAK char *halide_type_to_string(char *dst, char *end, const halide_type_t *t) { + halide_debug_assert(nullptr, dst <= end); + const char *code_name = nullptr; switch (t->code) { case halide_type_int: @@ -282,6 +295,8 @@ WEAK char *halide_type_to_string(char *dst, char *end, const halide_type_t *t) { } WEAK char *halide_buffer_to_string(char *dst, char *end, const halide_buffer_t *buf) { + halide_debug_assert(nullptr, dst <= end); + if (buf == nullptr) { return halide_string_to_string(dst, end, "nullptr"); } From 260bb125c3c1fbbf127ac29bdb7a3efa75b16855 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jan 2024 10:42:15 -0600 Subject: [PATCH 4/6] Update HalideTestHelpers.cmake --- cmake/HalideTestHelpers.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/HalideTestHelpers.cmake b/cmake/HalideTestHelpers.cmake index b6b9b70551ff..8f39cec026a4 100644 --- a/cmake/HalideTestHelpers.cmake +++ b/cmake/HalideTestHelpers.cmake @@ -54,7 +54,9 @@ function(add_halide_test TARGET) add_test(NAME ${TARGET} COMMAND ${args_COMMAND} ${args_ARGS} WORKING_DIRECTORY "${args_WORKING_DIRECTORY}") - set_halide_compiler_warnings(${TARGET}) + if (NOT Halide_TARGET MATCHES "wasm") + set_halide_compiler_warnings(${TARGET}) + endif () # We can't add Halide::TerminateHandler here, because it requires Halide::Error # and friends to be present in the final linkage, but some callers of add_halide_test() From 0f3bb5f73ee5df2f17aaa1cf3755923415d10221 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jan 2024 10:46:28 -0600 Subject: [PATCH 5/6] Update printer.h --- src/runtime/printer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/printer.h b/src/runtime/printer.h index 54a5a5acfe28..5f6ae43305b1 100644 --- a/src/runtime/printer.h +++ b/src/runtime/printer.h @@ -59,10 +59,10 @@ class PrinterBase { // attempt to free any allocations. It also doesn't do any sanity // checking of the pointers, so if you pass in a null or bogus value, // it will attempt to use it. - NEVER_INLINE PrinterBase(void *user_context, char *start, char *end) + NEVER_INLINE PrinterBase(void *user_context_, char *start_, char *end_) // Note that while the caller is expected to pass one-past-the-final-byte, // internally we need end to point to the final byte, so decrement by one here. - : dst(start), end(end - 1), start(start), user_context(user_context) { + : dst(start_), end(end_ - 1), start(start_), user_context(user_context_) { halide_debug_assert(user_context, end >= dst); if (!start) { // Pointers equal ensures no writes to buffer via formatting code From 3848a53cd2a2f04b35e3e04aa84e9f52cc4c35e0 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jan 2024 12:48:28 -0600 Subject: [PATCH 6/6] fixes --- src/runtime/posix_error_handler.cpp | 2 +- src/runtime/printer.h | 25 +++++++++---------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/src/runtime/posix_error_handler.cpp b/src/runtime/posix_error_handler.cpp index 243e8abad660..27bcc1f5b28f 100644 --- a/src/runtime/posix_error_handler.cpp +++ b/src/runtime/posix_error_handler.cpp @@ -10,7 +10,7 @@ WEAK void halide_default_error(void *user_context, const char *msg) { // Can't use StackBasicPrinter here because it limits size to 256 constexpr int buf_size = 4096; char buf[buf_size]; - PrinterBase dst(user_context, buf, buf + buf_size); + PrinterBase dst(user_context, buf, buf_size); dst << "Error: " << msg; const char *d = dst.str(); if (d && *d && d[strlen(d) - 1] != '\n') { diff --git a/src/runtime/printer.h b/src/runtime/printer.h index 5f6ae43305b1..6a379561dbe5 100644 --- a/src/runtime/printer.h +++ b/src/runtime/printer.h @@ -53,31 +53,24 @@ class PrinterBase { } public: - // This class will stream text into the range [start, end). + // This class will stream text into the range [start, start + size - 1]. // It does *not* assume any ownership of the memory; it assumes // the memory will remain valid for its lifespan, and doesn't // attempt to free any allocations. It also doesn't do any sanity // checking of the pointers, so if you pass in a null or bogus value, // it will attempt to use it. - NEVER_INLINE PrinterBase(void *user_context_, char *start_, char *end_) - // Note that while the caller is expected to pass one-past-the-final-byte, - // internally we need end to point to the final byte, so decrement by one here. - : dst(start_), end(end_ - 1), start(start_), user_context(user_context_) { - halide_debug_assert(user_context, end >= dst); - if (!start) { - // Pointers equal ensures no writes to buffer via formatting code - end = dst; - } else { - // null-terminate ahead of time + NEVER_INLINE PrinterBase(void *user_context_, char *start_, uint64_t size_) + : dst(start_), + // (If start is null, set end = start to ensure no writes are done) + end(start_ ? start_ + size_ - 1 : start_), + start(start_), + user_context(user_context_) { + if (end > start) { + // null-terminate the final byte to ensure string isn't $ENDLESS *end = 0; } } - // Alternate start-and-size ctor that is more convenient for the Printer subclass - PrinterBase(void *user_context, char *start, uint64_t size) - : PrinterBase(user_context, start, start + size) { - } - NEVER_INLINE const char *str() { (void)halide_msan_annotate_memory_is_initialized(user_context, start, dst - start + 1); return start;