-
Notifications
You must be signed in to change notification settings - Fork 13k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc] Template the writing mode for the writer class #111559
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-libc Author: Joseph Huber (jhuber6) ChangesSummary: Patch is 51.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/111559.diff 31 Files Affected:
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 8172fda1e866ed..4c9a9f5ceddb2f 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -59,10 +59,8 @@ add_header_library(
libc.src.__support.common
)
-add_object_library(
+add_header_library(
writer
- SRCS
- writer.cpp
HDRS
writer.h
DEPENDS
@@ -73,10 +71,8 @@ add_object_library(
libc.src.string.memory_utils.inline_memset
)
-add_object_library(
+add_header_library(
converter
- SRCS
- converter.cpp
HDRS
converter.h
converter_atlas.h
@@ -110,10 +106,8 @@ add_object_library(
libc.src.__support.StringUtil.error_to_string
)
-add_object_library(
+add_header_library(
printf_main
- SRCS
- printf_main.cpp
HDRS
printf_main.h
DEPENDS
diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
index 2596cba813c2e9..fd2eb2553887a8 100644
--- a/libc/src/stdio/printf_core/char_converter.h
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -17,7 +17,9 @@
namespace LIBC_NAMESPACE_DECL {
namespace printf_core {
-LIBC_INLINE int convert_char(Writer *writer, const FormatSection &to_conv) {
+template <WriteMode write_mode>
+LIBC_INLINE int convert_char(Writer<write_mode> *writer,
+ const FormatSection &to_conv) {
char c = static_cast<char>(to_conv.conv_val_raw);
constexpr int STRING_LEN = 1;
diff --git a/libc/src/stdio/printf_core/converter.cpp b/libc/src/stdio/printf_core/converter.cpp
deleted file mode 100644
index b1c66451f53f0f..00000000000000
--- a/libc/src/stdio/printf_core/converter.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-//===-- Format specifier converter implmentation for printf -----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/printf_core/converter.h"
-
-#include "src/__support/macros/config.h"
-#include "src/stdio/printf_core/core_structs.h"
-#include "src/stdio/printf_core/printf_config.h"
-#include "src/stdio/printf_core/strerror_converter.h"
-#include "src/stdio/printf_core/writer.h"
-
-// This option allows for replacing all of the conversion functions with custom
-// replacements. This allows conversions to be replaced at compile time.
-#ifndef LIBC_COPT_PRINTF_CONV_ATLAS
-#include "src/stdio/printf_core/converter_atlas.h"
-#else
-#include LIBC_COPT_PRINTF_CONV_ATLAS
-#endif
-
-#include <stddef.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace printf_core {
-
-int convert(Writer *writer, const FormatSection &to_conv) {
- if (!to_conv.has_conv)
- return writer->write(to_conv.raw_string);
-
-#if !defined(LIBC_COPT_PRINTF_DISABLE_FLOAT) && \
- defined(LIBC_COPT_PRINTF_HEX_LONG_DOUBLE)
- if (to_conv.length_modifier == LengthModifier::L) {
- switch (to_conv.conv_name) {
- case 'f':
- case 'F':
- case 'e':
- case 'E':
- case 'g':
- case 'G':
- return convert_float_hex_exp(writer, to_conv);
- default:
- break;
- }
- }
-#endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
-
- switch (to_conv.conv_name) {
- case '%':
- return writer->write("%");
- case 'c':
- return convert_char(writer, to_conv);
- case 's':
- return convert_string(writer, to_conv);
- case 'd':
- case 'i':
- case 'u':
- case 'o':
- case 'x':
- case 'X':
- case 'b':
- case 'B':
- return convert_int(writer, to_conv);
-#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
- case 'f':
- case 'F':
- return convert_float_decimal(writer, to_conv);
- case 'e':
- case 'E':
- return convert_float_dec_exp(writer, to_conv);
- case 'a':
- case 'A':
- return convert_float_hex_exp(writer, to_conv);
- case 'g':
- case 'G':
- return convert_float_dec_auto(writer, to_conv);
-#endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
-#ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
- case 'r':
- case 'R':
- case 'k':
- case 'K':
- return convert_fixed(writer, to_conv);
-#endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
-#ifndef LIBC_COPT_PRINTF_DISABLE_STRERROR
- case 'm':
- return convert_strerror(writer, to_conv);
-#endif // LIBC_COPT_PRINTF_DISABLE_STRERROR
-#ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
- case 'n':
- return convert_write_int(writer, to_conv);
-#endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
- case 'p':
- return convert_pointer(writer, to_conv);
- default:
- return writer->write(to_conv.raw_string);
- }
- return -1;
-}
-
-} // namespace printf_core
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/printf_core/converter.h b/libc/src/stdio/printf_core/converter.h
index 2b3f06d0aa7a36..f26ed727f05f40 100644
--- a/libc/src/stdio/printf_core/converter.h
+++ b/libc/src/stdio/printf_core/converter.h
@@ -11,8 +11,18 @@
#include "src/__support/macros/config.h"
#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/printf_config.h"
+#include "src/stdio/printf_core/strerror_converter.h"
#include "src/stdio/printf_core/writer.h"
+// This option allows for replacing all of the conversion functions with custom
+// replacements. This allows conversions to be replaced at compile time.
+#ifndef LIBC_COPT_PRINTF_CONV_ATLAS
+#include "src/stdio/printf_core/converter_atlas.h"
+#else
+#include LIBC_COPT_PRINTF_CONV_ATLAS
+#endif
+
#include <stddef.h>
namespace LIBC_NAMESPACE_DECL {
@@ -21,7 +31,80 @@ namespace printf_core {
// convert will call a conversion function to convert the FormatSection into
// its string representation, and then that will write the result to the
// writer.
-int convert(Writer *writer, const FormatSection &to_conv);
+template <WriteMode write_mode>
+int convert(Writer<write_mode> *writer, const FormatSection &to_conv) {
+ if (!to_conv.has_conv)
+ return writer->write(to_conv.raw_string);
+
+#if !defined(LIBC_COPT_PRINTF_DISABLE_FLOAT) && \
+ defined(LIBC_COPT_PRINTF_HEX_LONG_DOUBLE)
+ if (to_conv.length_modifier == LengthModifier::L) {
+ switch (to_conv.conv_name) {
+ case 'f':
+ case 'F':
+ case 'e':
+ case 'E':
+ case 'g':
+ case 'G':
+ return convert_float_hex_exp(writer, to_conv);
+ default:
+ break;
+ }
+ }
+#endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
+
+ switch (to_conv.conv_name) {
+ case '%':
+ return writer->write("%");
+ case 'c':
+ return convert_char(writer, to_conv);
+ case 's':
+ return convert_string(writer, to_conv);
+ case 'd':
+ case 'i':
+ case 'u':
+ case 'o':
+ case 'x':
+ case 'X':
+ case 'b':
+ case 'B':
+ return convert_int(writer, to_conv);
+#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
+ case 'f':
+ case 'F':
+ return convert_float_decimal(writer, to_conv);
+ case 'e':
+ case 'E':
+ return convert_float_dec_exp(writer, to_conv);
+ case 'a':
+ case 'A':
+ return convert_float_hex_exp(writer, to_conv);
+ case 'g':
+ case 'G':
+ return convert_float_dec_auto(writer, to_conv);
+#endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
+#ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
+ case 'r':
+ case 'R':
+ case 'k':
+ case 'K':
+ return convert_fixed(writer, to_conv);
+#endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
+#ifndef LIBC_COPT_PRINTF_DISABLE_STRERROR
+ case 'm':
+ return convert_strerror(writer, to_conv);
+#endif // LIBC_COPT_PRINTF_DISABLE_STRERROR
+#ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
+ case 'n':
+ return convert_write_int(writer, to_conv);
+#endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
+ case 'p':
+ return convert_pointer(writer, to_conv);
+ default:
+ return writer->write(to_conv.raw_string);
+ }
+ return -1;
+}
} // namespace printf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/printf_core/fixed_converter.h b/libc/src/stdio/printf_core/fixed_converter.h
index c8812d77b62e34..6013c36b4cf3e9 100644
--- a/libc/src/stdio/printf_core/fixed_converter.h
+++ b/libc/src/stdio/printf_core/fixed_converter.h
@@ -62,7 +62,9 @@ LIBC_INLINE constexpr uint32_t const_ten_exp(uint32_t exponent) {
} \
} while (false)
-LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
+template <WriteMode write_mode>
+LIBC_INLINE int convert_fixed(Writer<write_mode> *writer,
+ const FormatSection &to_conv) {
// Long accum should be the largest type, so we can store all the smaller
// numbers in things sized for it.
using LARep = fixed_point::FXRep<unsigned long accum>;
diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h
index e39ba6ecea8d48..fd729f67d4dda5 100644
--- a/libc/src/stdio/printf_core/float_dec_converter.h
+++ b/libc/src/stdio/printf_core/float_dec_converter.h
@@ -92,7 +92,7 @@ zero_after_digits(int32_t base_2_exp, int32_t digits_after_point, T mantissa,
return has_trailing_zeros;
}
-class PaddingWriter {
+template <WriteMode write_mode> class PaddingWriter {
bool left_justified = false;
bool leading_zeroes = false;
char sign_char = 0;
@@ -106,7 +106,8 @@ class PaddingWriter {
sign_char(init_sign_char),
min_width(to_conv.min_width > 0 ? to_conv.min_width : 0) {}
- LIBC_INLINE int write_left_padding(Writer *writer, size_t total_digits) {
+ LIBC_INLINE int write_left_padding(Writer<write_mode> *writer,
+ size_t total_digits) {
// The pattern is (spaces) (sign) (zeroes), but only one of spaces and
// zeroes can be written, and only if the padding amount is positive.
int padding_amount =
@@ -129,7 +130,8 @@ class PaddingWriter {
return 0;
}
- LIBC_INLINE int write_right_padding(Writer *writer, size_t total_digits) {
+ LIBC_INLINE int write_right_padding(Writer<write_mode> *writer,
+ size_t total_digits) {
// If and only if the conversion is left justified, there may be trailing
// spaces.
int padding_amount =
@@ -154,7 +156,7 @@ class PaddingWriter {
This FloatWriter class does the buffering and counting, and writes to the
output when necessary.
*/
-class FloatWriter {
+template <WriteMode write_mode> class FloatWriter {
char block_buffer[BLOCK_SIZE]; // The buffer that holds a block.
size_t buffered_digits = 0; // The number of digits held in the buffer.
bool has_written = false; // True once any digits have been output.
@@ -163,8 +165,9 @@ class FloatWriter {
size_t digits_before_decimal = 0; // The # of digits to write before the '.'
size_t total_digits_written = 0; // The # of digits that have been output.
bool has_decimal_point; // True if the number has a decimal point.
- Writer *writer; // Writes to the final output.
- PaddingWriter padding_writer; // Handles prefixes/padding, uses total_digits.
+ Writer<write_mode> *writer; // Writes to the final output.
+ PaddingWriter<write_mode>
+ padding_writer; // Handles prefixes/padding, uses total_digits.
LIBC_INLINE int flush_buffer(bool round_up_max_blocks = false) {
const char MAX_BLOCK_DIGIT = (round_up_max_blocks ? '0' : '9');
@@ -244,8 +247,9 @@ class FloatWriter {
static_assert(fputil::FPBits<long double>::EXP_LEN < (sizeof(int) * 8));
public:
- LIBC_INLINE FloatWriter(Writer *init_writer, bool init_has_decimal_point,
- const PaddingWriter &init_padding_writer)
+ LIBC_INLINE FloatWriter(Writer<write_mode> *init_writer,
+ bool init_has_decimal_point,
+ const PaddingWriter<write_mode> &init_padding_writer)
: has_decimal_point(init_has_decimal_point), writer(init_writer),
padding_writer(init_padding_writer) {}
@@ -465,12 +469,24 @@ class FloatWriter {
}
};
+// Class-template auto deduction helpers.
+FloatWriter(Writer<WriteMode::FILL_BUFF_AND_DROP_OVERFLOW>, bool,
+ const PaddingWriter<WriteMode::FILL_BUFF_AND_DROP_OVERFLOW>)
+ -> FloatWriter<WriteMode::FILL_BUFF_AND_DROP_OVERFLOW>;
+FloatWriter(Writer<WriteMode::RESIZE_AND_FILL_BUFF>, bool,
+ const PaddingWriter<WriteMode::RESIZE_AND_FILL_BUFF>)
+ -> FloatWriter<WriteMode::RESIZE_AND_FILL_BUFF>;
+FloatWriter(Writer<WriteMode::FLUSH_TO_STREAM>, bool,
+ const PaddingWriter<WriteMode::FLUSH_TO_STREAM>)
+ -> FloatWriter<WriteMode::FLUSH_TO_STREAM>;
+
// This implementation is based on the Ryu Printf algorithm by Ulf Adams:
// Ulf Adams. 2019. Ryū revisited: printf floating point conversion.
// Proc. ACM Program. Lang. 3, OOPSLA, Article 169 (October 2019), 23 pages.
// https://doi.org/10.1145/3360595
-template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
-LIBC_INLINE int convert_float_decimal_typed(Writer *writer,
+template <typename T, WriteMode write_mode,
+ cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
+LIBC_INLINE int convert_float_decimal_typed(Writer<write_mode> *writer,
const FormatSection &to_conv,
fputil::FPBits<T> float_bits) {
// signed because later we use -FRACTION_LEN
@@ -497,7 +513,7 @@ LIBC_INLINE int convert_float_decimal_typed(Writer *writer,
// ignored.
bool nonzero = false;
- PaddingWriter padding_writer(to_conv, sign_char);
+ PaddingWriter<write_mode> padding_writer(to_conv, sign_char);
FloatWriter float_writer(writer, has_decimal_point, padding_writer);
FloatToString<T> float_converter(float_bits.get_val());
@@ -578,8 +594,9 @@ LIBC_INLINE int convert_float_decimal_typed(Writer *writer,
return WRITE_OK;
}
-template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
-LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
+template <typename T, WriteMode write_mode,
+ cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
+LIBC_INLINE int convert_float_dec_exp_typed(Writer<write_mode> *writer,
const FormatSection &to_conv,
fputil::FPBits<T> float_bits) {
// signed because later we use -FRACTION_LEN
@@ -604,7 +621,7 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
bool has_decimal_point =
(precision > 0) || ((to_conv.flags & FormatFlags::ALTERNATE_FORM) != 0);
- PaddingWriter padding_writer(to_conv, sign_char);
+ PaddingWriter<write_mode> padding_writer(to_conv, sign_char);
FloatWriter float_writer(writer, has_decimal_point, padding_writer);
FloatToString<T> float_converter(float_bits.get_val());
@@ -740,8 +757,9 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
return WRITE_OK;
}
-template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
-LIBC_INLINE int convert_float_dec_auto_typed(Writer *writer,
+template <typename T, WriteMode write_mode,
+ cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
+LIBC_INLINE int convert_float_dec_auto_typed(Writer<write_mode> *writer,
const FormatSection &to_conv,
fputil::FPBits<T> float_bits) {
// signed because later we use -FRACTION_LEN
@@ -1107,7 +1125,9 @@ LIBC_INLINE int convert_float_dec_auto_typed(Writer *writer,
}
// TODO: unify the float converters to remove the duplicated checks for inf/nan.
-LIBC_INLINE int convert_float_decimal(Writer *writer,
+
+template <WriteMode write_mode>
+LIBC_INLINE int convert_float_decimal(Writer<write_mode> *writer,
const FormatSection &to_conv) {
if (to_conv.length_modifier == LengthModifier::L) {
fputil::FPBits<long double>::StorageType float_raw = to_conv.conv_val_raw;
@@ -1128,7 +1148,8 @@ LIBC_INLINE int convert_float_decimal(Writer *writer,
return convert_inf_nan(writer, to_conv);
}
-LIBC_INLINE int convert_float_dec_exp(Writer *writer,
+template <WriteMode write_mode>
+LIBC_INLINE int convert_float_dec_exp(Writer<write_mode> *writer,
const FormatSection &to_conv) {
if (to_conv.length_modifier == LengthModifier::L) {
fputil::FPBits<long double>::StorageType float_raw = to_conv.conv_val_raw;
@@ -1149,7 +1170,8 @@ LIBC_INLINE int convert_float_dec_exp(Writer *writer,
return convert_inf_nan(writer, to_conv);
}
-LIBC_INLINE int convert_float_dec_auto(Writer *writer,
+template <WriteMode write_mode>
+LIBC_INLINE int convert_float_dec_auto(Writer<write_mode> *writer,
const FormatSection &to_conv) {
if (to_conv.length_modifier == LengthModifier::L) {
fputil::FPBits<long double>::StorageType float_raw = to_conv.conv_val_raw;
diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h
index 0b3ff3dd1cbfdc..3d41f8a45b0bb3 100644
--- a/libc/src/stdio/printf_core/float_hex_converter.h
+++ b/libc/src/stdio/printf_core/float_hex_converter.h
@@ -24,7 +24,8 @@
namespace LIBC_NAMESPACE_DECL {
namespace printf_core {
-LIBC_INLINE int convert_float_hex_exp(Writer *writer,
+template <WriteMode write_mode>
+LIBC_INLINE int convert_float_hex_exp(Writer<write_mode> *writer,
const FormatSection &to_conv) {
using LDBits = fputil::FPBits<long double>;
using StorageType = LDBits::StorageType;
diff --git a/libc/src/stdio/printf_core/float_inf_nan_converter.h b/libc/src/stdio/printf_core/float_inf_nan_converter.h
index a7da682b835bee..8988d6474bf409 100644
--- a/libc/src/stdio/printf_core/float_inf_nan_converter.h
+++ b/libc/src/stdio/printf_core/float_inf_nan_converter.h
@@ -23,7 +23,9 @@ namespace printf_core {
using StorageType = fputil::FPBits<long double>::StorageType;
-LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) {
+template <WriteMode write_mode>
+LIBC_INLINE int convert_inf_nan(Writer<write_mode> *writer,
+ const FormatSection &to_conv) {
// All of the letters will be defined relative to variable a, which will be
// the appropriate case based on the case of the conversion.
const char a = (to_conv.conv_name & 32) | 'A';
diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index f345e86b97a691..22f4dd627a104b 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -65,7 +65,9 @@ num_to_strview(uintmax_t num, cpp::span<char> bufref, char conv_name) {
} // namespace details
-LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
+template <WriteMode write_mode>
+LIBC_INLINE int convert_int(Writer<write_mode> *writer,
+ const FormatSection &to_conv) {
static constexpr size_t BITS_IN_BYTE = 8;
static constexpr size_t BITS_IN_NUM = sizeof(uintmax_t) * BITS_IN_BYTE;
diff --git a/libc/src/stdio/printf_core/printf_main.cpp b/libc/src/stdio/printf_core/printf_main.cpp
deleted file mode 100644
index bd4a5a168bd23e..00000000000000
--- a/libc/src/stdio/printf_core/printf_main.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- Starting point for printf -------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/printf_core/printf_main.h"
-
-#include "src/__support/arg_list.h"
-#include "src/__support/macros/config.h"
-#include "src/stdio/printf_core/converter.h"
-#include "src/stdio/printf_core/core_structs.h"
-#include "src/stdio/printf_core/parser.h"
-#include "src/stdio/printf_core/writer.h"
-
-#include <stddef.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace printf_core {
-
-int printf_main(Writer *writer, const char *__restrict str,
- internal::ArgList &args) {
- Parser<internal::ArgList> parser(str, args);
- int result = 0;
- fo...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
patch LGTM, but I'll defer to @michaelrj-google to check that the correct modes are used explicitly in each case.
I'm guessing this PR comes with a tradeoff to code size?
@@ -167,8 +167,9 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { | |||
continue; | |||
|
|||
char *buffer = temp_storage.alloc(buffer_size[lane]); | |||
WriteBuffer wb(buffer, buffer_size[lane]); | |||
Writer writer(&wb); | |||
WriteBuffer<WriteMode::FILL_BUFF_AND_DROP_OVERFLOW> wb(buffer, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this enum is the most common, should we make it the default? Or is it better to be explicit every time?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Figured it's best to keep it explicit as they do widly different things depending on the use-case.
Yes, since we put everything in the headers it'll now be copied between the unique users. however, most of these already go through a common case (i.e. vfprintf_internal) so I'm not too concerned. This makes it actually usable from a GPU context however, and allows for more optimizations. |
I'm not a fan of templating the writer for basically the reasons described: It means that the converter code is duplicated for each printf variant, of which we currently have three but there may be more (see That being said, I recognize that this is an important optimization for GPUs, so I have an alternate suggestion. Instead of templating the whole writer, only template Would that solution work for you? |
It's not just the function pointer, but letting the compiler do more optimizations results in much better results. Without this it takes several seconds to compile a use of |
I'm confused about the unnecessary code. The writer's pretty small, even including the parts that are only used for the function pointer side. Could it be that the linking is slow? The proposed solution makes everything header-only, making it one large |
Yes, the link step is very slow on AMDGPU and NVPTX as they're very sensitive to large SCC's in the optimizer (people love to put quadratic optimizations in there). Checking the IR before and after this patch, it's 7500 lines of IR versus 4500 respectively with a 2.5s link time versus a 0.5s link time. That being said, I definitely see your concern because the CPU |
The long double table isn't even included, it's going to be from multiple copies of the regular double table. I did some size analysis yesterday and the big thing that needs to happen for size improvement is moving the ryu tables into a Even with that optimization I'm seeing about a 29 KiB increase in the size of the final linked binary when I have both My main concern is this: This patch would make printf only support compile-time writer selection, but for CPU use cases it's better to do runtime writer selection. I think the best way to handle this is to allow GPUs to disable the writer modes that it doesn't need, which should hopefully solve your long compile times, while also allowing the CPU targets to have the behavior they want. Here's how to replicate my size comparison: building size_test.c: #include <stdio.h>
int main() {
char buff[100];
snprintf(buff, 99, "%f", 123.456);
printf("%s, %f\n", buff, 789.012);
} (replace the
(rebuild libc on main branch)
Now for size comparison:
|
The GPU target uses The compile time improvements, I'm pretty sure it's because it optimizes out unused formats better but I'd need to look into it more. |
Just for a simple call to A version where I just deleted all the code associated with the other modes So this patch definitely does something to improve it, likely just running more optimizations on the conversion code. It's definitely unfortunate that it creates so much bloat because the tables are huge. The issue is that we're using templates on all the conversion functions, this is only necessary because it contains a writer which needs to know which callback to use. I wonder if there's some way to avoid the template directly on the conversion part, or to pull out part of it into a common implementation function. |
Given what you're seeing, could you try a patch where you just move all of the printf_core code into headers without making any other changes? That might be the main thing making a difference. |
Accidentally closed this while clearing up some other useless branches. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From the most recent libc meeting I've been convinced that this patch is useful and we should move forward with it.
That being said there are some changes that to make before it lands:
- Rebase and update the
strftime
internals, since those use the printf writer. - Create a template of
Writer
andWriteBuffer
that does runtime dispatch based on an enum. - Create a compile flag that switches
printf_main
to take a specific templated writer and create a runtime dispatch version of it. A rough outline of what I'm thinking below:
#ifdef PRINTF_MAIN_RUNTIME_DISPATCH //name TBD
template <WriteMode write_mode>
int printf_main(Writer<write_mode> *writer, const char *__restrict str,
internal::ArgList &args) {
WriteBuffer<RUNTIME_DISPATCH> generic_wb(writer->wb.buff, ..., write_mode); //TODO: handle the fprintf overflow_write at the end
Writer<RUNTIME_DISPATCH> generic_writer(generic_wb);
Parser<internal::ArgList> parser(str, args);
int result = 0;
for (FormatSection cur_section = parser.get_next_section();
!cur_section.raw_string.empty();
cur_section = parser.get_next_section()) {
if (cur_section.has_conv)
result = convert(generic_writer, cur_section);
else
result = generic_writer->write(cur_section.raw_string);
if (result < 0)
return result;
}
return generic_writer->get_chars_written();
}
What do you think of this as a plan to move forward?
// Class-template auto deduction helpers. | ||
FloatWriter(Writer<WriteMode::FILL_BUFF_AND_DROP_OVERFLOW>, bool, | ||
const PaddingWriter<WriteMode::FILL_BUFF_AND_DROP_OVERFLOW>) | ||
-> FloatWriter<WriteMode::FILL_BUFF_AND_DROP_OVERFLOW>; | ||
FloatWriter(Writer<WriteMode::RESIZE_AND_FILL_BUFF>, bool, | ||
const PaddingWriter<WriteMode::RESIZE_AND_FILL_BUFF>) | ||
-> FloatWriter<WriteMode::RESIZE_AND_FILL_BUFF>; | ||
FloatWriter(Writer<WriteMode::FLUSH_TO_STREAM>, bool, | ||
const PaddingWriter<WriteMode::FLUSH_TO_STREAM>) | ||
-> FloatWriter<WriteMode::FLUSH_TO_STREAM>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally I'd like to avoid having these since they seem likely to get forgotten when adding a new WriteMode
. I may need to look into redesigning the FloatWriter
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It'll error without them I think, so a new one would be obvious.
Does this address your concerns with binary bloat? This would allow me to move some of these into |
Yes, having a dynamic dispatch option and making it the default for linux targets would address my concerns around binary bloat. Other targets can use the same mechanism if they want the same tradeoff. The intent for the headers in |
Alright, I'll try to pick this up again when I have time. Thanks. |
c7394d2
to
5286d35
Compare
@michaelrj-google I've updated this as you requested (I believe). The default value for this is now runtime dispatch, with the GPU setting it to compile time in its config. This moves the printf handling so that it is header-only, allowing us to use Now, because this gets rid of the > llvm-readelf -s fprintf.cpp.o | grep '_ZN22__llvm_libc_21_0_0_git11printf_core15convert_inf_nanILNS0_9WriteModeE0EEEiPNS0_6WriterIXT_EEERKNS0_13FormatSectionE'
145: 0000000000000000 487 FUNC WEAK HIDDEN 122 _ZN22__llvm_libc_21_0_0_git11printf_core15convert_inf_nanILNS0_9WriteModeE0EEEiPNS0_6WriterIXT_EEERKNS0_13FormatSectionE
> llvm-readelf -s asprintf.cpp.o | grep '_ZN22__llvm_libc_21_0_0_git11printf_core15convert_inf_nanILNS0_9WriteModeE0EEEiPNS0_6WriterIXT_EEERKNS0_13FormatSectionE'
148: 0000000000000000 487 FUNC WEAK HIDDEN 129 _ZN22__llvm_libc_21_0_0_git11printf_core15convert_inf_nanILNS0_9WriteModeE0EEEiPNS0_6WriterIXT_EEERKNS0_13FormatSectionE |
✅ With the latest revision this PR passed the C/C++ code formatter. |
5286d35
to
ca4de1b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A brief explanation for anyone curious: I talked with Joseph about a new idea I had for templating the scanf reader which I think will work better than templating on an enum and having if constexpr
s for each option. My new design focuses on templating the reader/writer on a class which provides static methods for handling a filled/emptied buffer. This allows for straightforward static dispatch without needing to pull in the dependencies for every option. Dynamic dispatch would be handled by an indirection reader/writer that holds an enum+union or void* for the specific reader/writer.
I'm planning to move to that design in the future, but I'd prefer not to block the GPU runtime more than I have to so this patch may end up landing mostly as is, with a refactoring coming later.
ca4de1b
to
aa832dd
Compare
There's definitely a lot of places that required the writer template that could be compressed, but mainly we just want the printf handling to be header only as well. |
Summary: Currently we dispatch the writing mode off of a runtime enum passed in by the constructor. This causes very unfortunate codegen for the GPU targets where we get worst-case codegen because of the unused function pointer for `sprintf`. Instead, this patch moves all of this to a template so it can be masked out. This results in no dynamic stack and uses 60 VGPRs instead of 117. It also compiles about 5x as fast. WIP Update
aa832dd
to
0bbcf14
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, tests pass locally
This allows specializing the implementation for different targets without including unnecessary logic and is similar to llvm#111559 which did the same for printf Writer interface.
This allows specializing the implementation for different targets without including unnecessary logic and is similar to llvm#111559 which did the same for printf Writer interface.
It looks like this broke our builders at https://ci.chromium.org/ui/p/fuchsia/builders/toolchain.ci/clang-linux-x64/b8720577119613178561/overview
Would you be able to take a look and send out a fix or revert? Thanks. |
Hm, I'm wondering why the baremetal is seeing this but not any other target. I don't have much experience there, could you tell me what's different? There should be some template autodeduction helpers. But maybe I accidentally changed those after adding the runtime value back in? |
You forgot to update the initialization in the baremetal printf.cpp:
I'll fix it |
Should be fixed already, i merged the PR that did it. |
Oh huh, the test failures will probably be fixed by cherry-picking 6fea340 then |
I think that fix led to a different failure mode
Seen on https://ci.chromium.org/ui/p/fuchsia/builders/toolchain.ci/clang-linux-x64/b8720479269949913185/overview which includes 6fea340. |
That looks trivial to fix, could you verify it since you have a local build working? |
This addresses build errors from llvm#111559.
I think #131232 catches the rest of them. I can build the toolchain locally with it. |
This addresses build errors from #111559.
…1232) This addresses build errors from llvm/llvm-project#111559.
This allows specializing the implementation for different targets without including unnecessary logic and is similar to #111559 which did the same for printf Writer interface.
Summary: Currently we dispatch the writing mode off of a runtime enum passed in by the constructor. This causes very unfortunate codegen for the GPU targets where we get worst-case codegen because of the unused function pointer for `sprintf`. Instead, this patch moves all of this to a template so it can be masked out. This results in no dynamic stack and uses 60 VGPRs instead of 117. It also compiles about 5x as fast.
This addresses build errors from llvm#111559.
Summary:
Currently we dispatch the writing mode off of a runtime enum passed in
by the constructor. This causes very unfortunate codegen for the GPU
targets where we get worst-case codegen because of the unused function
pointer for
sprintf
. Instead, this patch moves all of this to atemplate so it can be masked out. This results in no dynamic stack and
uses 60 VGPRs instead of 117. It also compiles about 5x as fast.