From b6567e7dfb0fb20443bab0fd3bb9db2c4384138c Mon Sep 17 00:00:00 2001
From: Keyhan Vakil
Date: Thu, 25 May 2023 02:22:23 +0000
Subject: [PATCH] build: speed up compilation of mksnapshot output

Incremental compilation of Node.js is slow. Currently on a powerful Linux machine, it takes about 5.6 seconds to compile `gen/node_snapshot.cc` with g++.

As in the previous PR which dealt with `node_js2c`, we add a new flag `--use-string-literals` to `node_mksnapshot`. When this flag is set, we emit string literals instead of array literals for the snapshot blob and for the code cache, i.e.:

```c++
// old:
static const uint8_t X[] = { ... };
// new:
static const uint8_t *X = "...";
```

I only enabled the new flag on Linux/macOS, since those are systems that I have available for testing.

On my Linux system with gcc, it speeds up compilation of this file by 3.7s (5.8s -> 2.1s). On my Mac system with clang, it speeds up compilation by 1.7s (3.4s -> 1.7s).

Again, the right thing here is probably to generate separate files for the snapshot blob and for each code cache output, but this is a nice intermediate speedup.

The thing I'm most unsure about in this PR is how to actually thread the argument through. I considered adding it to the general argument parser, but that felt strange, since this flag only makes sense during the build process. So I kind of hacked it in, which also feels weird. Suggestions are very welcome.

Refs: https://github.com/nodejs/node/issues/47984 Refs: https://github.com/nodejs/node/pull/48160 --- node.gyp | 8 +++ src/node_snapshot_builder.h | 3 +- src/node_snapshotable.cc | 90 ++++++++++++++++++++++++------- tools/snapshot/node_mksnapshot.cc | 16 ++++-- 4 files changed, 92 insertions(+), 25 deletions(-) diff --git a/node.gyp b/node.gyp index d57ab5f21c575e..970e326bc410ff 100644 --- a/node.gyp +++ b/node.gyp @@ -650,8 +650,16 @@ ], 'action': [ '<@(_inputs)', + '<@(node_mksnapshot_use_string_literals_flag)', '<@(_outputs)', ], + 'conditions': [ + ['OS=="linux" or OS=="mac"', { + 'variables': {'node_mksnapshot_use_string_literals_flag': ['--use-string-literals']}, + }, { + 'variables': {'node_mksnapshot_use_string_literals_flag': []}, + }], + ], }, ], }], diff --git a/src/node_snapshot_builder.h b/src/node_snapshot_builder.h index f8cd900b2bdaa4..673c48636b8d77 100644 --- a/src/node_snapshot_builder.h +++ b/src/node_snapshot_builder.h @@ -18,7 +18,8 @@ class NODE_EXTERN_PRIVATE SnapshotBuilder { public: static ExitCode Generate(std::ostream& out, const std::vector args, - const std::vector exec_args); + const std::vector exec_args, + bool use_string_literals); // Generate the snapshot into out. static ExitCode Generate(SnapshotData* out, diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index 9d27a7c66b2aa2..9623edab32b9cb 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -742,23 +742,61 @@ static std::string FormatSize(size_t size) { return buf; } -static void WriteStaticCodeCacheData(std::ostream* ss, - const builtins::CodeCacheInfo& info) { +static void WriteDataAsCharString(std::ostream* ss, + const uint8_t* data, + size_t length) { + for (size_t i = 0; i < length; i++) { + const uint8_t ch = data[i]; + // We can print most printable characters directly. The exceptions are '\' + // (escape characters), " (would end the string), and ? (trigraphs). 
The + // latter may be overly conservative: we compile with C++17 which doesn't + // support trigraphs. + if (ch >= ' ' && ch <= '~' && ch != '\\' && ch != '"' && ch != '?') { + *ss << ch; + } else { + // All other characters are blindly output as octal. + const char c0 = '0' + ((ch & 0700) >> 6); + const char c1 = '0' + ((ch & 0070) >> 3); + const char c2 = '0' + (ch & 7); + *ss << "\\" << c0 << c1 << c2; + } + if (i % 64 == 63) { + // Go to a newline every 64 bytes since many text editors have + // problems with very long lines. + *ss << "\"\n\""; + } + } +} + +static void WriteStaticCodeCacheDataAsArray( + std::ostream* ss, const builtins::CodeCacheInfo& info) { *ss << "static const uint8_t " << GetCodeCacheDefName(info.id) << "[] = {\n"; WriteVector(ss, info.data.data, info.data.length); - *ss << "};"; + *ss << "};\n"; +} + +static void WriteStaticCodeCacheDataAsStringLiteral( + std::ostream* ss, const builtins::CodeCacheInfo& info) { + *ss << "static const uint8_t *" << GetCodeCacheDefName(info.id) + << "= reinterpret_cast(\""; + WriteDataAsCharString(ss, info.data.data, info.data.length); + *ss << "\");\n"; } -static void WriteCodeCacheInitializer(std::ostream* ss, const std::string& id) { +static void WriteCodeCacheInitializer(std::ostream* ss, + const std::string& id, + size_t size) { std::string def_name = GetCodeCacheDefName(id); *ss << " { \"" << id << "\",\n"; *ss << " {" << def_name << ",\n"; - *ss << " arraysize(" << def_name << "),\n"; + *ss << " " << size << ",\n"; *ss << " }\n"; *ss << " },\n"; } -void FormatBlob(std::ostream& ss, const SnapshotData* data) { +void FormatBlob(std::ostream& ss, + const SnapshotData* data, + const bool use_string_literals) { ss << R"(#include #include "env.h" #include "node_snapshot_builder.h" @@ -767,21 +805,32 @@ void FormatBlob(std::ostream& ss, const SnapshotData* data) { // This file is generated by tools/snapshot. Do not edit. 
namespace node { - -static const char v8_snapshot_blob_data[] = { )"; - WriteVector(&ss, - data->v8_snapshot_blob_data.data, - data->v8_snapshot_blob_data.raw_size); - ss << R"(}; - -static const int v8_snapshot_blob_size = )" + if (use_string_literals) { + ss << R"(static const char *v8_snapshot_blob_data = ")"; + WriteDataAsCharString( + &ss, + reinterpret_cast(data->v8_snapshot_blob_data.data), + data->v8_snapshot_blob_data.raw_size); + ss << R"(";)"; + } else { + ss << R"(static const char v8_snapshot_blob_data[] = {)"; + WriteVector(&ss, + data->v8_snapshot_blob_data.data, + data->v8_snapshot_blob_data.raw_size); + ss << R"(};)"; + } + ss << R"(static const int v8_snapshot_blob_size = )" << data->v8_snapshot_blob_data.raw_size << ";"; - // Windows can't deal with too many large vector initializers. - // Store the data into static arrays first. for (const auto& item : data->code_cache) { - WriteStaticCodeCacheData(&ss, item); + if (use_string_literals) { + WriteStaticCodeCacheDataAsStringLiteral(&ss, item); + } else { + // Windows can't deal with too many large vector initializers. + // Store the data into static arrays first. 
+ WriteStaticCodeCacheDataAsArray(&ss, item); + } } ss << R"(const SnapshotData snapshot_data { @@ -808,7 +857,7 @@ static const int v8_snapshot_blob_size = )" // -- code_cache begins -- {)"; for (const auto& item : data->code_cache) { - WriteCodeCacheInitializer(&ss, item.id); + WriteCodeCacheInitializer(&ss, item.id, item.data.length); } ss << R"( } @@ -1022,13 +1071,14 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out, ExitCode SnapshotBuilder::Generate(std::ostream& out, const std::vector args, - const std::vector exec_args) { + const std::vector exec_args, + const bool use_string_literals) { SnapshotData data; ExitCode exit_code = Generate(&data, args, exec_args); if (exit_code != ExitCode::kNoFailure) { return exit_code; } - FormatBlob(out, &data); + FormatBlob(out, &data, use_string_literals); return exit_code; } diff --git a/tools/snapshot/node_mksnapshot.cc b/tools/snapshot/node_mksnapshot.cc index d6d92ab156da62..dab7524a522cda 100644 --- a/tools/snapshot/node_mksnapshot.cc +++ b/tools/snapshot/node_mksnapshot.cc @@ -58,15 +58,23 @@ int main(int argc, char* argv[]) { int BuildSnapshot(int argc, char* argv[]) { if (argc < 2) { - std::cerr << "Usage: " << argv[0] << " \n"; + std::cerr << "Usage: " << argv[0] + << " [--use-string-literals] \n"; std::cerr << " " << argv[0] << " --build-snapshot " << " \n"; return 1; } + std::vector args{argv, argv + argc}; + size_t size_before_remove = args.size(); + args.erase(std::remove(args.begin(), args.end(), "--use-string-literals"), + args.end()); + // If the size of args changed, we must have removed a + // "--use-string-literals". 
+ const bool use_string_literals = args.size() != size_before_remove; + std::unique_ptr result = - node::InitializeOncePerProcess( - std::vector(argv, argv + argc)); + node::InitializeOncePerProcess(args); CHECK(!result->early_return()); CHECK_EQ(result->exit_code(), 0); @@ -87,7 +95,7 @@ int BuildSnapshot(int argc, char* argv[]) { node::ExitCode exit_code = node::ExitCode::kNoFailure; { exit_code = node::SnapshotBuilder::Generate( - out, result->args(), result->exec_args()); + out, result->args(), result->exec_args(), use_string_literals); if (exit_code == node::ExitCode::kNoFailure) { if (!out) { std::cerr << "Failed to write " << out_path << "\n";