Skip to content

Commit

Permalink
src: refactor vector writing in snapshot builder
Browse files Browse the repository at this point in the history
- Build a static table of octal strings and use it instead of
  building octal strings repeatedly during printing.
- Print a newline and an offset for every 64 bytes in the case
  of printing array literals so it's easier to locate
  variation in snapshot blobs.
- Rework the printing routines so that the differences are only
  made in a WriteByteVectorLiteral routine. We can update this
  for compression support in the future.
- Rename the SnapshotBuilder::Generate() overload that writes the data
  as C++ source instead of a blob to SnapshotBuilder::GenerateAsSource()
  for clarity, and move the file stream operations into it to streamline
  error handling.

PR-URL: #48851
Reviewed-By: Chengzhong Wu <legendecas@gmail.com>
Reviewed-By: Darshan Sen <raisinten@gmail.com>
  • Loading branch information
joyeecheung authored and RafaelGSS committed Aug 16, 2023
1 parent 5ca3bba commit d9818b1
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 96 deletions.
6 changes: 3 additions & 3 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -877,9 +877,6 @@
'node_target_type=="executable"', {
'defines': [ 'NODE_ENABLE_LARGE_CODE_PAGES=1' ],
}],
['OS in "linux mac"', {
'defines': [ 'NODE_MKSNAPSHOT_USE_STRING_LITERALS' ],
}],
[ 'use_openssl_def==1', {
# TODO(bnoordhuis) Make all platforms export the same list of symbols.
# Teach mkssldef.py to generate linker maps that UNIX linkers understand.
Expand Down Expand Up @@ -1248,6 +1245,9 @@
],

'conditions': [
['OS in "linux mac"', {
'defines': [ 'NODE_MKSNAPSHOT_USE_STRING_LITERALS=1' ],
}],
[ 'node_use_openssl=="true"', {
'defines': [
'HAVE_OPENSSL=1',
Expand Down
10 changes: 6 additions & 4 deletions src/node_snapshot_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ struct SnapshotData;

class NODE_EXTERN_PRIVATE SnapshotBuilder {
public:
static ExitCode Generate(std::ostream& out,
const std::vector<std::string>& args,
const std::vector<std::string>& exec_args,
std::optional<std::string_view> main_script);
static ExitCode GenerateAsSource(
const char* out_path,
const std::vector<std::string>& args,
const std::vector<std::string>& exec_args,
std::optional<std::string_view> main_script_path = std::nullopt,
bool use_string_literals = true);

// Generate the snapshot into out.
static ExitCode Generate(SnapshotData* out,
Expand Down
183 changes: 111 additions & 72 deletions src/node_snapshotable.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

#include "node_snapshotable.h"
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
Expand Down Expand Up @@ -715,13 +716,6 @@ SnapshotData::~SnapshotData() {
}
}

// Writes the `size` elements of `vec` to `ss` as decimal numbers. Elements
// are separated by ',' and the final element is followed by '\n' instead.
// Nothing is written when `size` is 0.
template <typename T>
void WriteVector(std::ostream* ss, const T* vec, size_t size) {
  std::ostream& out = *ss;
  size_t remaining = size;
  for (const T* cursor = vec; remaining > 0; ++cursor) {
    --remaining;
    // Only the last element is terminated by a newline.
    const char terminator = (remaining == 0) ? '\n' : ',';
    out << std::to_string(*cursor) << terminator;
  }
}

static std::string GetCodeCacheDefName(const std::string& id) {
char buf[64] = {0};
size_t size = id.size();
Expand All @@ -746,48 +740,71 @@ static std::string FormatSize(size_t size) {
return buf;
}

#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
static void WriteDataAsCharString(std::ostream* ss,
const uint8_t* data,
size_t length) {
for (size_t i = 0; i < length; i++) {
const uint8_t ch = data[i];
// We can print most printable characters directly. The exceptions are '\'
// (escape characters), " (would end the string), and ? (trigraphs). The
// latter may be overly conservative: we compile with C++17 which doesn't
// support trigraphs.
if (ch >= ' ' && ch <= '~' && ch != '\\' && ch != '"' && ch != '?') {
*ss << ch;
} else {
// All other characters are blindly output as octal.
const char c0 = '0' + ((ch >> 6) & 7);
const char c1 = '0' + ((ch >> 3) & 7);
const char c2 = '0' + (ch & 7);
*ss << "\\" << c0 << c1 << c2;
}
if (i % 64 == 63) {
// Go to a newline every 64 bytes since many text editors have
// problems with very long lines.
*ss << "\"\n\"";
}
// Returns the text used to represent the byte `ch` inside a C++ string
// literal: either the character itself or a three-digit octal escape.
std::string ToOctalString(const uint8_t ch) {
  // Printable ASCII (' ' through '~') can be emitted verbatim, with three
  // exceptions: '\' (starts an escape sequence), '"' (would end the string)
  // and '?' (trigraphs). Escaping '?' may be overly conservative: we compile
  // with C++17 which doesn't support trigraphs.
  const bool is_printable = ch >= ' ' && ch <= '~';
  const bool is_special = ch == '\\' || ch == '"' || ch == '?';
  if (!is_printable || is_special) {
    // Everything else is blindly output as a backslash followed by exactly
    // three octal digits, most significant digit first.
    std::string escaped(4, '\\');
    escaped[1] = static_cast<char>('0' + ((ch >> 6) & 7));
    escaped[2] = static_cast<char>('0' + ((ch >> 3) & 7));
    escaped[3] = static_cast<char>('0' + (ch & 7));
    return escaped;
  }
  return std::string(1, static_cast<char>(ch));
}

static void WriteStaticCodeCacheDataAsStringLiteral(
std::ostream* ss, const builtins::CodeCacheInfo& info) {
*ss << "static const uint8_t *" << GetCodeCacheDefName(info.id)
<< "= reinterpret_cast<const uint8_t *>(\"";
WriteDataAsCharString(ss, info.data.data, info.data.length);
*ss << "\");\n";
// Builds a 256-entry lookup table in which entry i holds
// ToOctalString(i), i.e. one entry for every possible uint8_t value.
std::vector<std::string> GetOctalTable() {
  constexpr size_t kEntryCount = 1 << 8;
  std::vector<std::string> entries;
  entries.reserve(kEntryCount);
  for (size_t value = 0; value < kEntryCount; ++value) {
    entries.push_back(ToOctalString(static_cast<uint8_t>(value)));
  }
  return entries;
}
#else
static void WriteStaticCodeCacheDataAsArray(
std::ostream* ss, const builtins::CodeCacheInfo& info) {
*ss << "static const uint8_t " << GetCodeCacheDefName(info.id) << "[] = {\n";
WriteVector(ss, info.data.data, info.data.length);
*ss << "};\n";

// Returns the entry for byte value `index` from the table produced by
// GetOctalTable(). The table is a function-local static, so it is built
// on the first call and reused by every later call.
const std::string& GetOctalCode(uint8_t index) {
  static const auto octal_codes = GetOctalTable();
  const std::string& code = octal_codes[index];
  return code;
}

// Writes a C++ definition of a static constant named `var_name` that holds
// the `size` elements of `vec` to `ss`. T must be uint8_t or char.
//
// If `use_string_literals` is true, the data is emitted as a string literal
// assigned to a `const T*` (wrapped in a reinterpret_cast for uint8_t).
// Otherwise it is emitted as a `const T[]` array initializer made of
// decimal numbers.
template <typename T>
void WriteByteVectorLiteral(std::ostream* ss,
                            const T* vec,
                            size_t size,
                            const char* var_name,
                            bool use_string_literals) {
  constexpr bool is_uint8_t = std::is_same_v<T, uint8_t>;
  static_assert(is_uint8_t || std::is_same_v<T, char>);
  constexpr const char* type_name = is_uint8_t ? "uint8_t" : "char";
  std::ostream& out = *ss;

  if (!use_string_literals) {
    out << "static const " << type_name << " " << var_name << "[] = {";
    for (size_t pos = 0; pos < size; ++pos) {
      const bool is_last = (pos + 1 == size);
      out << std::to_string(vec[pos]) << (is_last ? '\n' : ',');
      if ((pos + 1) % 64 == 0) {
        // Break the line after every 64 elements and append the index of
        // the row just finished as a comment to improve readability.
        out << " // " << (pos / 64) << "\n";
      }
    }
    out << "};\n";
    return;
  }

  // A char literal can be assigned directly; a uint8_t pointer needs the
  // literal to be wrapped in a reinterpret_cast.
  constexpr const char* opening =
      is_uint8_t ? "reinterpret_cast<const uint8_t *>(\"" : "\"";
  constexpr const char* closing = is_uint8_t ? "\");\n" : "\";\n";

  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(vec);
  out << "static const " << type_name << " *" << var_name << " = ";
  out << opening;
  for (size_t pos = 0; pos < size; ++pos) {
    out << GetOctalCode(bytes[pos]);
    if ((pos + 1) % 64 == 0) {
      // Close the literal and reopen it on a new line every 64 bytes since
      // many text editors have problems with very long lines.
      out << "\"\n\"";
    }
  }
  out << closing;
}
#endif

static void WriteCodeCacheInitializer(std::ostream* ss,
const std::string& id,
Expand All @@ -800,7 +817,9 @@ static void WriteCodeCacheInitializer(std::ostream* ss,
*ss << " },\n";
}

void FormatBlob(std::ostream& ss, const SnapshotData* data) {
void FormatBlob(std::ostream& ss,
const SnapshotData* data,
bool use_string_literals) {
ss << R"(#include <cstddef>
#include "env.h"
#include "node_snapshot_builder.h"
Expand All @@ -811,32 +830,24 @@ void FormatBlob(std::ostream& ss, const SnapshotData* data) {
namespace node {
)";

#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
ss << R"(static const char *v8_snapshot_blob_data = ")";
WriteDataAsCharString(
&ss,
reinterpret_cast<const uint8_t*>(data->v8_snapshot_blob_data.data),
data->v8_snapshot_blob_data.raw_size);
ss << R"(";)";
#else
ss << R"(static const char v8_snapshot_blob_data[] = {)";
WriteVector(&ss,
data->v8_snapshot_blob_data.data,
data->v8_snapshot_blob_data.raw_size);
ss << R"(};)";
#endif
WriteByteVectorLiteral(&ss,
data->v8_snapshot_blob_data.data,
data->v8_snapshot_blob_data.raw_size,
"v8_snapshot_blob_data",
use_string_literals);

ss << R"(static const int v8_snapshot_blob_size = )"
<< data->v8_snapshot_blob_data.raw_size << ";";
<< data->v8_snapshot_blob_data.raw_size << ";\n";

// Windows can't deal with too many large vector initializers.
// Store the data into static arrays first.
for (const auto& item : data->code_cache) {
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
WriteStaticCodeCacheDataAsStringLiteral(&ss, item);
#else
// Windows can't deal with too many large vector initializers.
// Store the data into static arrays first.
WriteStaticCodeCacheDataAsArray(&ss, item);
#endif
std::string var_name = GetCodeCacheDefName(item.id);
WriteByteVectorLiteral(&ss,
item.data.data,
item.data.length,
var_name.c_str(),
use_string_literals);
}

ss << R"(const SnapshotData snapshot_data {
Expand Down Expand Up @@ -1073,17 +1084,45 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out,
return ExitCode::kNoFailure;
}

ExitCode SnapshotBuilder::Generate(
std::ostream& out,
ExitCode SnapshotBuilder::GenerateAsSource(
const char* out_path,
const std::vector<std::string>& args,
const std::vector<std::string>& exec_args,
std::optional<std::string_view> main_script) {
std::optional<std::string_view> main_script_path,
bool use_string_literals) {
std::string main_script_content;
std::optional<std::string_view> main_script_optional;
if (main_script_path.has_value()) {
int r = ReadFileSync(&main_script_content, main_script_path.value().data());
if (r != 0) {
FPrintF(stderr,
"Cannot read main script %s for building snapshot. %s: %s",
main_script_path.value(),
uv_err_name(r),
uv_strerror(r));
return ExitCode::kGenericUserError;
}
main_script_optional = main_script_content;
}

std::ofstream out(out_path, std::ios::out | std::ios::binary);
if (!out) {
FPrintF(stderr, "Cannot open %s for output.\n", out_path);
return ExitCode::kGenericUserError;
}

SnapshotData data;
ExitCode exit_code = Generate(&data, args, exec_args, main_script);
ExitCode exit_code = Generate(&data, args, exec_args, main_script_optional);
if (exit_code != ExitCode::kNoFailure) {
return exit_code;
}
FormatBlob(out, &data);
FormatBlob(out, &data, use_string_literals);

if (!out) {
std::cerr << "Failed to write to " << out_path << "\n";
exit_code = node::ExitCode::kGenericUserError;
}

return exit_code;
}

Expand Down
30 changes: 13 additions & 17 deletions tools/snapshot/node_mksnapshot.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <cstdio>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
Expand Down Expand Up @@ -72,29 +71,26 @@ int BuildSnapshot(int argc, char* argv[]) {
CHECK_EQ(result->exit_code(), 0);

std::string out_path;
std::optional<std::string_view> main_script_path = std::nullopt;
if (node::per_process::cli_options->per_isolate->build_snapshot) {
main_script_path = result->args()[1];
out_path = result->args()[2];
} else {
out_path = result->args()[1];
}

std::ofstream out(out_path, std::ios::out | std::ios::binary);
if (!out) {
std::cerr << "Cannot open " << out_path << "\n";
return 1;
}
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
bool use_string_literals = true;
#else
bool use_string_literals = false;
#endif

node::ExitCode exit_code = node::ExitCode::kNoFailure;
{
exit_code = node::SnapshotBuilder::Generate(
out, result->args(), result->exec_args(), std::nullopt);
if (exit_code == node::ExitCode::kNoFailure) {
if (!out) {
std::cerr << "Failed to write " << out_path << "\n";
exit_code = node::ExitCode::kGenericUserError;
}
}
}
node::ExitCode exit_code =
node::SnapshotBuilder::GenerateAsSource(out_path.c_str(),
result->args(),
result->exec_args(),
main_script_path,
use_string_literals);

node::TearDownOncePerProcess();
return static_cast<int>(exit_code);
Expand Down

0 comments on commit d9818b1

Please sign in to comment.