Skip to content

Commit

Permalink
src: register external references for source code
Browse files Browse the repository at this point in the history
Currently we use external strings for internalized builtin source code.
However when a snapshot is taken, any external string whose resource is
not registered is flattened into a SeqString (see ref). The result is
that any module source code stored in the snapshot does not use external
strings after deserialization. This patch registers an external string
resource for each internalized builtin's source. The savings are
substantial: ~1.9 MB of heap memory per isolate, or ~44% of an otherwise
empty isolate's heap usage:

```bash
$ node --expose-gc -p 'gc(),process.memoryUsage().heapUsed'
4190968
$ ./node --expose-gc -p 'gc(),process.memoryUsage().heapUsed'
2327536
```

The savings can be even higher for user snapshots which may include more
internal modules.

Doing this with the existing UnionBytes abstraction was tricky, because
UnionBytes only creates an external string resource when ToStringChecked
is called. However we need to collate a list of external resources
before isolate construction. UnionBytes can also be deallocated, which
isn't ideal since registering an external string resource which is later
deallocated would be very bad.

Rather than further complicate UnionBytes, we introduce a new class
called EternalBytes which assumes that the data has static lifetime and
creates a single external string resource on construction. It reuses
this original external string resource across V8 isolates by simply
ignoring Dispose calls from V8.

In order to distinguish between EternalBytes and UnionBytes, we
bifurcate the sources map into two maps: the internalized builtins map
(which is never modified) and the sources map (which can be changed
through externalized builtins or by the embedder).

Refs: https://github.com/v8/v8/blob/d2c8fbe9ccd1a6ce5591bb7dd319c3c00d6bf489/src/snapshot/serializer.cc#L633
  • Loading branch information
kvakil committed Mar 13, 2023
1 parent 2660a32 commit 55a5442
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 36 deletions.
1 change: 1 addition & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@
'src/node_sockaddr-inl.h',
'src/node_stat_watcher.h',
'src/node_union_bytes.h',
'src/node_eternal_bytes.h',
'src/node_url.h',
'src/node_util.h',
'src/node_version.h',
Expand Down
49 changes: 39 additions & 10 deletions src/node_builtins.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ using v8::Undefined;
using v8::Value;

BuiltinLoader::BuiltinLoader()
: config_(GetConfig()), code_cache_(std::make_shared<BuiltinCodeCache>()) {
LoadJavaScriptSource();
: code_cache_(std::make_shared<BuiltinCodeCache>()) {
#ifdef NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_LEXER_PATH
AddExternalizedBuiltin(
"internal/deps/cjs-module-lexer/lexer",
Expand All @@ -55,18 +54,33 @@ BuiltinLoader::BuiltinLoader()
}

bool BuiltinLoader::Exists(const char* id) {
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
if (internalized_builtins->find(id) != internalized_builtins->end()) {
return true;
}
auto source = source_.read();
return source->find(id) != source->end();
}

bool BuiltinLoader::Add(const char* id, const UnionBytes& source) {
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
if (internalized_builtins->find(id) != internalized_builtins->end()) {
// Cannot add this builtin as it would conflict with an
// existing internalized builtin.
return false;
}
auto result = source_.write()->emplace(id, source);
return result.second;
}

Local<Object> BuiltinLoader::GetSourceObject(Local<Context> context) {
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
Isolate* isolate = context->GetIsolate();
Local<Object> out = Object::New(isolate);
for (auto const& x : *internalized_builtins) {
Local<String> key = OneByteString(isolate, x.first.c_str(), x.first.size());
out->Set(context, key, x.second.ToStringChecked(isolate)).FromJust();
}
auto source = source_.read();
for (auto const& x : *source) {
Local<String> key = OneByteString(isolate, x.first.c_str(), x.first.size());
Expand All @@ -76,13 +90,17 @@ Local<Object> BuiltinLoader::GetSourceObject(Local<Context> context) {
}

Local<String> BuiltinLoader::GetConfigString(Isolate* isolate) {
return config_.ToStringChecked(isolate);
return GetConfig()->ToStringChecked(isolate);
}

std::vector<std::string> BuiltinLoader::GetBuiltinIds() const {
std::vector<std::string> ids;
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
auto source = source_.read();
ids.reserve(source->size());
ids.reserve(internalized_builtins->size() + source->size());
for (auto const& x : *internalized_builtins) {
ids.emplace_back(x.first);
}
for (auto const& x : *source) {
ids.emplace_back(x.first);
}
Expand Down Expand Up @@ -184,14 +202,20 @@ static std::string OnDiskFileName(const char* id) {

MaybeLocal<String> BuiltinLoader::LoadBuiltinSource(Isolate* isolate,
const char* id) const {
auto source = source_.read();
#ifndef NODE_BUILTIN_MODULES_PATH
const auto source_it = source->find(id);
if (UNLIKELY(source_it == source->end())) {
fprintf(stderr, "Cannot find native builtin: \"%s\".\n", id);
ABORT();
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
const auto internalized_builtins_it = internalized_builtins->find(id);
if (LIKELY(internalized_builtins_it != internalized_builtins->end())) {
return internalized_builtins_it->second.ToStringChecked(isolate);
}

auto source = source_.read();
auto source_it = source->find(id);
if (LIKELY(source_it != source->end())) {
return source_it->second.ToStringChecked(isolate);
}
return source_it->second.ToStringChecked(isolate);
fprintf(stderr, "Cannot find native builtin: \"%s\".\n", id);
ABORT();
#else // !NODE_BUILTIN_MODULES_PATH
std::string filename = OnDiskFileName(id);

Expand Down Expand Up @@ -710,6 +734,11 @@ void BuiltinLoader::RegisterExternalReferences(
registry->Register(GetCacheUsage);
registry->Register(CompileFunction);
registry->Register(HasCachedBuiltins);
auto internalized_builtins = GetInternalizedBuiltinSourceMap();
for (auto const& x : *internalized_builtins) {
auto resource = x.second.AsResource();
registry->Register(resource);
}
}

} // namespace builtins
Expand Down
16 changes: 9 additions & 7 deletions src/node_builtins.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <set>
#include <string>
#include <vector>
#include "node_eternal_bytes.h"
#include "node_mutex.h"
#include "node_threadsafe_cow.h"
#include "node_union_bytes.h"
Expand All @@ -25,11 +26,15 @@ class Realm;

namespace builtins {

using BuiltinSourceMap = std::map<std::string, UnionBytes>;
using InternalizedBuiltinSourceMap = std::map<std::string, EternalBytes>;
using NonInternalizedBuiltinSourceMap = std::map<std::string, UnionBytes>;
using BuiltinCodeCacheMap =
std::unordered_map<std::string,
std::unique_ptr<v8::ScriptCompiler::CachedData>>;

const InternalizedBuiltinSourceMap* GetInternalizedBuiltinSourceMap();
const EternalBytes* GetConfig();

struct CodeCacheInfo {
std::string id;
std::vector<uint8_t> data;
Expand Down Expand Up @@ -84,10 +89,6 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
// Only allow access from friends.
friend class CodeCacheBuilder;

// Generated by tools/js2c.py as node_javascript.cc
void LoadJavaScriptSource(); // Loads data into source_
UnionBytes GetConfig(); // Return data for config.gypi

std::vector<std::string> GetBuiltinIds() const;

struct BuiltinCategories {
Expand Down Expand Up @@ -133,9 +134,10 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {

void AddExternalizedBuiltin(const char* id, const char* filename);

ThreadsafeCopyOnWrite<BuiltinSourceMap> source_;
ThreadsafeCopyOnWrite<NonInternalizedBuiltinSourceMap> source_;

const UnionBytes config_;
static void RegisterSourcesAsExternalReferences(
ExternalReferenceRegistry* registry);

struct BuiltinCodeCache {
RwLock mutex;
Expand Down
120 changes: 120 additions & 0 deletions src/node_eternal_bytes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#ifndef SRC_NODE_ETERNAL_BYTES_H_
#define SRC_NODE_ETERNAL_BYTES_H_

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

// A pointer to const uint8_t* or const uint16_t* data that can be turned into
// external v8::String when given an isolate. Unlike UnionBytes, this assumes
// that the underlying buffer is eternal (will not be garbage collected) and
// reuses the same v8::String::ExternalStringResourceBase for all materialized
// strings. This allows the resource to be registered as an external reference
// for snapshotting.

#include <variant>
#include "v8.h"

namespace node {

class EternalBytes;

template <typename Char, typename IChar, typename Base>
class EternalExternalByteResource : public Base {
static_assert(sizeof(IChar) == sizeof(Char),
"incompatible interface and internal pointers");

public:
explicit EternalExternalByteResource(const Char* data, size_t length)
: data_(data), length_(length) {}

const IChar* data() const override {
return reinterpret_cast<const IChar*>(data_);
}
size_t length() const override { return length_; }

void Dispose() override {
// Do nothing. This class is owned by the EternalBytes instance and so
// should not be destroyed. It may also be in use by other external strings
// besides the one which was collected.
}

EternalExternalByteResource(const EternalExternalByteResource&) = delete;
EternalExternalByteResource& operator=(const EternalExternalByteResource&) =
delete;

friend class EternalBytes;

private:
const Char* data_;
const size_t length_;
};

using EternalExternalOneByteResource =
EternalExternalByteResource<uint8_t,
char,
v8::String::ExternalOneByteStringResource>;
using EternalExternalTwoByteResource =
EternalExternalByteResource<uint16_t,
uint16_t,
v8::String::ExternalStringResource>;

template <class... Ts>
struct overloaded : Ts... {
using Ts::operator()...;
};
template <class... Ts>
overloaded(Ts...) -> overloaded<Ts...>;

class EternalBytes {
public:
EternalBytes(const uint8_t* data, size_t length)
: resource_(new EternalExternalOneByteResource(data, length)) {}
EternalBytes(const uint16_t* data, size_t length)
: resource_(new EternalExternalTwoByteResource(data, length)) {}

EternalBytes(const EternalBytes&) = default;
EternalBytes& operator=(const EternalBytes&) = default;
EternalBytes(EternalBytes&&) = default;
EternalBytes& operator=(EternalBytes&&) = default;

bool IsOneByte() const {
return std::holds_alternative<EternalExternalOneByteResource*>(resource_);
}

const v8::String::ExternalStringResourceBase* AsResource() const {
return std::visit(
overloaded{
[](EternalExternalOneByteResource* resource) {
return static_cast<const v8::String::ExternalStringResourceBase*>(
resource);
},
[](EternalExternalTwoByteResource* resource) {
return static_cast<const v8::String::ExternalStringResourceBase*>(
resource);
}},
resource_);
}

v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) const {
return std::visit(
overloaded{[=](EternalExternalOneByteResource* resource) {
return v8::String::NewExternalOneByte(isolate, resource)
.ToLocalChecked();
},
[=](EternalExternalTwoByteResource* resource) {
return v8::String::NewExternalTwoByte(isolate, resource)
.ToLocalChecked();
}},
resource_);
}

private:
const std::variant<EternalExternalOneByteResource*,
EternalExternalTwoByteResource*>
resource_;
};

} // namespace node

#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#endif // SRC_NODE_ETERNAL_BYTES_H_
3 changes: 2 additions & 1 deletion src/node_external_reference.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ class ExternalReferenceRegistry {
V(v8::IndexedPropertyDefinerCallback) \
V(v8::IndexedPropertyDeleterCallback) \
V(v8::IndexedPropertyQueryCallback) \
V(v8::IndexedPropertyDescriptorCallback)
V(v8::IndexedPropertyDescriptorCallback) \
V(const v8::String::ExternalStringResourceBase*)

#define V(ExternalReferenceType) \
void Register(ExternalReferenceType addr) { RegisterT(addr); }
Expand Down
24 changes: 13 additions & 11 deletions test/cctest/test_per_process.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,28 @@
#include <string>

using node::builtins::BuiltinLoader;
using node::builtins::BuiltinSourceMap;
using node::builtins::InternalizedBuiltinSourceMap;

class PerProcessTest : public ::testing::Test {
protected:
static const BuiltinSourceMap get_sources_for_test() {
return *BuiltinLoader().source_.read();
static const InternalizedBuiltinSourceMap*
get_internalized_builtin_source_map_for_test() {
return node::builtins::GetInternalizedBuiltinSourceMap();
}
};

namespace {

TEST_F(PerProcessTest, EmbeddedSources) {
const auto& sources = PerProcessTest::get_sources_for_test();
ASSERT_TRUE(std::any_of(sources.cbegin(), sources.cend(), [](auto p) {
return p.second.is_one_byte();
})) << "BuiltinLoader::source_ should have some 8bit items";

ASSERT_TRUE(std::any_of(sources.cbegin(), sources.cend(), [](auto p) {
return !p.second.is_one_byte();
})) << "BuiltinLoader::source_ should have some 16bit items";
const auto& sources =
PerProcessTest::get_internalized_builtin_source_map_for_test();
ASSERT_TRUE(std::any_of(sources->cbegin(), sources->cend(), [](auto p) {
return p.second.IsOneByte();
})) << "internalized_builtin_source_map should have some 8bit items";

ASSERT_TRUE(std::any_of(sources->cbegin(), sources->cend(), [](auto p) {
return !p.second.IsOneByte();
})) << "internalized_builtin_source_map should have some 16bit items";
}

} // end namespace
16 changes: 9 additions & 7 deletions tools/js2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,19 @@ def ReadFile(filename):
{0}
namespace {{
const ThreadsafeCopyOnWrite<BuiltinSourceMap> global_source_map {{
BuiltinSourceMap{{ {1} }}
const InternalizedBuiltinSourceMap internalized_builtin_source_map {{
InternalizedBuiltinSourceMap{{ {1} }}
}};
const EternalBytes config(config_raw, {2});
}}
void BuiltinLoader::LoadJavaScriptSource() {{
source_ = global_source_map;
const InternalizedBuiltinSourceMap* GetInternalizedBuiltinSourceMap() {{
return &internalized_builtin_source_map;
}}
UnionBytes BuiltinLoader::GetConfig() {{
return UnionBytes(config_raw, {2}); // config.gypi
const EternalBytes* GetConfig() {{
return &config;
}}
}} // namespace builtins
Expand All @@ -88,7 +90,7 @@ def ReadFile(filename):
}};
"""

INITIALIZER = '{{"{0}", UnionBytes{{{1}, {2}}} }},'
INITIALIZER = '{{"{0}", {{{1}, {2}}} }},'

CONFIG_GYPI_ID = 'config_raw'

Expand Down

0 comments on commit 55a5442

Please sign in to comment.