diff --git a/Makefile b/Makefile index 25e54f109ef58b..8e10ec6ad21fbb 100644 --- a/Makefile +++ b/Makefile @@ -1409,6 +1409,7 @@ LINT_CPP_FILES = $(filter-out $(LINT_CPP_EXCLUDE), $(wildcard \ test/fixtures/*.c \ test/js-native-api/*/*.cc \ test/node-api/*/*.cc \ + tools/js2c.cc \ tools/icu/*.cc \ tools/icu/*.h \ tools/code_cache/*.cc \ diff --git a/deps/simdutf/simdutf.gyp b/deps/simdutf/simdutf.gyp index a86e92eb608d7e..ca8cd2fa7bc079 100644 --- a/deps/simdutf/simdutf.gyp +++ b/deps/simdutf/simdutf.gyp @@ -7,6 +7,7 @@ 'targets': [ { 'target_name': 'simdutf', + 'toolsets': ['host', 'target'], 'type': 'static_library', 'include_dirs': ['.'], 'direct_dependent_settings': { diff --git a/deps/uv/uv.gyp b/deps/uv/uv.gyp index ad13f89dfa5bde..fa2dcb653c3d50 100644 --- a/deps/uv/uv.gyp +++ b/deps/uv/uv.gyp @@ -162,6 +162,7 @@ 'targets': [ { 'target_name': 'libuv', + 'toolsets': ['host', 'target'], 'type': '<(uv_library)', 'include_dirs': [ 'include', diff --git a/node.gyp b/node.gyp index f9621fc1e15470..5d0c77f41c9bb4 100644 --- a/node.gyp +++ b/node.gyp @@ -27,7 +27,7 @@ 'node_lib_target_name%': 'libnode', 'node_intermediate_lib_type%': 'static_library', 'node_builtin_modules_path%': '', - # We list the deps/ files out instead of globbing them in js2c.py since we + # We list the deps/ files out instead of globbing them in js2c.cc since we # only include a subset of all the files under these directories. # The lengths of their file names combined should not exceed the # Windows command length limit or there would be an error. @@ -362,6 +362,7 @@ 'src/quic/transportparams.h', ], 'node_mksnapshot_exec': '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)node_mksnapshot<(EXECUTABLE_SUFFIX)', + 'node_js2c_exec': '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)node_js2c<(EXECUTABLE_SUFFIX)', 'conditions': [ ['GENERATOR == "ninja"', { 'node_text_start_object_path': 'src/large_pages/node_text_start.node_text_start.o' @@ -770,6 +771,7 @@ 'deps/uvwasi/uvwasi.gyp:uvwasi', 'deps/simdutf/simdutf.gyp:simdutf', 'deps/ada/ada.gyp:ada', + 'node_js2c#host', ], 'sources': [ @@ -925,8 +927,7 @@ 'action_name': 'node_js2c', 'process_outputs_as_sources': 1, 'inputs': [ - # Put the code first so it's a dependency and can be used for invocation. - 'tools/js2c.py', + '<(node_js2c_exec)', '<@(library_files)', '<@(deps_files)', 'config.gypi' @@ -935,12 +936,9 @@ '<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc', ], 'action': [ - '<(python)', - 'tools/js2c.py', - '--directory', - 'lib', - '--target', + '<(node_js2c_exec)', '<@(_outputs)', + 'lib', 'config.gypi', '<@(deps_files)', ], @@ -1175,6 +1173,36 @@ }], ] }, # overlapped-checker + { + 'target_name': 'node_js2c', + 'type': 'executable', + 'toolsets': ['host'], + 'dependencies': [ + 'deps/simdutf/simdutf.gyp:simdutf#host', + ], + 'include_dirs': [ + 'tools' + ], + 'sources': [ + 'tools/js2c.cc', + 'tools/executable_wrapper.h' + ], + 'conditions': [ + [ 'node_shared_libuv=="false"', { + 'dependencies': [ 'deps/uv/uv.gyp:libuv#host' ], + }], + [ 'debug_node=="true"', { + 'cflags!': [ '-O3' ], + 'cflags': [ '-g', '-O0' ], + 'defines': [ 'DEBUG' ], + 'xcode_settings': { + 'OTHER_CFLAGS': [ + '-g', '-O0' + ], + }, + }], + ] + }, { 'target_name': 'node_mksnapshot', 'type': 'executable', diff --git a/test/tools/test_js2c.py b/test/tools/test_js2c.py deleted file mode 100644 index 204562117086e4..00000000000000 --- a/test/tools/test_js2c.py +++ /dev/null @@ -1,14 +0,0 @@ -import unittest -import sys, os -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), - '..', '..', 'tools'))) -from js2c import NormalizeFileName - -class Js2ctest(unittest.TestCase): - def testNormalizeFileName(self): - self.assertEqual(NormalizeFileName('dir/mod.js'), 'mod') - self.assertEqual(NormalizeFileName('deps/mod.js'), 'internal/deps/mod') - self.assertEqual(NormalizeFileName('mod.js'), 'mod') - -if __name__ == '__main__': - unittest.main() diff --git a/tools/executable_wrapper.h b/tools/executable_wrapper.h new file mode 100644 index 00000000000000..fcde6f527d72fd --- /dev/null +++ b/tools/executable_wrapper.h @@ -0,0 +1,55 @@ +#ifndef TOOLS_EXECUTABLE_WRAPPER_H_ +#define TOOLS_EXECUTABLE_WRAPPER_H_ + +// TODO(joyeecheung): reuse this in mksnapshot. +#include "uv.h" +#ifdef _WIN32 +#include +#endif + +namespace node { +#ifdef _WIN32 +using argv_type = wchar_t*; +#define NODE_MAIN int wmain + +void FixupMain(int argc, argv_type raw_argv[], char*** argv) { + // Convert argv to UTF8. + *argv = new char*[argc + 1]; + for (int i = 0; i < argc; i++) { + // Compute the size of the required buffer + DWORD size = WideCharToMultiByte( + CP_UTF8, 0, raw_argv[i], -1, nullptr, 0, nullptr, nullptr); + if (size == 0) { + // This should never happen. + fprintf(stderr, "Could not convert arguments to utf8."); + exit(1); + } + // Do the actual conversion + (*argv)[i] = new char[size]; + DWORD result = WideCharToMultiByte( + CP_UTF8, 0, raw_argv[i], -1, (*argv)[i], size, nullptr, nullptr); + if (result == 0) { + // This should never happen. + fprintf(stderr, "Could not convert arguments to utf8."); + exit(1); + } + } + (*argv)[argc] = nullptr; +} +#else + +using argv_type = char*; +#define NODE_MAIN int main + +void FixupMain(int argc, argv_type raw_argv[], char*** argv) { + *argv = uv_setup_args(argc, raw_argv); + // Disable stdio buffering, it interacts poorly with printf() + // calls elsewhere in the program (e.g., any logging from V8.) + setvbuf(stdout, nullptr, _IONBF, 0); + setvbuf(stderr, nullptr, _IONBF, 0); +} +#endif + +} // namespace node + +#endif // TOOLS_EXECUTABLE_WRAPPER_H_ diff --git a/tools/js2c.cc b/tools/js2c.cc new file mode 100644 index 00000000000000..45e3711d27bf9e --- /dev/null +++ b/tools/js2c.cc @@ -0,0 +1,756 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "executable_wrapper.h" +#include "simdutf.h" +#include "uv.h" + +#if defined(_WIN32) +#include // _S_IREAD _S_IWRITE +#ifndef S_IRUSR +#define S_IRUSR _S_IREAD +#endif // S_IRUSR +#ifndef S_IWUSR +#define S_IWUSR _S_IWRITE +#endif // S_IWUSR +#endif +namespace node { +namespace js2c { +int Main(int argc, char* argv[]); + +static bool is_verbose = false; + +void Debug(const char* format, ...) { + va_list arguments; + va_start(arguments, format); + if (is_verbose) { + vfprintf(stderr, format, arguments); + } + va_end(arguments); +} + +void PrintUvError(const char* syscall, const char* filename, int error) { + fprintf(stderr, "[%s] %s: %s\n", syscall, filename, uv_strerror(error)); +} + +int GetStats(const char* path, std::function func) { + uv_fs_t req; + int r = uv_fs_stat(nullptr, &req, path, nullptr); + if (r == 0) { + func(static_cast(req.ptr)); + } + uv_fs_req_cleanup(&req); + return r; +} + +bool IsDirectory(const std::string& filename, int* error) { + bool result = false; + *error = GetStats(filename.c_str(), [&](const uv_stat_t* stats) { + result = !!(stats->st_mode & S_IFDIR); + }); + if (*error != 0) { + PrintUvError("stat", filename.c_str(), *error); + } + return result; +} + +size_t GetFileSize(const std::string& filename, int* error) { + size_t result = 0; + *error = GetStats(filename.c_str(), + [&](const uv_stat_t* stats) { result = stats->st_size; }); + return result; +} + +bool EndsWith(const std::string& str, std::string_view suffix) { + size_t suffix_len = suffix.length(); + size_t str_len = str.length(); + if (str_len < suffix_len) { + return false; + } + return str.compare(str_len - suffix_len, suffix_len, suffix) == 0; +} + +bool StartsWith(const std::string& str, std::string_view prefix) { + size_t prefix_len = prefix.length(); + size_t str_len = str.length(); + if (str_len < prefix_len) { + return false; + } + return str.compare(0, prefix_len, prefix) == 0; +} + +typedef std::vector FileList; +typedef std::map FileMap; + +bool SearchFiles(const std::string& dir, + FileMap* file_map, + const std::string& extension) { + uv_fs_t scan_req; + int result = uv_fs_scandir(nullptr, &scan_req, dir.c_str(), 0, nullptr); + bool errored = false; + if (result < 0) { + PrintUvError("scandir", dir.c_str(), result); + errored = true; + } else { + auto it = file_map->insert({extension, FileList()}).first; + FileList& files = it->second; + files.reserve(files.size() + result); + uv_dirent_t dent; + while (true) { + result = uv_fs_scandir_next(&scan_req, &dent); + if (result == UV_EOF) { + break; + } + + if (result != 0) { + PrintUvError("scandir_next", dir.c_str(), result); + errored = true; + break; + } + + std::string path = dir + '/' + dent.name; + if (EndsWith(path, extension)) { + files.emplace_back(path); + continue; + } + if (!IsDirectory(path, &result)) { + if (result == 0) { // It's a file, no need to search further. + continue; + } else { + errored = true; + break; + } + } + + if (!SearchFiles(path, file_map, extension)) { + errored = true; + break; + } + } + } + + uv_fs_req_cleanup(&scan_req); + return !errored; +} + +constexpr std::string_view kMjsSuffix = ".mjs"; +constexpr std::string_view kJsSuffix = ".js"; +constexpr std::string_view kGypiSuffix = ".gypi"; +constexpr std::string_view depsPrefix = "deps/"; +constexpr std::string_view libPrefix = "lib/"; +std::set kAllowedExtensions{ + kGypiSuffix, kJsSuffix, kMjsSuffix}; + +std::string_view HasAllowedExtensions(const std::string& filename) { + for (const auto& ext : kAllowedExtensions) { + if (EndsWith(filename, ext)) { + return ext; + } + } + return {}; +} + +using Fragment = std::vector; +using Fragments = std::vector>; + +std::vector Join(const Fragments& fragments, + const std::string& separator) { + size_t length = separator.size() * (fragments.size() - 1); + for (size_t i = 0; i < fragments.size(); ++i) { + length += fragments[i].size(); + } + std::vector buf(length, 0); + size_t cursor = 0; + for (size_t i = 0; i < fragments.size(); ++i) { + const Fragment& fragment = fragments[i]; + // Avoid using snprintf on large chunks of data because it's much slower. + // It's fine to use it on small amount of data though. + if (i != 0) { + memcpy(buf.data() + cursor, separator.c_str(), separator.size()); + cursor += separator.size(); + } + memcpy(buf.data() + cursor, fragment.data(), fragment.size()); + cursor += fragment.size(); + } + buf.resize(cursor); + return buf; +} + +const char* kTemplate = R"( +#include "env-inl.h" +#include "node_builtins.h" +#include "node_external_reference.h" +#include "node_internals.h" + +namespace node { + +namespace builtins { + +%.*s +namespace { +const ThreadsafeCopyOnWrite global_source_map { + BuiltinSourceMap { +%.*s + } // BuiltinSourceMap +}; // ThreadsafeCopyOnWrite +} // anonymous namespace + +void BuiltinLoader::LoadJavaScriptSource() { + source_ = global_source_map; +} + +void RegisterExternalReferencesForInternalizedBuiltinCode( + ExternalReferenceRegistry* registry) { +%.*s +} + +UnionBytes BuiltinLoader::GetConfig() { + return UnionBytes(&config_resource); +} + +} // namespace builtins + +} // namespace node +)"; + +Fragment Format(const Fragments& definitions, + const Fragments& initializers, + const Fragments& registrations) { + std::vector def_buf = Join(definitions, "\n"); + size_t def_size = def_buf.size(); + std::vector init_buf = Join(initializers, "\n"); + size_t init_size = init_buf.size(); + std::vector reg_buf = Join(registrations, "\n"); + size_t reg_size = reg_buf.size(); + + size_t result_size = + def_size + init_size + reg_size + strlen(kTemplate) + 100; + std::vector result(result_size, 0); + int r = snprintf(result.data(), + result_size, + kTemplate, + static_cast(def_buf.size()), + def_buf.data(), + static_cast(init_buf.size()), + init_buf.data(), + static_cast(reg_buf.size()), + reg_buf.data()); + result.resize(r); + return result; +} + +std::vector ReadFileSync(const char* path, size_t size, int* error) { + uv_fs_t req; + Debug("ReadFileSync %s with size %zu\n", path, size); + + uv_file file = uv_fs_open(nullptr, &req, path, O_RDONLY, 0, nullptr); + if (req.result < 0) { + uv_fs_req_cleanup(&req); + *error = req.result; + return std::vector(); + } + uv_fs_req_cleanup(&req); + + std::vector contents(size); + size_t offset = 0; + + while (offset < size) { + uv_buf_t buf = uv_buf_init(contents.data() + offset, size - offset); + int bytes_read = uv_fs_read(nullptr, &req, file, &buf, 1, offset, nullptr); + offset += bytes_read; + *error = req.result; + uv_fs_req_cleanup(&req); + if (*error < 0) { + uv_fs_close(nullptr, &req, file, nullptr); + // We can't do anything if uv_fs_close returns error, so just return. + return std::vector(); + } + if (bytes_read <= 0) { + break; + } + } + assert(offset == size); + + *error = uv_fs_close(nullptr, &req, file, nullptr); + return contents; +} + +int WriteFileSync(const std::vector& out, const char* path) { + Debug("WriteFileSync %zu bytes to %s\n", out.size(), path); + uv_fs_t req; + uv_file file = uv_fs_open(nullptr, + &req, + path, + UV_FS_O_CREAT | UV_FS_O_WRONLY | UV_FS_O_TRUNC, + S_IWUSR | S_IRUSR, + nullptr); + int err = req.result; + uv_fs_req_cleanup(&req); + if (err < 0) { + return err; + } + + uv_buf_t buf = uv_buf_init(const_cast(out.data()), out.size()); + err = uv_fs_write(nullptr, &req, file, &buf, 1, 0, nullptr); + uv_fs_req_cleanup(&req); + + int r = uv_fs_close(nullptr, &req, file, nullptr); + uv_fs_req_cleanup(&req); + if (err < 0) { + // We can't do anything if uv_fs_close returns error, so just return. + return err; + } + return r; +} + +int WriteIfChanged(const Fragment& out, const std::string& dest) { + Debug("output size %zu\n", out.size()); + + int error = 0; + size_t size = GetFileSize(dest, &error); + if (error != 0 && error != UV_ENOENT) { + return error; + } + Debug("existing size %zu\n", size); + + bool changed = true; + // If it's not the same size, the file is definitely changed so we'll + // just proceed to update. Otherwise check the content before deciding + // whether we want to write it. + if (error != UV_ENOENT && size == out.size()) { + std::vector content = ReadFileSync(dest.c_str(), size, &error); + if (error == 0) { // In case of error, always write the file. + changed = (memcmp(content.data(), out.data(), size) != 0); + } + } + if (!changed) { + Debug("No change, return\n"); + return 0; + } + return WriteFileSync(out, dest.c_str()); +} + +std::string GetFileId(const std::string& filename) { + size_t end = filename.size(); + size_t start = 0; + std::string prefix; + // Strip .mjs and .js suffix + if (EndsWith(filename, kMjsSuffix)) { + end -= kMjsSuffix.size(); + } else if (EndsWith(filename, kJsSuffix)) { + end -= kJsSuffix.size(); + } + + // deps/acorn/acorn/dist/acorn.js -> internal/deps/acorn/acorn/dist/acorn + if (StartsWith(filename, depsPrefix)) { + start = depsPrefix.size(); + prefix = "internal/deps/"; + } else if (StartsWith(filename, libPrefix)) { + // lib/internal/url.js -> internal/url + start = libPrefix.size(); + prefix = ""; + } + + return prefix + std::string(filename.begin() + start, filename.begin() + end); +} + +std::string GetVariableName(const std::string& id) { + std::string result = id; + size_t length = result.size(); + + for (size_t i = 0; i < length; ++i) { + if (result[i] == '.' || result[i] == '-' || result[i] == '/') { + result[i] = '_'; + } + } + return result; +} + +std::vector GetCodeTable() { + size_t size = 1 << 16; + std::vector code_table(size); + for (size_t i = 0; i < size; ++i) { + code_table[i] = std::to_string(i) + ','; + } + return code_table; +} + +const std::string& GetCode(uint16_t index) { + static std::vector table = GetCodeTable(); + return table[index]; +} + +// Definitions: +// static const uint8_t fs_raw[] = { +// .... +// }; +// +// static StaticExternalOneByteResource fs_resource(fs_raw, 1234, nullptr); +// +// static const uint16_t internal_cli_table_raw[] = { +// .... +// }; +// +// static StaticExternalTwoByteResource +// internal_cli_table_resource(internal_cli_table_raw, 1234, nullptr); +constexpr std::string_view literal_end = "\n};\n\n"; +template +Fragment GetDefinitionImpl(const std::vector& code, const std::string& var) { + size_t count = code.size(); + + constexpr bool is_two_byte = std::is_same_v; + static_assert(is_two_byte || std::is_same_v); + constexpr size_t unit = + (is_two_byte ? 5 : 3) + 1; // 0-65536 or 0-127 and a "," + constexpr const char* arr_type = is_two_byte ? "uint16_t" : "uint8_t"; + constexpr const char* resource_type = is_two_byte + ? "StaticExternalTwoByteResource" + : "StaticExternalOneByteResource"; + + size_t def_size = 256 + (count * unit); + Fragment result(def_size, 0); + + int cur = snprintf(result.data(), + def_size, + "static const %s %s_raw[] = {\n", + arr_type, + var.c_str()); + assert(cur != 0); + for (size_t i = 0; i < count; ++i) { + // Avoid using snprintf on large chunks of data because it's much slower. + // It's fine to use it on small amount of data though. + const std::string& str = GetCode(static_cast(code[i])); + memcpy(result.data() + cur, str.c_str(), str.size()); + cur += str.size(); + } + memcpy(result.data() + cur, literal_end.data(), literal_end.size()); + cur += literal_end.size(); + + int end_size = snprintf(result.data() + cur, + result.size() - cur, + "static %s %s_resource(%s_raw, %zu, nullptr);\n", + resource_type, + var.c_str(), + var.c_str(), + count); + cur += end_size; + result.resize(cur); + return result; +} + +Fragment GetDefinition(const std::string& var, const std::vector& code) { + Debug("GetDefinition %s, code size %zu ", var.c_str(), code.size()); + bool is_one_byte = simdutf::validate_ascii(code.data(), code.size()); + Debug("with %s\n", is_one_byte ? "1-byte chars" : "2-byte chars"); + + if (is_one_byte) { + Debug("static size %zu\n", code.size()); + return GetDefinitionImpl(code, var); + } else { + size_t length = simdutf::utf16_length_from_utf8(code.data(), code.size()); + std::vector utf16(length); + size_t utf16_count = simdutf::convert_utf8_to_utf16( + code.data(), code.size(), reinterpret_cast(utf16.data())); + assert(utf16_count != 0); + utf16.resize(utf16_count); + Debug("static size %zu\n", utf16_count); + return GetDefinitionImpl(utf16, var); + } +} + +int AddModule(const std::string& filename, + Fragments* definitions, + Fragments* initializers, + Fragments* registrations) { + Debug("AddModule %s start\n", filename.c_str()); + + int error = 0; + size_t file_size = GetFileSize(filename, &error); + if (error != 0) { + return error; + } + std::vector code = ReadFileSync(filename.c_str(), file_size, &error); + if (error != 0) { + return error; + } + std::string file_id = GetFileId(filename); + std::string var = GetVariableName(file_id); + + definitions->emplace_back(GetDefinition(var, code)); + + // Initializers of the BuiltinSourceMap: + // {"fs", UnionBytes{&fs_resource}}, + Fragment& init_buf = initializers->emplace_back(Fragment(256, 0)); + int init_size = snprintf(init_buf.data(), + init_buf.size(), + " {\"%s\", UnionBytes(&%s_resource) },", + file_id.c_str(), + var.c_str()); + init_buf.resize(init_size); + + // Registrations: + // registry->Register(&fs_resource); + Fragment& reg_buf = registrations->emplace_back(Fragment(256, 0)); + int reg_size = snprintf(reg_buf.data(), + reg_buf.size(), + " registry->Register(&%s_resource);", + var.c_str()); + reg_buf.resize(reg_size); + return 0; +} + +std::vector ReplaceAll(const std::vector& data, + const std::string& search, + const std::string& replacement) { + auto cur = data.begin(); + auto last = data.begin(); + std::vector result; + result.reserve(data.size()); + while ((cur = std::search(last, data.end(), search.begin(), search.end())) != + data.end()) { + result.insert(result.end(), last, cur); + result.insert(result.end(), + replacement.c_str(), + replacement.c_str() + replacement.size()); + last = cur + search.size(); + } + result.insert(result.end(), last, data.end()); + return result; +} + +std::vector StripComments(const std::vector& input) { + std::vector result; + result.reserve(input.size()); + + auto last_hash = input.cbegin(); + auto line_begin = input.cbegin(); + auto end = input.cend(); + while ((last_hash = std::find(line_begin, end, '#')) != end) { + result.insert(result.end(), line_begin, last_hash); + line_begin = std::find(last_hash, end, '\n'); + if (line_begin != end) { + line_begin += 1; + } + } + result.insert(result.end(), line_begin, end); + return result; +} + +// This is technically unused for our config.gypi, but just porting it here to +// mimic js2c.py. +std::vector JoinMultilineString(const std::vector& input) { + std::vector result; + result.reserve(input.size()); + + auto closing_quote = input.cbegin(); + auto last_inserted = input.cbegin(); + auto end = input.cend(); + std::string search = "'\n"; + while ((closing_quote = std::search( + last_inserted, end, search.begin(), search.end())) != end) { + if (closing_quote != last_inserted) { + result.insert(result.end(), last_inserted, closing_quote - 1); + last_inserted = closing_quote - 1; + } + auto opening_quote = closing_quote + 2; + while (opening_quote != end && isspace(*opening_quote)) { + opening_quote++; + } + if (opening_quote == end) { + break; + } + if (*opening_quote == '\'') { + last_inserted = opening_quote + 1; + } else { + result.insert(result.end(), last_inserted, opening_quote); + last_inserted = opening_quote; + } + } + result.insert(result.end(), last_inserted, end); + return result; +} + +std::vector JSONify(const std::vector& code) { + // 1. Remove string comments + std::vector stripped = StripComments(code); + + // 2. join multiline strings + std::vector joined = JoinMultilineString(stripped); + + // 3. normalize string literals from ' into " + for (size_t i = 0; i < joined.size(); ++i) { + if (joined[i] == '\'') { + joined[i] = '"'; + } + } + + // 4. turn pseudo-booleans strings into Booleans + std::vector result3 = ReplaceAll(joined, R"("true")", "true"); + std::vector result4 = ReplaceAll(result3, R"("false")", "false"); + + return result4; +} + +int AddGypi(const std::string& var, + const std::string& filename, + Fragments* definitions) { + Debug("AddGypi %s start\n", filename.c_str()); + + int error = 0; + size_t file_size = GetFileSize(filename, &error); + if (error != 0) { + return error; + } + std::vector code = ReadFileSync(filename.c_str(), file_size, &error); + if (error != 0) { + return error; + } + assert(var == "config"); + + std::vector transformed = JSONify(code); + definitions->emplace_back(GetDefinition(var, transformed)); + return 0; +} + +int JS2C(const FileList& js_files, + const FileList& mjs_files, + const std::string& config, + const std::string& dest) { + Fragments defintions; + defintions.reserve(js_files.size() + mjs_files.size() + 1); + Fragments initializers; + initializers.reserve(js_files.size() + mjs_files.size()); + Fragments registrations; + registrations.reserve(js_files.size() + mjs_files.size() + 1); + + for (const auto& filename : js_files) { + int r = AddModule(filename, &defintions, &initializers, ®istrations); + if (r != 0) { + return r; + } + } + for (const auto& filename : mjs_files) { + int r = AddModule(filename, &defintions, &initializers, ®istrations); + if (r != 0) { + return r; + } + } + + assert(config == "config.gypi"); + // "config.gypi" -> config_raw. + int r = AddGypi("config", config, &defintions); + if (r != 0) { + return r; + } + Fragment out = Format(defintions, initializers, registrations); + return WriteIfChanged(out, dest); +} + +int PrintUsage(const char* argv0) { + fprintf(stderr, + "Usage: %s [--verbose] [--root /path/to/project/root] " + "path/to/output.cc path/to/directory " + "[extra-files ...]\n", + argv0); + return 1; +} + +int Main(int argc, char* argv[]) { + if (argc < 3) { + return PrintUsage(argv[0]); + } + + std::vector args; + args.reserve(argc); + std::string root_dir; + for (int i = 1; i < argc; ++i) { + std::string arg(argv[i]); + if (arg == "--verbose") { + is_verbose = true; + } else if (arg == "--root") { + if (i == argc - 1) { + fprintf(stderr, "--root must be followed by a path\n"); + return 1; + } + root_dir = argv[++i]; + } else { + args.emplace_back(argv[i]); + } + } + + if (args.size() < 2) { + return PrintUsage(argv[0]); + } + + if (!root_dir.empty()) { + int r = uv_chdir(root_dir.c_str()); + if (r != 0) { + fprintf(stderr, "Cannot switch to the directory specified by --root\n"); + PrintUvError("chdir", root_dir.c_str(), r); + return 1; + } + } + std::string output = args[0]; + + FileMap file_map; + for (size_t i = 1; i < args.size(); ++i) { + int error = 0; + const std::string& file = args[i]; + if (IsDirectory(file, &error)) { + if (!SearchFiles(file, &file_map, std::string(kJsSuffix)) || + !SearchFiles(file, &file_map, std::string(kMjsSuffix))) { + return 1; + } + } else if (error != 0) { + return 1; + } else { // It's a file. + std::string_view extension = HasAllowedExtensions(file); + if (extension.size() != 0) { + auto it = file_map.insert({std::string(extension), FileList()}).first; + it->second.push_back(file); + } else { + fprintf(stderr, "Unsupported file: %s\n", file.c_str()); + return 1; + } + } + } + + // Should have exactly 3 types: `.js`, `.mjs` and `.gypi`. + assert(file_map.size() == 3); + auto gypi_it = file_map.find(".gypi"); + std::string config = "config.gypi"; + // Currently config.gypi is the only `.gypi` file allowed + if (gypi_it == file_map.end() || gypi_it->second.size() != 1 || + gypi_it->second[0] != config) { + fprintf( + stderr, + "Arguments should contain one and only one .gypi file: config.gypi\n"); + return 1; + } + auto js_it = file_map.find(".js"); + auto mjs_it = file_map.find(".mjs"); + assert(js_it != file_map.end() && mjs_it != file_map.end()); + + std::sort(js_it->second.begin(), js_it->second.end()); + std::sort(mjs_it->second.begin(), mjs_it->second.end()); + + return JS2C(js_it->second, mjs_it->second, config, output); +} +} // namespace js2c +} // namespace node + +NODE_MAIN(int argc, node::argv_type raw_argv[]) { + char** argv; + node::FixupMain(argc, raw_argv, &argv); + return node::js2c::Main(argc, argv); +} diff --git a/tools/js2c.py b/tools/js2c.py deleted file mode 100755 index ff31f182746053..00000000000000 --- a/tools/js2c.py +++ /dev/null @@ -1,270 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2006-2008 the V8 project authors. All rights reserved. -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -This is a utility for converting JavaScript source code into uint16_t[], -that are used for embedding JavaScript code into the Node.js binary. -""" -import argparse -import os -import re -import functools -import codecs -import utils - -def ReadFile(filename): - if is_verbose: - print(filename) - with codecs.open(filename, "r", "utf-8") as f: - lines = f.read() - return lines - - -TEMPLATE = """ -#include "env-inl.h" -#include "node_builtins.h" -#include "node_external_reference.h" -#include "node_internals.h" - -namespace node {{ - -namespace builtins {{ - -{0} - -namespace {{ -const ThreadsafeCopyOnWrite global_source_map {{ - BuiltinSourceMap{{ {1} }} -}}; -}} - -void BuiltinLoader::LoadJavaScriptSource() {{ - source_ = global_source_map; -}} - -void RegisterExternalReferencesForInternalizedBuiltinCode( - ExternalReferenceRegistry* registry) {{ - {2} -}} - -UnionBytes BuiltinLoader::GetConfig() {{ - return UnionBytes(&{3}); -}} - -}} // namespace builtins - -}} // namespace node -""" - -ONE_BYTE_STRING = """ -static const uint8_t {0}[] = {{ -{1} -}}; - -static StaticExternalOneByteResource {2}({0}, {3}, nullptr); -""" - -TWO_BYTE_STRING = """ -static const uint16_t {0}[] = {{ -{1} -}}; - -static StaticExternalTwoByteResource {2}({0}, {3}, nullptr); -""" - -INITIALIZER = '{{"{0}", UnionBytes(&{1}) }},' - -REGISTRATION = 'registry->Register(&{0});' - -CONFIG_GYPI_ID = 'config_raw' - -CONFIG_GYPI_RESOURCE_ID = 'config_resource' - -SLUGGER_RE = re.compile(r'[.\-/]') - -is_verbose = False - -def GetDefinition(var, source, resource_var, step=30): - template = ONE_BYTE_STRING - code_points = [ord(c) for c in source] - if any(c > 127 for c in code_points): - template = TWO_BYTE_STRING - # Treat non-ASCII as UTF-8 and encode as UTF-16 Little Endian. - encoded_source = bytearray(source, 'utf-16le') - code_points = [ - encoded_source[i] + (encoded_source[i + 1] * 256) - for i in range(0, len(encoded_source), 2) - ] - - # For easier debugging, align to the common 3 char for code-points. - elements_s = ['%3s' % x for x in code_points] - # Put no more then `step` code-points in a line. - slices = [elements_s[i:i + step] for i in range(0, len(elements_s), step)] - lines = [','.join(s) for s in slices] - array_content = ',\n'.join(lines) - length = len(code_points) - definition = template.format(var, array_content, resource_var, length) - - return definition - - -def AddModule(filename, definitions, initializers, registrations): - code = ReadFile(filename) - name = NormalizeFileName(filename) - slug = SLUGGER_RE.sub('_', name) - var = slug + '_raw' - resource_var = slug + '_resource' - definition = GetDefinition(var, code, resource_var) - initializer = INITIALIZER.format(name, resource_var) - registration = REGISTRATION.format(resource_var) - definitions.append(definition) - initializers.append(initializer) - registrations.append(registration) - -def NormalizeFileName(filename): - split = filename.split('/') - if split[0] == 'deps': - split = ['internal'] + split - else: # `lib/**/*.js` so drop the 'lib' part - split = split[1:] - if len(split): - filename = '/'.join(split) - return os.path.splitext(filename)[0] - - -def JS2C(source_files, target): - # Build source code lines - definitions = [] - initializers = [] - registrations = [] - - for filename in source_files['.js']: - AddModule(filename, definitions, initializers, registrations) - for filename in source_files['.mjs']: - AddModule(filename, definitions, initializers, registrations) - - config_def = handle_config_gypi(source_files['config.gypi']) - definitions.append(config_def) - - # Emit result - definitions = ''.join(definitions) - initializers = '\n '.join(initializers) - registrations = '\n '.join(registrations) - out = TEMPLATE.format(definitions, initializers, - registrations, CONFIG_GYPI_RESOURCE_ID) - write_if_chaged(out, target) - - -def handle_config_gypi(config_filename): - # if its a gypi file we're going to want it as json - # later on anyway, so get it out of the way now - config = ReadFile(config_filename) - config = jsonify(config) - config_def = GetDefinition(CONFIG_GYPI_ID, config, CONFIG_GYPI_RESOURCE_ID) - return config_def - - -def jsonify(config): - # 1. string comments - config = re.sub(r'#.*?\n', '', config) - # 2. join multiline strings - config = re.sub(r"'$\s+'", '', config, flags=re.M) - # 3. normalize string literals from ' into " - config = re.sub('\'', '"', config) - # 4. turn pseudo-booleans strings into Booleans - config = re.sub('"true"', 'true', config) - config = re.sub('"false"', 'false', config) - return config - - -def write_if_chaged(content, target): - if os.path.exists(target): - with open(target, 'rt') as existing: - old_content = existing.read() - else: - old_content = '' - if old_content == content: - os.utime(target, None) - return - with open(target, "wt") as output: - output.write(content) - - -def SourceFileByExt(files_by_ext, filename): - """ - :type files_by_ext: dict - :type filename: str - :rtype: dict - """ - ext = os.path.splitext(filename)[-1] - files_by_ext.setdefault(ext, []).append(filename) - return files_by_ext - -def main(): - parser = argparse.ArgumentParser( - description='Convert code files into `uint16_t[]`s', - fromfile_prefix_chars='@' - ) - parser.add_argument('--target', help='output file') - parser.add_argument( - '--directory', - default=None, - help='input file directory') - parser.add_argument( - '--root', - default=None, - help='root directory containing the sources') - parser.add_argument('--verbose', action='store_true', help='output file') - parser.add_argument('sources', nargs='*', help='input files') - options = parser.parse_args() - global is_verbose - is_verbose = options.verbose - sources = options.sources - - if options.root is not None: - os.chdir(options.root) - - if options.directory is not None: - js_files = utils.SearchFiles(options.directory, 'js') - mjs_files = utils.SearchFiles(options.directory, 'mjs') - sources = js_files + mjs_files + options.sources - - source_files = functools.reduce(SourceFileByExt, sources, {}) - - # Should have exactly 3 types: `.js`, `.mjs` and `.gypi` - assert len(source_files) == 3 - # Currently config.gypi is the only `.gypi` file allowed - assert len(source_files['.gypi']) == 1 - assert os.path.basename(source_files['.gypi'][0]) == 'config.gypi' - source_files['config.gypi'] = source_files.pop('.gypi')[0] - JS2C(source_files, options.target) - - -if __name__ == "__main__": - main()