From 072c7d29e8baf10cde3ef13e2fc2f51aafeb4d0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vin=C3=ADcius=20Louren=C3=A7o?=
Date: Sat, 3 Jun 2023 16:43:13 -0300
Subject: [PATCH] src,lib: reducing C++ calls of esm legacy main resolve

Instead of many C++ calls, now we make only one C++ call
to return an enum number that represents the selected state.
---
 benchmark/esm/esm-legacyMainResolve.js |  53 ++++++
 lib/internal/modules/esm/resolve.js    |  83 ++++-----
 src/node_errors.h                      |   4 +
 src/node_file.cc                       | 251 +++++++++++++++++++++++++
 src/node_file.h                        |  29 ++++
 5 files changed, 376 insertions(+), 44 deletions(-)
 create mode 100644 benchmark/esm/esm-legacyMainResolve.js

diff --git a/benchmark/esm/esm-legacyMainResolve.js b/benchmark/esm/esm-legacyMainResolve.js
new file mode 100644
index 00000000000000..f5751e6840ff9a
--- /dev/null
+++ b/benchmark/esm/esm-legacyMainResolve.js
@@ -0,0 +1,53 @@
+// Benchmarks `legacyMainResolve()`, the ESM resolver fallback that locates a
+// package entry point from its legacy CommonJS `main` field.
+'use strict';
+const fs = require('node:fs');
+const path = require('node:path');
+const common = require('../common.js');
+
+const tmpdir = require('../../test/common/tmpdir.js');
+const { pathToFileURL } = require('node:url');
+
+const benchmarkDirectory =
+  path.resolve(tmpdir.path, 'benchmark-import-meta-resolve');
+
+const configs = {
+  n: [1e4],
+  packageJsonUrl: [
+    'node_modules/test/package.json',
+  ],
+  packageConfigMain: ['', './index.js'],
+  resolvedFile: [
+    'node_modules/test/index.js',
+    'node_modules/test/index.json',
+    'node_modules/test/index.node',
+    'node_modules/non-exist',
+  ],
+};
+
+const options = {
+  flags: ['--expose-internals'],
+};
+
+const bench = common.createBenchmark(main, configs, options);
+
+function main(conf) {
+  const { legacyMainResolve } = require('internal/modules/esm/resolve');
+  tmpdir.refresh();
+
+  fs.mkdirSync(path.join(benchmarkDirectory, 'node_modules', 'test'), { recursive: true });
+  fs.writeFileSync(path.join(benchmarkDirectory, conf.resolvedFile), '\n');
+
+  const packageJsonUrl = pathToFileURL(path.join(benchmarkDirectory, conf.packageJsonUrl));
+  const packageConfigMain = { main: conf.packageConfigMain };
+
+  bench.start();
+
+  for (let i = 0; i < conf.n; i++) {
+    try {
+      legacyMainResolve(packageJsonUrl, packageConfigMain, undefined);
+    } catch { /* Configurations without a resolvable file are expected to throw. */ }
+  }
+
+  bench.end(conf.n);
+}
diff --git a/lib/internal/modules/esm/resolve.js b/lib/internal/modules/esm/resolve.js
index 927b118f8ede2b..48273b04f80db8 100644
--- a/lib/internal/modules/esm/resolve.js
+++ b/lib/internal/modules/esm/resolve.js
@@ -36,14 +36,16 @@ const preserveSymlinksMain = getOptionValue('--preserve-symlinks-main');
 const experimentalNetworkImports =
   getOptionValue('--experimental-network-imports');
 const typeFlag = getOptionValue('--input-type');
-const { URL, pathToFileURL, fileURLToPath, isURL, toPathIfFileURL } = require('internal/url');
+const { URL, pathToFileURL, fileURLToPath, isURL } = require('internal/url');
 const { canParse: URLCanParse } = internalBinding('url');
+const { legacyMainResolve: URLLegacyMainResolve } = internalBinding('fs');
 const {
   ERR_INPUT_TYPE_NOT_ALLOWED,
   ERR_INVALID_ARG_TYPE,
   ERR_INVALID_MODULE_SPECIFIER,
   ERR_INVALID_PACKAGE_CONFIG,
   ERR_INVALID_PACKAGE_TARGET,
+  ERR_INVALID_URL,
   ERR_MANIFEST_DEPENDENCY_MISSING,
   ERR_MODULE_NOT_FOUND,
   ERR_PACKAGE_IMPORT_NOT_DEFINED,
@@ -132,13 +134,20 @@ function emitLegacyIndexDeprecation(url, packageJSONUrl, base, main) {
 
 const realpathCache = new SafeMap();
 
-/**
- * @param {string | URL} url
- * @returns {boolean}
- */
-function fileExists(url) {
-  return internalModuleStat(toNamespacedPath(toPathIfFileURL(url))) === 0;
-}
+// Indexed by the value returned from the `legacyMainResolve` binding; keep it
+// in sync with `legacy_main_extensions` in src/node_file.h.
+const mainResolveExtensions = [
+  '',
+  '.js',
+  '.json',
+  '.node',
+  '/index.js',
+  '/index.json',
+  '/index.node',
+  './index.js',
+  './index.json',
+  './index.node',
+];
 
 /**
  * Legacy CommonJS main resolution:
@@ -153,44 +162,29 @@ function fileExists(url) {
  * @returns {URL}
  */
 function legacyMainResolve(packageJSONUrl, packageConfig, base) {
-  let guess;
-  if (packageConfig.main !== undefined) {
-    // Note: fs check redundances will be handled by Descriptor cache here.
-    if (fileExists(guess = new URL(`./${packageConfig.main}`,
-                                   packageJSONUrl))) {
-      return guess;
-    } else if (fileExists(guess = new URL(`./${packageConfig.main}.js`,
-                                          packageJSONUrl)));
-    else if (fileExists(guess = new URL(`./${packageConfig.main}.json`,
-                                        packageJSONUrl)));
-    else if (fileExists(guess = new URL(`./${packageConfig.main}.node`,
-                                        packageJSONUrl)));
-    else if (fileExists(guess = new URL(`./${packageConfig.main}/index.js`,
-                                        packageJSONUrl)));
-    else if (fileExists(guess = new URL(`./${packageConfig.main}/index.json`,
-                                        packageJSONUrl)));
-    else if (fileExists(guess = new URL(`./${packageConfig.main}/index.node`,
-                                        packageJSONUrl)));
-    else guess = undefined;
-    if (guess) {
-      emitLegacyIndexDeprecation(guess, packageJSONUrl, base,
-                                 packageConfig.main);
-      return guess;
+  const resolvedOption = URLLegacyMainResolve(packageJSONUrl.href, packageConfig.main);
+
+  // A negative value reports an error:
+  // -2: LegacyMainResolveReturnType::kInvalidUrl
+  // -1: LegacyMainResolveReturnType::kModuleNotFound
+  if (resolvedOption < 0) {
+    if (resolvedOption === -2) {
+      throw new ERR_INVALID_URL(packageConfig.main || './index.js');
+    } else {
+      throw new ERR_MODULE_NOT_FOUND(
+        fileURLToPath(new URL('.', packageJSONUrl)), base && fileURLToPath(base));
     }
-    // Fallthrough.
   }
-  if (fileExists(guess = new URL('./index.js', packageJSONUrl)));
-  // So fs.
-  else if (fileExists(guess = new URL('./index.json', packageJSONUrl)));
-  else if (fileExists(guess = new URL('./index.node', packageJSONUrl)));
-  else guess = undefined;
-  if (guess) {
-    emitLegacyIndexDeprecation(guess, packageJSONUrl, base, packageConfig.main);
-    return guess;
-  }
-  // Not found.
-  throw new ERR_MODULE_NOT_FOUND(
-    fileURLToPath(new URL('.', packageJSONUrl)), fileURLToPath(base));
+
+  // 0-6: when packageConfig.main is defined
+  // 7-9: when packageConfig.main is NOT defined,
+  //      or when the previous case didn't find the file
+  const baseUrl = resolvedOption <= 6 ? `./${packageConfig.main}` : '';
+  const guess = new URL(baseUrl + mainResolveExtensions[resolvedOption], packageJSONUrl);
+
+  emitLegacyIndexDeprecation(guess, packageJSONUrl, base, packageConfig.main);
+
+  return guess;
 }
 
 const encodedSepRegEx = /%2F|%5C/i;
@@ -1078,6 +1072,7 @@ module.exports = {
   packageExportsResolve,
   packageImportsResolve,
   throwIfInvalidParentURL,
+  legacyMainResolve,
 };
 
 // cycle
diff --git a/src/node_errors.h b/src/node_errors.h
index ddb87df20ef4af..b2d634c3dff56d 100644
--- a/src/node_errors.h
+++ b/src/node_errors.h
@@ -68,11 +68,14 @@ void AppendExceptionLine(Environment* env,
   V(ERR_INVALID_ARG_VALUE, TypeError)                                          \
   V(ERR_OSSL_EVP_INVALID_DIGEST, Error)                                        \
   V(ERR_INVALID_ARG_TYPE, TypeError)                                           \
+  V(ERR_INVALID_FILE_URL_HOST, TypeError)                                      \
+  V(ERR_INVALID_FILE_URL_PATH, TypeError)                                      \
   V(ERR_INVALID_OBJECT_DEFINE_PROPERTY, TypeError)                             \
   V(ERR_INVALID_MODULE, Error)                                                 \
   V(ERR_INVALID_STATE, Error)                                                  \
   V(ERR_INVALID_THIS, TypeError)                                               \
   V(ERR_INVALID_TRANSFER_OBJECT, TypeError)                                    \
+  V(ERR_INVALID_URL_SCHEME, TypeError)                                         \
   V(ERR_MEMORY_ALLOCATION_FAILED, Error)                                       \
   V(ERR_MESSAGE_TARGET_CONTEXT_UNAVAILABLE, Error)                             \
   V(ERR_MISSING_ARGS, TypeError)                                               \
@@ -163,6 +166,7 @@ ERRORS_WITH_CODE(V)
   V(ERR_INVALID_STATE, "Invalid state")                                        \
   V(ERR_INVALID_THIS, "Value of \"this\" is the wrong type")                   \
   V(ERR_INVALID_TRANSFER_OBJECT, "Found invalid object in transferList")       \
+  V(ERR_INVALID_URL_SCHEME, "The URL must be of scheme file:")                 \
   V(ERR_MEMORY_ALLOCATION_FAILED, "Failed to allocate memory")                 \
   V(ERR_OSSL_EVP_INVALID_DIGEST, "Invalid digest used")                        \
   V(ERR_MESSAGE_TARGET_CONTEXT_UNAVAILABLE,                                    \
diff --git a/src/node_file.cc b/src/node_file.cc
index 5a92432019dbb1..895431dc426dfb 100644
--- a/src/node_file.cc
+++ b/src/node_file.cc
@@ -19,11 +19,14 @@
 // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 // USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "node_file.h"  // NOLINT(build/include_inline)
+#include "ada.h"
 #include "aliased_buffer-inl.h"
 #include "memory_tracker-inl.h"
 #include "node_buffer.h"
+#include "node_errors.h"
 #include "node_external_reference.h"
 #include "node_file-inl.h"
+#include "node_metadata.h"
 #include "node_process-inl.h"
 #include "node_stat_watcher.h"
 #include "permission/permission.h"
@@ -2727,6 +2730,250 @@ static void Mkdtemp(const FunctionCallbackInfo<Value>& args) {
   }
 }
 
+// Converts a parsed file: URL into a platform-specific path stored in
+// *result_file_path. On failure an exception is thrown on the isolate and
+// false is returned.
+static bool FileURLToPathImpl(Environment* env,
+                              ada::url_aggregator* file_url,
+                              std::string* result_file_path) {
+  if (file_url->type != ada::scheme::FILE) {
+    env->isolate()->ThrowException(ERR_INVALID_URL_SCHEME(env->isolate()));
+
+    return false;
+  }
+
+  std::string_view pathname = file_url->get_pathname();
+  std::string_view hostname = file_url->get_hostname();
+  // Index of the first '%' in pathname. ada::unicode::percent_decode() copies
+  // everything before that index verbatim and only decodes from there on;
+  // npos means that there is nothing to decode.
+  size_t first_percent = std::string::npos;
+#ifdef _WIN32
+  std::string pathname_escaped_slash;
+  pathname_escaped_slash.reserve(pathname.size());
+
+  for (size_t i = 0; i < pathname.size(); ++i) {
+    // Windows uses '\' as the path separator. The replacement is 1:1, so
+    // first_percent is a valid index into pathname_escaped_slash as well.
+    pathname_escaped_slash += pathname[i] == '/' ? '\\' : pathname[i];
+
+    if (pathname[i] != '%') continue;
+
+    if (first_percent == std::string::npos) first_percent = i;
+
+    // A '%' in the last two positions cannot start an escape sequence.
+    if ((i + 2) >= pathname.size()) continue;
+
+    char third = pathname[i + 2] | 0x20;
+
+    bool is_slash = pathname[i + 1] == '2' && third == 'f';      // %2f %2F
+    bool is_backslash = pathname[i + 1] == '5' && third == 'c';  // %5c %5C
+
+    if (!is_slash && !is_backslash) continue;
+
+    env->isolate()->ThrowException(ERR_INVALID_FILE_URL_PATH(
+        env->isolate(),
+        "File URL path must not include encoded \\ or / characters"));
+
+    return false;
+  }
+
+  std::string decoded_pathname =
+      ada::unicode::percent_decode(pathname_escaped_slash, first_percent);
+
+  if (hostname.size() > 0) {
+    // If hostname is set, then we have a UNC path
+    // Pass the hostname through domainToUnicode just in case
+    // it is an IDN using punycode encoding. We do not need to worry
+    // about percent encoding because the URL parser will have
+    // already taken care of that for us. Note that this only
+    // causes IDNs with an appropriate `xn--` prefix to be decoded.
+    *result_file_path =
+        "\\\\" + ada::unicode::to_unicode(hostname) + decoded_pathname;
+
+    return true;
+  }
+
+  // At this point an absolute path looks like "\C:\...".
+  bool has_drive_letter = decoded_pathname.size() >= 3 &&
+                          decoded_pathname[2] == ':';
+  char letter = has_drive_letter ? (decoded_pathname[1] | 0x20) : '\0';
+
+  // a..z A..Z
+  if (letter < 'a' || letter > 'z') {
+    env->isolate()->ThrowException(ERR_INVALID_FILE_URL_PATH(
+        env->isolate(), "File URL path must be absolute"));
+
+    return false;
+  }
+
+  *result_file_path = decoded_pathname.substr(1);
+
+  return true;
+#else
+  if (hostname.size() > 0) {
+    std::string error_message =
+        std::string("File URL host must be \"localhost\" or empty on ") +
+        std::string(per_process::metadata.platform);
+    env->isolate()->ThrowException(
+        ERR_INVALID_FILE_URL_HOST(env->isolate(), error_message.c_str()));
+
+    return false;
+  }
+
+  for (size_t i = 0; i < pathname.size(); ++i) {
+    if (pathname[i] != '%') continue;
+
+    if (first_percent == std::string::npos) first_percent = i;
+
+    // A '%' in the last two positions cannot start an escape sequence.
+    if ((i + 2) >= pathname.size()) continue;
+
+    if (pathname[i + 1] != '2' || (pathname[i + 2] | 0x20) != 'f') continue;
+
+    env->isolate()->ThrowException(ERR_INVALID_FILE_URL_PATH(
+        env->isolate(), "File URL path must not include encoded / characters"));
+
+    return false;
+  }
+
+  *result_file_path = ada::unicode::percent_decode(pathname, first_percent);
+
+  return true;
+#endif
+}
+
+// Stats file_path and reports whether it exists and is not a directory.
+// Returns kThrowInsufficientPermissions when the permission check for
+// reading file_path fails.
+FilePathIsFileReturnType FilePathIsFile(Environment* env,
+                                        const std::string& file_path) {
+  THROW_IF_INSUFFICIENT_PERMISSIONS(
+      env,
+      permission::PermissionScope::kFileSystemRead,
+      file_path,
+      FilePathIsFileReturnType::kThrowInsufficientPermissions);
+
+  uv_fs_t req;
+
+  int rc = uv_fs_stat(env->event_loop(), &req, file_path.c_str(), nullptr);
+
+  if (rc == 0) {
+    const uv_stat_t* const s = static_cast<const uv_stat_t*>(req.ptr);
+    rc = !!(s->st_mode & S_IFDIR);
+  }
+
+  uv_fs_req_cleanup(&req);
+
+  // rc is 0 if the path refers to a file
+  if (rc == 0) return FilePathIsFileReturnType::kIsFile;
+
+  return FilePathIsFileReturnType::kIsNotFile;
+}
+
+// Probes the candidate entry points of a package in a single call.
+// args[0] is the package.json URL, args[1] the optional "main" field.
+// Returns the index of the first candidate that is a file (see
+// legacy_main_extensions) or a negative LegacyMainResolveReturnType.
+static void LegacyMainResolve(const FunctionCallbackInfo<Value>& args) {
+  CHECK_GE(args.Length(), 1);
+  // packageJSONUrl
+  // file:///home/user/project/package.json
+  CHECK(args[0]->IsString());
+
+  Environment* env = Environment::GetCurrent(args);
+
+  // The Utf8Value must outlive every use of the string_view that points
+  // into it, so it cannot be a temporary.
+  Utf8Value utf8_package_json_url(env->isolate(), args[0]);
+  auto base =
+      ada::parse<ada::url_aggregator>(utf8_package_json_url.ToStringView());
+
+  if (!base) {
+    return args.GetReturnValue().Set(LegacyMainResolveReturnType::kInvalidUrl);
+  }
+
+  ada::url_aggregator* base_pointer = &base.value();
+  ada::result<ada::url_aggregator> file_path_url;
+  std::string initial_file_path;
+  std::string file_path;
+
+  // packageConfig.main
+  // ./index.js
+  // ./lib/index.js
+  if (args[1]->IsString()) {
+    std::string package_config_main =
+        Utf8Value(env->isolate(), args[1]).ToString();
+
+    file_path_url = ada::parse<ada::url_aggregator>(
+        std::string("./") + package_config_main, base_pointer);
+
+    if (!file_path_url) {
+      return args.GetReturnValue().Set(
+          LegacyMainResolveReturnType::kInvalidUrl);
+    }
+
+    if (!FileURLToPathImpl(env, &file_path_url.value(), &initial_file_path)) {
+      return;
+    }
+
+    FromNamespacedPath(&initial_file_path);
+
+    for (int i = 0; i < legacy_main_extensions_with_main_end; i++) {
+      file_path = initial_file_path + legacy_main_extensions[i];
+
+      switch (FilePathIsFile(env, file_path)) {
+        case FilePathIsFileReturnType::kIsFile:
+          return args.GetReturnValue().Set(i);
+        case FilePathIsFileReturnType::kIsNotFile:
+          continue;
+        case FilePathIsFileReturnType::kThrowInsufficientPermissions:
+          // Without read permission there is nothing left to probe:
+          // return as soon as possible and let the exception raised by
+          // the permission check propagate to JavaScript.
+          return;
+        default:
+          UNREACHABLE();
+      }
+    }
+  }
+
+  file_path_url = ada::parse<ada::url_aggregator>("./index", base_pointer);
+
+  if (!file_path_url) {
+    return args.GetReturnValue().Set(LegacyMainResolveReturnType::kInvalidUrl);
+  }
+
+  if (!FileURLToPathImpl(env, &file_path_url.value(), &initial_file_path)) {
+    return;
+  }
+
+  FromNamespacedPath(&initial_file_path);
+
+  for (int i = legacy_main_extensions_with_main_end;
+       i < legacy_main_extensions_package_fallback_end;
+       i++) {
+    file_path = initial_file_path + legacy_main_extensions[i];
+
+    switch (FilePathIsFile(env, file_path)) {
+      case FilePathIsFileReturnType::kIsFile:
+        return args.GetReturnValue().Set(i);
+      case FilePathIsFileReturnType::kIsNotFile:
+        continue;
+      case FilePathIsFileReturnType::kThrowInsufficientPermissions:
+        // Without read permission there is nothing left to probe:
+        // return as soon as possible and let the exception raised by
+        // the permission check propagate to JavaScript.
+        return;
+      default:
+        UNREACHABLE();
+    }
+  }
+
+  return args.GetReturnValue().Set(
+      LegacyMainResolveReturnType::kModuleNotFound);
+}
+
 void BindingData::MemoryInfo(MemoryTracker* tracker) const {
   tracker->TrackField("stats_field_array", stats_field_array);
   tracker->TrackField("stats_field_bigint_array", stats_field_bigint_array);
@@ -2872,6 +3119,8 @@ static void CreatePerIsolateProperties(IsolateData* isolate_data,
 
   SetMethod(isolate, target, "mkdtemp", Mkdtemp);
 
+  SetMethod(isolate, target, "legacyMainResolve", LegacyMainResolve);
+
   StatWatcher::CreatePerIsolateProperties(isolate_data, target);
 
   target->Set(
@@ -2988,6 +3237,8 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
   registry->Register(Mkdtemp);
   registry->Register(NewFSReqCallback);
 
+  registry->Register(LegacyMainResolve);
+
   registry->Register(FileHandle::New);
   registry->Register(FileHandle::Close);
   registry->Register(FileHandle::ReleaseFD);
diff --git a/src/node_file.h b/src/node_file.h
index 7b43d027a2e652..43d401df3047c1 100644
--- a/src/node_file.h
+++ b/src/node_file.h
@@ -55,6 +55,35 @@ enum class FsStatFsOffset {
 constexpr size_t kFsStatFsBufferLength =
     static_cast<size_t>(FsStatFsOffset::kFsStatFsFieldsNumber);
 
+// the possible file extensions that should be tested
+// 0-6: when packageConfig.main is defined
+// 7-9: when packageConfig.main is NOT defined,
+//      or when the previous case didn't find the file
+const std::vector<std::string> legacy_main_extensions = {"",
+                                                         ".js",
+                                                         ".json",
+                                                         ".node",
+                                                         "/index.js",
+                                                         "/index.json",
+                                                         "/index.node",
+                                                         ".js",
+                                                         ".json",
+                                                         ".node"};
+// define the final index of the algorithm resolution
+// when packageConfig.main is defined.
+const uint8_t legacy_main_extensions_with_main_end = 7;
+// define the final index of the algorithm resolution
+// when packageConfig.main is NOT defined
+const uint8_t legacy_main_extensions_package_fallback_end = 10;
+
+enum class FilePathIsFileReturnType {
+  kIsFile = 0,
+  kIsNotFile,
+  kThrowInsufficientPermissions
+};
+
+enum LegacyMainResolveReturnType { kInvalidUrl = -2, kModuleNotFound = -1 };
+
 class BindingData : public SnapshotableObject {
  public:
   struct InternalFieldInfo : public node::InternalFieldInfoBase {