Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

module: detect ESM syntax by trying to recompile as SourceTextModule #52413

Merged
merged 1 commit into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/internal/modules/esm/get_format.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
// but this gets called again from `defaultLoad`/`defaultLoadSync`.
if (getOptionValue('--experimental-detect-module')) {
const format = source ?
(containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs') :
(containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs') :
null;
if (format === 'module') {
// This module has a .js extension, a package.json with no `type` field, and ESM syntax.
Expand Down Expand Up @@ -158,7 +158,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
if (!source) { return null; }
const format = getFormatOfExtensionlessFile(url);
if (format === 'module') {
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
return containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs';
joyeecheung marked this conversation as resolved.
Show resolved Hide resolved
}
return format;
}
Expand Down
34 changes: 0 additions & 34 deletions lib/internal/modules/helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,6 @@ const {
} = require('internal/errors').codes;
const { BuiltinModule } = require('internal/bootstrap/realm');

const {
shouldRetryAsESM: contextifyShouldRetryAsESM,
constants: {
syntaxDetectionErrors: {
esmSyntaxErrorMessages,
throwsOnlyInCommonJSErrorMessages,
},
},
} = internalBinding('contextify');
const { validateString } = require('internal/validators');
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
const internalFS = require('internal/fs/utils');
Expand Down Expand Up @@ -329,30 +320,6 @@ function normalizeReferrerURL(referrerName) {
}


let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM
let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM
/**
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
* We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse
* throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical
* redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM.
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
* @param {string} source Module contents
*/
function shouldRetryAsESM(errorMessage, source) {
esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages);
if (esmSyntaxErrorMessagesSet.has(errorMessage)) {
return true;
}

throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages);
if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) {
return /** @type {boolean} */(contextifyShouldRetryAsESM(source));
}

return false;
}

/**
* @param {string|undefined} url URL to convert to filename
*/
Expand Down Expand Up @@ -382,7 +349,6 @@ module.exports = {
loadBuiltinModule,
makeRequireFunction,
normalizeReferrerURL,
shouldRetryAsESM,
stripBOM,
toRealPath,
hasStartedUserCJSExecution() {
Expand Down
26 changes: 16 additions & 10 deletions lib/internal/modules/run_main.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
'use strict';

const {
ObjectGetPrototypeOf,
StringPrototypeEndsWith,
SyntaxErrorPrototype,
globalThis,
} = primordials;

Expand Down Expand Up @@ -159,27 +161,29 @@ function runEntryPointWithESMLoader(callback) {
function executeUserEntryPoint(main = process.argv[1]) {
const resolvedMain = resolveMainPath(main);
const useESMLoader = shouldUseESMLoader(resolvedMain);

let mainURL;
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
let retryAsESM = false;
if (!useESMLoader) {
const cjsLoader = require('internal/modules/cjs/loader');
const { Module } = cjsLoader;
if (getOptionValue('--experimental-detect-module')) {
// TODO(joyeecheung): handle this in the CJS loader. Don't try-catch here.
try {
// Module._load is the monkey-patchable CJS module loader.
Module._load(main, null, true);
} catch (error) {
const source = cjsLoader.entryPointSource;
const { shouldRetryAsESM } = require('internal/modules/helpers');
retryAsESM = shouldRetryAsESM(error.message, source);
// In case the entry point is a large file, such as a bundle,
// ensure no further references can prevent it being garbage-collected.
cjsLoader.entryPointSource = undefined;
if (error != null && ObjectGetPrototypeOf(error) === SyntaxErrorPrototype) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential follow-up: Do we control the error being thrown? If so, I'm wondering if we should create a sub-class like MismatchedSyntaxError 🤔

Copy link
Member Author

@joyeecheung joyeecheung Apr 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The follow up is just don't do the check in JS, and check right after parsing in C++ :)

const { shouldRetryAsESM } = internalBinding('contextify');
const mainPath = resolvedMain || main;
mainURL = pathToFileURL(mainPath).href;
joyeecheung marked this conversation as resolved.
Show resolved Hide resolved
retryAsESM = shouldRetryAsESM(error.message, cjsLoader.entryPointSource, mainURL);
// In case the entry point is a large file, such as a bundle,
// ensure no further references can prevent it being garbage-collected.
cjsLoader.entryPointSource = undefined;
}
if (!retryAsESM) {
const { enrichCJSError } = require('internal/modules/esm/translators');
enrichCJSError(error, source, resolvedMain);
Comment on lines -181 to -182
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is removing these lines not causing any tests to break? There are tests that check the output of this “enriched” error that I’d think should be failing without these lines.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those only check part of the sentence because the rest keeps changing (typically it's not a good idea to check all of the message, either). The error is now "enriched" in C++.

Copy link
Member Author

@joyeecheung joyeecheung Apr 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I was talking about my followup. In this branch, this part is already just unreachable, even before this patch, because if !retryAsESM, then the code doesn't contains ESM syntax, then the error won't be "enriched" by enrichCJSError() anyway (i.e. skipping inside enrichCJSError() already).

throw error;
}
}
Expand All @@ -190,7 +194,9 @@ function executeUserEntryPoint(main = process.argv[1]) {

if (useESMLoader || retryAsESM) {
const mainPath = resolvedMain || main;
const mainURL = pathToFileURL(mainPath).href;
if (mainURL === undefined) {
mainURL = pathToFileURL(mainPath).href;
}
joyeecheung marked this conversation as resolved.
Show resolved Hide resolved

runEntryPointWithESMLoader((cascadedLoader) => {
// Note that if the graph contains unsettled TLA, this may never resolve
Expand Down
152 changes: 100 additions & 52 deletions src/module_wrap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,17 @@ v8::Maybe<bool> ModuleWrap::CheckUnsettledTopLevelAwait() {
return v8::Just(false);
}

// new ModuleWrap(url, context, source, lineOffset, columnOffset, cachedData)
Local<PrimitiveArray> ModuleWrap::GetHostDefinedOptions(
Isolate* isolate, Local<Symbol> id_symbol) {
Local<PrimitiveArray> host_defined_options =
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
return host_defined_options;
}

// new ModuleWrap(url, context, source, lineOffset, columnOffset[, cachedData]);
// new ModuleWrap(url, context, source, lineOffset, columOffset,
// hostDefinedOption)
// idSymbol);
// new ModuleWrap(url, context, exportNames, evaluationCallback[, cjsModule])
void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
CHECK(args.IsConstructCall());
Expand Down Expand Up @@ -183,9 +191,10 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
// cjsModule])
CHECK(args[3]->IsFunction());
} else {
// new ModuleWrap(url, context, source, lineOffset, columOffset, cachedData)
// new ModuleWrap(url, context, source, lineOffset, columOffset[,
// cachedData]);
// new ModuleWrap(url, context, source, lineOffset, columOffset,
// hostDefinedOption)
// idSymbol);
CHECK(args[2]->IsString());
CHECK(args[3]->IsNumber());
line_offset = args[3].As<Int32>()->Value();
Expand All @@ -199,7 +208,7 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
} else {
id_symbol = Symbol::New(isolate, url);
}
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
host_defined_options = GetHostDefinedOptions(isolate, id_symbol);

if (that->SetPrivate(context,
realm->isolate_data()->host_defined_option_symbol(),
Expand Down Expand Up @@ -234,50 +243,34 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
module = Module::CreateSyntheticModule(
isolate, url, span, SyntheticModuleEvaluationStepsCallback);
} else {
ScriptCompiler::CachedData* cached_data = nullptr;
bool used_cache_from_user = false;
// When we are compiling for the default loader, this will be
// std::nullopt, and CompileSourceTextModule() should use
// on-disk cache.
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data;
if (id_symbol !=
realm->isolate_data()->source_text_module_default_hdo()) {
user_cached_data = nullptr;
}
if (args[5]->IsArrayBufferView()) {
DCHECK(!can_use_builtin_cache); // We don't use this option internally.
used_cache_from_user = true;
CHECK(!can_use_builtin_cache); // We don't use this option internally.
Local<ArrayBufferView> cached_data_buf = args[5].As<ArrayBufferView>();
uint8_t* data =
static_cast<uint8_t*>(cached_data_buf->Buffer()->Data());
cached_data =
user_cached_data =
new ScriptCompiler::CachedData(data + cached_data_buf->ByteOffset(),
cached_data_buf->ByteLength());
}

Local<String> source_text = args[2].As<String>();
ScriptOrigin origin(isolate,
url,
line_offset,
column_offset,
true, // is cross origin
-1, // script id
Local<Value>(), // source map URL
false, // is opaque (?)
false, // is WASM
true, // is ES Module
host_defined_options);

CompileCacheEntry* cache_entry = nullptr;
if (can_use_builtin_cache && realm->env()->use_compile_cache()) {
cache_entry = realm->env()->compile_cache_handler()->GetOrInsert(
source_text, url, CachedCodeType::kESM);
}
if (cache_entry != nullptr && cache_entry->cache != nullptr) {
// source will take ownership of cached_data.
cached_data = cache_entry->CopyCache();
}

ScriptCompiler::Source source(source_text, origin, cached_data);
ScriptCompiler::CompileOptions options;
if (source.GetCachedData() == nullptr) {
options = ScriptCompiler::kNoCompileOptions;
} else {
options = ScriptCompiler::kConsumeCodeCache;
}
if (!ScriptCompiler::CompileModule(isolate, &source, options)
bool cache_rejected = false;
if (!CompileSourceTextModule(realm,
source_text,
url,
line_offset,
column_offset,
host_defined_options,
user_cached_data,
&cache_rejected)
.ToLocal(&module)) {
if (try_catch.HasCaught() && !try_catch.HasTerminated()) {
CHECK(!try_catch.Message().IsEmpty());
Expand All @@ -291,18 +284,8 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
return;
}

bool cache_rejected = false;
if (options == ScriptCompiler::kConsumeCodeCache) {
cache_rejected = source.GetCachedData()->rejected;
}

if (cache_entry != nullptr) {
realm->env()->compile_cache_handler()->MaybeSave(
cache_entry, module, cache_rejected);
}

// If the cache comes from builtin compile cache, fail silently.
if (cache_rejected && used_cache_from_user) {
if (user_cached_data.has_value() && user_cached_data.value() != nullptr &&
cache_rejected) {
THROW_ERR_VM_MODULE_CACHED_DATA_REJECTED(
realm, "cachedData buffer was rejected");
try_catch.ReThrow();
Expand Down Expand Up @@ -345,6 +328,71 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
args.GetReturnValue().Set(that);
}

MaybeLocal<Module> ModuleWrap::CompileSourceTextModule(
Realm* realm,
Local<String> source_text,
Local<String> url,
int line_offset,
int column_offset,
Local<PrimitiveArray> host_defined_options,
std::optional<ScriptCompiler::CachedData*> user_cached_data,
bool* cache_rejected) {
Isolate* isolate = realm->isolate();
EscapableHandleScope scope(isolate);
ScriptOrigin origin(isolate,
url,
line_offset,
column_offset,
true, // is cross origin
-1, // script id
Local<Value>(), // source map URL
false, // is opaque (?)
false, // is WASM
true, // is ES Module
host_defined_options);
ScriptCompiler::CachedData* cached_data = nullptr;
CompileCacheEntry* cache_entry = nullptr;
// When compiling for the default loader, user_cached_data is std::nullptr.
// When compiling for vm.Module, it's either nullptr or a pointer to the
// cached data.
if (user_cached_data.has_value()) {
cached_data = user_cached_data.value();
} else if (realm->env()->use_compile_cache()) {
cache_entry = realm->env()->compile_cache_handler()->GetOrInsert(
source_text, url, CachedCodeType::kESM);
}

if (cache_entry != nullptr && cache_entry->cache != nullptr) {
// source will take ownership of cached_data.
cached_data = cache_entry->CopyCache();
}

ScriptCompiler::Source source(source_text, origin, cached_data);
ScriptCompiler::CompileOptions options;
if (cached_data == nullptr) {
options = ScriptCompiler::kNoCompileOptions;
} else {
options = ScriptCompiler::kConsumeCodeCache;
}

Local<Module> module;
if (!ScriptCompiler::CompileModule(isolate, &source, options)
.ToLocal(&module)) {
return scope.EscapeMaybe(MaybeLocal<Module>());
}

if (options == ScriptCompiler::kConsumeCodeCache) {
*cache_rejected = source.GetCachedData()->rejected;
}

if (cache_entry != nullptr) {
realm->env()->compile_cache_handler()->MaybeSave(
cache_entry, module, *cache_rejected);
}

return scope.Escape(module);
}

static Local<Object> createImportAttributesContainer(
Realm* realm,
Isolate* isolate,
Expand Down
21 changes: 20 additions & 1 deletion src/module_wrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#include <unordered_map>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>
#include "base_object.h"
#include "v8-script.h"

namespace node {

Expand Down Expand Up @@ -69,6 +71,23 @@ class ModuleWrap : public BaseObject {
return true;
}

static v8::Local<v8::PrimitiveArray> GetHostDefinedOptions(
v8::Isolate* isolate, v8::Local<v8::Symbol> symbol);

// When user_cached_data is not std::nullopt, use the code cache if it's not
// nullptr, otherwise don't use code cache.
// TODO(joyeecheung): when it is std::nullopt, use on-disk cache
// See: https://github.com/nodejs/node/issues/47472
static v8::MaybeLocal<v8::Module> CompileSourceTextModule(
Realm* realm,
v8::Local<v8::String> source_text,
v8::Local<v8::String> url,
int line_offset,
int column_offset,
v8::Local<v8::PrimitiveArray> host_defined_options,
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data,
bool* cache_rejected);

private:
ModuleWrap(Realm* realm,
v8::Local<v8::Object> object,
Expand Down
Loading