Skip to content

Commit

Permalink
module: support require()ing synchronous ESM graphs
Browse files Browse the repository at this point in the history
This patch adds `require()` support for synchronous ESM graphs under
the flag --experimental-require-module.

This is based on the following design aspects of ESM:

- The resolution can be synchronous (up to the host)
- The evaluation of a synchronous graph (without top-level await)
  is also synchronous, and, by the time the module graph is
  instantiated (before evaluation starts), this is already known.

When the module being require()ed has .mjs extension or there are
other explicit indicators that it's an ES module, we load it as an
ES module. If the graph is synchronous, we return the module namespace
as the exports. If the graph contains top-level await, we throw an
error before evaluating the module. If an additional flag
--print-pending-tla is passed, we proceed to evaluation but do not
run the microtasks, only to find out where the TLAs are and print
their locations to help users fix them.

If there are no explicit indicators of whether a .js file is CJS or ESM,
we parse it as CJS first. If the parse error indicates that it contains
ESM syntax, we parse it again as ESM. If the second parsing succeeds,
we continue to treat it as ESM.
  • Loading branch information
joyeecheung committed Mar 6, 2024
1 parent 02cd257 commit 1ab2592
Show file tree
Hide file tree
Showing 17 changed files with 525 additions and 217 deletions.
1 change: 1 addition & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ doc/changelogs/CHANGELOG_v1*.md
!doc/changelogs/CHANGELOG_v18.md
!doc/api_assets/*.js
!.eslintrc.js
test/es-module/test-require-module-entry-point.js
17 changes: 17 additions & 0 deletions doc/api/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,22 @@ added: v11.8.0

Use the specified file as a security policy.

### `--experimental-require-module`

<!-- YAML
added: REPLACEME
-->

> Stability: 1.1 - Active Development
Supports loading a synchronous ES module graph in `require()`. If the module
graph is not synchronous (contains top-level await), it throws an error.

By default, a `.js` file will be parsed as a CommonJS module first. If it
contains ES module syntax, Node.js will try to parse and evaluate the module
again as an ES module. If it turns out to be synchronous and can be evaluated
successfully, the module namespace object will be returned by `require()`.

### `--experimental-sea-config`

<!-- YAML
Expand Down Expand Up @@ -2523,6 +2539,7 @@ Node.js options that are allowed are:
* `--experimental-network-imports`
* `--experimental-permission`
* `--experimental-policy`
* `--experimental-require-module`
* `--experimental-shadow-realm`
* `--experimental-specifier-resolution`
* `--experimental-top-level-await`
Expand Down
115 changes: 83 additions & 32 deletions lib/internal/modules/cjs/loader.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ const {
StringPrototypeSlice,
StringPrototypeSplit,
StringPrototypeStartsWith,
Symbol,
} = primordials;

// Map used to store CJS parsing data.
Expand All @@ -76,6 +77,7 @@ module.exports = {
initializeCJS,
Module,
wrapSafe,
makeRequireWithPolicy,
};

const { BuiltinModule } = require('internal/bootstrap/realm');
Expand Down Expand Up @@ -107,7 +109,6 @@ const { safeGetenv } = internalBinding('credentials');
const {
privateSymbols: {
require_private_symbol,
host_defined_option_symbol,
},
} = internalBinding('util');
const {
Expand Down Expand Up @@ -161,6 +162,8 @@ let requireDepth = 0;
let isPreloading = false;
let statCache = null;

const is_main_symbol = Symbol('is-main-module');

/**
* Our internal implementation of `require`.
* @param {Module} module Parent module of what is being required
Expand Down Expand Up @@ -271,6 +274,7 @@ function Module(id = '', parent) {
setOwnProperty(this.__proto__, 'require', makeRequireFunction(this, redirects));
}
this[require_private_symbol] = internalRequire;
this[is_main_symbol] = false; // Set to true by the entry point handler.
}

/** @type {Record<string, Module>} */
Expand Down Expand Up @@ -396,6 +400,10 @@ function initializeCJS() {
// TODO(joyeecheung): deprecate this in favor of a proper hook?
Module.runMain =
require('internal/modules/run_main').executeUserEntryPoint;

if (getOptionValue('--experimental-require-module')) {
Module._extensions['.mjs'] = loadESMFromCJS;
}
}

// Given a module name, and a list of paths to test, returns the first
Expand Down Expand Up @@ -1010,6 +1018,7 @@ Module._load = function(request, parent, isMain) {
setOwnProperty(process, 'mainModule', module);
setOwnProperty(module.require, 'main', process.mainModule);
module.id = '.';
module[is_main_symbol] = true;
}

reportModuleToWatchMode(filename);
Expand Down Expand Up @@ -1270,57 +1279,95 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) {
);

// Cache the source map for the module if present.
if (script.sourceMapURL) {
maybeCacheSourceMap(filename, content, this, false, undefined, script.sourceMapURL);
const { sourceMapURL } = script;
if (sourceMapURL) {
maybeCacheSourceMap(filename, content, this, false, undefined, sourceMapURL);
}

return runScriptInThisContext(script, true, false);
return {
__proto__: null,
function: runScriptInThisContext(script, true, false),
sourceMapURL,
retryAsESM: false,
};
}

try {
const result = compileFunctionForCJSLoader(content, filename);
result.function[host_defined_option_symbol] = hostDefinedOptionId;

// cachedDataRejected is only set for cache coming from SEA.
if (codeCache &&
result.cachedDataRejected !== false &&
internalBinding('sea').isSea()) {
process.emitWarning('Code cache data rejected.');
}
const result = compileFunctionForCJSLoader(content, filename);

// Cache the source map for the module if present.
if (result.sourceMapURL) {
maybeCacheSourceMap(filename, content, this, false, undefined, result.sourceMapURL);
}
// cachedDataRejected is only set for cache coming from SEA.
if (codeCache &&
result.cachedDataRejected !== false &&
internalBinding('sea').isSea()) {
process.emitWarning('Code cache data rejected.');
}

return result.function;
} catch (err) {
if (process.mainModule === cjsModuleInstance) {
const { enrichCJSError } = require('internal/modules/esm/translators');
enrichCJSError(err, content, filename);
}
throw err;
// Cache the source map for the module if present.
if (result.sourceMapURL) {
maybeCacheSourceMap(filename, content, this, false, undefined, result.sourceMapURL);
}

return result;
}

/**
 * Resolve and evaluate a module as ESM, synchronously, and install the
 * resulting module namespace object as the module's `exports`.
 * @param {Module} mod The CJS module record being loaded as ESM.
 * @param {string} filename Not read by this function; the module's own
 *   `filename`/`id` properties are used instead.
 */
function loadESMFromCJS(mod, filename) {
  const esmLoader = require('internal/modules/esm/loader');
  const cascadedLoader = esmLoader.getOrInitializeCascadedLoader();

  // Note that we are still using the CJS's path resolution here: the base is
  // derived from the filename of the parent recorded in moduleParentCache.
  const parentFilename = moduleParentCache.get(mod)?.filename;
  let base = parentFilename;
  if (parentFilename) {
    base = pathToFileURL(parentFilename);
  }

  // The entry point is addressed by its file URL; any other module by its id.
  let specifier;
  if (mod[is_main_symbol]) {
    specifier = pathToFileURL(mod.filename);
  } else {
    specifier = mod.id;
  }

  const job = cascadedLoader.getModuleJobSync(specifier, base, kEmptyObject, 'from-cjs-error');
  // TODO(joyeecheung): maybe we can do some special handling for default here. Maybe we don't.
  mod.exports = job.runSync().namespace;
}

/**
 * Build the `require` function for a module. When a policy manifest is
 * present, the dependency mapper for `moduleURL` is passed along as the
 * redirects; otherwise the redirects are left undefined.
 * @param {Module} module The module the require function is created for.
 * @param {string} moduleURL URL handed to the manifest's dependency mapper.
 * @returns {Function}
 */
function makeRequireWithPolicy(module, moduleURL) {
  const manifest = policy()?.manifest;
  const redirects = manifest ? manifest.getDependencyMapper(moduleURL) : undefined;
  return makeRequireFunction(module, redirects);
}

/**
* Run the file contents in the correct scope or sandbox. Expose the correct helper variables (`require`, `module`,
* `exports`) to the file. Returns exception, if any.
* @param {string} content The source code of the module
* @param {string} filename The file path of the module
* @param {boolean} loadAsESM Whether it's known to be ESM - i.e. suffix is .mjs.
*/
Module.prototype._compile = function(content, filename) {
Module.prototype._compile = function(content, filename, loadAsESM = false) {
let moduleURL;
let redirects;
const manifest = policy()?.manifest;
if (manifest) {
moduleURL = pathToFileURL(filename);
redirects = manifest.getDependencyMapper(moduleURL);
manifest.assertIntegrity(moduleURL, content);
}

const compiledWrapper = wrapSafe(filename, content, this);
let compiledWrapper;
if (!loadAsESM) {
const result = wrapSafe(filename, content, this);
compiledWrapper = result.function;
loadAsESM = result.retryAsESM;
}

if (loadAsESM) {
loadESMFromCJS(this);
return;
}

// TODO(joyeecheung): the detection below is unnecessarily complex. Maybe just
// use the is_main_symbol, or a break_on_start_symbol that gets passed from
// higher level instead of doing hacky detection here.
let inspectorWrapper = null;
if (getOptionValue('--inspect-brk') && process._eval == null) {
if (!resolvedArgv) {
Expand All @@ -1344,8 +1391,9 @@ Module.prototype._compile = function(content, filename) {
inspectorWrapper = internalBinding('inspector').callAndPauseOnStart;
}
}

const dirname = path.dirname(filename);
const require = makeRequireFunction(this, redirects);
const require = makeRequireWithPolicy(this, moduleURL);
let result;
const exports = this.exports;
const thisValue = exports;
Expand All @@ -1370,6 +1418,7 @@ Module.prototype._compile = function(content, filename) {
*/
Module._extensions['.js'] = function(module, filename) {
// If already analyzed the source, then it will be cached.
// TODO(joyeecheung): pass as buffer.
const cached = cjsParseCache.get(module);
let content;
if (cached?.source) {
Expand All @@ -1378,7 +1427,8 @@ Module._extensions['.js'] = function(module, filename) {
} else {
content = fs.readFileSync(filename, 'utf8');
}
if (StringPrototypeEndsWith(filename, '.js')) {
if (!getOptionValue('--experimental-require-module') &&
StringPrototypeEndsWith(filename, '.js')) {
const pkg = packageJsonReader.getNearestParentPackageJSON(filename);
// Function require shouldn't be used in ES modules.
if (pkg?.data.type === 'module') {
Expand Down Expand Up @@ -1414,7 +1464,8 @@ Module._extensions['.js'] = function(module, filename) {
throw err;
}
}
module._compile(content, filename);

module._compile(content, filename, false);
};

/**
Expand Down
44 changes: 29 additions & 15 deletions lib/internal/modules/esm/loader.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ const {
hardenRegExp,
} = primordials;

const assert = require('internal/assert');
const {
ERR_REQUIRE_ESM,
ERR_UNKNOWN_MODULE_FORMAT,
Expand Down Expand Up @@ -228,12 +229,12 @@ class ModuleLoader {
return this.getJobFromResolveResult(resolveResult, parentURL, importAttributes);
}

getModuleJobSync(specifier, parentURL, importAttributes) {
const resolveResult = this.resolveSync(specifier, parentURL, importAttributes);
return this.getJobFromResolveResult(resolveResult, parentURL, importAttributes, true);
getModuleJobSync(specifier, parentURL, importAttributes, requireESMHint) {
const resolveResult = this.resolveSync(specifier, parentURL, importAttributes, requireESMHint);
return this.getJobFromResolveResult(resolveResult, parentURL, importAttributes, true, requireESMHint);
}

getJobFromResolveResult(resolveResult, parentURL, importAttributes, sync) {
getJobFromResolveResult(resolveResult, parentURL, importAttributes, sync, requireESMHint) {
const { url, format } = resolveResult;
const resolvedImportAttributes = resolveResult.importAttributes ?? importAttributes;
let job = this.loadCache.get(url, resolvedImportAttributes.type);
Expand All @@ -244,7 +245,7 @@ class ModuleLoader {
}

if (job === undefined) {
job = this.#createModuleJob(url, resolvedImportAttributes, parentURL, format, sync);
job = this.#createModuleJob(url, resolvedImportAttributes, parentURL, format, sync, requireESMHint);
}

return job;
Expand All @@ -261,7 +262,7 @@ class ModuleLoader {
* `resolve` hook
* @returns {Promise<ModuleJob>} The (possibly pending) module job
*/
#createModuleJob(url, importAttributes, parentURL, format, sync) {
#createModuleJob(url, importAttributes, parentURL, format, sync, requireESMHint) {
const callTranslator = ({ format: finalFormat, responseURL, source }, isMain) => {
const translator = getTranslators().get(finalFormat);

Expand All @@ -274,7 +275,7 @@ class ModuleLoader {
const context = { format, importAttributes };

const moduleProvider = sync ?
(url, isMain) => callTranslator(this.loadSync(url, context), isMain) :
(url, isMain) => callTranslator(this.loadSync(url, context, requireESMHint), isMain) :
async (url, isMain) => callTranslator(await this.load(url, context), isMain);

const inspectBrk = (
Expand Down Expand Up @@ -358,26 +359,30 @@ class ModuleLoader {
* Just like `resolve` except synchronous. This is here specifically to support
* `import.meta.resolve` which must happen synchronously.
*/
resolveSync(originalSpecifier, parentURL, importAttributes) {
if (this.#customizations) {
resolveSync(originalSpecifier, parentURL, importAttributes, requireESMHint) {
// If this comes from the require(esm) fallback, don't apply loader hooks which are on
// a separate thread. This is ignored by require(cjs) already anyway.
// TODO(joyeecheung): add support in hooks for this?
if (this.#customizations && !requireESMHint) {
return this.#customizations.resolveSync(originalSpecifier, parentURL, importAttributes);
}
return this.defaultResolve(originalSpecifier, parentURL, importAttributes);
return this.defaultResolve(originalSpecifier, parentURL, importAttributes, requireESMHint);
}

/**
* Our `defaultResolve` is synchronous and can be used in both
* `resolve` and `resolveSync`. This function is here just to avoid
* repeating the same code block twice in those functions.
*/
defaultResolve(originalSpecifier, parentURL, importAttributes) {
defaultResolve(originalSpecifier, parentURL, importAttributes, requireESMHint) {
defaultResolve ??= require('internal/modules/esm/resolve').defaultResolve;

const context = {
__proto__: null,
conditions: this.#defaultConditions,
importAttributes,
parentURL,
requireESMHint,
};

return defaultResolve(originalSpecifier, context);
Expand All @@ -398,14 +403,23 @@ class ModuleLoader {
return result;
}

loadSync(url, context) {
loadSync(url, context, requireESMHint) {
defaultLoadSync ??= require('internal/modules/esm/load').defaultLoadSync;

let result = this.#customizations ?
const isRequireModuleAllowed = getOptionValue('--experimental-require-module');
if (requireESMHint === 'from-cjs-error') {
assert(isRequireModuleAllowed);
context.format = 'module';
}
// If this comes from the require(esm) fallback, don't apply loader hooks which are on
// a separate thread. This is ignored by require(cjs) already anyway.
// TODO(joyeecheung): add support in hooks for this?
let result = this.#customizations && !requireESMHint ?
this.#customizations.loadSync(url, context) :
defaultLoadSync(url, context);

// TODO(joyeecheung): we need a better way to detect the format and cache the result.
let format = result?.format;
if (format === 'module') {
if (format === 'module' && !isRequireModuleAllowed) {
throw new ERR_REQUIRE_ESM(url, true);
}
if (format === 'commonjs') {
Expand Down
Loading

0 comments on commit 1ab2592

Please sign in to comment.