Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: handling of default and star exports #85

Merged
merged 1 commit into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 39 additions & 151 deletions hook.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
//
// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2021 Datadog, Inc.

const { randomBytes } = require('crypto')
const { URL } = require('url')
const specifiers = new Map()
const isWin = process.platform === 'win32'
Expand All @@ -20,7 +19,7 @@ let getExports
if (NODE_MAJOR >= 20 || (NODE_MAJOR === 18 && NODE_MINOR >= 19)) {
getExports = require('./lib/get-exports.js')
} else {
getExports = ({ url }) => import(url).then(Object.keys)
getExports = (url) => import(url).then(Object.keys)
}

function hasIitm (url) {
Expand Down Expand Up @@ -117,70 +116,37 @@ function isBareSpecifier (specifier) {
}

/**
* @typedef {object} ProcessedModule
* @property {string[]} imports A set of ESM import lines to be added to the
* shimmed module source.
* @property {string[]} namespaces A set of identifiers representing the
* modules in `imports`, e.g. for `import * as foo from 'bar'`, "foo" will be
* present in this array.
* @property {Map<string, string>} setters The shimmed setters for all the
* exports from the module and any transitive export all modules. The key is
* used to deduplicate conflicting exports, assigning a priority to `default`
* exports.
*/

/**
* Processes a module's exports and builds a set of new import statements,
* namespace names, and setter blocks. If an export-all export is encountered,
* the target exports will be hoisted to the current module via a generated
* namespace.
* Processes a module's exports and builds a set of setter blocks.
*
* @param {object} params
* @param {string} params.srcUrl The full URL to the module to process.
* @param {object} params.context Provided by the loaders API.
* @param {Function} params.parentGetSource Provides the source code for the
* parent module.
* @param {string} [params.ns='namespace'] A string identifier that will be
* used as the namespace for the identifiers exported by the module.
* @param {string} [params.defaultAs='default'] The name to give the default
* identifier exported by the module (if one exists). This is really only
* useful in a recursive situation where a transitive module's default export
* needs to be renamed to the name of the module.
* @param {Function} params.parentGetSource Provides the source code for the parent module.
* @param {boolean} params.excludeDefault Exclude the default export.
*
* @returns {Promise<ProcessedModule>}
* @returns {Promise<Map<string, string>>} The shimmed setters for all the exports
* from the module and any transitive export all modules.
*/
async function processModule ({
srcUrl,
context,
parentGetSource,
parentResolve,
ns = 'namespace',
defaultAs = 'default'
}) {
const exportNames = await getExports({
url: srcUrl,
context,
parentLoad: parentGetSource,
defaultAs
})
const imports = [`import * as ${ns} from ${JSON.stringify(srcUrl)}`]
const namespaces = [ns]

// As we iterate found module exports we will add setter code blocks
// to this map that will eventually be inserted into the shim module's
// source code. We utilize a map in order to prevent duplicate exports.
// As a consequence of default renaming, it is possible that a file named
// `foo.mjs` which has `export function foo() {}` and `export default foo`
// exports will result in the "foo" export being defined twice in our shim.
// The map allows us to avoid this situation at the cost of losing the
// named export in favor of the default export.
async function processModule ({ srcUrl, context, parentGetSource, parentResolve, excludeDefault }) {
const exportNames = await getExports(srcUrl, context, parentGetSource)
const duplicates = new Set()
const setters = new Map()

const addSetter = (name, setter) => {
// When doing an `import *` duplicates become undefined, so do the same
if (setters.has(name)) {
duplicates.add(name)
setters.delete(name)
} else if (!duplicates.has(name)) {
setters.set(name, setter)
}
}

for (const n of exportNames) {
if (n === 'default' && excludeDefault) continue

if (isStarExportLine(n) === true) {
const [, modFile] = n.split('* from ')
const normalizedModName = normalizeModName(modFile)
const modName = Buffer.from(modFile, 'hex') + Date.now() + randomBytes(4).toString('hex')

let modUrl
if (isBareSpecifier(modFile)) {
Expand All @@ -191,70 +157,28 @@ async function processModule ({
modUrl = new URL(modFile, srcUrl).href
}

const data = await processModule({
const setters = await processModule({
srcUrl: modUrl,
context,
parentGetSource,
parentResolve,
ns: `$${modName}`,
defaultAs: normalizedModName
excludeDefault: true
})
Array.prototype.push.apply(imports, data.imports)
Array.prototype.push.apply(namespaces, data.namespaces)
for (const [k, v] of data.setters.entries()) {
setters.set(k, v)
for (const [name, setter] of setters.entries()) {
addSetter(name, setter)
}

continue
}

const matches = /^rename (.+) as (.+)$/.exec(n)
if (matches !== null) {
// Transitive modules that export a default identifier need to have
// that identifier renamed to the name of module. And our shim setter
// needs to utilize that new name while being initialized from the
// corresponding origin namespace.
const renamedExport = matches[2]
setters.set(`$${renamedExport}${ns}`, `
let $${renamedExport} = ${ns}.default
export { $${renamedExport} as ${renamedExport} }
set.${renamedExport} = (v) => {
$${renamedExport} = v
} else {
addSetter(n, `
let $${n} = _.${n}
export { $${n} as ${n} }
set.${n} = (v) => {
$${n} = v
return true
}
`)
continue
}

setters.set(`$${n}` + ns, `
let $${n} = ${ns}.${n}
export { $${n} as ${n} }
set.${n} = (v) => {
$${n} = v
return true
}
`)
}

return { imports, namespaces, setters }
}

/**
* Given a module name, e.g. 'foo-bar' or './foo-bar.js', normalize it to a
* string that is a valid JavaScript identifier, e.g. `fooBar`. Normalization
* means converting kebab-case to camelCase while removing any path tokens and
* file extensions.
*
* @param {string} name The module name to normalize.
*
* @returns {string} The normalized identifier.
*/
function normalizeModName (name) {
return name
.split('/')
.pop()
.replace(/(.+)\.(?:js|mjs)$/, '$1')
.replaceAll(/(-.)/g, x => x[1].toUpperCase())
return setters
}

function addIitm (url) {
Expand Down Expand Up @@ -312,61 +236,25 @@ function createHook (meta) {
async function getSource (url, context, parentGetSource) {
if (hasIitm(url)) {
const realUrl = deleteIitm(url)
const { imports, namespaces, setters: mapSetters } = await processModule({
const setters = await processModule({
srcUrl: realUrl,
context,
parentGetSource,
parentResolve: cachedResolve
})
const setters = Array.from(mapSetters.values())

// When we encounter modules that re-export all identifiers from other
// modules, it is possible that the transitive modules export a default
// identifier. Due to us having to merge all transitive modules into a
// single common namespace, we need to recognize these default exports
// and remap them to a name based on the module name. This prevents us
// from overriding the top-level module's (the one actually being imported
// by some source code) default export when we merge the namespaces.
const renamedDefaults = setters
.map(s => {
const matches = /let \$(.+) = (\$.+)\.default/.exec(s)
if (matches === null) return undefined
return `_['${matches[1]}'] = ${matches[2]}.default`
})
.filter(s => s)

// The for loops are how we merge namespaces into a common namespace that
// can be proxied. We can't use a simple `Object.assign` style merging
// because transitive modules can export a default identifier that would
// override the desired default identifier. So we need to do manual
// merging with some logic around default identifiers.
//
// Additionally, we need to make sure any renamed default exports in
// transitive dependencies are added to the common namespace. This is
// accomplished through the `renamedDefaults` array.
return {
source: `
import { register } from '${iitmURL}'
${imports.join('\n')}
import * as namespace from ${JSON.stringify(realUrl)}

const namespaces = [${namespaces.join(', ')}]
// Mimic a Module object (https://tc39.es/ecma262/#sec-module-namespace-objects).
const _ = Object.create(null, { [Symbol.toStringTag]: { value: 'Module' } })
const _ = Object.assign(
Object.create(null, { [Symbol.toStringTag]: { value: 'Module' } }),
namespace
)
const set = {}

const primary = namespaces.shift()
for (const [k, v] of Object.entries(primary)) {
_[k] = v
}
for (const ns of namespaces) {
for (const [k, v] of Object.entries(ns)) {
if (k === 'default') continue
_[k] = v
}
}

${setters.join('\n')}
${renamedDefaults.join('\n')}
${Array.from(setters.values()).join('\n')}

register(${JSON.stringify(realUrl)}, _, set, ${JSON.stringify(specifiers.get(realUrl))})
`
Expand Down
29 changes: 4 additions & 25 deletions lib/get-esm-exports.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,30 +18,21 @@ function warn (txt) {
* Utilizes an AST parser to interpret ESM source code and build a list of
* exported identifiers. In the baseline case, the list of identifiers will be
* the simple identifier names as written in the source code of the module.
* However, there are some special cases:
* However, there is a special case:
*
* 1. When an `export * from './foo.js'` line is encountered it is rewritten
* When an `export * from './foo.js'` line is encountered it is rewritten
* as `* from ./foo.js`. This allows the interpreting code to recognize a
* transitive export and recursively parse the indicated module. The returned
* identifier list will have "* from ./foo.js" as an item.
*
* 2. When `defaultAs` has a value other than 'default', the export line will
* be rewritten as `rename <identifier> as <defaultAsValue>`. This rename string
* will be an item in the returned identifier list.
*
* @param {object} params
* @param {string} params.moduleSource The source code of the module to parse
* and interpret.
* @param {string} [defaultAs='default'] When anything other than 'default' any
* `export default` lines will be rewritten utilizing the value provided. For
* example, if a module 'foo-bar.js' has the line `export default foo` and the
* value of this parameter is 'baz', then the export will be rewritten to
* `rename foo as baz`.
*
* @returns {string[]} The identifiers exported by the module along with any
* custom directives.
*/
function getEsmExports ({ moduleSource, defaultAs = 'default' }) {
function getEsmExports (moduleSource) {
const exportedNames = new Set()
const tree = parser.parse(moduleSource, acornOpts)
for (const node of tree.body) {
Expand All @@ -56,19 +47,7 @@ function getEsmExports ({ moduleSource, defaultAs = 'default' }) {
break

case 'ExportDefaultDeclaration': {
if (defaultAs === 'default') {
exportedNames.add('default')
break
}

if (node.declaration.type.toLowerCase() === 'identifier') {
// e.g. `export default foo`
exportedNames.add(`rename ${node.declaration.name} as ${defaultAs}`)
} else {
// e.g. `export default function foo () {}`
exportedNames.add(`rename ${node.declaration.id.name} as ${defaultAs}`)
}

exportedNames.add('default')
break
}

Expand Down
25 changes: 9 additions & 16 deletions lib/get-exports.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ async function getFullCjsExports (url, context, parentLoad, source) {
const ex = getCjsExports(source)
const full = Array.from(new Set([
...addDefault(ex.exports),
...(await Promise.all(ex.reexports.map(re => getExports(({
url: (/^(..?($|\/|\\))/).test(re)
...(await Promise.all(ex.reexports.map(re => getExports(
(/^(..?($|\/|\\))/).test(re)
? pathToFileURL(require.resolve(fileURLToPath(new URL(re, url)))).toString()
: pathToFileURL(require.resolve(re)).toString(),
context,
parentLoad
}))))).flat()
)))).flat()
]))

urlsBeingProcessed.delete(url)
Expand All @@ -38,25 +38,18 @@ async function getFullCjsExports (url, context, parentLoad, source) {
* source code for said module from the loader API, and parses the result
* for the entities exported from that module.
*
* @param {object} params
* @param {string} params.url A file URL string pointing to the module that
* @param {string} url A file URL string pointing to the module that
* we should get the exports of.
* @param {object} params.context Context object as provided by the `load`
* @param {object} context Context object as provided by the `load`
* hook from the loaders API.
* @param {Function} params.parentLoad Next hook function in the loaders API
* @param {Function} parentLoad Next hook function in the loaders API
* hook chain.
* @param {string} [defaultAs='default'] When anything other than 'default',
* will trigger remapping of default exports in ESM source files to the
* provided name. For example, if a submodule has `export default foo` and
* 'myFoo' is provided for this parameter, the export line will be rewritten
* to `rename foo as myFoo`. This is key to being able to support
* `export * from 'something'` exports.
*
* @returns {Promise<string[]>} An array of identifiers exported by the module.
* Please see {@link getEsmExports} for caveats on special identifiers that may
* be included in the result set.
*/
async function getExports ({ url, context, parentLoad, defaultAs = 'default' }) {
async function getExports (url, context, parentLoad) {
// `parentLoad` gives us the possibility of getting the source
// from an upstream loader. This doesn't always work though,
// so later on we fall back to reading it from disk.
Expand All @@ -77,15 +70,15 @@ async function getExports ({ url, context, parentLoad, defaultAs = 'default' })
}

if (format === 'module') {
return getEsmExports({ moduleSource: source, defaultAs })
return getEsmExports(source)
}
if (format === 'commonjs') {
return getFullCjsExports(url, context, parentLoad, source)
}

// At this point our `format` is either undefined or not known by us. Fall
// back to parsing as ESM/CJS.
const esmExports = getEsmExports({ moduleSource: source, defaultAs })
const esmExports = getEsmExports(source)
if (!esmExports.length) {
// TODO(bengl) it might be possible to get here if somehow the format
// isn't set at first and yet we have an ESM module with no exports.
Expand Down
4 changes: 4 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,19 @@
"@babel/core": "^7.23.7",
"@babel/eslint-parser": "^7.23.3",
"@babel/plugin-syntax-import-assertions": "^7.23.3",
"@react-email/components": "^0.0.19",
"@types/node": "^18.0.6",
"c8": "^7.8.0",
"date-fns": "^3.6.0",
"eslint": "^8.55.0",
"eslint-config-standard": "^17.1.0",
"eslint-plugin-import": "^2.29.0",
"eslint-plugin-n": "^16.4.0",
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-promise": "^6.1.1",
"got": "^14.3.0",
"imhotap": "^2.1.0",
"openai": "^4.47.2",
"ts-node": "^10.9.1",
"typescript": "^4.7.4"
},
Expand Down
Loading
Loading