From bdc8aff870c12c737a45086c295b151293261d86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Nison?= Date: Wed, 13 Oct 2021 18:29:23 +0200 Subject: [PATCH 1/8] Adds a loader for manifest loading --- doc/design/overview.md | 2 + doc/design/proposal-chaining-iterative.md | 52 ++++++++++++++++++++++ doc/design/proposal-chaining-middleware.md | 41 +++++++++++++++++ 3 files changed, 95 insertions(+) diff --git a/doc/design/overview.md b/doc/design/overview.md index bb81b13..d46e98d 100644 --- a/doc/design/overview.md +++ b/doc/design/overview.md @@ -4,6 +4,8 @@ There are currently [three loader hooks](https://github.com/nodejs/node/tree/mas 1. `resolve`: Takes a specifier (the string after `from` in an `import` statement) and converts it into an URL to be loaded. +1. `loadManifest`: Takes the resolved URL and returns the `package.json` from the location (or `null` if it doesn't exist). + 1. `load`: Takes the resolved URL and returns runnable code (JavaScript, Wasm, etc.) as well as the name of one of Node’s ESM loader’s [“translators”](https://github.com/nodejs/node/blob/master/lib/internal/modules/esm/translators.js): * `commonjs` * `module` diff --git a/doc/design/proposal-chaining-iterative.md b/doc/design/proposal-chaining-iterative.md index b3d75f3..538623f 100644 --- a/doc/design/proposal-chaining-iterative.md +++ b/doc/design/proposal-chaining-iterative.md @@ -282,3 +282,55 @@ const babelOutputToFormat = new Map([ ]); ``` + +## Chaining `loadManifest` hooks + +Say you had a chain of three loaders: + +* `zip` adds a virtual filesystem layer for in-zip access +* `tgz` does the same but for tgz archives +* `warc` does the same for warc archives. + +Following the pattern of `--require`: + +```console +node \ + --loader zip \ + --loader tgz \ + --loader warc +``` + +These would be called in the following sequence: + +(`zip` OR `defaultLoadManifest`) → `tgz` → `warc` + +1. 
`defaultLoadManifest` / `zip` needs to be first to know whether the manifest exists on the actual filesystem, which is fed to the subsequent loader +1. `tgz` receives the raw source from the previous loader and, if necessary, checks for the manifest existence via its own rules +1. `warc` does the same thing + +LoadManifest hooks would have the following signature: + +```ts +export async function loadManifest( + manifestUrl: string, // A URL that may or may not point to an existing + // location + interimResult: { // result from the previous hook + manifest: string | ArrayBuffer | TypedArray | null, // The content of the + // manifest, or `null` if it doesn't exist. + }, + context: { + conditions = string[], // Export conditions of the relevant package.json + parentUrl = null, // The module importing this one, or null if + // this is the Node entry point + }, + defaultLoadManifest: function, // Node's default load hook +): { + signals?: { // Signals from this hook to the ESMLoader + contextOverride?: object, // A new `context` argument for the next hook + interimIgnored?: true, // interimResult was intentionally ignored + shortCircuit?: true, // `resolve` chain should be terminated + }, + manifest: string | ArrayBuffer | TypedArray | null, // The content of the + // manifest, or `null` if it doesn't exist. +} { +``` diff --git a/doc/design/proposal-chaining-middleware.md b/doc/design/proposal-chaining-middleware.md index 41a481c..72d5163 100644 --- a/doc/design/proposal-chaining-middleware.md +++ b/doc/design/proposal-chaining-middleware.md @@ -263,3 +263,44 @@ export async function load( } ``` + +## Chaining `loadManifest` hooks + +Say you had a chain of three loaders: + +* `zip` adds a virtual filesystem layer for in-zip access +* `tgz` does the same but for tgz archives +* `warc` does the same for warc archives. 
+ +Following the pattern of `--require`: + +```console +node \ + --loader zip \ + --loader tgz \ + --loader warc +``` + +These would be called in the following sequence: `zip` calls `tgz`, which calls `warc`. Or in JavaScript terms, `zip(tgz(warc(input)))`: + +Load hooks would have the following signature: + +```ts +export async function loadManifest( + manifestUrl: string, // A URL that may or may not point to an existing + // location + context: { + conditions = string[], // Export conditions of the relevant `package.json` + parentUrl = null, // The module importing this one, or null if + // this is the Node entry point + }, + next: function, // The subsequent `loadManifest` hook in the chain, + // or Node’s default `loadManifest` hook after the + // last user-supplied `loadManifest` hook +): { + manifest: string | ArrayBuffer | TypedArray | null, // The content of the + // manifest, or `null` if it doesn't exist. + shortCircuit?: true, // A signal that this hook intends to terminate + // the chain of `load` hooks +} { +``` From d97a9504aab67ecb343fedd93bf268987abfdc68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Nison?= Date: Fri, 12 Nov 2021 10:13:28 +0100 Subject: [PATCH 2/8] Renames into readFile / statFile --- doc/design/overview.md | 10 ++++++---- doc/design/proposal-chaining-iterative.md | 22 ++++++++++------------ doc/design/proposal-chaining-middleware.md | 20 +++++++++----------- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/doc/design/overview.md b/doc/design/overview.md index d46e98d..6a0cf40 100644 --- a/doc/design/overview.md +++ b/doc/design/overview.md @@ -1,11 +1,9 @@ # Loaders Design -There are currently [three loader hooks](https://github.com/nodejs/node/tree/master/doc/api/esm.html#esm_hooks): +There are currently the following [loader hooks](https://github.com/nodejs/node/tree/master/doc/api/esm.html#esm_hooks): 1. 
`resolve`: Takes a specifier (the string after `from` in an `import` statement) and converts it into an URL to be loaded. -1. `loadManifest`: Takes the resolved URL and returns the `package.json` from the location (or `null` if it doesn't exist). - 1. `load`: Takes the resolved URL and returns runnable code (JavaScript, Wasm, etc.) as well as the name of one of Node’s ESM loader’s [“translators”](https://github.com/nodejs/node/blob/master/lib/internal/modules/esm/translators.js): * `commonjs` * `module` @@ -13,7 +11,11 @@ There are currently [three loader hooks](https://github.com/nodejs/node/tree/mas * `json` (with `--experimental-json-modules`) * `wasm` (with `--experimental-wasm-modules`) -* `globalPreload`: Defines a string of JavaScript to be injected into the application global scope. +1. `statFile`: Takes the resolved URL and returns its [`fs.Stats` record](https://nodejs.org/api/fs.html#class-fsstats) (or `null` if it doesn't exist). + +1. `readFile`: Takes the resolved URL and returns its binary content (or `null` if it doesn't exist). + +1. `globalPreload`: Defines a string of JavaScript to be injected into the application global scope. ## Chaining diff --git a/doc/design/proposal-chaining-iterative.md b/doc/design/proposal-chaining-iterative.md index 538623f..bd4718e 100644 --- a/doc/design/proposal-chaining-iterative.md +++ b/doc/design/proposal-chaining-iterative.md @@ -283,7 +283,7 @@ const babelOutputToFormat = new Map([ ``` -## Chaining `loadManifest` hooks +## Chaining `readFile` hooks Say you had a chain of three loaders: @@ -302,26 +302,24 @@ node \ These would be called in the following sequence: -(`zip` OR `defaultLoadManifest`) → `tgz` → `warc` +(`zip` OR `defaultReadFile`) → `tgz` → `warc` -1. `defaultLoadManifest` / `zip` needs to be first to know whether the manifest exists on the actual filesystem, which is fed to the subsequent loader +1. 
`defaultReadFile` / `zip` needs to be first to know whether the manifest exists on the actual filesystem, which is fed to the subsequent loader 1. `tgz` receives the raw source from the previous loader and, if necessary, checks for the manifest existence via its own rules 1. `warc` does the same thing LoadManifest hooks would have the following signature: ```ts -export async function loadManifest( - manifestUrl: string, // A URL that may or may not point to an existing - // location +export async function readFile( + url: string, // A URL that point to a location; whether the file + // exists or not isn't guaranteed interimResult: { // result from the previous hook - manifest: string | ArrayBuffer | TypedArray | null, // The content of the - // manifest, or `null` if it doesn't exist. + data: string | ArrayBuffer | TypedArray | null, // The content of the + // file, or `null` if it doesn't exist. }, context: { conditions = string[], // Export conditions of the relevant package.json - parentUrl = null, // The module importing this one, or null if - // this is the Node entry point }, defaultLoadManifest: function, // Node's default load hook ): { @@ -330,7 +328,7 @@ export async function loadManifest( interimIgnored?: true, // interimResult was intentionally ignored shortCircuit?: true, // `resolve` chain should be terminated }, - manifest: string | ArrayBuffer | TypedArray | null, // The content of the - // manifest, or `null` if it doesn't exist. + data: string | ArrayBuffer | TypedArray | null, // The content of the + // file, or `null` if it doesn't exist. 
} { ``` diff --git a/doc/design/proposal-chaining-middleware.md b/doc/design/proposal-chaining-middleware.md index 72d5163..d30a287 100644 --- a/doc/design/proposal-chaining-middleware.md +++ b/doc/design/proposal-chaining-middleware.md @@ -264,7 +264,7 @@ export async function load( ``` -## Chaining `loadManifest` hooks +## Chaining `readFile` hooks Say you had a chain of three loaders: @@ -286,20 +286,18 @@ These would be called in the following sequence: `zip` calls `tgz`, which calls Load hooks would have the following signature: ```ts -export async function loadManifest( - manifestUrl: string, // A URL that may or may not point to an existing - // location +export async function readFile( + url: string, // A URL that point to a location; whether the file + // exists or not isn't guaranteed context: { conditions = string[], // Export conditions of the relevant `package.json` - parentUrl = null, // The module importing this one, or null if - // this is the Node entry point }, - next: function, // The subsequent `loadManifest` hook in the chain, - // or Node’s default `loadManifest` hook after the - // last user-supplied `loadManifest` hook + next: function, // The subsequent `readFile` hook in the chain, + // or Node’s default `readFile` hook after the + // last user-supplied `readFile` hook ): { - manifest: string | ArrayBuffer | TypedArray | null, // The content of the - // manifest, or `null` if it doesn't exist. + data: string | ArrayBuffer | TypedArray | null, // The content of the + // file, or `null` if it doesn't exist. 
shortCircuit?: true, // A signal that this hook intends to terminate // the chain of `load` hooks } { From 9ddc24935cf32899b39eb41e40397c80f67402a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Nison?= Date: Thu, 18 Nov 2021 10:46:11 +0100 Subject: [PATCH 3/8] Update doc/design/overview.md Co-authored-by: Derek Lewis --- doc/design/overview.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/design/overview.md b/doc/design/overview.md index f6fc9f7..1d8027a 100644 --- a/doc/design/overview.md +++ b/doc/design/overview.md @@ -1,6 +1,6 @@ # Loaders Design -There are currently the following [loader hooks](https://github.com/nodejs/node/tree/master/doc/api/esm.html#hooks): +There are currently the following [loader hooks](https://github.com/nodejs/node/blob/master/doc/api/esm.md#hooks): 1. `resolve`: Takes a specifier (the string after `from` in an `import` statement) and converts it into an URL to be loaded. From cb72fc41692ca769174d9416c6718e67175afd79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Nison?= Date: Thu, 18 Nov 2021 10:59:09 +0100 Subject: [PATCH 4/8] Adds rationale to the overview document --- doc/design/overview.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/doc/design/overview.md b/doc/design/overview.md index 1d8027a..8578c2f 100644 --- a/doc/design/overview.md +++ b/doc/design/overview.md @@ -2,6 +2,8 @@ There are currently the following [loader hooks](https://github.com/nodejs/node/blob/master/doc/api/esm.md#hooks): +## Basic hooks + 1. `resolve`: Takes a specifier (the string after `from` in an `import` statement) and converts it into an URL to be loaded. 1. `load`: Takes the resolved URL and returns runnable code (JavaScript, Wasm, etc.)
as well as the name of one of Node’s ESM loader’s [“translators”](https://github.com/nodejs/node/blob/master/lib/internal/modules/esm/translators.js): @@ -11,10 +13,20 @@ There are currently the following [loader hooks](https://github.com/nodejs/node/ * `json` (with `--experimental-json-modules`) * `wasm` (with `--experimental-wasm-modules`) +## Filesystem hooks + +The Node resolution algorithms may rely on various filesystem operations in order to return definite answers. For example, in order to know whether the package `foo` resolves to `/path/to/foo/index.js`, one must first check the [`exports` field](https://nodejs.org/api/packages.html#exports) located in `/path/to/foo/package.json`. Similarly, a loader that would add support for import maps needs to know how to retrieve those import maps in the first place. + +While this is fairly easy when operating with the traditional filesystem (one could just use the `fs` module), things get trickier when you consider that loaders may also have to deal with other data sources. For instance, a loader that would import files directly from the network (similar to how Deno operates) would be unable to leverage `fs` to access the `package.json` content for the remote packages. The same applies when the package data are kept within archives that require special support for access (as Electron and Yarn both do). + +To facilitate such interactions between loaders, they are given the ability to override the basic filesystem operations used by the Node resolution helpers. This way, they can remain blissfully unaware of the underlying data source (filesystem or network or otherwise) and focus on the part of the resolution they care about. + 1. `statFile`: Takes the resolved URL and returns its [`fs.Stats` record](https://nodejs.org/api/fs.html#class-fsstats) (or `null` if it doesn't exist). 1. `readFile`: Takes the resolved URL and returns its binary content (or `null` if it doesn't exist). +## Advanced hooks + 1.
`globalPreload`: Defines a string of JavaScript to be injected into the application global scope. ## Chaining From fa0c8cfa86f643ef8dbf253e39a62d7db9a8961d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Nison?= Date: Thu, 18 Nov 2021 11:00:02 +0100 Subject: [PATCH 5/8] Apply suggestions from code review Co-authored-by: Jacob Smith <3012099+JakobJingleheimer@users.noreply.github.com> --- doc/design/proposal-chaining-iterative.md | 2 +- doc/design/proposal-chaining-middleware.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/design/proposal-chaining-iterative.md b/doc/design/proposal-chaining-iterative.md index bd4718e..e4ebcb7 100644 --- a/doc/design/proposal-chaining-iterative.md +++ b/doc/design/proposal-chaining-iterative.md @@ -312,7 +312,7 @@ LoadManifest hooks would have the following signature: ```ts export async function readFile( - url: string, // A URL that point to a location; whether the file + url: string, // A URL pointing to a location; whether the file // exists or not isn't guaranteed interimResult: { // result from the previous hook data: string | ArrayBuffer | TypedArray | null, // The content of the diff --git a/doc/design/proposal-chaining-middleware.md b/doc/design/proposal-chaining-middleware.md index d30a287..bf55750 100644 --- a/doc/design/proposal-chaining-middleware.md +++ b/doc/design/proposal-chaining-middleware.md @@ -287,7 +287,7 @@ Load hooks would have the following signature: ```ts export async function readFile( - url: string, // A URL that point to a location; whether the file + url: string, // A URL pointing to a location; whether the file // exists or not isn't guaranteed context: { conditions = string[], // Export conditions of the relevant `package.json` From 2c9ffb665fb05644a9851f62ddcdefd88f111ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Nison?= Date: Thu, 18 Nov 2021 11:02:46 +0100 Subject: [PATCH 6/8] Update proposal-chaining-iterative.md --- 
doc/design/proposal-chaining-iterative.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/design/proposal-chaining-iterative.md b/doc/design/proposal-chaining-iterative.md index e4ebcb7..87985e2 100644 --- a/doc/design/proposal-chaining-iterative.md +++ b/doc/design/proposal-chaining-iterative.md @@ -289,7 +289,7 @@ Say you had a chain of three loaders: * `zip` adds a virtual filesystem layer for in-zip access * `tgz` does the same but for tgz archives -* `warc` does the same for warc archives. +* `https` allows querying packages through the network. Following the pattern of `--require`: @@ -297,18 +297,18 @@ Following the pattern of `--require`: node \ --loader zip \ --loader tgz \ - --loader warc + --loader https ``` These would be called in the following sequence: -(`zip` OR `defaultReadFile`) → `tgz` → `warc` +(`zip` OR `defaultReadFile`) → `tgz` → `https` -1. `defaultReadFile` / `zip` needs to be first to know whether the manifest exists on the actual filesystem, which is fed to the subsequent loader -1. `tgz` receives the raw source from the previous loader and, if necessary, checks for the manifest existence via its own rules -1. `warc` does the same thing +1. `defaultReadFile` / `zip` needs to be first to know whether the file exists on the actual filesystem, which is fed to the subsequent loader +1. `tgz` receives the raw source from the previous loader and, if necessary, checks for the file existence via its own rules +1. 
`https` does the same thing -LoadManifest hooks would have the following signature: +ReadFile hooks would have the following signature: ```ts export async function readFile( @@ -321,7 +321,7 @@ export async function readFile( context: { conditions = string[], // Export conditions of the relevant package.json }, - defaultLoadManifest: function, // Node's default load hook + defaultReadFile: function, // Node's default `readFile` hook ): { signals?: { // Signals from this hook to the ESMLoader contextOverride?: object, // A new `context` argument for the next hook From b616257748b80d3212915f60db73873acc8e28cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Nison?= Date: Thu, 18 Nov 2021 11:04:06 +0100 Subject: [PATCH 7/8] Update proposal-chaining-middleware.md --- doc/design/proposal-chaining-middleware.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/design/proposal-chaining-middleware.md b/doc/design/proposal-chaining-middleware.md index bf55750..8839b58 100644 --- a/doc/design/proposal-chaining-middleware.md +++ b/doc/design/proposal-chaining-middleware.md @@ -270,7 +270,7 @@ Say you had a chain of three loaders: * `zip` adds a virtual filesystem layer for in-zip access * `tgz` does the same but for tgz archives -* `warc` does the same for warc archives. +* `https` allows querying packages through the network Following the pattern of `--require`: @@ -278,12 +278,12 @@ Following the pattern of `--require`: node \ --loader zip \ --loader tgz \ - --loader warc + --loader https ``` -These would be called in the following sequence: `zip` calls `tgz`, which calls `warc`. Or in JavaScript terms, `zip(tgz(warc(input)))`: +These would be called in the following sequence: `zip` calls `tgz`, which calls `https`.
Or in JavaScript terms, `zip(tgz(https(input)))`: -Load hooks would have the following signature: +ReadFile hooks would have the following signature: ```ts export async function readFile( From 3eb3369b47a27795e51ed502fd6e46e34120f91b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Nison?= Date: Thu, 18 Nov 2021 15:45:56 +0100 Subject: [PATCH 8/8] Update doc/design/overview.md Co-authored-by: Antoine du Hamel --- doc/design/overview.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/design/overview.md b/doc/design/overview.md index 8578c2f..f48d053 100644 --- a/doc/design/overview.md +++ b/doc/design/overview.md @@ -1,6 +1,6 @@ # Loaders Design -There are currently the following [loader hooks](https://github.com/nodejs/node/blob/master/doc/api/esm.md#hooks): +There are currently the following [loader hooks](https://github.com/nodejs/node/blob/HEAD/doc/api/esm.md#hooks): ## Basic hooks