From 105e2229557fa7bf886dd34147ae7a44759f0053 Mon Sep 17 00:00:00 2001 From: Rob Hogan Date: Fri, 26 Apr 2024 10:57:44 -0700 Subject: [PATCH] Node file crawler - warn and keep going on unreadable directories Summary: When *not* using Watchman, Metro's "node crawler" exhibits undefined behaviour when it encounters an error reading any directory at or within watched roots. An error from `fs.readdir` results in `callback` being called immediately with whatever files had been read by that point, which is generally non-deterministic due to parallel reads. Moreover, crawling other directories continues and `callback` is invoked *again* when `activeCalls` drops to zero, but this second invocation of `callback` does nothing as by then the wrapping Promise has already fulfilled. Ultimately these errors should probably be fatal, and users should be required to explicitly ignore/not include directories that can't be read (note that `graceful-fs` already ensures we've attempted to retry on typical short-lived issues), but as a first step this non-breaking change warns and skips these directories, allowing the rest of the crawl to complete. Changelog: ``` - **[Fix]**: Node crawler (non-Watchman) returns non-deterministic partial results on silent directory read errors. ``` Reviewed By: motiz88 Differential Revision: D56416365 fbshipit-source-id: aba37f35069ca496b4f6bc2d259b0c179efa2e08 --- packages/metro-file-map/src/Watcher.js | 1 + .../crawlers/__tests__/integration-test.js | 1 + .../src/crawlers/__tests__/node-test.js | 15 +++ .../metro-file-map/src/crawlers/node/index.js | 105 +++++++++++------- .../crawlers/watchman/__tests__/index-test.js | 1 + packages/metro-file-map/src/flow-types.js | 1 + 6 files changed, 82 insertions(+), 42 deletions(-) diff --git a/packages/metro-file-map/src/Watcher.js b/packages/metro-file-map/src/Watcher.js index 55c5ce07c2..3029fe0cea 100644 --- a/packages/metro-file-map/src/Watcher.js +++ b/packages/metro-file-map/src/Watcher.js @@ -103,6 +103,7 @@ export class Watcher extends EventEmitter { const crawlerOptions: CrawlerOptions = { abortSignal: options.abortSignal, computeSha1: options.computeSha1, + console: options.console, includeSymlinks: options.enableSymlinks, extensions: options.extensions, forceNodeFilesystemAPI: options.forceNodeFilesystemAPI, diff --git a/packages/metro-file-map/src/crawlers/__tests__/integration-test.js b/packages/metro-file-map/src/crawlers/__tests__/integration-test.js index 30cc8994fd..4b65f0f98b 100644 --- a/packages/metro-file-map/src/crawlers/__tests__/integration-test.js +++ b/packages/metro-file-map/src/crawlers/__tests__/integration-test.js @@ -120,6 +120,7 @@ describe.each(Object.keys(CRAWLERS))( async (includeSymlinks, expectedChangedFiles) => { invariant(crawl, 'crawl should not be null within maybeTest'); const result = await crawl({ + console: global.console, previousState: { fileSystem: new TreeFS({ rootDir: FIXTURES_DIR, diff --git a/packages/metro-file-map/src/crawlers/__tests__/node-test.js b/packages/metro-file-map/src/crawlers/__tests__/node-test.js index 0dfc299016..b98b994a4f 100644 --- a/packages/metro-file-map/src/crawlers/__tests__/node-test.js +++ b/packages/metro-file-map/src/crawlers/__tests__/node-test.js @@ -288,6 +288,7 @@ describe('node crawler', () => { nodeCrawl = require('../node'); const {changedFiles, removedFiles} = await nodeCrawl({ + console: global.console, previousState: {fileSystem: emptyFS}, extensions: ['js'], ignore: pearMatcher, @@ -309,6 +310,7 @@ describe('node crawler', () => { nodeCrawl = require('../node'); const {changedFiles, removedFiles} = await nodeCrawl({ + console: global.console, previousState: {fileSystem: emptyFS}, extensions: ['js'], forceNodeFilesystemAPI: true, @@ -331,6 +333,7 @@ describe('node crawler', () => { nodeCrawl = require('../node'); const {changedFiles, removedFiles} = await nodeCrawl({ + console: global.console, previousState: {fileSystem: emptyFS}, extensions: ['js'], forceNodeFilesystemAPI: true, @@ -346,7 +349,13 @@ describe('node crawler', () => { it('completes with fs.readdir throwing an error', async () => { nodeCrawl = require('../node'); + const mockConsole = { + ...global.console, + warn: jest.fn(), + }; + const {changedFiles, removedFiles} = await nodeCrawl({ + console: mockConsole, previousState: {fileSystem: emptyFS}, extensions: ['js'], forceNodeFilesystemAPI: true, @@ -355,6 +364,9 @@ describe('node crawler', () => { roots: ['/error'], }); + expect(mockConsole.warn).toHaveBeenCalledWith( + expect.stringContaining('Error "ENOTDIR" reading contents of "/error"'), + ); expect(changedFiles).toEqual(new Map()); expect(removedFiles).toEqual(new Set()); }); @@ -364,6 +376,7 @@ describe('node crawler', () => { const fs = require('graceful-fs'); const {changedFiles, removedFiles} = await nodeCrawl({ + console: global.console, previousState: {fileSystem: emptyFS}, extensions: ['js'], forceNodeFilesystemAPI: true, @@ -390,6 +403,7 @@ describe('node crawler', () => { const err = new Error('aborted for test'); await expect( nodeCrawl({ + console: global.console, abortSignal: abortSignalWithReason(err), previousState: {fileSystem: emptyFS}, extensions: ['js', 'json'], @@ -420,6 +434,7 @@ describe('node crawler', () => { nodeCrawl = require('../node'); await expect( nodeCrawl({ + console: global.console, perfLogger: fakePerfLogger, abortSignal: abortController.signal, previousState: {fileSystem: emptyFS}, diff --git a/packages/metro-file-map/src/crawlers/node/index.js b/packages/metro-file-map/src/crawlers/node/index.js index 8d86eac0ed..39fd8f34f1 100644 --- a/packages/metro-file-map/src/crawlers/node/index.js +++ b/packages/metro-file-map/src/crawlers/node/index.js @@ -11,6 +11,7 @@ import type { CanonicalPath, + Console, CrawlerOptions, FileData, IgnoreMatcher, @@ -33,6 +34,7 @@ function find( ignore: IgnoreMatcher, includeSymlinks: boolean, rootDir: string, + console: Console, callback: Callback, ): void { const result: FileData = new Map(); @@ -44,51 +46,52 @@ function find( fs.readdir(directory, {withFileTypes: true}, (err, entries) => { activeCalls--; if (err) { - callback(result); - return; - } - - entries.forEach((entry: fs.Dirent) => { - const file = path.join(directory, entry.name.toString()); - - if (ignore(file)) { - return; - } - - if (entry.isSymbolicLink() && !includeSymlinks) { - return; - } - - if (entry.isDirectory()) { - search(file); - return; - } + console.warn( + `Error "${err.code ?? err.message}" reading contents of "${directory}", skipping. Add this directory to your ignore list to exclude it.`, + ); + } else { + entries.forEach((entry: fs.Dirent) => { + const file = path.join(directory, entry.name.toString()); + + if (ignore(file)) { + return; + } - activeCalls++; + if (entry.isSymbolicLink() && !includeSymlinks) { + return; + } - fs.lstat(file, (err, stat) => { - activeCalls--; + if (entry.isDirectory()) { + search(file); + return; + } - if (!err && stat) { - const ext = path.extname(file).substr(1); - if (stat.isSymbolicLink() || extensions.includes(ext)) { - result.set(pathUtils.absoluteToNormal(file), [ - '', - stat.mtime.getTime(), - stat.size, - 0, - '', - null, - stat.isSymbolicLink() ? 1 : 0, - ]); + activeCalls++; + + fs.lstat(file, (err, stat) => { + activeCalls--; + + if (!err && stat) { + const ext = path.extname(file).substr(1); + if (stat.isSymbolicLink() || extensions.includes(ext)) { + result.set(pathUtils.absoluteToNormal(file), [ + '', + stat.mtime.getTime(), + stat.size, + 0, + '', + null, + stat.isSymbolicLink() ? 1 : 0, + ]); + } } - } - if (activeCalls === 0) { - callback(result); - } + if (activeCalls === 0) { + callback(result); + } + }); }); - }); + } if (activeCalls === 0) { callback(result); @@ -109,6 +112,7 @@ function findNative( ignore: IgnoreMatcher, includeSymlinks: boolean, rootDir: string, + console: Console, callback: Callback, ): void { // Examples: @@ -172,6 +176,7 @@ module.exports = async function nodeCrawl(options: CrawlerOptions): Promise<{ changedFiles: FileData, }> { const { + console, previousState, extensions, forceNodeFilesystemAPI, @@ -194,7 +199,7 @@ module.exports = async function nodeCrawl(options: CrawlerOptions): Promise<{ debug('Using system find: %s', useNativeFind); return new Promise((resolve, reject) => { - const callback = (fileData: FileData) => { + const callback: Callback = fileData => { const difference = previousState.fileSystem.getDifference(fileData); perfLogger?.point('nodeCrawl_end'); @@ -209,9 +214,25 @@ module.exports = async function nodeCrawl(options: CrawlerOptions): Promise<{ }; if (useNativeFind) { - findNative(roots, extensions, ignore, includeSymlinks, rootDir, callback); + findNative( + roots, + extensions, + ignore, + includeSymlinks, + rootDir, + console, + callback, + ); } else { - find(roots, extensions, ignore, includeSymlinks, rootDir, callback); + find( + roots, + extensions, + ignore, + includeSymlinks, + rootDir, + console, + callback, + ); } }); }; diff --git a/packages/metro-file-map/src/crawlers/watchman/__tests__/index-test.js b/packages/metro-file-map/src/crawlers/watchman/__tests__/index-test.js index 7167447b53..ce49b2d85d 100644 --- a/packages/metro-file-map/src/crawlers/watchman/__tests__/index-test.js +++ b/packages/metro-file-map/src/crawlers/watchman/__tests__/index-test.js @@ -38,6 +38,7 @@ const watchmanPath: string => string = filePath => const DEFAULT_OPTIONS: CrawlerOptions = { abortSignal: null, computeSha1: true, + console: global.console, extensions: ['js'], ignore: () => false, includeSymlinks: true, diff --git a/packages/metro-file-map/src/flow-types.js b/packages/metro-file-map/src/flow-types.js index 9ba31f6e0b..169f6beca8 100644 --- a/packages/metro-file-map/src/flow-types.js +++ b/packages/metro-file-map/src/flow-types.js @@ -88,6 +88,7 @@ export type Console = typeof global.console; export type CrawlerOptions = { abortSignal: ?AbortSignal, computeSha1: boolean, + console: Console, extensions: $ReadOnlyArray, forceNodeFilesystemAPI: boolean, ignore: IgnoreMatcher,