From 8aadde8323babb0ddc42c34d56c038b3fd427861 Mon Sep 17 00:00:00 2001 From: futpib Date: Sat, 15 Jun 2019 16:00:16 +0300 Subject: [PATCH] Add `globby.stream` (#113) --- index.d.ts | 21 ++++++++++++ index.js | 34 +++++++++++++++---- index.test-d.ts | 28 ++++++++++++++++ package.json | 5 ++- readme.md | 18 +++++++++++ stream-utils.js | 46 ++++++++++++++++++++++++++ test.js | 86 ++++++++++++++++++++++++++++++++++++++++++++++--- 7 files changed, 226 insertions(+), 12 deletions(-) create mode 100644 stream-utils.js diff --git a/index.d.ts b/index.d.ts index b124362..49d669e 100644 --- a/index.d.ts +++ b/index.d.ts @@ -111,6 +111,27 @@ declare const globby: { options?: globby.GlobbyOptions ): string[]; + /** + @param patterns - See supported `minimatch` [patterns](https://github.com/isaacs/minimatch#usage). + @param options - See the [`fast-glob` options](https://github.com/mrmlnc/fast-glob#options-1) in addition to the ones in this package. + @returns The stream of matching paths. + + @example + ``` + import globby = require('globby'); + + (async () => { + for await (const path of globby.stream('*.tmp')) { + console.log(path); + } + })(); + ``` + */ + stream( + patterns: string | readonly string[], + options?: globby.GlobbyOptions + ): NodeJS.ReadableStream; + /** Note that you should avoid running the same tasks multiple times as they contain a file system cache. Instead, run this method each time to ensure file system changes are taken into consideration. diff --git a/index.js b/index.js index f8f3a57..029a19e 100644 --- a/index.js +++ b/index.js @@ -1,10 +1,12 @@ 'use strict'; const fs = require('fs'); const arrayUnion = require('array-union'); +const merge2 = require('merge2'); const glob = require('glob'); const fastGlob = require('fast-glob'); const dirGlob = require('dir-glob'); const gitignore = require('./gitignore'); +const {FilterStream, UniqueStream} = require('./stream-utils'); const DEFAULT_FILTER = () => false; @@ -81,6 +83,12 @@ const globDirs = (task, fn) => { const getPattern = (task, fn) => task.options.expandDirectories ? globDirs(task, fn) : [task.pattern]; +const getFilterSync = options => { + return options && options.gitignore ? + gitignore.sync({cwd: options.cwd, ignore: options.ignore}) : + DEFAULT_FILTER; +}; + const globToTask = task => glob => { const {options} = task; if (options.ignore && Array.isArray(options.ignore) && options.expandDirectories) { @@ -120,24 +128,36 @@ module.exports = async (patterns, options) => { module.exports.sync = (patterns, options) => { const globTasks = generateGlobTasks(patterns, options); - const getFilter = () => { - return options && options.gitignore ? - gitignore.sync({cwd: options.cwd, ignore: options.ignore}) : - DEFAULT_FILTER; - }; - const tasks = globTasks.reduce((tasks, task) => { const newTask = getPattern(task, dirGlob.sync).map(globToTask(task)); return tasks.concat(newTask); }, []); - const filter = getFilter(); + const filter = getFilterSync(options); + return tasks.reduce( (matches, task) => arrayUnion(matches, fastGlob.sync(task.pattern, task.options)), [] ).filter(path_ => !filter(path_)); }; +module.exports.stream = (patterns, options) => { + const globTasks = generateGlobTasks(patterns, options); + + const tasks = globTasks.reduce((tasks, task) => { + const newTask = getPattern(task, dirGlob.sync).map(globToTask(task)); + return tasks.concat(newTask); + }, []); + + const filter = getFilterSync(options); + const filterStream = new FilterStream(p => !filter(p)); + const uniqueStream = new UniqueStream(); + + return merge2(tasks.map(task => fastGlob.stream(task.pattern, task.options))) + .pipe(filterStream) + .pipe(uniqueStream); +}; + module.exports.generateGlobTasks = generateGlobTasks; module.exports.hasMagic = (patterns, options) => [] diff --git a/index.test-d.ts b/index.test-d.ts index 4e7a3f0..e71dff2 100644 --- a/index.test-d.ts +++ b/index.test-d.ts @@ -4,6 +4,7 @@ import { GlobTask, FilterFunction, sync as globbySync, + stream as globbyStream, generateGlobTasks, hasMagic, gitignore @@ -45,6 +46,33 @@ expectType( expectType(globbySync('*.tmp', {gitignore: true})); expectType(globbySync('*.tmp', {ignore: ['**/b.tmp']})); +// Globby (stream) +expectType(globbyStream('*.tmp')); +expectType(globbyStream(['a.tmp', '*.tmp', '!{c,d,e}.tmp'])); + +expectType(globbyStream('*.tmp', {expandDirectories: false})); +expectType(globbyStream('*.tmp', {expandDirectories: ['a*', 'b*']})); +expectType( + globbyStream('*.tmp', { + expandDirectories: { + files: ['a', 'b'], + extensions: ['tmp'] + } + }) +); +expectType(globbyStream('*.tmp', {gitignore: true})); +expectType(globbyStream('*.tmp', {ignore: ['**/b.tmp']})); + +(async () => { + const streamResult = []; + for await (const path of globbyStream('*.tmp')) { + streamResult.push(path); + } + // `NodeJS.ReadableStream` is not generic, unfortunately, + // so it seems `(string | Buffer)[]` is the best we can get here + expectType<(string | Buffer)[]>(streamResult); +})(); + // GenerateGlobTasks expectType(generateGlobTasks('*.tmp')); expectType(generateGlobTasks(['a.tmp', '*.tmp', '!{c,d,e}.tmp'])); diff --git a/package.json b/package.json index cf84a8e..3c66658 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,8 @@ "files": [ "index.js", "gitignore.js", - "index.d.ts" + "index.d.ts", + "stream-utils.js" ], "keywords": [ "all", @@ -61,10 +62,12 @@ "fast-glob": "^2.2.6", "glob": "^7.1.3", "ignore": "^5.1.1", + "merge2": "^1.2.3", "slash": "^3.0.0" }, "devDependencies": { "ava": "^2.1.0", + "get-stream": "^5.1.0", "glob-stream": "^6.1.0", "globby": "sindresorhus/globby#master", "matcha": "^0.7.0", diff --git a/readme.md b/readme.md index 46db037..63859d0 100644 --- a/readme.md +++ b/readme.md @@ -67,6 +67,8 @@ Default: `true` If set to `true`, `globby` will automatically glob directories for you. If you define an `Array` it will only glob files that matches the patterns inside the `Array`. You can also define an `object` with `files` and `extensions` like below: ```js +const globby = require('globby'); + (async () => { const paths = await globby('images', { expandDirectories: { @@ -93,6 +95,22 @@ Respect ignore patterns in `.gitignore` files that apply to the globbed files. Returns `string[]` of matching paths. +### globby.stream(patterns, options?) + +Returns a [`stream.Readable`](https://nodejs.org/api/stream.html#stream_readable_streams) of matching paths. + +Since Node.js 10, [readable streams are iterable](https://nodejs.org/api/stream.html#stream_readable_symbol_asynciterator), so you can loop over glob matches in a [`for await...of` loop](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for-await...of) like this: + +```js +const globby = require('globby'); + +(async () => { + for await (const path of globby.stream('*.tmp')) { + console.log(path); + } +})(); +``` + ### globby.generateGlobTasks(patterns, options?) Returns an `object[]` in the format `{pattern: string, options: Object}`, which can be passed as arguments to [`fast-glob`](https://github.com/mrmlnc/fast-glob). This is useful for other globbing-related packages. diff --git a/stream-utils.js b/stream-utils.js new file mode 100644 index 0000000..98aedc8 --- /dev/null +++ b/stream-utils.js @@ -0,0 +1,46 @@ +'use strict'; +const {Transform} = require('stream'); + +class ObjectTransform extends Transform { + constructor() { + super({ + objectMode: true + }); + } +} + +class FilterStream extends ObjectTransform { + constructor(filter) { + super(); + this._filter = filter; + } + + _transform(data, encoding, callback) { + if (this._filter(data)) { + this.push(data); + } + + callback(); + } +} + +class UniqueStream extends ObjectTransform { + constructor() { + super(); + this._pushed = new Set(); + } + + _transform(data, encoding, callback) { + if (!this._pushed.has(data)) { + this.push(data); + this._pushed.add(data); + } + + callback(); + } +} + +module.exports = { + FilterStream, + UniqueStream +}; diff --git a/test.js b/test.js index 656bdfc..ffe776b 100644 --- a/test.js +++ b/test.js @@ -2,6 +2,7 @@ import fs from 'fs'; import util from 'util'; import path from 'path'; import test from 'ava'; +import getStream from 'get-stream'; import globby from '.'; const cwd = process.cwd(); @@ -72,6 +73,41 @@ test('return [] for all negative patterns - async', async t => { t.deepEqual(await globby(['!a.tmp', '!b.tmp']), []); }); +test('glob - stream', async t => { + t.deepEqual((await getStream.array(globby.stream('*.tmp'))).sort(), ['a.tmp', 'b.tmp', 'c.tmp', 'd.tmp', 'e.tmp']); +}); + +// Readable streams are iterable since Node.js 10, but this test runs on 6 and 8 too. +// So we define the test only if async iteration is supported. +if (Symbol.asyncIterator) { + // For the reason behind `eslint-disable` below see https://github.com/avajs/eslint-plugin-ava/issues/216 + // eslint-disable-next-line ava/no-async-fn-without-await + test('glob - stream async iterator support', async t => { + const results = []; + for await (const path of globby.stream('*.tmp')) { + results.push(path); + } + + t.deepEqual(results, ['a.tmp', 'b.tmp', 'c.tmp', 'd.tmp', 'e.tmp']); + }); +} + +test('glob - stream - multiple file paths', async t => { + t.deepEqual(await getStream.array(globby.stream(['a.tmp', 'b.tmp'])), ['a.tmp', 'b.tmp']); +}); + +test('glob with multiple patterns - stream', async t => { + t.deepEqual(await getStream.array(globby.stream(['a.tmp', '*.tmp', '!{c,d,e}.tmp'])), ['a.tmp', 'b.tmp']); +}); + +test('respect patterns order - stream', async t => { + t.deepEqual(await getStream.array(globby.stream(['!*.tmp', 'a.tmp'])), ['a.tmp']); +}); + +test('return [] for all negative patterns - stream', async t => { + t.deepEqual(await getStream.array(globby.stream(['!a.tmp', '!b.tmp'])), []); +}); + test('cwd option', t => { process.chdir(tmp); t.deepEqual(globby.sync('*.tmp', {cwd}), ['a.tmp', 'b.tmp', 'c.tmp', 'd.tmp', 'e.tmp']); @@ -89,6 +125,11 @@ test('don\'t mutate the options object - sync', t => { t.pass(); }); +test('don\'t mutate the options object - stream', async t => { + await getStream.array(globby.stream(['*.tmp', '!b.tmp'], Object.freeze({ignore: Object.freeze([])}))); + t.pass(); +}); + test('expose generateGlobTasks', t => { const tasks = globby.generateGlobTasks(['*.tmp', '!b.tmp'], {ignore: ['c.tmp']}); @@ -180,7 +221,7 @@ test.failing('relative paths and ignores option', t => { await t.throwsAsync(globby(value), message); }); - test(`throws for invalid patterns input: ${valueString}`, t => { + test(`throws for invalid patterns input: ${valueString} - sync`, t => { t.throws(() => { globby.sync(value); }, TypeError); @@ -190,6 +231,16 @@ test.failing('relative paths and ignores option', t => { }, message); }); + test(`throws for invalid patterns input: ${valueString} - stream`, t => { + t.throws(() => { + globby.stream(value); + }, TypeError); + + t.throws(() => { + globby.stream(value); + }, message); + }); + test(`generateGlobTasks throws for invalid patterns input: ${valueString}`, t => { t.throws(() => { globby.generateGlobTasks(value); @@ -201,7 +252,7 @@ test.failing('relative paths and ignores option', t => { }); }); -test('gitignore option defaults to false', async t => { +test('gitignore option defaults to false - async', async t => { const actual = await globby('*', {onlyFiles: false}); t.true(actual.includes('node_modules')); }); @@ -211,7 +262,12 @@ test('gitignore option defaults to false - sync', t => { t.true(actual.includes('node_modules')); }); -test('respects gitignore option true', async t => { +test('gitignore option defaults to false - stream', async t => { + const actual = await getStream.array(globby.stream('*', {onlyFiles: false})); + t.true(actual.includes('node_modules')); +}); + +test('respects gitignore option true - async', async t => { const actual = await globby('*', {gitignore: true, onlyFiles: false}); t.false(actual.includes('node_modules')); }); @@ -221,7 +277,12 @@ test('respects gitignore option true - sync', t => { t.false(actual.includes('node_modules')); }); -test('respects gitignore option false', async t => { +test('respects gitignore option true - stream', async t => { + const actual = await getStream.array(globby.stream('*', {gitignore: true, onlyFiles: false})); + t.false(actual.includes('node_modules')); +}); + +test('respects gitignore option false - async', async t => { const actual = await globby('*', {gitignore: false, onlyFiles: false}); t.true(actual.includes('node_modules')); }); @@ -237,6 +298,11 @@ test('gitignore option with stats option', async t => { t.false(actual.includes('node_modules')); }); +test('respects gitignore option false - stream', async t => { + const actual = await getStream.array(globby.stream('*', {gitignore: false, onlyFiles: false})); + t.true(actual.includes('node_modules')); +}); + // https://github.com/sindresorhus/globby/issues/97 test.failing('`{extension: false}` and `expandDirectories.extensions` option', t => { t.deepEqual( @@ -284,3 +350,15 @@ test('throws when specifying a file as cwd - sync', t => { globby.sync('*', {cwd: isFile}); }, 'The `cwd` option must be a path to a directory'); }); + +test('throws when specifying a file as cwd - stream', t => { + const isFile = path.resolve('fixtures/gitignore/bar.js'); + + t.throws(() => { + globby.stream('.', {cwd: isFile}); + }, 'The `cwd` option must be a path to a directory'); + + t.throws(() => { + globby.stream('*', {cwd: isFile}); + }, 'The `cwd` option must be a path to a directory'); +});