diff --git a/src/index.ts b/src/index.ts index d62c897d..8ef5711c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -354,7 +354,7 @@ export class Minimatch { this.parseNegate() // step 2: expand braces - this.globSet = this.braceExpand() + this.globSet = [...new Set(this.braceExpand())] if (options.debug) { this.debug = (...args: any[]) => console.error(...args) @@ -362,61 +362,17 @@ export class Minimatch { this.debug(this.pattern, this.globSet) - // step 3: now we have a set, so turn each one into a series of path-portion - // matching patterns. + // step 3: now we have a set, so turn each one into a series of + // path-portion matching patterns. // These will be regexps, except in the case of "**", which is // set to the GLOBSTAR object for globstar behavior, // and will not contain any / characters + // + // First, we preprocess to make the glob pattern sets a bit simpler + // and deduped. There are some perf-killing patterns that can cause + // problems with a glob walk, but we can simplify them down a bit. const rawGlobParts = this.globSet.map(s => this.slashSplit(s)) - - // consecutive globstars are an unncessary perf killer - // also, **/*/... is equivalent to */**/..., so swap all of those - // this turns a pattern like **/*/**/*/x into */*/**/x - // and a pattern like **/x/**/*/y becomes **/x/*/**/y - // the *later* we can push the **, the more efficient it is, - // because we can avoid having to do a recursive walk until - // the walked tree is as shallow as possible. - // Note that this is only true up to the last pattern, though, because - // a/*/** will only match a/b if b is a dir, but a/**/* will match a/b - // regardless, since it's "0 or more path segments" if it's not final. 
- if (this.options.noglobstar) { - // ** is * anyway - this.globParts = rawGlobParts - } else { - // do this swap BEFORE the reduce, so that we can turn a string - // of **/*/**/* into */*/**/** and then reduce the **'s into one - for (const parts of rawGlobParts) { - let swapped: boolean - do { - swapped = false - for (let i = 0; i < parts.length - 1; i++) { - if (parts[i] === '*' && parts[i - 1] === '**') { - parts[i] = '**' - parts[i - 1] = '*' - swapped = true - } - } - } while (swapped) - } - this.globParts = rawGlobParts.map(parts => { - parts = parts.reduce((set: string[], part) => { - const prev = set[set.length - 1] - if (part === '**' && prev === '**') { - return set - } - if (part === '..') { - if (prev && prev !== '..' && prev !== '.' && prev !== '**') { - set.pop() - return set - } - } - set.push(part) - return set - }, []) - return parts.length === 0 ? [''] : parts - }) - } - + this.globParts = this.preprocess(rawGlobParts) this.debug(this.pattern, this.globParts) // glob --> regexps @@ -448,6 +404,188 @@ export class Minimatch { this.debug(this.pattern, this.set) } + // various transforms to equivalent pattern sets that are + // faster to process in a filesystem walk. The goal is to + // eliminate what we can, and push all ** patterns as far + // to the right as possible, even if it increases the number + // of patterns that we have to process. + preprocess(globParts: string[][]) { + // if we're not in globstar mode, then turn all ** into * + if (this.options.noglobstar) { + for (let i = 0; i < globParts.length; i++) { + for (let j = 0; j < globParts[i].length; j++) { + if (globParts[i][j] === '**') { + globParts[i][j] = '*' + } + } + } + } + + globParts = this.firstPhasePreProcess(globParts) + globParts = this.secondPhasePreProcess(globParts) + + return globParts + } + + // First phase: single-pattern processing + //
<pre> is 1 or more portions + // <rest> is 1 or more portions + // <p> is any portion other than ., .., '', or ** + //
<e> is . or '' + // + // **/.. is *brutal* for filesystem walking performance, because + // it effectively resets the recursive walk each time it occurs, + // and ** cannot be reduced out by a .. pattern part like a regexp + // or most strings (other than .., ., and '') can be. + // + // <pre>/**/../<p>/<rest>
-> {<pre>/../<p>/<rest>
,<pre>/**/<p>/<rest>
} + // <pre>/<e>/<rest> -> <pre>/<rest> + // <pre>/<p>/../<rest>
-> <pre>/<rest> + // **/**/<rest> -> **/<rest> + // + // **/*/<rest> -> */**/<rest> <== not valid because ** doesn't follow + // this WOULD be allowed if ** did follow symlinks, or * didn't + firstPhasePreProcess(globParts: string[][]) { + let didSomething = false + do { + didSomething = false + // <pre>/**/../<p>/<rest>
-> {<pre>/../<p>/<rest>
,<pre>/**/<p>/<rest>
} + for (let parts of globParts) { + let gs: number = -1 + while (-1 !== (gs = parts.indexOf('**', gs + 1))) { + let gss: number = gs + while (parts[gss + 1] === '**') { + // <pre>/**/**/<rest> -> <pre>/**/<rest> + gss++ + } + // eg, if gs is 2 and gss is 4, that means we have 3 ** + // parts, and can remove 2 of them. + if (gss > gs) { + parts.splice(gs + 1, gss - gs) + } + + let next = parts[gs + 1] + const p = parts[gs + 2] + if (next !== '..') continue + if (!p || p === '.' || p === '..') continue + didSomething = true + // edit parts in place, and push the new one + parts.splice(gs, 1) + const other = parts.slice(0) + other[gs] = '**' + globParts.push(other) + gs-- + } + + // <pre>/<e>/<rest> -> <pre>/<rest> + if (!this.preserveMultipleSlashes) { + for (let i = 1; i < parts.length - 1; i++) { + const p = parts[i] + // don't squeeze out UNC patterns + if (i === 1 && p === '' && parts[0] === '') continue + if (p === '.' || p === '') { + didSomething = true + parts.splice(i, 1) + i-- + } + } + if (parts[0] === '.') { + didSomething = true + parts.shift() + } + } + + // <pre>/<p>/../<rest>
-> <pre>/<rest> + let dd: number = 0 + while (-1 !== (dd = parts.indexOf('..', dd + 1))) { + const p = parts[dd - 1] + if (p && p !== '.' && p !== '..' && p !== '**') { + didSomething = true + parts.splice(dd - 1, 2) + if (parts.length === 0) parts.push('') + dd -= 2 + } + } + } + } while (didSomething) + + return globParts + } + + // second phase: multi-pattern dedupes + // {<pre>/*/<rest>,<pre>/<p>/<rest>
} -> <pre>/*/<rest> + // {<pre>/<rest>,<pre>/<rest>} -> <pre>/<rest> + // {<pre>/**/<rest>,<pre>/<rest>} -> <pre>/**/<rest> + // + // {<pre>/**/<rest>,<pre>/**/<p>/<rest>
} -> <pre>/**/<rest> + // ^-- not valid because ** doesn't follow symlinks + secondPhasePreProcess(globParts: string[][]): string[][] { + for (let i = 0; i < globParts.length - 1; i++) { + for (let j = i + 1; j < globParts.length; j++) { + const matched = this.partsMatch( + globParts[i], + globParts[j], + !this.preserveMultipleSlashes + ) + if (!matched) continue + globParts[i] = matched + globParts[j] = [] + } + } + return globParts.filter(gs => gs.length) + } + + partsMatch( + a: string[], + b: string[], + emptyGSMatch: boolean = false + ): false | string[] { + let ai = 0 + let bi = 0 + let result: string[] = [] + let which: string = '' + while (ai < a.length && bi < b.length) { + if (a[ai] === b[bi]) { + result.push(which === 'b' ? b[bi] : a[ai]) + ai++ + bi++ + } else if (emptyGSMatch && a[ai] === '**' && b[bi] === a[ai + 1]) { + result.push(a[ai]) + ai++ + } else if (emptyGSMatch && b[bi] === '**' && a[ai] === b[bi + 1]) { + result.push(b[bi]) + bi++ + } else if ( + a[ai] === '*' && + b[bi] && + !b[bi].startsWith('.') && + b[bi] !== '**' + ) { + if (which === 'b') return false + which = 'a' + result.push(a[ai]) + ai++ + bi++ + } else if ( + b[bi] === '*' && + a[ai] && + (this.options.dot || !a[ai].startsWith('.')) && + a[ai] !== '**' + ) { + if (which === 'a') return false + which = 'b' + result.push(b[bi]) + ai++ + bi++ + } else { + return false + } + } + // if we fall out of the loop, it means the two are identical + // as long as their lengths match + return a.length === b.length && result + } + parseNegate() { if (this.nonegate) return @@ -685,10 +823,7 @@ export class Minimatch { const options = this.options // shortcuts - if (pattern === '**') { - if (!options.noglobstar) return GLOBSTAR - else pattern = '*' - } + if (pattern === '**') return GLOBSTAR if (pattern === '') return '' // far and away, the most common glob pattern parts are diff --git a/tap-snapshots/test/basic.js.test.cjs b/tap-snapshots/test/basic.js.test.cjs index cced6e6a..e8db1351 100644 --- 
a/tap-snapshots/test/basic.js.test.cjs +++ b/tap-snapshots/test/basic.js.test.cjs @@ -93,6 +93,10 @@ exports[`test/basic.js TAP basic tests > makeRe *****?? 1`] = ` /^(?:(?!\\.)(?=.)[^/]*?[^/]*?[^/]*?[^/]*?[^/]*?[^/][^/])$/ ` +exports[`test/basic.js TAP basic tests > makeRe **/**/** 1`] = ` +/^(?:(?:(?!(?:\\/|^)\\.).)*?)$/ +` + exports[`test/basic.js TAP basic tests > makeRe **/.x/** 1`] = ` /^(?:(?:\\/|(?:(?!(?:\\/|^)\\.).)*?\\/)?\\.x(?:\\/|(?:(?!(?:\\/|^)\\.).)*?)?)$/ ` @@ -186,11 +190,11 @@ exports[`test/basic.js TAP basic tests > makeRe .x/**/**/* 2`] = ` ` exports[`test/basic.js TAP basic tests > makeRe .x/**/*/** 1`] = ` -/^(?:\\.x\\/(?!\\.)(?=.)[^/]*?(?:\\/|(?:(?!(?:\\/|^)\\.).)*?)?)$/ +/^(?:\\.x(?:\\/|\\/(?:(?!(?:\\/|^)\\.).)*?\\/)(?!\\.)(?=.)[^/]*?)$/ ` exports[`test/basic.js TAP basic tests > makeRe .x/**/*/** 2`] = ` -/^(?:\\.x\\/(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?(?:\\/|(?:(?!(?:\\/|^)(?:\\.{1,2})($|\\/)).)*?)?)$/ +/^(?:\\.x(?:\\/|\\/(?:(?!(?:\\/|^)(?:\\.{1,2})($|\\/)).)*?\\/)(?!(?:^|\\/)\\.{1,2}(?:$|\\/))(?=.)[^/]*?)$/ ` exports[`test/basic.js TAP basic tests > makeRe .x/*/** 1`] = ` @@ -625,6 +629,10 @@ exports[`test/basic.js TAP basic tests > makeRe {a,*(b|{c,d})} 1`] = ` /^(?:a|(?=.)(?:(?!\\.)b|(?!\\.)c)*|(?=.)(?:(?!\\.)b|(?!\\.)d)*)$/ ` +exports[`test/basic.js TAP basic tests > makeRe {c*,./c*} 1`] = ` +/^(?:(?=.)c[^/]*?)$/ +` + exports[`test/basic.js TAP basic tests > makeRe Å 1`] = ` /^(?:Å)$/i ` diff --git a/tap-snapshots/test/preprocessing.ts.test.cjs b/tap-snapshots/test/preprocessing.ts.test.cjs new file mode 100644 index 00000000..798a91f6 --- /dev/null +++ b/tap-snapshots/test/preprocessing.ts.test.cjs @@ -0,0 +1,699 @@ +/* IMPORTANT + * This snapshot file is auto-generated, but designed for humans. + * It should be checked into source control and tracked carefully. + * Re-generate by setting TAP_SNAPSHOT=1 and running tests. + * Make sure to inspect the output below. Do not ignore changes! 
+ */ +'use strict' +exports[`test/preprocessing.ts TAP **/.. > defaults 1`] = ` +Array [ + Array [ + "**", + "..", + ], +] +` + +exports[`test/preprocessing.ts TAP **/.. > multislash 1`] = ` +Array [ + Array [ + "**", + "..", + ], +] +` + +exports[`test/preprocessing.ts TAP **/.. > no globstar 1`] = ` +Array [ + Array [ + "", + ], +] +` + +exports[`test/preprocessing.ts TAP **/../ > defaults 1`] = ` +Array [ + Array [ + "**", + "..", + "", + ], +] +` + +exports[`test/preprocessing.ts TAP **/../ > multislash 1`] = ` +Array [ + Array [ + "**", + "..", + "", + ], +] +` + +exports[`test/preprocessing.ts TAP **/../ > no globstar 1`] = ` +Array [ + Array [ + "", + ], +] +` + +exports[`test/preprocessing.ts TAP **/../x > defaults 1`] = ` +Array [ + Array [ + "..", + "x", + ], + Array [ + "**", + "x", + ], +] +` + +exports[`test/preprocessing.ts TAP **/../x > multislash 1`] = ` +Array [ + Array [ + "..", + "x", + ], + Array [ + "**", + "x", + ], +] +` + +exports[`test/preprocessing.ts TAP **/../x > no globstar 1`] = ` +Array [ + Array [ + "x", + ], +] +` + +exports[`test/preprocessing.ts TAP a/b/c/**///../x/y/z > defaults 1`] = ` +Array [ + Array [ + "a", + "b", + "x", + "y", + "z", + ], + Array [ + "a", + "b", + "c", + "**", + "x", + "y", + "z", + ], +] +` + +exports[`test/preprocessing.ts TAP a/b/c/**///../x/y/z > multislash 1`] = ` +Array [ + Array [ + "a", + "b", + "c", + "**", + "", + "", + "..", + "x", + "y", + "z", + ], +] +` + +exports[`test/preprocessing.ts TAP a/b/c/**///../x/y/z > no globstar 1`] = ` +Array [ + Array [ + "a", + "b", + "c", + "x", + "y", + "z", + ], +] +` + +exports[`test/preprocessing.ts TAP a/b/c/..///d > defaults 1`] = ` +Array [ + Array [ + "a", + "b", + "d", + ], +] +` + +exports[`test/preprocessing.ts TAP a/b/c/..///d > multislash 1`] = ` +Array [ + Array [ + "a", + "b", + "", + "", + "d", + ], +] +` + +exports[`test/preprocessing.ts TAP a/b/c/..///d > no globstar 1`] = ` +Array [ + Array [ + "a", + "b", + "d", + ], +] +` + 
+exports[`test/preprocessing.ts TAP a/{**/,}*/b > defaults 1`] = ` +Array [ + Array [ + "a", + "**", + "*", + "b", + ], + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{**/,}*/b > multislash 1`] = ` +Array [ + Array [ + "a", + "**", + "*", + "b", + ], + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{**/,}*/b > no globstar 1`] = ` +Array [ + Array [ + "a", + "*", + "*", + "b", + ], + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{**/,}b > defaults 1`] = ` +Array [ + Array [ + "a", + "**", + "b", + ], + Array [ + "a", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{**/,}b > multislash 1`] = ` +Array [ + Array [ + "a", + "**", + "b", + ], + Array [ + "a", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{**/,}b > no globstar 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], + Array [ + "a", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{*,.c}/b > defaults 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], + Array [ + "a", + ".c", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{*,.c}/b > multislash 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], + Array [ + "a", + ".c", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{*,.c}/b > no globstar 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], + Array [ + "a", + ".c", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{*,c}/b > defaults 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{*,c}/b > multislash 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{*,c}/b > no globstar 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{*/b/d,c/b/*}/e > defaults 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + "d", + "e", + ], + Array [ + "a", + "c", + "b", + "*", + "e", + ], +] +` + +exports[`test/preprocessing.ts 
TAP a/{*/b/d,c/b/*}/e > multislash 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + "d", + "e", + ], + Array [ + "a", + "c", + "b", + "*", + "e", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{*/b/d,c/b/*}/e > no globstar 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + "d", + "e", + ], + Array [ + "a", + "c", + "b", + "*", + "e", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{,**/}*/b > defaults 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], + Array [ + "a", + "**", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{,**/}*/b > multislash 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], + Array [ + "a", + "**", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{,**/}*/b > no globstar 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], + Array [ + "a", + "*", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{,**/}b > defaults 1`] = ` +Array [ + Array [ + "a", + "b", + ], + Array [ + "a", + "**", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{,**/}b > multislash 1`] = ` +Array [ + Array [ + "a", + "b", + ], + Array [ + "a", + "**", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{,**/}b > no globstar 1`] = ` +Array [ + Array [ + "a", + "b", + ], + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{.c,*}/b > defaults 1`] = ` +Array [ + Array [ + "a", + ".c", + "b", + ], + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{.c,*}/b > multislash 1`] = ` +Array [ + Array [ + "a", + ".c", + "b", + ], + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{.c,*}/b > no globstar 1`] = ` +Array [ + Array [ + "a", + ".c", + "b", + ], + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{c,*}/b > defaults 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{c,*}/b > multislash 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], 
+] +` + +exports[`test/preprocessing.ts TAP a/{c,*}/b > no globstar 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{c/b/*,*/b/d}/e > defaults 1`] = ` +Array [ + Array [ + "a", + "c", + "b", + "*", + "e", + ], + Array [ + "a", + "*", + "b", + "d", + "e", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{c/b/*,*/b/d}/e > multislash 1`] = ` +Array [ + Array [ + "a", + "c", + "b", + "*", + "e", + ], + Array [ + "a", + "*", + "b", + "d", + "e", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{c/b/*,*/b/d}/e > no globstar 1`] = ` +Array [ + Array [ + "a", + "c", + "b", + "*", + "e", + ], + Array [ + "a", + "*", + "b", + "d", + "e", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{c/b/d,*/b/*}/e > defaults 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + "*", + "e", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{c/b/d,*/b/*}/e > multislash 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + "*", + "e", + ], +] +` + +exports[`test/preprocessing.ts TAP a/{c/b/d,*/b/*}/e > no globstar 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + "*", + "e", + ], +] +` + +exports[`test/preprocessing.ts TAP {a/**/b,a/b} > defaults 1`] = ` +Array [ + Array [ + "a", + "**", + "b", + ], + Array [ + "a", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP {a/**/b,a/b} > multislash 1`] = ` +Array [ + Array [ + "a", + "**", + "b", + ], + Array [ + "a", + "b", + ], +] +` + +exports[`test/preprocessing.ts TAP {a/**/b,a/b} > no globstar 1`] = ` +Array [ + Array [ + "a", + "*", + "b", + ], + Array [ + "a", + "b", + ], +] +` diff --git a/test/patterns.js b/test/patterns.js index f214d035..0754a603 100644 --- a/test/patterns.js +++ b/test/patterns.js @@ -37,7 +37,9 @@ module.exports = [ ['b*/', ['bdir/']], ['c*', ['c', 'ca', 'cb']], + ['{c*,./c*}', ['c', 'ca', 'cb']], ['**', files], + ['**/**/**', files], ['\\.\\./*/', ['\\.\\./*/'], { nonull: true }], ['s/\\..*//', ['s/\\..*//'], { nonull: true }], diff --git a/test/preprocessing.ts 
b/test/preprocessing.ts new file mode 100644 index 00000000..59119e60 --- /dev/null +++ b/test/preprocessing.ts @@ -0,0 +1,38 @@ +import {Minimatch, braceExpand} from '../' +import t from 'tap' + +const m = new Minimatch('*') +const noGS = new Minimatch('*', { noglobstar: true }) +const ms = new Minimatch('*', {preserveMultipleSlashes: true}) + +const patterns = [ + '**/..', + '**/../', + '**/../x', + 'a/b/c/**///../x/y/z', + 'a/b/c/..///d', + 'a/{*,c}/b', + 'a/{*,.c}/b', + 'a/{c,*}/b', + 'a/{.c,*}/b', + 'a/{c/b/d,*/b/*}/e', + 'a/{*/b/d,c/b/*}/e', + 'a/{c/b/*,*/b/d}/e', + '{a/**/b,a/b}', + 'a/{,**/}b', + 'a/{,**/}*/b', + 'a/{**/,}b', + 'a/{**/,}*/b', +] + +const exp = (p:string) => braceExpand(p).map(s => s.split('/')) + +t.plan(patterns.length) +for (const p of patterns) { + t.test(p, t => { + t.matchSnapshot(m.preprocess(exp(p)), 'defaults') + t.matchSnapshot(noGS.preprocess(exp(p)), 'no globstar') + t.matchSnapshot(ms.preprocess(exp(p)), 'multislash') + t.end() + }) +}