From f9e5019f37b7265fde02b8c5e04590904694e02e Mon Sep 17 00:00:00 2001 From: Tomas Matousek Date: Wed, 19 Jun 2019 14:14:07 -0700 Subject: [PATCH] GitIgnore optimizations --- .../Managed/GitIgnore.Matcher.cs | 235 ++++++++++++++++++ .../Managed/GitIgnore.cs | 192 +------------- .../GitIgnoreTests.cs | 85 ++++--- 3 files changed, 302 insertions(+), 210 deletions(-) create mode 100644 src/Microsoft.Build.Tasks.Git.Operations/Managed/GitIgnore.Matcher.cs diff --git a/src/Microsoft.Build.Tasks.Git.Operations/Managed/GitIgnore.Matcher.cs b/src/Microsoft.Build.Tasks.Git.Operations/Managed/GitIgnore.Matcher.cs new file mode 100644 index 00000000..f17e3283 --- /dev/null +++ b/src/Microsoft.Build.Tasks.Git.Operations/Managed/GitIgnore.Matcher.cs @@ -0,0 +1,235 @@ +// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; + +namespace Microsoft.Build.Tasks.Git +{ + partial class GitIgnore + { + internal sealed class Matcher + { + public GitIgnore Ignore { get; } + + /// + /// Maps full posix slash-terminated directory name to a pattern group. + /// + private readonly Dictionary _patternGroups; + + /// + /// The result of "is ignored" for directories. 
+ /// + private readonly Dictionary _directoryIgnoreStateCache; + + private readonly List _reusableGroupList; + + internal Matcher(GitIgnore ignore) + { + Ignore = ignore; + _patternGroups = new Dictionary(); + _directoryIgnoreStateCache = new Dictionary(Ignore.PathComparer); + _reusableGroupList = new List(); + } + + // test only: + internal IReadOnlyDictionary DirectoryIgnoreStateCache + => _directoryIgnoreStateCache; + + private PatternGroup GetPatternGroup(string directory) + { + if (_patternGroups.TryGetValue(directory, out var group)) + { + return group; + } + + PatternGroup parent; + if (directory.Equals(Ignore.WorkingDirectory, Ignore.PathComparison)) + { + parent = Ignore.Root; + } + else + { + parent = GetPatternGroup(PathUtils.ToPosixDirectoryPath(Path.GetDirectoryName(PathUtils.TrimTrailingSlash(directory)))); + } + + group = LoadFromFile(Path.Combine(directory, GitIgnoreFileName), parent) ?? parent; + + _patternGroups.Add(directory, group); + return group; + } + + /// + /// Checks if the specified file path is ignored. + /// + /// Normalized path. + /// True if the path is ignored, false if it is not, null if it is outside of the working directory. + public bool? IsNormalizedFilePathIgnored(string fullPath) + { + if (!PathUtils.IsAbsolute(fullPath)) + { + throw new ArgumentException("Path must be absolute", nameof(fullPath)); + } + + if (PathUtils.HasTrailingDirectorySeparator(fullPath)) + { + throw new ArgumentException("Path must be a file path", nameof(fullPath)); + } + + return IsPathIgnored(PathUtils.ToPosixPath(fullPath), isDirectoryPath: false); + } + + /// + /// Checks if the specified path is ignored. + /// + /// Full path. + /// True if the path is ignored, false if it is not, null if it is outside of the working directory. + public bool? 
IsPathIgnored(string fullPath) + { + if (!PathUtils.IsAbsolute(fullPath)) + { + throw new ArgumentException("Path must be absolute", nameof(fullPath)); + } + + // git uses the FS case-sensitivity for checking directory existence: + bool isDirectoryPath = PathUtils.HasTrailingDirectorySeparator(fullPath) || Directory.Exists(fullPath); + + var fullPathNoSlash = PathUtils.TrimTrailingSlash(PathUtils.ToPosixPath(Path.GetFullPath(fullPath))); + if (isDirectoryPath && fullPathNoSlash.Equals(Ignore._workingDirectoryNoSlash, Ignore.PathComparison)) + { + return false; + } + + return IsPathIgnored(fullPathNoSlash, isDirectoryPath); + } + + private bool? IsPathIgnored(string normalizedPosixPath, bool isDirectoryPath) + { + Debug.Assert(PathUtils.IsAbsolute(normalizedPosixPath)); + Debug.Assert(PathUtils.IsPosixPath(normalizedPosixPath)); + Debug.Assert(!PathUtils.HasTrailingSlash(normalizedPosixPath)); + + // paths outside of working directory: + if (!normalizedPosixPath.StartsWith(Ignore.WorkingDirectory, Ignore.PathComparison)) + { + return null; + } + + if (isDirectoryPath && _directoryIgnoreStateCache.TryGetValue(normalizedPosixPath, out var isIgnored)) + { + return isIgnored; + } + + isIgnored = IsIgnoredRecursive(normalizedPosixPath, isDirectoryPath); + if (isDirectoryPath) + { + _directoryIgnoreStateCache.Add(normalizedPosixPath, isIgnored); + } + + return isIgnored; + } + + private bool IsIgnoredRecursive(string normalizedPosixPath, bool isDirectoryPath) + { + SplitPath(normalizedPosixPath, out var directory, out var fileName); + if (directory == null || !directory.StartsWith(Ignore.WorkingDirectory, Ignore.PathComparison)) + { + return false; + } + + var isIgnored = IsIgnored(normalizedPosixPath, directory, fileName, isDirectoryPath); + if (isIgnored) + { + return true; + } + + // The target file/directory itself is not ignored, but its containing directory might be. 
+ normalizedPosixPath = PathUtils.TrimTrailingSlash(directory); + if (_directoryIgnoreStateCache.TryGetValue(normalizedPosixPath, out isIgnored)) + { + return isIgnored; + } + + isIgnored = IsIgnoredRecursive(normalizedPosixPath, isDirectoryPath: true); + _directoryIgnoreStateCache.Add(normalizedPosixPath, isIgnored); + return isIgnored; + } + + private static void SplitPath(string fullPath, out string directoryWithSlash, out string fileName) + { + Debug.Assert(!PathUtils.HasTrailingSlash(fullPath)); + int i = fullPath.LastIndexOf('/'); + if (i < 0) + { + directoryWithSlash = null; + fileName = fullPath; + } + else + { + directoryWithSlash = fullPath.Substring(0, i + 1); + fileName = fullPath.Substring(i + 1); + } + } + + private bool IsIgnored(string normalizedPosixPath, string directory, string fileName, bool isDirectoryPath) + { + // Default patterns can't be overridden by a negative pattern: + if (fileName.Equals(".git", Ignore.PathComparison)) + { + return true; + } + + bool isIgnored = false; + + // Visit groups in reverse order. + // Patterns specified closer to the target file override those specified above. + _reusableGroupList.Clear(); + var groups = _reusableGroupList; + for (var patternGroup = GetPatternGroup(directory); patternGroup != null; patternGroup = patternGroup.Parent) + { + groups.Add(patternGroup); + } + + for (int i = groups.Count - 1; i >= 0; i--) + { + var patternGroup = groups[i]; + + if (!normalizedPosixPath.StartsWith(patternGroup.ContainingDirectory, Ignore.PathComparison)) + { + continue; + } + + string lazyRelativePath = null; + + foreach (var pattern in patternGroup.Patterns) + { + // If a pattern is matched as ignored only look for a negative pattern that matches as well. + // If a pattern is not matched then skip negative patterns. + if (isIgnored != pattern.IsNegative) + { + continue; + } + + if (pattern.IsDirectoryPattern && !isDirectoryPath) + { + continue; + } + + string matchPath = pattern.IsFullPathPattern ? 
+ lazyRelativePath ??= normalizedPosixPath.Substring(patternGroup.ContainingDirectory.Length) : + fileName; + + if (Glob.IsMatch(pattern.Glob, matchPath, Ignore.IgnoreCase, matchWildCardWithDirectorySeparator: false)) + { + // TODO: optimize negative pattern lookup (once we match, do we need to continue matching?) + isIgnored = !pattern.IsNegative; + } + } + } + + return isIgnored; + } + } + } +} diff --git a/src/Microsoft.Build.Tasks.Git.Operations/Managed/GitIgnore.cs b/src/Microsoft.Build.Tasks.Git.Operations/Managed/GitIgnore.cs index a94db190..9008c0bc 100644 --- a/src/Microsoft.Build.Tasks.Git.Operations/Managed/GitIgnore.cs +++ b/src/Microsoft.Build.Tasks.Git.Operations/Managed/GitIgnore.cs @@ -9,7 +9,7 @@ namespace Microsoft.Build.Tasks.Git { - internal sealed class GitIgnore + internal sealed partial class GitIgnore { internal sealed class PatternGroup { @@ -69,200 +69,32 @@ internal enum PatternFlags /// /// Full posix slash terminated path. /// - private readonly string _workingDirectory; + public string WorkingDirectory { get; } private readonly string _workingDirectoryNoSlash; - private readonly bool _ignoreCase; + public bool IgnoreCase { get; } - /// - /// Maps full posix slash-terminated directory name to a pattern group. - /// - private readonly Dictionary _patternGroups; - - private readonly PatternGroup _root; + public PatternGroup Root { get; } internal GitIgnore(PatternGroup root, string workingDirectory, bool ignoreCase) { Debug.Assert(PathUtils.IsPosixPath(workingDirectory)); Debug.Assert(PathUtils.HasTrailingSlash(workingDirectory)); - _ignoreCase = ignoreCase; - _workingDirectory = workingDirectory; + IgnoreCase = ignoreCase; + WorkingDirectory = workingDirectory; _workingDirectoryNoSlash = PathUtils.TrimTrailingSlash(workingDirectory); - _root = root; - _patternGroups = new Dictionary(); + Root = root; } private StringComparison PathComparison - => _ignoreCase ? 
StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal; - - /// - /// Checks if the specified file path is ignored. - /// - /// Normalized path. - /// True if the path is ignored, fale if it is not, null if it is outside of the working directory. - public bool? IsNormalizedFilePathIgnored(string fullPath) - { - if (!PathUtils.IsAbsolute(fullPath)) - { - throw new ArgumentException("Path must be absolute", nameof(fullPath)); - } - - if (PathUtils.HasTrailingDirectorySeparator(fullPath)) - { - throw new ArgumentException("Path must be a file path", nameof(fullPath)); - } - - return IsPathIgnored(fullPath, isDirectoryPath: false); - } - - /// - /// Checks if the specified path is ignored. - /// - /// Full path. - /// True if the path is ignored, fale if it is not, null if it is outside of the working directory. - public bool? IsPathIgnored(string fullPath) - { - if (!PathUtils.IsAbsolute(fullPath)) - { - throw new ArgumentException("Path must be absolute", nameof(fullPath)); - } - - var fullPathNoSlash = PathUtils.TrimTrailingDirectorySeparator(Path.GetFullPath(fullPath)); - - // git uses the FS case-sensitivity for checking directory existence: - bool isDirectoryPath = PathUtils.HasTrailingDirectorySeparator(fullPath) || Directory.Exists(fullPath); - - if (isDirectoryPath && fullPathNoSlash.Equals(_workingDirectoryNoSlash, PathComparison)) - { - return false; - } - - return IsPathIgnored(fullPathNoSlash, isDirectoryPath); - } - - private bool? 
IsPathIgnored(string normalizedPath, bool isDirectoryPath) - { - Debug.Assert(PathUtils.IsAbsolute(normalizedPath)); - Debug.Assert(!PathUtils.HasTrailingDirectorySeparator(normalizedPath)); - - normalizedPath = PathUtils.ToPosixPath(normalizedPath); - - // paths outside of working directory: - if (!normalizedPath.StartsWith(_workingDirectory, PathComparison)) - { - return null; - } - - static void splitPath(string fullPath, out string directoryWithSlash, out string fileName) - { - int i = fullPath.LastIndexOf('/', fullPath.Length - (PathUtils.HasTrailingSlash(fullPath) ? 2 : 1)); - if (i < 0) - { - directoryWithSlash = null; - fileName = fullPath; - } - else - { - directoryWithSlash = fullPath.Substring(0, i + 1); - fileName = fullPath.Substring(i + 1); - } - } - - splitPath(normalizedPath, out var directory, out var fileName); - Debug.Assert(directory != null); - - // Default patterns can't be overriden by a negative pattern: - if (fileName.Equals(".git", PathComparison)) - { - return true; - } + => IgnoreCase ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal; - var groups = new List(); - - while (true) - { - bool isIgnored = false; - - // Visit groups in reverse order. - // Patterns specified closer to the target file override those specified above. - for (var patternGroup = GetPatternGroup(directory); patternGroup != null; patternGroup = patternGroup.Parent) - { - groups.Add(patternGroup); - } - - for (int i = groups.Count - 1; i >= 0; i--) - { - var patternGroup = groups[i]; - - if (!normalizedPath.StartsWith(patternGroup.ContainingDirectory, PathComparison)) - { - continue; - } - - var relativePath = normalizedPath.Substring(patternGroup.ContainingDirectory.Length); - - foreach (var pattern in patternGroup.Patterns) - { - // If a pattern is matched as ignored only look for a negative pattern that matches as well. - // If a pattern is not matched then skip negative patterns. 
- if (isIgnored != pattern.IsNegative) - { - continue; - } - - if (pattern.IsDirectoryPattern && !isDirectoryPath) - { - continue; - } - - var matchPath = pattern.IsFullPathPattern ? relativePath : fileName; - if (Glob.IsMatch(pattern.Glob, matchPath, _ignoreCase, matchWildCardWithDirectorySeparator: false)) - { - // TODO: optimize negative pattern lookup (once we match, do we need to continue matching?) - isIgnored = !pattern.IsNegative; - } - } - } + private IEqualityComparer PathComparer + => IgnoreCase ? StringComparer.OrdinalIgnoreCase : StringComparer.Ordinal; - if (isIgnored) - { - return true; - } - - splitPath(directory, out directory, out fileName); - if (directory == null || !directory.StartsWith(_workingDirectory, PathComparison)) - { - return false; - } - - isDirectoryPath = true; - groups.Clear(); - } - } - - private PatternGroup GetPatternGroup(string directory) - { - if (_patternGroups.TryGetValue(directory, out var group)) - { - return group; - } - - PatternGroup parent; - if (directory.Equals(_workingDirectory, PathComparison)) - { - parent = _root; - } - else - { - parent = GetPatternGroup(PathUtils.ToPosixDirectoryPath(Path.GetDirectoryName(PathUtils.TrimTrailingSlash(directory)))); - } - - group = LoadFromFile(Path.Combine(directory, GitIgnoreFileName), parent) ?? parent; - - _patternGroups.Add(directory, group); - return group; - } + public Matcher CreateMatcher() + => new Matcher(this); /// /// is invalid diff --git a/src/Microsoft.Build.Tasks.Git.UnitTests/GitIgnoreTests.cs b/src/Microsoft.Build.Tasks.Git.UnitTests/GitIgnoreTests.cs index 5715b32e..142a7011 100644 --- a/src/Microsoft.Build.Tasks.Git.UnitTests/GitIgnoreTests.cs +++ b/src/Microsoft.Build.Tasks.Git.UnitTests/GitIgnoreTests.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. 
using System.IO; +using System.Linq; using System.Text; using TestUtilities; using Xunit; @@ -52,8 +53,8 @@ public void IsIgnored_CaseSensitive() { using var temp = new TempRoot(); - var root = temp.CreateDirectory(); - var workingDir = root.CreateDirectory("Repo"); + var rootDir = temp.CreateDirectory(); + var workingDir = rootDir.CreateDirectory("Repo"); // root // A (.gitignore) @@ -86,56 +87,72 @@ public void IsIgnored_CaseSensitive() "); var ignore = new GitIgnore(root: null, PathUtils.ToPosixDirectoryPath(workingDir.Path), ignoreCase: false); + var matcher = ignore.CreateMatcher(); // outside of the working directory: - Assert.Null(ignore.IsPathIgnored(root.Path)); - Assert.Null(ignore.IsPathIgnored(workingDir.Path.ToUpperInvariant())); + Assert.Null(matcher.IsPathIgnored(rootDir.Path)); + Assert.Null(matcher.IsPathIgnored(workingDir.Path.ToUpperInvariant())); // special case: - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, ".git"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, ".git") + Path.DirectorySeparatorChar)); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, ".git", "config"))); - Assert.False(ignore.IsPathIgnored(workingDir.Path)); - Assert.False(ignore.IsPathIgnored(workingDir.Path + Path.DirectorySeparatorChar)); - Assert.False(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "X"))); + Assert.False(matcher.IsPathIgnored(workingDir.Path)); + Assert.False(matcher.IsPathIgnored(workingDir.Path + Path.DirectorySeparatorChar)); + Assert.False(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "X"))); // matches "*.txt" - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D1", "b.txt"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D1", "b.txt"))); // matches "!a.txt" - Assert.False(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D1", "a.txt"))); + Assert.False(matcher.IsPathIgnored(Path.Combine(workingDir.Path, 
"A", "B", "C", "D1", "a.txt"))); // matches "*.txt", "!z.txt" is ignored - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "z.txt"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "z.txt"))); // matches "*.txt", overriden by "!u.txt" - Assert.False(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "u.txt"))); + Assert.False(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "u.txt"))); // matches "*.txt", overriden by "!v.txt", which is overriden by "v.txt" - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "v.txt"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "v.txt"))); // matches directory name "D2" - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D2", "E", "a.txt"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D2", "E", "a.txt"))); // does not match "b/" (treated as a file path) - Assert.False(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D1", "b"))); + Assert.False(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D1", "b"))); // matches "b/" (treated as a directory path) - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D1", "b") + Path.DirectorySeparatorChar)); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D1", "b") + Path.DirectorySeparatorChar)); // matches "D3/" (existing directory path) - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D3"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D3"))); // matches "D1/c.cs" - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D1", "c.cs"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D1", "c.cs"))); // matches "Bar/**/*.xyz" - 
Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "Bar", "Baz", "Goo", ".xyz"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "Bar", "Baz", "Goo", ".xyz"))); // matches "/*.c" - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "x.c"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "x.c"))); // does not match "/*.c" - Assert.False(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D1", "x.c"))); + Assert.False(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "B", "C", "D1", "x.c"))); + + AssertEx.Equal(new[] + { + "/Repo/.git: True", + "/Repo/A/B/C/D1/b: True", + "/Repo/A/B/C/D1: False", + "/Repo/A/B/C/D2/E: True", + "/Repo/A/B/C/D2: True", + "/Repo/A/B/C/D3: True", + "/Repo/A/B/C: False", + "/Repo/A/B: False", + "/Repo/A: False", + "/Repo: False" + }, matcher.DirectoryIgnoreStateCache.Select(kvp => $"{kvp.Key.Substring(rootDir.Path.Length)}: {kvp.Value}").OrderBy(s => s)); } [Fact] @@ -143,8 +160,8 @@ public void IsIgnored_IgnoreCase() { using var temp = new TempRoot(); - var root = temp.CreateDirectory(); - var workingDir = root.CreateDirectory("Repo"); + var rootDir = temp.CreateDirectory(); + var workingDir = rootDir.CreateDirectory("Repo"); // root // A (.gitignore) @@ -159,27 +176,35 @@ public void IsIgnored_IgnoreCase() "); var ignore = new GitIgnore(root: null, PathUtils.ToPosixDirectoryPath(workingDir.Path), ignoreCase: true); + var matcher = ignore.CreateMatcher(); // outside of the working directory: - Assert.Null(ignore.IsPathIgnored(root.Path.ToUpperInvariant())); + Assert.Null(matcher.IsPathIgnored(rootDir.Path.ToUpperInvariant())); // special case: - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, ".GIT"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, ".GIT"))); // matches "*.txt" - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "b.TXT"))); + 
Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "b.TXT"))); // matches "!a.TXT" - Assert.False(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "a.txt"))); + Assert.False(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "a.txt"))); // matches directory name "dir/" - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "DIr", "a.txt"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "DIr", "a.txt"))); // matches "dir/" (treated as a directory path) - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "DiR") + Path.DirectorySeparatorChar)); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "DiR") + Path.DirectorySeparatorChar)); // matches "dir/" (existing directory path) - Assert.True(ignore.IsPathIgnored(Path.Combine(workingDir.Path, "A", "DIR"))); + Assert.True(matcher.IsPathIgnored(Path.Combine(workingDir.Path, "A", "DIR"))); + + AssertEx.Equal(new[] + { + "/Repo/A/DIr: True", + "/Repo/A: False", + "/Repo: False", + }, matcher.DirectoryIgnoreStateCache.Select(kvp => $"{kvp.Key.Substring(rootDir.Path.Length)}: {kvp.Value}").OrderBy(s => s)); } } }