Skip to content

Commit

Permalink
Merge pull request #70 Update prefetch to match cone patterns
Browse files Browse the repository at this point in the history
Update prefetch to match cone patterns
  • Loading branch information
wilbaker authored Aug 21, 2019
2 parents 5dd5fac + 747afce commit 36e85ec
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 15 deletions.
129 changes: 120 additions & 9 deletions Scalar.Common/Prefetch/Git/DiffHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

namespace Scalar.Common.Prefetch.Git
{
Expand All @@ -15,7 +16,17 @@ public class DiffHelper
private ITracer tracer;
private HashSet<string> exactFileList;
private List<string> patternList;
private List<string> folderList;

// The maximum depth (number of path segments) of any path provided in 'folderList'
private int maxIncludedFolderPathDepth;

// All paths provided in 'folderList'; these are treated as recursive
private HashSet<string> includedRecursiveFolders;

// The parents of all folders in 'folderList'. Immediate children of these
// folders are also included when folders are specified.
private HashSet<string> includedFolderParents;

private HashSet<string> filesAdded = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

private HashSet<DiffTreeResult> stagedDirectoryOperations = new HashSet<DiffTreeResult>(new DiffTreeByNameComparer());
Expand All @@ -34,7 +45,13 @@ public DiffHelper(ITracer tracer, Enlistment enlistment, GitProcess git, IEnumer
this.tracer = tracer;
this.exactFileList = new HashSet<string>(fileList.Where(x => !x.StartsWith("*")), StringComparer.OrdinalIgnoreCase);
this.patternList = fileList.Where(x => x.StartsWith("*")).ToList();
this.folderList = new List<string>(folderList);

GenerateRecursiveAndParentPathSets(
folderList,
out this.includedFolderParents,
out this.includedRecursiveFolders,
out this.maxIncludedFolderPathDepth);

this.enlistment = enlistment;
this.git = git;
this.ShouldIncludeSymLinks = includeSymLinks;
Expand Down Expand Up @@ -78,6 +95,42 @@ public int TotalFileDeletes
/// </summary>
public bool UpdatedWholeTree { get; internal set; } = false;

// public for unit tests
/// <summary>
/// Builds the lookup sets that describe which folders were requested:
/// the folders themselves and every proper ancestor of those folders.
/// </summary>
/// <param name="folderList">
/// Folder paths to include. Each path is split on <see cref="Path.DirectorySeparatorChar"/>.
/// The sequence is enumerated exactly once.
/// </param>
/// <param name="parentFolders">
/// Receives every proper ancestor of each folder in <paramref name="folderList"/>.
/// Each entry ends with a directory separator. Comparison is case-insensitive.
/// </param>
/// <param name="recursiveFolders">
/// Receives the entries of <paramref name="folderList"/> exactly as supplied (case-insensitive set).
/// </param>
/// <param name="maxFolderPathDepth">
/// Receives the largest number of non-empty path segments found in any single folder path,
/// or 0 when no folder contributes a segment.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="folderList"/> is null.</exception>
public static void GenerateRecursiveAndParentPathSets(
    IEnumerable<string> folderList,
    out HashSet<string> parentFolders,
    out HashSet<string> recursiveFolders,
    out int maxFolderPathDepth)
{
    if (folderList == null)
    {
        throw new ArgumentNullException(nameof(folderList));
    }

    recursiveFolders = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    parentFolders = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    maxFolderPathDepth = 0;

    char[] separators = new char[] { Path.DirectorySeparatorChar };
    StringBuilder parentPaths = new StringBuilder(capacity: ScalarConstants.MaxPath);

    // Walk folderList a single time so that a lazily evaluated (or one-shot) sequence
    // cannot produce a recursive set and a parent set built from different data.
    foreach (string folderPath in folderList)
    {
        // Every path in folderList is a recursive path
        recursiveFolders.Add(folderPath);

        string[] pathParts = folderPath.Split(separators, StringSplitOptions.RemoveEmptyEntries);

        if (pathParts.Length > maxFolderPathDepth)
        {
            maxFolderPathDepth = pathParts.Length;
        }

        // Stop at 'pathParts.Length - 1' because the full folder path is already in recursiveFolders.
        // There is no need to add it to parentFolders as well.
        for (int pathIndex = 0; pathIndex < pathParts.Length - 1; ++pathIndex)
        {
            parentPaths.Append(pathParts[pathIndex]);
            parentPaths.Append(Path.DirectorySeparatorChar);
            parentFolders.Add(parentPaths.ToString());
        }

        // Reuse the same builder for the next folder
        parentPaths.Clear();
    }
}

public void PerformDiff(string targetCommitSha)
{
string targetTreeSha;
Expand Down Expand Up @@ -164,6 +217,69 @@ public void ParseDiffFile(string filename)
}
}

// public for unit testing
/// <summary>
/// Decides whether a file path lies inside the cone described by the folder list
/// that was supplied when this DiffHelper was constructed.
/// </summary>
/// <param name="filePath">Path of the file to test against the folder list</param>
/// <returns>true when the path is covered by the folder list, false otherwise</returns>
public bool PathMatchesFolders(string filePath)
{
    int depthLimit = this.maxIncludedFolderPathDepth;
    if (depthLimit == 0)
    {
        // Without at least one folder in the list nothing can match
        return false;
    }

    char separator = Path.DirectorySeparatorChar;

    int finalSeparator = filePath.LastIndexOf(separator);
    if (finalSeparator < 0)
    {
        // No separator at all: the file sits in the root, which is always included
        return true;
    }

    // Files that are immediate children of a parent of a listed folder are included
    if (this.includedFolderParents.Contains(filePath.Substring(0, finalSeparator + 1)))
    {
        return true;
    }

    // Walk the ancestors of filePath from the shallowest outwards, looking each one up
    // in the recursive folder set. The walk is capped at depthLimit levels because
    // no entry in includedRecursiveFolders is deeper than that.
    //
    // Example:
    //
    //   includedRecursiveFolders   -> { "A\", "D\E\F\" }
    //   maxIncludedFolderPathDepth -> 3
    //
    // For "G\H\I\J\K\L\m.txt" only "G\", "G\H\" and "G\H\I\" are tested; anything
    // deeper cannot be in a set whose deepest path has 3 levels.
    int searchFrom = 0;
    int depth = 0;
    while (depth < depthLimit)
    {
        int separatorAt = filePath.IndexOf(separator, searchFrom);
        if (separatorAt < 0)
        {
            // Every ancestor folder of filePath has been tested without a match
            break;
        }

        if (this.includedRecursiveFolders.Contains(filePath.Substring(0, separatorAt + 1)))
        {
            return true;
        }

        searchFrom = separatorAt + 1;
        depth++;
    }

    return false;
}

private void FlushStagedQueues()
{
List<string> deletedPaths = new List<string>();
Expand Down Expand Up @@ -328,7 +444,7 @@ private bool ShouldIncludeResult(DiffTreeResult blobAdd)

if (this.exactFileList.Count == 0 &&
this.patternList.Count == 0 &&
this.folderList.Count == 0)
this.maxIncludedFolderPathDepth == 0)
{
return true;
}
Expand All @@ -339,12 +455,7 @@ private bool ShouldIncludeResult(DiffTreeResult blobAdd)
return true;
}

if (this.folderList.Any(path => blobAdd.TargetPath.StartsWith(path, StringComparison.OrdinalIgnoreCase)))
{
return true;
}

return false;
return this.PathMatchesFolders(blobAdd.TargetPath);
}

private void EnqueueFileDeleteOperation(ITracer activity, string targetPath)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,18 @@ public void PrefetchByFilesWithHydrateWhoseObjectsAreAlreadyDownloaded()
[TestCase, Order(6)]
public void PrefetchFolders()
{
this.ExpectBlobCount(this.Enlistment.Prefetch($"--folders {Path.Combine("GVFS", "GVFS")}"), 17);
this.ExpectBlobCount(this.Enlistment.Prefetch($"--folders {Path.Combine("GVFS", "GVFS")};{Path.Combine("GVFS", "GVFS.FunctionalTests")}"), 65);
this.ExpectBlobCount(this.Enlistment.Prefetch($"--folders {Path.Combine("GVFS", "GVFS")}"), 24);
this.ExpectBlobCount(this.Enlistment.Prefetch($"--folders {Path.Combine("GVFS", "GVFS", "CommandLine")}"), 23);
this.ExpectBlobCount(this.Enlistment.Prefetch($"--folders {Path.Combine("GVFS", "GVFS")};{Path.Combine("GVFS", "GVFS.FunctionalTests")}"), 72);
}

[TestCase, Order(7)]
public void PrefetchIsAllowedToDoNothing()
public void PrefetchNonExistentFilesAndFolders()
{
this.ExpectBlobCount(this.Enlistment.Prefetch("--files nonexistent.txt"), 0);
this.ExpectBlobCount(this.Enlistment.Prefetch("--folders nonexistent_folder"), 0);

// Whenever folder(s) are specified, all files in the root will match
this.ExpectBlobCount(this.Enlistment.Prefetch("--folders nonexistent_folder"), 7);
}

[TestCase, Order(8)]
Expand All @@ -93,7 +96,7 @@ public void PrefetchFolderListFromFile()
"GVFS/"
});

this.ExpectBlobCount(this.Enlistment.Prefetch("--folders-list \"" + tempFilePath + "\""), 279);
this.ExpectBlobCount(this.Enlistment.Prefetch("--folders-list \"" + tempFilePath + "\""), 286);
File.Delete(tempFilePath);
}

Expand Down Expand Up @@ -186,7 +189,7 @@ public void PrefetchFolderListFromStdin()
"GVFS/"
});

this.ExpectBlobCount(this.Enlistment.Prefetch("--stdin-folders-list", standardInput: input), 279);
this.ExpectBlobCount(this.Enlistment.Prefetch("--stdin-folders-list", standardInput: input), 286);
}

public void PrefetchPathsWithLsTreeTypeInPath()
Expand Down
97 changes: 97 additions & 0 deletions Scalar.UnitTests/Prefetch/DiffHelperTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,103 @@ public void DetectsFailuresInLsTree()
diffBackwards.HasFailures.ShouldEqual(true);
}

[TestCase]
public void GenerateRecursiveAndParentPathSets()
{
    char sep = Path.DirectorySeparatorChar;
    string folderA = $"A{sep}";
    string folderAB = $"A{sep}B{sep}";
    string folderC = $"C{sep}";
    string folderGHIJ = $"G{sep}H{sep}I{sep}J{sep}";

    // Empty list: no parents and a depth of zero
    CheckGenerateRecursiveAndParentPathSets(
        new List<string>(),
        expectedParentFolders: new HashSet<string>(),
        expectedMaxFolderPathDepth: 0);

    // A single top-level folder has no parents
    CheckGenerateRecursiveAndParentPathSets(
        new List<string> { folderA },
        expectedParentFolders: new HashSet<string>(),
        expectedMaxFolderPathDepth: 1);

    // A nested folder contributes its ancestor as a parent
    CheckGenerateRecursiveAndParentPathSets(
        new List<string> { folderA, folderAB },
        expectedParentFolders: new HashSet<string> { folderA },
        expectedMaxFolderPathDepth: 2);

    // An extra top-level folder changes neither the parents nor the depth
    CheckGenerateRecursiveAndParentPathSets(
        new List<string> { folderA, folderAB, folderC },
        expectedParentFolders: new HashSet<string> { folderA },
        expectedMaxFolderPathDepth: 2);

    // A four-level folder adds each of its three ancestors
    CheckGenerateRecursiveAndParentPathSets(
        new List<string> { folderA, folderAB, folderC, folderGHIJ },
        expectedParentFolders: new HashSet<string> { folderA, $"G{sep}", $"G{sep}H{sep}", $"G{sep}H{sep}I{sep}" },
        expectedMaxFolderPathDepth: 4);
}

[TestCase]
public void PathMatchesFoldersReturnsFalseWithNoFoldersList()
{
    // Build a helper with neither files nor folders requested
    MockTracer mockTracer = new MockTracer();
    DiffHelper helperWithoutFolders = new DiffHelper(
        mockTracer,
        new MockScalarEnlistment(),
        fileList: new List<string>(),
        folderList: new List<string>(),
        includeSymLinks: false);

    bool rootFileMatches = helperWithoutFolders.PathMatchesFolders("a.txt");

    rootFileMatches.ShouldBeFalse("Paths (even in root) should not match if there is no folders list");
}

[TestCase]
public void PathMatchesFoldersTests()
{
    char sep = Path.DirectorySeparatorChar;

    List<string> includedFolders = new List<string>();
    includedFolders.Add($"A{sep}");
    includedFolders.Add($"a{sep}b{sep}");
    includedFolders.Add($"C{sep}");
    includedFolders.Add($"G{sep}H{sep}I{sep}J{sep}");

    MockTracer mockTracer = new MockTracer();
    DiffHelper helper = new DiffHelper(
        mockTracer,
        new MockScalarEnlistment(),
        fileList: new List<string>(),
        folderList: includedFolders,
        includeSymLinks: false);

    // Files directly in the root
    string[] rootFiles = { "a", "C.txt" };
    foreach (string path in rootFiles)
    {
        helper.PathMatchesFolders(path).ShouldBeTrue("Paths in the root should always be included");
    }

    // Files anywhere beneath a listed folder (casing of the path does not matter)
    string[] descendants =
    {
        $"A{sep}D{sep}foo.txt",
        $"A{sep}D{sep}E{sep}foo.txt",
        $"a{sep}d{sep}e{sep}FOO.txt",
        $"C{sep}bar.txt"
    };
    foreach (string path in descendants)
    {
        helper.PathMatchesFolders(path).ShouldBeTrue("Descendants of folders in the list should be included");
    }

    // Files sitting directly inside an ancestor of a listed folder
    string[] childrenOfIntermediateFolders =
    {
        $"G{sep}foo.txt",
        $"G{sep}H{sep}foo.txt",
        $"G{sep}H{sep}I{sep}foo.txt",
        $"g{sep}h{sep}i{sep}foo.txt"
    };
    foreach (string path in childrenOfIntermediateFolders)
    {
        helper.PathMatchesFolders(path).ShouldBeTrue("Immediate children of intermediate folders in the list should be included");
    }

    // Files under unrelated folders, followed by files that are below an intermediate
    // folder but are neither its immediate children nor inside a listed folder
    string[] excluded =
    {
        $"B{sep}foo.txt",
        $"B{sep}D{sep}baz.txt",
        $"G{sep}H{sep}Z{sep}foo.txt",
        $"G{sep}H{sep}I{sep}Z{sep}foo.txt"
    };
    foreach (string path in excluded)
    {
        helper.PathMatchesFolders(path).ShouldBeFalse("Files that are not children/descendants should not be included");
    }
}

/// <summary>
/// Runs DiffHelper.GenerateRecursiveAndParentPathSets on <paramref name="folderList"/> and
/// verifies all three outputs. The recursive set is expected to contain exactly the
/// entries of <paramref name="folderList"/>.
/// </summary>
private static void CheckGenerateRecursiveAndParentPathSets(
    IEnumerable<string> folderList,
    HashSet<string> expectedParentFolders,
    int expectedMaxFolderPathDepth)
{
    // The expected recursive set is simply the input list
    HashSet<string> expectedRecursive = new HashSet<string>(folderList);

    HashSet<string> actualParents;
    HashSet<string> actualRecursive;
    int actualMaxDepth;
    DiffHelper.GenerateRecursiveAndParentPathSets(
        folderList,
        out actualParents,
        out actualRecursive,
        out actualMaxDepth);

    actualParents.ShouldMatchInOrder(expectedParentFolders);
    actualRecursive.ShouldMatchInOrder(expectedRecursive);
    actualMaxDepth.ShouldEqual(expectedMaxFolderPathDepth);
}

private static string GetDataPath(string fileName)
{
string workingDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
Expand Down

0 comments on commit 36e85ec

Please sign in to comment.