Skip to content

Commit

Permalink
update the regex-based matcher with new logic
Browse files Browse the repository at this point in the history
  • Loading branch information
Konrad Jamrozik committed Jan 20, 2023
1 parent 11c9286 commit 04c6f7e
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 70 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public class CodeownersFileTests
new( "/" , "a/" , true , true ),
new( "/" , "/a/" , true , true ),
new( "/" , "/a/b" , true , true ),
new( "/a" , "a" , true , true ),
new( "/a" , "a" , true , true ),
new( "/a" , "A" , true , false ),
new( "/a" , "/a" , true , true ),
new( "/a" , "a/" , true , false ),
Expand All @@ -49,18 +49,19 @@ public class CodeownersFileTests
new( "a" , "/a/b" , true , false ),
new( "a" , "/a/b/" , true , false ),
new( "a" , "/x/a/b" , false , false ),
new( "/a/" , "a" , true , true ),
new( "/a/" , "/a" , true , true ),
new( "/a/" , "a" , true , false ),
new( "/a/" , "/a" , true , false ),
new( "/a/" , "a/" , true , true ),
new( "/a/" , "/a/" , true , true ),
new( "/a/" , "/a/b" , true , true ),
new( "/a/" , "/a\\ b" , true , true ),
new( "/a/" , "/a\\ b" , true , false ),
new( "/a/" , "/a\\ b/" , true , false ),
new( "/a/" , "/a/b/" , true , true ),
new( "/a/" , "/A/b/" , true , false ),
new( "/a/" , "/x/a/b" , false , false ),
new( "/a/b/" , "/a" , false , false ),
new( "/a/b/" , "/a/" , false , false ),
new( "/a/b/" , "/a/b" , true , true ),
new( "/a/b/" , "/a/b" , true , false ),
new( "/a/b/" , "/a/b/" , true , true ),
new( "/a/b/" , "/a/b/c" , true , true ),
new( "/a/b/" , "/a/b/c/" , true , true ),
Expand All @@ -86,16 +87,19 @@ public class CodeownersFileTests
new( "/*" , "[" , false , true ),
new( "/*" , "]" , false , true ),
new( "/*" , "!" , false , true ),
new( "/**" , "a" , false , true ),
new( "/**" , "a/" , false , true ),
new( "/**" , "a/b" , false , true ),
new( "/**" , "[" , false , true ),
new( "/**" , "]" , false , true ),
new( "/**" , "!" , false , true ),
new( "/**" , "a" , false , false ),
new( "/**" , "a/" , false , false ),
new( "/**" , "a/b" , false , false ),
new( "/**" , "[" , false , false ),
new( "/**" , "]" , false , false ),
new( "/**" , "!" , false , false ),
new( "/a/**" , "a" , false , false ),
new( "/*/**" , "a" , false , false ),
new( "/*/**" , "a/" , false , true ),
new( "/*/**" , "a/b" , false , true ),
new( "/*/**" , "a/" , false , false ),
new( "/*/**" , "a/b" , false , false ),
new( "/*/" , "a" , false , false ),
new( "/*/" , "a/" , false , true ),
new( "/*/b" , "a/b" , false , true ),
new( "/**/a" , "a" , false , true ),
new( "**/a" , "a" , false , true ),
new( "/**/a" , "x/ba" , false , false ),
Expand All @@ -105,36 +109,48 @@ public class CodeownersFileTests
new( "/a/*" , "a/b" , false , true ),
new( "/a/*" , "a/b/" , false , false ),
new( "/a/*" , "a/b/c" , false , false ),
new( "/a/*/" , "a" , false , false ),
new( "/a/*/" , "a/" , false , false ),
new( "/a/*/" , "a/b" , false , false ),
new( "/a/*/" , "a/b/" , false , true ),
new( "/a/*/" , "a/b/c" , false , true ),
new( "/a/**" , "a" , false , false ),
new( "/a/**" , "a/" , false , true ),
new( "/a/**" , "a/b" , false , true ),
new( "/a/**" , "a/b/" , false , true ),
new( "/a/**" , "a/b/c" , false , true ),
new( "/**/a/" , "a" , false , true ),
new( "/a/**" , "a/" , false , false ),
new( "/a/**" , "a/b" , false , false ),
new( "/a/**" , "a/b/" , false , false ),
new( "/a/**" , "a/b/c" , false , false ),
new( "/a/**/" , "a" , false , false ),
new( "/a/**/" , "a/" , false , false ),
new( "/a/**/" , "a/b" , false , false ),
new( "/a/**/" , "a/b/" , false , false ),
new( "/a/**/" , "a/b/c" , false , false ),
new( "/**/a/" , "a" , false , false ),
new( "/**/a/" , "a/" , false , true ),
new( "/**/a/" , "a/b" , false , true ),
new( "/**/b/" , "a/b" , false , true ),
new( "/**/b/" , "a/c" , false , false ),
new( "/a/*/b/" , "a/b" , false , false ),
new( "/a/*/b/" , "a/x/b" , false , true ),
new( "/**/b/" , "a/b" , false , false ),
new( "/**/b/" , "a/b/" , false , true ),
new( "/**/b/" , "a/c/" , false , false ),
new( "/a/*/b/" , "a/b/" , false , false ),
new( "/a/*/b/" , "a/x/b/" , false , true ),
new( "/a/*/b/" , "a/x/b/c" , false , true ),
new( "/a/*/b/" , "a/x/c" , false , false ),
new( "/a/*/b/" , "a/x/y/b" , false , false ),
new( "/a**b/" , "a/x/y/b" , false , true ),
new( "/a/**/b/" , "a/b" , false , true ),
new( "/a/**/b/" , "a/x/b" , false , true ),
new( "/a/**/b/" , "a/x/y/b" , false , true ),
new( "/a**b/" , "a/x/y/b" , false , false ),
new( "/a/**/b/" , "a/b" , false , false ),
new( "/a/**/b/" , "a/b/" , false , true ),
new( "/a/**/b/" , "a/x/b/" , false , true ),
new( "/a/**/b/" , "a/x/y/b/" , false , true ),
new( "/a/**/b/" , "a/x/y/c" , false , false ),
new( "/a/**/b/" , "a-b" , false , false ),
new( "/a/**/b/" , "a-b/" , false , false ),
new( "a/*/*" , "a/b" , false , false ),
new( "/a/*/*/d" , "a/b/c/d" , false , true ),
new( "/a/*/*/d" , "a/b/x/c/d" , false , false ),
new( "/a/**/*/d" , "a/b/x/c/d" , false , true ),
new( "*/*/b" , "a/b" , false , false ),
new( "/a*/" , "abc" , false , true ),
new( "/*b*/" , "abc" , false , true ),
new( "/*c/" , "abc" , false , true ),
new( "/a*c/" , "abc" , false , true ),
new( "/a*/" , "abc/" , false , true ),
new( "/*b*/" , "abc/" , false , true ),
new( "/*c/" , "abc/" , false , true ),
new( "/a*c/" , "abc/" , false , true ),
new( "/**/*x*/" , "a/b/cxy/d" , false , true ),
new( "/a/*.md" , "a/x.md" , false , true ),
new( "/*/*/*.md" , "a/b/x.md" , false , true ),
Expand All @@ -143,7 +159,7 @@ public class CodeownersFileTests
new( "/a.*" , "a.b" , false , true ),
new( "/a.*" , "a.b/" , false , false ),
new( "/a.*" , "x/a.b/" , false , false ),
new( "/a.*/" , "a.b" , false , true ),
new( "/a.*/" , "a.b" , false , false ),
new( "/a.*/" , "a.b/" , false , true ),
new( "/**/*x*/AB/*/CD" , "a/b/cxy/AB/fff/CD" , false, true ),
new( "/**/*x*/AB/*/CD" , "a/b/cxy/AB/ff/ff/CD" , false, false ),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ public void OutputsCodeownersForGlobPath()
["a.txt"] = new CodeownersEntry("/*", new List<string> { "star" }),
["b.txt"] = new CodeownersEntry("/*", new List<string> { "star" }),
["foo/a.txt"] = new CodeownersEntry("/foo/**/a.txt", new List<string> { "foo_2star_a" }),
["foo/b.txt"] = new CodeownersEntry("/**", new List<string> { "2star" }),
["foo/b.txt"] = new CodeownersEntry("/", new List<string> { "slash" }),
["foo/bar/a.txt"] = new CodeownersEntry("/foo/*/a.txt", new List<string> { "foo_star_a_1", "foo_star_a_2" }),
["foo/bar/b.txt"] = new CodeownersEntry("/**", new List<string> { "2star" }),
["foo/bar/b.txt"] = new CodeownersEntry("/", new List<string> { "slash" }),
// @formatter:on
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Azure.Sdk.Tools.RetrieveCodeOwners.Tests.WildcardTests.TestWildcard
#

/** @2star
/ @slash
/* @star
/foo/**/a.txt @foo_2star_a
/foo/*/a.txt @foo_star_a_1 @foo_star_a_2
88 changes: 53 additions & 35 deletions tools/code-owners-parser/CodeOwnersParser/MatchedCodeownersEntry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ private CodeownersEntry GetMatchingCodeownersEntry(
// by the virtue of not ending with "/".

CodeownersEntry matchedEntry = codeownersEntries
.Where(entry => !ContainsUnsupportedCharacters(entry.PathExpression))
.Where(entry => !ContainsUnsupportedFragments(entry.PathExpression))
// Entries listed in CODEOWNERS file below take precedence, hence we read the file from the bottom up.
// By convention, entries in CODEOWNERS should be sorted top-down in the order of:
// - 'RepoPath',
Expand All @@ -105,6 +105,11 @@ private CodeownersEntry GetMatchingCodeownersEntry(

private CodeownersEntry NoMatchCodeownersEntry { get; } = new CodeownersEntry();


private static bool ContainsUnsupportedFragments(string codeownersPath)
=> ContainsUnsupportedCharacters(codeownersPath)
|| ContainsUnsupportedSequences(codeownersPath);

/// <summary>
/// See the comment on unsupportedChars.
/// </summary>
Expand All @@ -121,6 +126,24 @@ private static bool ContainsUnsupportedCharacters(string codeownersPath)
return contains;
}

private static bool ContainsUnsupportedSequences(string codeownersPath)
{
string[] sequences = { "/**", "/**/" };

foreach (var sequence in sequences)
{
if (codeownersPath.EndsWith(sequence))
{
Console.Error.WriteLine(
$"CODEOWNERS path \"{codeownersPath}\" ends with " +
$"unsupported sequence of \"{sequence}\". Replace it with \"/\".");
return true;
}
}

return false;
}

/// <summary>
/// Returns true if the regex expression representing the PathExpression
/// of CODEOWNERS entry matches a prefix of targetPath.
Expand Down Expand Up @@ -148,33 +171,34 @@ private Regex ConvertToRegex(string targetPath, string codeownersPath)
$"\"{DoubleStar}\" or \"{SingleStar}\"");
}

pattern = pattern.Replace("**", DoubleStar);
// We replace "/**/", not "**", because we disallow "**" in any other context.
// Specifically:
// - because we normalize the path to start with "/", any prefix "**/" is
// effectively "/**/";
// - any suffix "/**" is implicit and equivalent to "/", hence we forbid "/**";
// - any inline "**" like "/a**/" or "/**a/" or "/a**b/" would be equivalent to
// single star, hence we forbid double star, to avoid confusion.
pattern = pattern.Replace("/**/", "/" + DoubleStar + "/");
pattern = pattern.Replace("*", SingleStar);

pattern = Regex.Escape(pattern);

// Denote that all paths are absolute by pre-pending "beginning of string" symbol.
pattern = "^" + pattern;

pattern = SetPatternSuffix(targetPath, pattern);
pattern = SetPatternSuffix(pattern);

// Note we can assume there is "/" after "^" because we normalize
// the path by prepending "/" if absent.
pattern = pattern.Replace($"^/{DoubleStar}", "^(.*)");
pattern = pattern.Replace($"/{DoubleStar}/", "((/.*/)|/)");
// This case is necessary to cover:
// - inline **, e.g. "/a**b/";
// - suffix **, e.g. "/a/**".
pattern = pattern.Replace(DoubleStar, "(.*)");
pattern = pattern.Replace(SingleStar, "([^/]*)");

return new Regex(pattern);
}

private static string SetPatternSuffix(string targetPath, string pattern)
private static string SetPatternSuffix(string pattern)
{
// Lack of slash at the end denotes the path is a path to a file,
// per our validation logic.
//
// Note we assume this is path to a file even if the path is invalid,
// even though in such case the path might be a path to a directory.
if (!pattern.EndsWith("/"))
Expand All @@ -183,30 +207,24 @@ private static string SetPatternSuffix(string targetPath, string pattern)
// not a substring, as we are dealing with a file.
pattern += "$";
}
// If the CODEOWNERS pattern is matching only against directories,
// but the targetPath may not be a directory
// (as it doesn't have "/" at the end), we need to trim
// the "/" from the pattern to ensure match is present.
//
// To illustrate this, consider following cases:
//
// 1. 2.
// targetPath: /a , /a*/
// pattern: /a/ , /abc
//
// Without trimming pattern to be "/a" and "/a*" respectively,
// these wouldn't match, but they should.
//
// On the other hand, trimming the suffix "/" when it is not
// necessary won't lead to issues. E.g.:
//
// targetPath: /a/b
// pattern: /a/
//
// Here we still have a prefix match even if we trim pattern to "/a".
else if (pattern.EndsWith("/") && !targetPath.EndsWith("/"))
else // The pattern ends with "/", denoting it is a directory.
{
pattern = pattern.TrimEnd('/');
// We do not append the end-of-string symbol, "$",
// because we want to allow matching to any string suffix,
// which semantically means matching anything in the directory
// represented by the path.

// Note that if the targetPath is exactly the same as pattern,
// except that it is missing the trailing "/", then we assume
// no match. This is because The pattern is matching
// against a directory, which, due to our validation,
// should exist. The targetPath, because it doesn't have trailing "/",
// matches to a file with the same name, hence it cannot exist.
//
// For example:
// pattern: /a/
// targetPath: /a
// result: no match
}

return pattern;
Expand Down Expand Up @@ -243,6 +261,6 @@ private static string NormalizePath(string codeownersPath)
/// file exists in the repository.
/// </summary>
private bool IsCodeownersPathValid(string codeownersPath)
=> codeownersPath.StartsWith("/") && !ContainsUnsupportedCharacters(codeownersPath);
=> codeownersPath.StartsWith("/") && !ContainsUnsupportedFragments(codeownersPath);
}
}

0 comments on commit 04c6f7e

Please sign in to comment.