Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix multiple OpenAPI generation issues with new AST-based generator #18554

Merged
merged 11 commits into from
Jan 31, 2023
3 changes: 3 additions & 0 deletions changelog/18554.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
openapi: Fix many incorrect details in generated API spec, by using better techniques to parse path regexps
```
252 changes: 173 additions & 79 deletions sdk/framework/openapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"reflect"
"regexp"
"regexp/syntax"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -194,24 +195,13 @@ var OASStdRespNoContent = &OASResponse{
Description: "empty body",
}

// Regex for handling optional and named parameters in paths, and string cleanup.
// Regex for handling fields in paths, and string cleanup.
// Predefined here to avoid substantial recompilation.

// Capture optional path elements in ungreedy (?U) fashion
// Both "(leases/)?renew" and "(/(?P<name>.+))?" formats are detected
var optRe = regexp.MustCompile(`(?U)\([^(]*\)\?|\(/\(\?P<[^(]*\)\)\?`)

var (
altFieldsGroupRe = regexp.MustCompile(`\(\?P<\w+>\w+(\|\w+)+\)`) // Match named groups that limit options, e.g. "(?<foo>a|b|c)"
altFieldsRe = regexp.MustCompile(`\w+(\|\w+)+`) // Match an options set, e.g. "a|b|c"
altRe = regexp.MustCompile(`\((.*)\|(.*)\)`) // Capture alternation elements, e.g. "(raw/?$|raw/(?P<path>.+))"
altRootsRe = regexp.MustCompile(`^\(([\w\-_]+(?:\|[\w\-_]+)+)\)(/.*)$`) // Pattern starting with alts, e.g. "(root1|root2)/(?P<name>regex)"
cleanCharsRe = regexp.MustCompile("[()^$?]") // Set of regex characters that will be stripped during cleaning
cleanSuffixRe = regexp.MustCompile(`/\?\$?$`) // Path suffix patterns that will be stripped during cleaning
nonWordRe = regexp.MustCompile(`[^a-zA-Z0-9]+`) // Match a sequence of non-word characters
pathFieldsRe = regexp.MustCompile(`{(\w+)}`) // Capture OpenAPI-style named parameters, e.g. "lookup/{urltoken}",
reqdRe = regexp.MustCompile(`\(?\?P<(\w+)>[^)]*\)?`) // Capture required parameters, e.g. "(?P<name>regex)"
wsRe = regexp.MustCompile(`\s+`) // Match whitespace, to be compressed during cleaning
nonWordRe = regexp.MustCompile(`[^a-zA-Z0-9]+`) // Match a sequence of non-word characters
pathFieldsRe = regexp.MustCompile(`{(\w+)}`) // Capture OpenAPI-style named parameters, e.g. "lookup/{urltoken}",
wsRe = regexp.MustCompile(`\s+`) // Match whitespace, to be compressed during cleaning
)

// documentPaths parses all paths in a framework.Backend into OpenAPI paths.
Expand Down Expand Up @@ -575,84 +565,188 @@ func specialPathMatch(path string, specialPaths []string) bool {
// expandPattern expands a regex pattern by generating permutations of any optional parameters
// and changing named parameters into their {openapi} equivalents.
func expandPattern(pattern string) []string {
var paths []string

// Determine if the pattern starts with an alternation for multiple roots
// example (root1|root2)/(?P<name>regex) -> match['(root1|root2)/(?P<name>regex)','root1|root2','/(?P<name>regex)']
match := altRootsRe.FindStringSubmatch(pattern)
if len(match) == 3 {
var expandedRoots []string
for _, root := range strings.Split(match[1], "|") {
expandedRoots = append(expandedRoots, expandPattern(root+match[2])...)
}
return expandedRoots
// Happily, the Go regexp library exposes its underlying "parse to AST" functionality, so we can rely on that to do
// the hard work of interpreting the regexp syntax.
rx, err := syntax.Parse(pattern, syntax.Perl)
if err != nil {
// This should be impossible to reach, since regexps have previously been compiled with MustCompile in
// Backend.init.
panic(err)
averche marked this conversation as resolved.
Show resolved Hide resolved
}

// GenericNameRegex adds a regex that complicates our parsing. It is much easier to
// detect and remove it now than to compensate for in the other regexes.
//
// example: (?P<foo>\\w(([\\w-.]+)?\\w)?) -> (?P<foo>)
base := GenericNameRegex("")
start := strings.Index(base, ">")
end := strings.LastIndex(base, ")")
regexToRemove := ""
if start != -1 && end != -1 && end > start {
regexToRemove = base[start+1 : end]
}
pattern = strings.ReplaceAll(pattern, regexToRemove, "")

// Simplify named fields that have limited options, e.g. (?P<foo>a|b|c) -> (<P<foo>.+)
pattern = altFieldsGroupRe.ReplaceAllStringFunc(pattern, func(s string) string {
return altFieldsRe.ReplaceAllString(s, ".+")
})

// Initialize paths with the original pattern or the halves of an
// alternation, which is also present in some patterns.
matches := altRe.FindAllStringSubmatch(pattern, -1)
if len(matches) > 0 {
paths = []string{matches[0][1], matches[0][2]}
} else {
paths = []string{pattern}
paths, err := collectPathsFromRegexpAST(rx)
if err != nil {
// Pattern cannot be transformed into sensible OpenAPI paths, so drop it entirely.
// See lengthier comment inside collectPathsFromRegexpASTInternal below for more detailed discussion.
return nil
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was undecided as to whether I should add trace-level logging at this point.

On the plus side: It makes it easier for plugin developers to find out this condition is being hit.

On the minus side: It is additional logging which is uninteresting to server operators, as there is nothing that can be done to change it other than rebuilding the Vault or plugin binary, and it will trigger every time sys/internal/specs/openapi or any relevant HelpOperation is invoked.

One alternative approach here would be to move the check to Backend.init, which is already set up to panic on initialisation if a backend author writes an empty or uncompilable path pattern - this could be expanded to include impossible-to-OpenAPI patterns not also marked as Unpublished. (I furthermore have another open PR, #18492, which seeks to add a check for CreateOperation being defined without an ExistenceCheck at the same point.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps a reasonable compromise could be added to change this error:

return nil, fmt.Errorf("pattern could not be translated for OpenAPI due to %v operation", rx.Op)

into a named error and then capturing it here with if errors.Is(...) { return nil }. The advantage of this approach is that the code will be explicit about the intention of skipping specific regex operations. If collectPathsFromRegexpASTInternal is modified in the future to return other errors, those will be propagated correctly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

}

// Expand all optional regex elements into two paths. This approach is really only useful up to 2 optional
// groups, but we probably don't want to deal with the exponential increase beyond that anyway.
for i := 0; i < len(paths); i++ {
p := paths[i]

// match is a 2-element slice that will have a start and end index
// for the left-most match of a regex of form: (lease/)?
match := optRe.FindStringIndex(p)
return paths
maxb marked this conversation as resolved.
Show resolved Hide resolved
}

if match != nil {
// create a path that includes the optional element but without
// parenthesis or the '?' character.
paths[i] = p[:match[0]] + p[match[0]+1:match[1]-2] + p[match[1]:]
type pathCollector struct {
strings.Builder
conditionalSlashAppendedAtLength int
}

// create a path that excludes the optional element.
paths = append(paths, p[:match[0]]+p[match[1]:])
i--
// collectPathsFromRegexpAST performs a depth-first recursive walk through a regexp AST, collecting an OpenAPI-style
// path as it goes.
//
// Each time it encounters alternation (a|b) or an optional part (a?), it forks its processing to produce additional
// results, to account for each possibility. Note: This does mean that an input pattern with lots of these regexp
// features can produce a lot of different OpenAPI endpoints. At the time of writing, the most complex known example is
//
// "issuer/" + framework.GenericNameRegex(issuerRefParam) + "/crl(/pem|/der|/delta(/pem|/der)?)?"
//
// in the PKI secrets engine which expands to 6 separate paths.
//
// Each named capture group - i.e. (?P<name>something here) - is replaced with an OpenAPI parameter - i.e. {name} - and
// the subtree of regexp AST inside the parameter is completely skipped.
func collectPathsFromRegexpAST(rx *syntax.Regexp) ([]string, error) {
pathCollectors, err := collectPathsFromRegexpASTInternal(rx, []*pathCollector{{}})
if err != nil {
return nil, err
}
paths := make([]string, 0, len(pathCollectors))
for _, collector := range pathCollectors {
if collector.conditionalSlashAppendedAtLength != collector.Len() {
paths = append(paths, collector.String())
}
}
return paths, nil
}

// Replace named parameters (?P<foo>) with {foo}
var replacedPaths []string
func collectPathsFromRegexpASTInternal(rx *syntax.Regexp, appendingTo []*pathCollector) ([]*pathCollector, error) {
var err error

switch rx.Op {
maxb marked this conversation as resolved.
Show resolved Hide resolved

// These AST operations are leaf nodes (no children), that match zero characters, so require no processing at all
case syntax.OpEmptyMatch: // e.g. (?:)
case syntax.OpBeginLine: // i.e. ^ when (?m)
case syntax.OpEndLine: // i.e. $ when (?m)
case syntax.OpBeginText: // i.e. \A, or ^ when (?-m)
case syntax.OpEndText: // i.e. \z, or $ when (?-m)
case syntax.OpWordBoundary: // i.e. \b
case syntax.OpNoWordBoundary: // i.e. \B

// OpConcat simply represents multiple parts of the pattern appearing one after the other, so just recurse through
// those pieces.
case syntax.OpConcat:
for _, child := range rx.Sub {
appendingTo, err = collectPathsFromRegexpASTInternal(child, appendingTo)
if err != nil {
return nil, err
}
}

for _, path := range paths {
result := reqdRe.FindAllStringSubmatch(path, -1)
if result != nil {
for _, p := range result {
par := p[1]
path = strings.Replace(path, p[0], fmt.Sprintf("{%s}", par), 1)
// OpLiteral is a literal string in the pattern - append it to the paths we are building.
case syntax.OpLiteral:
for _, collector := range appendingTo {
collector.WriteString(string(rx.Rune))
}

// OpAlternate, i.e. a|b, means we clone all of the pathCollector instances we are currently accumulating paths
// into, and independently recurse through each alternate option.
case syntax.OpAlternate: // i.e |
var totalAppendingTo []*pathCollector
lastIndex := len(rx.Sub) - 1
for index, child := range rx.Sub {
var childAppendingTo []*pathCollector
if index == lastIndex {
// Optimization: last time through this loop, we can simply re-use the existing set of pathCollector
// instances, as we no longer need to preserve them unmodified to make further copies of.
childAppendingTo = appendingTo
} else {
for _, collector := range appendingTo {
newCollector := new(pathCollector)
newCollector.WriteString(collector.String())
newCollector.conditionalSlashAppendedAtLength = collector.conditionalSlashAppendedAtLength
childAppendingTo = append(childAppendingTo, newCollector)
}
}
childAppendingTo, err = collectPathsFromRegexpASTInternal(child, childAppendingTo)
if err != nil {
return nil, err
}
totalAppendingTo = append(totalAppendingTo, childAppendingTo...)
}
appendingTo = totalAppendingTo

// OpQuest, i.e. a?, is much like an alternation between exactly two options, one of which is the empty string.
case syntax.OpQuest:
child := rx.Sub[0]
var childAppendingTo []*pathCollector
for _, collector := range appendingTo {
newCollector := new(pathCollector)
newCollector.WriteString(collector.String())
newCollector.conditionalSlashAppendedAtLength = collector.conditionalSlashAppendedAtLength
childAppendingTo = append(childAppendingTo, newCollector)
}
// Final cleanup
path = cleanSuffixRe.ReplaceAllString(path, "")
path = cleanCharsRe.ReplaceAllString(path, "")
replacedPaths = append(replacedPaths, path)
childAppendingTo, err = collectPathsFromRegexpASTInternal(child, childAppendingTo)
if err != nil {
return nil, err
}
appendingTo = append(appendingTo, childAppendingTo...)

// Many Vault path patterns end with `/?` to accept paths that end with or without a slash. Our current
// convention for generating the OpenAPI is to strip away these slashes. To do that, this very special case
// detects when we just appended a single conditional slash, and records the length of the path at this point,
// so we can later discard this path variant, if nothing else is appended to it later.
if child.Op == syntax.OpLiteral && string(child.Rune) == "/" {
for _, collector := range childAppendingTo {
collector.conditionalSlashAppendedAtLength = collector.Len()
}
}

// OpCapture, i.e. ( ) or (?P<name> ), a capturing group
case syntax.OpCapture:
if rx.Name == "" {
// In Vault, an unnamed capturing group is not actually used for capturing.
// We treat it exactly the same as OpConcat.
for _, child := range rx.Sub {
appendingTo, err = collectPathsFromRegexpASTInternal(child, appendingTo)
if err != nil {
return nil, err
}
}
} else {
// A named capturing group is replaced with the OpenAPI parameter syntax, and the regexp inside the group
// is NOT added to the OpenAPI path.
for _, builder := range appendingTo {
builder.WriteRune('{')
builder.WriteString(rx.Name)
builder.WriteRune('}')
}
}

// Any other kind of operation is a problem, and will trigger an error, resulting in the pattern being left out of
// the OpenAPI entirely - that's better than generating a path which is incorrect.
//
// The Op types we expect to hit the default condition are:
//
// OpCharClass - i.e. [something]
// OpAnyCharNotNL - i.e. .
// OpAnyChar - i.e. (?s:.)
// OpStar - i.e. *
// OpPlus - i.e. +
// OpRepeat - i.e. {N}, {N,M}, etc.
//
// In any of these conditions, there is no sensible translation of the path to OpenAPI syntax. (Note, this only
// applies to these appearing outside of a named capture group, otherwise they are handled in the previous case.)
//
// At the time of writing, the only pattern in the builtin Vault plugins that hits this codepath is the ".*"
// pattern in the KVv2 secrets engine, which is not a valid path, but rather, is a catch-all used to implement
// custom error handling behaviour to guide users who attempt to treat a KVv2 as a KVv2. It is already marked as
maxb marked this conversation as resolved.
Show resolved Hide resolved
// Unpublished, so is withheld from the OpenAPI anyway.
//
// For completeness, one other Op type exists, OpNoMatch, which is never generated by syntax.Parse - only by
// subsequent Simplify in preparation to Compile, which is not used here.
default:
return nil, fmt.Errorf("pattern could not be translated for OpenAPI due to %v operation", rx.Op)
}

return replacedPaths
return appendingTo, nil
}

// schemaType is a subset of the JSON Schema elements used as a target
Expand Down
84 changes: 0 additions & 84 deletions sdk/framework/openapi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,75 +18,6 @@ import (
)

func TestOpenAPI_Regex(t *testing.T) {
t.Run("Required", func(t *testing.T) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all of these tests just not doing anything any more, or is there a need for more test cases to be added back in place of these?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of the tests being deleted, test the behaviour of various ad-hoc regexps, which were being used to parse other regexps (!).

They have all been deleted, and their job replaced by regexp/syntax from the Go standard library.

The new code that has been written to process the output of the stdlib's regexp/syntax is already well-exercised by the existing TestOpenAPI_ExpandPattern - although, I could add some additional inputs and expected outputs to that test, which would fail with the old implementation, but pass with this rewritten one.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could add some additional inputs and expected outputs to that test, which would fail with the old implementation, but pass with this rewritten one.

These would be awesome! I would also love to see some of the edge cases tested like ".*" that you mentioned in the comments.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

tests := []struct {
input string
captures []string
}{
{`/foo/bar/(?P<val>.*)`, []string{"val"}},
{`/foo/bar/` + GenericNameRegex("val"), []string{"val"}},
{`/foo/bar/` + GenericNameRegex("first") + "/b/" + GenericNameRegex("second"), []string{"first", "second"}},
{`/foo/bar`, []string{}},
}

for _, test := range tests {
result := reqdRe.FindAllStringSubmatch(test.input, -1)
if len(result) != len(test.captures) {
t.Fatalf("Capture error (%s): expected %d matches, actual: %d", test.input, len(test.captures), len(result))
}

for i := 0; i < len(result); i++ {
if result[i][1] != test.captures[i] {
t.Fatalf("Capture error (%s): expected %s, actual: %s", test.input, test.captures[i], result[i][1])
}
}
}
})
t.Run("Optional", func(t *testing.T) {
input := "foo/(maybe/)?bar"
expStart := len("foo/")
expEnd := len(input) - len("bar")

match := optRe.FindStringIndex(input)
if diff := deep.Equal(match, []int{expStart, expEnd}); diff != nil {
t.Fatal(diff)
}

input = "/foo/maybe/bar"
match = optRe.FindStringIndex(input)
if match != nil {
t.Fatalf("Expected nil match (%s), got %+v", input, match)
}
})
t.Run("Alternation", func(t *testing.T) {
input := `(raw/?$|raw/(?P<path>.+))`

matches := altRe.FindAllStringSubmatch(input, -1)
exp1 := "raw/?$"
exp2 := "raw/(?P<path>.+)"
if matches[0][1] != exp1 || matches[0][2] != exp2 {
t.Fatalf("Capture error. Expected %s and %s, got %v", exp1, exp2, matches[0][1:])
}

input = `/foo/bar/` + GenericNameRegex("val")

matches = altRe.FindAllStringSubmatch(input, -1)
if matches != nil {
t.Fatalf("Expected nil match (%s), got %+v", input, matches)
}
})
t.Run("Alternation Fields", func(t *testing.T) {
input := `/foo/bar/(?P<type>auth|database|secret)/(?P<blah>a|b)`

act := altFieldsGroupRe.ReplaceAllStringFunc(input, func(s string) string {
return altFieldsRe.ReplaceAllString(s, ".+")
})

exp := "/foo/bar/(?P<type>.+)/(?P<blah>.+)"
if act != exp {
t.Fatalf("Replace error. Expected %s, got %v", exp, act)
}
})
t.Run("Path fields", func(t *testing.T) {
input := `/foo/bar/{inner}/baz/{outer}`

Expand All @@ -111,21 +42,6 @@ func TestOpenAPI_Regex(t *testing.T) {
regex *regexp.Regexp
output string
}{
{
input: `ab?cde^fg(hi?j$k`,
regex: cleanCharsRe,
output: "abcdefghijk",
},
{
input: `abcde/?`,
regex: cleanSuffixRe,
output: "abcde",
},
{
input: `abcde/?$`,
regex: cleanSuffixRe,
output: "abcde",
},
{
input: `abcde`,
regex: wsRe,
Expand Down
Loading