Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix multiple OpenAPI generation issues with new AST-based generator #18554

Merged
merged 11 commits into from
Jan 31, 2023
3 changes: 3 additions & 0 deletions changelog/18554.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
openapi: Fix many incorrect details in generated API spec, by using better techniques to parse path regexps
```
279 changes: 199 additions & 80 deletions sdk/framework/openapi.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package framework

import (
"errors"
"fmt"
"reflect"
"regexp"
"regexp/syntax"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -194,24 +196,13 @@ var OASStdRespNoContent = &OASResponse{
Description: "empty body",
}

// Regex for handling optional and named parameters in paths, and string cleanup.
// Regex for handling fields in paths, and string cleanup.
// Predefined here to avoid substantial recompilation.

// Capture optional path elements in ungreedy (?U) fashion
// Both "(leases/)?renew" and "(/(?P<name>.+))?" formats are detected
var optRe = regexp.MustCompile(`(?U)\([^(]*\)\?|\(/\(\?P<[^(]*\)\)\?`)

var (
altFieldsGroupRe = regexp.MustCompile(`\(\?P<\w+>\w+(\|\w+)+\)`) // Match named groups that limit options, e.g. "(?<foo>a|b|c)"
altFieldsRe = regexp.MustCompile(`\w+(\|\w+)+`) // Match an options set, e.g. "a|b|c"
altRe = regexp.MustCompile(`\((.*)\|(.*)\)`) // Capture alternation elements, e.g. "(raw/?$|raw/(?P<path>.+))"
altRootsRe = regexp.MustCompile(`^\(([\w\-_]+(?:\|[\w\-_]+)+)\)(/.*)$`) // Pattern starting with alts, e.g. "(root1|root2)/(?P<name>regex)"
cleanCharsRe = regexp.MustCompile("[()^$?]") // Set of regex characters that will be stripped during cleaning
cleanSuffixRe = regexp.MustCompile(`/\?\$?$`) // Path suffix patterns that will be stripped during cleaning
nonWordRe = regexp.MustCompile(`[^\w]+`) // Match a sequence of non-word characters
pathFieldsRe = regexp.MustCompile(`{(\w+)}`) // Capture OpenAPI-style named parameters, e.g. "lookup/{urltoken}",
reqdRe = regexp.MustCompile(`\(?\?P<(\w+)>[^)]*\)?`) // Capture required parameters, e.g. "(?P<name>regex)"
wsRe = regexp.MustCompile(`\s+`) // Match whitespace, to be compressed during cleaning
nonWordRe = regexp.MustCompile(`[^\w]+`) // Match a sequence of non-word characters
pathFieldsRe = regexp.MustCompile(`{(\w+)}`) // Capture OpenAPI-style named parameters, e.g. "lookup/{urltoken}",
wsRe = regexp.MustCompile(`\s+`) // Match whitespace, to be compressed during cleaning
)

// documentPaths parses all paths in a framework.Backend into OpenAPI paths.
Expand All @@ -236,7 +227,22 @@ func documentPath(p *Path, specialPaths *logical.Paths, requestResponsePrefix st
}

// Convert optional parameters into distinct patterns to be processed independently.
paths := expandPattern(p.Pattern)
forceUnpublished := false
paths, err := expandPattern(p.Pattern)
if err != nil {
if errors.Is(err, errUnsupportableRegexpOperationForOpenAPI) {
// Pattern cannot be transformed into sensible OpenAPI paths. In this case, we override the later
// processing to use the regexp, as is, as the path, and behave as if Unpublished was set on every
// operation (meaning the operations will not be represented in the OpenAPI document).
//
// This allows a human reading the OpenAPI document to notice that, yes, a path handler does exist,
// even though it was not able to contribute actual OpenAPI operations.
forceUnpublished = true
paths = []string{p.Pattern}
} else {
return err
}
}

for _, path := range paths {
// Construct a top level PathItem which will be populated as the path is processed.
Expand Down Expand Up @@ -305,7 +311,7 @@ func documentPath(p *Path, specialPaths *logical.Paths, requestResponsePrefix st
// with descriptions, properties and examples from the framework.Path data.
for opType, opHandler := range operations {
props := opHandler.Properties()
if props.Unpublished {
if props.Unpublished || forceUnpublished {
continue
}

Expand Down Expand Up @@ -554,85 +560,198 @@ func specialPathMatch(path string, specialPaths []string) bool {

// expandPattern expands a regex pattern by generating permutations of any optional parameters
// and changing named parameters into their {openapi} equivalents.
func expandPattern(pattern string) []string {
var paths []string
func expandPattern(pattern string) ([]string, error) {
// Happily, the Go regexp library exposes its underlying "parse to AST" functionality, so we can rely on that to do
// the hard work of interpreting the regexp syntax.
rx, err := syntax.Parse(pattern, syntax.Perl)
if err != nil {
// This should be impossible to reach, since regexps have previously been compiled with MustCompile in
// Backend.init.
panic(err)
averche marked this conversation as resolved.
Show resolved Hide resolved
}

// Determine if the pattern starts with an alternation for multiple roots
// example (root1|root2)/(?P<name>regex) -> match['(root1|root2)/(?P<name>regex)','root1|root2','/(?P<name>regex)']
match := altRootsRe.FindStringSubmatch(pattern)
if len(match) == 3 {
var expandedRoots []string
for _, root := range strings.Split(match[1], "|") {
expandedRoots = append(expandedRoots, expandPattern(root+match[2])...)
}
return expandedRoots
paths, err := collectPathsFromRegexpAST(rx)
if err != nil {
return nil, err
}

// GenericNameRegex adds a regex that complicates our parsing. It is much easier to
// detect and remove it now than to compensate for in the other regexes.
//
// example: (?P<foo>\\w(([\\w-.]+)?\\w)?) -> (?P<foo>)
base := GenericNameRegex("")
start := strings.Index(base, ">")
end := strings.LastIndex(base, ")")
regexToRemove := ""
if start != -1 && end != -1 && end > start {
regexToRemove = base[start+1 : end]
return paths, nil
}

type pathCollector struct {
strings.Builder
conditionalSlashAppendedAtLength int
}

// collectPathsFromRegexpAST performs a depth-first recursive walk through a regexp AST, collecting an OpenAPI-style
// path as it goes.
//
// Each time it encounters alternation (a|b) or an optional part (a?), it forks its processing to produce additional
// results, to account for each possibility. Note: This does mean that an input pattern with lots of these regexp
// features can produce a lot of different OpenAPI endpoints. At the time of writing, the most complex known example is
//
// "issuer/" + framework.GenericNameRegex(issuerRefParam) + "/crl(/pem|/der|/delta(/pem|/der)?)?"
//
// in the PKI secrets engine which expands to 6 separate paths.
//
// Each named capture group - i.e. (?P<name>something here) - is replaced with an OpenAPI parameter - i.e. {name} - and
// the subtree of regexp AST inside the parameter is completely skipped.
func collectPathsFromRegexpAST(rx *syntax.Regexp) ([]string, error) {
pathCollectors, err := collectPathsFromRegexpASTInternal(rx, []*pathCollector{{}})
if err != nil {
return nil, err
}
pattern = strings.ReplaceAll(pattern, regexToRemove, "")

// Simplify named fields that have limited options, e.g. (?P<foo>a|b|c) -> (<P<foo>.+)
pattern = altFieldsGroupRe.ReplaceAllStringFunc(pattern, func(s string) string {
return altFieldsRe.ReplaceAllString(s, ".+")
})

// Initialize paths with the original pattern or the halves of an
// alternation, which is also present in some patterns.
matches := altRe.FindAllStringSubmatch(pattern, -1)
if len(matches) > 0 {
paths = []string{matches[0][1], matches[0][2]}
} else {
paths = []string{pattern}
paths := make([]string, 0, len(pathCollectors))
for _, collector := range pathCollectors {
if collector.conditionalSlashAppendedAtLength != collector.Len() {
paths = append(paths, collector.String())
}
}
return paths, nil
}

// Expand all optional regex elements into two paths. This approach is really only useful up to 2 optional
// groups, but we probably don't want to deal with the exponential increase beyond that anyway.
for i := 0; i < len(paths); i++ {
p := paths[i]
var errUnsupportableRegexpOperationForOpenAPI = errors.New("path regexp uses an operation that cannot be translated to an OpenAPI pattern")

// match is a 2-element slice that will have a start and end index
// for the left-most match of a regex of form: (lease/)?
match := optRe.FindStringIndex(p)
func collectPathsFromRegexpASTInternal(rx *syntax.Regexp, appendingTo []*pathCollector) ([]*pathCollector, error) {
var err error

if match != nil {
// create a path that includes the optional element but without
// parenthesis or the '?' character.
paths[i] = p[:match[0]] + p[match[0]+1:match[1]-2] + p[match[1]:]
// Depending on the type of this regexp AST node (its Op, i.e. operation), figure out whether it contributes any
// characters to the URL path, and whether we need to recurse through child AST nodes.
//
// Each element of the appendingTo slice tracks a separate path, defined by the alternatives chosen when traversing
// the | and ? conditional regexp features, and new elements are added as each of these features are traversed.
//
// To share this slice across multiple recursive calls of this function, it is passed down as a parameter to each
// recursive call, potentially modified throughout this switch block, and passed back up as a return value at the
// end of this function - the parent call uses the return value to update its own local variable.
switch rx.Op {
maxb marked this conversation as resolved.
Show resolved Hide resolved

// These AST operations are leaf nodes (no children), that match zero characters, so require no processing at all
case syntax.OpEmptyMatch: // e.g. (?:)
case syntax.OpBeginLine: // i.e. ^ when (?m)
case syntax.OpEndLine: // i.e. $ when (?m)
case syntax.OpBeginText: // i.e. \A, or ^ when (?-m)
case syntax.OpEndText: // i.e. \z, or $ when (?-m)
case syntax.OpWordBoundary: // i.e. \b
case syntax.OpNoWordBoundary: // i.e. \B

// OpConcat simply represents multiple parts of the pattern appearing one after the other, so just recurse through
// those pieces.
case syntax.OpConcat:
for _, child := range rx.Sub {
appendingTo, err = collectPathsFromRegexpASTInternal(child, appendingTo)
if err != nil {
return nil, err
}
}

// create a path that excludes the optional element.
paths = append(paths, p[:match[0]]+p[match[1]:])
i--
// OpLiteral is a literal string in the pattern - append it to the paths we are building.
case syntax.OpLiteral:
for _, collector := range appendingTo {
collector.WriteString(string(rx.Rune))
}
}

// Replace named parameters (?P<foo>) with {foo}
var replacedPaths []string
// OpAlternate, i.e. a|b, means we clone all of the pathCollector instances we are currently accumulating paths
// into, and independently recurse through each alternate option.
case syntax.OpAlternate: // i.e |
var totalAppendingTo []*pathCollector
lastIndex := len(rx.Sub) - 1
for index, child := range rx.Sub {
var childAppendingTo []*pathCollector
if index == lastIndex {
// Optimization: last time through this loop, we can simply re-use the existing set of pathCollector
// instances, as we no longer need to preserve them unmodified to make further copies of.
childAppendingTo = appendingTo
} else {
for _, collector := range appendingTo {
newCollector := new(pathCollector)
newCollector.WriteString(collector.String())
newCollector.conditionalSlashAppendedAtLength = collector.conditionalSlashAppendedAtLength
childAppendingTo = append(childAppendingTo, newCollector)
}
}
childAppendingTo, err = collectPathsFromRegexpASTInternal(child, childAppendingTo)
if err != nil {
return nil, err
}
totalAppendingTo = append(totalAppendingTo, childAppendingTo...)
}
appendingTo = totalAppendingTo

// OpQuest, i.e. a?, is much like an alternation between exactly two options, one of which is the empty string.
case syntax.OpQuest:
child := rx.Sub[0]
var childAppendingTo []*pathCollector
for _, collector := range appendingTo {
newCollector := new(pathCollector)
newCollector.WriteString(collector.String())
newCollector.conditionalSlashAppendedAtLength = collector.conditionalSlashAppendedAtLength
childAppendingTo = append(childAppendingTo, newCollector)
}
childAppendingTo, err = collectPathsFromRegexpASTInternal(child, childAppendingTo)
if err != nil {
return nil, err
}
appendingTo = append(appendingTo, childAppendingTo...)

// Many Vault path patterns end with `/?` to accept paths that end with or without a slash. Our current
// convention for generating the OpenAPI is to strip away these slashes. To do that, this very special case
// detects when we just appended a single conditional slash, and records the length of the path at this point,
// so we can later discard this path variant, if nothing else is appended to it later.
if child.Op == syntax.OpLiteral && string(child.Rune) == "/" {
for _, collector := range childAppendingTo {
collector.conditionalSlashAppendedAtLength = collector.Len()
}
}

for _, path := range paths {
result := reqdRe.FindAllStringSubmatch(path, -1)
if result != nil {
for _, p := range result {
par := p[1]
path = strings.Replace(path, p[0], fmt.Sprintf("{%s}", par), 1)
// OpCapture, i.e. ( ) or (?P<name> ), a capturing group
case syntax.OpCapture:
if rx.Name == "" {
// In Vault, an unnamed capturing group is not actually used for capturing.
// We treat it exactly the same as OpConcat.
for _, child := range rx.Sub {
appendingTo, err = collectPathsFromRegexpASTInternal(child, appendingTo)
if err != nil {
return nil, err
}
}
} else {
// A named capturing group is replaced with the OpenAPI parameter syntax, and the regexp inside the group
// is NOT added to the OpenAPI path.
for _, builder := range appendingTo {
builder.WriteRune('{')
builder.WriteString(rx.Name)
builder.WriteRune('}')
}
}
// Final cleanup
path = cleanSuffixRe.ReplaceAllString(path, "")
path = cleanCharsRe.ReplaceAllString(path, "")
replacedPaths = append(replacedPaths, path)

// Any other kind of operation is a problem, and will trigger an error, resulting in the pattern being left out of
// the OpenAPI entirely - that's better than generating a path which is incorrect.
//
// The Op types we expect to hit the default condition are:
//
// OpCharClass - i.e. [something]
// OpAnyCharNotNL - i.e. .
// OpAnyChar - i.e. (?s:.)
// OpStar - i.e. *
// OpPlus - i.e. +
// OpRepeat - i.e. {N}, {N,M}, etc.
//
// In any of these conditions, there is no sensible translation of the path to OpenAPI syntax. (Note, this only
// applies to these appearing outside of a named capture group, otherwise they are handled in the previous case.)
//
// At the time of writing, the only pattern in the builtin Vault plugins that hits this codepath is the ".*"
// pattern in the KVv2 secrets engine, which is not a valid path, but rather, is a catch-all used to implement
// custom error handling behaviour to guide users who attempt to treat a KVv2 as a KVv1. It is already marked as
// Unpublished, so is withheld from the OpenAPI anyway.
//
// For completeness, one other Op type exists, OpNoMatch, which is never generated by syntax.Parse - only by
// subsequent Simplify in preparation to Compile, which is not used here.
default:
return nil, errUnsupportableRegexpOperationForOpenAPI
}

return replacedPaths
return appendingTo, nil
}

// schemaType is a subset of the JSON Schema elements used as a target
Expand Down
Loading