From ccaee22204d65e751cc1840ce9b3816139045a6d Mon Sep 17 00:00:00 2001 From: Roger Peppe Date: Fri, 13 Dec 2024 14:11:52 +0000 Subject: [PATCH] encoding/jsonschema: add structBuilder type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This type plays a key role in the upcoming jsonschema refactoring: it moves the generation of the final syntax from an ad-hoc approach to a more general approach that allows placing a given piece of syntax anywhere in the final result. Even though it's not exported, the functionality stands alone and could potentially be moved into another package in time. Signed-off-by: Roger Peppe Change-Id: I68054a40d420ed1b27a887ee16395d15e21c97ee Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1205772 Reviewed-by: Daniel Martí Unity-Result: CUE porcuepine TryBot-Result: CUEcueckoo --- encoding/jsonschema/ref.go | 27 --- encoding/jsonschema/structbuilder.go | 281 ++++++++++++++++++++++ encoding/jsonschema/structbuilder_test.go | 110 +++++++++ encoding/jsonschema/util.go | 171 +++++++++++++ 4 files changed, 562 insertions(+), 27 deletions(-) create mode 100644 encoding/jsonschema/structbuilder.go create mode 100644 encoding/jsonschema/structbuilder_test.go create mode 100644 encoding/jsonschema/util.go diff --git a/encoding/jsonschema/ref.go b/encoding/jsonschema/ref.go index 9bfee63e3c5..bef86d3fac1 100644 --- a/encoding/jsonschema/ref.go +++ b/encoding/jsonschema/ref.go @@ -428,30 +428,3 @@ func DefaultMapURL(u *url.URL) (string, cue.Path, error) { } return u.Host + p, cue.Path{}, nil } - -// pathRefSyntax returns the syntax for an expression which -// looks up the path inside the given root expression's value. -// It returns an error if the path contains any elements with -// type [cue.OptionalConstraint], [cue.RequiredConstraint], or [cue.PatternConstraint], -// none of which are expressible as a CUE index expression. -// -// TODO implement this properly and move to a method on [cue.Path]. 
-func pathRefSyntax(cuePath cue.Path, root ast.Expr) (ast.Expr, error) { - expr := root - for _, sel := range cuePath.Selectors() { - switch sel.LabelType() { - case cue.StringLabel, cue.DefinitionLabel: - ident := sel.String() - if !ast.IsValidIdent(ident) { - return nil, fmt.Errorf("cannot form expression for path %q", cuePath) - } - expr = &ast.SelectorExpr{ - X: expr, - Sel: ast.NewIdent(sel.String()), - } - default: - return nil, fmt.Errorf("cannot form expression for path %q", cuePath) - } - } - return expr, nil -} diff --git a/encoding/jsonschema/structbuilder.go b/encoding/jsonschema/structbuilder.go new file mode 100644 index 00000000000..1909680bdb1 --- /dev/null +++ b/encoding/jsonschema/structbuilder.go @@ -0,0 +1,281 @@ +package jsonschema + +import ( + "cmp" + "fmt" + + "cuelang.org/go/cue" + "cuelang.org/go/cue/ast" + "cuelang.org/go/cue/token" +) + +// structBuilder builds a struct value incrementally by +// putting values for its component paths. +// The [structBuilder.getRef] method can be used +// to obtain reliable references into the resulting struct. +type structBuilder struct { + root structBuilderNode + + // refIdents records all the identifiers that refer to entries + // at the top level of the struct, keyed by the selector + // they're referring to. + // + // The [Ident.Node] field needs to refer to the field value rather + // than the field label, and we don't know that until the syntax + // method has been invoked, so we fix up the [Ident.Node] fields when + // that happens. + refIdents map[cue.Selector][]*ast.Ident + + // rootRefIdents is like refIdents but for references to the + // struct root itself. + rootRefIdents []*ast.Ident +} + +// structBuilderNode represents one node in the tree of values +// being built. +type structBuilderNode struct { + // value holds the value associated with the node, if any. + // This does not include entries added underneath it by + // [structBuilder.put]. 
+ value ast.Expr + + // comment holds any doc comment associated with the value. + comment *ast.CommentGroup + + // entries holds the children of this node, keyed by the + // name of each child's struct field selector. + entries map[cue.Selector]*structBuilderNode +} + +// put associates value with the given path. It reports whether +// the value was successfully put, returning false if a value +// already exists for the path. +func (b *structBuilder) put(p cue.Path, value ast.Expr, comment *ast.CommentGroup) bool { + e := b.entryForPath(p) + if e.value != nil { + // redefinition + return false + } + e.value = value + e.comment = comment + return true +} + +const rootIdentName = "_schema" + +// getRef returns CUE syntax for a reference to the path p within b. +// It ensures that, if possible, the identifier at the start of the +// reference expression has the correct target node. +func (b *structBuilder) getRef(p cue.Path) (ast.Expr, error) { + if err := p.Err(); err != nil { + return nil, fmt.Errorf("invalid path %v", p) + } + sels := p.Selectors() + if len(sels) == 0 { + // There's no natural name for the root element, + // so use an arbitrary one. + ref := ast.NewIdent(rootIdentName) + + b.rootRefIdents = append(b.rootRefIdents, ref) + return ref, nil + } + base, err := labelForSelector(sels[0]) + if err != nil { + return nil, err + } + baseExpr, ok := base.(*ast.Ident) + if !ok { + return nil, fmt.Errorf("initial element of path %q must be expressed as an identifier", p) + } + // The base identifier needs to refer to the + // first element of the path; the rest doesn't matter. 
+ if b.refIdents == nil { + b.refIdents = make(map[cue.Selector][]*ast.Ident) + } + b.refIdents[sels[0]] = append(b.refIdents[sels[0]], baseExpr) + return pathRefSyntax(cue.MakePath(sels[1:]...), baseExpr) +} + +func (b *structBuilder) entryForPath(p cue.Path) *structBuilderNode { + if err := p.Err(); err != nil { + panic(fmt.Errorf("invalid path %v", p)) + } + sels := p.Selectors() + + n := &b.root + for _, sel := range sels { + if n.entries == nil { + n.entries = make(map[cue.Selector]*structBuilderNode) + } + n1, ok := n.entries[sel] + if !ok { + n1 = &structBuilderNode{} + n.entries[sel] = n1 + } + n = n1 + } + return n +} + +// syntax returns an expression for the whole struct. +func (b *structBuilder) syntax() (*ast.File, error) { + var db declBuilder + if err := b.appendDecls(&b.root, &db); err != nil { + return nil, err + } + // Fix up references (we don't need to do this if the root is a single + // expression, because that only happens when there's nothing + // to refer to). + for _, decl := range db.decls { + if f, ok := decl.(*ast.Field); ok { + for _, ident := range b.refIdents[selectorForLabel(f.Label)] { + ident.Node = f.Value + } + } + } + + var f *ast.File + if len(b.rootRefIdents) == 0 { + // No reference to root, so can use declarations as they are. + f = &ast.File{ + Decls: db.decls, + } + } else { + rootExpr := exprFromDecls(db.decls) + // Fix up references to the root node. + for _, ident := range b.rootRefIdents { + ident.Node = rootExpr + } + rootRef, err := b.getRef(cue.Path{}) + if err != nil { + return nil, err + } + f = &ast.File{ + Decls: []ast.Decl{ + &ast.EmbedDecl{Expr: rootRef}, + &ast.Field{ + Label: ast.NewIdent(rootIdentName), + Value: rootExpr, + }, + }, + } + } + if b.root.comment != nil { + // If Doc is true, as it is for comments on fields, + // then the CUE formatting will join it to any import + // directives, which is not what we want, as then + // it will no longer appear as a comment on the file. 
+ // So set Doc to false to prevent that happening. + b.root.comment.Doc = false + ast.SetComments(f, []*ast.CommentGroup{b.root.comment}) + } + + return f, nil +} + +func (b *structBuilder) appendDecls(n *structBuilderNode, db *declBuilder) (_err error) { + if n.value != nil { + if len(n.entries) > 0 { + // We've got a value associated with this node and also some entries inside it. + // We need to make a struct literal to hold the value and those entries + // because the value might be scalar and + // #x: string + // #x: #y: bool + // is not allowed. + // + // So make a new declBuilder instance with a fresh empty path + // to build the declarations to put inside a struct literal. + db0 := db + db = &declBuilder{} + defer func() { + if _err != nil { + return + } + db0.decls, _err = appendField(db0.decls, cue.MakePath(db0.path...), exprFromDecls(db.decls), n.comment) + }() + } + // Note: when the path is empty, we rely on the outer level + // to add any doc comment required. + db.decls, _err = appendField(db.decls, cue.MakePath(db.path...), n.value, n.comment) + if _err != nil { + return _err + } + } + // TODO slices.SortedFunc(maps.Keys(n.entries), cmpSelector) + for _, sel := range sortedKeys(n.entries, cmpSelector) { + entry := n.entries[sel] + db.pushPath(sel) + err := b.appendDecls(entry, db) + db.popPath() + if err != nil { + return err + } + } + return nil +} + +type declBuilder struct { + decls []ast.Decl + path []cue.Selector +} + +func (b *declBuilder) pushPath(sel cue.Selector) { + b.path = append(b.path, sel) +} + +func (b *declBuilder) popPath() { + b.path = b.path[:len(b.path)-1] +} + +func exprFromDecls(decls []ast.Decl) ast.Expr { + if len(decls) == 1 { + if decl, ok := decls[0].(*ast.EmbedDecl); ok { + // It's a single embedded expression which we can use directly. 
+ return decl.Expr + } + } + return &ast.StructLit{ + Elts: decls, + } +} + +func appendDeclsExpr(decls []ast.Decl, expr ast.Expr) []ast.Decl { + switch expr := expr.(type) { + case *ast.StructLit: + decls = append(decls, expr.Elts...) + default: + elt := &ast.EmbedDecl{Expr: expr} + ast.SetRelPos(elt, token.NewSection) + decls = append(decls, elt) + } + return decls +} +func appendField(decls []ast.Decl, path cue.Path, v ast.Expr, comment *ast.CommentGroup) ([]ast.Decl, error) { + if len(path.Selectors()) == 0 { + return appendDeclsExpr(decls, v), nil + } + expr, err := exprAtPath(path, v) + if err != nil { + return nil, err + } + // exprAtPath will always return a struct literal with exactly + // one element when the path is non-empty. + structLit := expr.(*ast.StructLit) + elt := structLit.Elts[0] + if comment != nil { + ast.SetComments(elt, []*ast.CommentGroup{comment}) + } + ast.SetRelPos(elt, token.NewSection) + return append(decls, elt), nil +} + +func cmpSelector(s1, s2 cue.Selector) int { + if s1 == s2 { + // Avoid String allocation when we can. 
+ return 0 + } + if c := cmp.Compare(s1.Type(), s2.Type()); c != 0 { + return c + } + return cmp.Compare(s1.String(), s2.String()) +} diff --git a/encoding/jsonschema/structbuilder_test.go b/encoding/jsonschema/structbuilder_test.go new file mode 100644 index 00000000000..1f89b0e33de --- /dev/null +++ b/encoding/jsonschema/structbuilder_test.go @@ -0,0 +1,110 @@ +package jsonschema + +import ( + "strings" + "testing" + + "github.com/go-quicktest/qt" + + "cuelang.org/go/cue" + "cuelang.org/go/cue/ast" + "cuelang.org/go/cue/ast/astutil" + "cuelang.org/go/cue/format" + "cuelang.org/go/cue/token" + "cuelang.org/go/internal" +) + +func TestStructBuilderShadowedRef(t *testing.T) { + var b structBuilder + ref, err := b.getRef(cue.ParsePath("#foo.bar.baz")) + qt.Assert(t, qt.IsNil(err)) + ok := b.put(cue.ParsePath("#foo.bar.baz"), ast.NewString("hello"), nil) + qt.Assert(t, qt.IsTrue(ok)) + ok = b.put(cue.ParsePath("#bar.#foo.xxx"), ref, nil) + qt.Assert(t, qt.IsTrue(ok)) + assertStructBuilderSyntax(t, &b, `#bar: #foo: xxx: #foo_1.bar.baz + +#foo_1=#foo: bar: baz: "hello" +`) +} + +func TestStructBuilderSelfRef(t *testing.T) { + var b structBuilder + ref, err := b.getRef(cue.Path{}) + qt.Assert(t, qt.IsNil(err)) + ok := b.put(cue.Path{}, ast.NewStruct(ast.NewIdent("next"), token.OPTION, ref), nil) + qt.Assert(t, qt.IsTrue(ok)) + assertStructBuilderSyntax(t, &b, ` +_schema +_schema: { + next?: _schema +} +`) +} + +func TestStructBuilderEntryInsideValue(t *testing.T) { + var b structBuilder + ok := b.put(cue.ParsePath("#foo"), ast.NewString("hello"), internal.NewComment(true, "foo comment")) + qt.Assert(t, qt.IsTrue(ok)) + ok = b.put(cue.ParsePath("#foo.#bar.#baz"), ast.NewString("goodbye"), internal.NewComment(true, "baz comment")) + qt.Assert(t, qt.IsTrue(ok)) + assertStructBuilderSyntax(t, &b, ` +// foo comment +#foo: { + "hello" + + // baz comment + #bar: #baz: "goodbye" +} +`) +} + +func TestStructBuilderNonIdentifierStringNode(t *testing.T) { + var b structBuilder + 
ref, err := b.getRef(cue.ParsePath(`#foo."a b".baz`)) + qt.Assert(t, qt.IsNil(err)) + ok := b.put(cue.ParsePath(`#foo."a b".baz`), ast.NewString("hello"), nil) + qt.Assert(t, qt.IsTrue(ok)) + ok = b.put(cue.ParsePath("#bar.#foo.xxx"), ref, nil) + qt.Assert(t, qt.IsTrue(ok)) + assertStructBuilderSyntax(t, &b, ` +#bar: #foo: xxx: #foo_1."a b".baz + +#foo_1=#foo: "a b": baz: "hello" +`) +} + +func TestStructBuilderNonIdentifierStringNodeAtRoot(t *testing.T) { + var b structBuilder + _, err := b.getRef(cue.ParsePath(`"a b".baz`)) + qt.Assert(t, qt.ErrorMatches(err, `initial element of path "\\"a b\\"\.baz" must be expressed as an identifier`)) +} + +func TestStructBuilderRedefinition(t *testing.T) { + var b structBuilder + ok := b.put(cue.ParsePath(`a.b.c`), ast.NewString("hello"), nil) + qt.Assert(t, qt.IsTrue(ok)) + ok = b.put(cue.ParsePath(`a.b.c`), ast.NewString("hello"), nil) + qt.Assert(t, qt.IsFalse(ok)) +} + +func TestStructBuilderNonPresentNodeOmittedFromSyntax(t *testing.T) { + var b structBuilder + _, err := b.getRef(cue.ParsePath(`b.c`)) + qt.Assert(t, qt.IsNil(err)) + _, err = b.getRef(cue.ParsePath(`a.c.d`)) + qt.Assert(t, qt.IsNil(err)) + ok := b.put(cue.ParsePath(`a.b`), ast.NewString("hello"), nil) + qt.Assert(t, qt.IsTrue(ok)) + assertStructBuilderSyntax(t, &b, `a: b: "hello"`) +} + +func assertStructBuilderSyntax(t *testing.T, b *structBuilder, want string) { + f, err := b.syntax() + qt.Assert(t, qt.IsNil(err)) + err = astutil.Sanitize(f) + qt.Assert(t, qt.IsNil(err)) + data, err := format.Node(f) + qt.Assert(t, qt.IsNil(err)) + qt.Assert(t, qt.Equals(strings.TrimSpace(string(data)), strings.TrimSpace(want))) +} diff --git a/encoding/jsonschema/util.go b/encoding/jsonschema/util.go new file mode 100644 index 00000000000..17c66eea7d8 --- /dev/null +++ b/encoding/jsonschema/util.go @@ -0,0 +1,171 @@ +// Copyright 2024 CUE Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance 
with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package jsonschema + +import ( + "fmt" + "slices" + "strings" + + "cuelang.org/go/cue" + "cuelang.org/go/cue/ast" + "cuelang.org/go/cue/token" +) + +// TODO a bunch of stuff in this file is potentially suitable +// for more general use. Consider moving some of it +// to the cue package. + +func pathConcat(p1, p2 cue.Path) cue.Path { + sels1, sels2 := p1.Selectors(), p2.Selectors() + if len(sels1) == 0 { + return p2 + } + if len(sels2) == 0 { + return p1 + } + return cue.MakePath(append(slices.Clip(sels1), sels2...)...) +} + +func labelsToCUEPath(labels []ast.Label) (cue.Path, error) { + sels := make([]cue.Selector, len(labels)) + for i, label := range labels { + // Note: we can't use cue.Label because that doesn't + // allow hidden fields. + sels[i] = selectorForLabel(label) + } + path := cue.MakePath(sels...) + if err := path.Err(); err != nil { + return cue.Path{}, err + } + return path, nil +} + +// selectorForLabel is like [cue.Label] except that it allows +// hidden fields, which aren't allowed there because technically +// we can't work out what package to associate with the resulting +// selector. In our case we always imply the local package so +// we don't mind about that. +func selectorForLabel(label ast.Label) cue.Selector { + if label, _ := label.(*ast.Ident); label != nil && strings.HasPrefix(label.Name, "_") { + return cue.Hid(label.Name, "_") + } + return cue.Label(label) +} + +// pathRefSyntax returns the syntax for an expression which +// looks up the path inside the given root expression's value. 
+// An error is returned if the path contains any elements with
+// type [cue.OptionalConstraint], [cue.RequiredConstraint], or [cue.PatternConstraint],
+// none of which can be written as a CUE selector or index expression.
+//
+// Selectors with label type [cue.IndexLabel] become index expressions
+// on the expression built so far; every other selector is converted
+// with labelForSelector and becomes a selector expression.
+//
+// TODO implement this properly and move to a method on [cue.Path].
+func pathRefSyntax(cuePath cue.Path, root ast.Expr) (ast.Expr, error) {
+	result := root
+	for _, s := range cuePath.Selectors() {
+		if s.LabelType() == cue.IndexLabel {
+			// The selector's string form is used verbatim as the
+			// integer literal inside the brackets.
+			idx := &ast.BasicLit{Kind: token.INT, Value: s.String()}
+			result = &ast.IndexExpr{X: result, Index: idx}
+			continue
+		}
+		// Anything that is not an index must be expressible as a label.
+		label, err := labelForSelector(s)
+		if err != nil {
+			return nil, err
+		}
+		result = &ast.SelectorExpr{X: result, Sel: label}
+	}
+	return result, nil
+}
+
+// exprAtPath wraps expr in nested single-field struct literals so
+// that it ends up located at the given path. When the path is empty,
+// expr is returned unchanged.
+// For example:
+//
+//	exprAtPath(cue.ParsePath("a.b.#c"), ast.NewIdent("foo"))
+//
+// would result in a struct literal holding the single field:
+//
+//	a: b: #c: foo
+//
+// It returns an error if any selector in the path cannot be
+// converted to a label by labelForSelector.
+//
+// TODO this is potentially generally useful. It could
+// be exposed as a method on [cue.Path], say
+// `SyntaxForDefinition` or something.
+func exprAtPath(path cue.Path, expr ast.Expr) (ast.Expr, error) {
+	sels := path.Selectors()
+	wrapped := expr
+	// Work from the innermost selector outwards; n is one more than
+	// the index of the selector being wrapped.
+	for n := len(sels); n > 0; n-- {
+		label, err := labelForSelector(sels[n-1])
+		if err != nil {
+			return nil, err
+		}
+		// A StructLit is inlined if both:
+		// - the Lbrace position is invalid
+		// - the Label position is valid.
+		// So every label gets a relative position: token.Newline for
+		// the outermost label and token.Blank for all the others.
+		relPos := token.Blank
+		if n == 1 {
+			relPos = token.Newline
+		}
+		ast.SetPos(label, token.NoPos.WithRel(relPos))
+		field := &ast.Field{Label: label, Value: wrapped}
+		wrapped = &ast.StructLit{Elts: []ast.Decl{field}}
+	}
+	return wrapped, nil
+}
+
+// TODO define this as a Label method on cue.Selector?
+//
+// labelForSelector returns the AST label that spells sel.
+// Only selectors with label type [cue.StringLabel], [cue.DefinitionLabel],
+// [cue.HiddenLabel] or [cue.HiddenDefinitionLabel] are accepted; any
+// other label type results in an error.
+//
+// A selector whose string form starts with a double quote is returned
+// as a string literal with the quotes kept; one whose string form is a
+// valid identifier is returned as an identifier.
+func labelForSelector(sel cue.Selector) (ast.Label, error) {
+	switch typ := sel.LabelType(); typ {
+	case cue.StringLabel, cue.DefinitionLabel, cue.HiddenLabel, cue.HiddenDefinitionLabel:
+		// Supported; handled below.
+	default:
+		return nil, fmt.Errorf("cannot form label for selector %q with type %v", sel, typ)
+	}
+	text := sel.String()
+	if strings.HasPrefix(text, `"`) {
+		// It's quoted for a reason, so maintain the quotes.
+		return &ast.BasicLit{Kind: token.STRING, Value: text}, nil
+	}
+	if ast.IsValidIdent(text) {
+		return ast.NewIdent(text), nil
+	}
+	// Should never happen.
+	return nil, fmt.Errorf("cannot form expression for selector %q", sel)
+}
+
+// sortedKeys returns a new slice holding all the keys of m,
+// sorted with the comparison function cmp. The result is always
+// non-nil, even when m is empty or nil.
+//
+// TODO remove this when we can use [slices.SortedFunc] and [maps.Keys].
+func sortedKeys[K comparable, V any](m map[K]V, cmp func(K, K) int) []K {
+	keys := make([]K, len(m))
+	i := 0
+	for key := range m {
+		keys[i] = key
+		i++
+	}
+	slices.SortFunc(keys, cmp)
+	return keys
+}