Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bundle: Add info about the correct rego version to parse modules on the store #7278

Merged
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 25 additions & 11 deletions v1/bundle/bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,9 @@ type Bundle struct {

// Raw contains raw bytes representing the bundle's content
type Raw struct {
Path string
Value []byte
Path string
Value []byte
module *ModuleFile
}

// Patch contains an array of objects wherein each object represents the patch operation to be
Expand Down Expand Up @@ -633,15 +634,6 @@ func (r *Reader) Read() (Bundle, error) {
fullPath := r.fullPath(path)
bs := buf.Bytes()

if r.lazyLoadingMode {
p := fullPath
if r.name != "" {
p = modulePathWithPrefix(r.name, fullPath)
}

raw = append(raw, Raw{Path: p, Value: bs})
}

// Modules are parsed after we've had a chance to read the manifest
mf := ModuleFile{
URL: f.URL(),
Expand All @@ -650,6 +642,15 @@ func (r *Reader) Read() (Bundle, error) {
Raw: bs,
}
modules = append(modules, mf)

if r.lazyLoadingMode {
p := fullPath
if r.name != "" {
p = modulePathWithPrefix(r.name, fullPath)
}

raw = append(raw, Raw{Path: p, Value: bs, module: &mf})
}
} else if filepath.Base(path) == WasmFile {
bundle.WasmModules = append(bundle.WasmModules, WasmModuleFile{
URL: f.URL(),
Expand Down Expand Up @@ -1220,6 +1221,19 @@ func (b *Bundle) RegoVersionForFile(path string, def ast.RegoVersion) (ast.RegoV
return def, fmt.Errorf("unknown bundle rego-version %d for file '%s'", *version, path)
}

func (m *Manifest) RegoVersionForFile(path string) (ast.RegoVersion, error) {
v, err := m.numericRegoVersionForFile(path)
if err != nil {
return ast.RegoUndefined, err
}

if v == nil {
return ast.RegoUndefined, nil
}

return ast.RegoVersionFromInt(*v), nil
}

func (m *Manifest) numericRegoVersionForFile(path string) (*int, error) {
var version *int

Expand Down
23 changes: 16 additions & 7 deletions v1/bundle/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,15 +438,11 @@ func (it *iterator) Next() (*storage.Update, error) {
for _, item := range it.raw {
f := file{name: item.Path}

fpath := strings.TrimLeft(normalizePath(filepath.Dir(f.name)), "/.")
if strings.HasSuffix(f.name, RegoExt) {
fpath = strings.Trim(normalizePath(f.name), "/")
p, err := getFileStoragePath(f.name)
if err != nil {
return nil, err
}

p, ok := storage.ParsePathEscaped("/" + fpath)
if !ok {
return nil, fmt.Errorf("storage path invalid: %v", f.name)
}
f.path = p

f.raw = item.Value
Expand Down Expand Up @@ -506,3 +502,16 @@ func getdepth(path string, isDir bool) int {
basePath := strings.Trim(filepath.Dir(filepath.ToSlash(path)), "/")
return len(strings.Split(basePath, "/"))
}

func getFileStoragePath(path string) (storage.Path, error) {
fpath := strings.TrimLeft(normalizePath(filepath.Dir(path)), "/.")
if strings.HasSuffix(path, RegoExt) {
fpath = strings.Trim(normalizePath(path), "/")
}

p, ok := storage.ParsePathEscaped("/" + fpath)
if !ok {
return nil, fmt.Errorf("storage path invalid: %v", path)
}
return p, nil
}
140 changes: 125 additions & 15 deletions v1/bundle/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
// BundlesBasePath is the storage path used for storing bundle metadata
var BundlesBasePath = storage.MustParsePath("/system/bundles")

var ModulesInfoBasePath = storage.MustParsePath("/system/modules")

// Note: As needed these helpers could be memoized.

// ManifestStoragePath is the storage path used for the given named bundle manifest.
Expand Down Expand Up @@ -59,6 +61,14 @@ func metadataPath(name string) storage.Path {
return append(BundlesBasePath, name, "manifest", "metadata")
}

func moduleRegoVersionPath(id string) storage.Path {
return append(ModulesInfoBasePath, strings.Trim(id, "/"), "rego_version")
}

func moduleInfoPath(id string) storage.Path {
return append(ModulesInfoBasePath, strings.Trim(id, "/"))
}

func read(ctx context.Context, store storage.Store, txn storage.Transaction, path storage.Path) (interface{}, error) {
value, err := store.Read(ctx, txn, path)
if err != nil {
Expand Down Expand Up @@ -166,6 +176,16 @@ func eraseWasmModulesFromStore(ctx context.Context, store storage.Store, txn sto
return suppressNotFound(err)
}

func eraseModuleRegoVersionsFromStore(ctx context.Context, store storage.Store, txn storage.Transaction, modules []string) error {
for _, module := range modules {
err := store.Write(ctx, txn, storage.RemoveOp, moduleInfoPath(module), nil)
if err := suppressNotFound(err); err != nil {
return err
}
}
return nil
}

// ReadWasmMetadataFromStore will read Wasm module resolver metadata from the store.
func ReadWasmMetadataFromStore(ctx context.Context, store storage.Store, txn storage.Transaction, name string) ([]WasmResolver, error) {
path := wasmEntrypointsPath(name)
Expand Down Expand Up @@ -470,7 +490,7 @@ func activateBundles(opts *ActivateOpts) error {
remainingAndExtra[name] = mod
}

err = compileModules(opts.Compiler, opts.Metrics, snapshotBundles, remainingAndExtra, opts.legacy, opts.AuthorizationDecisionRef)
_, err = compileModules(opts.Compiler, opts.Metrics, snapshotBundles, remainingAndExtra, opts.legacy, opts.AuthorizationDecisionRef)
if err != nil {
return err
}
Expand Down Expand Up @@ -626,7 +646,7 @@ func eraseBundles(ctx context.Context, store storage.Store, txn storage.Transact
return nil, err
}

remaining, err := erasePolicies(ctx, store, txn, parserOpts, roots)
remaining, removed, err := erasePolicies(ctx, store, txn, parserOpts, roots)
if err != nil {
return nil, err
}
Expand All @@ -649,6 +669,11 @@ func eraseBundles(ctx context.Context, store storage.Store, txn storage.Transact
}
}

err = eraseModuleRegoVersionsFromStore(ctx, store, txn, removed)
if err != nil {
return nil, err
}

return remaining, nil
}

Expand All @@ -668,44 +693,94 @@ func eraseData(ctx context.Context, store storage.Store, txn storage.Transaction
return nil
}

func erasePolicies(ctx context.Context, store storage.Store, txn storage.Transaction, parserOpts ast.ParserOptions, roots map[string]struct{}) (map[string]*ast.Module, error) {
type moduleInfo struct {
RegoVersion ast.RegoVersion `json:"rego_version"`
}

func readModuleInfoFromStore(ctx context.Context, store storage.Store, txn storage.Transaction) (map[string]moduleInfo, error) {
versions := map[string]moduleInfo{}

value, err := read(ctx, store, txn, ModulesInfoBasePath)
if suppressNotFound(err) != nil {
return nil, err
}

if value != nil {
bs, err := json.Marshal(value)
if err != nil {
return nil, fmt.Errorf("corrupt rego version")
}

err = util.UnmarshalJSON(bs, &versions)
if err != nil {
return nil, fmt.Errorf("corrupt rego version")
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to do this roundtripping, exactly? I'd suspect it's a little more annoying, but not harder, to do this via

if m, ok := value.(map[string]any); ok {
  versions := make(map[string]moduleInfo, len(m))
  
  for k, v := range m {
    if m0, ok := v.(map[string]any); ok {
      if ver, ok := v["rego_version"]; ok {
        if vs, ok := ver.(int) { // float64?      
          versions[k] = moduleInfo{RegoVersion: fromInt(vs)}
        }
      }
    }
  }
  return versions, nil
}

return nil, fmt.Errorf("corrupt rego version")

}

return versions, nil
}

func erasePolicies(ctx context.Context, store storage.Store, txn storage.Transaction, parserOpts ast.ParserOptions, roots map[string]struct{}) (map[string]*ast.Module, []string, error) {

ids, err := store.ListPolicies(ctx, txn)
if err != nil {
return nil, err
return nil, nil, err
}

modulesInfo, err := readModuleInfoFromStore(ctx, store, txn)
if err != nil {
return nil, nil, fmt.Errorf("failed to read module info from store: %w", err)
}

getRegoVersion := func(modId string) (ast.RegoVersion, bool) {
info, ok := modulesInfo[modId]
if !ok {
return ast.RegoUndefined, false
}
return info.RegoVersion, true
}

remaining := map[string]*ast.Module{}
var removed []string

for _, id := range ids {
bs, err := store.GetPolicy(ctx, txn, id)
if err != nil {
return nil, err
return nil, nil, err
}

parserOptsCpy := parserOpts
if regoVersion, ok := getRegoVersion(id); ok {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're doing a store read on every policy id. Is there a performance hit here especially for large bundles? Can we read the entire data once and then use that in the loop? I don't think there would be much performance overhead. Maybe I'm missing something here and we need to structure the data this way. Multiple bundles, maybe?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This got shifted around a bit when making the tests green, but there should be no issue in moving it back to reading the full thing 👍 .

parserOptsCpy.RegoVersion = regoVersion
}
module, err := ast.ParseModuleWithOpts(id, string(bs), parserOpts)

module, err := ast.ParseModuleWithOpts(id, string(bs), parserOptsCpy)
if err != nil {
return nil, err
return nil, nil, err
}
path, err := module.Package.Path.Ptr()
if err != nil {
return nil, err
return nil, nil, err
}
deleted := false
for root := range roots {
if RootPathsContain([]string{root}, path) {
if err := store.DeletePolicy(ctx, txn, id); err != nil {
return nil, err
return nil, nil, err
}
deleted = true
break
}
}
if !deleted {

if deleted {
removed = append(removed, id)
} else {
remaining[id] = module
}
}

return remaining, nil
return remaining, removed, nil
}

func writeManifestToStore(opts *ActivateOpts, name string, manifest Manifest) error {
Expand Down Expand Up @@ -758,6 +833,12 @@ func writeDataAndModules(ctx context.Context, store storage.Store, txn storage.T
if err := store.UpsertPolicy(ctx, txn, path, mf.Raw); err != nil {
return err
}

if regoVersion, err := b.RegoVersionForFile(mf.Path, ast.RegoUndefined); err == nil && regoVersion != ast.RegoUndefined {
if err := write(ctx, store, txn, moduleRegoVersionPath(path), regoVersion); err != nil {
return fmt.Errorf("failed to write rego version for '%s' in bundle '%s': %w", mf.Path, name, err)
}
}
}
} else {
params.BasePaths = *b.Manifest.Roots
Expand All @@ -766,6 +847,25 @@ func writeDataAndModules(ctx context.Context, store storage.Store, txn storage.T
if err != nil {
return fmt.Errorf("store truncate failed for bundle '%s': %v", name, err)
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I understand why we're doing the store write here? Couldn't we just build a mapping in compileModules? It would be helpful to understand the purpose of the code below.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately not. The bundle-store/compiler doesn't necessarily agree with the bundle on module naming; if the bundle reader was initialized with a bundle name, it'll get prefixed to the module name during truncation, but if the reader wasn't, the prefix isn't included. This means that, for raw bundles, if the bundle reader wasn't initialized with a bundle name (outside of the store's control), then the modules are compiled with the bundle name as name prefix, but when truncated into the store, they won't have that prefix.
There are a couple of tests that implicitly cause this state (that failed before); I'll add an explicit test.

for _, f := range b.Raw {
if strings.HasSuffix(f.Path, RegoExt) {
p, err := getFileStoragePath(f.Path)
if err != nil {
return fmt.Errorf("failed get storage path for module '%s' in bundle '%s': %w", f.Path, name, err)
}

if m := f.module; m != nil {
// 'f.module.Path' contains the module's path as it relates to the bundle root, and can be used for looking up the rego-version.
// 'f.Path' can differ, based on how the bundle reader was initialized.
if regoVersion, err := b.RegoVersionForFile(f.module.Path, ast.RegoUndefined); err == nil && regoVersion != ast.RegoUndefined {
if err := write(ctx, store, txn, moduleRegoVersionPath(p.String()), regoVersion); err != nil {
return fmt.Errorf("failed to write rego version for '%s' in bundle '%s': %w", f.Path, name, err)
}
}
}
}
}
}
}

Expand All @@ -792,7 +892,7 @@ func writeData(ctx context.Context, store storage.Store, txn storage.Transaction
return nil
}

func compileModules(compiler *ast.Compiler, m metrics.Metrics, bundles map[string]*Bundle, extraModules map[string]*ast.Module, legacy bool, authorizationDecisionRef ast.Ref) error {
func compileModules(compiler *ast.Compiler, m metrics.Metrics, bundles map[string]*Bundle, extraModules map[string]*ast.Module, legacy bool, authorizationDecisionRef ast.Ref) (map[string]ast.RegoVersion, error) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're not using the returned value in the call site, let's not return it.


m.Timer(metrics.RegoModuleCompile).Start()
defer m.Timer(metrics.RegoModuleCompile).Stop()
Expand All @@ -809,28 +909,38 @@ func compileModules(compiler *ast.Compiler, m metrics.Metrics, bundles map[strin
modules[name] = module
}

moduleIDToRegoVersion := map[string]ast.RegoVersion{}

// include all the new bundle modules
for bundleName, b := range bundles {
if legacy {
// FIXME: Do we need to account for legacy mode in moduleIdToRegoVersion?
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can avoid this as this has been deprecated for a while now.

for _, mf := range b.Modules {
modules[mf.Path] = mf.Parsed
}
} else {
for name, module := range b.ParsedModules(bundleName) {
modules[name] = module

p, err := getFileStoragePath(name)
if err != nil {
return nil, err
}

moduleIDToRegoVersion[strings.TrimLeft(p.String(), "/")] = module.RegoVersion()
}
}
}

if compiler.Compile(modules); compiler.Failed() {
return compiler.Errors
return nil, compiler.Errors
}

if authorizationDecisionRef.Equal(ast.EmptyRef()) {
return nil
return moduleIDToRegoVersion, nil
}

return iCompiler.VerifyAuthorizationPolicySchema(compiler, authorizationDecisionRef)
return moduleIDToRegoVersion, iCompiler.VerifyAuthorizationPolicySchema(compiler, authorizationDecisionRef)
}

func writeModules(ctx context.Context, store storage.Store, txn storage.Transaction, compiler *ast.Compiler, m metrics.Metrics, bundles map[string]*Bundle, extraModules map[string]*ast.Module, legacy bool) error {
Expand Down
Loading
Loading