Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add command to check ocis backup consistency #9238

Merged
merged 14 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changelog/unreleased/add-consistency-check-command.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Enhancement: Add command to check ocis backup consistency

Adds a command that checks the consistency of an ocis backup.

https://github.com/owncloud/ocis/pull/9238
10 changes: 10 additions & 0 deletions docs/ocis/backup.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,13 @@ BACKUP RECOMMENDED/OMITABLE. This folder contains custom web assets. Can be spec

When using an external idp/idm/nats or blobstore, its data needs to be backed up separately. Refer to your idp/idm/nats/blobstore documentation for backup details.

## Backup Consistency Command

Infinite Scale now allows checking an existing backup for consistency. Use the command:
```bash
ocis backup consistency -p "<path-to-base-folder>"
```

Replace `<path-to-base-folder>` with the base path of the storage provider. It should be the same as the value of `STORAGE_USERS_OCIS_ROOT`.

Use the `-b s3ng` option when using an external (s3) blobstore. Note: When using this flag, the path to the blobstore must be configured via environment variables or a yaml file to match the configuration of the original instance. Consistency checks for blobstores other than `ocis` and `s3ng` are not supported at the moment.
173 changes: 173 additions & 0 deletions ocis/pkg/backup/backup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// Package backup contains ocis backup functionality.
package backup

import (
"fmt"
"os"
"regexp"
)

// Inconsistency describes the type of inconsistency that was detected for a
// node, link, blob or blob reference. Its string value is the label that is
// shown when the results are printed.
type Inconsistency string

// The kinds of inconsistency the check can report. They are constants so that
// no caller can accidentally reassign them; the string values are what
// PrintResults shows to the user.
const (
	// InconsistencyBlobMissing is an inconsistency where a blob is missing in the blobstore
	InconsistencyBlobMissing Inconsistency = "blob missing"
	// InconsistencyBlobOrphaned is an inconsistency where a blob in the blobstore has no reference
	InconsistencyBlobOrphaned Inconsistency = "blob orphaned"
	// InconsistencyNodeMissing is an inconsistency where a symlink points to a non-existing node
	InconsistencyNodeMissing Inconsistency = "node missing"
	// InconsistencyMetadataMissing is an inconsistency where a node is missing metadata
	InconsistencyMetadataMissing Inconsistency = "metadata missing"
	// InconsistencySymlinkMissing is an inconsistency where a node is missing a symlink
	InconsistencySymlinkMissing Inconsistency = "symlink missing"
	// InconsistencyFilesMissing is an inconsistency where a node is missing metadata files like .mpk or .mlock
	InconsistencyFilesMissing Inconsistency = "files missing"
	// InconsistencyMalformedFile is an inconsistency where a node has a malformed metadata file
	InconsistencyMalformedFile Inconsistency = "malformed file"
)

// Regular expressions to determine if a node is trashed or versioned. Both are
// anchored at the end of the name and compiled once at package initialisation.
var (
	// _versionRegex matches the suffix of a versioned node, e.g.
	// 9113a718-8285-4b32-9042-f930f1a58ac2.REV.2024-05-22T07:32:53.89969726Z
	_versionRegex = regexp.MustCompile(`\.REV\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+Z$`)
	// _trashRegex matches the suffix of a trashed node, e.g.
	// 9113a718-8285-4b32-9042-f930f1a58ac2.T.2024-05-23T08:25:20.006571811Z <- this HAS a symlink
	_trashRegex = regexp.MustCompile(`\.T\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+Z$`)
)

// Consistency holds the node and blob data of a storage provider.
// It is filled by GatherData; once that call returns, every entry left in one
// of the exported maps describes an inconsistency that PrintResults reports.
type Consistency struct {
// Storing the data like this might take a lot of memory
// we might need to optimize this if we run into memory issues

// Nodes maps a node path to the inconsistencies found for that node.
Nodes map[string][]Inconsistency
// LinkedNodes is keyed by the node path a link points to; an entry means
// no node event for that path has been matched with the link.
LinkedNodes map[string][]Inconsistency
// BlobReferences is keyed by a blob path referenced by a node for which no
// blob event has been matched.
BlobReferences map[string][]Inconsistency
// Blobs is keyed by the path of a blob for which no referencing node has
// been matched.
Blobs map[string][]Inconsistency

// nodeToLink maps a node path to the path of the link that pointed to it.
nodeToLink map[string]string
// blobToNode maps a blob path to the path of the node that referenced it.
blobToNode map[string]string
}

// NewConsistency returns an empty Consistency whose exported and internal
// maps are all initialised, so it can be written to right away.
func NewConsistency() *Consistency {
	c := new(Consistency)

	c.Nodes = map[string][]Inconsistency{}
	c.LinkedNodes = map[string][]Inconsistency{}
	c.BlobReferences = map[string][]Inconsistency{}
	c.Blobs = map[string][]Inconsistency{}

	c.nodeToLink = map[string]string{}
	c.blobToNode = map[string]string{}

	return c
}

// CheckProviderConsistency checks the consistency of the storage provider
// rooted at storagepath and prints the findings.
//
// A provider is created on top of the directory and the given blobstore, its
// data is produced and then evaluated. An error from producing the data is
// returned as is; otherwise the result of PrintResults is returned.
func CheckProviderConsistency(storagepath string, lbs ListBlobstore) error {
	provider := NewProvider(os.DirFS(storagepath), storagepath, lbs)

	err := provider.ProduceData()
	if err != nil {
		return err
	}

	consistency := NewConsistency()
	consistency.GatherData(provider.Events)
	return consistency.PrintResults(storagepath)
}

// GatherData gathers and evaluates data produced by the DataProvider.
//
// It consumes events until the channel is closed. NodeData, LinkData and
// BlobData events are matched against each other; events of any other type
// are ignored. Matched entries are dropped again, so that after the call the
// exported maps only contain inconsistencies:
//   - Nodes: nodes that reported inconsistencies themselves, or that require
//     a symlink but none was seen (InconsistencySymlinkMissing)
//   - LinkedNodes: links whose node was not seen (InconsistencyNodeMissing)
//   - Blobs: blobs no node references (InconsistencyBlobOrphaned)
//   - BlobReferences: referenced blobs that were not seen (InconsistencyBlobMissing)
//
// The blob result does not depend on the order of the events: a blob may be
// referenced by more than one node and may be announced more than once.
func (c *Consistency) GatherData(events <-chan interface{}) {
	// matchedBlobs remembers the blobs that were already paired with a
	// referencing node. Without it a second node referencing the same blob
	// (presumably possible, e.g. for revisions - to be confirmed) would be
	// reported as "blob missing" because the first match already removed the
	// entry, and a repeated blob event would be reported as "blob orphaned".
	// In the worst case this holds one key per blob, which is the same order
	// of memory BlobReferences needs when all nodes arrive before the blobs.
	matchedBlobs := make(map[string]struct{})

	for ev := range events {
		switch d := ev.(type) {
		case NodeData:
			// does it have inconsistencies?
			if len(d.Inconsistencies) != 0 {
				c.Nodes[d.NodePath] = append(c.Nodes[d.NodePath], d.Inconsistencies...)
			}
			// is it linked?
			if _, ok := c.LinkedNodes[d.NodePath]; ok {
				deleteInconsistency(c.LinkedNodes, d.NodePath)
			} else if d.RequiresSymlink && c.Nodes[d.NodePath] == nil {
				// empty entry = "still waiting for the symlink"
				c.Nodes[d.NodePath] = []Inconsistency{}
			}
			// does it have a blob?
			if d.BlobPath != "" {
				if _, ok := c.Blobs[d.BlobPath]; ok {
					// the blob was seen before the node
					deleteInconsistency(c.Blobs, d.BlobPath)
					matchedBlobs[d.BlobPath] = struct{}{}
				} else if _, ok := matchedBlobs[d.BlobPath]; !ok {
					// the blob was not seen yet, wait for it
					c.BlobReferences[d.BlobPath] = []Inconsistency{}
					c.blobToNode[d.BlobPath] = d.NodePath
				}
			}
		case LinkData:
			// does it have a node?
			if _, ok := c.Nodes[d.NodePath]; ok {
				deleteInconsistency(c.Nodes, d.NodePath)
			} else {
				c.LinkedNodes[d.NodePath] = []Inconsistency{}
				c.nodeToLink[d.NodePath] = d.LinkPath
			}
		case BlobData:
			if _, ok := matchedBlobs[d.BlobPath]; ok {
				// already paired with a node, nothing left to check
				continue
			}
			// does it have a reference?
			if _, ok := c.BlobReferences[d.BlobPath]; ok {
				deleteInconsistency(c.BlobReferences, d.BlobPath)
				matchedBlobs[d.BlobPath] = struct{}{}
			} else {
				c.Blobs[d.BlobPath] = []Inconsistency{}
			}
		}
	}

	// Everything that is still pending at this point was never matched.
	for n := range c.Nodes {
		// nodes with own inconsistencies keep them, empty entries waited for a symlink
		if len(c.Nodes[n]) == 0 {
			c.Nodes[n] = append(c.Nodes[n], InconsistencySymlinkMissing)
		}
	}
	for l := range c.LinkedNodes {
		c.LinkedNodes[l] = append(c.LinkedNodes[l], InconsistencyNodeMissing)
	}
	for b := range c.Blobs {
		c.Blobs[b] = append(c.Blobs[b], InconsistencyBlobOrphaned)
	}
	for b := range c.BlobReferences {
		c.BlobReferences[b] = append(c.BlobReferences[b], InconsistencyBlobMissing)
	}
}

// PrintResults prints the results of the evaluation to stdout.
//
// Each non-empty category (nodes, links, blobs, blob references) gets a
// headline followed by one entry per path. If nothing was found at all, a
// success message mentioning discpath is printed instead. The returned error
// is always nil.
func (c *Consistency) PrintResults(discpath string) error {
	if len(c.Nodes) > 0 {
		fmt.Println("\n🚨 Inconsistent Nodes:")
		for nodePath, incs := range c.Nodes {
			fmt.Printf("\t👉️ %v\tpath: %s\n", incs, nodePath)
		}
	}

	if len(c.LinkedNodes) > 0 {
		fmt.Println("\n🚨 Inconsistent Links:")
		for nodePath, incs := range c.LinkedNodes {
			// the map is keyed by the missing node, the link itself is looked up
			fmt.Printf("\t👉️ %v\tpath: %s\n\t\t\t\tmissing node:%s\n", incs, c.nodeToLink[nodePath], nodePath)
		}
	}

	if len(c.Blobs) > 0 {
		fmt.Println("\n🚨 Inconsistent Blobs:")
		for blobPath, incs := range c.Blobs {
			fmt.Printf("\t👉️ %v\tblob: %s\n", incs, blobPath)
		}
	}

	if len(c.BlobReferences) > 0 {
		fmt.Println("\n🚨 Inconsistent BlobReferences:")
		for blobPath, incs := range c.BlobReferences {
			fmt.Printf("\t👉️ %v\tblob: %s\n\t\t\t\treferencing node:%s\n", incs, blobPath, c.blobToNode[blobPath])
		}
	}

	findings := len(c.Nodes) + len(c.LinkedNodes) + len(c.Blobs) + len(c.BlobReferences)
	if findings == 0 {
		fmt.Printf("💚 No inconsistency found. The backup in '%s' seems to be valid.\n", discpath)
	}
	return nil
}

// deleteInconsistency removes path from incs, but only if no inconsistencies
// are recorded for it. Entries that carry inconsistencies are kept, unknown
// paths are a no-op.
func deleteInconsistency(incs map[string][]Inconsistency, path string) {
	if recorded := incs[path]; len(recorded) > 0 {
		return
	}
	delete(incs, path)
}
162 changes: 162 additions & 0 deletions ocis/pkg/backup/backup_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package backup_test

import (
"testing"

"github.com/owncloud/ocis/v2/ocis/pkg/backup"
"github.com/test-go/testify/require"
)

// TestGatherData feeds synthetic provider events into GatherData and compares
// the four exported result maps with the expectation of each case. Every case
// runs as its own subtest, so a failure names the case and does not hide the
// results of the remaining ones.
func TestGatherData(t *testing.T) {
	testcases := []struct {
		Name     string
		Events   []interface{}
		Expected *backup.Consistency
	}{
		{
			Name: "no symlinks - no blobs",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", true),
			},
			Expected: consistency(func(c *backup.Consistency) {
				node(c, "nodepath", backup.InconsistencySymlinkMissing)
				blobReference(c, "blobpath", backup.InconsistencyBlobMissing)
			}),
		},
		{
			Name: "symlink not required - no blobs",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", false),
			},
			Expected: consistency(func(c *backup.Consistency) {
				blobReference(c, "blobpath", backup.InconsistencyBlobMissing)
			}),
		},
		{
			Name: "no inconsistencies",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", true),
				linkData("linkpath", "nodepath"),
				blobData("blobpath"),
			},
			Expected: consistency(func(c *backup.Consistency) {
			}),
		},
		{
			Name: "orphaned blob",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", true),
				linkData("linkpath", "nodepath"),
				blobData("blobpath"),
				blobData("anotherpath"),
			},
			Expected: consistency(func(c *backup.Consistency) {
				blob(c, "anotherpath", backup.InconsistencyBlobOrphaned)
			}),
		},
		{
			Name: "missing node",
			Events: []interface{}{
				linkData("linkpath", "nodepath"),
				blobData("blobpath"),
			},
			Expected: consistency(func(c *backup.Consistency) {
				linkedNode(c, "nodepath", backup.InconsistencyNodeMissing)
				blob(c, "blobpath", backup.InconsistencyBlobOrphaned)
			}),
		},
		{
			Name: "corrupt metadata",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", true, backup.InconsistencyMetadataMissing),
				linkData("linkpath", "nodepath"),
				blobData("blobpath"),
			},
			Expected: consistency(func(c *backup.Consistency) {
				node(c, "nodepath", backup.InconsistencyMetadataMissing)
			}),
		},
		{
			Name: "corrupt metadata, no blob",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", true, backup.InconsistencyMetadataMissing),
				linkData("linkpath", "nodepath"),
			},
			Expected: consistency(func(c *backup.Consistency) {
				node(c, "nodepath", backup.InconsistencyMetadataMissing)
				blobReference(c, "blobpath", backup.InconsistencyBlobMissing)
			}),
		},
	}

	for _, tc := range testcases {
		t.Run(tc.Name, func(t *testing.T) {
			events := make(chan interface{})

			// The channel is unbuffered, so the events are produced
			// concurrently. GatherData only returns after the channel was
			// closed, i.e. the goroutine never outlives the subtest.
			go func() {
				defer close(events)
				for _, ev := range tc.Events {
					// only forward the event types GatherData knows
					switch ev.(type) {
					case backup.NodeData, backup.LinkData, backup.BlobData:
						events <- ev
					}
				}
			}()

			c := backup.NewConsistency()
			c.GatherData(events)

			require.Equal(t, tc.Expected.Nodes, c.Nodes)
			require.Equal(t, tc.Expected.LinkedNodes, c.LinkedNodes)
			require.Equal(t, tc.Expected.Blobs, c.Blobs)
			require.Equal(t, tc.Expected.BlobReferences, c.BlobReferences)
		})
	}
}

// nodeData builds the NodeData event for a node at nodePath that references
// blobPath, optionally requires a symlink and carries the given
// inconsistencies.
func nodeData(nodePath, blobPath string, requiresSymlink bool, incs ...backup.Inconsistency) backup.NodeData {
	var nd backup.NodeData
	nd.NodePath = nodePath
	nd.BlobPath = blobPath
	nd.RequiresSymlink = requiresSymlink
	nd.Inconsistencies = incs
	return nd
}

// linkData builds the LinkData event for a link at linkPath pointing to the
// node at nodePath.
func linkData(linkPath, nodePath string) backup.LinkData {
	var ld backup.LinkData
	ld.LinkPath = linkPath
	ld.NodePath = nodePath
	return ld
}

// blobData builds the BlobData event for a blob at blobPath.
func blobData(blobPath string) backup.BlobData {
	return backup.BlobData{BlobPath: blobPath}
}

// consistency returns a fresh Consistency after f has filled in the expected
// inconsistencies.
func consistency(f func(*backup.Consistency)) *backup.Consistency {
	expected := backup.NewConsistency()
	f(expected)
	return expected
}

// node sets inc as the inconsistencies of the node at path in c.Nodes.
func node(c *backup.Consistency, path string, inc ...backup.Inconsistency) {
c.Nodes[path] = inc
}

// linkedNode sets inc as the inconsistencies of the linked node at path in
// c.LinkedNodes.
func linkedNode(c *backup.Consistency, path string, inc ...backup.Inconsistency) {
c.LinkedNodes[path] = inc
}

// blob sets inc as the inconsistencies of the blob at path in c.Blobs.
func blob(c *backup.Consistency, path string, inc ...backup.Inconsistency) {
c.Blobs[path] = inc
}

// blobReference sets inc as the inconsistencies of the blob reference at path
// in c.BlobReferences.
func blobReference(c *backup.Consistency, path string, inc ...backup.Inconsistency) {
c.BlobReferences[path] = inc
}
Loading