From b32957089cff550598158130fef084130ff2aeaf Mon Sep 17 00:00:00 2001 From: Martin Martinez Rivera Date: Thu, 30 Apr 2020 11:41:20 -0700 Subject: [PATCH] Add GraphQL admin endpoint to list backups. (#5307) An endpoint to read the backup manifests in the given location. Fixes DGRAPH-1229 --- ee/backup/run.go | 2 +- graphql/admin/admin.go | 5 + graphql/admin/endpoints_ee.go | 85 +++++++++++- graphql/admin/list_backups.go | 125 ++++++++++++++++++ systest/online-restore/online_restore_test.go | 34 +++++ worker/backup.go | 6 + worker/backup_common.go | 101 ++++++++++++++ worker/backup_ee.go | 17 +++ worker/backup_handler.go | 39 +----- worker/backup_processor.go | 43 ------ 10 files changed, 375 insertions(+), 82 deletions(-) create mode 100644 graphql/admin/list_backups.go create mode 100644 worker/backup_common.go diff --git a/ee/backup/run.go b/ee/backup/run.go index d2f3fbf7eee..72f8f333f22 100644 --- a/ee/backup/run.go +++ b/ee/backup/run.go @@ -231,7 +231,7 @@ func runRestoreCmd() error { func runLsbackupCmd() error { fmt.Println("Listing backups from:", opt.location) - manifests, err := worker.ListBackupManifests(opt.location) + manifests, err := worker.ListBackupManifests(opt.location, nil) if err != nil { return errors.Wrapf(err, "while listing manifests") } diff --git a/graphql/admin/admin.go b/graphql/admin/admin.go index b41c5874c77..189fee26e32 100644 --- a/graphql/admin/admin.go +++ b/graphql/admin/admin.go @@ -449,6 +449,11 @@ func newAdminResolverFactory() resolve.ResolverFactory { }(resolver) } + // Add admin query endpoints. + rf = rf.WithQueryResolver("listBackups", func(q schema.Query) resolve.QueryResolver { + return resolve.QueryResolverFunc(resolveListBackups) + }) + return rf.WithSchemaIntrospection() } diff --git a/graphql/admin/endpoints_ee.go b/graphql/admin/endpoints_ee.go index 1feb9e07b8f..817c98356c2 100644 --- a/graphql/admin/endpoints_ee.go +++ b/graphql/admin/endpoints_ee.go @@ -100,6 +100,84 @@ const adminTypes = ` response: Response } + input ListBackupsInput { + """ + Destination for the backup: e.g. Minio or S3 bucket. + """ + location: String! + + """ + Access key credential for the destination. + """ + accessKey: String + + """ + Secret key credential for the destination. + """ + secretKey: String + + """ + AWS session token, if required. + """ + sessionToken: String + + """ + Whether the destination doesn't require credentials (e.g. S3 public bucket). + """ + anonymous: Boolean + + } + + type BackupGroup { + """ + The ID of the cluster group. + """ + groupId: Int + + """ + List of predicates assigned to the group. + """ + predicates: [String] + } + + type Manifest { + """ + Unique ID for the backup series. + """ + backupId: String + + """ + Number of this backup within the backup series. The full backup always has a value of one. + """ + backupNum: Int + + """ + Whether this backup was encrypted. + """ + encrypted: Boolean + + """ + List of groups and the predicates they store in this backup. + """ + groups: [BackupGroup] + + """ + Path to the manifest file. + """ + path: String + + """ + The timestamp at which this backup was taken. The next incremental backup will + start from this timestamp. + """ + since: Int + + """ + The type of backup, either full or incremental. 
+ """ + type: String + } + type LoginResponse { """ @@ -348,4 +426,9 @@ const adminQueries = ` getCurrentUser: User queryUser(filter: UserFilter, order: UserOrder, first: Int, offset: Int): [User] - queryGroup(filter: GroupFilter, order: GroupOrder, first: Int, offset: Int): [Group]` + queryGroup(filter: GroupFilter, order: GroupOrder, first: Int, offset: Int): [Group] + + """ + Get the information about the backups at a given location. + """ + listBackups(input: ListBackupsInput!) : [Manifest]` diff --git a/graphql/admin/list_backups.go b/graphql/admin/list_backups.go new file mode 100644 index 00000000000..2a81ba03e1f --- /dev/null +++ b/graphql/admin/list_backups.go @@ -0,0 +1,125 @@ +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package admin + +import ( + "context" + "encoding/json" + + "github.com/dgraph-io/dgraph/graphql/resolve" + "github.com/dgraph-io/dgraph/graphql/schema" + "github.com/dgraph-io/dgraph/worker" + "github.com/dgraph-io/dgraph/x" + "github.com/pkg/errors" +) + +type lsBackupInput struct { + Location string + AccessKey string + SecretKey string + SessionToken string + Anonymous bool + ForceFull bool +} + +type group struct { + GroupId uint32 `json:"groupId,omitempty"` + Predicates []string `json:"predicates,omitempty"` +} + +type manifest struct { + Type string `json:"type,omitempty"` + Since uint64 `json:"since,omitempty"` + Groups []*group `json:"groups,omitempty"` + BackupId string `json:"backupId,omitempty"` + BackupNum uint64 `json:"backupNum,omitempty"` + Path string `json:"path,omitempty"` + Encrypted bool `json:"encrypted,omitempty"` +} + +func resolveListBackups(ctx context.Context, q schema.Query) *resolve.Resolved { + input, err := getLsBackupInput(q) + if err != nil { + return emptyResult(q, err) + } + + creds := &worker.Credentials{ + AccessKey: input.AccessKey, + SecretKey: input.SecretKey, + SessionToken: input.SessionToken, + Anonymous: input.Anonymous, + } + manifests, err := worker.ProcessListBackups(ctx, input.Location, creds) + if err != nil { + return emptyResult(q, errors.Errorf("%s: %s", x.Error, err.Error())) + } + convertedManifests := convertManifests(manifests) + + results := make([]map[string]interface{}, 0) + for _, m := range convertedManifests { + b, err := json.Marshal(m) + if err != nil { + return emptyResult(q, err) + } + var result map[string]interface{} + err = json.Unmarshal(b, &result) + if err != nil { + return emptyResult(q, err) + } + results = append(results, result) + } + + return &resolve.Resolved{ + Data: map[string]interface{}{q.Name(): results}, + Field: q, + } +} + +func getLsBackupInput(q schema.Query) (*lsBackupInput, error) { + inputArg := q.ArgValue(schema.InputArgName) + inputByts, err := json.Marshal(inputArg) + if err != nil { + return nil, schema.GQLWrapf(err, "couldn't get input argument") + } + + var input lsBackupInput + err = json.Unmarshal(inputByts, &input) + return &input, schema.GQLWrapf(err, "couldn't get input argument") +} + 
+func convertManifests(manifests []*worker.Manifest) []*manifest {
+	res := make([]*manifest, len(manifests))
+	for i, m := range manifests {
+		res[i] = &manifest{
+			Type:      m.Type,
+			Since:     m.Since,
+			BackupId:  m.BackupId,
+			BackupNum: m.BackupNum,
+			Path:      m.Path,
+			Encrypted: m.Encrypted,
+		}
+
+		res[i].Groups = make([]*group, 0)
+		for gid, preds := range m.Groups {
+			res[i].Groups = append(res[i].Groups, &group{
+				GroupId:    gid,
+				Predicates: preds,
+			})
+		}
+	}
+	return res
+}
diff --git a/systest/online-restore/online_restore_test.go b/systest/online-restore/online_restore_test.go
index 2166507a134..18e9e04ebb7 100644
--- a/systest/online-restore/online_restore_test.go
+++ b/systest/online-restore/online_restore_test.go
@@ -161,3 +161,37 @@ func TestInvalidBackupId(t *testing.T) {
 	require.NoError(t, err)
 	require.Contains(t, string(buf), "failed to verify backup")
 }
+
+func TestListBackups(t *testing.T) {
+	query := `query backup() {
+		listBackups(input: {location: "/data/backup"}) {
+			backupId
+			backupNum
+			encrypted
+			groups {
+				groupId
+				predicates
+			}
+			path
+			since
+			type
+		}
+	}`
+
+	adminUrl := "http://localhost:8180/admin"
+	params := testutil.GraphQLParams{
+		Query: query,
+	}
+	b, err := json.Marshal(params)
+	require.NoError(t, err)
+
+	resp, err := http.Post(adminUrl, "application/json", bytes.NewBuffer(b))
+	require.NoError(t, err)
+	buf, err := ioutil.ReadAll(resp.Body)
+	require.NoError(t, err)
+	sbuf := string(buf)
+	require.Contains(t, sbuf, `"backupId":"heuristic_sammet9"`)
+	require.Contains(t, sbuf, `"backupNum":1`)
+	require.Contains(t, sbuf, `"backupNum":2`)
+	require.Contains(t, sbuf, "initial_release_date")
+}
diff --git a/worker/backup.go b/worker/backup.go
index d6ec5d0ab31..fa731882b53 100644
--- a/worker/backup.go
+++ b/worker/backup.go
@@ -36,3 +36,9 @@ func ProcessBackupRequest(ctx context.Context, req *pb.BackupRequest, forceFull
 	glog.Warningf("Backup failed: %v", x.ErrNotSupported)
 	return x.ErrNotSupported
 }
+
+func ProcessListBackups(ctx context.Context, location string, creds *Credentials) (
+	[]*Manifest, error) {
+
+	return nil, x.ErrNotSupported
+}
diff --git a/worker/backup_common.go b/worker/backup_common.go
new file mode 100644
index 00000000000..e91a3c8d417
--- /dev/null
+++ b/worker/backup_common.go
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2020 Dgraph Labs, Inc. and Contributors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package worker
+
+import (
+	"sync"
+
+	"github.com/dgraph-io/dgraph/protos/pb"
+)
+
+// predicateSet is a map whose keys are predicates. It is meant to be used as a set.
+type predicateSet map[string]struct{}
+
+// Manifest records backup details; these are values used during restore.
+// Since is the timestamp from which the next incremental backup should start (it's set
+// to the readTs of the current backup).
+// Groups are the IDs of the groups involved.
+type Manifest struct {
+	sync.Mutex
+	// Type is the type of backup, either full or incremental.
+	Type string `json:"type"`
+	// Since is the timestamp at which this backup was taken. It's called Since
+	// because it will become the timestamp from which to backup in the next
+	// incremental backup.
+	Since uint64 `json:"since"`
+	// Groups is the map of valid groups to predicates at the time the backup was created.
+	Groups map[uint32][]string `json:"groups"`
+	// BackupId is a unique ID assigned to all the backups in the same series
+	// (from the first full backup to the last incremental backup).
+	BackupId string `json:"backup_id"`
+	// BackupNum is a monotonically increasing number assigned to each backup in
+	// a series. The full backup has BackupNum equal to one and each incremental
+	// backup gets assigned the next available number. Used to verify the integrity
+	// of the data during a restore.
+	BackupNum uint64 `json:"backup_num"`
+	// Path is the path to the manifest file. This field is only used during
+	// processing and is not written to disk.
+	Path string `json:"-"`
+	// Encrypted indicates whether this backup was encrypted or not.
+	Encrypted bool `json:"encrypted"`
+}
+
+func (m *Manifest) getPredsInGroup(gid uint32) predicateSet {
+	preds, ok := m.Groups[gid]
+	if !ok {
+		return nil
+	}
+
+	predSet := make(predicateSet)
+	for _, pred := range preds {
+		predSet[pred] = struct{}{}
+	}
+	return predSet
+}
+
+// Credentials holds the credentials needed to perform a backup operation.
+// If these credentials are missing the default credentials will be used.
+type Credentials struct {
+	AccessKey    string
+	SecretKey    string
+	SessionToken string
+	Anonymous    bool
+}
+
+func (creds *Credentials) hasCredentials() bool {
+	if creds == nil {
+		return false
+	}
+	return creds.AccessKey != "" || creds.SecretKey != "" || creds.SessionToken != ""
+}
+
+func (creds *Credentials) isAnonymous() bool {
+	if creds == nil {
+		return false
+	}
+	return creds.Anonymous
+}
+
+// GetCredentialsFromRequest extracts the credentials from a backup request.
+func GetCredentialsFromRequest(req *pb.BackupRequest) *Credentials {
+	return &Credentials{
+		AccessKey:    req.GetAccessKey(),
+		SecretKey:    req.GetSecretKey(),
+		SessionToken: req.GetSessionToken(),
+		Anonymous:    req.GetAnonymous(),
+	}
+}
diff --git a/worker/backup_ee.go b/worker/backup_ee.go
index b8791cc0b55..accc006ed9e 100644
--- a/worker/backup_ee.go
+++ b/worker/backup_ee.go
@@ -15,6 +15,7 @@ package worker
 import (
 	"context"
 	"net/url"
+	"sort"
 	"time"
 
 	"github.com/dgraph-io/dgraph/posting"
@@ -189,3 +190,19 @@ func ProcessBackupRequest(ctx context.Context, req *pb.BackupRequest, forceFull
 	bp := &BackupProcessor{Request: req}
 	return bp.CompleteBackup(ctx, &m)
 }
+
+func ProcessListBackups(ctx context.Context, location string, creds *Credentials) (
+	[]*Manifest, error) {
+
+	manifests, err := ListBackupManifests(location, creds)
+	if err != nil {
+		return nil, errors.Wrapf(err, "cannot read manifests at location %s", location)
+	}
+
+	res := make([]*Manifest, 0)
+	for _, m := range manifests {
+		res = append(res, m)
+	}
+	sort.Slice(res, func(i, j int) bool { return res[i].Path < res[j].Path })
+	return res, nil
+}
diff --git a/worker/backup_handler.go b/worker/backup_handler.go
index 62b603ac2e0..cb915dcef3c 100644
--- a/worker/backup_handler.go
+++ b/worker/backup_handler.go
@@ -94,39 +94,6 @@ type UriHandler interface {
 	ReadManifest(string, *Manifest) error
 }
 
-// Credentials holds the credentials needed to perform a backup operation.
-// If these credentials are missing the default credentials will be used.
-type Credentials struct { - AccessKey string - SecretKey string - SessionToken string - Anonymous bool -} - -func (creds *Credentials) hasCredentials() bool { - if creds == nil { - return false - } - return creds.AccessKey != "" || creds.SecretKey != "" || creds.SessionToken != "" -} - -func (creds *Credentials) isAnonymous() bool { - if creds == nil { - return false - } - return creds.Anonymous -} - -// GetCredentialsFromRequest extracts the credentials from a backup request. -func GetCredentialsFromRequest(req *pb.BackupRequest) *Credentials { - return &Credentials{ - AccessKey: req.GetAccessKey(), - SecretKey: req.GetSecretKey(), - SessionToken: req.GetSessionToken(), - Anonymous: req.GetAnonymous(), - } -} - // getHandler returns a UriHandler for the URI scheme. func getHandler(scheme string, creds *Credentials) UriHandler { switch scheme { @@ -174,9 +141,6 @@ func NewUriHandler(uri *url.URL, creds *Credentials) (UriHandler, error) { return h, nil } -// predicateSet is a map whose keys are predicates. It is meant to be used as a set. -type predicateSet map[string]struct{} - // loadFn is a function that will receive the current file being read. // A reader, the backup groupId, and a map whose keys are the predicates to restore // are passed as arguments. @@ -216,7 +180,7 @@ func VerifyBackup(location, backupId string, creds *Credentials, currentGroups [ } // ListBackupManifests scans location l for backup files and returns the list of manifests. -func ListBackupManifests(l string) (map[string]*Manifest, error) { +func ListBackupManifests(l string, creds *Credentials) (map[string]*Manifest, error) { uri, err := url.Parse(l) if err != nil { return nil, err @@ -239,6 +203,7 @@ func ListBackupManifests(l string) (map[string]*Manifest, error) { if err := h.ReadManifest(path, &m); err != nil { return nil, errors.Wrapf(err, "While reading %q", path) } + m.Path = path listedManifests[path] = &m } diff --git a/worker/backup_processor.go b/worker/backup_processor.go index 74bc842ea21..0f5a3145199 100644 --- a/worker/backup_processor.go +++ b/worker/backup_processor.go @@ -21,7 +21,6 @@ import ( "fmt" "io" "net/url" - "sync" "github.com/dgraph-io/badger/v2" bpb "github.com/dgraph-io/badger/v2/pb" @@ -53,48 +52,6 @@ type LoadResult struct { Err error } -// Manifest records backup details, these are values used during restore. -// Since is the timestamp from which the next incremental backup should start (it's set -// to the readTs of the current backup). -// Groups are the IDs of the groups involved. -type Manifest struct { - sync.Mutex - //Type is the type of backup, either full or incremental. - Type string `json:"type"` - // Since is the timestamp at which this backup was taken. It's called Since - // because it will become the timestamp from which to backup in the next - // incremental backup. - Since uint64 `json:"since"` - // Groups is the map of valid groups to predicates at the time the backup was created. - Groups map[uint32][]string `json:"groups"` - // BackupId is a unique ID assigned to all the backups in the same series - // (from the first full backup to the last incremental backup). - BackupId string `json:"backup_id"` - // BackupNum is a monotonically increasing number assigned to each backup in - // a series. The full backup as BackupNum equal to one and each incremental - // backup gets assigned the next available number. Used to verify the integrity - // of the data during a restore. - BackupNum uint64 `json:"backup_num"` - // Path is the path to the manifest file. 
This field is only used during - // processing and is not written to disk. - Path string `json:"-"` - // Encrypted indicates whether this backup was encrypted or not. - Encrypted bool `json:"encrypted"` -} - -func (m *Manifest) getPredsInGroup(gid uint32) predicateSet { - preds, ok := m.Groups[gid] - if !ok { - return nil - } - - predSet := make(predicateSet) - for _, pred := range preds { - predSet[pred] = struct{}{} - } - return predSet -} - // WriteBackup uses the request values to create a stream writer then hand off the data // retrieval to stream.Orchestrate. The writer will create all the fd's needed to // collect the data and later move to the target.
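
For reference, below is a minimal client-side sketch (not part of the patch above) of how the new listBackups admin query might be called over HTTP. It is modeled on the TestListBackups test in this patch; the endpoint address (http://localhost:8180/admin) and the backup location (/data/backup) are assumptions taken from that test setup and should be adjusted for a real cluster.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
)

func main() {
	// The query mirrors the listBackups field added to adminQueries in endpoints_ee.go.
	query := `query {
		listBackups(input: {location: "/data/backup"}) {
			backupId
			backupNum
			path
			since
			type
		}
	}`

	body, err := json.Marshal(map[string]string{"query": query})
	if err != nil {
		log.Fatal(err)
	}

	// Assumed address: the /admin endpoint of an Alpha, as used by the test above.
	resp, err := http.Post("http://localhost:8180/admin", "application/json", bytes.NewBuffer(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// The manifests are returned as JSON under data.listBackups.
	out, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(out))
}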