Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Datastore copy util #469

Merged
merged 6 commits into from
Dec 9, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion db/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func createBenchDB(b *testing.B, opts ...NewOption) (*DB, func()) {
common.WithNetDebug(true),
)
checkBenchErr(b, err)
store, err := util.NewBadgerDatastore(dir, false)
store, err := util.NewBadgerDatastore(dir, "eventstore", false)
checkBenchErr(b, err)
d, err := NewDB(context.Background(), store, n, thread.NewIDV1(thread.Raw, 32), opts...)
checkBenchErr(b, err)
Expand Down
10 changes: 5 additions & 5 deletions db/db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func TestE2EWithThreads(t *testing.T) {
checkErr(t, err)
defer n1.Close()

store, err := util.NewBadgerDatastore(tmpDir1, false)
store, err := util.NewBadgerDatastore(tmpDir1, "eventstore", false)
checkErr(t, err)
defer store.Close()

Expand Down Expand Up @@ -92,7 +92,7 @@ func TestE2EWithThreads(t *testing.T) {
Schema: util.SchemaFromInstance(&dummy{}, false),
}

store2, err := util.NewBadgerDatastore(tmpDir2, false)
store2, err := util.NewBadgerDatastore(tmpDir2, "eventstore", false)
checkErr(t, err)
defer store2.Close()

Expand Down Expand Up @@ -139,7 +139,7 @@ func TestMissingCollection(t *testing.T) {
checkErr(t, err)
defer n.Close()

store, err := util.NewBadgerDatastore(tmpDir, false)
store, err := util.NewBadgerDatastore(tmpDir, "eventstore", false)
checkErr(t, err)
defer store.Close()

Expand Down Expand Up @@ -175,7 +175,7 @@ func TestWithNewName(t *testing.T) {
)
checkErr(t, err)

store, err := util.NewBadgerDatastore(tmpDir, false)
store, err := util.NewBadgerDatastore(tmpDir, "eventstore", false)
checkErr(t, err)
defer store.Close()

Expand Down Expand Up @@ -223,7 +223,7 @@ func TestWithNewEventCodec(t *testing.T) {
)
checkErr(t, err)

store, err := util.NewBadgerDatastore(tmpDir, false)
store, err := util.NewBadgerDatastore(tmpDir, "eventstore", false)
checkErr(t, err)
defer store.Close()

Expand Down
3 changes: 1 addition & 2 deletions db/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@ import (
"fmt"
"sort"

dse "github.com/textileio/go-datastore-extensions"

"github.com/alecthomas/jsonschema"
ds "github.com/ipfs/go-datastore"
"github.com/ipfs/go-datastore/query"
dse "github.com/textileio/go-datastore-extensions"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
Expand Down
4 changes: 2 additions & 2 deletions db/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func TestManager_GetDB(t *testing.T) {
common.WithNetDebug(true),
)
checkErr(t, err)
store, err := util.NewBadgerDatastore(dir, false)
store, err := util.NewBadgerDatastore(dir, "eventstore", false)
checkErr(t, err)
man, err := NewManager(store, n, WithNewDebug(true))
checkErr(t, err)
Expand Down Expand Up @@ -256,7 +256,7 @@ func createTestManager(t *testing.T) (*Manager, func()) {
common.WithNetDebug(true),
)
checkErr(t, err)
store, err := util.NewBadgerDatastore(dir, false)
store, err := util.NewBadgerDatastore(dir, "eventstore", false)
checkErr(t, err)
m, err := NewManager(store, n, WithNewDebug(true))
checkErr(t, err)
Expand Down
2 changes: 1 addition & 1 deletion db/testutils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func createTestDB(t *testing.T, opts ...NewOption) (*DB, func()) {
common.WithNetDebug(true),
)
checkErr(t, err)
store, err := util.NewBadgerDatastore(dir, false)
store, err := util.NewBadgerDatastore(dir, "eventstore", false)
checkErr(t, err)
d, err := NewDB(context.Background(), store, n, thread.NewIDV1(thread.Raw, 32), opts...)
checkErr(t, err)
Expand Down
4 changes: 2 additions & 2 deletions integrationtests/foldersync/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func newRootClient(name, folderPath, repoPath string) (*client, error) {
return nil, err
}

s, err := util.NewBadgerDatastore(repoPath, false)
s, err := util.NewBadgerDatastore(repoPath, "eventstore", false)
if err != nil {
return nil, err
}
Expand All @@ -106,7 +106,7 @@ func newJoinerClient(name, folderPath, repoPath string, addr ma.Multiaddr, key t
return nil, err
}

s, err := util.NewBadgerDatastore(repoPath, false)
s, err := util.NewBadgerDatastore(repoPath, "eventstore", false)
if err != nil {
return nil, err
}
Expand Down
8 changes: 4 additions & 4 deletions integrationtests/foldersync/foldersync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func TestSimple(t *testing.T) {
network0, err := newNetwork(repoPath0)
checkErr(t, err)

store0, err := util.NewBadgerDatastore(repoPath0, false)
store0, err := util.NewBadgerDatastore(repoPath0, "eventstore", false)
checkErr(t, err)
defer store0.Close()
db0, err := db.NewDB(context.Background(), store0, network0, id, db.WithNewCollections(cc))
Expand All @@ -63,7 +63,7 @@ func TestSimple(t *testing.T) {
network1, err := newNetwork(repoPath1)
checkErr(t, err)

store1, err := util.NewBadgerDatastore(repoPath1, false)
store1, err := util.NewBadgerDatastore(repoPath1, "eventstore", false)
checkErr(t, err)
defer store1.Close()
db1, err := db.NewDBFromAddr(
Expand All @@ -87,7 +87,7 @@ func TestSimple(t *testing.T) {
network2, err := newNetwork(repoPath2)
checkErr(t, err)

store2, err := util.NewBadgerDatastore(repoPath2, false)
store2, err := util.NewBadgerDatastore(repoPath2, "eventstore", false)
checkErr(t, err)
defer store2.Close()
db2, err := db.NewDBFromAddr(
Expand All @@ -111,7 +111,7 @@ func TestSimple(t *testing.T) {
network3, err := newNetwork(repoPath3)
checkErr(t, err)

store3, err := util.NewBadgerDatastore(repoPath3, false)
store3, err := util.NewBadgerDatastore(repoPath3, "eventstore", false)
checkErr(t, err)
defer store3.Close()
db3, err := db.NewDBFromAddr(
Expand Down
2 changes: 1 addition & 1 deletion threadsd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func main() {
if *mongoUri != "" {
store, err = mongods.New(ctx, *mongoUri, *mongoDatabase, mongods.WithCollName("eventstore"))
} else {
store, err = util.NewBadgerDatastore(*repo, *badgerLowMem)
store, err = util.NewBadgerDatastore(*repo, "eventstore", *badgerLowMem)
}
if err != nil {
log.Fatal(err)
Expand Down
127 changes: 127 additions & 0 deletions util/datastore/dscopy/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package main

import (
"context"
"net/url"
"os"

ds "github.com/ipfs/go-datastore"
"github.com/ipfs/go-datastore/query"
logging "github.com/ipfs/go-log/v2"
"github.com/namsral/flag"
badger "github.com/textileio/go-ds-badger"
mongods "github.com/textileio/go-ds-mongo"
)

var log = logging.Logger("ds-copy")

func main() {
fs := flag.NewFlagSet(os.Args[0], 0)

fromBadgerRepo := fs.String("fromBadgerRepo", "", "Source badger repo path")
toBadgerRepo := fs.String("toBadgerRepo", "", "Destination badger repo path")

fromMongoUri := fs.String("fromMongoUri", "", "Source MongoDB URI")
fromMongoDatabase := fs.String("fromMongoDatabase", "", "Source MongoDB database")
fromMongoCollection := fs.String("fromMongoCollection", "", "Source MongoDB collection")
toMongoUri := fs.String("toMongoUri", "", "Destination MongoDB URI")
toMongoDatabase := fs.String("toMongoDatabase", "", "Destination MongoDB database")
toMongoCollection := fs.String("toMongoCollection", "", "Destination MongoDB collection")

verbose := fs.Bool("verbose", false, "More verbose output")
if err := fs.Parse(os.Args[1:]); err != nil {
log.Fatal(err)
}

logging.SetupLogging(logging.Config{
Format: logging.ColorizedOutput,
Stderr: true,
Level: logging.LevelError,
})
if err := logging.SetLogLevel("ds-copy", "info"); err != nil {
log.Fatal(err)
}

if len(*fromBadgerRepo) != 0 && len(*fromMongoUri) != 0 {
log.Fatal("multiple sources specified")
}
if len(*fromBadgerRepo) == 0 && len(*fromMongoUri) == 0 {
log.Fatal("source not specified")
}
if len(*toBadgerRepo) != 0 && len(*toMongoUri) != 0 {
log.Fatal("multiple destinations specified")
}
if len(*toBadgerRepo) == 0 && len(*toMongoUri) == 0 {
log.Fatal("destination not specified")
}

var from, to ds.Datastore
var err error
if len(*fromBadgerRepo) != 0 {
from, err = badger.NewDatastore(*fromBadgerRepo, &badger.DefaultOptions)
if err != nil {
log.Fatalf("connecting to badger source: %v", err)
}
log.Infof("connected to badger source: %s", *fromBadgerRepo)
}
if len(*toBadgerRepo) != 0 {
to, err = badger.NewDatastore(*toBadgerRepo, &badger.DefaultOptions)
if err != nil {
log.Fatalf("connecting to badger destination: %v", err)
}
log.Infof("connected to badger destination: %s", *toBadgerRepo)
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
if len(*fromMongoUri) != 0 {
uri, err := url.Parse(*fromMongoUri)
if err != nil {
log.Fatalf("parsing source mongo URI: %v", err)
}
if len(*fromMongoDatabase) == 0 {
log.Fatal("source mongo database not specified")
}
if len(*fromMongoCollection) == 0 {
log.Fatal("source mongo collection not specified")
}
from, err = mongods.New(ctx, *fromMongoUri, *fromMongoDatabase, mongods.WithCollName(*fromMongoCollection))
if err != nil {
log.Fatalf("connecting to mongo source: %v", err)
}
log.Infof("connected to mongo source: %s", uri.Redacted())
}
if len(*toMongoUri) != 0 {
uri, err := url.Parse(*toMongoUri)
if err != nil {
log.Fatalf("parsing destination mongo URI: %v", err)
}
if len(*toMongoDatabase) == 0 {
log.Fatal("destination mongo database not specified")
}
if len(*toMongoCollection) == 0 {
log.Fatal("destination mongo collection not specified")
}
to, err = mongods.New(ctx, *toMongoUri, *toMongoDatabase, mongods.WithCollName(*toMongoCollection))
if err != nil {
log.Fatalf("connecting to mongo destination: %v", err)
}
log.Infof("connected to mongo destination: %s", uri.Redacted())
}

results, err := from.Query(query.Query{})
if err != nil {
log.Fatalf("querying source: %v", err)
}
defer results.Close()
for r := range results.Next() {
if err := to.Put(ds.NewKey(r.Key), r.Value); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

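Between these two lines (the `for r := range results.Next()` loop header and the `to.Put` call) we need to add a check for the query result's error: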

if r.Error != nil {
   log.Fatal(...)
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah, thanks!

log.Fatalf("copying %s: %v", r.Key, err)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I discovered while working on and testing Powergate migrations that this is too slow for copying (at least for my taste).

Here is what I did for migrations, which reduced times by some orders of magnitude. Nothing crazy: ignoring the migration logic, it is basically some rate-limiting to Read-and-Put up to 1000 entries concurrently instead of serially.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, copied your approach there

if *verbose {
log.Infof("copied %s", r.Key)
}
}

log.Info("done")
}
4 changes: 2 additions & 2 deletions util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ var (
)

// NewBadgerDatastore returns a badger based datastore.
func NewBadgerDatastore(repoPath string, lowMem bool) (kt.TxnDatastoreExtended, error) {
path := filepath.Join(repoPath, "eventstore")
func NewBadgerDatastore(dirPath, name string, lowMem bool) (kt.TxnDatastoreExtended, error) {
path := filepath.Join(dirPath, name)
if err := os.MkdirAll(path, os.ModePerm); err != nil {
return nil, err
}
Expand Down