Skip to content

Commit

Permalink
First version of rocksdb_dump and rocksdb_undump.
Browse files Browse the repository at this point in the history
Summary: Hack up rocksdb_dump and rocksdb_undump utilities to get this task rolling/promote discussion.

Test Plan: Dump/undump databases recursively to see if nothing is lost.

Reviewers: sdong, yhchiang, rven, anthony, kradhakrishnan, igor

Reviewed By: igor

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D37269
  • Loading branch information
mcallahan committed Jun 19, 2015
1 parent 04251e1 commit 15325bf
Show file tree
Hide file tree
Showing 7 changed files with 321 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ package/
.phutil_module_cache
unity
tags
rocksdb_dump
rocksdb_undump

java/out
java/target
Expand Down
16 changes: 16 additions & 0 deletions DUMP_FORMAT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
## RocksDB dump format

The version 1 RocksDB dump format is fairly simple:

1) The dump starts with the magic 8 byte identifier "ROCKDUMP"

2) The magic is followed by an 8 byte big-endian version which is 0x0000000000000001.

3) Next are arbitrarily sized chunks of bytes, each preceded by a 4 byte little-endian number indicating how large the chunk is.

4) The first chunk is special and is a json string indicating some things about the creation of this dump. It contains the following keys:
* database-path: The path of the database this dump was created from.
* hostname: The hostname of the machine where the dump was created.
* creation-time: Unix seconds since the epoch when this dump was created.

5) Following the info chunk, the remaining chunks are paired into key/value pairs: each key chunk is immediately followed by its value chunk.
12 changes: 11 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,9 @@ TOOLS = \
db_sanity_test \
db_stress \
ldb \
db_repl_stress
db_repl_stress \
rocksdb_dump \
rocksdb_undump

BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench

Expand Down Expand Up @@ -516,6 +518,8 @@ check: all
echo "===== Running $$t"; ./$$t || exit 1; done; \
fi
rm -rf $(TMPD)
python tools/ldb_test.py
sh tools/rocksdb_dump_test.sh

check_some: $(SUBSET) ldb_tests
for t in $(SUBSET); do echo "===== Running $$t"; ./$$t || exit 1; done
Expand Down Expand Up @@ -795,6 +799,12 @@ deletefile_test: db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS)
geodb_test: utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

rocksdb_dump: tools/dump/rocksdb_dump.o $(LIBOBJECTS)
$(AM_LINK)

rocksdb_undump: tools/dump/rocksdb_undump.o $(LIBOBJECTS)
$(AM_LINK)

cuckoo_table_builder_test: table/cuckoo_table_builder_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

Expand Down
149 changes: 149 additions & 0 deletions tools/dump/rocksdb_dump.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

#ifndef GFLAGS
#include <cstdio>
// Stub entry point compiled when GFLAGS is not defined: the real tool cannot
// be built, so tell the user why and report failure.
int main() {
  std::fputs("Please install gflags to run rocksdb tools\n", stderr);
  return 1;
}
#else

#include <gflags/gflags.h>
#include <iostream>

#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "util/coding.h"

DEFINE_bool(anonymous, false, "Output an empty information blob.");

// Prints the command-line synopsis for this tool to standard output.
// `name` is the program name to show (callers pass argv[0]).
void usage(const char* name) {
  static const char kArguments[] = " [--anonymous] <db> <dumpfile>";
  std::cout << "usage: ";
  std::cout << name;
  std::cout << kArguments << std::endl;
}

int main(int argc, char** argv) {
rocksdb::DB* dbptr;
rocksdb::Options options;
rocksdb::Status status;
std::unique_ptr<rocksdb::WritableFile> dumpfile;
char hostname[1024];
int64_t timesec;
std::string abspath;
char json[4096];

GFLAGS::ParseCommandLineFlags(&argc, &argv, true);

static const char* magicstr = "ROCKDUMP";
static const char versionstr[8] = {0, 0, 0, 0, 0, 0, 0, 1};

if (argc != 3) {
usage(argv[0]);
exit(1);
}

rocksdb::Env* env = rocksdb::Env::Default();

// Open the database
options.create_if_missing = false;
status = rocksdb::DB::OpenForReadOnly(options, argv[1], &dbptr);
if (!status.ok()) {
std::cerr << "Unable to open database '" << argv[1]
<< "' for reading: " << status.ToString() << std::endl;
exit(1);
}

const std::unique_ptr<rocksdb::DB> db(dbptr);

status = env->NewWritableFile(argv[2], &dumpfile, rocksdb::EnvOptions());
if (!status.ok()) {
std::cerr << "Unable to open dump file '" << argv[2]
<< "' for writing: " << status.ToString() << std::endl;
exit(1);
}

rocksdb::Slice magicslice(magicstr, 8);
status = dumpfile->Append(magicslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}

rocksdb::Slice versionslice(versionstr, 8);
status = dumpfile->Append(versionslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}

if (FLAGS_anonymous) {
snprintf(json, sizeof(json), "{}");
} else {
status = env->GetHostName(hostname, sizeof(hostname));
status = env->GetCurrentTime(&timesec);
status = env->GetAbsolutePath(argv[1], &abspath);
snprintf(json, sizeof(json),
"{ \"database-path\": \"%s\", \"hostname\": \"%s\", "
"\"creation-time\": %ld }",
abspath.c_str(), hostname, timesec);
}

rocksdb::Slice infoslice(json, strlen(json));
char infosize[4];
rocksdb::EncodeFixed32(infosize, (uint32_t)infoslice.size());
rocksdb::Slice infosizeslice(infosize, 4);
status = dumpfile->Append(infosizeslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
status = dumpfile->Append(infoslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}

const std::unique_ptr<rocksdb::Iterator> it(
db->NewIterator(rocksdb::ReadOptions()));
for (it->SeekToFirst(); it->Valid(); it->Next()) {
char keysize[4];
rocksdb::EncodeFixed32(keysize, (uint32_t)it->key().size());
rocksdb::Slice keysizeslice(keysize, 4);
status = dumpfile->Append(keysizeslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
status = dumpfile->Append(it->key());
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}

char valsize[4];
rocksdb::EncodeFixed32(valsize, (uint32_t)it->value().size());
rocksdb::Slice valsizeslice(valsize, 4);
status = dumpfile->Append(valsizeslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
status = dumpfile->Append(it->value());
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
}
if (!it->status().ok()) {
std::cerr << "Database iteration failed: " << status.ToString()
<< std::endl;
exit(1);
}

return 0;
}

#endif // GFLAGS
136 changes: 136 additions & 0 deletions tools/dump/rocksdb_undump.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

#include <cstring>
#include <iostream>

#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "util/coding.h"

// Prints the command-line synopsis for this tool to standard output.
// `name` is the program name to show (callers pass argv[0]).
void usage(const char *name) {
  static const char kArguments[] = " <dumpfile> <rocksdb>";
  std::cout << "usage: ";
  std::cout << name;
  std::cout << kArguments << std::endl;
}

// Makes sure `*buf` can hold at least `needed` bytes. When it is too small a
// new buffer is allocated (previous contents are discarded) and `*capacity`
// is updated. Capacity grows by doubling; if doubling would overflow size_t
// the exact requested size is used instead, so the loop always terminates.
static void EnsureCapacity(std::unique_ptr<char[]> *buf, size_t *capacity,
                           size_t needed) {
  if (needed <= *capacity) {
    return;
  }
  size_t grown = (*capacity == 0) ? 1 : *capacity;
  while (grown < needed) {
    if (grown > static_cast<size_t>(-1) / 2) {
      grown = needed;
      break;
    }
    grown *= 2;
  }
  buf->reset(new char[grown]);
  *capacity = grown;
}

// Loads the dump file argv[1] into the database at argv[2], creating the
// database if it does not exist.
//
// The dump must start with the 8 byte magic "ROCKDUMP" and the 8 byte
// version; the info chunk that follows is skipped. Every subsequent pair of
// length-prefixed chunks is written to the database as a key and its value.
//
// Returns 0 when the whole dump was loaded and 1 on any failure, including a
// dump that ends in the middle of a record.
int main(int argc, char **argv) {
  static const char *magicstr = "ROCKDUMP";
  static const char versionstr[8] = {0, 0, 0, 0, 0, 0, 0, 1};

  if (argc != 3) {
    usage(argv[0]);
    return 1;
  }

  rocksdb::Env *env = rocksdb::Env::Default();

  std::unique_ptr<rocksdb::SequentialFile> dumpfile;
  rocksdb::Status status =
      env->NewSequentialFile(argv[1], &dumpfile, rocksdb::EnvOptions());
  if (!status.ok()) {
    std::cerr << "Unable to open dump file '" << argv[1]
              << "' for reading: " << status.ToString() << std::endl;
    return 1;
  }

  rocksdb::Slice slice;
  char scratch8[8];

  status = dumpfile->Read(8, &slice, scratch8);
  if (!status.ok() || slice.size() != 8 ||
      memcmp(slice.data(), magicstr, 8) != 0) {
    std::cerr << "File '" << argv[1] << "' is not a recognizable dump file."
              << std::endl;
    return 1;
  }

  status = dumpfile->Read(8, &slice, scratch8);
  if (!status.ok() || slice.size() != 8 ||
      memcmp(slice.data(), versionstr, 8) != 0) {
    std::cerr << "File '" << argv[1] << "' version not recognized."
              << std::endl;
    return 1;
  }

  // The info chunk is not needed to rebuild the database: read its size and
  // skip over its contents.
  status = dumpfile->Read(4, &slice, scratch8);
  if (!status.ok() || slice.size() != 4) {
    std::cerr << "Unable to read info blob size." << std::endl;
    return 1;
  }
  const uint32_t infosize = rocksdb::DecodeFixed32(slice.data());
  status = dumpfile->Skip(infosize);
  if (!status.ok()) {
    std::cerr << "Unable to skip info blob: " << status.ToString() << std::endl;
    return 1;
  }

  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DB *dbptr = nullptr;
  status = rocksdb::DB::Open(options, argv[2], &dbptr);
  if (!status.ok()) {
    std::cerr << "Unable to open database '" << argv[2]
              << "' for writing: " << status.ToString() << std::endl;
    return 1;
  }

  // From here on failures `return` instead of calling exit() so that this
  // owner's destructor runs and the database is released.
  const std::unique_ptr<rocksdb::DB> db(dbptr);

  // Scratch buffers are reused across records and only grow.
  size_t keycapacity = 64;
  size_t valcapacity = 1 << 20;
  std::unique_ptr<char[]> keyscratch(new char[keycapacity]);
  std::unique_ptr<char[]> valscratch(new char[valcapacity]);

  for (;;) {
    rocksdb::Slice keyslice;
    rocksdb::Slice valslice;

    status = dumpfile->Read(4, &slice, scratch8);
    if (!status.ok()) {
      std::cerr << "Unable to read key size: " << status.ToString()
                << std::endl;
      return 1;
    }
    // An empty read at a record boundary is taken as the normal end of the
    // dump (assumes Read reports end of file as a short read with an OK
    // status); a partial size field means the dump is truncated.
    if (slice.size() == 0) {
      break;
    }
    if (slice.size() != 4) {
      std::cerr << "Unable to read key size: insufficient data" << std::endl;
      return 1;
    }
    const uint32_t keysize = rocksdb::DecodeFixed32(slice.data());
    EnsureCapacity(&keyscratch, &keycapacity, keysize);

    status = dumpfile->Read(keysize, &keyslice, keyscratch.get());
    if (!status.ok() || keyslice.size() != keysize) {
      std::cerr << "Key read failure: "
                << (status.ok() ? "insufficient data" : status.ToString())
                << std::endl;
      return 1;
    }

    status = dumpfile->Read(4, &slice, scratch8);
    if (!status.ok() || slice.size() != 4) {
      std::cerr << "Unable to read value size: "
                << (status.ok() ? "insufficient data" : status.ToString())
                << std::endl;
      return 1;
    }
    const uint32_t valsize = rocksdb::DecodeFixed32(slice.data());
    EnsureCapacity(&valscratch, &valcapacity, valsize);

    status = dumpfile->Read(valsize, &valslice, valscratch.get());
    if (!status.ok() || valslice.size() != valsize) {
      std::cerr << "Unable to read value: "
                << (status.ok() ? "insufficient data" : status.ToString())
                << std::endl;
      return 1;
    }

    status = db->Put(rocksdb::WriteOptions(), keyslice, valslice);
    if (!status.ok()) {
      std::cerr << "Unable to write database entry: " << status.ToString()
                << std::endl;
      return 1;
    }
  }

  return 0;
}
7 changes: 7 additions & 0 deletions tools/rocksdb_dump_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh
# Round-trip test for rocksdb_undump / rocksdb_dump: load the checked-in
# sample dump into a scratch database, dump that database again, and require
# the new dump to be byte-identical to the sample.
#
# Run from the repository root (paths below are relative to it).

# Stop at the first failing step so the failure is reported by the tool that
# actually broke rather than by a later cmp.
set -e

TESTDIR=$(mktemp -d /tmp/rocksdb-dump-test.XXXXX)
DUMPFILE="tools/sample-dump.dmp"

# Remove the scratch directory however the script exits.
trap 'rm -rf "$TESTDIR"' EXIT

# Verify that the sample dump file is undumpable and then redumpable.
./rocksdb_undump "$DUMPFILE" "$TESTDIR/db"
./rocksdb_dump --anonymous "$TESTDIR/db" "$TESTDIR/dump"
cmp "$DUMPFILE" "$TESTDIR/dump"
Binary file added tools/sample-dump.dmp
Binary file not shown.

0 comments on commit 15325bf

Please sign in to comment.