Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrated BlobDB for backup/restore support #8129

Closed
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
### Public API change
* Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead.
* Update DB::StartIOTrace and remove Env object from the arguments as it's redundant and DB already has Env object that is passed down to IOTracer::StartIOTrace.
* For new integrated BlobDB, add support for blob files for backup/restore like table files. Because of current limitations, blob files always use the kLegacyCrc32cAndFileSize naming scheme, and incremental backups must read and checksum all blob files in a DB, even for files that are already backed up.

### New Features
* Added the ability to open BackupEngine backups as read-only DBs, using BackupInfo::name_for_open and env_for_open provided by BackupEngine::GetBackupInfo() with include_file_details=true.
Expand Down
7 changes: 6 additions & 1 deletion env/composite_env_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,12 @@ class CompositeEnv : public Env {
return file_system_->OptimizeForCompactionTableRead(
FileOptions(env_options), db_options);
}

// Produces options tuned for reading blob files by delegating to the
// underlying file system. The incoming EnvOptions are first wrapped in a
// FileOptions, and the file system's result is converted back to EnvOptions
// on return.
EnvOptions OptimizeForBlobFileRead(
    const EnvOptions& env_options,
    const ImmutableDBOptions& db_options) const override {
  const FileOptions fs_input_options(env_options);
  return file_system_->OptimizeForBlobFileRead(fs_input_options, db_options);
}
// This seems to clash with a macro on Windows, so #undef it here
#ifdef GetFreeSpace
#undef GetFreeSpace
Expand Down
11 changes: 11 additions & 0 deletions env/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,11 @@ class LegacyFileSystemWrapper : public FileSystem {
const ImmutableDBOptions& db_options) const override {
return target_->OptimizeForCompactionTableRead(file_options, db_options);
}
// Forwards the blob-file read tuning request to the wrapped target and hands
// back whatever options it produces.
FileOptions OptimizeForBlobFileRead(
    const FileOptions& file_options,
    const ImmutableDBOptions& db_options) const override {
  FileOptions tuned_options =
      target_->OptimizeForBlobFileRead(file_options, db_options);
  return tuned_options;
}

#ifdef GetFreeSpace
#undef GetFreeSpace
Expand Down Expand Up @@ -997,6 +1002,12 @@ EnvOptions Env::OptimizeForCompactionTableRead(
optimized_env_options.use_direct_reads = db_options.use_direct_reads;
return optimized_env_options;
}
// Default blob-file read tuning: start from a copy of the caller's options
// and take use_direct_reads from the DB options.
EnvOptions Env::OptimizeForBlobFileRead(
    const EnvOptions& env_options, const ImmutableDBOptions& db_options) const {
  EnvOptions blob_read_options = env_options;
  blob_read_options.use_direct_reads = db_options.use_direct_reads;
  return blob_read_options;
}

EnvOptions::EnvOptions(const DBOptions& options) {
AssignEnvOptions(this, options);
Expand Down
8 changes: 8 additions & 0 deletions env/file_system.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ FileOptions FileSystem::OptimizeForCompactionTableRead(
return optimized_file_options;
}

// Default blob-file read tuning: start from a copy of the caller's options
// and take use_direct_reads from the DB options.
FileOptions FileSystem::OptimizeForBlobFileRead(
    const FileOptions& file_options,
    const ImmutableDBOptions& db_options) const {
  FileOptions blob_read_options = file_options;
  blob_read_options.use_direct_reads = db_options.use_direct_reads;
  return blob_read_options;
}

IOStatus WriteStringToFile(FileSystem* fs, const Slice& data,
const std::string& fname, bool should_sync) {
std::unique_ptr<FSWritableFile> file;
Expand Down
12 changes: 12 additions & 0 deletions include/rocksdb/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,13 @@ class Env {
const EnvOptions& env_options,
const ImmutableDBOptions& db_options) const;

// OptimizeForBlobFileRead will create a new EnvOptions object that
// is a copy of the EnvOptions in the parameters, but is optimized for reading
// blob files.
// The default implementation copies env_options and sets use_direct_reads
// from db_options; subclasses may override to apply different tuning.
virtual EnvOptions OptimizeForBlobFileRead(
const EnvOptions& env_options,
const ImmutableDBOptions& db_options) const;

// Returns the status of all threads that belong to the current Env.
virtual Status GetThreadList(std::vector<ThreadStatus>* /*thread_list*/) {
return Status::NotSupported("Env::GetThreadList() not supported.");
Expand Down Expand Up @@ -1495,6 +1502,11 @@ class EnvWrapper : public Env {
const ImmutableDBOptions& db_options) const override {
return target_->OptimizeForCompactionTableRead(env_options, db_options);
}
// Forwards the blob-file read tuning request to the wrapped target Env.
EnvOptions OptimizeForBlobFileRead(
    const EnvOptions& env_options,
    const ImmutableDBOptions& db_options) const override {
  EnvOptions tuned_options =
      target_->OptimizeForBlobFileRead(env_options, db_options);
  return tuned_options;
}
// Forwards the free-space query for `path` to the wrapped target, which
// writes its answer through `diskfree`; the target's Status is returned
// unchanged.
Status GetFreeSpace(const std::string& path, uint64_t* diskfree) override {
return target_->GetFreeSpace(path, diskfree);
}
Expand Down
12 changes: 12 additions & 0 deletions include/rocksdb/file_system.h
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,13 @@ class FileSystem {
const FileOptions& file_options,
const ImmutableDBOptions& db_options) const;

// OptimizeForBlobFileRead will create a new FileOptions object that
// is a copy of the FileOptions in the parameters, but is optimized for
// reading blob files.
// The default implementation copies file_options and sets use_direct_reads
// from db_options; subclasses may override to apply different tuning.
virtual FileOptions OptimizeForBlobFileRead(
const FileOptions& file_options,
const ImmutableDBOptions& db_options) const;

// This seems to clash with a macro on Windows, so #undef it here
#ifdef GetFreeSpace
#undef GetFreeSpace
Expand Down Expand Up @@ -1289,6 +1296,11 @@ class FileSystemWrapper : public FileSystem {
const ImmutableDBOptions& db_options) const override {
return target_->OptimizeForCompactionTableRead(file_options, db_options);
}
// Forwards the blob-file read tuning request to the wrapped target
// FileSystem.
FileOptions OptimizeForBlobFileRead(
    const FileOptions& file_options,
    const ImmutableDBOptions& db_options) const override {
  FileOptions tuned_options =
      target_->OptimizeForBlobFileRead(file_options, db_options);
  return tuned_options;
}
IOStatus GetFreeSpace(const std::string& path, const IOOptions& options,
uint64_t* diskfree, IODebugContext* dbg) override {
return target_->GetFreeSpace(path, options, diskfree, dbg);
Expand Down
47 changes: 28 additions & 19 deletions include/rocksdb/utilities/backupable_db.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,14 @@ struct BackupableDBOptions {
// Default: nullptr
Env* backup_env;

// If share_table_files == true, backup will assume that table files with
// same name have the same contents. This enables incremental backups and
// avoids unnecessary data copies.
// If share_table_files == false, each backup will be on its own and will
// not share any data with other backups.
// share_table_files supports table and blob files.
//
// If share_table_files == true, the backup directory will share table and
// blob files among backups, to save space among backups of the same DB and to
// enable incremental backups by only copying new files.
// If share_table_files == false, each backup will be on its own and will not
// share any data with other backups.
//
// default: true
bool share_table_files;

Expand Down Expand Up @@ -92,13 +95,15 @@ struct BackupableDBOptions {
// Default: nullptr
std::shared_ptr<RateLimiter> restore_rate_limiter{nullptr};

// share_files_with_checksum supports table and blob files.
//
// Only used if share_table_files is set to true. Setting to false is
// DEPRECATED and potentially dangerous because in that case BackupEngine
// can lose data if backing up databases with distinct or divergent
// history, for example if restoring from a backup other than the latest,
// writing to the DB, and creating another backup. Setting to true (default)
// prevents these issues by ensuring that different table files (SSTs) with
// the same number are treated as distinct. See
// prevents these issues by ensuring that different table files (SSTs) and
// blob files with the same number are treated as distinct. See
// share_files_with_checksum_naming and ShareFilesNaming.
//
// Default: true
Expand Down Expand Up @@ -126,11 +131,12 @@ struct BackupableDBOptions {
int max_valid_backups_to_open;

// ShareFilesNaming describes possible naming schemes for backup
// table file names when the table files are stored in the shared_checksum
// directory (i.e., both share_table_files and share_files_with_checksum
// are true).
// table and blob file names when they are stored in the
// shared_checksum directory (i.e., both share_table_files and
// share_files_with_checksum are true).
enum ShareFilesNaming : uint32_t {
// Backup SST filenames are <file_number>_<crc32c>_<file_size>.sst
// Backup blob filenames are <file_number>_<crc32c>_<file_size>.blob and
// backup SST filenames are <file_number>_<crc32c>_<file_size>.sst
// where <crc32c> is an unsigned decimal integer. This is the
// original/legacy naming scheme for share_files_with_checksum,
// with two problems:
Expand All @@ -139,6 +145,7 @@ struct BackupableDBOptions {
// * Determining the name to use requires computing the checksum,
// so generally requires reading the whole file even if the file
// is already backed up.
//
// ** ONLY RECOMMENDED FOR PRESERVING OLD BEHAVIOR **
kLegacyCrc32cAndFileSize = 1U,

Expand All @@ -148,6 +155,8 @@ struct BackupableDBOptions {
// the value is a DB session id, not a checksum.
//
// Exceptions:
// * For blob files, kLegacyCrc32cAndFileSize is used as currently
// db_session_id is not supported by the blob file format.
akankshamahajan15 marked this conversation as resolved.
Show resolved Hide resolved
// * For old SST files without a DB session id, kLegacyCrc32cAndFileSize
// will be used instead, matching the names assigned by RocksDB versions
// not supporting the newer naming scheme.
Expand All @@ -158,25 +167,25 @@ struct BackupableDBOptions {

// If not already part of the naming scheme, insert
// _<file_size>
// before .sst in the name. In case of user code actually parsing the
// last _<whatever> before the .sst as the file size, this preserves that
// feature of kLegacyCrc32cAndFileSize. In other words, this option makes
// official that unofficial feature of the backup metadata.
// before .sst and .blob in the name. In case of user code actually parsing
// the last _<whatever> before the .sst and .blob as the file size, this
// preserves that feature of kLegacyCrc32cAndFileSize. In other words, this
// option makes official that unofficial feature of the backup metadata.
//
// We do not consider SST file sizes to have sufficient entropy to
// We do not consider SST and blob file sizes to have sufficient entropy to
// contribute significantly to naming uniqueness.
kFlagIncludeFileSize = 1U << 31,

kMaskNamingFlags = ~kMaskNoNamingFlags,
};

// Naming option for share_files_with_checksum table files. See
// Naming option for share_files_with_checksum table and blob files. See
// ShareFilesNaming for details.
//
// Modifying this option cannot introduce a downgrade compatibility issue
// because RocksDB can read, restore, and delete backups using different file
// names, and it's OK for a backup directory to use a mixture of table file
// naming schemes.
// names, and it's OK for a backup directory to use a mixture of table and
// blob file naming schemes.
//
// However, modifying this option and saving more backups to the same
// directory can lead to the same file getting saved again to that
Expand Down
44 changes: 29 additions & 15 deletions utilities/backupable/backupable_db.cc
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ class BackupEngineImpl {
BackupID backup_id, bool shared, const std::string& src_dir,
const std::string& fname, // starts with "/"
const EnvOptions& src_env_options, RateLimiter* rate_limiter,
uint64_t size_bytes, uint64_t size_limit = 0,
FileType file_type, uint64_t size_bytes, uint64_t size_limit = 0,
bool shared_checksum = false,
std::function<void()> progress_callback = []() {},
const std::string& contents = std::string(),
Expand Down Expand Up @@ -1287,7 +1287,7 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
Log(options_.info_log, "add file for backup %s", fname.c_str());
uint64_t size_bytes = 0;
Status st;
if (type == kTableFile) {
if (type == kTableFile || type == kBlobFile) {
st = db_env_->GetFileSize(src_dirname + fname, &size_bytes);
}
EnvOptions src_env_options;
Expand All @@ -1304,6 +1304,10 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
src_env_options =
db_env_->OptimizeForManifestRead(src_raw_env_options);
break;
case kBlobFile:
src_env_options = db_env_->OptimizeForBlobFileRead(
src_raw_env_options, ImmutableDBOptions(db_options));
break;
default:
// Other backed up files (like options file) are not read by live
// DB, so don't need to worry about avoiding mixing buffered and
Expand All @@ -1314,22 +1318,25 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
if (st.ok()) {
st = AddBackupFileWorkItem(
live_dst_paths, backup_items_to_finish, new_backup_id,
options_.share_table_files && type == kTableFile, src_dirname,
fname, src_env_options, rate_limiter, size_bytes,
size_limit_bytes,
options_.share_files_with_checksum && type == kTableFile,
options_.share_table_files &&
(type == kTableFile || type == kBlobFile),
src_dirname, fname, src_env_options, rate_limiter, type,
size_bytes, size_limit_bytes,
options_.share_files_with_checksum &&
(type == kTableFile || type == kBlobFile),
options.progress_callback, "" /* contents */,
checksum_func_name, checksum_val);
}
return st;
} /* copy_file_cb */,
[&](const std::string& fname, const std::string& contents, FileType) {
[&](const std::string& fname, const std::string& contents,
FileType type) {
Log(options_.info_log, "add file for backup %s", fname.c_str());
return AddBackupFileWorkItem(
live_dst_paths, backup_items_to_finish, new_backup_id,
false /* shared */, "" /* src_dir */, fname,
EnvOptions() /* src_env_options */, rate_limiter, contents.size(),
0 /* size_limit */, false /* shared_checksum */,
EnvOptions() /* src_env_options */, rate_limiter, type,
contents.size(), 0 /* size_limit */, false /* shared_checksum */,
options.progress_callback, contents);
} /* create_file_cb */,
&sequence_number, options.flush_before_backup ? 0 : port::kMaxUint64,
Expand Down Expand Up @@ -1872,9 +1879,10 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
std::vector<BackupAfterCopyOrCreateWorkItem>& backup_items_to_finish,
BackupID backup_id, bool shared, const std::string& src_dir,
const std::string& fname, const EnvOptions& src_env_options,
RateLimiter* rate_limiter, uint64_t size_bytes, uint64_t size_limit,
bool shared_checksum, std::function<void()> progress_callback,
const std::string& contents, const std::string& src_checksum_func_name,
RateLimiter* rate_limiter, FileType file_type, uint64_t size_bytes,
uint64_t size_limit, bool shared_checksum,
std::function<void()> progress_callback, const std::string& contents,
const std::string& src_checksum_func_name,
const std::string& src_checksum_str) {
assert(!fname.empty() && fname[0] == '/');
assert(contents.empty() != src_dir.empty());
Expand All @@ -1887,8 +1895,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
std::string checksum_hex;

// Whenever a default checksum function name is passed in, we will compare
// the corresponding checksum values after copying. Note that only table files
// may have a known checksum function name passed in.
// the corresponding checksum values after copying. Note that only table and
// blob files may have a known checksum function name passed in.
//
// If no default checksum function name is passed in and db session id is not
// available, we will calculate the checksum *before* copying in two cases
Expand All @@ -1906,7 +1914,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem(

// Step 1: Prepare the relative path to destination
if (shared && shared_checksum) {
if (GetNamingNoFlags() != BackupableDBOptions::kLegacyCrc32cAndFileSize) {
if (GetNamingNoFlags() != BackupableDBOptions::kLegacyCrc32cAndFileSize &&
file_type != kBlobFile) {
// Prepare db_session_id to add to the file name
// Ignore the returned status
// In the failed cases, db_id and db_session_id will be empty
Expand Down Expand Up @@ -1938,6 +1947,11 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
// shared_checksum/<file_number>_<db_session_id>.sst
// Otherwise, dst_relative is of the form
// shared_checksum/<file_number>_<checksum>_<size>.sst
//
// For blob files, db_session_id is not supported with the blob file format.
// It uses original/legacy naming scheme.
// dst_relative will be of the form:
// shared_checksum/<file_number>_<checksum>_<size>.blob
dst_relative = GetSharedFileWithChecksum(dst_relative, checksum_hex,
size_bytes, db_session_id);
dst_relative_tmp = GetSharedFileWithChecksumRel(dst_relative, true);
Expand Down
Loading