Skip to content

Commit

Permalink
use new FlushWAL API in MyRocks
Browse files Browse the repository at this point in the history
Summary:
RocksDB has recently added a FlushWAL API which will improve upon the
performance of MySQL 2PC (see facebook/rocksdb#2345 for more details).
This patch adds support for using the FlushWAL API in MyRocks and also matches flush_log_at_trx_commit with innodb_flush_log_at_trx_commit behaviour.

Finally, it updates the submodule to include the removal of an unneeded
assertion in the write path, which was tripped by this change.

Differential Revision: D5503719
  • Loading branch information
alxyang authored and inikep committed Jan 28, 2022
1 parent 456c298 commit 684061a
Show file tree
Hide file tree
Showing 10 changed files with 142 additions and 47 deletions.
14 changes: 7 additions & 7 deletions mysql-test/suite/rocksdb/r/2pc_group_commit.result
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@ CREATE DATABASE mysqlslap;
USE mysqlslap;
CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb;
# 2PC enabled, MyRocks durability enabled
SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
## 2PC + durability + single thread
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 1000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 1000 then 'true' else 'false' end
false
true
## 2PC + durability + group commit
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true' else 'false' end
false
true
# 2PC enabled, MyRocks durability disabled
SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
Expand All @@ -28,16 +28,16 @@ select case when variable_value-@c = 0 then 'true' else 'false' end from informa
case when variable_value-@c = 0 then 'true' else 'false' end
true
# 2PC disabled, MyRocks durability enabled
SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 0 then 'true' else 'false' end
false
true
select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
select case when variable_value-@c = 0 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
case when variable_value-@c = 0 then 'true' else 'false' end
false
true
SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
DROP TABLE t1;
Expand Down
2 changes: 2 additions & 0 deletions mysql-test/suite/rocksdb/r/rocksdb.result
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,7 @@ rocksdb_compaction_sequential_deletes 0
rocksdb_compaction_sequential_deletes_count_sd OFF
rocksdb_compaction_sequential_deletes_file_size 0
rocksdb_compaction_sequential_deletes_window 0
rocksdb_concurrent_prepare ON
rocksdb_create_checkpoint
rocksdb_create_if_missing ON
rocksdb_create_missing_column_families OFF
Expand Down Expand Up @@ -917,6 +918,7 @@ rocksdb_lock_scanned_rows OFF
rocksdb_lock_wait_timeout 1
rocksdb_log_file_time_to_roll 0
rocksdb_manifest_preallocation_size 4194304
rocksdb_manual_wal_flush ON
rocksdb_master_skip_tx_api OFF
rocksdb_max_background_jobs 2
rocksdb_max_log_file_size 0
Expand Down
25 changes: 8 additions & 17 deletions mysql-test/suite/rocksdb/r/write_sync.result
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,26 @@ SET GLOBAL rocksdb_write_disable_wal=false;
SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;
set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
0
insert aaa(id, i) values(2,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
0
insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
0
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
insert aaa(id, i) values(4,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
1
insert aaa(id, i) values(5,1);
insert aaa(id, i) values(2,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
2
insert aaa(id, i) values(6,1);
insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
variable_value-@a
3
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(4,1);
SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
insert aaa(id, i) values(7,1);
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(5,1);
truncate table aaa;
drop table aaa;
set @@global.rocksdb_flush_log_at_trx_commit=@save_rocksdb_flush_log_at_trx_commit;
Expand Down
6 changes: 3 additions & 3 deletions mysql-test/suite/rocksdb/t/2pc_group_commit.test
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ USE mysqlslap;
CREATE TABLE t1(id BIGINT AUTO_INCREMENT, value BIGINT, PRIMARY KEY(id)) ENGINE=rocksdb;

--echo # 2PC enabled, MyRocks durability enabled
SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;

--echo ## 2PC + durability + single thread
Expand All @@ -28,7 +28,7 @@ select case when variable_value-@c > 0 and variable_value-@c < 10000 then 'true'


--echo # 2PC enabled, MyRocks durability disabled
SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;

select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
Expand All @@ -41,7 +41,7 @@ select case when variable_value-@c = 0 then 'true' else 'false' end from informa


--echo # 2PC disabled, MyRocks durability enabled
SET GLOBAL rocksdb_enable_2pc=1;
SET GLOBAL rocksdb_enable_2pc=0;
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;

select variable_value into @c from information_schema.global_status where variable_name='rocksdb_wal_group_syncs';
Expand Down
23 changes: 13 additions & 10 deletions mysql-test/suite/rocksdb/t/write_sync.test
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ SET GLOBAL rocksdb_write_ignore_missing_column_families=true;
create table aaa (id int primary key, i int) engine rocksdb;

set @save_rocksdb_flush_log_at_trx_commit=@@global.rocksdb_flush_log_at_trx_commit;
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
--exec sleep 30
SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
--exec sleep 5
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(1,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
Expand All @@ -16,19 +16,22 @@ select variable_value-@a from information_schema.global_status where variable_na
insert aaa(id, i) values(3,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';

SET GLOBAL rocksdb_flush_log_at_trx_commit=1;
SET GLOBAL rocksdb_flush_log_at_trx_commit=0;
--exec sleep 5
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(4,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(5,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(6,1);
select variable_value-@a from information_schema.global_status where variable_name='rocksdb_wal_synced';

let $status_var=rocksdb_wal_synced;
let $status_var_value=`select @a+1`;
source include/wait_for_status_var.inc;

SET GLOBAL rocksdb_flush_log_at_trx_commit=2;
insert aaa(id, i) values(7,1);
--exec sleep 5
select variable_value into @a from information_schema.global_status where variable_name='rocksdb_wal_synced';
insert aaa(id, i) values(5,1);

let $status_var=rocksdb_wal_synced;
let $status_var_value=`select @a+4`;
let $status_var_value=`select @a+1`;
source include/wait_for_status_var.inc;

truncate table aaa;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO valid_values VALUES(1);
INSERT INTO valid_values VALUES(1024);
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'aaa\'');
SET @start_global_value = @@global.ROCKSDB_CONCURRENT_PREPARE;
SELECT @start_global_value;
@start_global_value
1
"Trying to set variable @@global.ROCKSDB_CONCURRENT_PREPARE to 444. It should fail because it is readonly."
SET @@global.ROCKSDB_CONCURRENT_PREPARE = 444;
ERROR HY000: Variable 'rocksdb_concurrent_prepare' is a read only variable
DROP TABLE valid_values;
DROP TABLE invalid_values;
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO valid_values VALUES(1);
INSERT INTO valid_values VALUES(1024);
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'aaa\'');
SET @start_global_value = @@global.ROCKSDB_MANUAL_WAL_FLUSH;
SELECT @start_global_value;
@start_global_value
1
"Trying to set variable @@global.ROCKSDB_MANUAL_WAL_FLUSH to 444. It should fail because it is readonly."
SET @@global.ROCKSDB_MANUAL_WAL_FLUSH = 444;
ERROR HY000: Variable 'rocksdb_manual_wal_flush' is a read only variable
DROP TABLE valid_values;
DROP TABLE invalid_values;
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Basic system-variable test for ROCKSDB_CONCURRENT_PREPARE.
# The variable is declared below as global-only ($session=0) and read-only
# ($read_only=1); the shared rocksdb_sys_var.inc driver performs the checks.
--source include/have_rocksdb.inc

# NOTE(review): valid_values / invalid_values are presumably consumed by
# rocksdb_sys_var.inc -- confirm whether they are used for a read-only variable.
CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO valid_values VALUES(1);
INSERT INTO valid_values VALUES(1024);

CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'aaa\'');

# Parameters read by the sourced include file.
--let $sys_var=ROCKSDB_CONCURRENT_PREPARE
--let $read_only=1
--let $session=0
--source ../include/rocksdb_sys_var.inc

# Clean up the helper tables.
DROP TABLE valid_values;
DROP TABLE invalid_values;
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Basic system-variable test for ROCKSDB_MANUAL_WAL_FLUSH.
# The variable is declared below as global-only ($session=0) and read-only
# ($read_only=1); the shared rocksdb_sys_var.inc driver performs the checks.
--source include/have_rocksdb.inc

# NOTE(review): valid_values / invalid_values are presumably consumed by
# rocksdb_sys_var.inc -- confirm whether they are used for a read-only variable.
CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO valid_values VALUES(1);
INSERT INTO valid_values VALUES(1024);

CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'aaa\'');

# Parameters read by the sourced include file.
--let $sys_var=ROCKSDB_MANUAL_WAL_FLUSH
--let $read_only=1
--let $session=0
--source ../include/rocksdb_sys_var.inc

# Clean up the helper tables.
DROP TABLE valid_values;
DROP TABLE invalid_values;
59 changes: 49 additions & 10 deletions storage/rocksdb/ha_rocksdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,8 @@ static std::unique_ptr<rocksdb::DBOptions> rdb_init_rocksdb_db_options(void) {
o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL;
o->max_subcompactions = DEFAULT_SUBCOMPACTIONS;

o->concurrent_prepare = true;
o->manual_wal_flush = true;
return o;
}

Expand Down Expand Up @@ -670,6 +672,20 @@ static MYSQL_SYSVAR_BOOL(
"DBOptions::create_if_missing for RocksDB", nullptr, nullptr,
rocksdb_db_options->create_if_missing);

// System variable rocksdb_concurrent_prepare: exposes
// DBOptions::concurrent_prepare by binding directly to the field inside
// rocksdb_db_options. PLUGIN_VAR_READONLY means it cannot be changed with
// SET at runtime. The default (last argument) is whatever value
// rocksdb_db_options already holds for this field.
static MYSQL_SYSVAR_BOOL(
concurrent_prepare,
*reinterpret_cast<my_bool *>(&rocksdb_db_options->concurrent_prepare),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::concurrent_prepare for RocksDB", nullptr, nullptr,
rocksdb_db_options->concurrent_prepare);

// System variable rocksdb_manual_wal_flush: exposes
// DBOptions::manual_wal_flush by binding directly to the field inside
// rocksdb_db_options. PLUGIN_VAR_READONLY means it cannot be changed with
// SET at runtime. The default (last argument) is whatever value
// rocksdb_db_options already holds for this field.
static MYSQL_SYSVAR_BOOL(
manual_wal_flush,
*reinterpret_cast<my_bool *>(&rocksdb_db_options->manual_wal_flush),
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DBOptions::manual_wal_flush for RocksDB", nullptr, nullptr,
rocksdb_db_options->manual_wal_flush);

static MYSQL_SYSVAR_BOOL(
create_missing_column_families,
*reinterpret_cast<my_bool *>(
Expand Down Expand Up @@ -1077,12 +1093,21 @@ static MYSQL_SYSVAR_STR(update_cf_options, rocksdb_update_cf_options,
"Option updates per column family for RocksDB", nullptr,
rocksdb_set_update_cf_options, nullptr);

/*
  Named values for the rocksdb_flush_log_at_trx_commit setting.

  The numeric values are written out explicitly because they are compared
  against the unsigned integer that backs the setting.
*/
enum rocksdb_flush_log_at_trx_commit_type : unsigned int {
  /* 0: write options are created with sync == false. */
  FLUSH_LOG_NEVER = 0,
  /* 1: write options are created with sync == true. */
  FLUSH_LOG_SYNC = 1,
  /* 2: write options are created with sync == false. */
  FLUSH_LOG_BACKGROUND = 2,
  /* Sentinel, one past the largest valid value; must be last. */
  FLUSH_LOG_MAX = 3
};

static MYSQL_SYSVAR_UINT(flush_log_at_trx_commit,
rocksdb_flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG,
"Sync on transaction commit. Similar to "
"innodb_flush_log_at_trx_commit. 1: sync on commit, "
"0,2: not sync on commit",
nullptr, nullptr, 1, 0, 2, 0);
nullptr, nullptr, /* default */ FLUSH_LOG_SYNC,
/* min */ FLUSH_LOG_NEVER,
/* max */ FLUSH_LOG_BACKGROUND, 0);

static MYSQL_THDVAR_BOOL(write_disable_wal, PLUGIN_VAR_RQCMDARG,
"WriteOptions::disableWAL for RocksDB", nullptr,
Expand Down Expand Up @@ -1375,6 +1400,8 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(skip_bloom_filter_on_read),

MYSQL_SYSVAR(create_if_missing),
MYSQL_SYSVAR(concurrent_prepare),
MYSQL_SYSVAR(manual_wal_flush),
MYSQL_SYSVAR(create_missing_column_families),
MYSQL_SYSVAR(error_if_exists),
MYSQL_SYSVAR(paranoid_checks),
Expand Down Expand Up @@ -1490,7 +1517,7 @@ static rocksdb::WriteOptions
rdb_get_rocksdb_write_options(my_core::THD *const thd) {
rocksdb::WriteOptions opt;

opt.sync = (rocksdb_flush_log_at_trx_commit == 1);
opt.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
opt.disableWAL = THDVAR(thd, write_disable_wal);
opt.ignore_missing_column_families =
THDVAR(thd, write_ignore_missing_column_families);
Expand Down Expand Up @@ -2249,7 +2276,7 @@ class Rdb_transaction_impl : public Rdb_transaction {
tx_opts.deadlock_detect = THDVAR(m_thd, deadlock_detect);
tx_opts.max_write_batch_size = THDVAR(m_thd, write_batch_max_bytes);

write_opts.sync = (rocksdb_flush_log_at_trx_commit == 1);
write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
write_opts.ignore_missing_column_families =
THDVAR(m_thd, write_ignore_missing_column_families);
Expand Down Expand Up @@ -2468,7 +2495,7 @@ class Rdb_writebatch_impl : public Rdb_transaction {

void start_tx() override {
reset();
write_opts.sync = (rocksdb_flush_log_at_trx_commit == 1);
write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
write_opts.ignore_missing_column_families =
THDVAR(m_thd, write_ignore_missing_column_families);
Expand Down Expand Up @@ -2628,8 +2655,17 @@ static std::string rdb_xid_to_string(const XID &src) {
static bool rocksdb_flush_wal(handlerton *const hton MY_ATTRIBUTE((__unused__)),
ulonglong target_lsn MY_ATTRIBUTE((__unused__))) {
DBUG_ASSERT(rdb != nullptr);
rocksdb_wal_group_syncs++;
const rocksdb::Status s = rdb->SyncWAL();

rocksdb::Status s;
/*
target_lsn is set to 0 when MySQL wants to sync the wal files
*/
if (target_lsn == 0 || rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
rocksdb_wal_group_syncs++;
s = rdb->FlushWAL(target_lsn == 0 ||
rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
}

if (!s.ok()) {
rdb_log_status_error(s);
return HA_EXIT_FAILURE;
Expand Down Expand Up @@ -2668,7 +2704,7 @@ static int rocksdb_prepare(handlerton *const hton, THD *const thd,
return HA_EXIT_FAILURE;
}
if (thd->durability_property == HA_IGNORE_DURABILITY &&
(rocksdb_flush_log_at_trx_commit == 1)) {
(rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER)) {
/**
we set the log sequence as '1' just to trigger hton->flush_logs
*/
Expand Down Expand Up @@ -11022,10 +11058,13 @@ void Rdb_background_thread::run() {
timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);

// Flush the WAL.
if (rdb && (rocksdb_flush_log_at_trx_commit == 2)) {
// Flush the WAL. Sync it for both background and never modes to copy
// InnoDB's behavior. For mode never, the wal file isn't even written,
// whereas background writes to the wal file, but issues the syncs in a
// background thread.
if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC)) {
DBUG_ASSERT(!rocksdb_db_options->allow_mmap_writes);
const rocksdb::Status s = rdb->SyncWAL();
const rocksdb::Status s = rdb->FlushWAL(true);
if (!s.ok()) {
rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD);
}
Expand Down

0 comments on commit 684061a

Please sign in to comment.