From 612ec75814f30076f53d294cd0267533c807022f Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Thu, 5 Apr 2018 21:46:45 +0000 Subject: [PATCH 1/2] Issue #802: MyRocks: Statement rollback doesnt work correctly for nested statements Variant#2: Use a different approach to taking RocksDB SavePoints. - Take one at transaction start (so we can rollback to it) - Take one when top-level statement completes (there is a place to do that. Detecting when a top-level statement starts is much harder) - Also, don't make a new Savepoint if the previous statement didn't make any writes. This way, a long transaction doing lots of SELECTs will not create a lot of savepoints. - Also, don't call get_or_create_tx() from ha_rocksdb::external_lock(F_UNLCK). This call is made right after rocksdb_commit(). We end up creating a new RocksDB transaction just to do cleanups and immediately discard it. --- mysql-test/suite/rocksdb/r/transaction.result | 24 ++++ mysql-test/suite/rocksdb/t/transaction.test | 30 +++++ storage/rocksdb/ha_rocksdb.cc | 127 +++++++++++++++--- 3 files changed, 160 insertions(+), 21 deletions(-) diff --git a/mysql-test/suite/rocksdb/r/transaction.result b/mysql-test/suite/rocksdb/r/transaction.result index fe13c1633a8e..006baaf13392 100644 --- a/mysql-test/suite/rocksdb/r/transaction.result +++ b/mysql-test/suite/rocksdb/r/transaction.result @@ -934,3 +934,27 @@ value 3 rollback; drop table t1; +# +# #802: MyRocks: Statement rollback doesnt work correctly for nested statements +# +create table t1 (a varchar(100)) engine=rocksdb; +create table t2(a int) engine=rocksdb; +insert into t2 values (1), (2); +create table t3(a varchar(100)) engine=rocksdb; +create function func() returns varchar(100) deterministic +begin +insert into t3 values ('func-called'); +set @a= (select a from t2); +return 'func-returned'; +end;// +begin; +insert into t1 values (func()); +ERROR 21000: Subquery returns more than 1 row +select * from t1; +a +# The following must not produce 'func-called': +select * from t3; +a +rollback; +drop function func; +drop table t1,t2,t3; diff --git a/mysql-test/suite/rocksdb/t/transaction.test b/mysql-test/suite/rocksdb/t/transaction.test index a76fa8f9871d..3350db99dab0 100644 --- a/mysql-test/suite/rocksdb/t/transaction.test +++ b/mysql-test/suite/rocksdb/t/transaction.test @@ -103,3 +103,33 @@ update t1 set id=115 where id=3; rollback; drop table t1; + +--echo # +--echo # #802: MyRocks: Statement rollback doesnt work correctly for nested statements +--echo # +create table t1 (a varchar(100)) engine=rocksdb; +create table t2(a int) engine=rocksdb; +insert into t2 values (1), (2); + +create table t3(a varchar(100)) engine=rocksdb; + +delimiter //; +create function func() returns varchar(100) deterministic +begin + insert into t3 values ('func-called'); + set @a= (select a from t2); + return 'func-returned'; +end;// +delimiter ;// + +begin; +--error ER_SUBQUERY_NO_1_ROW +insert into t1 values (func()); +select * from t1; +--echo # The following must not produce 'func-called': +select * from t3; + +rollback; +drop function func; +drop table t1,t2,t3; + diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index c75e3220368c..2e2e919a0bb8 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -1854,6 +1854,17 @@ class Rdb_transaction { bool m_is_delayed_snapshot = false; bool m_is_two_phase = false; +private: + /* Number of RockDB savepoints taken */ + int m_n_savepoints; + /* + Number of write operations this transaction had when we took the last + savepoint (the idea is not to take another savepoint if we haven't made + any changes) + */ + ulonglong m_writes_at_last_savepoint; + +protected: THD *m_thd = nullptr; static std::multiset s_tx_list; @@ -1903,6 +1914,14 @@ class Rdb_transaction { return s; } +protected: + /* + The following two are helper functions to be overloaded by child classes. + They should provide RocksDB's savepoint semantics. + */ + virtual void do_set_savepoint() = 0; + virtual void do_rollback_to_savepoint() = 0; + public: rocksdb::ReadOptions m_read_opts; const char *m_mysql_log_file_name; @@ -2389,6 +2408,39 @@ class Rdb_transaction { virtual bool is_tx_started() const = 0; virtual void start_tx() = 0; virtual void start_stmt() = 0; + + /* + Called when a "top-level" statement inside a transaction completes + successfully and its changes become part of the transaction's changes. + */ + void make_stmt_savepoint_permanent() { + + // Take another RocksDB savepoint only if we had changes since the last + // one. This is very important for long transactions doing lots of + // SELECTs. + if (m_writes_at_last_savepoint != m_write_count) + { + do_set_savepoint(); + m_writes_at_last_savepoint= m_write_count; + m_n_savepoints++; + } + } + + + /* + Rollback to the savepoint we've set before the last statement + */ + void rollback_to_stmt_savepoint() { + if (m_writes_at_last_savepoint != m_write_count) { + do_rollback_to_savepoint(); + if (!--m_n_savepoints) { + do_set_savepoint(); + m_n_savepoints= 1; + } + m_writes_at_last_savepoint= m_write_count; + } + } + virtual void rollback_stmt() = 0; void set_tx_failed(bool failed_arg) { m_is_tx_failed = failed_arg; } @@ -2715,9 +2767,25 @@ class Rdb_transaction_impl : public Rdb_transaction { m_read_opts = rocksdb::ReadOptions(); + /* + Set the initial savepoint. If the first statement in the transaction + fails, we need something to roll back to, without rolling back the + entire transaction. + */ + do_set_savepoint(); + m_ddl_transaction = false; } + /* Implementations of do_*savepoint based on rocksdB::Transaction savepoints */ + void do_set_savepoint() override { + m_rocksdb_tx->SetSavePoint(); + } + + void do_rollback_to_savepoint() override { + m_rocksdb_tx->RollbackToSavePoint(); + } + /* Start a statement inside a multi-statement transaction. @@ -2730,7 +2798,6 @@ class Rdb_transaction_impl : public Rdb_transaction { void start_stmt() override { // Set the snapshot to delayed acquisition (SetSnapshotOnNextOperation) acquire_snapshot(false); - m_rocksdb_tx->SetSavePoint(); } /* @@ -2741,7 +2808,7 @@ class Rdb_transaction_impl : public Rdb_transaction { /* TODO: here we must release the locks taken since the start_stmt() call */ if (m_rocksdb_tx) { const rocksdb::Snapshot *const org_snapshot = m_rocksdb_tx->GetSnapshot(); - m_rocksdb_tx->RollbackToSavePoint(); + rollback_to_stmt_savepoint(); const rocksdb::Snapshot *const cur_snapshot = m_rocksdb_tx->GetSnapshot(); if (org_snapshot != cur_snapshot) { @@ -2831,6 +2898,15 @@ class Rdb_writebatch_impl : public Rdb_transaction { return res; } + /* Implementations of do_*savepoint based on rocksdB::WriteBatch savepoints */ + void do_set_savepoint() override { + m_batch->SetSavePoint(); + } + + void do_rollback_to_savepoint() override { + m_batch->RollbackToSavePoint(); + } + public: bool is_writebatch_trx() const override { return true; } @@ -2936,13 +3012,20 @@ class Rdb_writebatch_impl : public Rdb_transaction { write_opts.disableWAL = THDVAR(m_thd, write_disable_wal); write_opts.ignore_missing_column_families = THDVAR(m_thd, write_ignore_missing_column_families); + + /* + Set the initial savepoint. If the first statement in the transaction + fails, we need something to roll back to, without rolling back the + entire transaction. + */ + do_set_savepoint(); } - void start_stmt() override { m_batch->SetSavePoint(); } + void start_stmt() override {} void rollback_stmt() override { if (m_batch) - m_batch->RollbackToSavePoint(); + rollback_to_stmt_savepoint(); } explicit Rdb_writebatch_impl(THD *const thd) @@ -3157,6 +3240,8 @@ static int rocksdb_prepare(handlerton *const hton, THD *const thd, DEBUG_SYNC(thd, "rocksdb.prepared"); } + else + tx->make_stmt_savepoint_permanent(); return HA_EXIT_SUCCESS; } @@ -3328,11 +3413,9 @@ static int rocksdb_commit(handlerton *const hton, THD *const thd, } else { /* We get here when committing a statement within a transaction. - - We don't need to do anything here. tx->start_stmt() will notify - Rdb_transaction_impl that another statement has started. */ tx->set_tx_failed(false); + tx->make_stmt_savepoint_permanent(); } if (my_core::thd_tx_isolation(thd) <= ISO_READ_COMMITTED) { @@ -10315,22 +10398,24 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) { } if (lock_type == F_UNLCK) { - Rdb_transaction *const tx = get_or_create_tx(thd); + Rdb_transaction *const tx = get_tx_from_thd(thd); - tx->io_perf_end_and_record(&m_io_perf); - tx->m_n_mysql_tables_in_use--; - if (tx->m_n_mysql_tables_in_use == 0 && - !my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - /* - Do like InnoDB: when we get here, it's time to commit a - single-statement transaction. + if (tx) { + tx->io_perf_end_and_record(&m_io_perf); + tx->m_n_mysql_tables_in_use--; + if (tx->m_n_mysql_tables_in_use == 0 && + !my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { + /* + Do like InnoDB: when we get here, it's time to commit a + single-statement transaction. - If the statement involved multiple tables, this code will be executed - for each of them, but that's ok because non-first tx->commit() calls - will be no-ops. - */ - if (tx->commit_or_rollback()) { - res = HA_ERR_INTERNAL_ERROR; + If the statement involved multiple tables, this code will be executed + for each of them, but that's ok because non-first tx->commit() calls + will be no-ops. + */ + if (tx->commit_or_rollback()) { + res = HA_ERR_INTERNAL_ERROR; + } } } } else { From c90fbbc8c5f264f0b86fdf451f2e053785c9652d Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 9 Apr 2018 16:50:49 +0000 Subject: [PATCH 2/2] Issue #802: MyRocks: Statement rollback doesnt work correctly for nested statements Variant #2: initialize m_n_savepoints and m_writes_at_last_savepoint at transaction start. --- storage/rocksdb/ha_rocksdb.cc | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 2e2e919a0bb8..2f5f0f380e54 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -2409,6 +2409,17 @@ class Rdb_transaction { virtual void start_tx() = 0; virtual void start_stmt() = 0; + void set_initial_savepoint() { + /* + Set the initial savepoint. If the first statement in the transaction + fails, we need something to roll back to, without rolling back the + entire transaction. + */ + do_set_savepoint(); + m_n_savepoints= 1; + m_writes_at_last_savepoint= m_write_count; + } + /* Called when a "top-level" statement inside a transaction completes successfully and its changes become part of the transaction's changes. @@ -2767,12 +2778,7 @@ class Rdb_transaction_impl : public Rdb_transaction { m_read_opts = rocksdb::ReadOptions(); - /* - Set the initial savepoint. If the first statement in the transaction - fails, we need something to roll back to, without rolling back the - entire transaction. - */ - do_set_savepoint(); + set_initial_savepoint(); m_ddl_transaction = false; } @@ -3013,12 +3019,7 @@ class Rdb_writebatch_impl : public Rdb_transaction { write_opts.ignore_missing_column_families = THDVAR(m_thd, write_ignore_missing_column_families); - /* - Set the initial savepoint. If the first statement in the transaction - fails, we need something to roll back to, without rolling back the - entire transaction. - */ - do_set_savepoint(); + set_initial_savepoint(); } void start_stmt() override {}