diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 0e818507b8b7..11ac33627995 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -6389,13 +6389,12 @@ ha_rocksdb::ha_rocksdb(my_core::handlerton *const hton, m_lock_rows(RDB_LOCK_NONE), m_keyread_only(false), m_insert_with_update(false), - m_dup_key_found(false) + m_dup_key_found(false), #if defined(ROCKSDB_INCLUDE_RFR) && ROCKSDB_INCLUDE_RFR - , m_in_rpl_delete_rows(false), - m_in_rpl_update_rows(false) + m_in_rpl_update_rows(false), #endif // defined(ROCKSDB_INCLUDE_RFR) && ROCKSDB_INCLUDE_RFR -{ + m_need_build_decoder(false) { } ha_rocksdb::~ha_rocksdb() { @@ -7956,8 +7955,7 @@ int ha_rocksdb::create(const char *const name, TABLE *const table_arg, */ int ha_rocksdb::truncate_table(Rdb_tbl_def *tbl_def_arg, const std::string &actual_user_table_name, - const TABLE *table_arg, - ulonglong auto_increment_value, + TABLE *table_arg, ulonglong auto_increment_value, dd::Table *table_def) { DBUG_ENTER_FUNC(); @@ -8043,6 +8041,7 @@ int ha_rocksdb::truncate_table(Rdb_tbl_def *tbl_def_arg, /* Update the local m_tbl_def reference */ m_tbl_def = ddl_manager.find(orig_tablename); + m_converter.reset(new Rdb_converter(ha_thd(), m_tbl_def, table_arg)); DBUG_RETURN(err); } @@ -8344,8 +8343,9 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf, #endif // !defined(DBUG_OFF) DBUG_EXECUTE_IF("dbug.rocksdb.HA_EXTRA_KEYREAD", { m_keyread_only = true; }); - bool covered_lookup = (m_keyread_only && kd.can_cover_lookup()) || - kd.covers_lookup(&value, &m_lookup_bitmap); + bool covered_lookup = + (m_keyread_only && kd.can_cover_lookup()) || + kd.covers_lookup(&value, m_converter->get_lookup_bitmap()); #if !defined(DBUG_OFF) m_keyread_only = save_keyread_only; @@ -8473,7 +8473,8 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) { rocksdb::Slice value = m_scan_it->value(); bool covered_lookup = (m_keyread_only && m_key_descr_arr[keyno]->can_cover_lookup()) || - m_key_descr_arr[keyno]->covers_lookup(&value, &m_lookup_bitmap); + m_key_descr_arr[keyno]->covers_lookup( + &value, m_converter->get_lookup_bitmap()); if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) { rc = m_key_descr_arr[keyno]->unpack_record( table, buf, &key, &value, @@ -8513,6 +8514,8 @@ int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key, enum ha_rkey_function find_flag) { DBUG_ENTER_FUNC(); + check_build_decoder(); + int rc = 0; THD *thd = ha_thd(); @@ -9097,6 +9100,8 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, int ha_rocksdb::index_next(uchar *const buf) { DBUG_ENTER_FUNC(); + check_build_decoder(); + bool moves_forward = true; ha_statistic_increment(&System_status_var::ha_read_next_count); if (m_key_descr_arr[active_index]->m_is_reverse_cf) { @@ -9117,6 +9122,8 @@ int ha_rocksdb::index_next(uchar *const buf) { int ha_rocksdb::index_prev(uchar *const buf) { DBUG_ENTER_FUNC(); + check_build_decoder(); + bool moves_forward = false; ha_statistic_increment(&System_status_var::ha_read_prev_count); if (m_key_descr_arr[active_index]->m_is_reverse_cf) { @@ -9179,6 +9186,8 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) { int ha_rocksdb::index_first(uchar *const buf) { DBUG_ENTER_FUNC(); + check_build_decoder(); + m_sk_match_prefix = nullptr; ha_statistic_increment(&System_status_var::ha_read_first_count); int rc = m_key_descr_arr[active_index]->m_is_reverse_cf @@ -9197,6 +9206,8 @@ int ha_rocksdb::index_first(uchar *const buf) { int ha_rocksdb::index_last(uchar *const buf) { DBUG_ENTER_FUNC(); + check_build_decoder(); + m_sk_match_prefix = nullptr; ha_statistic_increment(&System_status_var::ha_read_last_count); int rc = m_key_descr_arr[active_index]->m_is_reverse_cf @@ -10601,6 +10612,9 @@ void ha_rocksdb::setup_iterator_for_rnd_scan() { int ha_rocksdb::rnd_init(bool scan) { DBUG_ENTER_FUNC(); + m_need_build_decoder = true; + active_index = table->s->primary_key; + THD *thd = ha_thd(); if (thd && thd->killed) { DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); @@ -10608,10 +10622,6 @@ int ha_rocksdb::rnd_init(bool scan) { Rdb_transaction *const tx = get_or_create_tx(table->in_use); - // when this table is being updated, decode all fields - m_converter->setup_field_decoders(table->read_set, - m_lock_rows == RDB_LOCK_WRITE); - if (scan) { m_rnd_scan_is_new_snapshot = !tx->has_snapshot(); setup_iterator_for_rnd_scan(); @@ -10635,6 +10645,8 @@ int ha_rocksdb::rnd_init(bool scan) { int ha_rocksdb::rnd_next(uchar *const buf) { DBUG_ENTER_FUNC(); + check_build_decoder(); + int rc; ha_statistic_increment(&System_status_var::ha_read_rnd_next_count); for (;;) { @@ -10767,11 +10779,26 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { int ha_rocksdb::rnd_end() { DBUG_ENTER_FUNC(); + m_need_build_decoder = false; + release_scan_iterator(); DBUG_RETURN(HA_EXIT_SUCCESS); } +void ha_rocksdb::build_decoder() { + m_converter->setup_field_decoders(table->read_set, active_index, + m_keyread_only, + m_lock_rows == RDB_LOCK_WRITE); +} + +void ha_rocksdb::check_build_decoder() { + if (m_need_build_decoder) { + build_decoder(); + m_need_build_decoder = false; + } +} + /** @return HA_EXIT_SUCCESS OK @@ -10780,6 +10807,9 @@ int ha_rocksdb::rnd_end() { int ha_rocksdb::index_init(uint idx, bool sorted) { DBUG_ENTER_FUNC(); + m_need_build_decoder = true; + active_index = idx; + THD *thd = ha_thd(); if (thd && thd->killed) { DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); @@ -10788,21 +10818,11 @@ int ha_rocksdb::index_init(uint idx, bool sorted) { Rdb_transaction *const tx = get_or_create_tx(table->in_use); DBUG_ASSERT(tx != nullptr); - // when this table is being updated, decode all fields - m_converter->setup_field_decoders(table->read_set, - m_lock_rows == RDB_LOCK_WRITE); - - if (!m_keyread_only) { - m_key_descr_arr[idx]->get_lookup_bitmap(table, &m_lookup_bitmap); - } - // If m_lock_rows is not RDB_LOCK_NONE then we will be doing a get_for_update // when accessing the index, so don't acquire the snapshot right away. // Otherwise acquire the snapshot immediately. tx->acquire_snapshot(m_lock_rows == RDB_LOCK_NONE); - active_index = idx; - DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -10813,9 +10833,9 @@ int ha_rocksdb::index_init(uint idx, bool sorted) { int ha_rocksdb::index_end() { DBUG_ENTER_FUNC(); - release_scan_iterator(); + m_need_build_decoder = false; - bitmap_free(&m_lookup_bitmap); + release_scan_iterator(); active_index = MAX_KEY; in_range_check_pushed_down = false; @@ -11152,6 +11172,8 @@ void ha_rocksdb::position(const uchar *const record) { int ha_rocksdb::rnd_pos(uchar *const buf, uchar *const pos) { DBUG_ENTER_FUNC(); + check_build_decoder(); + int rc; size_t len; diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index ac3f43f0136e..d737bb1ef1b1 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -395,13 +395,6 @@ class ha_rocksdb : public my_core::handler { void set_last_rowkey(const uchar *const old_data); - /* - For the active index, indicates which columns must be covered for the - current lookup to be covered. If the bitmap field is null, that means this - index does not cover the current lookup for any record. - */ - MY_BITMAP m_lookup_bitmap; - int alloc_key_buffers(const TABLE *const table_arg, const Rdb_tbl_def *const tbl_def_arg, bool alloc_alter_buffers = false) @@ -913,7 +906,7 @@ class ha_rocksdb : public my_core::handler { dd::Table *table_def); int truncate_table(Rdb_tbl_def *tbl_def, const std::string &actual_user_table_name, - const TABLE *table_arg, ulonglong auto_increment_value, + TABLE *table_arg, ulonglong auto_increment_value, dd::Table *table_def); bool check_if_incompatible_data(HA_CREATE_INFO *const info, uint table_changes) override @@ -962,6 +955,9 @@ class ha_rocksdb : public my_core::handler { int adjust_handler_stats_sst_and_memtable(); int adjust_handler_stats_table_scan(); + void build_decoder(); + void check_build_decoder(); + #if defined(ROCKSDB_INCLUDE_RFR) && ROCKSDB_INCLUDE_RFR public: virtual void rpl_before_delete_rows() override; @@ -995,6 +991,9 @@ class ha_rocksdb : public my_core::handler { bool m_in_rpl_delete_rows; bool m_in_rpl_update_rows; #endif // defined(ROCKSDB_INCLUDE_RFR) && ROCKSDB_INCLUDE_RFR + + /* Need to build decoder on next read operation */ + bool m_need_build_decoder; }; /* diff --git a/storage/rocksdb/rdb_converter.cc b/storage/rocksdb/rdb_converter.cc index bac2eda7acbe..aefc916fcc0a 100644 --- a/storage/rocksdb/rdb_converter.cc +++ b/storage/rocksdb/rdb_converter.cc @@ -309,6 +309,7 @@ Rdb_converter::~Rdb_converter() { m_encoder_arr = nullptr; // These are needed to suppress valgrind errors in rocksdb.partition m_storage_record.mem_free(); + bitmap_free(&m_lookup_bitmap); } /* @@ -333,23 +334,25 @@ void Rdb_converter::get_storage_type(Rdb_field_encoder *const encoder, Setup which fields will be unpacked when reading rows @detail - Three special cases when we still unpack all fields: + Two special cases when we still unpack all fields: - When client requires decode_all_fields, such as this table is being updated (m_lock_rows==RDB_LOCK_WRITE). - - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to - read all fields to find whether there is a row checksum at the end. We could - skip the fields instead of decoding them, but currently we do decoding.) - - On index merge as bitmap is cleared during that operation + - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need + to read all fields to find whether there is a row checksum at the end. We + could skip the fields instead of decoding them, but currently we do + decoding.) @seealso Rdb_converter::setup_field_encoders() Rdb_converter::convert_record_from_storage_format() */ void Rdb_converter::setup_field_decoders(const MY_BITMAP *field_map, + uint active_index, bool keyread_only, bool decode_all_fields) { DBUG_TRACE; m_key_requested = false; m_decoders_vect.clear(); + bitmap_free(&m_lookup_bitmap); int last_useful = 0; int skip_size = 0; @@ -361,7 +364,6 @@ void Rdb_converter::setup_field_decoders(const MY_BITMAP *field_map, for (uint i = 0; i < m_table->s->fields; i++) { const bool field_requested = decode_all_fields || m_verify_row_debug_checksums || - bitmap_is_clear_all(field_map) || bitmap_is_set(field_map, m_table->field[i]->field_index()); if (field_requested && m_table->field[i]->is_virtual_gcol()) { @@ -374,10 +376,8 @@ void Rdb_converter::setup_field_decoders(const MY_BITMAP *field_map, } for (uint i = 0; i < m_table->s->fields; i++) { - // bitmap is cleared on index merge, but it still needs to decode columns bool field_requested = decode_all_fields || m_verify_row_debug_checksums || - bitmap_is_clear_all(field_map) || (bitmap_is_set(field_map, m_table->field[i]->field_index())) || bases[i]; @@ -414,6 +414,11 @@ void Rdb_converter::setup_field_decoders(const MY_BITMAP *field_map, // skipping. Remove them. m_decoders_vect.erase(m_decoders_vect.begin() + last_useful, m_decoders_vect.end()); + + if (!keyread_only && active_index != m_table->s->primary_key) { + m_tbl_def->m_key_descr_arr[active_index]->get_lookup_bitmap( + m_table, &m_lookup_bitmap); + } } void Rdb_converter::setup_field_encoders() { @@ -607,6 +612,11 @@ int Rdb_converter::convert_record_from_storage_format( const rocksdb::Slice *const key_slice, const rocksdb::Slice *const value_slice, uchar *const dst, bool decode_value = true) { + bool skip_value = !decode_value || get_decode_fields()->size() == 0; + if (!m_key_requested && skip_value) { + return HA_EXIT_SUCCESS; + } + int err = HA_EXIT_SUCCESS; Rdb_string_reader value_slice_reader(value_slice); @@ -628,7 +638,7 @@ int Rdb_converter::convert_record_from_storage_format( } } - if (!decode_value || get_decode_fields()->size() == 0) { + if (skip_value) { // We are done return HA_EXIT_SUCCESS; } diff --git a/storage/rocksdb/rdb_converter.h b/storage/rocksdb/rdb_converter.h index d9905df9d54a..3ae0216fe43f 100644 --- a/storage/rocksdb/rdb_converter.h +++ b/storage/rocksdb/rdb_converter.h @@ -134,8 +134,8 @@ class Rdb_converter { Rdb_converter &operator=(const Rdb_converter &decoder) = delete; ~Rdb_converter(); - void setup_field_decoders(const MY_BITMAP *field_map, - bool decode_all_fields = false); + void setup_field_decoders(const MY_BITMAP *field_map, uint active_index, + bool keyread_only, bool decode_all_fields = false); int decode(const std::shared_ptr &key_def, uchar *dst, const rocksdb::Slice *key_slice, const rocksdb::Slice *value_slice, @@ -172,6 +172,8 @@ class Rdb_converter { return &m_decoders_vect; } + const MY_BITMAP *get_lookup_bitmap() { return &m_lookup_bitmap; } + private: int decode_value_header_for_pk(Rdb_string_reader *reader, const std::shared_ptr &pk_def, @@ -241,5 +243,11 @@ class Rdb_converter { my_core::ha_rows m_row_checksums_checked; // buffer to hold data during encode_value_slice String m_storage_record; + /* + For the active index, indicates which columns must be covered for the + current lookup to be covered. If the bitmap field is null, that means this + index does not cover the current lookup for any record. + */ + MY_BITMAP m_lookup_bitmap; }; } // namespace myrocks