From b871522499ff646c0b52f4b7ca8e88cf41935a5a Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Fri, 17 Jun 2022 21:34:59 +0800 Subject: [PATCH 01/17] fix issue 5154: use name instead of offset to find the pk id --- dbms/src/Debug/dbgFuncMockRaftCommand.cpp | 42 +++--- dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp | 66 +++++---- dbms/src/Debug/dbgFuncRegion.cpp | 8 +- dbms/src/Debug/dbgTools.cpp | 17 ++- .../DecodingStorageSchemaSnapshot.h | 19 ++- dbms/src/Storages/Transaction/TiDB.h | 11 +- .../Storages/Transaction/TiKVRecordFormat.h | 126 +++++++++++++----- .../Transaction/tests/RowCodecTestUtils.h | 8 +- .../tests/gtest_region_block_reader.cpp | 68 +++++----- 9 files changed, 238 insertions(+), 127 deletions(-) diff --git a/dbms/src/Debug/dbgFuncMockRaftCommand.cpp b/dbms/src/Debug/dbgFuncMockRaftCommand.cpp index df93ee1c78d..3626041f428 100644 --- a/dbms/src/Debug/dbgFuncMockRaftCommand.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftCommand.cpp @@ -40,7 +40,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); const String & database_name = typeid_cast(*args[1]).name; const String & table_name = typeid_cast(*args[2]).name; auto table = MockTiDB::instance().getTableByName(database_name, table_name); @@ -49,7 +49,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar if (4 + handle_column_size * 4 != args.size()) throw Exception("Args not matched, should be: region-id1, database-name, table-name, start1, end1, start2, end2, region-id2", ErrorCodes::BAD_ARGUMENTS); - RegionID region_id2 = (RegionID)safeGet(typeid_cast(*args[args.size() - 1]).value); + auto region_id2 = static_cast(safeGet(typeid_cast(*args[args.size() - 1]).value)); auto table_id = table->id(); TiKVKey start_key1, start_key2, end_key1, end_key2; @@ -59,9 +59,17 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar std::vector start_keys2; std::vector end_keys1; std::vector end_keys2; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + for (size_t i = 0; i < handle_column_size; i++) { - auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; auto start_field1 = RegionBench::convertField(column_info, typeid_cast(*args[3 + i]).value); TiDB::DatumBumpy start_datum1 = TiDB::DatumBumpy(start_field1, column_info.tp); @@ -88,10 +96,10 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar } else { - HandleID start1 = (HandleID)safeGet(typeid_cast(*args[3]).value); - HandleID end1 = (HandleID)safeGet(typeid_cast(*args[4]).value); - HandleID start2 = (HandleID)safeGet(typeid_cast(*args[5]).value); - HandleID end2 = (HandleID)safeGet(typeid_cast(*args[6]).value); + auto start1 = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end1 = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto start2 = static_cast(safeGet(typeid_cast(*args[5]).value)); + auto end2 = static_cast(safeGet(typeid_cast(*args[6]).value)); start_key1 = RecordKVFormat::genKey(table_id, start1); start_key2 = RecordKVFormat::genKey(table_id, start2); end_key1 = RecordKVFormat::genKey(table_id, end1); @@ -110,7 +118,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar request.set_cmd_type(raft_cmdpb::AdminCmdType::BatchSplit); raft_cmdpb::BatchSplitResponse * splits = response.mutable_splits(); { - auto region = splits->add_regions(); + auto * region = splits->add_regions(); region->set_id(region_id); region->set_start_key(start_key1); region->set_end_key(end_key1); @@ -118,7 +126,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar *region->mutable_region_epoch() = new_epoch; } { - auto region = splits->add_regions(); + auto * region = splits->add_regions(); region->set_id(region_id2); region->set_start_key(start_key2); region->set_end_key(end_key2); @@ -144,8 +152,8 @@ void MockRaftCommand::dbgFuncPrepareMerge(Context & context, const ASTs & args, throw Exception("Args not matched, should be: source-id1, target-id2", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); - RegionID target_id = (RegionID)safeGet(typeid_cast(*args[1]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto target_id = static_cast(safeGet(typeid_cast(*args[1]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -157,7 +165,7 @@ void MockRaftCommand::dbgFuncPrepareMerge(Context & context, const ASTs & args, { request.set_cmd_type(raft_cmdpb::AdminCmdType::PrepareMerge); - auto prepare_merge = request.mutable_prepare_merge(); + auto * prepare_merge = request.mutable_prepare_merge(); { auto min_index = region->appliedIndex(); prepare_merge->set_min_index(min_index); @@ -184,8 +192,8 @@ void MockRaftCommand::dbgFuncCommitMerge(Context & context, const ASTs & args, D throw Exception("Args not matched, should be: source-id1, current-id2", ErrorCodes::BAD_ARGUMENTS); } - RegionID source_id = (RegionID)safeGet(typeid_cast(*args[0]).value); - RegionID current_id = (RegionID)safeGet(typeid_cast(*args[1]).value); + auto source_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto current_id = static_cast(safeGet(typeid_cast(*args[1]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -196,7 +204,7 @@ void MockRaftCommand::dbgFuncCommitMerge(Context & context, const ASTs & args, D { request.set_cmd_type(raft_cmdpb::AdminCmdType::CommitMerge); - auto commit_merge = request.mutable_commit_merge(); + auto * commit_merge = request.mutable_commit_merge(); { commit_merge->set_commit(source_region->appliedIndex()); *commit_merge->mutable_source() = source_region->getMetaRegion(); @@ -220,7 +228,7 @@ void MockRaftCommand::dbgFuncRollbackMerge(Context & context, const ASTs & args, throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -231,7 +239,7 @@ void MockRaftCommand::dbgFuncRollbackMerge(Context & context, const ASTs & args, { request.set_cmd_type(raft_cmdpb::AdminCmdType::RollbackMerge); - auto rollback_merge = request.mutable_rollback_merge(); + auto * rollback_merge = request.mutable_rollback_merge(); { auto merge_state = region->getMergeState(); rollback_merge->set_commit(merge_state.commit()); diff --git a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp index 7eecbbdf6f7..c5a1fa87415 100644 --- a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp @@ -60,7 +60,7 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) { const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); TableID table_id = RegionBench::getTableID(context, database_name, table_name, ""); MockTiDB::TablePtr table = MockTiDB::instance().getTableByName(database_name, table_name); auto & table_info = table->table_info; @@ -68,10 +68,16 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) size_t handle_column_size = is_common_handle ? table_info.getPrimaryIndexInfo().idx_cols.size() : 1; RegionPtr region; + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + if (!is_common_handle) { - HandleID start = static_cast(safeGet(typeid_cast(*args[3]).value)); - HandleID end = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto start = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end = static_cast(safeGet(typeid_cast(*args[4]).value)); region = RegionBench::createRegion(table_id, region_id, start, end); } else @@ -81,7 +87,8 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) std::vector end_keys; for (size_t i = 0; i < handle_column_size; i++) { - auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[3 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); @@ -105,8 +112,8 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) for (auto it = args_begin; it != args_end; it += len) { HandleID handle_id = is_common_handle ? 0 : static_cast(safeGet(typeid_cast(*it[0]).value)); - Timestamp tso = static_cast(safeGet(typeid_cast(*it[1]).value)); - UInt8 del = static_cast(safeGet(typeid_cast(*it[2]).value)); + auto tso = static_cast(safeGet(typeid_cast(*it[1]).value)); + auto del = static_cast(safeGet(typeid_cast(*it[2]).value)); { std::vector fields; @@ -122,9 +129,9 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) std::vector keys; // handle key for (size_t i = 0; i < table_info.getPrimaryIndexInfo().idx_cols.size(); i++) { - auto & idx_col = table_info.getPrimaryIndexInfo().idx_cols[i]; - auto & column_info = table_info.columns[idx_col.offset]; - auto start_field = RegionBench::convertField(column_info, fields[idx_col.offset]); + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; + auto start_field = RegionBench::convertField(column_info, fields[idx]); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); keys.emplace_back(start_datum.field()); } @@ -168,7 +175,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotWithData(Context & context, const AST // DBGInvoke region_snapshot(region-id, start-key, end-key, database-name, table-name[, partition-id]) void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args, DBGInvoker::Printer output) { - RegionID region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); bool has_partition_id = false; size_t args_size = args.size(); if (dynamic_cast(args[args_size - 1].get()) != nullptr) @@ -198,9 +205,16 @@ void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args // Get start key and end key form multiple column if it is clustered_index. std::vector start_keys; std::vector end_keys; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < handle_column_size; i++) { - const auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[1 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); @@ -214,8 +228,8 @@ void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args } else { - HandleID start = static_cast(safeGet(typeid_cast(*args[1]).value)); - HandleID end = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto start = static_cast(safeGet(typeid_cast(*args[1]).value)); + auto end = static_cast(safeGet(typeid_cast(*args[2]).value)); start_key = RecordKVFormat::genKey(table_id, start); end_key = RecordKVFormat::genKey(table_id, end); } @@ -432,9 +446,9 @@ void MockRaftCommand::dbgFuncIngestSST(Context & context, const ASTs & args, DBG { const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); - RegionID start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); - RegionID end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); MockTiDB::TablePtr table = MockTiDB::instance().getTableByName(database_name, table_name); const auto & table_info = RegionBench::getTableInfo(context, database_name, table_name); @@ -555,7 +569,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotApplyBlock(Context & context, const A throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); auto [region, block_cache] = GLOBAL_REGION_MAP.popRegionCache("__snap_" + std::to_string(region_id)); auto & tmt = context.getTMTContext(); context.getTMTContext().getKVStore()->checkAndApplySnapshot({region, std::move(block_cache)}, tmt); @@ -577,12 +591,12 @@ void MockRaftCommand::dbgFuncRegionSnapshotPreHandleDTFiles(Context & context, c const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); - RegionID start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); - RegionID end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); - const String schema_str = safeGet(typeid_cast(*args[5]).value); - String handle_pk_name = safeGet(typeid_cast(*args[6]).value); + const auto schema_str = safeGet(typeid_cast(*args[5]).value); + auto handle_pk_name = safeGet(typeid_cast(*args[6]).value); UInt64 test_fields = 1; if (args.size() > 7) @@ -677,10 +691,10 @@ void MockRaftCommand::dbgFuncRegionSnapshotPreHandleDTFilesWithHandles(Context & const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); - const String schema_str = safeGet(typeid_cast(*args[3]).value); - String handle_pk_name = safeGet(typeid_cast(*args[4]).value); + const auto schema_str = safeGet(typeid_cast(*args[3]).value); + auto handle_pk_name = safeGet(typeid_cast(*args[4]).value); std::vector handles; for (size_t i = 5; i < args.size(); ++i) @@ -770,7 +784,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotApplyDTFiles(Context & context, const if (args.size() != 1) throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); - RegionID region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); const auto region_name = "__snap_snap_" + std::to_string(region_id); auto [new_region, ingest_ids] = GLOBAL_REGION_MAP.popRegionSnap(region_name); auto & tmt = context.getTMTContext(); diff --git a/dbms/src/Debug/dbgFuncRegion.cpp b/dbms/src/Debug/dbgFuncRegion.cpp index b2024eac1d8..f65a18b8fd0 100644 --- a/dbms/src/Debug/dbgFuncRegion.cpp +++ b/dbms/src/Debug/dbgFuncRegion.cpp @@ -61,9 +61,15 @@ void dbgFuncPutRegion(Context & context, const ASTs & args, DBGInvoker::Printer { std::vector start_keys; std::vector end_keys; + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < handle_column_size; i++) { - const auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[1 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index 685b2563a3b..59733bdd685 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -310,7 +310,7 @@ void insert( // // Parse the fields in the inserted row std::vector fields; { - for (ASTs::const_iterator it = values_begin; it != values_end; ++it) + for (auto it = values_begin; it != values_end; ++it) { auto field = typeid_cast((*it).get())->value; fields.emplace_back(field); @@ -330,11 +330,18 @@ void insert( // if (table_info.is_common_handle) { std::vector keys; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + for (size_t i = 0; i < table_info.getPrimaryIndexInfo().idx_cols.size(); i++) { - const auto & idx_col = table_info.getPrimaryIndexInfo().idx_cols[i]; - const auto & column_info = table_info.columns[idx_col.offset]; - auto start_field = RegionBench::convertField(column_info, fields[idx_col.offset]); + const auto & idx_col = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[idx_col]; + auto start_field = RegionBench::convertField(column_info, fields[idx_col]); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); keys.emplace_back(start_datum.field()); } @@ -417,7 +424,7 @@ struct BatchCtrl { assert(max_strlen >= min_strlen); assert(min_strlen >= 1); - auto str_len = static_cast(random() % (max_strlen - min_strlen + 1) + min_strlen); + auto str_len = static_cast(arc4random() % (max_strlen - min_strlen + 1) + min_strlen); default_str = String(str_len, '_'); } diff --git a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h index c636d9e60ab..8114fde5fe2 100644 --- a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h +++ b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB @@ -77,10 +78,12 @@ struct DecodingStorageSchemaSnapshot , decoding_schema_version{decoding_schema_version_} { std::unordered_map column_lut; + std::unordered_map column_name_id_map; for (size_t i = 0; i < table_info_.columns.size(); i++) { const auto & ci = table_info_.columns[i]; column_lut.emplace(ci.id, i); + column_name_id_map.emplace(ci.name, ci.id); } for (size_t i = 0; i < column_defines->size(); i++) { @@ -88,7 +91,7 @@ struct DecodingStorageSchemaSnapshot sorted_column_id_with_pos.insert({cd.id, i}); if (cd.id != TiDBPkColumnID && cd.id != VersionColumnID && cd.id != DelMarkColumnID) { - auto & columns = table_info_.columns; + const auto & columns = table_info_.columns; column_infos.push_back(columns[column_lut.at(cd.id)]); } else @@ -100,10 +103,14 @@ struct DecodingStorageSchemaSnapshot // create pk related metadata if needed if (is_common_handle) { - const auto & primary_index_info = table_info_.getPrimaryIndexInfo(); - for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) + /// we will not update the IndexInfo except Rename DDL. + /// When the add column / drop column action happenes, the offset of each column may change + /// Thus, we should not use offset to get the column we want, + /// but use to compare the column name to get the column id. + const auto & primary_index_cols = table_info_.getPrimaryIndexInfo().idx_cols; + for (const auto & col : primary_index_cols) { - auto pk_column_id = table_info_.columns[primary_index_info.idx_cols[i].offset].id; + auto pk_column_id = column_name_id_map[col.name]; pk_column_ids.emplace_back(pk_column_id); pk_pos_map.emplace(pk_column_id, reinterpret_cast(std::numeric_limits::max())); } @@ -125,11 +132,11 @@ struct DecodingStorageSchemaSnapshot { auto pk_pos_iter = pk_pos_map.begin(); size_t column_pos_in_block = 0; - for (auto iter = sorted_column_id_with_pos.begin(); iter != sorted_column_id_with_pos.end(); iter++) + for (auto & column_id_with_pos : sorted_column_id_with_pos) { if (pk_pos_iter == pk_pos_map.end()) break; - if (pk_pos_iter->first == iter->first) + if (pk_pos_iter->first == column_id_with_pos.first) { pk_pos_iter->second = column_pos_in_block; pk_pos_iter++; diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index f67bfb332c7..a3692ed30aa 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -179,6 +179,10 @@ struct ColumnInfo ColumnID id = -1; String name; + + /// please be very careful when you have to use offset, + /// because we never update offset when DDL action changes. + /// Thus, our offset will not exactly correspond the order of columns. Int32 offset = -1; Poco::Dynamic::Var origin_default_value; Poco::Dynamic::Var default_value; @@ -385,7 +389,12 @@ struct TableInfo bool isLogicalPartitionTable() const { return is_partition_table && belonging_table_id == DB::InvalidTableID && partition.enable; } - /// should not be called if is_common_handle = false + /// should not be called if is_common_handle = false. + /// when use IndexInfo, please avoid to use the offset info + /// the offset value may be wrong in some cases, + /// due to we will not update IndexInfo except RENAME DDL action, + /// but DDL like add column / drop column may change the offset of columns + /// Thus, please be very careful when you must have to use offset information !!!!! const IndexInfo & getPrimaryIndexInfo() const { return index_infos[0]; } IndexInfo & getPrimaryIndexInfo() { return index_infos[0]; } diff --git a/dbms/src/Storages/Transaction/TiKVRecordFormat.h b/dbms/src/Storages/Transaction/TiKVRecordFormat.h index 4a25b6d9292..8d678d317ee 100644 --- a/dbms/src/Storages/Transaction/TiKVRecordFormat.h +++ b/dbms/src/Storages/Transaction/TiKVRecordFormat.h @@ -83,17 +83,35 @@ inline TiKVKey encodeAsTiKVKey(const String & ori_str) return TiKVKey(ss.releaseStr()); } -inline UInt64 encodeUInt64(const UInt64 x) { return toBigEndian(x); } +inline UInt64 encodeUInt64(const UInt64 x) +{ + return toBigEndian(x); +} -inline UInt64 encodeInt64(const Int64 x) { return encodeUInt64(static_cast(x) ^ SIGN_MASK); } +inline UInt64 encodeInt64(const Int64 x) +{ + return encodeUInt64(static_cast(x) ^ SIGN_MASK); +} -inline UInt64 encodeUInt64Desc(const UInt64 x) { return encodeUInt64(~x); } +inline UInt64 encodeUInt64Desc(const UInt64 x) +{ + return encodeUInt64(~x); +} -inline UInt64 decodeUInt64(const UInt64 x) { return toBigEndian(x); } +inline UInt64 decodeUInt64(const UInt64 x) +{ + return toBigEndian(x); +} -inline UInt64 decodeUInt64Desc(const UInt64 x) { return ~decodeUInt64(x); } +inline UInt64 decodeUInt64Desc(const UInt64 x) +{ + return ~decodeUInt64(x); +} -inline Int64 decodeInt64(const UInt64 x) { return static_cast(decodeUInt64(x) ^ SIGN_MASK); } +inline Int64 decodeInt64(const UInt64 x) +{ + return static_cast(decodeUInt64(x) ^ SIGN_MASK); +} inline void encodeInt64(const Int64 x, WriteBuffer & ss) { @@ -125,7 +143,10 @@ inline DecodedTiKVKey genRawKey(const TableID tableId, const HandleID handleId) return key; } -inline TiKVKey genKey(const TableID tableId, const HandleID handleId) { return encodeAsTiKVKey(genRawKey(tableId, handleId)); } +inline TiKVKey genKey(const TableID tableId, const HandleID handleId) +{ + return encodeAsTiKVKey(genRawKey(tableId, handleId)); +} inline TiKVKey genKey(const TiDB::TableInfo & table_info, std::vector keys) { @@ -135,9 +156,16 @@ inline TiKVKey genKey(const TiDB::TableInfo & table_info, std::vector key memcpy(key.data() + 1, reinterpret_cast(&big_endian_table_id), 8); memcpy(key.data() + 1 + 8, RecordKVFormat::RECORD_PREFIX_SEP, 2); WriteBufferFromOwnString ss; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < keys.size(); i++) { - DB::EncodeDatum(keys[i], table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset].getCodecFlag(), ss); + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + DB::EncodeDatum(keys[i], table_info.columns[idx].getCodecFlag(), ss); } return encodeAsTiKVKey(key + ss.releaseStr()); } @@ -176,29 +204,50 @@ inline std::tuple decodeTiKVKeyFull(const TiKVKey & key) } } -inline DecodedTiKVKey decodeTiKVKey(const TiKVKey & key) { return std::get<0>(decodeTiKVKeyFull(key)); } +inline DecodedTiKVKey decodeTiKVKey(const TiKVKey & key) +{ + return std::get<0>(decodeTiKVKeyFull(key)); +} -inline Timestamp getTs(const TiKVKey & key) { return decodeUInt64Desc(read(key.data() + key.dataSize() - 8)); } +inline Timestamp getTs(const TiKVKey & key) +{ + return decodeUInt64Desc(read(key.data() + key.dataSize() - 8)); +} -inline TableID getTableId(const DecodedTiKVKey & key) { return decodeInt64(read(key.data() + 1)); } +inline TableID getTableId(const DecodedTiKVKey & key) +{ + return decodeInt64(read(key.data() + 1)); +} -inline HandleID getHandle(const DecodedTiKVKey & key) { return decodeInt64(read(key.data() + RAW_KEY_NO_HANDLE_SIZE)); } +inline HandleID getHandle(const DecodedTiKVKey & key) +{ + return decodeInt64(read(key.data() + RAW_KEY_NO_HANDLE_SIZE)); +} inline RawTiDBPK getRawTiDBPK(const DecodedTiKVKey & key) { return std::make_shared(key.begin() + RAW_KEY_NO_HANDLE_SIZE, key.end()); } -inline TableID getTableId(const TiKVKey & key) { return getTableId(decodeTiKVKey(key)); } +inline TableID getTableId(const TiKVKey & key) +{ + return getTableId(decodeTiKVKey(key)); +} -inline HandleID getHandle(const TiKVKey & key) { return getHandle(decodeTiKVKey(key)); } +inline HandleID getHandle(const TiKVKey & key) +{ + return getHandle(decodeTiKVKey(key)); +} inline bool isRecord(const DecodedTiKVKey & raw_key) { return raw_key.size() >= RAW_KEY_SIZE && raw_key[0] == TABLE_PREFIX && memcmp(raw_key.data() + 9, RECORD_PREFIX_SEP, 2) == 0; } -inline TiKVKey truncateTs(const TiKVKey & key) { return TiKVKey(String(key.data(), key.dataSize() - sizeof(Timestamp))); } +inline TiKVKey truncateTs(const TiKVKey & key) +{ + return TiKVKey(String(key.data(), key.dataSize() - sizeof(Timestamp))); +} inline TiKVKey appendTs(const TiKVKey & key, Timestamp ts) { @@ -215,7 +264,12 @@ inline TiKVKey genKey(TableID tableId, HandleID handleId, Timestamp ts) } inline TiKVValue encodeLockCfValue( - UInt8 lock_type, const String & primary, Timestamp ts, UInt64 ttl, const String * short_value = nullptr, Timestamp min_commit_ts = 0) + UInt8 lock_type, + const String & primary, + Timestamp ts, + UInt64 ttl, + const String * short_value = nullptr, + Timestamp min_commit_ts = 0) { WriteBufferFromOwnString res; res.write(lock_type); @@ -275,7 +329,10 @@ inline R readVarInt(const char *& data, size_t & len) return res; } -inline UInt64 readVarUInt(const char *& data, size_t & len) { return readVarInt(data, len); } +inline UInt64 readVarUInt(const char *& data, size_t & len) +{ + return readVarInt(data, len); +} inline UInt8 readUInt8(const char *& data, size_t & len) { @@ -347,30 +404,29 @@ inline DecodedWriteCFValue decodeWriteCfValue(const TiKVValue & value) auto flag = RecordKVFormat::readUInt8(data, len); switch (flag) { - case RecordKVFormat::SHORT_VALUE_PREFIX: - { - size_t slen = RecordKVFormat::readUInt8(data, len); - if (slen > len) - throw Exception("content len not equal to short value len", ErrorCodes::LOGICAL_ERROR); - short_value = RecordKVFormat::readRawString(data, len, slen); - break; - } - case RecordKVFormat::FLAG_OVERLAPPED_ROLLBACK: - // ignore - break; - case RecordKVFormat::GC_FENCE_PREFIX: - /** + case RecordKVFormat::SHORT_VALUE_PREFIX: + { + size_t slen = RecordKVFormat::readUInt8(data, len); + if (slen > len) + throw Exception("content len not equal to short value len", ErrorCodes::LOGICAL_ERROR); + short_value = RecordKVFormat::readRawString(data, len, slen); + break; + } + case RecordKVFormat::FLAG_OVERLAPPED_ROLLBACK: + // ignore + break; + case RecordKVFormat::GC_FENCE_PREFIX: + /** * according to https://github.com/tikv/tikv/pull/9207, when meet `GC fence` flag, it is definitely a * rewriting record and there must be a complete row written to tikv, just ignore it in tiflash. */ - return std::nullopt; - default: - throw Exception("invalid flag " + std::to_string(flag) + " in write cf", ErrorCodes::LOGICAL_ERROR); + return std::nullopt; + default: + throw Exception("invalid flag " + std::to_string(flag) + " in write cf", ErrorCodes::LOGICAL_ERROR); } } - return InnerDecodedWriteCFValue{write_type, prewrite_ts, - short_value.empty() ? nullptr : std::make_shared(short_value.data(), short_value.length())}; + return InnerDecodedWriteCFValue{write_type, prewrite_ts, short_value.empty() ? nullptr : std::make_shared(short_value.data(), short_value.length())}; } inline TiKVValue encodeWriteCfValue(UInt8 write_type, Timestamp ts, std::string_view short_value = {}, bool gc_fence = false) diff --git a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h index 20b395a9952..34e0d3d4104 100644 --- a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h +++ b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h @@ -237,14 +237,14 @@ std::pair> getTableInfoAndFields(ColumnIDs handle_ { table_info.is_common_handle = true; TiDB::IndexInfo index_info; - for (size_t i = 0; i < handle_ids.size(); i++) + for (auto handle_id : handle_ids) { TiDB::IndexColumnInfo index_column_info; - for (size_t pos = 0; pos < table_info.columns.size(); pos++) + for (auto & column : table_info.columns) { - if (table_info.columns[pos].id == handle_ids[i]) + if (column.id == handle_id) { - index_column_info.offset = pos; + index_column_info.name = column.name; break; } } diff --git a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp index 6a883230854..d08b4dd3738 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp @@ -26,13 +26,13 @@ using ColumnIDs = std::vector; class RegionBlockReaderTestFixture : public ::testing::Test { protected: - Int64 handle_value_ = 100; - UInt8 del_mark_value_ = 0; - UInt64 version_value_ = 100; - size_t rows_ = 3; + Int64 handle_value = 100; + UInt8 del_mark_value = 0; + UInt64 version_value = 100; + size_t rows = 3; - RegionDataReadInfoList data_list_read_; - std::unordered_map fields_map_; + RegionDataReadInfoList data_list_read; + std::unordered_map fields_map; enum RowEncodeVersion { @@ -43,8 +43,8 @@ class RegionBlockReaderTestFixture : public ::testing::Test protected: void SetUp() override { - data_list_read_.clear(); - fields_map_.clear(); + data_list_read.clear(); + fields_map.clear(); } void TearDown() override {} @@ -52,8 +52,12 @@ class RegionBlockReaderTestFixture : public ::testing::Test void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version) { // for later check + std::unordered_map column_name_columns_index_map; for (size_t i = 0; i < table_info.columns.size(); i++) - fields_map_.emplace(table_info.columns[i].id, fields[i]); + { + fields_map.emplace(table_info.columns[i].id, fields[i]); + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } std::vector value_fields; std::vector pk_fields; @@ -72,13 +76,13 @@ class RegionBlockReaderTestFixture : public ::testing::Test auto & primary_index_info = table_info.getPrimaryIndexInfo(); for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) { - size_t pk_offset = primary_index_info.idx_cols[i].offset; - EncodeDatum(pk_fields[i], table_info.columns[pk_offset].getCodecFlag(), pk_buf); + auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name]; + EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); } } else { - DB::EncodeInt64(handle_value_, pk_buf); + DB::EncodeInt64(handle_value, pk_buf); } RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())}; // create value @@ -96,44 +100,44 @@ class RegionBlockReaderTestFixture : public ::testing::Test throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); } auto row_value = std::make_shared(std::move(value_buf.str())); - for (size_t i = 0; i < rows_; i++) - data_list_read_.emplace_back(pk, del_mark_value_, version_value_, row_value); + for (size_t i = 0; i < rows; i++) + data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); } void checkBlock(DecodingStorageSchemaSnapshotConstPtr decoding_schema, const Block & block) const { ASSERT_EQ(block.columns(), decoding_schema->column_defines->size()); - for (size_t row = 0; row < rows_; row++) + for (size_t row = 0; row < rows; row++) { for (size_t pos = 0; pos < block.columns(); pos++) { - auto & column_element = block.getByPosition(pos); + const auto & column_element = block.getByPosition(pos); if (row == 0) { - ASSERT_EQ(column_element.column->size(), rows_); + ASSERT_EQ(column_element.column->size(), rows); } if (column_element.name == EXTRA_HANDLE_COLUMN_NAME) { if (decoding_schema->is_common_handle) { - ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read_[row]))); + ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read[row]))); } else { - ASSERT_EQ((*column_element.column)[row], Field(handle_value_)); + ASSERT_EQ((*column_element.column)[row], Field(handle_value)); } } else if (column_element.name == VERSION_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(version_value_)); + ASSERT_EQ((*column_element.column)[row], Field(version_value)); } else if (column_element.name == TAG_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value_))); + ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value))); } else { - ASSERT_EQ((*column_element.column)[row], fields_map_.at(column_element.column_id)); + ASSERT_EQ((*column_element.column)[row], fields_map.at(column_element.column_id)); } } } @@ -143,7 +147,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test { RegionBlockReader reader{decoding_schema}; Block block = createBlockSortByColumnID(decoding_schema); - if (!reader.read(block, data_list_read_, force_decode)) + if (!reader.read(block, data_list_read, force_decode)) return false; checkBlock(decoding_schema, block); @@ -155,7 +159,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test return getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), @@ -170,7 +174,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test handle_ids, is_common_handle, ColumnIDValue(1, String("")), - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(8, String("")), @@ -182,12 +186,12 @@ class RegionBlockReaderTestFixture : public ::testing::Test // add default value for missing column std::vector missing_column_ids{1, 8, 13}; String missing_column_default_value = String("default"); - for (size_t i = 0; i < table_info.columns.size(); i++) + for (auto & column : table_info.columns) { - if (std::find(missing_column_ids.begin(), missing_column_ids.end(), table_info.columns[i].id) != missing_column_ids.end()) + if (std::find(missing_column_ids.begin(), missing_column_ids.end(), column.id) != missing_column_ids.end()) { - table_info.columns[i].origin_default_value = missing_column_default_value; - fields_map_.emplace(table_info.columns[i].id, Field(missing_column_default_value)); + column.origin_default_value = missing_column_default_value; + fields_map.emplace(column.id, Field(missing_column_default_value)); } } return table_info; @@ -199,7 +203,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), ColumnIDValue(10, DecimalField(ToDecimal(12345678910ULL, 4), 4))); @@ -212,7 +216,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), @@ -227,7 +231,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), From 6eea5eeca87edcf8a334fc91662273bb101ec3a9 Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Fri, 17 Jun 2022 22:32:33 +0800 Subject: [PATCH 02/17] add tests about clustered index --- .../clustered_index/ddl.test | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/tests/fullstack-test-dt/clustered_index/ddl.test b/tests/fullstack-test-dt/clustered_index/ddl.test index 8abe450c11a..e6344e46ed8 100644 --- a/tests/fullstack-test-dt/clustered_index/ddl.test +++ b/tests/fullstack-test-dt/clustered_index/ddl.test @@ -66,3 +66,87 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t_2 mysql> drop table test.t_1; mysql> drop table test.t_2; + + +### about issue 5154 to check whether add column/drop column will effect the cluster index decode +### drop the column between two columns that are cluster index columns + +mysql> drop table if exists test.t_3; +mysql> set session tidb_isolation_read_engines='tiflash'; +mysql> create table test.t_3 (A int, B varchar(20), C int, D int, PRIMARY KEY(A,C) CLUSTERED); +mysql> insert into test.t_3 values (1,'1',1,1),(2,'2',2,2); + +mysql> alter table test.t_3 set tiflash replica 1; + +mysql> select * from test.t_3; ++---+---+---+---+ +| A | B | C | D | ++---+---+---+---+ +| 1 | 1 | 1 | 1 | +| 2 | 2 | 2 | 2 | ++---+---+---+---+ + +mysql> alter table test.t_3 drop column B; + +mysql> select * from test.t_3; ++---+---+---+ +| A | C | D | ++---+---+---+ +| 1 | 1 | 1 | +| 2 | 2 | 2 | ++---+---+---+ + +# insert some rows +mysql> insert into test.t_3 values (3,3,3),(4,4,4); + +mysql> select * from test.t_3; ++---+---+---+ +| A | C | D | ++---+---+---+ +| 1 | 1 | 1 | +| 2 | 2 | 2 | +| 3 | 3 | 3 | +| 4 | 4 | 4 | ++---+---+---+ + +mysql> drop table test.t_3; + +### add the column between two columns that are cluster index columns +mysql> drop table if exists test.t_4 +mysql> create table test.t_4 (A int, B varchar(20), C int, D int, PRIMARY KEY(A,C) CLUSTERED); + +mysql> insert into test.t_4 values (1,'1',1,1),(2,'2',2,2); + +mysql> alter table test.t_4 set tiflash replica 1; + +mysql> select * from test.t_4; ++---+---+---+---+ +| A | B | C | D | ++---+---+---+---+ +| 1 | 1 | 1 | 1 | +| 2 | 2 | 2 | 2 | ++---+---+---+---+ + +mysql> alter table test.t_4 Add column E int after B; + +mysql> select * from test.t_4; ++---+---+------+---+---+ +| A | B | E | C | D | ++---+---+------+---+---+ +| 1 | 1 | NULL | 1 | 1 | +| 2 | 2 | NULL | 2 | 2 | ++---+---+------+---+---+ + +mysql> insert into test.t_4 values (3,'3',3,3,3),(4,'4',4,4,4); + +mysql> select * from test.t_4; ++---+---+------+------+------+ +| A | B | E | C | D | ++---+---+------+------+------+ +| 1 | 1 | NULL | 1 | 1 | +| 2 | 2 | NULL | 2 | 2 | +| 3 | 3 | 3 | 3 | 3 | +| 4 | 4 | 4 | 4 | 4 | ++---+---+------+------+------+ + +mysql> drop table test.t_4; \ No newline at end of file From 110f2fb49c74165bb7d6830dbf9ac585e60b4966 Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Sat, 18 Jun 2022 10:24:43 +0800 Subject: [PATCH 03/17] fix clang tidy --- dbms/src/Debug/dbgTools.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index 59733bdd685..dd9b50d5e4d 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -424,7 +424,7 @@ struct BatchCtrl { assert(max_strlen >= min_strlen); assert(min_strlen >= 1); - auto str_len = static_cast(arc4random() % (max_strlen - min_strlen + 1) + min_strlen); + auto str_len = static_cast(random() % (max_strlen - min_strlen + 1) + min_strlen); default_str = String(str_len, '_'); } From e68d45d857329bdd248ff870dfed469fa9562f46 Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Mon, 20 Jun 2022 08:44:47 +0800 Subject: [PATCH 04/17] remove useless code --- dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h index 8114fde5fe2..b0cacefe6f4 100644 --- a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h +++ b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h @@ -17,7 +17,6 @@ #include #include #include -#include namespace DB From 01285457c5e484331dbe2069e795b5ef3f5e43e1 Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Mon, 20 Jun 2022 14:46:25 +0800 Subject: [PATCH 05/17] fix the comments --- dbms/src/Debug/MockTiDB.cpp | 5 ++--- dbms/src/Debug/dbgTools.cpp | 6 +++--- .../Storages/Transaction/RegionBlockReader.cpp | 1 + dbms/src/Storages/Transaction/TiDB.cpp | 4 ++-- dbms/src/Storages/Transaction/TiDB.h | 18 ++++++++++++------ 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/dbms/src/Debug/MockTiDB.cpp b/dbms/src/Debug/MockTiDB.cpp index 42ab56a97c1..7b3bdb0948f 100644 --- a/dbms/src/Debug/MockTiDB.cpp +++ b/dbms/src/Debug/MockTiDB.cpp @@ -221,7 +221,6 @@ TiDB::TableInfoPtr MockTiDB::parseColumns( { String & name = string_tokens[index]; index_info.idx_cols[index].name = name; - index_info.idx_cols[index].offset = pk_column_pos_map[name]; index_info.idx_cols[index].length = -1; } } @@ -302,7 +301,7 @@ int MockTiDB::newTables( tables_by_id.emplace(table->table_info.id, table); tables_by_name.emplace(qualified_name, table); - AffectedOption opt; + AffectedOption opt{}; opt.schema_id = table->database_id; opt.table_id = table->id(); opt.old_schema_id = table->database_id; @@ -571,7 +570,7 @@ void MockTiDB::renameTables(const std::vectordatabase_id; opt.table_id = new_table->id(); opt.old_schema_id = table->database_id; diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index dd9b50d5e4d..854d8a18bd5 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -339,9 +339,9 @@ void insert( // for (size_t i = 0; i < table_info.getPrimaryIndexInfo().idx_cols.size(); i++) { - const auto & idx_col = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; - const auto & column_info = table_info.columns[idx_col]; - auto start_field = RegionBench::convertField(column_info, fields[idx_col]); + const auto & col_idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[col_idx]; + auto start_field = RegionBench::convertField(column_info, fields[col_idx]); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); keys.emplace_back(start_datum.field()); } diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.cpp b/dbms/src/Storages/Transaction/RegionBlockReader.cpp index a9384e4a14d..5643f3c679c 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.cpp +++ b/dbms/src/Storages/Transaction/RegionBlockReader.cpp @@ -208,6 +208,7 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d } index++; } + block.checkNumberOfRows(); return true; } diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp index 15bf2a3fb58..dc7f1f3e348 100644 --- a/dbms/src/Storages/Transaction/TiDB.cpp +++ b/dbms/src/Storages/Transaction/TiDB.cpp @@ -631,8 +631,8 @@ catch (const Poco::Exception & e) /////////////////////// IndexColumnInfo::IndexColumnInfo(Poco::JSON::Object::Ptr json) - : offset(0) - , length(0) + : length(0) + , offset(0) { deserialize(json); } diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index a3692ed30aa..4c28a614857 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -179,11 +179,6 @@ struct ColumnInfo ColumnID id = -1; String name; - - /// please be very careful when you have to use offset, - /// because we never update offset when DDL action changes. - /// Thus, our offset will not exactly correspond the order of columns. - Int32 offset = -1; Poco::Dynamic::Var origin_default_value; Poco::Dynamic::Var default_value; Poco::Dynamic::Var default_bit_value; @@ -216,6 +211,12 @@ struct ColumnInfo static Int64 getTimeValue(const String &); static Int64 getYearValue(const String &); static UInt64 getBitValue(const String &); + +private: + /// please be very careful when you have to use offset, + /// because we never update offset when DDL action changes. + /// Thus, our offset will not exactly correspond the order of columns. + Int32 offset = -1; }; enum PartitionType @@ -302,8 +303,13 @@ struct IndexColumnInfo void deserialize(Poco::JSON::Object::Ptr json); String name; - Int32 offset; Int32 length; + +private: + /// please be very careful when you have to use offset, + /// because we never update offset when DDL action changes. + /// Thus, our offset will not exactly correspond the order of columns. + Int32 offset; }; struct IndexInfo { From 6e2bbb114ceca3841c65ef39ef4e1721eefca0b0 Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Mon, 20 Jun 2022 15:14:49 +0800 Subject: [PATCH 06/17] change session statement --- .../fullstack-test-dt/clustered_index/ddl.test | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/fullstack-test-dt/clustered_index/ddl.test b/tests/fullstack-test-dt/clustered_index/ddl.test index e6344e46ed8..6c4925c9619 100644 --- a/tests/fullstack-test-dt/clustered_index/ddl.test +++ b/tests/fullstack-test-dt/clustered_index/ddl.test @@ -67,18 +67,18 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t_2 mysql> drop table test.t_1; mysql> drop table test.t_2; - ### about issue 5154 to check whether add column/drop column will effect the cluster index decode ### drop the column between two columns that are cluster index columns mysql> drop table if exists test.t_3; -mysql> set session tidb_isolation_read_engines='tiflash'; mysql> create table test.t_3 (A int, B varchar(20), C int, D int, PRIMARY KEY(A,C) CLUSTERED); mysql> insert into test.t_3 values (1,'1',1,1),(2,'2',2,2); mysql> alter table test.t_3 set tiflash replica 1; -mysql> select * from test.t_3; +func> wait_table test t_3 + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3; +---+---+---+---+ | A | B | C | D | +---+---+---+---+ @@ -88,7 +88,7 @@ mysql> select * from test.t_3; mysql> alter table test.t_3 drop column B; -mysql> select * from test.t_3; +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3; +---+---+---+ | A | C | D | +---+---+---+ @@ -99,7 +99,7 @@ mysql> select * from test.t_3; # insert some rows mysql> insert into test.t_3 values (3,3,3),(4,4,4); -mysql> select * from test.t_3; +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3; +---+---+---+ | A | C | D | +---+---+---+ @@ -119,7 +119,9 @@ mysql> insert into test.t_4 values (1,'1',1,1),(2,'2',2,2); mysql> alter table test.t_4 set tiflash replica 1; -mysql> select * from test.t_4; +func> wait_table test t_4 + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4; +---+---+---+---+ | A | B | C | D | +---+---+---+---+ @@ -129,7 +131,7 @@ mysql> select * from test.t_4; mysql> alter table test.t_4 Add column E int after B; -mysql> select * from test.t_4; +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4; +---+---+------+---+---+ | A | B | E | C | D | +---+---+------+---+---+ @@ -139,7 +141,7 @@ mysql> select * from test.t_4; mysql> insert into test.t_4 values (3,'3',3,3,3),(4,'4',4,4,4); -mysql> select * from test.t_4; +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4; +---+---+------+------+------+ | A | B | E | C | D | +---+---+------+------+------+ From 08b122a5b4cf2bbae08d3246d992786e4b0db25d Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Mon, 20 Jun 2022 21:18:11 +0800 Subject: [PATCH 07/17] add unit test --- ...gtest_decoding_storage_schema_snapshot.cpp | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp diff --git a/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp new file mode 100644 index 00000000000..1339c3fbc44 --- /dev/null +++ b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp @@ -0,0 +1,28 @@ +#include +#include +#include "RowCodecTestUtils.h" +#include + +namespace DB::tests +{ +static TableInfo getTableInfoByJson(const String & json_table_info) +{ + return TableInfo(json_table_info); +} +TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndex) { + const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":2,"name":{"O":"B","L":"b"},"offset":1,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":0,"Flen":20,"Decimal":0,"Charset":"utf8mb4","Collate":"utf8mb4_bin","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json"; + + auto table_info = getTableInfoByJson(json_table_info); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + + //check decoding_schema->pk_column_ids infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), 2); + ASSERT_EQ(decoding_schema->pk_column_ids[0], 1); + ASSERT_EQ(decoding_schema->pk_column_ids[1], 3); + + //check decoding_schema->pk_pos_map infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), decoding_schema->pk_pos_map.size()); + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[0]), 3); + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[1]), 5); +}; +} \ No newline at end of file From 8d740591789bd32819c91cadbc681658afb744ad Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Mon, 20 Jun 2022 21:37:51 +0800 Subject: [PATCH 08/17] fix format --- .../tests/gtest_decoding_storage_schema_snapshot.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp index 1339c3fbc44..cc328d38ed0 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp @@ -1,15 +1,17 @@ #include #include -#include "RowCodecTestUtils.h" #include +#include "RowCodecTestUtils.h" + namespace DB::tests { static TableInfo getTableInfoByJson(const String & json_table_info) { return TableInfo(json_table_info); } -TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndex) { +TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndex) +{ const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":2,"name":{"O":"B","L":"b"},"offset":1,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":0,"Flen":20,"Decimal":0,"Charset":"utf8mb4","Collate":"utf8mb4_bin","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json"; auto table_info = getTableInfoByJson(json_table_info); @@ -25,4 +27,4 @@ TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndex) { ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[0]), 3); ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[1]), 5); }; -} \ No newline at end of file +} // namespace DB::tests \ No newline at end of file From 313959315f899afe9c780cbb3b3750fda2c0185f Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Mon, 20 Jun 2022 21:44:02 +0800 Subject: [PATCH 09/17] add license --- .../gtest_decoding_storage_schema_snapshot.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp index cc328d38ed0..0c00a796ab1 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp @@ -1,3 +1,17 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include #include #include From ffbd63e234de9b65a4672b3258a0fdf8a82d8e73 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 21 Jun 2022 12:42:37 +0800 Subject: [PATCH 10/17] add one more case Signed-off-by: JaySon-Huang --- ...gtest_decoding_storage_schema_snapshot.cpp | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp index 0c00a796ab1..1de9809ecad 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp @@ -26,8 +26,8 @@ static TableInfo getTableInfoByJson(const String & json_table_info) } TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndex) { + // table with column [A,B,C,D], primary keys [A,C] const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":2,"name":{"O":"B","L":"b"},"offset":1,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":0,"Flen":20,"Decimal":0,"Charset":"utf8mb4","Collate":"utf8mb4_bin","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json"; - auto table_info = getTableInfoByJson(json_table_info); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); @@ -38,7 +38,28 @@ TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndex) //check decoding_schema->pk_pos_map infos ASSERT_EQ(decoding_schema->pk_column_ids.size(), decoding_schema->pk_pos_map.size()); + // there are three hidden column in the decoded block, so the position of A,C is 3,5 ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[0]), 3); ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[1]), 5); -}; -} // namespace DB::tests \ No newline at end of file +} + +TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndexAfterDropColumn) +{ + // drop column B for [A,B,C,D]; table with column [A,C,D], primary keys [A,C] + const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json"; + auto table_info = getTableInfoByJson(json_table_info); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + + //check decoding_schema->pk_column_ids infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), 2); + ASSERT_EQ(decoding_schema->pk_column_ids[0], 1); + ASSERT_EQ(decoding_schema->pk_column_ids[1], 3); + + //check decoding_schema->pk_pos_map infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), decoding_schema->pk_pos_map.size()); + // there are three hidden column in the decoded block, so the position of A,C is 3,4 + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[0]), 3); + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[1]), 4); +} + +} // namespace DB::tests From fcf09ab5cecae625b525f97ab09df05530aed631 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 21 Jun 2022 12:43:22 +0800 Subject: [PATCH 11/17] Fix error message Signed-off-by: JaySon-Huang --- dbms/src/Core/Block.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 28db7af82e1..971e8f36e2a 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -238,10 +238,18 @@ void Block::checkNumberOfRows() const if (rows == -1) rows = size; else if (rows != size) - throw Exception("Sizes of columns doesn't match: " - + data.front().name + ": " + toString(rows) - + ", " + elem.name + ": " + toString(size), + { + auto first_col = data.front(); + throw Exception(fmt::format( + "Sizes of columns doesn't match: {}(id={}): {}, {}(id={}): {}", + first_col.name, + first_col.column_id, + rows, + elem.name, + elem.column_id, + size), ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + } } } From 6decafe299591f21a471088a0f703f4d0ee35369 Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Tue, 21 Jun 2022 18:25:24 +0800 Subject: [PATCH 12/17] add benchmark --- .../Transaction/RegionBlockReader.cpp | 18 +- .../Storages/Transaction/RegionBlockReader.h | 4 +- .../tests/bench_region_block_reader.cpp | 205 ++++++++++++++++++ 3 files changed, 218 insertions(+), 9 deletions(-) create mode 100644 dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.cpp b/dbms/src/Storages/Transaction/RegionBlockReader.cpp index 5643f3c679c..4f5f4e002c4 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.cpp +++ b/dbms/src/Storages/Transaction/RegionBlockReader.cpp @@ -33,23 +33,23 @@ RegionBlockReader::RegionBlockReader(DecodingStorageSchemaSnapshotConstPtr schem : schema_snapshot{std::move(schema_snapshot_)} {} -bool RegionBlockReader::read(Block & block, const RegionDataReadInfoList & data_list, bool force_decode) +bool RegionBlockReader::read(Block & block, const RegionDataReadInfoList & data_list, bool force_decode, bool with_check) { switch (schema_snapshot->pk_type) { case TMTPKType::INT64: - return readImpl(block, data_list, force_decode); + return readImpl(block, data_list, force_decode, with_check); case TMTPKType::UINT64: - return readImpl(block, data_list, force_decode); + return readImpl(block, data_list, force_decode, with_check); case TMTPKType::STRING: - return readImpl(block, data_list, force_decode); + return readImpl(block, data_list, force_decode, with_check); default: - return readImpl(block, data_list, force_decode); + return readImpl(block, data_list, force_decode, with_check); } } template -bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & data_list, bool force_decode) +bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & data_list, bool force_decode, bool with_check) { if (unlikely(block.columns() != schema_snapshot->column_defines->size())) throw Exception("block structure doesn't match schema_snapshot.", ErrorCodes::LOGICAL_ERROR); @@ -208,7 +208,11 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d } index++; } - block.checkNumberOfRows(); + if (with_check) + { + block.checkNumberOfRows(); + } + return true; } diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.h b/dbms/src/Storages/Transaction/RegionBlockReader.h index 004d9f40447..ba2411f5cf8 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.h +++ b/dbms/src/Storages/Transaction/RegionBlockReader.h @@ -51,11 +51,11 @@ class RegionBlockReader : private boost::noncopyable /// /// `RegionBlockReader::read` is the common routine used by both 'flush' and 'read' processes of TXN engine (Delta-Tree, TXN-MergeTree), /// each of which will use carefully adjusted 'force_decode' with appropriate error handling/retry to get what they want. - bool read(Block & block, const RegionDataReadInfoList & data_list, bool force_decode); + bool read(Block & block, const RegionDataReadInfoList & data_list, bool force_decode, bool with_check = true); private: template - bool readImpl(Block & block, const RegionDataReadInfoList & data_list, bool force_decode); + bool readImpl(Block & block, const RegionDataReadInfoList & data_list, bool force_decode, bool with_check = true); private: DecodingStorageSchemaSnapshotConstPtr schema_snapshot; diff --git a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp new file mode 100644 index 00000000000..a5db9cf2300 --- /dev/null +++ b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp @@ -0,0 +1,205 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "RowCodecTestUtils.h" +#include +#include +#include + +#include + +using TableInfo = TiDB::TableInfo; +namespace DB::tests +{ +using ColumnIDs = std::vector; +class RegionBlockReaderBenchTest : public benchmark::Fixture +{ +protected: + Int64 handle_value = 100; + UInt8 del_mark_value = 0; + UInt64 version_value = 100; + size_t rows = 100; + + RegionDataReadInfoList data_list_read; + std::unordered_map fields_map; + + enum RowEncodeVersion + { + RowV1, + RowV2 + }; + +protected: + void SetUp(const benchmark::State & /*state*/ ) override + { + data_list_read.clear(); + fields_map.clear(); + } + + // void TearDown() override {} + + void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version) + { + // for later check + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + fields_map.emplace(table_info.columns[i].id, fields[i]); + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + + std::vector value_fields; + std::vector pk_fields; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + if (!table_info.columns[i].hasPriKeyFlag()) + value_fields.emplace_back(fields[i]); + else + pk_fields.emplace_back(fields[i]); + } + + // create PK + WriteBufferFromOwnString pk_buf; + if (table_info.is_common_handle) + { + auto & primary_index_info = table_info.getPrimaryIndexInfo(); + for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) + { + auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name]; + EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); + } + } + else + { + DB::EncodeInt64(handle_value, pk_buf); + } + RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())}; + // create value + WriteBufferFromOwnString value_buf; + if (row_version == RowEncodeVersion::RowV1) + { + encodeRowV1(table_info, value_fields, value_buf); + } + else if (row_version == RowEncodeVersion::RowV2) + { + encodeRowV2(table_info, value_fields, value_buf); + } + else + { + throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); + } + auto row_value = std::make_shared(std::move(value_buf.str())); + for (size_t i = 0; i < rows; i++) + data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); + } + + bool decodeColumns(DecodingStorageSchemaSnapshotConstPtr decoding_schema, bool force_decode, bool with_check) const + { + RegionBlockReader reader{decoding_schema}; + Block block = createBlockSortByColumnID(decoding_schema); + return reader.read(block, data_list_read, force_decode, with_check); + } + + std::pair> getNormalTableInfoFields(const ColumnIDs & handle_ids, bool is_common_handle) const + { + return getTableInfoAndFields( + handle_ids, + is_common_handle, + ColumnIDValue(2, handle_value), + ColumnIDValue(3, std::numeric_limits::max()), + ColumnIDValue(4, std::numeric_limits::min()), + ColumnIDValue(9, String("aaa")), + ColumnIDValue(10, DecimalField(ToDecimal(12345678910ULL, 4), 4)), + ColumnIDValueNull(11)); + } +}; + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandleWithCheck) +(benchmark::State & state) +{ + auto [table_info, fields] = getNormalTableInfoFields({2, 3, 4}, true); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true, true); + } +} +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandleWithoutCheck) +(benchmark::State & state) +{ + auto [table_info, fields] = getNormalTableInfoFields({2, 3, 4}, true); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true, false); + } +} + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandleWithCheck) +(benchmark::State & state) +{ + auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true, true); + } +} +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck) +(benchmark::State & state) +{ + auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true, false); + } +} + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithCheck) +(benchmark::State & state) +{ + auto [table_info, fields] = getNormalTableInfoFields({2}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true, true); + } +} +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithoutCheck) +(benchmark::State & state) +{ + auto [table_info, fields] = getNormalTableInfoFields({2}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true, false); + } +} + +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandleWithCheck)->Iterations(1000); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandleWithoutCheck)->Iterations(1000); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandleWithCheck)->Iterations(1000); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandleWithoutCheck)->Iterations(1000); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithCheck)->Iterations(1000); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck)->Iterations(1000); + + +} From d89b308dd55a87194ccb836d96a59eb5fce187b8 Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Tue, 21 Jun 2022 18:32:46 +0800 Subject: [PATCH 13/17] remove useless code --- .../Storages/Transaction/tests/bench_region_block_reader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp index a5db9cf2300..b2d8226a96c 100644 --- a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp @@ -47,8 +47,6 @@ class RegionBlockReaderBenchTest : public benchmark::Fixture fields_map.clear(); } - // void TearDown() override {} - void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version) { // for later check From 44b6798cf4fa7593be9a6ec975c95b6b25b36e51 Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Tue, 21 Jun 2022 19:32:21 +0800 Subject: [PATCH 14/17] fix format --- .../tests/bench_region_block_reader.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp index b2d8226a96c..1c40156577f 100644 --- a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "RowCodecTestUtils.h" -#include #include +#include +#include #include -#include +#include "RowCodecTestUtils.h" using TableInfo = TiDB::TableInfo; namespace DB::tests @@ -41,7 +41,7 @@ class RegionBlockReaderBenchTest : public benchmark::Fixture }; protected: - void SetUp(const benchmark::State & /*state*/ ) override + void SetUp(const benchmark::State & /*state*/) override { data_list_read.clear(); fields_map.clear(); @@ -164,7 +164,7 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck) encodeColumns(table_info, fields, RowEncodeVersion::RowV2); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); for (auto _ : state) - { + { decodeColumns(decoding_schema, true, false); } } @@ -182,7 +182,7 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithCheck) } BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithoutCheck) (benchmark::State & state) -{ +{ auto [table_info, fields] = getNormalTableInfoFields({2}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV2); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); @@ -200,4 +200,4 @@ BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithCheck)->Iterat BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck)->Iterations(1000); -} +} // namespace DB::tests From 2162411a4b2aee5b42e263a92b5f3401b914763f Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 21 Jun 2022 20:05:33 +0800 Subject: [PATCH 15/17] add benchmark for differen num of rows Signed-off-by: JaySon-Huang --- .../tests/bench_region_block_reader.cpp | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp index b2d8226a96c..92409920237 100644 --- a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp @@ -29,7 +29,6 @@ class RegionBlockReaderBenchTest : public benchmark::Fixture Int64 handle_value = 100; UInt8 del_mark_value = 0; UInt64 version_value = 100; - size_t rows = 100; RegionDataReadInfoList data_list_read; std::unordered_map fields_map; @@ -47,7 +46,7 @@ class RegionBlockReaderBenchTest : public benchmark::Fixture fields_map.clear(); } - void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version) + void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version, size_t num_rows) { // for later check std::unordered_map column_name_columns_index_map; @@ -98,7 +97,7 @@ class RegionBlockReaderBenchTest : public benchmark::Fixture throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); } auto row_value = std::make_shared(std::move(value_buf.str())); - for (size_t i = 0; i < rows; i++) + for (size_t i = 0; i < num_rows; i++) data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); } @@ -126,8 +125,9 @@ class RegionBlockReaderBenchTest : public benchmark::Fixture BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandleWithCheck) (benchmark::State & state) { + size_t num_rows = state.range(0); auto [table_info, fields] = getNormalTableInfoFields({2, 3, 4}, true); - encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); for (auto _ : state) { @@ -137,8 +137,9 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandleWithCheck) BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandleWithoutCheck) (benchmark::State & state) { + size_t num_rows = state.range(0); auto [table_info, fields] = getNormalTableInfoFields({2, 3, 4}, true); - encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); for (auto _ : state) { @@ -149,8 +150,9 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandleWithoutCheck) BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandleWithCheck) (benchmark::State & state) { + size_t num_rows = state.range(0); auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); - encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); for (auto _ : state) { @@ -160,8 +162,9 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandleWithCheck) BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck) (benchmark::State & state) { + size_t num_rows = state.range(0); auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); - encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); for (auto _ : state) { @@ -172,8 +175,9 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck) BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithCheck) (benchmark::State & state) { + size_t num_rows = state.range(0); auto [table_info, fields] = getNormalTableInfoFields({2}, false); - encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); for (auto _ : state) { @@ -183,8 +187,9 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithCheck) BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithoutCheck) (benchmark::State & state) { + size_t num_rows = state.range(0); auto [table_info, fields] = getNormalTableInfoFields({2}, false); - encodeColumns(table_info, fields, RowEncodeVersion::RowV2); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); for (auto _ : state) { @@ -192,12 +197,12 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithoutCheck) } } -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandleWithCheck)->Iterations(1000); -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandleWithoutCheck)->Iterations(1000); -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandleWithCheck)->Iterations(1000); -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandleWithoutCheck)->Iterations(1000); -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithCheck)->Iterations(1000); -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck)->Iterations(1000); - +constexpr size_t num_iterations_test = 1000; +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandleWithCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandleWithoutCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandleWithCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandleWithoutCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); } From 4275d2a3d90a0b3636ac6c43d14d643a38f82452 Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Wed, 22 Jun 2022 08:31:46 +0800 Subject: [PATCH 16/17] fix format --- .../Storages/Transaction/tests/bench_region_block_reader.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp index 4c2cc5bec9c..38aeaddaa6a 100644 --- a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp @@ -186,7 +186,7 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithCheck) } BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithoutCheck) (benchmark::State & state) -{ +{ size_t num_rows = state.range(0); auto [table_info, fields] = getNormalTableInfoFields({2}, false); encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); @@ -207,4 +207,3 @@ BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithCheck)->Iterat BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); } // namespace DB::tests - From 72c75bc8c2eb6778515087df548defa25be46604 Mon Sep 17 00:00:00 2001 From: hongyunyan <649330952@qq.com> Date: Wed, 22 Jun 2022 15:14:54 +0800 Subject: [PATCH 17/17] remove with_check variable --- .../Transaction/RegionBlockReader.cpp | 17 +++-- .../Storages/Transaction/RegionBlockReader.h | 4 +- .../tests/bench_region_block_reader.cpp | 62 ++++--------------- 3 files changed, 21 insertions(+), 62 deletions(-) diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.cpp b/dbms/src/Storages/Transaction/RegionBlockReader.cpp index 4f5f4e002c4..2ec690c467b 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.cpp +++ b/dbms/src/Storages/Transaction/RegionBlockReader.cpp @@ -33,23 +33,23 @@ RegionBlockReader::RegionBlockReader(DecodingStorageSchemaSnapshotConstPtr schem : schema_snapshot{std::move(schema_snapshot_)} {} -bool RegionBlockReader::read(Block & block, const RegionDataReadInfoList & data_list, bool force_decode, bool with_check) +bool RegionBlockReader::read(Block & block, const RegionDataReadInfoList & data_list, bool force_decode) { switch (schema_snapshot->pk_type) { case TMTPKType::INT64: - return readImpl(block, data_list, force_decode, with_check); + return readImpl(block, data_list, force_decode); case TMTPKType::UINT64: - return readImpl(block, data_list, force_decode, with_check); + return readImpl(block, data_list, force_decode); case TMTPKType::STRING: - return readImpl(block, data_list, force_decode, with_check); + return readImpl(block, data_list, force_decode); default: - return readImpl(block, data_list, force_decode, with_check); + return readImpl(block, data_list, force_decode); } } template -bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & data_list, bool force_decode, bool with_check) +bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & data_list, bool force_decode) { if (unlikely(block.columns() != schema_snapshot->column_defines->size())) throw Exception("block structure doesn't match schema_snapshot.", ErrorCodes::LOGICAL_ERROR); @@ -208,10 +208,7 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d } index++; } - if (with_check) - { - block.checkNumberOfRows(); - } + block.checkNumberOfRows(); return true; } diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.h b/dbms/src/Storages/Transaction/RegionBlockReader.h index ba2411f5cf8..004d9f40447 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.h +++ b/dbms/src/Storages/Transaction/RegionBlockReader.h @@ -51,11 +51,11 @@ class RegionBlockReader : private boost::noncopyable /// /// `RegionBlockReader::read` is the common routine used by both 'flush' and 'read' processes of TXN engine (Delta-Tree, TXN-MergeTree), /// each of which will use carefully adjusted 'force_decode' with appropriate error handling/retry to get what they want. - bool read(Block & block, const RegionDataReadInfoList & data_list, bool force_decode, bool with_check = true); + bool read(Block & block, const RegionDataReadInfoList & data_list, bool force_decode); private: template - bool readImpl(Block & block, const RegionDataReadInfoList & data_list, bool force_decode, bool with_check = true); + bool readImpl(Block & block, const RegionDataReadInfoList & data_list, bool force_decode); private: DecodingStorageSchemaSnapshotConstPtr schema_snapshot; diff --git a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp index 38aeaddaa6a..05ab637de7f 100644 --- a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp @@ -101,11 +101,11 @@ class RegionBlockReaderBenchTest : public benchmark::Fixture data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); } - bool decodeColumns(DecodingStorageSchemaSnapshotConstPtr decoding_schema, bool force_decode, bool with_check) const + bool decodeColumns(DecodingStorageSchemaSnapshotConstPtr decoding_schema, bool force_decode) const { RegionBlockReader reader{decoding_schema}; Block block = createBlockSortByColumnID(decoding_schema); - return reader.read(block, data_list_read, force_decode, with_check); + return reader.read(block, data_list_read, force_decode); } std::pair> getNormalTableInfoFields(const ColumnIDs & handle_ids, bool is_common_handle) const @@ -122,7 +122,7 @@ class RegionBlockReaderBenchTest : public benchmark::Fixture } }; -BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandleWithCheck) +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandle) (benchmark::State & state) { size_t num_rows = state.range(0); @@ -131,35 +131,12 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandleWithCheck) auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); for (auto _ : state) { - decodeColumns(decoding_schema, true, true); - } -} -BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandleWithoutCheck) -(benchmark::State & state) -{ - size_t num_rows = state.range(0); - auto [table_info, fields] = getNormalTableInfoFields({2, 3, 4}, true); - encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); - auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); - for (auto _ : state) - { - decodeColumns(decoding_schema, true, false); + decodeColumns(decoding_schema, true); } } -BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandleWithCheck) -(benchmark::State & state) -{ - size_t num_rows = state.range(0); - auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); - encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); - auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); - for (auto _ : state) - { - decodeColumns(decoding_schema, true, true); - } -} -BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck) + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandle) (benchmark::State & state) { size_t num_rows = state.range(0); @@ -168,23 +145,11 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck) auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); for (auto _ : state) { - decodeColumns(decoding_schema, true, false); + decodeColumns(decoding_schema, true); } } -BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithCheck) -(benchmark::State & state) -{ - size_t num_rows = state.range(0); - auto [table_info, fields] = getNormalTableInfoFields({2}, false); - encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); - auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); - for (auto _ : state) - { - decodeColumns(decoding_schema, true, true); - } -} -BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithoutCheck) +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandle) (benchmark::State & state) { size_t num_rows = state.range(0); @@ -193,17 +158,14 @@ BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandleWithoutCheck) auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); for (auto _ : state) { - decodeColumns(decoding_schema, true, false); + decodeColumns(decoding_schema, true); } } constexpr size_t num_iterations_test = 1000; -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandleWithCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandleWithoutCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandleWithCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandleWithoutCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); -BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandleWithoutCheck)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); } // namespace DB::tests