Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FLASH-402] Skip unneeded columns when decode row #173

Merged
merged 45 commits into from
Aug 20, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
6abfa1d
skip unneeded column
lidezhu Aug 10, 2019
b83a4b5
small fix
lidezhu Aug 11, 2019
445fc4d
check unknown column id when decode row
lidezhu Aug 12, 2019
fce6f15
add initializer for boolean variable
lidezhu Aug 13, 2019
0231b95
fix the case when tikv value contains just a nill flag
lidezhu Aug 13, 2019
0366142
remove unnecessary copy
lidezhu Aug 13, 2019
f5013e9
small improvement
lidezhu Aug 13, 2019
b0d3e9c
fix comment
lidezhu Aug 14, 2019
93a10a7
don't store column id in field
lidezhu Aug 14, 2019
8a5bb7b
small improvement
lidezhu Aug 15, 2019
dcc4265
remove unnecessary blank line
lidezhu Aug 15, 2019
a92d878
add log for performance debug
lidezhu Aug 15, 2019
1881a5c
adjust time point
lidezhu Aug 15, 2019
eae234b
small fix and adjust log
lidezhu Aug 15, 2019
43472ba
small improvement
lidezhu Aug 15, 2019
1ddb5d2
small improvement
lidezhu Aug 16, 2019
1952272
remove unnecessary construction
lidezhu Aug 16, 2019
d051cbb
small improvement
lidezhu Aug 16, 2019
7aee3e5
fix conflict with master
lidezhu Aug 16, 2019
9191506
uncomment flushregion
lidezhu Aug 16, 2019
8befbd6
Merge branch 'master' into FLASH402
solotzg Aug 19, 2019
a64e2c5
skip uint by calling DecodeVarUInt
lidezhu Aug 19, 2019
68a688d
Merge branch 'FLASH402' of github.com:lidezhu/tics into FLASH402
lidezhu Aug 19, 2019
33b14ef
Merge branch 'master' into FLASH402
lidezhu Aug 19, 2019
171b2f9
avoid insert column id
lidezhu Aug 20, 2019
dfabdb2
Merge branch 'FLASH402' of github.com:lidezhu/tics into FLASH402
lidezhu Aug 20, 2019
a8a25ca
small fix
lidezhu Aug 20, 2019
72beacc
add exception message
lidezhu Aug 20, 2019
237b7f2
comment flushRegion
lidezhu Aug 20, 2019
ad152f8
uncomment flushRegion and other minor fix
lidezhu Aug 20, 2019
c2afabb
modify push_back to emplace_back
lidezhu Aug 20, 2019
f50a6f4
add const
lidezhu Aug 20, 2019
9daf974
fix comment
lidezhu Aug 20, 2019
b6cb23f
fix comment
lidezhu Aug 20, 2019
8b89ea1
optimize by using dense_hash_map&dense_hash_set
solotzg Aug 20, 2019
8970e15
fix
solotzg Aug 20, 2019
1422d0a
fix
solotzg Aug 20, 2019
8b2e256
fix comment
lidezhu Aug 20, 2019
b574f5b
small fix
lidezhu Aug 20, 2019
12db36f
Merge branch 'FLASH402' of github.com:lidezhu/tics into lidezhu-FLASH…
solotzg Aug 20, 2019
96d62ed
Merge branch 'FLASH402' of github.com:lidezhu/tics into lidezhu-FLASH…
solotzg Aug 20, 2019
5119717
remove useless comment
lidezhu Aug 20, 2019
ad9bb51
format
solotzg Aug 20, 2019
e361ea5
Merge pull request #1 from solotzg/FLASH-402-optimize-hash
lidezhu Aug 20, 2019
f886ff3
[FLASH-402] optimize column_map (#2)
solotzg Aug 20, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 32 additions & 36 deletions dbms/src/Storages/Transaction/RegionBlockReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,13 +191,11 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
// optimize for only need handle, tso, delmark.
if (column_names_to_read.size() > 3)
{
std::unordered_set<ColumnID> col_id_included;

// TODO: optimize columns' insertion, use better implementation rather than Field, it's terrible.
std::vector<ColumnID> col_ids;
std::vector<Field> fields;
col_ids.reserve(target_col_size);
fields.reserve(target_col_size);
std::vector<ColumnID> decoded_col_ids;
std::vector<Field> decoded_fields;
decoded_col_ids.reserve(target_col_size);
decoded_fields.reserve(target_col_size);

for (const auto & [handle, write_type, commit_ts, value_ptr] : data_list)
{
Expand All @@ -207,72 +205,70 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
if (commit_ts > start_ts)
continue;

col_ids.clear();
fields.clear();
decoded_col_ids.clear();
decoded_fields.clear();
if (write_type == Region::DelFlag)
{
col_ids.reserve(target_col_size);
fields.reserve(target_col_size);
for (auto col_id : column_ids_to_read)
{
const auto & column = table_info.columns[column_id_to_info_index_map[col_id]];

col_ids.push_back(column.id);
fields.emplace_back(GenDecodeRow(column.getCodecFlag()));
decoded_col_ids.push_back(column.id);
decoded_fields.emplace_back(GenDecodeRow(column.getCodecFlag()));
}
}
else
{
bool schema_is_match = RecordKVFormat::DecodeRow(*value_ptr, column_ids_to_read, col_ids, fields, schema_all_column_ids);
if (!schema_is_match && !force_decode)
bool schema_matches = RecordKVFormat::DecodeRow(*value_ptr, column_ids_to_read, decoded_col_ids, decoded_fields, schema_all_column_ids);
if (!schema_matches && !force_decode)
{
return std::make_tuple(block, false);
}
if (col_ids.empty() && fields.size() == 1 && fields[0].isNull())
if (decoded_col_ids.empty() && decoded_fields.size() == 1 && decoded_fields[0].isNull())
{
// all fields are null
fields.clear();
decoded_fields.clear();
}
}

if (col_ids.size() != fields.size())
if (decoded_col_ids.size() != decoded_fields.size())
throw Exception("row size is wrong.", ErrorCodes::LOGICAL_ERROR);

/// Modify `row` by adding missing column values or removing useless column values.
if (unlikely(col_ids.size() > column_ids_to_read.size()))
if (unlikely(decoded_col_ids.size() > column_ids_to_read.size()))
{
throw Exception("read unexpected columns.", ErrorCodes::LOGICAL_ERROR);
}
if (col_ids.size() < column_ids_to_read.size())

// redundant column values (column id not in current schema) have been dropped when decoding the row
// this branch handles the case where the row doesn't contain all the needed columns
if (decoded_col_ids.size() < column_ids_to_read.size())
{
col_id_included.clear();
for (size_t i = 0; i < col_ids.size(); i++)
col_id_included.emplace(col_ids[i]);
std::unordered_set<ColumnID> decoded_col_ids_set(decoded_col_ids.begin(), decoded_col_ids.end());
solotzg marked this conversation as resolved.
Show resolved Hide resolved

// Fill in missing column values.
for (auto col_id : column_ids_to_read)
{
if (col_id_included.count(col_id))
if (decoded_col_ids_set.count(col_id))
continue;

const auto & column = table_info.columns[column_id_to_info_index_map[col_id]];
col_ids.push_back(column.id);
decoded_col_ids.push_back(column.id);
if (column.hasNoDefaultValueFlag())
// Fill `zero` value if NOT NULL specified or else NULL.
fields.push_back(column.hasNotNullFlag() ? GenDecodeRow(column.getCodecFlag()) : Field());
decoded_fields.push_back(column.hasNotNullFlag() ? GenDecodeRow(column.getCodecFlag()) : Field());
else
// Fill default value.
fields.push_back(column.defaultValueToField());
decoded_fields.push_back(column.defaultValueToField());
}
}

if (col_ids.size() != target_col_size || fields.size() != target_col_size)
if (decoded_col_ids.size() != target_col_size || decoded_fields.size() != target_col_size)
throw Exception("decode row error.", ErrorCodes::LOGICAL_ERROR);

/// Transform `row` to columnar format.
for (size_t i = 0; i < col_ids.size(); i++)
for (size_t i = 0; i < decoded_col_ids.size(); i++)
{
ColumnID col_id = col_ids[i];
ColumnID col_id = decoded_col_ids[i];
auto it = column_map.find(col_id);
if (it == column_map.end())
throw Exception("col_id not found in column_map", ErrorCodes::LOGICAL_ERROR);
Expand All @@ -281,10 +277,10 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
if (tp->isDateOrDateTime()
|| (tp->isNullable() && dynamic_cast<const DataTypeNullable *>(tp.get())->getNestedType()->isDateOrDateTime()))
{
Field & field = fields[i];
Field & field = decoded_fields[i];
if (field.isNull())
{
it->second.first->insert(fields[i]);
it->second.first->insert(decoded_fields[i]);
continue;
}
UInt64 packed = field.get<UInt64>();
Expand Down Expand Up @@ -329,7 +325,7 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
}
else
{
it->second.first->insert(fields[i]);
it->second.first->insert(decoded_fields[i]);

// Check overflow for potential un-synced data type widen,
// i.e. schema is old and narrow, meanwhile data is new and wide.
Expand All @@ -347,14 +343,14 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
{
// Unsigned checking by bitwise compare.
UInt64 inserted = nested_column.get64(inserted_index);
UInt64 orig = fields[i].get<UInt64>();
UInt64 orig = decoded_fields[i].get<UInt64>();
overflow = inserted != orig;
}
else
{
// Signed checking by arithmetical cast.
Int64 inserted = nested_column.getInt(inserted_index);
Int64 orig = fields[i].get<Int64>();
Int64 orig = decoded_fields[i].get<Int64>();
overflow = inserted != orig;
}
if (overflow)
Expand All @@ -364,7 +360,7 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
// Otherwise return false to outer, outer should sync schema and try again.
if (force_decode)
throw Exception(
"Detected overflow for data " + std::to_string(fields[i].get<UInt64>()) + " of type " + tp->getName(),
"Detected overflow for data " + std::to_string(decoded_fields[i].get<UInt64>()) + " of type " + tp->getName(),
ErrorCodes::LOGICAL_ERROR);

return std::make_tuple(block, false);
Expand Down
12 changes: 6 additions & 6 deletions dbms/src/Storages/Transaction/TiKVRecordFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,21 +40,21 @@ inline bool DecodeRow(const TiKVValue & value, const std::unordered_set<ColumnID
{
const String & raw_value = value.getStr();
size_t cursor = 0;
bool schema_is_match = true;
bool schema_matches = true;
size_t column_cnt = 0;
while (cursor < raw_value.size())
solotzg marked this conversation as resolved.
Show resolved Hide resolved
{
Field f = DecodeDatum(cursor, raw_value);
if (f.isNull())
{
fields.push_back(std::move(f));
fields.emplace_back(std::move(f));
break;
}
ColumnID col_id = f.get<ColumnID>();
column_cnt++;
if (!schema_all_column_ids.count(col_id))
zanmato1984 marked this conversation as resolved.
Show resolved Hide resolved
{
schema_is_match = false;
schema_matches = false;
}
if (!column_ids_to_read.count(col_id))
{
Expand All @@ -63,17 +63,17 @@ inline bool DecodeRow(const TiKVValue & value, const std::unordered_set<ColumnID
else
{
col_ids.push_back(col_id);
fields.push_back(DecodeDatum(cursor, raw_value));
fields.emplace_back(DecodeDatum(cursor, raw_value));
}
}
if (column_cnt != schema_all_column_ids.size())
{
schema_is_match = false;
schema_matches = false;
}

if (cursor != raw_value.size())
throw Exception("DecodeRow cursor is not end", ErrorCodes::LOGICAL_ERROR);
return schema_is_match;
return schema_matches;
}

// Key format is here:
Expand Down