Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FLASH-402] Skip unneeded columns when decode row #173

Merged
merged 45 commits into from
Aug 20, 2019
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
6abfa1d
skip unneeded column
lidezhu Aug 10, 2019
b83a4b5
small fix
lidezhu Aug 11, 2019
445fc4d
check unknown column id when decode row
lidezhu Aug 12, 2019
fce6f15
add initializer for boolean variable
lidezhu Aug 13, 2019
0231b95
fix the case when tikv value contains just a nil flag
lidezhu Aug 13, 2019
0366142
remove unnecessary copy
lidezhu Aug 13, 2019
f5013e9
small improvement
lidezhu Aug 13, 2019
b0d3e9c
fix comment
lidezhu Aug 14, 2019
93a10a7
don't store column id in field
lidezhu Aug 14, 2019
8a5bb7b
small improvement
lidezhu Aug 15, 2019
dcc4265
remove unnecessary blank line
lidezhu Aug 15, 2019
a92d878
add log for performance debug
lidezhu Aug 15, 2019
1881a5c
adjust time point
lidezhu Aug 15, 2019
eae234b
small fix and adjust log
lidezhu Aug 15, 2019
43472ba
small improvement
lidezhu Aug 15, 2019
1ddb5d2
small improvement
lidezhu Aug 16, 2019
1952272
remove unnecessary construction
lidezhu Aug 16, 2019
d051cbb
small improvement
lidezhu Aug 16, 2019
7aee3e5
fix conflict with master
lidezhu Aug 16, 2019
9191506
uncomment flushregion
lidezhu Aug 16, 2019
8befbd6
Merge branch 'master' into FLASH402
solotzg Aug 19, 2019
a64e2c5
skip uint by calling DecodeVarUInt
lidezhu Aug 19, 2019
68a688d
Merge branch 'FLASH402' of github.com:lidezhu/tics into FLASH402
lidezhu Aug 19, 2019
33b14ef
Merge branch 'master' into FLASH402
lidezhu Aug 19, 2019
171b2f9
avoid insert column id
lidezhu Aug 20, 2019
dfabdb2
Merge branch 'FLASH402' of github.com:lidezhu/tics into FLASH402
lidezhu Aug 20, 2019
a8a25ca
small fix
lidezhu Aug 20, 2019
72beacc
add exception message
lidezhu Aug 20, 2019
237b7f2
comment flushRegion
lidezhu Aug 20, 2019
ad152f8
uncomment flushRegion and other minor fix
lidezhu Aug 20, 2019
c2afabb
modify push_back to emplace_back
lidezhu Aug 20, 2019
f50a6f4
add const
lidezhu Aug 20, 2019
9daf974
fix comment
lidezhu Aug 20, 2019
b6cb23f
fix comment
lidezhu Aug 20, 2019
8b89ea1
optimize by using dense_hash_map&dense_hash_set
solotzg Aug 20, 2019
8970e15
fix
solotzg Aug 20, 2019
1422d0a
fix
solotzg Aug 20, 2019
8b2e256
fix comment
lidezhu Aug 20, 2019
b574f5b
small fix
lidezhu Aug 20, 2019
12db36f
Merge branch 'FLASH402' of github.com:lidezhu/tics into lidezhu-FLASH…
solotzg Aug 20, 2019
96d62ed
Merge branch 'FLASH402' of github.com:lidezhu/tics into lidezhu-FLASH…
solotzg Aug 20, 2019
5119717
remove useless comment
lidezhu Aug 20, 2019
ad9bb51
format
solotzg Aug 20, 2019
e361ea5
Merge pull request #1 from solotzg/FLASH-402-optimize-hash
lidezhu Aug 20, 2019
f886ff3
[FLASH-402] optimize column_map (#2)
solotzg Aug 20, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions dbms/src/Storages/Transaction/Codec.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,96 @@ inline Field DecodeDatum(size_t & cursor, const String & raw_value)
}
}

inline void SkipBytes(size_t & cursor, const String & raw_value)
{
while (true)
{
size_t next_cursor = cursor + 9;
if (next_cursor > raw_value.size())
throw Exception("Wrong format, cursor over buffer size. (DecodeBytes)", ErrorCodes::LOGICAL_ERROR);
UInt8 marker = (UInt8)raw_value[cursor + 8];
UInt8 pad_size = ENC_MARKER - marker;

if (pad_size > 8)
throw Exception("Wrong format, too many padding bytes. (DecodeBytes)", ErrorCodes::LOGICAL_ERROR);
cursor = next_cursor;
if (pad_size != 0)
break;
}
}

inline void SkipCompactBytes(size_t & cursor, const String & raw_value)
{
size_t size = DecodeVarInt(cursor, raw_value);
cursor += size;
}

inline void SkipVarUInt(size_t & cursor, const String & raw_value)
{
for (int i = 0; cursor < raw_value.size(); i++)
{
UInt64 v = raw_value[cursor++];
if (v < 0x80)
{
if (i > 9 || (i == 9 && v > 1))
throw Exception("Overflow when DecodeVarUInt", ErrorCodes::LOGICAL_ERROR);
return;
}
}
throw Exception("Wrong format. (DecodeVarUInt)", ErrorCodes::LOGICAL_ERROR);
}

/// Advances `cursor` past one variable-length signed integer.
/// Only the encoded length matters when skipping, so this simply delegates to SkipVarUInt
/// and shares its error behaviour.
inline void SkipVarInt(size_t & cursor, const String & raw_value)
{
    return SkipVarUInt(cursor, raw_value);
}

inline void SkipDecimal(size_t & cursor, const String & raw_value)
{
PrecType prec = raw_value[cursor++];
ScaleType frac = raw_value[cursor++];

int binSize = getBytes(prec, frac);
cursor += binSize;
}

/// Advances `cursor` past one encoded datum without decoding its value.
///
/// The first byte is the codec flag; it selects how the rest of the datum is skipped:
///   - Nil: nothing follows the flag.
///   - Int / UInt / Float: a fixed 8-byte payload.
///   - Bytes, CompactBytes, VarUInt, VarInt, Decimal: the matching Skip* helper.
///
/// Throws LOGICAL_ERROR for CodecFlagDuration (not supported) and for any unrecognized flag.
inline void SkipDatum(size_t & cursor, const String & raw_value)
{
    const auto flag = raw_value[cursor++];
    switch (flag)
    {
        case TiDB::CodecFlagNil:
            break;
        case TiDB::CodecFlagInt:
            cursor += sizeof(Int64);
            break;
        case TiDB::CodecFlagUInt:
        case TiDB::CodecFlagFloat:
            // Both are skipped as a fixed-width 64-bit payload.
            cursor += sizeof(UInt64);
            break;
        case TiDB::CodecFlagBytes:
            SkipBytes(cursor, raw_value);
            break;
        case TiDB::CodecFlagCompactBytes:
            SkipCompactBytes(cursor, raw_value);
            break;
        case TiDB::CodecFlagVarUInt:
            SkipVarUInt(cursor, raw_value);
            break;
        case TiDB::CodecFlagVarInt:
            SkipVarInt(cursor, raw_value);
            break;
        case TiDB::CodecFlagDecimal:
            SkipDecimal(cursor, raw_value);
            break;
        case TiDB::CodecFlagDuration:
            throw Exception("Not implented yet. DecodeDatum: CodecFlagDuration", ErrorCodes::LOGICAL_ERROR);
        default:
            throw Exception("Unknown Type:" + std::to_string(flag), ErrorCodes::LOGICAL_ERROR);
    }
}

template <typename T>
inline void writeIntBinary(const T & x, std::stringstream & ss)
{
Expand Down
28 changes: 23 additions & 5 deletions dbms/src/Storages/Transaction/RegionBlockReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,18 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
ColumnID handle_col_id = InvalidColumnID;

std::unordered_map<ColumnID, std::pair<MutableColumnPtr, NameAndTypePair>> column_map;
std::unordered_set<ColumnID> column_ids_to_read;
std::unordered_set<ColumnID> all_column_ids;
for (const auto & column_info : table_info.columns)
{
ColumnID col_id = column_info.id;
String col_name = column_info.name;
all_column_ids.emplace(col_id);
if (std::find(column_names_to_read.begin(), column_names_to_read.end(), col_name) == column_names_to_read.end())
{
continue;
}
column_ids_to_read.emplace(col_id);
auto ch_col = columns.getPhysical(col_name);
column_map[col_id] = std::make_pair(ch_col.type->createColumn(), ch_col);
column_map[col_id].first->reserve(data_list.size());
Expand Down Expand Up @@ -170,7 +178,7 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,

std::unordered_set<ColumnID> col_id_included;

const size_t target_col_size = (!table_info.pk_is_handle ? table_info.columns.size() : table_info.columns.size() - 1) * 2;
const size_t target_col_size = (column_names_to_read.size() - 3) * 2;

Block block;

Expand All @@ -188,6 +196,7 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
// TODO: optimize columns' insertion, use better implementation rather than Field, it's terrible.

std::vector<Field> row;
bool has_unknown_col_id = false;

if (write_type == Region::DelFlag)
{
Expand All @@ -197,12 +206,15 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
if (handle_col_id == column.id)
continue;

if (!column_ids_to_read.count(column.id))
continue;

row.push_back(Field(column.id));
row.push_back(GenDecodeRow(column.getCodecFlag()));
}
}
else
row = RecordKVFormat::DecodeRow(*value_ptr);
std::tie(row, has_unknown_col_id) = RecordKVFormat::DecodeRow(*value_ptr, column_ids_to_read, all_column_ids);

if (row.size() == 1 && row[0].isNull())
{
Expand All @@ -213,6 +225,12 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
if (row.size() & 1)
throw Exception("row size is wrong.", ErrorCodes::LOGICAL_ERROR);

/// The row contains a column id that is not in the current schema.
/// If force_decode is false, the schema may be too old, so give up and return false.
/// If force_decode is true, the column may have been dropped, so just ignore it.
if (has_unknown_col_id && !force_decode)
return std::make_tuple(block, false);

/// Modify `row` by adding missing column values or removing useless column values.

col_id_included.clear();
Expand All @@ -230,6 +248,9 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
if (!force_decode)
return std::make_tuple(block, false);

if (!column_ids_to_read.count(column.id))
continue;

row.emplace_back(Field(column.id));
if (column.hasNoDefaultValueFlag())
// Fill `zero` value if NOT NULL specified or else NULL.
Expand All @@ -246,9 +267,6 @@ std::tuple<Block, bool> readRegionBlock(const TiDB::TableInfo & table_info,
Field & col_id = row[i];
if (column_map.find(col_id.get<ColumnID>()) == column_map.end())
{
if (!force_decode)
return std::make_tuple(block, false);

row.erase(row.begin() + i, row.begin() + i + 2);
}
}
Expand Down
22 changes: 19 additions & 3 deletions dbms/src/Storages/Transaction/TiKVRecordFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,36 @@ static const UInt64 SIGN_MARK = UInt64(1) << 63;
static const size_t RAW_KEY_NO_HANDLE_SIZE = 1 + 8 + 2;
static const size_t RAW_KEY_SIZE = RAW_KEY_NO_HANDLE_SIZE + 8;

inline std::vector<Field> DecodeRow(const TiKVValue & value)
inline std::tuple<std::vector<Field>, bool> DecodeRow(const TiKVValue & value, const std::unordered_set<ColumnID> & column_ids_to_read, const std::unordered_set<ColumnID> & all_column_ids)
{
std::vector<Field> vec;
const String & raw_value = value.getStr();
size_t cursor = 0;
bool has_unknown_col_id = false;
while (cursor < raw_value.size())
solotzg marked this conversation as resolved.
Show resolved Hide resolved
{
vec.push_back(DecodeDatum(cursor, raw_value));
Field f = DecodeDatum(cursor, raw_value);
if (f.isNull())
break;
if (!has_unknown_col_id && !all_column_ids.count(f.get<ColumnID>()))
{
has_unknown_col_id = true;
}
if (!column_ids_to_read.count(f.get<ColumnID>()))
{
SkipDatum(cursor, raw_value);
}
else
{
vec.push_back(std::move(f));
vec.push_back(DecodeDatum(cursor, raw_value));
}
}

if (cursor != raw_value.size())
throw Exception("DecodeRow cursor is not end", ErrorCodes::LOGICAL_ERROR);

return vec;
return std::make_tuple(vec, has_unknown_col_id);
lidezhu marked this conversation as resolved.
Show resolved Hide resolved
}

// Key format is here:
Expand Down