Skip to content

Commit

Permalink
Make MyRocks range scan cost calculation more accurate (percona#212)
Browse files Browse the repository at this point in the history
Summary:
MyRocks had two bugs when calculating index scan cost.
1. block_size was not considered. This made the covering index scan cost
(for both full index scans and range scans) much higher than it should have been.
2. ha_rocksdb::records_in_range() could estimate more rows
than the estimated number of rows in the table. This was wrong,
and it led the MySQL optimizer to choose a full index scan even though
a range scan was more efficient.

This diff fixes #1 by setting stats.block_size in ha_rocksdb::open(),
and fixes #2 by reducing the estimated number of rows to slightly below
stats.records whenever the estimate is equal to or larger than stats.records.

Differential Revision: https://reviews.facebook.net/D55869

fbshipit-source-id: 15ca2c3fa8e
  • Loading branch information
yoshinorim authored and facebook-github-bot committed Mar 8, 2021
1 parent 821e066 commit 553a47c
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 13 deletions.
2 changes: 1 addition & 1 deletion mysql-test/suite/rocksdb/r/records_in_range.result
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ Note 1003 /* select#1 */ select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`t
set rocksdb_records_in_range=444;
explain extended select * from t1 where a< 750 and b> 500 and b< 750;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 index kab kab 10 NULL 1000 44.40 Using where; Using index
1 SIMPLE t1 range kab kab 5 NULL 444 100.00 Using where; Using index
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where ((`test`.`t1`.`a` < 750) and (`test`.`t1`.`b` > 500) and (`test`.`t1`.`b` < 750))
set rocksdb_records_in_range=0;
Expand Down
16 changes: 8 additions & 8 deletions mysql-test/suite/rocksdb/r/rocksdb.result
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ insert into t1 values (-5,-5,-5);
explain
select key1 from t1 where key1=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index key1 key1 5 NULL # Using where; Using index
1 SIMPLE t1 ref key1 key1 5 const # Using index
select key1 from t1 where key1=2;
key1
2
Expand All @@ -1011,7 +1011,7 @@ insert into t2 values (1,1,1), (2,2,2);
explain
select key1 from t2 where key1=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 index key1 key1 5 NULL # Using where; Using index
1 SIMPLE t2 ref key1 key1 5 const # Using index
select key1 from t2 where key1=2;
key1
2
Expand All @@ -1022,7 +1022,7 @@ insert into t3 values (1,1,1), (2,2,2);
explain
select key1 from t3 where key1=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t3 index key1 key1 9 NULL # Using where; Using index
1 SIMPLE t3 ref key1 key1 9 const # Using index
select key1 from t3 where key1=2;
key1
2
Expand All @@ -1040,7 +1040,7 @@ insert into t1 values(1, 'one',11), (2,'two',22);
explain
select key1 from t1 where key1='one';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index key1 key1 11 NULL # Using where; Using index
1 SIMPLE t1 ref key1 key1 11 const # Using where; Using index
# The following will produce no rows. This looks like a bug,
# but it is actually correct behavior. Binary strings are end-padded
# with \0 character (and not space). Comparison does not ignore
Expand All @@ -1050,7 +1050,7 @@ key1
explain
select hex(key1) from t1 where key1='one\0\0\0\0\0\0\0';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index key1 key1 11 NULL # Using where; Using index
1 SIMPLE t1 ref key1 key1 11 const # Using where; Using index
select hex(key1) from t1 where key1='one\0\0\0\0\0\0\0';
hex(key1)
6F6E6500000000000000
Expand All @@ -1065,7 +1065,7 @@ insert into t2 values(1, 'one',11), (2,'two',22);
explain
select key1 from t2 where key1='one';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 index key1 key1 11 NULL # Using where; Using index
1 SIMPLE t2 ref key1 key1 11 const # Using where; Using index
select key1 from t2 where key1='one';
key1
one
Expand All @@ -1080,7 +1080,7 @@ insert into t3 values(1, 'one',11), (2,'two',22);
explain
select key1 from t3 where key1='one';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t3 index key1 key1 31 NULL # Using where; Using index
1 SIMPLE t3 ref key1 key1 31 const # Using where; Using index
select key1 from t3 where key1='one';
key1
one
Expand All @@ -1095,7 +1095,7 @@ insert into t4 values(1, 'one'), (2,'two'),(3,'threee'),(55,'fifty-five');
explain
select key1 from t4 where key1='two';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t4 index key1 key1 13 NULL # Using where; Using index
1 SIMPLE t4 ref key1 key1 13 const # Using where; Using index
select key1 from t4 where key1='two';
key1
two
Expand Down
8 changes: 4 additions & 4 deletions mysql-test/suite/rocksdb/r/rocksdb_range.result
Original file line number Diff line number Diff line change
Expand Up @@ -255,14 +255,14 @@ insert into t4 select pk,pk,pk,pk from t2 where pk < 100;
explain
select * from t4 where a=1 and b in (1) order by c desc;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t4 index a a 15 NULL # Using where; Using index
1 SIMPLE t4 ref a a 10 const,const # Using where; Using index
select * from t4 where a=1 and b in (1) order by c desc;
pk a b c
1 1 1 1
explain
select * from t4 where a=5 and b in (4) order by c desc;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t4 index a a 15 NULL # Using where; Using index
1 SIMPLE t4 ref a a 10 const,const # Using where; Using index
select * from t4 where a=5 and b in (4) order by c desc;
pk a b c
# HA_READ_PREFIX_LAST for reverse-ordered CF
Expand All @@ -277,14 +277,14 @@ insert into t5 select pk,pk,pk,pk from t2 where pk < 100;
explain
select * from t5 where a=1 and b in (1) order by c desc;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t5 index a a 15 NULL # Using where; Using index
1 SIMPLE t5 ref a a 10 const,const # Using where; Using index
select * from t5 where a=1 and b in (1) order by c desc;
pk a b c
1 1 1 1
explain
select * from t5 where a=5 and b in (4) order by c desc;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t5 index a a 15 NULL # Using where; Using index
1 SIMPLE t5 ref a a 10 const,const # Using where; Using index
select * from t5 where a=5 and b in (4) order by c desc;
pk a b c
drop table t0,t1,t2,t3,t4,t5;
11 changes: 11 additions & 0 deletions mysql-test/suite/rocksdb/r/rocksdb_range2.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select count(*) from t1;
count(*)
10000
explain select c1 from t1 where c1 > 5 limit 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range i i 9 NULL 9900 Using where; Using index
drop table t1;
20 changes: 20 additions & 0 deletions mysql-test/suite/rocksdb/t/rocksdb_range2.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
--source include/have_rocksdb.inc

# Issue#212 MyRocks chooses full index scan even if range scan is more efficient
# rocksdb_debug_optimizer_n_rows must not be set.
# (When that variable is > 0, ha_rocksdb::records_in_range() returns it in
# place of the computed estimate, which would bypass what this test checks.)

# Table with a composite primary key (id1, id2) and a secondary index i(c1, c2).
create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
# Load 10000 rows with id1 = c1 = 1..10000 and every other column set to 0.
# Query logging is disabled so the individual inserts are not echoed into the
# result file.
--disable_query_log
let $i=0;
while ($i<10000)
{
inc $i;
eval insert t1(id1, id2, c1, c2, c3, c4, c5, c6, c7)
values($i, 0, $i, 0, 0, 0, 0, 0, 0);
}
--enable_query_log
# Run ANALYZE before EXPLAIN; presumably this refreshes the row statistics
# that the optimizer's estimates are based on.
analyze table t1;
# Sanity check that all 10000 rows were loaded.
select count(*) from t1;
# The plan is expected to be a range scan on index i (type=range) rather than
# a full index scan (type=index).
explain select c1 from t1 where c1 > 5 limit 10;
drop table t1;

16 changes: 16 additions & 0 deletions storage/rocksdb/ha_rocksdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3788,6 +3788,9 @@ int ha_rocksdb::open(const char *name, int mode, uint test_if_locked)
if (has_hidden_pk(table) && load_hidden_pk_value())
DBUG_RETURN(HA_ERR_INTERNAL_ERROR);

/* Index block size in MyRocks: used by MySQL in query optimization */
stats.block_size = table_options.block_size;

DBUG_RETURN(0);
}

Expand Down Expand Up @@ -7477,6 +7480,19 @@ ha_rows ha_rocksdb::records_in_range(uint inx, key_range *min_key,
}

ret = rows*sz/disk_size;

/*
GetApproximateSizes() returns estimates, so ret might exceed stats.records.
MySQL would then choose a full index scan rather than a range scan, which
is not efficient in most cases.
To prevent this, make the estimated number of records slightly smaller
than stats.records.
*/
if (ret >= stats.records)
{
ret = stats.records * 0.99;
}

if (rocksdb_debug_optimizer_n_rows > 0)
{
ret= rocksdb_debug_optimizer_n_rows;
Expand Down

0 comments on commit 553a47c

Please sign in to comment.