From 553a47c2aca5ccea77ca2c296505890f67598419 Mon Sep 17 00:00:00 2001 From: Yoshinori Matsunobu Date: Wed, 23 Mar 2016 09:16:24 -0700 Subject: [PATCH] Make MyRocks range scan cost calculation more accurate (#212) Summary: MyRocks had two bugs when calculating index scan cost. 1. block_size was not considered. This made covering index scan cost (both full index scan and range scan) much higher 2. ha_rocksdb::records_in_range() may have estimated more rows than the estimated number of rows in the table. This was wrong, and MySQL optimizer decided to use full index scan even though range scan was more efficient. This diff fixes #1 by setting stats.block_size at ha_rocksdb::open(), and fixes #2 by reducing the number of estimated rows if it was larger than stats.records. Differential Revision: https://reviews.facebook.net/D55869 fbshipit-source-id: 15ca2c3fa8e --- .../suite/rocksdb/r/records_in_range.result | 2 +- mysql-test/suite/rocksdb/r/rocksdb.result | 16 +++++++-------- .../suite/rocksdb/r/rocksdb_range.result | 8 ++++---- .../suite/rocksdb/r/rocksdb_range2.result | 11 ++++++++++ .../suite/rocksdb/t/rocksdb_range2.test | 20 +++++++++++++++++++ storage/rocksdb/ha_rocksdb.cc | 16 +++++++++++++++ 6 files changed, 60 insertions(+), 13 deletions(-) create mode 100644 mysql-test/suite/rocksdb/r/rocksdb_range2.result create mode 100644 mysql-test/suite/rocksdb/t/rocksdb_range2.test diff --git a/mysql-test/suite/rocksdb/r/records_in_range.result b/mysql-test/suite/rocksdb/r/records_in_range.result index 9fd9f2d5639a..021d5d4f989e 100644 --- a/mysql-test/suite/rocksdb/r/records_in_range.result +++ b/mysql-test/suite/rocksdb/r/records_in_range.result @@ -115,7 +115,7 @@ Note 1003 /* select#1 */ select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`t set rocksdb_records_in_range=444; explain extended select * from t1 where a< 750 and b> 500 and b< 750; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 index kab kab 10 NULL 1000 44.40 
Using where; Using index +1 SIMPLE t1 range kab kab 5 NULL 444 100.00 Using where; Using index Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where ((`test`.`t1`.`a` < 750) and (`test`.`t1`.`b` > 500) and (`test`.`t1`.`b` < 750)) set rocksdb_records_in_range=0; diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result index 0e7a4dfe1aba..df840dd7f4d6 100644 --- a/mysql-test/suite/rocksdb/r/rocksdb.result +++ b/mysql-test/suite/rocksdb/r/rocksdb.result @@ -997,7 +997,7 @@ insert into t1 values (-5,-5,-5); explain select key1 from t1 where key1=2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index key1 key1 5 NULL # Using where; Using index +1 SIMPLE t1 ref key1 key1 5 const # Using index select key1 from t1 where key1=2; key1 2 @@ -1011,7 +1011,7 @@ insert into t2 values (1,1,1), (2,2,2); explain select key1 from t2 where key1=2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 index key1 key1 5 NULL # Using where; Using index +1 SIMPLE t2 ref key1 key1 5 const # Using index select key1 from t2 where key1=2; key1 2 @@ -1022,7 +1022,7 @@ insert into t3 values (1,1,1), (2,2,2); explain select key1 from t3 where key1=2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t3 index key1 key1 9 NULL # Using where; Using index +1 SIMPLE t3 ref key1 key1 9 const # Using index select key1 from t3 where key1=2; key1 2 @@ -1040,7 +1040,7 @@ insert into t1 values(1, 'one',11), (2,'two',22); explain select key1 from t1 where key1='one'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index key1 key1 11 NULL # Using where; Using index +1 SIMPLE t1 ref key1 key1 11 const # Using where; Using index # The following will produce no rows. This looks like a bug, # but it is actually correct behavior. 
Binary strings are end-padded # with \0 character (and not space). Comparison does not ignore @@ -1050,7 +1050,7 @@ key1 explain select hex(key1) from t1 where key1='one\0\0\0\0\0\0\0'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index key1 key1 11 NULL # Using where; Using index +1 SIMPLE t1 ref key1 key1 11 const # Using where; Using index select hex(key1) from t1 where key1='one\0\0\0\0\0\0\0'; hex(key1) 6F6E6500000000000000 @@ -1065,7 +1065,7 @@ insert into t2 values(1, 'one',11), (2,'two',22); explain select key1 from t2 where key1='one'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 index key1 key1 11 NULL # Using where; Using index +1 SIMPLE t2 ref key1 key1 11 const # Using where; Using index select key1 from t2 where key1='one'; key1 one @@ -1080,7 +1080,7 @@ insert into t3 values(1, 'one',11), (2,'two',22); explain select key1 from t3 where key1='one'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t3 index key1 key1 31 NULL # Using where; Using index +1 SIMPLE t3 ref key1 key1 31 const # Using where; Using index select key1 from t3 where key1='one'; key1 one @@ -1095,7 +1095,7 @@ insert into t4 values(1, 'one'), (2,'two'),(3,'threee'),(55,'fifty-five'); explain select key1 from t4 where key1='two'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t4 index key1 key1 13 NULL # Using where; Using index +1 SIMPLE t4 ref key1 key1 13 const # Using where; Using index select key1 from t4 where key1='two'; key1 two diff --git a/mysql-test/suite/rocksdb/r/rocksdb_range.result b/mysql-test/suite/rocksdb/r/rocksdb_range.result index 0c798df6f3b5..54471314aa7b 100644 --- a/mysql-test/suite/rocksdb/r/rocksdb_range.result +++ b/mysql-test/suite/rocksdb/r/rocksdb_range.result @@ -255,14 +255,14 @@ insert into t4 select pk,pk,pk,pk from t2 where pk < 100; explain select * from t4 where a=1 and b in (1) order by c desc; id select_type table type 
possible_keys key key_len ref rows Extra -1 SIMPLE t4 index a a 15 NULL # Using where; Using index +1 SIMPLE t4 ref a a 10 const,const # Using where; Using index select * from t4 where a=1 and b in (1) order by c desc; pk a b c 1 1 1 1 explain select * from t4 where a=5 and b in (4) order by c desc; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t4 index a a 15 NULL # Using where; Using index +1 SIMPLE t4 ref a a 10 const,const # Using where; Using index select * from t4 where a=5 and b in (4) order by c desc; pk a b c # HA_READ_PREFIX_LAST for reverse-ordered CF @@ -277,14 +277,14 @@ insert into t5 select pk,pk,pk,pk from t2 where pk < 100; explain select * from t5 where a=1 and b in (1) order by c desc; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t5 index a a 15 NULL # Using where; Using index +1 SIMPLE t5 ref a a 10 const,const # Using where; Using index select * from t5 where a=1 and b in (1) order by c desc; pk a b c 1 1 1 1 explain select * from t5 where a=5 and b in (4) order by c desc; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t5 index a a 15 NULL # Using where; Using index +1 SIMPLE t5 ref a a 10 const,const # Using where; Using index select * from t5 where a=5 and b in (4) order by c desc; pk a b c drop table t0,t1,t2,t3,t4,t5; diff --git a/mysql-test/suite/rocksdb/r/rocksdb_range2.result b/mysql-test/suite/rocksdb/r/rocksdb_range2.result new file mode 100644 index 000000000000..d7a4f9dd0652 --- /dev/null +++ b/mysql-test/suite/rocksdb/r/rocksdb_range2.result @@ -0,0 +1,11 @@ +create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2)); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select count(*) from t1; +count(*) +10000 +explain select c1 from t1 where c1 > 5 limit 10; +id select_type table type possible_keys key key_len ref rows 
Extra +1 SIMPLE t1 range i i 9 NULL 9900 Using where; Using index +drop table t1; diff --git a/mysql-test/suite/rocksdb/t/rocksdb_range2.test b/mysql-test/suite/rocksdb/t/rocksdb_range2.test new file mode 100644 index 000000000000..6b8d0b90e905 --- /dev/null +++ b/mysql-test/suite/rocksdb/t/rocksdb_range2.test @@ -0,0 +1,20 @@ +--source include/have_rocksdb.inc + +# Issue#212 MyRocks chooses full index scan even if range scan is more efficient +# rocksdb_debug_optimizer_n_rows must not be set. + +create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2)); +--disable_query_log +let $i=0; +while ($i<10000) +{ + inc $i; + eval insert t1(id1, id2, c1, c2, c3, c4, c5, c6, c7) + values($i, 0, $i, 0, 0, 0, 0, 0, 0); +} +--enable_query_log +analyze table t1; +select count(*) from t1; +explain select c1 from t1 where c1 > 5 limit 10; +drop table t1; + diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index d44ca448b1be..b6b1b922ee6e 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -3788,6 +3788,9 @@ int ha_rocksdb::open(const char *name, int mode, uint test_if_locked) if (has_hidden_pk(table) && load_hidden_pk_value()) DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + /* Index block size in MyRocks: used by MySQL in query optimization */ + stats.block_size = table_options.block_size; + DBUG_RETURN(0); } @@ -7477,6 +7480,19 @@ ha_rows ha_rocksdb::records_in_range(uint inx, key_range *min_key, } ret = rows*sz/disk_size; + + /* + GetApproximateSizes() gives estimates so ret might exceed stats.records. + MySQL then decides to use full index scan rather than range scan, which + is not efficient for most cases. + To prevent this, cap the estimated row count slightly below + stats.records. 
+ */ + if (ret >= stats.records) + { + ret = stats.records * 0.99; + } + if (rocksdb_debug_optimizer_n_rows > 0) { ret= rocksdb_debug_optimizer_n_rows;