Make MyRocks range scan cost calculation more accurate (percona#212)

Summary: MyRocks had two bugs when calculating index scan cost. 1. block_size was not considered. This made the covering index scan cost (for both full index scans and range scans) much higher. 2. ha_rocksdb::records_in_range() could estimate more rows than the estimated number of rows in the table. This was wrong, and it led the MySQL optimizer to choose a full index scan even when a range scan was more efficient. This diff fixes #1 by setting stats.block_size in ha_rocksdb::open(), and fixes #2 by reducing the estimated number of rows to slightly below stats.records whenever the estimate was greater than or equal to stats.records. Differential Revision: https://reviews.facebook.net/D55869 fbshipit-source-id: 15ca2c3fa8e
inikep · Mar 8, 2021 · 553a47c · 553a47c
1 parent 821e066
commit 553a47c
Show file tree

Hide file tree

Showing 6 changed files with 60 additions and 13 deletions.
diff --git a/mysql-test/suite/rocksdb/r/records_in_range.result b/mysql-test/suite/rocksdb/r/records_in_range.result
@@ -115,7 +115,7 @@ Note	1003	/* select#1 */ select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`t
 set rocksdb_records_in_range=444;
 explain extended select * from t1 where a< 750 and b> 500 and b< 750;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	index	kab	kab	10	NULL	1000	44.40	Using where; Using index
+1	SIMPLE	t1	range	kab	kab	5	NULL	444	100.00	Using where; Using index
 Warnings:
 Note	1003	/* select#1 */ select `test`.`t1`.`i` AS `i`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where ((`test`.`t1`.`a` < 750) and (`test`.`t1`.`b` > 500) and (`test`.`t1`.`b` < 750))
 set rocksdb_records_in_range=0;

diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result
@@ -997,7 +997,7 @@ insert into t1 values (-5,-5,-5);
 explain
 select key1 from t1 where key1=2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	index	key1	key1	5	NULL	#	Using where; Using index
+1	SIMPLE	t1	ref	key1	key1	5	const	#	Using index
 select key1 from t1 where key1=2;
 key1
 2
@@ -1011,7 +1011,7 @@ insert into t2 values (1,1,1), (2,2,2);
 explain
 select key1 from t2 where key1=2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	index	key1	key1	5	NULL	#	Using where; Using index
+1	SIMPLE	t2	ref	key1	key1	5	const	#	Using index
 select key1 from t2 where key1=2;
 key1
 2
@@ -1022,7 +1022,7 @@ insert into t3 values (1,1,1), (2,2,2);
 explain 
 select key1 from t3 where key1=2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t3	index	key1	key1	9	NULL	#	Using where; Using index
+1	SIMPLE	t3	ref	key1	key1	9	const	#	Using index
 select key1 from t3 where key1=2;
 key1
 2
@@ -1040,7 +1040,7 @@ insert into t1 values(1, 'one',11), (2,'two',22);
 explain 
 select key1 from t1 where key1='one';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	index	key1	key1	11	NULL	#	Using where; Using index
+1	SIMPLE	t1	ref	key1	key1	11	const	#	Using where; Using index
 # The following will produce no rows. This looks like a bug,
 #  but it is actually correct behavior. Binary strings are end-padded
 #  with \0 character (and not space).  Comparison does not ignore
@@ -1050,7 +1050,7 @@ key1
 explain
 select hex(key1) from t1 where key1='one\0\0\0\0\0\0\0';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	index	key1	key1	11	NULL	#	Using where; Using index
+1	SIMPLE	t1	ref	key1	key1	11	const	#	Using where; Using index
 select hex(key1) from t1 where key1='one\0\0\0\0\0\0\0';
 hex(key1)
 6F6E6500000000000000
@@ -1065,7 +1065,7 @@ insert into t2 values(1, 'one',11), (2,'two',22);
 explain 
 select key1 from t2 where key1='one';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	index	key1	key1	11	NULL	#	Using where; Using index
+1	SIMPLE	t2	ref	key1	key1	11	const	#	Using where; Using index
 select key1 from t2 where key1='one';
 key1
 one
@@ -1080,7 +1080,7 @@ insert into t3 values(1, 'one',11), (2,'two',22);
 explain 
 select key1 from t3 where key1='one';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t3	index	key1	key1	31	NULL	#	Using where; Using index
+1	SIMPLE	t3	ref	key1	key1	31	const	#	Using where; Using index
 select key1 from t3 where key1='one';
 key1
 one
@@ -1095,7 +1095,7 @@ insert into t4 values(1, 'one'), (2,'two'),(3,'threee'),(55,'fifty-five');
 explain 
 select key1 from t4 where key1='two';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t4	index	key1	key1	13	NULL	#	Using where; Using index
+1	SIMPLE	t4	ref	key1	key1	13	const	#	Using where; Using index
 select key1 from t4 where key1='two';
 key1
 two

diff --git a/mysql-test/suite/rocksdb/r/rocksdb_range.result b/mysql-test/suite/rocksdb/r/rocksdb_range.result
@@ -255,14 +255,14 @@ insert into t4 select pk,pk,pk,pk from t2 where pk < 100;
 explain 
 select * from t4 where a=1 and b in (1) order by c desc;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t4	index	a	a	15	NULL	#	Using where; Using index
+1	SIMPLE	t4	ref	a	a	10	const,const	#	Using where; Using index
 select * from t4 where a=1 and b in (1) order by c desc;
 pk	a	b	c
 1	1	1	1
 explain 
 select * from t4 where a=5 and b in (4) order by c desc;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t4	index	a	a	15	NULL	#	Using where; Using index
+1	SIMPLE	t4	ref	a	a	10	const,const	#	Using where; Using index
 select * from t4 where a=5 and b in (4) order by c desc;
 pk	a	b	c
 # HA_READ_PREFIX_LAST for reverse-ordered CF
@@ -277,14 +277,14 @@ insert into t5 select pk,pk,pk,pk from t2 where pk < 100;
 explain 
 select * from t5 where a=1 and b in (1) order by c desc;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t5	index	a	a	15	NULL	#	Using where; Using index
+1	SIMPLE	t5	ref	a	a	10	const,const	#	Using where; Using index
 select * from t5 where a=1 and b in (1) order by c desc;
 pk	a	b	c
 1	1	1	1
 explain 
 select * from t5 where a=5 and b in (4) order by c desc;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t5	index	a	a	15	NULL	#	Using where; Using index
+1	SIMPLE	t5	ref	a	a	10	const,const	#	Using where; Using index
 select * from t5 where a=5 and b in (4) order by c desc;
 pk	a	b	c
 drop table t0,t1,t2,t3,t4,t5;
diff --git a/mysql-test/suite/rocksdb/r/rocksdb_range2.result b/mysql-test/suite/rocksdb/r/rocksdb_range2.result
@@ -0,0 +1,11 @@
+create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+select count(*) from t1;
+count(*)
+10000
+explain select c1 from t1 where c1 > 5 limit 10;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	i	i	9	NULL	9900	Using where; Using index
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb_range2.test b/mysql-test/suite/rocksdb/t/rocksdb_range2.test
@@ -0,0 +1,20 @@
+--source include/have_rocksdb.inc
+
+# Issue#212 MyRocks chooses full index scan even if range scan is more efficient
+# rocksdb_debug_optimizer_n_rows must not be set.
+
+create table t1 (id1 bigint, id2 bigint, c1 bigint, c2 bigint, c3 bigint, c4 bigint, c5 bigint, c6 bigint, c7 bigint, primary key (id1, id2), index i(c1, c2));
+--disable_query_log
+let $i=0;
+while ($i<10000)
+{
+  inc $i;
+  eval insert t1(id1, id2, c1, c2, c3, c4, c5, c6, c7)
+          values($i,    0, $i,  0,  0,  0,  0,  0,  0);
+}
+--enable_query_log
+analyze table t1;
+select count(*) from t1;
+explain select c1 from t1 where c1 > 5 limit 10;
+drop table t1;
+
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
@@ -3788,6 +3788,9 @@ int ha_rocksdb::open(const char *name, int mode, uint test_if_locked)
   if (has_hidden_pk(table) && load_hidden_pk_value())
     DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
 
+  /* Index block size in MyRocks: used by MySQL in query optimization */
+  stats.block_size = table_options.block_size;
+
   DBUG_RETURN(0);
 }
 
@@ -7477,6 +7480,19 @@ ha_rows ha_rocksdb::records_in_range(uint inx, key_range *min_key,
   }
 
   ret = rows*sz/disk_size;
+
+  /*
+    GetApproximateSizes() gives estimates so ret might exceed stats.records.
+    MySQL then decides to use full index scan rather than range scan, which
+    is not efficient for most cases.
+    To prevent this, reduce the estimated row count to slightly less than
+    stats.records.
+  */
+  if (ret >= stats.records)
+  {
+    ret = stats.records * 0.99;
+  }
+
   if (rocksdb_debug_optimizer_n_rows > 0)
   {
     ret= rocksdb_debug_optimizer_n_rows;