Skip to content

Commit 26db313

Browse files
committed
MDEV-34720: Poor plan choice for large JOIN with ORDER BY and small LIMIT
Part #1: hook into the join optimizer. The choice between index-based access methods is not implemented yet.
1 parent 1e78e1e commit 26db313

File tree

7 files changed

+299
-0
lines changed

7 files changed

+299
-0
lines changed

mysql-test/main/mysqld--help.result

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,14 @@ The following specify which files/extra groups are read (specified before remain
723723
in MariaDB 11.0 as it is not needed with the new 11.0
724724
optimizer.
725725
Use 'ALL' to set all combinations.
726+
--optimizer-join-limit-pref-ratio=#
727+
For queries with JOIN and ORDER BY LIMIT : change the
728+
join plan to one that can short-cut after producing
729+
#LIMIT matches if that promises N times speedup. (That
730+
is, a conservative setting is a high value, like var=100
731+
to change only if this promises 100x) The default is 0
732+
which gives old behavior (don't change no matter what the
733+
speedup)
726734
--optimizer-max-sel-arg-weight=#
727735
The maximum weight of the SEL_ARG graph. Set to 0 for no
728736
limit
@@ -1696,6 +1704,7 @@ old-mode UTF8_IS_UTF8MB3
16961704
old-passwords FALSE
16971705
old-style-user-limits FALSE
16981706
optimizer-adjust-secondary-key-costs
1707+
optimizer-join-limit-pref-ratio 0
16991708
optimizer-max-sel-arg-weight 32000
17001709
optimizer-max-sel-args 16000
17011710
optimizer-prune-level 1

mysql-test/suite/sys_vars/r/sysvars_server_embedded.result

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2282,6 +2282,16 @@ NUMERIC_BLOCK_SIZE NULL
22822282
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by
22832283
READ_ONLY NO
22842284
COMMAND_LINE_ARGUMENT REQUIRED
2285+
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
2286+
VARIABLE_SCOPE SESSION
2287+
VARIABLE_TYPE BIGINT UNSIGNED
2288+
VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : change the join plan to one that can short-cut after producing #LIMIT matches if that promises N times speedup. (That is, a conservative setting is a high value, like var=100 to change only if this promises 100x) The default is 0 which gives old behavior (don't change no matter what the speedup)
2289+
NUMERIC_MIN_VALUE 0
2290+
NUMERIC_MAX_VALUE 4294967295
2291+
NUMERIC_BLOCK_SIZE 1
2292+
ENUM_VALUE_LIST NULL
2293+
READ_ONLY NO
2294+
COMMAND_LINE_ARGUMENT REQUIRED
22852295
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
22862296
VARIABLE_SCOPE SESSION
22872297
VARIABLE_TYPE BIGINT UNSIGNED

mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2442,6 +2442,16 @@ NUMERIC_BLOCK_SIZE NULL
24422442
ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by
24432443
READ_ONLY NO
24442444
COMMAND_LINE_ARGUMENT REQUIRED
2445+
VARIABLE_NAME OPTIMIZER_JOIN_LIMIT_PREF_RATIO
2446+
VARIABLE_SCOPE SESSION
2447+
VARIABLE_TYPE BIGINT UNSIGNED
2448+
VARIABLE_COMMENT For queries with JOIN and ORDER BY LIMIT : change the join plan to one that can short-cut after producing #LIMIT matches if that promises N times speedup. (That is, a conservative setting is a high value, like var=100 to change only if this promises 100x) The default is 0 which gives old behavior (don't change no matter what the speedup)
2449+
NUMERIC_MIN_VALUE 0
2450+
NUMERIC_MAX_VALUE 4294967295
2451+
NUMERIC_BLOCK_SIZE 1
2452+
ENUM_VALUE_LIST NULL
2453+
READ_ONLY NO
2454+
COMMAND_LINE_ARGUMENT REQUIRED
24452455
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS
24462456
VARIABLE_SCOPE SESSION
24472457
VARIABLE_TYPE BIGINT UNSIGNED

sql/sql_class.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,7 @@ typedef struct system_variables
758758
ulong net_retry_count;
759759
ulong net_wait_timeout;
760760
ulong net_write_timeout;
761+
ulong optimizer_join_limit_pref_ratio;
761762
ulong optimizer_prune_level;
762763
ulong optimizer_search_depth;
763764
ulong optimizer_selectivity_sampling_limit;

sql/sql_select.cc

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, Item *cond);
330330
static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit,
331331
Item *cond);
332332

333+
bool join_shortcut_limit_is_applicable(JOIN *join);
333334
static
334335
bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order,
335336
key_map *usable_keys);
@@ -5803,6 +5804,13 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
58035804
join->sort_by_table= get_sort_by_table(join->order, join->group_list,
58045805
join->select_lex->leaf_tables,
58055806
join->const_table_map);
5807+
5808+
join->limit_shortcut_applicable= join_shortcut_limit_is_applicable(join);
5809+
/*
5810+
psergey-todo: check the applicability and figure out which indexes we can use.
5811+
TODO: move this to after the join optimization?
5812+
*/
5813+
58065814
/*
58075815
Update info on indexes that can be used for search lookups as
58085816
reading const tables may has added new sargable predicates.
@@ -10330,6 +10338,232 @@ check_if_edge_table(POSITION *pos,
1033010338
}
1033110339

1033210340

10341+
/*
10342+
Check if we would be able to short-cut join execution for ORDER BY ... LIMIT
10343+
*/
10344+
bool join_shortcut_limit_is_applicable(JOIN *join)
10345+
{
10346+
/*
10347+
Any post-join operation like GROUP BY or DISTINCT or window functions
10348+
means we cannot short-cut join execution
10349+
*/
10350+
if (!join->thd->variables.optimizer_join_limit_pref_ratio ||
10351+
!join->order ||
10352+
join->select_limit == HA_POS_ERROR ||
10353+
join->group_list ||
10354+
join->select_distinct ||
10355+
join->select_options & SELECT_BIG_RESULT ||
10356+
join->rollup.state != ROLLUP::STATE_NONE ||
10357+
join->select_lex->have_window_funcs()
10358+
// || TODO: aggregates and implicit grouping
10359+
)
10360+
{
10361+
return false;
10362+
}
10363+
10364+
/* If sorting is not done by one table can't do that either */
10365+
if (!join->sort_by_table)
10366+
return false;
10367+
10368+
Json_writer_object wrapper(join->thd);
10369+
Json_writer_object trace(join->thd, "join_limit_shortcut_is_applicable");
10370+
trace.add("applicable", 1);
10371+
/* It looks like we can short-cut limit due to join */
10372+
return true;
10373+
}
10374+
10375+
10376+
/*
  Check whether this is the point in the join order search at which an
  extra "LIMIT short-cut" pass should be started.

  @param join  The join being optimized
  @param idx   Current position in the join order being built

  @return
    Pointer to the join->best_ref element (at or after idx) whose table is
    join->sort_by_table, or NULL if the extra pass should not be run or no
    such element was found.
*/
JOIN_TAB **join_check_shortcut_limit_now(JOIN *join, uint idx)
{
  /*
    Bail out unless: the short-cut is applicable, we are at the first
    non-const position, we are not inside an embedded SJM nest, the current
    best plan produces more rows than the LIMIT, and that plan does not
    already start with the sort table.
  */
  if (!join->limit_shortcut_applicable || idx != join->const_tables ||
      join->emb_sjm_nest ||
      !(join->join_record_count > join->select_limit) ||
      join->best_positions[join->const_tables].table->table ==
      join->sort_by_table)
    return NULL;

  /* Scan the NULL-terminated tail of best_ref for the sort table */
  JOIN_TAB **candidate= join->best_ref + idx;
  while (*candidate)
  {
    if ((*candidate)->table == join->sort_by_table)
      return candidate;
    candidate++;
  }
  return NULL;
}
10392+
10393+
class Shortcut_opt : public Sql_alloc
10394+
{
10395+
Json_writer_object wrapper;
10396+
Json_writer_object trace;
10397+
Json_writer_array trace_array;
10398+
10399+
public:
10400+
Shortcut_opt(THD *thd):
10401+
wrapper(thd),
10402+
trace(thd, "join_limit_shortcut_optimization"),
10403+
trace_array(thd, "opt")
10404+
{}
10405+
double save_join_record_count;
10406+
double save_best_read;
10407+
POSITION *save_best_pos;
10408+
};
10409+
10410+
10411+
/*
  Start the "LIMIT short-cut" optimization pass.

  Saves the current best plan (join->best_positions, join->best_read and
  join->join_record_count) into a newly allocated Shortcut_opt, then resets
  join->best_read to DBL_MAX and turns on join->limit_optimization_mode so
  that the following search starts from scratch.

  @param join  The join being optimized. It must already have some plan.

  @return
    The context object to be passed to join_end_shortcut_limit_run(), or
    NULL if memory allocation failed. In the latter case the join is left
    unmodified.
*/
Shortcut_opt *join_start_shortcut_limit_run(JOIN *join)
{
  THD *thd= join->thd;
  // We expect that there is some query plan already.
  DBUG_ASSERT(join->best_read < DBL_MAX);

  /*
    Allocate the plain buffer first. Shortcut_opt's constructor creates
    trace writer objects, so we don't want to construct it and then abandon
    it without destruction if the second allocation fails.
  */
  POSITION *pos= (POSITION*) alloc_root(thd->mem_root,
                                        sizeof(POSITION) *
                                        (join->table_count + 1));
  if (!pos)
    return NULL;

  Shortcut_opt *opt= new (thd->mem_root) Shortcut_opt(thd);
  if (!opt)
    return NULL;

  /* Save the current best plan so that it can be restored later */
  memcpy((uchar*) pos, (uchar*) join->best_positions,
         sizeof(POSITION) * join->table_count);
  opt->save_join_record_count= join->join_record_count;
  opt->save_best_read= join->best_read;
  opt->save_best_pos= pos;

  /* Forget the plan's cost so that any complete plan found next is taken */
  join->best_read= DBL_MAX;
  join->limit_optimization_mode= true;
  return opt;
}
10435+
10436+
#if 0
10437+
/*
10438+
Check if we could use an index-based access method to produce rows
10439+
in the order for ORDER BY ... LIMIT.
10440+
10441+
This should basically repeat the logic from test_if_skip_sort_order()
10442+
but alas, we cannot call that function here because we didn't do
10443+
the plan fix-up stages in get_best_combination().
10444+
10445+
Also, test_if_skip_sort_order() seems to do destructive modifications
10446+
to query plan structures. That is, one can't call test_if_skip_sort_order()
10447+
and then decide to switch to using a different join order.

NOTE: this function is a placeholder. It is compiled out by the enclosing
#if 0 and its body contains only implementation notes.
10448+
*/
10449+
10450+
void join_get_limit_cost(JOIN *join, ha_rows select_limit)
10451+
{
10452+
// Find which indexes produce the required ordering.
10453+
// If we're using one of them, we're good.
10454+
10455+
// Can we use an index that matches the ordering?
10456+
// To call test_if_cheaper_ordering, we'll need to infer the values for:
10457+
/*
10458+
tab->type (check position.type )
10459+
tab->ref.key_parts (walk the keyuse in position->key?)
10460+
tab->ref.const_ref_part_map (walk the keyuse in position->key)
10461+
*/
10462+
10463+
/*
10464+
bool fatal_err;
10465+
test_if_skip_sort_order(tab, join->order, join->select_limit,
10466+
true, // no_changes
10467+
&tab->table->keys_in_use_for_order_by,
10468+
&fatal_err);
10469+
*/
10470+
}
10471+
#endif
10472+
10473+
10474+
/*
10475+
Compute the cost of join assuming we only need fraction_limit
10476+
of the output.
10477+
*/
10478+
10479+
double recompute_join_cost_with_limit(const JOIN *join, double fraction)
10480+
{
10481+
/*
10482+
Generally, we assume that producing X% of output takes X% of the cost.
10483+
*/
10484+
double join_cost_fraction= join->best_read * fraction;
10485+
10486+
/*
10487+
However, if we used filesort() for the first table, we still had to
10488+
read all rows and check the WHERE for them.
10489+
Add the substracted part pack:
10490+
*/
10491+
double extra_first_table_cost=
10492+
join->best_positions[0].read_time * (1.0 - fraction);
10493+
10494+
double extra_first_table_where=
10495+
join->best_positions[0].records_read * (1.0 - fraction) / TIME_FOR_COMPARE;
10496+
10497+
double new_read_time= COST_ADD(join_cost_fraction,
10498+
COST_ADD(extra_first_table_cost,
10499+
extra_first_table_where));
10500+
return new_read_time;
10501+
}
10502+
10503+
10504+
/*
  Finish the "LIMIT short-cut" optimization pass and pick the plan to use.

  At this point join->best_positions / join->best_read describe the plan
  found by the short-cut pass (or join->best_read is still DBL_MAX if no
  plan was found), while opt holds the plan that was the best before the
  pass. The short-cut plan is kept only if its cost, limited to the needed
  fraction of output and multiplied by @@optimizer_join_limit_pref_ratio,
  is still lower than the cost of the original plan. Otherwise the original
  plan is restored.

  @param join  The join being optimized
  @param opt   Context returned by join_start_shortcut_limit_run(). It is
               destroyed by this function and must not be used afterwards.
*/
void join_end_shortcut_limit_run(JOIN *join, Shortcut_opt *opt)
{
  join->limit_optimization_mode= false;
  bool use_shortcut_plan= false;

  {
    Json_writer_object wrapper(join->thd);
    Json_writer_object trace(join->thd, "limit_shortcut_choice");
    if (join->best_read < DBL_MAX)
    {
      /* We have produced a query plan with a matching join order */

      /* Check which fraction of join output we need */
      double fraction= 1.0;
      if (join->join_record_count > join->select_limit)
      {
        fraction= join->select_limit / join->join_record_count;
        trace.add("limit_fraction", fraction);
      }

      /*
        TODO: here, check if the first table's access method produces the
        required ordering.
        Possible options:
        1. Yes: we can just take a fraction of the execution cost.
        2A No: change the access method to one that does produce
               the required ordering, update the costs.
        2B No: Need to pass the first table to filesort().
      */
      double limited_cost= recompute_join_cost_with_limit(join, fraction);
      trace.add("full_join_cost", join->best_read);
      trace.add("shortcut_join_cost", limited_cost);

      double needed_speedup=
        join->thd->variables.optimizer_join_limit_pref_ratio;
      if (limited_cost * needed_speedup < opt->save_best_read)
      {
        // LIMIT plan is cheaper.
        // It is already in join->best_positions so do nothing
        // (TODO: update the cost in join->best_read?)
        // LIMIT short-cutting will be done automatically.
        // test_if_skip_sort_order() may switch to use index-based access
        // method
        use_shortcut_plan= true;
      }
    }
    else
      trace.add("got_shortcut_plan", false);

    /*
      The field is named after the original plan, so it must be true exactly
      when the short-cut plan was NOT chosen.
    */
    trace.add("using_original_plan", !use_shortcut_plan);
  }

  if (!use_shortcut_plan)
  {
    // Restore back the original plan
    memcpy((uchar*) join->best_positions, (uchar*) opt->save_best_pos,
           sizeof(POSITION) * join->table_count);
    join->join_record_count= opt->save_join_record_count;
    join->best_read= opt->save_best_read;
  }
  /* Destroy the context together with the trace writers it holds */
  delete opt;
}
10565+
10566+
1033310567
/**
1033410568
Find a good, possibly optimal, query execution plan (QEP) by a possibly
1033510569
exhaustive search.
@@ -10497,6 +10731,8 @@ best_extension_by_limited_search(JOIN *join,
1049710731
if (join->emb_sjm_nest)
1049810732
allowed_tables= join->emb_sjm_nest->sj_inner_tables & ~join->const_table_map;
1049910733

10734+
Shortcut_opt *optimizing_shortcut= NULL;
10735+
1050010736
for (pos= join->best_ref + idx ; (s= *pos) ; pos++)
1050110737
{
1050210738
table_map real_table_bit= s->table->map;
@@ -10685,6 +10921,21 @@ best_extension_by_limited_search(JOIN *join,
1068510921
goto end;
1068610922
}
1068710923
}
10924+
10925+
if (optimizing_shortcut)
10926+
{
10927+
join_end_shortcut_limit_run(join, optimizing_shortcut);
10928+
break;
10929+
}
10930+
10931+
JOIN_TAB **sort_tbl;
10932+
if (!pos[1] && (sort_tbl= join_check_shortcut_limit_now(join, idx)))
10933+
{
10934+
// Do another pass by putting the table of interest first.
10935+
if (!(optimizing_shortcut= join_start_shortcut_limit_run(join)))
10936+
DBUG_RETURN(SEARCH_ERROR);
10937+
pos= sort_tbl - 1;
10938+
}
1068810939
}
1068910940
best_res= SEARCH_OK;
1069010941

sql/sql_select.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1205,6 +1205,10 @@ class JOIN :public Sql_alloc
12051205
passing 1st non-const table to filesort(). NULL means no such table exists.
12061206
*/
12071207
TABLE *sort_by_table;
1208+
1209+
bool limit_shortcut_applicable;
1210+
bool limit_optimization_mode;
1211+
12081212
/*
12091213
Number of tables in the join.
12101214
(In MySQL, it is named 'tables' and is also the number of elements in

sql/sys_vars.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2702,6 +2702,20 @@ static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit(
27022702
VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX),
27032703
DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1));
27042704

2705+
/*
  @@optimizer_join_limit_pref_ratio: session variable, range [0, UINT_MAX],
  default 0. It is read by the join optimizer in sql_select.cc:
  join_shortcut_limit_is_applicable() treats 0 as "optimization disabled",
  and join_end_shortcut_limit_run() uses the value as the speedup factor
  that the LIMIT short-cut plan must promise in order to be chosen.
  The help text below is also recorded in the mysql-test result files.
*/
static Sys_var_ulong Sys_optimizer_join_limit_pref_ratio(
2706+
"optimizer_join_limit_pref_ratio",
2707+
"For queries with JOIN and ORDER BY LIMIT : change the join plan "
2708+
"to one that can short-cut after producing #LIMIT matches if that "
2709+
"promises N times speedup. "
2710+
"(That is, a conservative setting is a high value, like var=100 to "
2711+
"change only if this promises 100x) "
2712+
"The default is 0 which gives old behavior (don't change no matter "
2713+
"what the speedup)",
2714+
SESSION_VAR(optimizer_join_limit_pref_ratio),
2715+
CMD_LINE(REQUIRED_ARG),
2716+
VALID_RANGE(0, UINT_MAX),
2717+
DEFAULT(0), BLOCK_SIZE(1));
2718+
27052719
static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
27062720
"optimizer_use_condition_selectivity",
27072721
"Controls selectivity of which conditions the optimizer takes into "

0 commit comments

Comments
 (0)