From af83e9a907756dc62efdd938f784356d1b528c9d Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Sat, 17 Aug 2024 16:27:38 +0200 Subject: [PATCH] Selection: refined heuristic for if-conversion Turn if-conversion off in some cases where it would prevent later optimization of conditional branches in the continuation of the `if`. --- backend/Selectionaux.ml | 102 +++++++++++++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 18 deletions(-) diff --git a/backend/Selectionaux.ml b/backend/Selectionaux.ml index 0d3bcb3de..ec0331740 100644 --- a/backend/Selectionaux.ml +++ b/backend/Selectionaux.ml @@ -81,35 +81,101 @@ let fast_cmove ty = | _, _ -> assert false -(* The if-conversion heuristic depend on the - -fif-conversion and -Obranchless flags. +(* The if-conversion heuristic depends on the [-fif-conversion] +and [-Obranchless] flags. With [-fno-if-conversion] or [-0O], if-conversion is turned off entirely. + With [-Obranchless], if-conversion is performed whenever semantically correct, regardless of how much it could cost. + Otherwise (and by default), optimization is performed when it seems beneficial. If-conversion seems beneficial if: - the target architecture supports an efficient "conditional move" instruction - (not an emulation that takes several instructions) -- the total cost the "then" and "else" branches is not too high -- the cost difference between the "then" and "else" branches is low enough. - -Intuition: on a modern processor, the "then" and the "else" branches -can generally be computed in parallel, there is enough ILP for that. -So, the bad case is if the most taken branch is much cheaper than the -other branch.
Another bad case is if both branches are big: since the -code for one branch precedes entirely the code for the other branch, -if the first branch contains a lot of instructions, -dynamic reordering of instructions will not look ahead far enough -to execute instructions from the other branch in parallel with -instructions from the first branch. + (not an emulation that takes several instructions); +- the total cost of the "then" and "else" branches is not too high; +- the cost difference between the "then" and "else" branches is low enough; +- if-conversion will not inhibit further optimization such as + the [successor] optimization in [Constprop]. + +Intuitions for the two cost criteria: + +On a modern processor, the "then" and the "else" branches can +generally be computed in parallel, there is enough ILP for that. +But, if one of the branches is much cheaper than the other, and happens +to be the most taken branch, we're wasting CPU time computing the other +branch every time. + +Another bad case is if both branches are big: since the code for one +branch precedes entirely the code for the other branch, if the first +branch contains a lot of instructions, dynamic reordering of +instructions will not look ahead far enough to execute instructions +from the other branch in parallel with instructions from the first +branch. +*) + +let cost_criterion ifso ifnot = + let c1 = cost_expr ifso and c2 = cost_expr ifnot in + c1 + c2 <= 24 && abs (c1 - c2) <= 8 + +(* +Intuition for the later optimization that should not be prevented: + +Consider the C code +<< + if (c1 && c2) goto lbl1; else goto lbl2; +>> +where [c1] and [c2] are simple comparisons.
The corresponding Cminor code is +<< + if (c1) t = (_Bool) c2; else t = 0; + if (t != 0) goto lbl1; else goto lbl2; +>> +Without if-conversion of the first [if], the Constprop and CSE/CombineOp +passes manage to produce RTL code equivalent to +<< + if (c1) { if (c2) goto lbl1; else goto lbl2;} else goto lbl2; +>> +With if-conversion of the first [if], we obtain the following RTL code +<< + t = select(c1, (_Bool) c2, 0); + if (t != 0) goto lbl1; else goto lbl2; +>> +which is generally less efficient, as the conversion of [c2] to a Boolean value +and the selection on [c1] take significantly more instructions than +two conditional branches on [c1] and [c2]. + +We recognize the following pattern, for which we turn if-conversion off: +- one of the arms of the [if] statement is [t := constant]; +- just after the [if] statement comes a [if (t cmp constant)] conditional + statement. + +For this pattern, we know that the [successor] optimization in [Constprop] +will generate a direct jump from the [t := constant] to the appropriate +arm of the [if (t cmp constant)] conditional. *) -let if_conversion_heuristic cond ifso ifnot ty kont = +let is_const = function + | Econst(Ointconst _) -> true + | _ -> false + +let rec is_if (k : CminorSel.stmt) = + let open! 
CminorSel in + match k with + | Sifthenelse(CEcond(Op.Ccompuimm _, Econs (Evar id, Enil)), _, _) -> Some id + | Sseq(Sskip, s2) -> is_if s2 + | Sseq(Sbuiltin(_, EF_debug _, _), s2) -> is_if s2 + | Sseq(s1, s2) -> is_if s1 + | _ -> None + +let optimization_criterion id ifso ifnot kont = + match is_if kont with + | Some id' -> not (id' = id && (is_const ifso || is_const ifnot)) + | _ -> true + +let if_conversion_heuristic id cond ifso ifnot ty kont = if not !Clflags.option_fifconversion then false else if !Clflags.option_Obranchless then true else if not (fast_cmove ty) then false else - let c1 = cost_expr ifso and c2 = cost_expr ifnot in - c1 + c2 <= 24 && abs (c1 - c2) <= 8 + cost_criterion ifso ifnot && optimization_criterion id ifso ifnot kont