Skip to content

Commit 49eef91

Browse files
authored
[interp] Squash multiple call args moves into single opcode (#52242)
* [interp] Replace multiplication and division by 1 with simple mov
* [interp] Skip emitting redundant branch to next basic block
* [interp] Squash multiple call args moves into single opcode

Some vars cannot be used directly as an argument to another call. In this case, the var offset allocator generates new intermediary vars. For methods with a lot of parameters, we can end up with quite a lot of these stores.

As an example, for the following method:

```
public static void MethodPartial (int a, int b, object c, object d)
{
	MethodFull (a, b, c, d, 12523);
}
```

Before:

```
IR_0000: ldc.i8 [72 <- nil], 12523
IR_0006: mov.4 [40 <- 0],
IR_0009: mov.4 [48 <- 8],
IR_000c: mov.8 [56 <- 16],
IR_000f: mov.8 [64 <- 24],
IR_0012: call [32 <- 40], 0
IR_0016: ret.void [nil <- nil],
```

After:

```
IR_0000: ldc.i8 [72 <- nil], 12523
IR_0006: mov.8.4 [nil <- nil], 40 <- 0, 48 <- 8, 56 <- 16, 64 <- 24
IR_000f: call [32 <- 40], 0
IR_0013: ret.void [nil <- nil]
```
1 parent a50f309 commit 49eef91

File tree

4 files changed

+138
-15
lines changed

4 files changed

+138
-15
lines changed

src/mono/mono/mini/interp/interp.c

+19
Original file line numberDiff line numberDiff line change
@@ -6576,6 +6576,25 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
65766576
MINT_IN_BREAK;
65776577
}
65786578

6579+
MINT_IN_CASE(MINT_MOV_8_2)
6580+
LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64);
6581+
LOCAL_VAR (ip [3], guint64) = LOCAL_VAR (ip [4], guint64);
6582+
ip += 5;
6583+
MINT_IN_BREAK;
6584+
MINT_IN_CASE(MINT_MOV_8_3)
6585+
LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64);
6586+
LOCAL_VAR (ip [3], guint64) = LOCAL_VAR (ip [4], guint64);
6587+
LOCAL_VAR (ip [5], guint64) = LOCAL_VAR (ip [6], guint64);
6588+
ip += 7;
6589+
MINT_IN_BREAK;
6590+
MINT_IN_CASE(MINT_MOV_8_4)
6591+
LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64);
6592+
LOCAL_VAR (ip [3], guint64) = LOCAL_VAR (ip [4], guint64);
6593+
LOCAL_VAR (ip [5], guint64) = LOCAL_VAR (ip [6], guint64);
6594+
LOCAL_VAR (ip [7], guint64) = LOCAL_VAR (ip [8], guint64);
6595+
ip += 9;
6596+
MINT_IN_BREAK;
6597+
65796598
MINT_IN_CASE(MINT_LOCALLOC) {
65806599
int len = LOCAL_VAR (ip [2], gint32);
65816600
gpointer mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT));

src/mono/mono/mini/interp/mintops.def

+6
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ OPDEF(MINT_MOV_4, "mov.4", 3, 1, 1, MintOpNoArgs)
108108
OPDEF(MINT_MOV_8, "mov.8", 3, 1, 1, MintOpNoArgs)
109109
OPDEF(MINT_MOV_VT, "mov.vt", 4, 1, 1, MintOpShortInt)
110110

111+
// These opcodes represent multiple moves stacked together. They have multiple src and dst
112+
// but they are not represented here. They are generated by the var offset allocator.
113+
OPDEF(MINT_MOV_8_2, "mov.8.2", 5, 0, 0, MintOpPair2)
114+
OPDEF(MINT_MOV_8_3, "mov.8.3", 7, 0, 0, MintOpPair3)
115+
OPDEF(MINT_MOV_8_4, "mov.8.4", 9, 0, 0, MintOpPair4)
116+
111117
OPDEF(MINT_LDLOCA_S, "ldloca.s", 3, 1, 0, MintOpUShortInt)
112118

113119
OPDEF(MINT_LDIND_I1, "ldind.i1", 3, 1, 1, MintOpNoArgs)

src/mono/mono/mini/interp/mintops.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,10 @@ typedef enum
2424
MintOpClassToken,
2525
MintOpTwoShorts,
2626
MintOpShortAndInt,
27-
MintOpShortAndShortBranch
27+
MintOpShortAndShortBranch,
28+
MintOpPair2,
29+
MintOpPair3,
30+
MintOpPair4
2831
} MintOpArgType;
2932

3033
#define OPDEF(a,b,c,d,e,f) a,
@@ -74,6 +77,8 @@ typedef enum {
7477
#define MINT_CALL_ARGS 2
7578
#define MINT_CALL_ARGS_SREG -2
7679

80+
#define MINT_MOV_PAIRS_MAX 4
81+
7782
extern unsigned char const mono_interp_oplen[];
7883
extern int const mono_interp_op_dregs [];
7984
extern int const mono_interp_op_sregs [];

src/mono/mono/mini/interp/transform.c

+107-14
Original file line numberDiff line numberDiff line change
@@ -1411,6 +1411,14 @@ dump_interp_ins_data (InterpInst *ins, gint32 ins_offset, const guint16 *data, g
14111411
target = ins_offset + *(gint16*)(data + 1);
14121412
g_string_append_printf (str, " %u, IR_%04x", *(guint16*)data, target);
14131413
}
1414+
case MintOpPair2:
1415+
g_string_append_printf (str, " %u <- %u, %u <- %u", data [0], data [1], data [2], data [3]);
1416+
break;
1417+
case MintOpPair3:
1418+
g_string_append_printf (str, " %u <- %u, %u <- %u, %u <- %u", data [0], data [1], data [2], data [3], data [4], data [5]);
1419+
break;
1420+
case MintOpPair4:
1421+
g_string_append_printf (str, " %u <- %u, %u <- %u, %u <- %u, %u <- %u", data [0], data [1], data [2], data [3], data [4], data [5], data [6], data [7]);
14141422
break;
14151423
default:
14161424
g_string_append_printf (str, "unknown arg type\n");
@@ -7549,6 +7557,9 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
75497557
if (ins->info.target_bb->native_offset >= 0) {
75507558
// Backwards branch. We can already patch it.
75517559
*ip++ = ins->info.target_bb->native_offset - br_offset;
7560+
} else if (opcode == MINT_BR_S && ins->info.target_bb == td->cbb->next_bb) {
7561+
// Ignore branch to the next basic block. Revert the added MINT_BR_S.
7562+
ip--;
75527563
} else {
75537564
// We don't know the in_offset of the target, add a reloc
75547565
Reloc *reloc = (Reloc*)mono_mempool_alloc0 (td->mempool, sizeof (Reloc));
@@ -7647,6 +7658,12 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
76477658
for (int i = size - 1; i < (jit_call2_size - 1); i++)
76487659
*ip++ = MINT_NIY;
76497660
#endif
7661+
} else if (opcode >= MINT_MOV_8_2 && opcode <= MINT_MOV_8_4) {
7662+
// This instruction is not marked as operating on any vars, all instruction slots are
7663+
// actually vars. Resolve their offsets.
7664+
int num_vars = mono_interp_oplen [opcode] - 1;
7665+
for (int i = 0; i < num_vars; i++)
7666+
*ip++ = td->locals [ins->data [i]].offset;
76507667
} else {
76517668
if (mono_interp_op_dregs [opcode])
76527669
*ip++ = td->locals [ins->dreg].offset;
@@ -7696,6 +7713,7 @@ generate_compacted_code (TransformData *td)
76967713
for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
76977714
InterpInst *ins = bb->first_ins;
76987715
bb->native_offset = ip - td->new_code;
7716+
td->cbb = bb;
76997717
while (ins) {
77007718
ip = emit_compacted_instruction (td, ip, ins);
77017719
ins = ins->next;
@@ -7984,7 +8002,7 @@ interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, LocalValue *
79848002

79858003

79868004
static InterpInst*
7987-
interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins)
8005+
interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins, gboolean *folded)
79888006
{
79898007
int *local_ref_count = td->local_ref_count;
79908008
// ins should be a binop, therefore it should have a single dreg and two sregs
@@ -7995,6 +8013,8 @@ interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins)
79958013
LocalValue *val2 = &local_defs [sreg2];
79968014
LocalValue result;
79978015

8016+
*folded = FALSE;
8017+
79988018
if (val1->type != LOCAL_VALUE_I4 && val1->type != LOCAL_VALUE_I8)
79998019
return ins;
80008020
if (val2->type != LOCAL_VALUE_I4 && val2->type != LOCAL_VALUE_I8)
@@ -8066,7 +8086,7 @@ interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins)
80668086
// with a LDC of the constant. We leave alone the sregs of this instruction, for
80678087
// deadce to kill the instructions initializing them.
80688088
mono_interp_stats.constant_folds++;
8069-
8089+
*folded = TRUE;
80708090
if (result.type == LOCAL_VALUE_I4)
80718091
ins = interp_get_ldc_i4_from_const (td, ins, result.i, dreg);
80728092
else if (result.type == LOCAL_VALUE_I8)
@@ -8341,7 +8361,42 @@ interp_cprop (TransformData *td)
83418361
} else if (MINT_IS_UNOP_CONDITIONAL_BRANCH (opcode)) {
83428362
ins = interp_fold_unop_cond_br (td, bb, local_defs, ins);
83438363
} else if (MINT_IS_BINOP (opcode)) {
8344-
ins = interp_fold_binop (td, local_defs, ins);
8364+
gboolean folded;
8365+
ins = interp_fold_binop (td, local_defs, ins, &folded);
8366+
if (!folded) {
8367+
int sreg = -1;
8368+
int mov_op;
8369+
if ((opcode == MINT_MUL_I4 || opcode == MINT_DIV_I4) &&
8370+
local_defs [ins->sregs [1]].type == LOCAL_VALUE_I4 &&
8371+
local_defs [ins->sregs [1]].i == 1) {
8372+
sreg = ins->sregs [0];
8373+
mov_op = MINT_MOV_4;
8374+
} else if ((opcode == MINT_MUL_I8 || opcode == MINT_DIV_I8) &&
8375+
local_defs [ins->sregs [1]].type == LOCAL_VALUE_I8 &&
8376+
local_defs [ins->sregs [1]].l == 1) {
8377+
sreg = ins->sregs [0];
8378+
mov_op = MINT_MOV_8;
8379+
} else if (opcode == MINT_MUL_I4 &&
8380+
local_defs [ins->sregs [0]].type == LOCAL_VALUE_I4 &&
8381+
local_defs [ins->sregs [0]].i == 1) {
8382+
sreg = ins->sregs [1];
8383+
mov_op = MINT_MOV_4;
8384+
} else if (opcode == MINT_MUL_I8 &&
8385+
local_defs [ins->sregs [0]].type == LOCAL_VALUE_I8 &&
8386+
local_defs [ins->sregs [0]].l == 1) {
8387+
sreg = ins->sregs [1];
8388+
mov_op = MINT_MOV_8;
8389+
}
8390+
if (sreg != -1) {
8391+
ins->opcode = mov_op;
8392+
ins->sregs [0] = sreg;
8393+
if (td->verbose_level) {
8394+
g_print ("Replace idempotent binop :\n\t");
8395+
dump_interp_inst (ins);
8396+
}
8397+
needs_retry = TRUE;
8398+
}
8399+
}
83458400
} else if (MINT_IS_BINOP_CONDITIONAL_BRANCH (opcode)) {
83468401
ins = interp_fold_binop_cond_br (td, bb, local_defs, ins);
83478402
} else if (MINT_IS_LDFLD (opcode) && ins->data [0] == 0) {
@@ -9105,7 +9160,11 @@ interp_alloc_offsets (TransformData *td)
91059160
if (ins->flags & INTERP_INST_FLAG_CALL) {
91069161
int *call_args = ins->info.call_args;
91079162
if (call_args) {
9163+
int pair_sregs [MINT_MOV_PAIRS_MAX];
9164+
int pair_dregs [MINT_MOV_PAIRS_MAX];
9165+
int num_pairs = 0;
91089166
int var = *call_args;
9167+
91099168
while (var != -1) {
91109169
if (td->locals [var].flags & INTERP_LOCAL_FLAG_GLOBAL ||
91119170
td->locals [var].flags & INTERP_LOCAL_FLAG_NO_CALL_ARGS) {
@@ -9114,17 +9173,27 @@ interp_alloc_offsets (TransformData *td)
91149173
int new_var = create_interp_local (td, td->locals [var].type);
91159174
td->locals [new_var].call = ins;
91169175
td->locals [new_var].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
9117-
int opcode = get_mov_for_type (mint_type (td->locals [var].type), FALSE);
9118-
InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
9119-
interp_ins_set_dreg (new_inst, new_var);
9120-
interp_ins_set_sreg (new_inst, var);
9121-
if (opcode == MINT_MOV_VT)
9122-
new_inst->data [0] = td->locals [var].size;
9123-
// The arg of the call is no longer global
9124-
*call_args = new_var;
9125-
// Also update liveness for this instruction
9126-
foreach_local_var (td, new_inst, ins_index, set_var_live_range);
9127-
ins_index++;
9176+
9177+
int mt = mint_type (td->locals [var].type);
9178+
if (mt != MINT_TYPE_VT && num_pairs < MINT_MOV_PAIRS_MAX) {
9179+
pair_sregs [num_pairs] = var;
9180+
pair_dregs [num_pairs] = new_var;
9181+
num_pairs++;
9182+
// The arg of the call is no longer global
9183+
*call_args = new_var;
9184+
} else {
9185+
int opcode = get_mov_for_type (mt, FALSE);
9186+
InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
9187+
interp_ins_set_dreg (new_inst, new_var);
9188+
interp_ins_set_sreg (new_inst, var);
9189+
if (opcode == MINT_MOV_VT)
9190+
new_inst->data [0] = td->locals [var].size;
9191+
// The arg of the call is no longer global
9192+
*call_args = new_var;
9193+
// Also update liveness for this instruction
9194+
foreach_local_var (td, new_inst, ins_index, set_var_live_range);
9195+
ins_index++;
9196+
}
91289197
} else {
91299198
// Flag this var as it has special storage on the call args stack
91309199
td->locals [var].call = ins;
@@ -9133,6 +9202,30 @@ interp_alloc_offsets (TransformData *td)
91339202
call_args++;
91349203
var = *call_args;
91359204
}
9205+
if (num_pairs > 0) {
9206+
int i;
9207+
for (i = 0; i < num_pairs; i++) {
9208+
set_var_live_range (td, pair_sregs [i], ins_index);
9209+
set_var_live_range (td, pair_dregs [i], ins_index);
9210+
}
9211+
if (num_pairs == 1) {
9212+
int mt = mint_type (td->locals [pair_sregs [0]].type);
9213+
int opcode = get_mov_for_type (mt, FALSE);
9214+
InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
9215+
interp_ins_set_dreg (new_inst, pair_dregs [0]);
9216+
interp_ins_set_sreg (new_inst, pair_sregs [0]);
9217+
} else {
9218+
// Squash together multiple moves to the param area into a single opcode
9219+
int opcode = MINT_MOV_8_2 + num_pairs - 2;
9220+
InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
9221+
int k = 0;
9222+
for (i = 0; i < num_pairs; i++) {
9223+
new_inst->data [k++] = pair_dregs [i];
9224+
new_inst->data [k++] = pair_sregs [i];
9225+
}
9226+
}
9227+
ins_index++;
9228+
}
91369229
}
91379230
}
91389231
// Set live_start and live_end for every referenced local that is not global

0 commit comments

Comments
 (0)