Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[interp] Squash multiple call args moves into single opcode #52242

Merged
merged 3 commits into from
May 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/mono/mono/mini/interp/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -6576,6 +6576,25 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
MINT_IN_BREAK;
}

MINT_IN_CASE(MINT_MOV_8_2)
LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64);
LOCAL_VAR (ip [3], guint64) = LOCAL_VAR (ip [4], guint64);
ip += 5;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_MOV_8_3)
LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64);
LOCAL_VAR (ip [3], guint64) = LOCAL_VAR (ip [4], guint64);
LOCAL_VAR (ip [5], guint64) = LOCAL_VAR (ip [6], guint64);
ip += 7;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_MOV_8_4)
LOCAL_VAR (ip [1], guint64) = LOCAL_VAR (ip [2], guint64);
LOCAL_VAR (ip [3], guint64) = LOCAL_VAR (ip [4], guint64);
LOCAL_VAR (ip [5], guint64) = LOCAL_VAR (ip [6], guint64);
LOCAL_VAR (ip [7], guint64) = LOCAL_VAR (ip [8], guint64);
ip += 9;
MINT_IN_BREAK;

MINT_IN_CASE(MINT_LOCALLOC) {
int len = LOCAL_VAR (ip [2], gint32);
gpointer mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT));
Expand Down
6 changes: 6 additions & 0 deletions src/mono/mono/mini/interp/mintops.def
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ OPDEF(MINT_MOV_4, "mov.4", 3, 1, 1, MintOpNoArgs)
OPDEF(MINT_MOV_8, "mov.8", 3, 1, 1, MintOpNoArgs)
OPDEF(MINT_MOV_VT, "mov.vt", 4, 1, 1, MintOpShortInt)

// These opcodes represent multiple moves stacked together. They have multiple src and dst
// but they are not represented here. They are generated by the var offset allocator.
OPDEF(MINT_MOV_8_2, "mov.8.2", 5, 0, 0, MintOpPair2)
OPDEF(MINT_MOV_8_3, "mov.8.3", 7, 0, 0, MintOpPair3)
OPDEF(MINT_MOV_8_4, "mov.8.4", 9, 0, 0, MintOpPair4)

OPDEF(MINT_LDLOCA_S, "ldloca.s", 3, 1, 0, MintOpUShortInt)

OPDEF(MINT_LDIND_I1, "ldind.i1", 3, 1, 1, MintOpNoArgs)
Expand Down
7 changes: 6 additions & 1 deletion src/mono/mono/mini/interp/mintops.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ typedef enum
MintOpClassToken,
MintOpTwoShorts,
MintOpShortAndInt,
MintOpShortAndShortBranch
MintOpShortAndShortBranch,
MintOpPair2,
MintOpPair3,
MintOpPair4
} MintOpArgType;

#define OPDEF(a,b,c,d,e,f) a,
Expand Down Expand Up @@ -74,6 +77,8 @@ typedef enum {
#define MINT_CALL_ARGS 2
#define MINT_CALL_ARGS_SREG -2

#define MINT_MOV_PAIRS_MAX 4

extern unsigned char const mono_interp_oplen[];
extern int const mono_interp_op_dregs [];
extern int const mono_interp_op_sregs [];
Expand Down
121 changes: 107 additions & 14 deletions src/mono/mono/mini/interp/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -1411,6 +1411,14 @@ dump_interp_ins_data (InterpInst *ins, gint32 ins_offset, const guint16 *data, g
target = ins_offset + *(gint16*)(data + 1);
g_string_append_printf (str, " %u, IR_%04x", *(guint16*)data, target);
}
case MintOpPair2:
g_string_append_printf (str, " %u <- %u, %u <- %u", data [0], data [1], data [2], data [3]);
break;
case MintOpPair3:
g_string_append_printf (str, " %u <- %u, %u <- %u, %u <- %u", data [0], data [1], data [2], data [3], data [4], data [5]);
break;
case MintOpPair4:
g_string_append_printf (str, " %u <- %u, %u <- %u, %u <- %u, %u <- %u", data [0], data [1], data [2], data [3], data [4], data [5], data [6], data [7]);
break;
default:
g_string_append_printf (str, "unknown arg type\n");
Expand Down Expand Up @@ -7549,6 +7557,9 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
if (ins->info.target_bb->native_offset >= 0) {
// Backwards branch. We can already patch it.
*ip++ = ins->info.target_bb->native_offset - br_offset;
} else if (opcode == MINT_BR_S && ins->info.target_bb == td->cbb->next_bb) {
// Ignore branch to the next basic block. Revert the added MINT_BR_S.
ip--;
} else {
// We don't know the in_offset of the target, add a reloc
Reloc *reloc = (Reloc*)mono_mempool_alloc0 (td->mempool, sizeof (Reloc));
Expand Down Expand Up @@ -7647,6 +7658,12 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
for (int i = size - 1; i < (jit_call2_size - 1); i++)
*ip++ = MINT_NIY;
#endif
} else if (opcode >= MINT_MOV_8_2 && opcode <= MINT_MOV_8_4) {
// This instruction is not marked as operating on any vars, all instruction slots are
// actually vars. Resolve their offsets.
int num_vars = mono_interp_oplen [opcode] - 1;
for (int i = 0; i < num_vars; i++)
*ip++ = td->locals [ins->data [i]].offset;
} else {
if (mono_interp_op_dregs [opcode])
*ip++ = td->locals [ins->dreg].offset;
Expand Down Expand Up @@ -7696,6 +7713,7 @@ generate_compacted_code (TransformData *td)
for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
InterpInst *ins = bb->first_ins;
bb->native_offset = ip - td->new_code;
td->cbb = bb;
while (ins) {
ip = emit_compacted_instruction (td, ip, ins);
ins = ins->next;
Expand Down Expand Up @@ -7984,7 +8002,7 @@ interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, LocalValue *


static InterpInst*
interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins)
interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins, gboolean *folded)
{
int *local_ref_count = td->local_ref_count;
// ins should be a binop, therefore it should have a single dreg and two sregs
Expand All @@ -7995,6 +8013,8 @@ interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins)
LocalValue *val2 = &local_defs [sreg2];
LocalValue result;

*folded = FALSE;

if (val1->type != LOCAL_VALUE_I4 && val1->type != LOCAL_VALUE_I8)
return ins;
if (val2->type != LOCAL_VALUE_I4 && val2->type != LOCAL_VALUE_I8)
Expand Down Expand Up @@ -8066,7 +8086,7 @@ interp_fold_binop (TransformData *td, LocalValue *local_defs, InterpInst *ins)
// with a LDC of the constant. We leave alone the sregs of this instruction, for
// deadce to kill the instructions initializing them.
mono_interp_stats.constant_folds++;

*folded = TRUE;
if (result.type == LOCAL_VALUE_I4)
ins = interp_get_ldc_i4_from_const (td, ins, result.i, dreg);
else if (result.type == LOCAL_VALUE_I8)
Expand Down Expand Up @@ -8341,7 +8361,42 @@ interp_cprop (TransformData *td)
} else if (MINT_IS_UNOP_CONDITIONAL_BRANCH (opcode)) {
ins = interp_fold_unop_cond_br (td, bb, local_defs, ins);
} else if (MINT_IS_BINOP (opcode)) {
ins = interp_fold_binop (td, local_defs, ins);
gboolean folded;
ins = interp_fold_binop (td, local_defs, ins, &folded);
if (!folded) {
int sreg = -1;
int mov_op;
if ((opcode == MINT_MUL_I4 || opcode == MINT_DIV_I4) &&
local_defs [ins->sregs [1]].type == LOCAL_VALUE_I4 &&
local_defs [ins->sregs [1]].i == 1) {
sreg = ins->sregs [0];
mov_op = MINT_MOV_4;
} else if ((opcode == MINT_MUL_I8 || opcode == MINT_DIV_I8) &&
local_defs [ins->sregs [1]].type == LOCAL_VALUE_I8 &&
local_defs [ins->sregs [1]].l == 1) {
sreg = ins->sregs [0];
mov_op = MINT_MOV_8;
} else if (opcode == MINT_MUL_I4 &&
local_defs [ins->sregs [0]].type == LOCAL_VALUE_I4 &&
local_defs [ins->sregs [0]].i == 1) {
sreg = ins->sregs [1];
mov_op = MINT_MOV_4;
} else if (opcode == MINT_MUL_I8 &&
local_defs [ins->sregs [0]].type == LOCAL_VALUE_I8 &&
local_defs [ins->sregs [0]].l == 1) {
sreg = ins->sregs [1];
mov_op = MINT_MOV_8;
}
if (sreg != -1) {
ins->opcode = mov_op;
ins->sregs [0] = sreg;
if (td->verbose_level) {
g_print ("Replace idempotent binop :\n\t");
dump_interp_inst (ins);
}
needs_retry = TRUE;
}
}
} else if (MINT_IS_BINOP_CONDITIONAL_BRANCH (opcode)) {
ins = interp_fold_binop_cond_br (td, bb, local_defs, ins);
} else if (MINT_IS_LDFLD (opcode) && ins->data [0] == 0) {
Expand Down Expand Up @@ -9105,7 +9160,11 @@ interp_alloc_offsets (TransformData *td)
if (ins->flags & INTERP_INST_FLAG_CALL) {
int *call_args = ins->info.call_args;
if (call_args) {
int pair_sregs [MINT_MOV_PAIRS_MAX];
int pair_dregs [MINT_MOV_PAIRS_MAX];
int num_pairs = 0;
int var = *call_args;

while (var != -1) {
if (td->locals [var].flags & INTERP_LOCAL_FLAG_GLOBAL ||
td->locals [var].flags & INTERP_LOCAL_FLAG_NO_CALL_ARGS) {
Expand All @@ -9114,17 +9173,27 @@ interp_alloc_offsets (TransformData *td)
int new_var = create_interp_local (td, td->locals [var].type);
td->locals [new_var].call = ins;
td->locals [new_var].flags |= INTERP_LOCAL_FLAG_CALL_ARGS;
int opcode = get_mov_for_type (mint_type (td->locals [var].type), FALSE);
InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
interp_ins_set_dreg (new_inst, new_var);
interp_ins_set_sreg (new_inst, var);
if (opcode == MINT_MOV_VT)
new_inst->data [0] = td->locals [var].size;
// The arg of the call is no longer global
*call_args = new_var;
// Also update liveness for this instruction
foreach_local_var (td, new_inst, ins_index, set_var_live_range);
ins_index++;

int mt = mint_type (td->locals [var].type);
if (mt != MINT_TYPE_VT && num_pairs < MINT_MOV_PAIRS_MAX) {
pair_sregs [num_pairs] = var;
pair_dregs [num_pairs] = new_var;
num_pairs++;
// The arg of the call is no longer global
*call_args = new_var;
} else {
int opcode = get_mov_for_type (mt, FALSE);
InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
interp_ins_set_dreg (new_inst, new_var);
interp_ins_set_sreg (new_inst, var);
if (opcode == MINT_MOV_VT)
new_inst->data [0] = td->locals [var].size;
// The arg of the call is no longer global
*call_args = new_var;
// Also update liveness for this instruction
foreach_local_var (td, new_inst, ins_index, set_var_live_range);
ins_index++;
}
} else {
// Flag this var as it has special storage on the call args stack
td->locals [var].call = ins;
Expand All @@ -9133,6 +9202,30 @@ interp_alloc_offsets (TransformData *td)
call_args++;
var = *call_args;
}
if (num_pairs > 0) {
int i;
for (i = 0; i < num_pairs; i++) {
set_var_live_range (td, pair_sregs [i], ins_index);
set_var_live_range (td, pair_dregs [i], ins_index);
}
if (num_pairs == 1) {
int mt = mint_type (td->locals [pair_sregs [0]].type);
int opcode = get_mov_for_type (mt, FALSE);
InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
interp_ins_set_dreg (new_inst, pair_dregs [0]);
interp_ins_set_sreg (new_inst, pair_sregs [0]);
} else {
// Squash together multiple moves to the param area into a single opcode
int opcode = MINT_MOV_8_2 + num_pairs - 2;
InterpInst *new_inst = interp_insert_ins_bb (td, bb, ins->prev, opcode);
int k = 0;
for (i = 0; i < num_pairs; i++) {
new_inst->data [k++] = pair_dregs [i];
new_inst->data [k++] = pair_sregs [i];
}
}
ins_index++;
}
}
}
// Set live_start and live_end for every referenced local that is not global
Expand Down