diff --git a/src/mono/browser/runtime/jiterpreter-trace-generator.ts b/src/mono/browser/runtime/jiterpreter-trace-generator.ts index 616792ffec25ba..f84bb6a662f4a0 100644 --- a/src/mono/browser/runtime/jiterpreter-trace-generator.ts +++ b/src/mono/browser/runtime/jiterpreter-trace-generator.ts @@ -1098,18 +1098,6 @@ export function generateWasmBody( break; } - case MintOpcode.MINT_NEWOBJ_VT_INLINED: { - const ret_size = getArgU16(ip, 3); - // memset (this_vt, 0, ret_size); - append_ldloca(builder, getArgU16(ip, 2), ret_size); - append_memset_dest(builder, 0, ret_size); - // LOCAL_VAR (ip [1], gpointer) = this_vt; - builder.local("pLocals"); - append_ldloca(builder, getArgU16(ip, 2), ret_size); - append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store); - break; - } - case MintOpcode.MINT_NEWOBJ: case MintOpcode.MINT_NEWOBJ_VT: case MintOpcode.MINT_CALLVIRT_FAST: diff --git a/src/mono/mono/mini/interp/interp-internals.h b/src/mono/mono/mini/interp/interp-internals.h index 72ef097eac22d7..8c4fe67b002abf 100644 --- a/src/mono/mono/mini/interp/interp-internals.h +++ b/src/mono/mono/mini/interp/interp-internals.h @@ -275,6 +275,13 @@ typedef struct { typedef struct { gint64 transform_time; gint64 methods_transformed; + gint64 optimize_time; + gint64 ssa_compute_time; + gint64 ssa_compute_dominance_time; + gint64 ssa_compute_global_vars_time; + gint64 ssa_compute_pruned_liveness_time; + gint64 ssa_rename_vars_time; + gint64 optimize_bblocks_time; gint64 cprop_time; gint64 super_instructions_time; gint32 emitted_instructions; diff --git a/src/mono/mono/mini/interp/interp-intrins.c b/src/mono/mono/mini/interp/interp-intrins.c index c9e7b5ba2a7e27..1489ddfa09c4a2 100644 --- a/src/mono/mono/mini/interp/interp-intrins.c +++ b/src/mono/mono/mini/interp/interp-intrins.c @@ -16,7 +16,7 @@ rotate_left (guint32 value, int offset) } void -interp_intrins_marvin_block (guint32 *pp0, guint32 *pp1) +interp_intrins_marvin_block (guint32 *pp0, guint32 *pp1, guint32 *dest0, guint32 *dest1) { // Marvin.Block guint32 p0 = *pp0; @@ -34,8 +34,8 @@ interp_intrins_marvin_block (guint32 *pp0, guint32 *pp1) p0 += p1; p1 = rotate_left (p1, 19); - *pp0 = p0; - *pp1 = p1; + *dest0 = p0; + *dest1 = p1; } guint32 diff --git a/src/mono/mono/mini/interp/interp-intrins.h b/src/mono/mono/mini/interp/interp-intrins.h index 1e3b218af927a5..98229f26d41361 100644 --- a/src/mono/mono/mini/interp/interp-intrins.h +++ b/src/mono/mono/mini/interp/interp-intrins.h @@ -124,7 +124,7 @@ interp_intrins_popcount_i8 (guint64 val) #endif void -interp_intrins_marvin_block (guint32 *pp0, guint32 *pp1); +interp_intrins_marvin_block (guint32 *pp0, guint32 *pp1, guint32 *dest0, guint32 *dest1); guint32 interp_intrins_ascii_chars_to_uppercase (guint32 val); diff --git a/src/mono/mono/mini/interp/interp-simd.c b/src/mono/mono/mini/interp/interp-simd.c index edaa7615652e52..cdc6a951c5f92a 100644 --- a/src/mono/mono/mini/interp/interp-simd.c +++ b/src/mono/mono/mini/interp/interp-simd.c @@ -315,47 +315,15 @@ interp_v128_u2_widen_upper (gpointer res, gpointer v1) static void interp_v128_u1_narrow (gpointer res, gpointer v1, gpointer v2) { - guint8 *res_typed = (guint8*)res; + guint8 res_typed [SIZEOF_V128]; guint16 *v1_typed = (guint16*)v1; guint16 *v2_typed = (guint16*)v2; - if (res != v2) { - res_typed [0] = v1_typed [0]; - res_typed [1] = v1_typed [1]; - res_typed [2] = v1_typed [2]; - res_typed [3] = v1_typed [3]; - res_typed [4] = v1_typed [4]; - res_typed [5] = v1_typed [5]; - res_typed [6] = v1_typed [6]; - res_typed [7] = v1_typed [7]; - - res_typed [8] = v2_typed [0]; - res_typed [9] = v2_typed [1]; - res_typed [10] = v2_typed [2]; - res_typed [11] = v2_typed [3]; - res_typed [12] = v2_typed [4]; - res_typed [13] = v2_typed [5]; - res_typed [14] = v2_typed [6]; - res_typed [15] = v2_typed [7]; - } else { - res_typed [15] = v2_typed [7]; - res_typed [14] = v2_typed [6]; - res_typed [13] = v2_typed [5]; - res_typed [12] = v2_typed [4]; - res_typed [11] = v2_typed [3]; - res_typed [10] = v2_typed [2]; - res_typed [9] = v2_typed [1]; - res_typed [8] = v2_typed [0]; - - res_typed [0] = v1_typed [0]; - res_typed [1] = v1_typed [1]; - res_typed [2] = v1_typed [2]; - res_typed [3] = v1_typed [3]; - res_typed [4] = v1_typed [4]; - res_typed [5] = v1_typed [5]; - res_typed [6] = v1_typed [6]; - res_typed [7] = v1_typed [7]; - } + for (int i = 0; i < 8; i++) + res_typed [i] = v1_typed [i]; + for (int i = 0; i < 8; i++) + res_typed [i + 8] = v2_typed [i]; + memcpy (res, res_typed, SIZEOF_V128); } // GreaterThan diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index 23bf79620a73e2..eba7d1c0f57113 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -5786,15 +5786,6 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; cmethod = (InterpMethod*)frame->imethod->data_items [imethod_index]; goto jit_call; } - MINT_IN_CASE(MINT_NEWOBJ_VT_INLINED) { - guint16 ret_size = ip [3]; - gpointer this_vt = locals + ip [2]; - - memset (this_vt, 0, ret_size); - LOCAL_VAR (ip [1], gpointer) = this_vt; - ip += 4; - MINT_IN_BREAK; - } MINT_IN_CASE(MINT_NEWOBJ_SLOW) { guint32 const token = ip [3]; return_offset = ip [1]; @@ -5937,8 +5928,13 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; MINT_IN_BREAK; } MINT_IN_CASE(MINT_INTRINS_MARVIN_BLOCK) { - interp_intrins_marvin_block ((guint32*)(locals + ip [1]), (guint32*)(locals + ip [2])); - ip += 3; + guint32 *pp0 = (guint32*)(locals + ip [1]); + guint32 *pp1 = (guint32*)(locals + ip [2]); + guint32 *dest0 = (guint32*)(locals + ip [3]); + guint32 *dest1 = (guint32*)(locals + ip [4]); + + interp_intrins_marvin_block (pp0, pp1, dest0, dest1); + ip += 5; MINT_IN_BREAK; } MINT_IN_CASE(MINT_INTRINS_ASCII_CHARS_TO_UPPERCASE) { @@ -7931,6 +7927,8 @@ interp_parse_options (const char *options) else if (strncmp (arg, "jiterp", 6) == 0) opt = INTERP_OPT_JITERPRETER; #endif + else if (strncmp (arg, "ssa", 3) == 0) + opt = INTERP_OPT_SSA; else if (strncmp (arg, "all", 3) == 0) opt = ~INTERP_OPT_NONE; @@ -8682,19 +8680,6 @@ interp_cleanup (void) #endif } -static void -register_interp_stats (void) -{ - mono_counters_init (); - mono_counters_register ("Total transform time", MONO_COUNTER_INTERP | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &mono_interp_stats.transform_time); - mono_counters_register ("Methods transformed", MONO_COUNTER_INTERP | MONO_COUNTER_LONG, &mono_interp_stats.methods_transformed); - mono_counters_register ("Total cprop time", MONO_COUNTER_INTERP | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &mono_interp_stats.cprop_time); - mono_counters_register ("Total super instructions time", MONO_COUNTER_INTERP | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &mono_interp_stats.super_instructions_time); - mono_counters_register ("Emitted instructions", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.emitted_instructions); - mono_counters_register ("Methods inlined", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.inlined_methods); - mono_counters_register ("Inline failures", MONO_COUNTER_INTERP | MONO_COUNTER_INT, &mono_interp_stats.inline_failures); -} - #undef MONO_EE_CALLBACK #define MONO_EE_CALLBACK(ret, name, sig) interp_ ## name, @@ -8723,8 +8708,6 @@ mono_ee_interp_init (const char *opts) mini_install_interp_callbacks (&mono_interp_callbacks); - register_interp_stats (); - #ifdef HOST_WASI debugger_enabled = mini_get_debug_options ()->mdb_optimizations; #endif diff --git a/src/mono/mono/mini/interp/interp.h b/src/mono/mono/mini/interp/interp.h index 137b40e1dd49ec..742e93bf06e59a 100644 --- a/src/mono/mono/mini/interp/interp.h +++ b/src/mono/mono/mini/interp/interp.h @@ -41,7 +41,8 @@ enum { #if HOST_BROWSER INTERP_OPT_JITERPRETER = 64, #endif - INTERP_OPT_DEFAULT = INTERP_OPT_INLINE | INTERP_OPT_CPROP | INTERP_OPT_SUPER_INSTRUCTIONS | INTERP_OPT_BBLOCKS | INTERP_OPT_TIERING | INTERP_OPT_SIMD + INTERP_OPT_SSA = 128, + INTERP_OPT_DEFAULT = INTERP_OPT_INLINE | INTERP_OPT_CPROP | INTERP_OPT_SUPER_INSTRUCTIONS | INTERP_OPT_BBLOCKS | INTERP_OPT_TIERING | INTERP_OPT_SIMD | INTERP_OPT_SSA #if HOST_BROWSER | INTERP_OPT_JITERPRETER #endif diff --git a/src/mono/mono/mini/interp/jiterpreter-opcode-values.h b/src/mono/mono/mini/interp/jiterpreter-opcode-values.h index d109c70ba25bd0..db88ec1dd03ca9 100644 --- a/src/mono/mono/mini/interp/jiterpreter-opcode-values.h +++ b/src/mono/mono/mini/interp/jiterpreter-opcode-values.h @@ -135,7 +135,6 @@ OP(MINT_INTRINS_RUNTIMEHELPERS_OBJECT_HAS_COMPONENT_SIZE, HIGH) OP(MINT_INTRINS_ENUM_HASFLAG, HIGH) OP(MINT_INTRINS_ORDINAL_IGNORE_CASE_ASCII, HIGH) OP(MINT_NEWOBJ_INLINED, HIGH) -OP(MINT_NEWOBJ_VT_INLINED, MASSIVE) OP(MINT_CPBLK, HIGH) OP(MINT_INITBLK, HIGH) OP(MINT_ROL_I4_IMM, HIGH) diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def index 7dd9d914fa328e..d45579ce40fe39 100644 --- a/src/mono/mono/mini/interp/mintops.def +++ b/src/mono/mono/mini/interp/mintops.def @@ -363,7 +363,6 @@ OPDEF(MINT_NEWOBJ_STRING, "newobj_string", 4, 1, 1, MintOpMethodToken) OPDEF(MINT_NEWOBJ, "newobj", 5, 1, 1, MintOpMethodToken) OPDEF(MINT_NEWOBJ_INLINED, "newobj_inlined", 3, 1, 0, MintOpVTableToken) OPDEF(MINT_NEWOBJ_VT, "newobj_vt", 5, 1, 1, MintOpMethodToken) -OPDEF(MINT_NEWOBJ_VT_INLINED, "newobj_vt_inlined", 4, 1, 1, MintOpShortInt) OPDEF(MINT_INITOBJ, "initobj", 3, 0, 1, MintOpShortInt) OPDEF(MINT_CASTCLASS, "castclass", 4, 1, 1, MintOpClassToken) OPDEF(MINT_ISINST, "isinst", 4, 1, 1, MintOpClassToken) @@ -815,7 +814,8 @@ OPDEF(MINT_INTRINS_GET_TYPE, "intrins_get_type", 3, 1, 1, MintOpNoArgs) OPDEF(MINT_INTRINS_SPAN_CTOR, "intrins_span_ctor", 4, 1, 2, MintOpNoArgs) OPDEF(MINT_INTRINS_RUNTIMEHELPERS_OBJECT_HAS_COMPONENT_SIZE, "intrins_runtimehelpers_object_has_component_size", 3, 1, 1, MintOpNoArgs) OPDEF(MINT_INTRINS_CLEAR_WITH_REFERENCES, "intrin_clear_with_references", 3, 0, 2, MintOpNoArgs) -OPDEF(MINT_INTRINS_MARVIN_BLOCK, "intrins_marvin_block", 3, 0, 2, MintOpNoArgs) +// This actually has 2 dregs and 2 sregs. Dregs are displayed as the metadata +OPDEF(MINT_INTRINS_MARVIN_BLOCK, "intrins_marvin_block", 5, 0, 2, MintOpTwoShorts) OPDEF(MINT_INTRINS_ASCII_CHARS_TO_UPPERCASE, "intrins_ascii_chars_to_uppercase", 3, 1, 1, MintOpNoArgs) OPDEF(MINT_INTRINS_MEMORYMARSHAL_GETARRAYDATAREF, "intrins_memorymarshal_getarraydataref", 3, 1, 1, MintOpNoArgs) OPDEF(MINT_INTRINS_ORDINAL_IGNORE_CASE_ASCII, "intrins_ordinal_ignore_case_ascii", 4, 1, 2, MintOpNoArgs) @@ -834,12 +834,18 @@ OPDEF(MINT_TIER_MONITOR_JITERPRETER, "tier_monitor_jiterpreter", 4, 0, 0, MintOp IROPDEF(MINT_NOP, "nop", 1, 0, 0, MintOpNoArgs) IROPDEF(MINT_DEF, "def", 2, 1, 0, MintOpNoArgs) +IROPDEF(MINT_DEF_ARG, "def_arg", 2, 1, 0, MintOpNoArgs) +IROPDEF(MINT_DEF_TIER_VAR, "def_tier_var", 3, 1, 1, MintOpNoArgs) IROPDEF(MINT_IL_SEQ_POINT, "il_seq_point", 1, 0, 0, MintOpNoArgs) IROPDEF(MINT_DUMMY_USE, "dummy_use", 2, 0, 1, MintOpNoArgs) IROPDEF(MINT_TIER_PATCHPOINT_DATA, "tier_patchpoint_data", 2, 0, 0, MintOpShortInt) // These two opcodes are resolved to a normal MINT_MOV when emitting compacted instructions IROPDEF(MINT_MOV_SRC_OFF, "mov.src.off", 6, 1, 1, MintOpTwoShorts) -IROPDEF(MINT_MOV_DST_OFF, "mov.dst.off", 6, 1, 1, MintOpTwoShorts) +IROPDEF(MINT_MOV_DST_OFF, "mov.dst.off", 8, 1, 2, MintOpTwoShorts) +IROPDEF(MINT_PHI, "phi", 2, 1, 0, MintOpNoArgs) +IROPDEF(MINT_DEAD_PHI, "dead_phi", 1, 0, 0, MintOpNoArgs) +IROPDEF(MINT_INTRINS_MARVIN_BLOCK_SSA1, "intrins_marvin_block_ssa1", 4, 1, 2, MintOpNoArgs) +IROPDEF(MINT_INTRINS_MARVIN_BLOCK_SSA2, "intrins_marvin_block_ssa2", 4, 1, 2, MintOpNoArgs) #ifdef __DEFINED_IROPDEF__ #undef IROPDEF diff --git a/src/mono/mono/mini/interp/mintops.h b/src/mono/mono/mini/interp/mintops.h index ec8e5f050c678d..73b767daccb094 100644 --- a/src/mono/mono/mini/interp/mintops.h +++ b/src/mono/mono/mini/interp/mintops.h @@ -208,7 +208,8 @@ typedef enum { #define MINT_SWITCH_LEN(n) (4 + (n) * 2) -#define MINT_IS_NOP(op) ((op) == MINT_NOP || (op) == MINT_DEF || (op) == MINT_DUMMY_USE || (op) == MINT_IL_SEQ_POINT) +#define MINT_IS_NOP(op) ((op) == MINT_NOP || (op) == MINT_DEF || (op) == MINT_DEF_ARG || (op) == MINT_DUMMY_USE || (op) == MINT_IL_SEQ_POINT) +#define MINT_IS_EMIT_NOP(op) ((op) == MINT_NOP || (op) == MINT_DEF || (op) == MINT_DEF_ARG || (op) == MINT_DEF_TIER_VAR || (op) == MINT_DUMMY_USE) #define MINT_IS_MOV(op) ((op) >= MINT_MOV_I4_I1 && (op) <= MINT_MOV_VT) #define MINT_IS_UNCONDITIONAL_BRANCH(op) ((op) >= MINT_BR && (op) <= MINT_CALL_HANDLER_S) #define MINT_IS_CONDITIONAL_BRANCH(op) ((op) >= MINT_BRFALSE_I4 && (op) <= MINT_BLT_UN_R8_S) @@ -234,7 +235,7 @@ typedef enum { #define MINT_IS_RETURN(op) (((op) >= MINT_RET && (op) <= MINT_RET_U2) || (op) == MINT_RET_I4_IMM || (op) == MINT_RET_I8_IMM) // TODO Add more -#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_LDC_R4 || op == MINT_LDC_R8 || op == MINT_LDPTR || op == MINT_BOX) +#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_LDC_R4 || op == MINT_LDC_R8 || op == MINT_LDPTR || op == MINT_BOX || op == MINT_INITLOCAL) #define MINT_CALL_ARGS 2 #define MINT_CALL_ARGS_SREG -2 diff --git a/src/mono/mono/mini/interp/transform-opt.c b/src/mono/mono/mini/interp/transform-opt.c index c6fc3a069c045d..9933dbcc09a7ad 100644 --- a/src/mono/mono/mini/interp/transform-opt.c +++ b/src/mono/mono/mini/interp/transform-opt.c @@ -5,6 +5,10 @@ #include "mintops.h" #include "transform.h" +/* + * VAR OFFSET ALLOCATOR + */ + // Allocates var at the offset that tos points to, also updating it. static int alloc_var_offset (TransformData *td, int local, gint32 *ptos) @@ -215,7 +219,6 @@ end_active_call (TransformData *td, ActiveCalls *ac, InterpInst *call) } // Data structure used for offset allocation of local vars - typedef struct { int var; gboolean is_alive; @@ -342,7 +345,7 @@ interp_alloc_offsets (TransformData *td) while (var != -1) { if (td->vars [var].global || - !td->local_ref_count || td->local_ref_count [var] > 1 || + !td->var_values || td->var_values [var].ref_count > 1 || td->vars [var].no_call_args) { // Some vars can't be allocated on the call args stack, since the constraint is that // call args vars die after the call. This isn't necessarily true for global vars or @@ -481,45 +484,1072 @@ interp_alloc_offsets (TransformData *td) } final_total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT); - // Iterate over all call args locals, update their final offset (aka add td->total_locals_size to them) - // then also update td->total_locals_size to account for this space. - td->param_area_offset = final_total_locals_size; + // Iterate over all call args locals, update their final offset (aka add td->total_locals_size to them) + // then also update td->total_locals_size to account for this space. + td->param_area_offset = final_total_locals_size; + for (unsigned int i = 0; i < td->vars_size; i++) { + // These are allocated separately at the end of the stack + if (td->vars [i].call_args) { + td->vars [i].offset += td->param_area_offset; + final_total_locals_size = MAX (td->vars [i].offset + td->vars [i].size, final_total_locals_size); + } + } + td->total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT); +} + +/* + * DOMINANCE COMPUTATION + */ + +static GString* +interp_get_bb_links (InterpBasicBlock *bb) +{ + GString *str = g_string_new (""); + + if (bb->in_count) { + g_string_append_printf (str, "IN (%d", bb->in_bb [0]->index); + for (int i = 1; i < bb->in_count; i++) + g_string_append_printf (str, " %d", bb->in_bb [i]->index); + g_string_append_printf (str, "), "); + } else { + g_string_append_printf (str, "IN (nil), "); + } + + if (bb->out_count) { + g_string_append_printf (str, "OUT (%d", bb->out_bb [0]->index); + for (int i = 1; i < bb->out_count; i++) + g_string_append_printf (str, " %d", bb->out_bb [i]->index); + g_string_append_printf (str, ")"); + } else { + g_string_append_printf (str, "OUT (nil)"); + } + + return str; +} + +static int +dfs_visit (TransformData *td) +{ + int dfs_index = 0; + int next_stack_index = 0; + td->bblocks = (InterpBasicBlock**)mono_mempool_alloc0 (td->opt_mempool, sizeof (InterpBasicBlock*) * td->bb_count); + InterpBasicBlock **stack = (InterpBasicBlock**)g_malloc0 (sizeof (InterpBasicBlock*) * td->bb_count); + + g_assert (!td->entry_bb->in_count); + stack [next_stack_index++] = td->entry_bb; + + while (next_stack_index > 0) { + // Pop last added element + next_stack_index--; + InterpBasicBlock *bb = stack [next_stack_index]; + + // Process current bblock + td->bblocks [dfs_index] = bb; + bb->dfs_index = dfs_index++; + + // Push all nodes to process next + for (int i = 0; i < bb->out_count; i++) { + InterpBasicBlock *out_bb = bb->out_bb [i]; + if (out_bb->dfs_index == -1) { + stack [next_stack_index++] = out_bb; + // Mark node as gray so it is not pushed again + out_bb->dfs_index = -2; + } + } + } + + g_free (stack); + return dfs_index; +} + +static void +interp_compute_dfs_indexes (TransformData *td) +{ + for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) + bb->dfs_index = -1; + // Sort bblocks in reverse postorder + int dfs_index = dfs_visit (td); + td->bblocks_count_no_eh = dfs_index; + + // Visit also bblocks reachable from eh handlers. These bblocks are not linked + // to the main cfg (where we do dominator computation, ssa transformation etc) + if (td->header->num_clauses > 0) { + InterpBasicBlock *current = td->entry_bb; + while (current != NULL) { + if (current->reachable && current->dfs_index == -1) { + current->dfs_index = dfs_index; + td->bblocks [dfs_index] = current; + dfs_index++; + } + current = current->next_bb; + } + } + td->bblocks_count_eh = dfs_index; + + if (td->verbose_level) { + InterpBasicBlock *bb; + g_print ("\nBASIC BLOCK GRAPH:\n"); + for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + GString* bb_info = interp_get_bb_links (bb); + g_print ("BB%d: DFS%s(%d), %s\n", bb->index, (bb->dfs_index >= td->bblocks_count_no_eh) ? "_EH" : "" , bb->dfs_index, bb_info->str); + g_string_free (bb_info, TRUE); + } + } +} + +static InterpBasicBlock* +dom_intersect (InterpBasicBlock **idoms, InterpBasicBlock *bb1, InterpBasicBlock *bb2) +{ + while (bb1 != bb2) { + while (bb1->dfs_index < bb2->dfs_index) + bb2 = idoms [bb2->dfs_index]; + while (bb2->dfs_index < bb1->dfs_index) + bb1 = idoms [bb1->dfs_index]; + } + return bb1; +} + +static gboolean +is_bblock_ssa_cfg (TransformData *td, InterpBasicBlock *bb) +{ + // bblocks with uninitialized dfs_index are unreachable + if (bb->dfs_index == -1) + return FALSE; + if (bb->dfs_index < td->bblocks_count_no_eh) + return TRUE; + return FALSE; +} + +static void +interp_compute_dominators (TransformData *td) +{ + InterpBasicBlock **idoms = (InterpBasicBlock**)mono_mempool_alloc0 (td->opt_mempool, sizeof (InterpBasicBlock*) * td->bblocks_count_no_eh); + + idoms [0] = td->entry_bb; + gboolean changed = TRUE; + while (changed) { + changed = FALSE; + // all bblocks in reverse post order except entry + for (int i = 1; i < td->bblocks_count_no_eh; i++) { + InterpBasicBlock *bb = td->bblocks [i]; + InterpBasicBlock *new_idom = NULL; + // pick candidate idom from first processed predecessor of it + int j; + for (j = 0; j < bb->in_count; j++) { + InterpBasicBlock *in_bb = bb->in_bb [j]; + if (is_bblock_ssa_cfg (td, in_bb) && idoms [in_bb->dfs_index]) { + new_idom = in_bb; + break; + } + } + + // intersect new_idom with dominators from the other predecessors + for (; j < bb->in_count; j++) { + InterpBasicBlock *in_bb = bb->in_bb [j]; + if (is_bblock_ssa_cfg (td, in_bb) && idoms [in_bb->dfs_index]) + new_idom = dom_intersect (idoms, in_bb, new_idom); + } + + // check if we obtained new idom + if (idoms [i] != new_idom) { + idoms [i] = new_idom; + changed = TRUE; + } + } + } + + td->idoms = idoms; + + // Build `dominated` bblock list for each bblock + for (int i = 1; i < td->bblocks_count_no_eh; i++) { + InterpBasicBlock *bb = td->bblocks [i]; + InterpBasicBlock *idom = td->idoms [i]; + if (idom) + idom->dominated = g_slist_prepend (idom->dominated, bb); + } + + if (td->verbose_level) { + InterpBasicBlock *bb; + g_print ("\nBASIC BLOCK IDOMS:\n"); + for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + if (!is_bblock_ssa_cfg (td, bb)) + continue; + g_print ("IDOM (BB%d) = BB%d\n", bb->index, td->idoms [bb->dfs_index]->index); + } + + g_print ("\nBASIC BLOCK DOMINATED:\n"); + for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + if (!is_bblock_ssa_cfg (td, bb)) + continue; + if (bb->dominated) { + g_print ("DOMINATED (BB%d) = {", bb->index); + GSList *dominated = bb->dominated; + while (dominated) { + InterpBasicBlock *dominated_bb = (InterpBasicBlock*)dominated->data; + g_print (" BB%d", dominated_bb->index); + dominated = dominated->next; + } + g_print (" }\n"); + } + } + } +} + +static void +interp_compute_dominance_frontier (TransformData *td) +{ + int bitsize = mono_bitset_alloc_size (td->bblocks_count_no_eh, 0); + char *mem = (char *)mono_mempool_alloc0 (td->opt_mempool, bitsize * td->bblocks_count_no_eh); + + for (int i = 0; i < td->bblocks_count_no_eh; i++) { + td->bblocks [i]->dfrontier = mono_bitset_mem_new (mem, td->bblocks_count_no_eh, 0); + mem += bitsize; + } + + for (int i = 0; i < td->bblocks_count_no_eh; i++) { + InterpBasicBlock *bb = td->bblocks [i]; + + if (bb->in_count > 1) { + for (int j = 0; j < bb->in_count; ++j) { + InterpBasicBlock *p = bb->in_bb [j]; + if (!is_bblock_ssa_cfg (td, p)) + continue; + + g_assert (p->dfs_index || p == td->entry_bb); + + while (p != td->idoms [bb->dfs_index]) { + g_assert (bb->dfs_index < td->bblocks_count_no_eh); + mono_bitset_set_fast (p->dfrontier, bb->dfs_index); + p = td->idoms [p->dfs_index]; + } + } + } + } + + if (td->verbose_level) { + InterpBasicBlock *bb; + g_print ("\nBASIC BLOCK DFRONTIERS:\n"); + for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + if (!is_bblock_ssa_cfg (td, bb)) + continue; + g_print ("DFRONTIER (BB%d) = {", bb->index); + int i; + mono_bitset_foreach_bit (bb->dfrontier, i, td->bb_count) { + g_print (" BB%d", td->bblocks [i]->index); + } + g_print (" }\n"); + } + } +} + +static void +interp_compute_dominance (TransformData *td) +{ + /* + * A dominator for a bblock n, is a bblock that is reached on every path to n. Dominance is transitive. + * An immediate dominator for a bblock n, is the bblock that dominates n, but doesn't dominate any other + * dominators of n, meaning it is the closest dominator to n. The dominance frontier of a node V is the set + * of nodes where the dominance stops. This means that it is the set of nodes where node V doesn't dominate + * it, but it does dominate a predecessor of it (including if the predecessor is V itself). + * + * The dominance frontier is relevant for SSA computation since, for a var defined in a bblock, the DF of bblock + * represents the set of bblocks where we need to add a PHI opcode for that variable. + */ + interp_compute_dfs_indexes (td); + + interp_compute_dominators (td); + + interp_compute_dominance_frontier (td); +} + +/* + * SSA TRANSFORMATION + */ +static void +compute_eh_var_cb (TransformData *td, int *pvar, gpointer data) +{ + int var = *pvar; + td->vars [var].eh_var = TRUE; +} + +static void +interp_compute_eh_vars (TransformData *td) +{ + // FIXME we can now remove EH bblocks. This means some vars can stop being EH vars + + // EH bblocks are stored separately and are not reachable from the non-EF control flow + // path. Any var reachable from EH bblocks will not be in SSA form. + for (int i = td->bblocks_count_no_eh; i < td->bblocks_count_eh; i++) { + InterpBasicBlock *bb = td->bblocks [i]; + for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { + if (ins->opcode == MINT_LDLOCA_S) + td->vars [ins->sregs [0]].eh_var = TRUE; + interp_foreach_ins_var (td, ins, bb, compute_eh_var_cb); + } + } + + // If we have a try block that might catch exceptions, then we can't do any propagation + // of the values defined in the block since an exception could interrupt the normal control + // flow. All vars defined in this block will not be in SSA form. + for (unsigned int i = 0; i < td->header->num_clauses; i++) { + MonoExceptionClause *c = &td->header->clauses [i]; + if (c->flags == MONO_EXCEPTION_CLAUSE_NONE || + c->flags == MONO_EXCEPTION_CLAUSE_FILTER) { + InterpBasicBlock *bb = td->offset_to_bb [c->try_offset]; + int try_end = c->try_offset + c->try_len; + g_assert (bb); + while (bb->il_offset != -1 && bb->il_offset < try_end) { + for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { + if (mono_interp_op_dregs [ins->opcode]) + td->vars [ins->dreg].eh_var = TRUE; + } + bb = bb->next_bb; + } + } + } + + td->eh_vars_computed = TRUE; +} + +static void +interp_compute_ssa_vars (TransformData *td) +{ + if (!td->eh_vars_computed) + interp_compute_eh_vars (td); + + for (unsigned int i = 0; i < td->vars_size; i++) { + if (td->vars [i].indirects > 0) { + td->vars [i].no_ssa = TRUE; + td->vars [i].has_indirects = TRUE; + } else { + td->vars [i].has_indirects = FALSE; + if (td->vars [i].eh_var) + td->vars [i].no_ssa = TRUE; + else + td->vars [i].no_ssa = FALSE; + } + } +} + +static gboolean +var_is_ssa_form (TransformData *td, int var) +{ + if (td->vars [var].no_ssa) + return FALSE; + + return TRUE; +} + +static gboolean +var_has_indirects (TransformData *td, int var) +{ + if (td->vars [var].has_indirects) + return TRUE; + + return FALSE; +} + +static InterpVarValue* +get_var_value (TransformData *td, int var) +{ + if (var_is_ssa_form (td, var)) + return &td->var_values [var]; + + if (var_has_indirects (td, var)) + return NULL; + + // No ssa var, check if we have a def set for the current bblock + if (td->var_values [var].def) { + if (td->var_values [var].liveness.bb_dfs_index == td->cbb->dfs_index) + return &td->var_values [var]; + } + return NULL; + +} + +static InterpInst* +get_var_value_def (TransformData *td, int var) +{ + InterpVarValue *val = get_var_value (td, var); + if (val) + return val->def; + return NULL; +} + +static int +get_var_value_type (TransformData *td, int var) +{ + InterpVarValue *val = get_var_value (td, var); + if (val) + return val->type; + return VAR_VALUE_NONE; +} + +static void +compute_global_var_cb (TransformData *td, int *pvar, gpointer data) +{ + int var = *pvar; + InterpBasicBlock *bb = (InterpBasicBlock*)data; + InterpVar *var_data = &td->vars [var]; + if (!var_is_ssa_form (td, var) || td->vars [var].ext_index == -1) + return; + // If var is used in another block than the one that it is declared then mark it as global + if (var_data->declare_bbs && var_data->declare_bbs->data != bb) { + int ext_index = td->vars [var].ext_index; + td->renamable_vars [ext_index].ssa_global = TRUE; + } +} + +// We obtain the list of global vars, as well as the list of bblocks where each one of the global vars is declared. +static void +interp_compute_global_vars (TransformData *td) +{ + InterpBasicBlock *bb; + for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + if (!is_bblock_ssa_cfg (td, bb)) + continue; + InterpInst *ins; + for (ins = bb->first_ins; ins != NULL; ins = ins->next) { + if (mono_interp_op_dregs [ins->opcode] && var_is_ssa_form (td, ins->dreg)) { + // Save the list of bblocks where a global var is defined in + InterpVar *var_data = &td->vars [ins->dreg]; + if (!var_data->declare_bbs) { + var_data->declare_bbs = g_slist_prepend (NULL, bb); + } else { + int ext_index = interp_make_var_renamable (td, ins->dreg); + if (!g_slist_find (var_data->declare_bbs, bb)) { + // Var defined in multiple bblocks, it is ssa global + var_data->declare_bbs = g_slist_prepend (var_data->declare_bbs, bb); + td->renamable_vars [ext_index].ssa_global = TRUE; + } + } + } + } + } + + for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + if (!is_bblock_ssa_cfg (td, bb)) + continue; + InterpInst *ins; + for (ins = bb->first_ins; ins != NULL; ins = ins->next) + interp_foreach_ins_svar (td, ins, bb, compute_global_var_cb); + } + + if (td->verbose_level) { + g_print ("\nSSA GLOBALS:\n"); + for (unsigned int i = 0; i < td->renamable_vars_size; i++) { + if (td->renamable_vars [i].ssa_global) { + int var = td->renamable_vars [i].var_index; + g_print ("DECLARE_BB (%d) = {", var); + GSList *l = td->vars [var].declare_bbs; + while (l) { + g_print (" BB%d", ((InterpBasicBlock*)l->data)->index); + l = l->next; + } + g_print (" }\n"); + } + } + } +} + +static void +compute_gen_set_cb (TransformData *td, int *pvar, gpointer data) +{ + int var = *pvar; + InterpBasicBlock *bb = (InterpBasicBlock*)data; + + int ext_index = td->vars [var].ext_index; + if (ext_index == -1) + return; + + if (!td->renamable_vars [ext_index].ssa_global) + return; + + if (!mono_bitset_test_fast (bb->kill_set, ext_index)) + mono_bitset_set_fast (bb->gen_set, ext_index); +} + +// For each bblock, computes the kill set (the set of vars defined by the bblock) +// and gen set (the set of vars used by the bblock, with the definition not being +// in the bblock). +static void +compute_gen_kill_sets (TransformData *td) +{ + int bitsize = mono_bitset_alloc_size (td->renamable_vars_size, 0); + char *mem = (char *)mono_mempool_alloc0 (td->opt_mempool, bitsize * td->bblocks_count_no_eh * 4); + + for (int i = 0; i < td->bblocks_count_no_eh; i++) { + InterpBasicBlock *bb = td->bblocks [i]; + + bb->gen_set = mono_bitset_mem_new (mem, td->renamable_vars_size, 0); + mem += bitsize; + bb->kill_set = mono_bitset_mem_new (mem, td->renamable_vars_size, 0); + mem += bitsize; + bb->live_in_set = mono_bitset_mem_new (mem, td->renamable_vars_size, 0); + mem += bitsize; + bb->live_out_set = mono_bitset_mem_new (mem, td->renamable_vars_size, 0); + mem += bitsize; + + for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { + interp_foreach_ins_svar (td, ins, bb, compute_gen_set_cb); + if (mono_interp_op_dregs [ins->opcode]) { + int ext_index = td->vars [ins->dreg].ext_index; + if (ext_index != -1 && td->renamable_vars [ext_index].ssa_global) + mono_bitset_set_fast (bb->kill_set, ext_index); + } + } + } +} + +// Compute live_in and live_out sets +// For a bblock, live_in contains all vars that are live at exit of bblock and not redefined, +// together with all vars used in the bblock without being defined. For a bblock, live_out set +// contains all vars that are live_in any successor. This computation starts with empty sets +// (starting to generate live vars from the gen sets) and it is run iteratively until the +// computation converges. +static void +recompute_live_out (TransformData *td, InterpBasicBlock *bb) +{ + for (int i = 0; i < bb->out_count; i++) { + InterpBasicBlock *sbb = bb->out_bb [i]; + + // Recompute live_in_set for each successor of bb + mono_bitset_copyto_fast (sbb->live_out_set, sbb->live_in_set); + mono_bitset_sub_fast (sbb->live_in_set, sbb->kill_set); + mono_bitset_union_fast (sbb->live_in_set, sbb->gen_set); + + // Recompute live_out_set of bb, by adding the live_in_set of each successor + mono_bitset_union_fast (bb->live_out_set, sbb->live_in_set); + } +} + +// For each bblock, compute LiveIn, LiveOut sets tracking liveness for the previously computed global vars +static void +interp_compute_pruned_ssa_liveness (TransformData *td) +{ + compute_gen_kill_sets (td); + + gboolean changed = TRUE; + while (changed) { + changed = FALSE; + for (int i = 0; i < td->bblocks_count_no_eh; i++) { + InterpBasicBlock *bb = td->bblocks [i]; + guint32 prev_count = mono_bitset_count (bb->live_out_set); + recompute_live_out (td, bb); + if (prev_count != mono_bitset_count (bb->live_out_set)) + changed = TRUE; + } + } + + if (td->verbose_level) { + InterpBasicBlock *bb; + g_print ("\nBASIC BLOCK LIVENESS:\n"); + for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + unsigned int i; + if (!is_bblock_ssa_cfg (td, bb)) + continue; + g_print ("BB%d\n\tLIVE_IN = {", bb->index); + mono_bitset_foreach_bit (bb->live_in_set, i, td->renamable_vars_size) { + g_print (" %d", td->renamable_vars [i].var_index); + } + g_print (" }\n\tLIVE_OUT = {", bb->index); + mono_bitset_foreach_bit (bb->live_out_set, i, td->renamable_vars_size) { + g_print (" %d", td->renamable_vars [i].var_index); + } + g_print (" }\n"); + } + } +} + +static gboolean +bb_has_phi (TransformData *td, InterpBasicBlock *bb, int var) +{ + InterpInst *ins = bb->first_ins; + while (ins) { + if (ins->opcode == MINT_PHI) { + if (ins->dreg == var) + return TRUE; + } else if (ins->opcode == MINT_DEAD_PHI) { + MonoBitSet *bitset = ins->info.dead_phi_vars; + int ext_index = td->vars [var].ext_index; + if (mono_bitset_test_fast (bitset, ext_index)) + return TRUE; + } else { + // if we have a phi it is at the start of the bb + return FALSE; + } + ins = ins->next; + } + return FALSE; +} + +static void +bb_insert_phi (TransformData *td, InterpBasicBlock *bb, int var) +{ + InterpInst *first_ins = NULL; + // We keep dead phi as first instruction so we can find it quickly + if (bb->first_ins && bb->first_ins->opcode == MINT_DEAD_PHI) + first_ins = bb->first_ins; + InterpInst *phi = interp_insert_ins_bb (td, bb, first_ins, MINT_PHI); + if (td->verbose_level) + g_print ("BB%d NEW_PHI %d\n", bb->index, var); + + phi->dreg = var; + phi->info.args = (int*)mono_mempool_alloc (td->opt_mempool, (bb->in_count + 1) * sizeof (int)); + int i; + for (i = 0; i < bb->in_count; i++) + phi->info.args [i] = var; + phi->info.args [i] = -1; +} + +static void +bb_insert_dead_phi (TransformData *td, InterpBasicBlock *bb, int var) +{ + MonoBitSet *bitset; + if (bb->first_ins && bb->first_ins->opcode == MINT_DEAD_PHI) { + bitset = bb->first_ins->info.dead_phi_vars; + } else { + InterpInst *phi = interp_insert_ins_bb (td, bb, NULL, MINT_DEAD_PHI); + gpointer mem = mono_mempool_alloc0 (td->opt_mempool, mono_bitset_alloc_size (td->renamable_vars_size, 0)); + phi->info.dead_phi_vars = bitset = mono_bitset_mem_new (mem, td->renamable_vars_size, 0); + } + int ext_index = td->vars [var].ext_index; + mono_bitset_set_fast (bitset, ext_index); + if (td->verbose_level) + g_print ("BB%d NEW_DEAD_PHI %d\n", bb->index, var); + +} + +static void +insert_phi_nodes (TransformData *td) +{ + if (td->verbose_level) + g_print ("\nINSERT PHI NODES:\n"); + for (unsigned int i = 0; i < td->renamable_vars_size; i++) { + if (!td->renamable_vars [i].ssa_global) + continue; + + // For every definition of this var, we add a phi node at the start of + // all bblocks in the dominance frontier of the defining bblock. + int var = td->renamable_vars [i].var_index; + GSList *workset = g_slist_copy (td->vars [var].declare_bbs); + while (workset) { + GSList *old_head = workset; + InterpBasicBlock *bb = (InterpBasicBlock*)workset->data; + workset = workset->next; + g_free (old_head); + g_assert (is_bblock_ssa_cfg (td, bb)); + int j; + mono_bitset_foreach_bit (bb->dfrontier, j, td->bb_count) { + InterpBasicBlock *bd = td->bblocks [j]; + g_assert (is_bblock_ssa_cfg (td, bb)); + if (!bb_has_phi (td, bd, var)) { + if (mono_bitset_test_fast (bd->live_in_set, i)) { + td->renamable_vars [i].ssa_fixed = TRUE; + bb_insert_phi (td, bd, var); + } else { + // We need this only for vars that are ssa fixed, but it is not clear + // if the current var is fixed or not. We will ignore these opcodes if + // the var is not actually ssa fixed. + bb_insert_dead_phi (td, bd, var); + } + if (!g_slist_find (workset, bd)) + workset = g_slist_prepend (workset, bd); + } + } + } + } +} + +// Additional fixed vars, in addition to vars that are args to phi nodes +static void +insert_tiering_defs (TransformData *td) +{ + for (int i = 0; i < td->bblocks_count_no_eh; i++) { + InterpBasicBlock *bb = td->bblocks [i]; + if (!bb->patchpoint_bb) + continue; + + // All IL locals live at entry to this bb have to be fixed + for (unsigned int k = 0; k < td->renamable_vars_size; k++) { + int var_index = td->renamable_vars [k].var_index; + if (td->vars [var_index].il_global && mono_bitset_test_fast (bb->live_in_set, k)) { + td->renamable_vars [k].ssa_fixed = TRUE; + + // Patchpoints introduce some complications since some variables have to be + // accessed from same offset between unoptimized and optimized methods. + // + // Consider the following scenario + // BB0 -> BB2 BB0: TMP <- def; IL_VAR <- TMP + // | ^ BB1: Use IL_VAR + // v | BB2: Use IL_VAR + // BB1 + // + // BB1 is a basic block containing a patchpoint, BB0 dominates both BB1 and BB2. + // IL_VAR is used both in BB1 and BB2. In BB1, in optimized code, we could normally + // replace use of IL_VAR with use of TMP. However, this is incorrect, because TMP + // can be allocated at a different offset from IL_VAR and, if we enter the method + // from the patchpoint in BB1, the data at var TMP would not be initialized since + // we only copy the IL var space. + // Even if we prevent the copy propagation in BB1, then tiering is still broken. + // In BB2 we could replace use of IL_VAR with TMP, and we end up hitting the same problem. + // Optimized code will attempt to access value of IL_VAR from the offset of TMP_VAR, + // which is not initialized if we enter from the patchpoint in BB1. + // We solve these issues by inserting a MINT_DEF_TIER_VAR in BB1. This instruction + // prevents cprop of the IL_VAR in the patchpoint bblock since MINT_DEF_TIER_VAR is seen + // as a redefinition. In addition to that, in BB2 we now have 2 reaching definitions for + // IL_VAR, the original one from BB0 and the one from patchpoint bblock from BB1. This + // will force a phi definition in BB2 and we will once again be force to access IL_VAR + // from the original offset that is equal to the one in unoptimized method. + InterpInst *def = interp_insert_ins_bb (td, bb, NULL, MINT_DEF_TIER_VAR); + def->sregs [0] = var_index; + def->dreg = var_index; + InterpVar *var_data = &td->vars [var_index]; + // Record the new declaration for this var. Phi nodes insertion phase will account for this + if (!g_slist_find (var_data->declare_bbs, bb)) + var_data->declare_bbs = g_slist_prepend (var_data->declare_bbs, bb); + if (td->verbose_level) { + g_print ("insert patchpoint var define in BB%d:\n\t", bb->index); + interp_dump_ins (def, td->data_items); + } + } + } + } +} + +static int +get_renamed_var (TransformData *td, int var, gboolean def_arg) +{ + int ext_index = td->vars [var].ext_index; + g_assert (ext_index != -1); + int renamed_var = interp_create_var (td, td->vars [var].type); + td->vars [renamed_var].def_arg = def_arg; + + if (td->renamable_vars [ext_index].ssa_fixed) { + td->vars [renamed_var].renamed_ssa_fixed = TRUE; + interp_create_renamed_fixed_var (td, renamed_var, var); + } else { + // Renamed var reference the orignal var through the ext_index + td->vars [renamed_var].ext_index = ext_index; + } + td->renamable_vars [ext_index].ssa_stack = g_slist_prepend (td->renamable_vars [ext_index].ssa_stack, (gpointer)(gsize)renamed_var); + return renamed_var; +} + +static void +rename_ins_var_cb (TransformData *td, int *pvar, gpointer data) +{ + int var = *pvar; + int ext_index = td->vars [var].ext_index; + if (ext_index != -1) { + int renamed_var = (int)(gsize)td->renamable_vars [ext_index].ssa_stack->data; + g_assert (renamed_var != -1); + *pvar = renamed_var; + } +} + +static void +rename_phi_args_in_out_bbs (TransformData *td, InterpBasicBlock *bb) +{ + for (int i = 0; i < bb->out_count; i++) { + InterpBasicBlock *bb_out = bb->out_bb [i]; + + int aindex; + for (aindex = 0; aindex < bb_out->in_count; aindex++) + if (bb_out->in_bb [aindex] == bb) + break; + + for (InterpInst *ins = bb_out->first_ins; ins != NULL; ins = ins->next) { + if (ins->opcode == MINT_PHI) { + int var = ins->info.args [aindex]; + int ext_index = td->vars [var].ext_index; + GSList *stack = td->renamable_vars [ext_index].ssa_stack; + ins->info.args [aindex] = (int)(gsize)stack->data; + } else if (ins->opcode == MINT_DEAD_PHI) { + continue; + } else if (ins->opcode != MINT_NOP) { + break; + } + } + } +} + +static void +rename_vars_in_bb_start (TransformData *td, InterpBasicBlock *bb) +{ + InterpInst *ins; + + // Rename vars defined with MINT_PHI + for (ins = bb->first_ins; ins != NULL; ins = ins->next) { + if (ins->opcode == MINT_PHI) { + ins->dreg = get_renamed_var (td, ins->dreg, FALSE); + } else if (ins->opcode == MINT_DEAD_PHI) { + unsigned int ext_index; + mono_bitset_foreach_bit (ins->info.dead_phi_vars, ext_index, td->renamable_vars_size) { + if (td->renamable_vars [ext_index].ssa_fixed) { + // we push an invalid var that will be just a marker for marking var live limits + td->renamable_vars [ext_index].ssa_stack = g_slist_prepend (td->renamable_vars [ext_index].ssa_stack, (gpointer)(gsize)-1); + } + } + } else { + break; + } + } + + InterpLivenessPosition current_liveness; + current_liveness.bb_dfs_index = bb->dfs_index; + current_liveness.ins_index = 0; + + // Use renamed definition for sources + for (; ins != NULL; ins = ins->next) { + if (interp_ins_is_nop (ins) || ins->opcode == MINT_DEAD_PHI) + continue; + ins->flags |= INTERP_INST_FLAG_LIVENESS_MARKER; + current_liveness.ins_index++; + + interp_foreach_ins_svar (td, ins, NULL, rename_ins_var_cb); + if (!mono_interp_op_dregs [ins->opcode] || td->vars [ins->dreg].ext_index == -1) + continue; + + if (ins->opcode == MINT_DEF_ARG) { + ins->dreg = get_renamed_var (td, ins->dreg, TRUE); + } else if (mono_interp_op_dregs [ins->opcode]) { + g_assert (!td->vars [ins->dreg].renamed_ssa_fixed); + int renamable_ext_index = td->vars [ins->dreg].ext_index; + if (td->renamable_vars [renamable_ext_index].ssa_fixed && + td->renamable_vars [renamable_ext_index].ssa_stack) { + // Mark the exact liveness end limit for the ssa fixed var that is overwritten (the old entry on the stack) + int renamed_var = (int)(gsize)td->renamable_vars [renamable_ext_index].ssa_stack->data; + if (renamed_var != -1) { + g_assert (td->vars [renamed_var].renamed_ssa_fixed); + int renamed_var_ext = td->vars [renamed_var].ext_index; + InterpLivenessPosition *liveness_ptr = (InterpLivenessPosition*)mono_mempool_alloc (td->opt_mempool, sizeof (InterpLivenessPosition)); + *liveness_ptr = current_liveness; + td->renamed_fixed_vars [renamed_var_ext].live_limit_bblocks = g_slist_prepend (td->renamed_fixed_vars [renamed_var_ext].live_limit_bblocks, liveness_ptr); + } + } + ins->dreg = get_renamed_var (td, ins->dreg, FALSE); + } + } + + rename_phi_args_in_out_bbs (td, bb); +} + +static void +rename_vars_in_bb_end (TransformData *td, InterpBasicBlock *bb) +{ + InterpInst *ins; + + // All vars currently on the ssa stack are live until the end of the bblock + for (unsigned int i = 0; i < td->renamable_vars_size; i++) { + if (td->renamable_vars [i].ssa_fixed && td->renamable_vars [i].ssa_stack) { + int renamed_var = (int)(gsize)td->renamable_vars [i].ssa_stack->data; + if (renamed_var != -1) { + g_assert (td->vars [renamed_var].renamed_ssa_fixed); + int renamed_var_ext = td->vars [renamed_var].ext_index; + if (!td->renamed_fixed_vars [renamed_var_ext].live_out_bblocks) { + gpointer mem = mono_mempool_alloc0 (td->opt_mempool, mono_bitset_alloc_size (td->bblocks_count_no_eh, 0)); + td->renamed_fixed_vars [renamed_var_ext].live_out_bblocks = mono_bitset_mem_new (mem, td->bblocks_count_no_eh, 0); + } + + mono_bitset_set_fast (td->renamed_fixed_vars [renamed_var_ext].live_out_bblocks, bb->dfs_index); + } + } + } + + // Pop from the stack any new vars defined in this bblock + for (ins = bb->first_ins; ins != NULL; ins = ins->next) { + if (mono_interp_op_dregs [ins->opcode]) { + int ext_index = td->vars [ins->dreg].ext_index; + if (ext_index == -1) + continue; + if (td->vars [ins->dreg].renamed_ssa_fixed) + ext_index = td->renamed_fixed_vars [ext_index].renamable_var_ext_index; + GSList *prev_head = td->renamable_vars [ext_index].ssa_stack; + td->renamable_vars [ext_index].ssa_stack = prev_head->next; + g_free (prev_head); + } else if (ins->opcode == MINT_DEAD_PHI) { + unsigned int ext_index; + mono_bitset_foreach_bit (ins->info.dead_phi_vars, ext_index, td->renamable_vars_size) { + if (td->renamable_vars [ext_index].ssa_fixed) { + GSList *prev_head = td->renamable_vars [ext_index].ssa_stack; + td->renamable_vars [ext_index].ssa_stack = prev_head->next; + g_free (prev_head); + } + } + interp_clear_ins (ins); + } + } + +} + +static void +rename_vars (TransformData *td) +{ + int next_stack_index = 0; + InterpBasicBlock **stack = (InterpBasicBlock**)g_malloc0 (sizeof (InterpBasicBlock*) * td->bblocks_count_no_eh); + gboolean *bb_status = (gboolean*)g_malloc0 (sizeof (InterpBasicBlock*) * td->bblocks_count_no_eh); + + stack [next_stack_index++] = td->entry_bb; + + while (next_stack_index > 0) { + next_stack_index--; + InterpBasicBlock *bb = stack [next_stack_index]; + + if (!bb_status [bb->dfs_index]) { + rename_vars_in_bb_start (td, bb); + bb_status [bb->dfs_index] = TRUE; + stack [next_stack_index++] = bb; + + // Rename recursively every successor of bb in the dominator tree + GSList *dominated = bb->dominated; + while (dominated) { + InterpBasicBlock *dominated_bb = (InterpBasicBlock*)dominated->data; + g_assert (!bb_status [dominated_bb->dfs_index]); + stack [next_stack_index++] = dominated_bb; + dominated = dominated->next; + } + } else { + // We reach this entry after all the successors have been processed + rename_vars_in_bb_end (td, bb); + } + } + + g_free (stack); + g_free (bb_status); + + if (td->verbose_level) { + g_print ("\nFIXED SSA VARS LIVENESS LIMIT:\n"); + for (unsigned int i = 0; i < td->renamed_fixed_vars_size; i++) { + g_print ("FIXED VAR %d\n\tNO LIVE LIMIT BBLOCKS: {", td->renamed_fixed_vars [i].var_index); + MonoBitSet *live_out_bblocks = td->renamed_fixed_vars [i].live_out_bblocks; + if (live_out_bblocks) { + int j; + mono_bitset_foreach_bit (live_out_bblocks, j, td->bblocks_count_no_eh) { + g_print (" BB%d", td->bblocks [j]->index); + } + } + g_print (" }\n"); + g_print ("\tLIVE LIMIT BBLOCKS: {"); + GSList *live_limit_bblocks = td->renamed_fixed_vars [i].live_limit_bblocks; + while (live_limit_bblocks) { + InterpLivenessPosition *live_limit = (InterpLivenessPosition*)live_limit_bblocks->data; + g_print (" (BB%d, %d)", td->bblocks [live_limit->bb_dfs_index]->index, live_limit->ins_index); + live_limit_bblocks = live_limit_bblocks->next; + } + g_print (" }\n"); + } + } +} + +static void +interp_compute_ssa (TransformData *td) +{ + if (td->verbose_level) { + g_print ("\nIR before SSA compute:\n"); + mono_interp_print_td_code (td); + } + + MONO_TIME_TRACK (mono_interp_stats.ssa_compute_dominance_time, interp_compute_dominance (td)); + + interp_compute_ssa_vars (td); + + MONO_TIME_TRACK (mono_interp_stats.ssa_compute_global_vars_time, interp_compute_global_vars (td)); + + MONO_TIME_TRACK (mono_interp_stats.ssa_compute_pruned_liveness_time, interp_compute_pruned_ssa_liveness (td)); + + insert_tiering_defs (td); + + insert_phi_nodes (td); + + MONO_TIME_TRACK (mono_interp_stats.ssa_rename_vars_time, rename_vars (td)); + + if (td->verbose_level) { + g_print ("\nIR after SSA compute:\n"); + mono_interp_print_td_code (td); + } +} + +static void +revert_ssa_rename_cb (TransformData *td, int *pvar, gpointer data) +{ + int var = *pvar; + int ext_index = td->vars [var].ext_index; + if (ext_index == -1) + return; + + int new_var = -1; + if (td->vars [var].renamed_ssa_fixed) { + int renamable_var_ext_index = td->renamed_fixed_vars [ext_index].renamable_var_ext_index; + new_var = td->renamable_vars [renamable_var_ext_index].var_index; + } else if (td->vars [var].def_arg) { + new_var = td->renamable_vars [ext_index].var_index; + } + + if (new_var != -1) { + *pvar = new_var; + // Offset allocator checks ref_count to detect single use vars. Keep it updated + td->var_values [new_var].ref_count += td->var_values [var].ref_count; + } +} + +static void +interp_exit_ssa (TransformData *td) +{ + // Remove all MINT_PHI opcodes and revert ssa renaming + for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + InterpInst *ins; + for (ins = bb->first_ins; ins != NULL; ins = ins->next) { + if (ins->opcode == MINT_PHI || ins->opcode == MINT_DEAD_PHI) + ins->opcode = MINT_NOP; + else + interp_foreach_ins_var (td, ins, NULL, revert_ssa_rename_cb); + + ins->flags &= ~INTERP_INST_FLAG_LIVENESS_MARKER; + } + } + + // Free memory and restore state for (unsigned int i = 0; i < td->vars_size; i++) { - // These are allocated separately at the end of the stack - if (td->vars [i].call_args) { - td->vars [i].offset += td->param_area_offset; - final_total_locals_size = MAX (td->vars [i].offset + td->vars [i].size, final_total_locals_size); + if (td->vars [i].declare_bbs) { + g_slist_free (td->vars [i].declare_bbs); + td->vars [i].declare_bbs = NULL; } + td->vars [i].ext_index = -1; } - td->total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT); -} - -static GString* -interp_get_bb_links (InterpBasicBlock *bb) -{ - GString *str = g_string_new (""); - if (bb->in_count) { - g_string_append_printf (str, "IN (%d", bb->in_bb [0]->index); - for (int i = 1; i < bb->in_count; i++) - g_string_append_printf (str, " %d", bb->in_bb [i]->index); - g_string_append_printf (str, "), "); - } else { - g_string_append_printf (str, "IN (nil), "); + for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + if (bb->dominated) { + g_slist_free (bb->dominated); + bb->dominated = NULL; + } + bb->gen_set = NULL; + bb->kill_set = NULL; + bb->live_in_set = NULL; + bb->live_out_set = NULL; } - if (bb->out_count) { - g_string_append_printf (str, "OUT (%d", bb->out_bb [0]->index); - for (int i = 1; i < bb->out_count; i++) - g_string_append_printf (str, " %d", bb->out_bb [i]->index); - g_string_append_printf (str, ")"); - } else { - g_string_append_printf (str, "OUT (nil)"); + for (unsigned int i = 0; i < td->renamable_vars_size; i++) { + if (td->renamable_vars [i].ssa_stack) { + g_slist_free (td->renamable_vars [i].ssa_stack); + td->renamable_vars [i].ssa_stack = NULL; + } } + td->renamable_vars_size = 0; - return str; + for (unsigned int i = 0; i < td->renamed_fixed_vars_size; i++) { + if (td->renamed_fixed_vars [i].live_limit_bblocks) { + g_slist_free (td->renamed_fixed_vars [i].live_limit_bblocks); + td->renamed_fixed_vars [i].live_limit_bblocks = NULL; + } + } + td->renamed_fixed_vars_size = 0; } +/* + * BASIC BLOCK OPTIMIZATION + */ + static void mark_bb_as_dead (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock *replace_bb) { @@ -544,6 +1574,8 @@ mark_bb_as_dead (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock *repl break; } + if (bb->dominated) + g_slist_free (bb->dominated); bb->dead = TRUE; // bb should never be used/referenced after this } @@ -618,28 +1650,35 @@ interp_unlink_bblocks (InterpBasicBlock *from, InterpBasicBlock *to) to->in_count--; } -static gboolean -interp_remove_bblock (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock *prev_bb) +static void +interp_handle_unreachable_bblock (TransformData *td, InterpBasicBlock *bb) { - gboolean needs_cprop = FALSE; - for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { if (ins->opcode == MINT_LDLOCA_S) { td->vars [ins->sregs [0]].indirects--; if (!td->vars [ins->sregs [0]].indirects) { - // We can do cprop now through this local. Run cprop again. - needs_cprop = TRUE; + if (td->verbose_level) + g_print ("Remove bblock %d, var %d no longer indirect\n", bb->index, ins->sregs [0]); + td->need_optimization_retry = TRUE; } } + + // If preserve is set, even if we know this bblock is unreachable, we still have to keep + // it alive (for now at least). We just remove all instructions from it in this case. + if (bb->preserve) + interp_clear_ins (ins); } +} + +static void +interp_remove_bblock (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock *prev_bb) +{ while (bb->in_count) interp_unlink_bblocks (bb->in_bb [0], bb); while (bb->out_count) interp_unlink_bblocks (bb, bb->out_bb [0]); prev_bb->next_bb = bb->next_bb; mark_bb_as_dead (td, bb, bb->next_bb); - - return needs_cprop; } void @@ -683,26 +1722,21 @@ interp_link_bblocks (TransformData *td, InterpBasicBlock *from, InterpBasicBlock static void interp_mark_reachable_bblocks (TransformData *td) { - InterpBasicBlock **queue = mono_mempool_alloc0 (td->mempool, td->bb_count * sizeof (InterpBasicBlock*)); + InterpBasicBlock **queue = g_malloc0 (td->bb_count * sizeof (InterpBasicBlock*)); InterpBasicBlock *current; int cur_index = 0; int next_position = 0; - // FIXME There is no need to force eh bblocks to remain alive current = td->entry_bb; while (current != NULL) { - if (current->eh_block || current->patchpoint_data) { - queue [next_position++] = current; - current->reachable = TRUE; - } else { - current->reachable = FALSE; - } + current->reachable = FALSE; current = current->next_bb; } queue [next_position++] = td->entry_bb; td->entry_bb->reachable = TRUE; +retry: // We have the roots, traverse everything else while (cur_index < next_position) { current = queue [cur_index++]; @@ -714,6 +1748,25 @@ interp_mark_reachable_bblocks (TransformData *td) } } } + + if (td->header->num_clauses) { + gboolean needs_retry = FALSE; + current = td->entry_bb; + while (current != NULL) { + if (current->try_bblock && !current->reachable && current->try_bblock->reachable) { + // Try bblock is reachable and the handler is not yet marked + queue [next_position++] = current; + current->reachable = TRUE; + needs_retry = TRUE; + } + current = current->next_bb; + } + + if (needs_retry) + goto retry; + } + + g_free (queue); } /** @@ -804,7 +1857,13 @@ interp_reorder_bblocks (TransformData *td) { InterpBasicBlock *bb; for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { - if (bb->eh_block) + if (bb->preserve) + continue; + // We do optimizations below where we reduce the in count of bb, but it is ideal to have + // this bblock remain alive so we can correctly resolve mapping from unoptimized method. + // We could in theory address this and attempt to remove bb, but this scenario is extremely + // rare and doesn't seem worth the investment. + if (bb->patchpoint_data) continue; InterpInst *first = interp_first_ins (bb); if (!first) @@ -910,11 +1969,10 @@ interp_reorder_bblocks (TransformData *td) } // Traverse the list of basic blocks and merge adjacent blocks -static gboolean +static void interp_optimize_bblocks (TransformData *td) { InterpBasicBlock *bb = td->entry_bb; - gboolean needs_cprop = FALSE; interp_reorder_bblocks (td); @@ -925,82 +1983,79 @@ interp_optimize_bblocks (TransformData *td) if (!next_bb) break; if (!next_bb->reachable) { - if (td->verbose_level) - g_print ("Removed BB%d\n", next_bb->index); - needs_cprop |= interp_remove_bblock (td, next_bb, bb); - continue; - } else if (bb->out_count == 1 && bb->out_bb [0] == next_bb && next_bb->in_count == 1 && !next_bb->eh_block && !next_bb->patchpoint_data) { + interp_handle_unreachable_bblock (td, next_bb); + if (next_bb->preserve) { + if (td->verbose_level) + g_print ("Removed BB%d, cleared instructions only\n", next_bb->index); + } else { + if (td->verbose_level) + g_print ("Removed BB%d\n", next_bb->index); + interp_remove_bblock (td, next_bb, bb); + continue; + } + } else if (bb->out_count == 1 && bb->out_bb [0] == next_bb && next_bb->in_count == 1 && !next_bb->preserve && !next_bb->patchpoint_data) { g_assert (next_bb->in_bb [0] == bb); interp_merge_bblocks (td, bb, next_bb); if (td->verbose_level) g_print ("Merged BB%d and BB%d\n", bb->index, next_bb->index); - needs_cprop = TRUE; continue; } bb = next_bb; } - return needs_cprop; } -static gboolean -interp_local_deadce (TransformData *td) +static void +decrement_ref_count (TransformData *td, int *varp, gpointer data) { - int *local_ref_count = td->local_ref_count; - gboolean needs_dce = FALSE; - gboolean needs_cprop = FALSE; + int var = *varp; + td->var_values [var].ref_count--; + // FIXME we could clear recursively + if (!td->var_values [var].ref_count) + *(gboolean*)data = TRUE; +} - for (unsigned int i = 0; i < td->vars_size; i++) { - g_assert (local_ref_count [i] >= 0); - g_assert (td->vars [i].indirects >= 0); - if (td->vars [i].indirects || td->vars [i].dead) - continue; - if (!local_ref_count [i]) { - needs_dce = TRUE; - td->vars [i].dead = TRUE; - } else if (!td->vars [i].unknown_use) { - if (!td->vars [i].local_only) { - // The value of this var is not passed between multiple basic blocks - td->vars [i].local_only = TRUE; - if (td->verbose_level) - g_print ("Var %d is local only\n", i); - needs_cprop = TRUE; - } - } - td->vars [i].unknown_use = FALSE; - } +static void +interp_var_deadce (TransformData *td) +{ + gboolean need_retry; - // Return early if all locals are alive - if (!needs_dce) - return needs_cprop; +retry: + need_retry = FALSE; - // Kill instructions that don't use stack and are storing into dead locals + // Kill instructions that are storing into unreferenced vars for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { if (MINT_NO_SIDE_EFFECTS (ins->opcode) || ins->opcode == MINT_LDLOCA_S) { int dreg = ins->dreg; - if (td->vars [dreg].dead) { + if (var_has_indirects (td, dreg)) + continue; + + if (!td->var_values [dreg].ref_count) { if (td->verbose_level) { g_print ("kill dead ins:\n\t"); interp_dump_ins (ins, td->data_items); } - if (ins->opcode == MINT_LDLOCA_S) { td->vars [ins->sregs [0]].indirects--; if (!td->vars [ins->sregs [0]].indirects) { - // We can do cprop now through this local. Run cprop again. - needs_cprop = TRUE; + if (td->verbose_level) + g_print ("Kill ldloca, var %d no longer indirect\n", ins->sregs [0]); + td->need_optimization_retry = TRUE; } } + + interp_foreach_ins_svar (td, ins, &need_retry, decrement_ref_count); + interp_clear_ins (ins); - // FIXME This is lazy. We should update the ref count for the sregs and redo deadce. - needs_cprop = TRUE; } } } } - return needs_cprop; + + if (need_retry) + goto retry; } static InterpInst* @@ -1071,15 +2126,16 @@ interp_get_mt_for_ldind (int ldind_op) break; static InterpInst* -interp_fold_unop (TransformData *td, InterpVarValue *local_defs, InterpInst *ins) +interp_fold_unop (TransformData *td, InterpInst *ins) { - int *local_ref_count = td->local_ref_count; // ins should be an unop, therefore it should have a single dreg and a single sreg int dreg = ins->dreg; int sreg = ins->sregs [0]; - InterpVarValue *val = &local_defs [sreg]; + InterpVarValue *val = get_var_value (td, sreg); InterpVarValue result; + if (!val) + return ins; if (val->type != VAR_VALUE_I4 && val->type != VAR_VALUE_I8) return ins; @@ -1151,10 +2207,10 @@ interp_fold_unop (TransformData *td, InterpVarValue *local_defs, InterpInst *ins interp_dump_ins (ins, td->data_items); } - local_ref_count [sreg]--; - result.ins = ins; - result.ref_count = 0; - local_defs [dreg] = result; + td->var_values [sreg].ref_count--; + result.def = ins; + result.ref_count = td->var_values [dreg].ref_count; // preserve ref count + td->var_values [dreg] = result; return ins; } @@ -1174,13 +2230,14 @@ interp_fold_unop (TransformData *td, InterpVarValue *local_defs, InterpInst *ins break; static InterpInst* -interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpVarValue *local_defs, InterpInst *ins) +interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpInst *ins) { - int *local_ref_count = td->local_ref_count; // ins should be an unop conditional branch, therefore it should have a single sreg int sreg = ins->sregs [0]; - InterpVarValue *val = &local_defs [sreg]; + InterpVarValue *val = get_var_value (td, sreg); + if (!val) + return ins; if (val->type != VAR_VALUE_I4 && val->type != VAR_VALUE_I8 && val->type != VAR_VALUE_NON_NULL) return ins; @@ -1212,7 +2269,7 @@ interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpVarVal interp_dump_ins (ins, td->data_items); } - local_ref_count [sreg]--; + td->var_values [sreg].ref_count--; return ins; } @@ -1243,19 +2300,20 @@ interp_fold_unop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpVarVal static InterpInst* -interp_fold_binop (TransformData *td, InterpVarValue *local_defs, InterpInst *ins, gboolean *folded) +interp_fold_binop (TransformData *td, InterpInst *ins, gboolean *folded) { - int *local_ref_count = td->local_ref_count; // ins should be a binop, therefore it should have a single dreg and two sregs int dreg = ins->dreg; int sreg1 = ins->sregs [0]; int sreg2 = ins->sregs [1]; - InterpVarValue *val1 = &local_defs [sreg1]; - InterpVarValue *val2 = &local_defs [sreg2]; + InterpVarValue *val1 = get_var_value (td, sreg1); + InterpVarValue *val2 = get_var_value (td, sreg2); InterpVarValue result; *folded = FALSE; + if (!val1 || !val2) + return ins; if (val1->type != VAR_VALUE_I4 && val1->type != VAR_VALUE_I8) return ins; if (val2->type != VAR_VALUE_I4 && val2->type != VAR_VALUE_I8) @@ -1339,11 +2397,12 @@ interp_fold_binop (TransformData *td, InterpVarValue *local_defs, InterpInst *in interp_dump_ins (ins, td->data_items); } - local_ref_count [sreg1]--; - local_ref_count [sreg2]--; - result.ins = ins; - result.ref_count = 0; - local_defs [dreg] = result; + td->var_values [sreg1].ref_count--; + td->var_values [sreg2].ref_count--; + result.def = ins; + result.ref_count = td->var_values [dreg].ref_count; // preserve ref count + td->var_values [dreg] = result; + return ins; } @@ -1366,15 +2425,16 @@ interp_fold_binop (TransformData *td, InterpVarValue *local_defs, InterpInst *in break; static InterpInst* -interp_fold_binop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpVarValue *local_defs, InterpInst *ins) +interp_fold_binop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpInst *ins) { - int *local_ref_count = td->local_ref_count; // ins should be a conditional binop, therefore it should have only two sregs int sreg1 = ins->sregs [0]; int sreg2 = ins->sregs [1]; - InterpVarValue *val1 = &local_defs [sreg1]; - InterpVarValue *val2 = &local_defs [sreg2]; + InterpVarValue *val1 = get_var_value (td, sreg1); + InterpVarValue *val2 = get_var_value (td, sreg2); + if (!val1 || !val2) + return ins; if (val1->type != VAR_VALUE_I4 && val1->type != VAR_VALUE_I8) return ins; if (val2->type != VAR_VALUE_I4 && val2->type != VAR_VALUE_I8) @@ -1411,8 +2471,8 @@ interp_fold_binop_cond_br (TransformData *td, InterpBasicBlock *cbb, InterpVarVa interp_dump_ins (ins, td->data_items); } - local_ref_count [sreg1]--; - local_ref_count [sreg2]--; + td->var_values [sreg1].ref_count--; + td->var_values [sreg2].ref_count--; return ins; } @@ -1432,15 +2492,15 @@ write_v128_element (gpointer v128_addr, InterpVarValue *val, int index, int el_s } static InterpInst* -interp_fold_simd_create (TransformData *td, InterpBasicBlock *cbb, InterpVarValue *local_defs, InterpInst *ins) +interp_fold_simd_create (TransformData *td, InterpBasicBlock *cbb, InterpInst *ins) { - int *local_ref_count = td->local_ref_count; - int *args = ins->info.call_info->call_args; int index = 0; int var = args [index]; while (var != -1) { - InterpVarValue *val = &local_defs [var]; + InterpVarValue *val = get_var_value (td, var); + if (!val) + return ins; if (val->type != VAR_VALUE_I4 && val->type != VAR_VALUE_I8 && val->type != VAR_VALUE_R4) return ins; index++; @@ -1461,10 +2521,9 @@ interp_fold_simd_create (TransformData *td, InterpBasicBlock *cbb, InterpVarValu index = 0; var = args [index]; while (var != -1) { - InterpVarValue *val = &local_defs [var]; + InterpVarValue *val = &td->var_values [var]; write_v128_element (v128_addr, val, index, el_size); val->ref_count--; - local_ref_count [var]--; index++; var = args [index]; } @@ -1474,106 +2533,200 @@ interp_fold_simd_create (TransformData *td, InterpBasicBlock *cbb, InterpVarValu interp_dump_ins (ins, td->data_items); } - local_defs [dreg].ins = ins; - local_defs [dreg].type = VAR_VALUE_NONE; + td->var_values [dreg].def = ins; + td->var_values [dreg].type = VAR_VALUE_NONE; return ins; } -static void -cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, InterpVarValue *local_defs) +static gboolean +can_extend_ssa_var_liveness (TransformData *td, int var, InterpLivenessPosition cur_liveness) { - int *local_ref_count = td->local_ref_count; - int sreg = *psreg; + if (!td->vars [var].renamed_ssa_fixed) + return TRUE; + + InterpRenamedFixedVar *fixed_var_ext = &td->renamed_fixed_vars [td->vars [var].ext_index]; + + // If var was already live at the end of this bblocks, there is no liveness extension happening + if (fixed_var_ext->live_out_bblocks && mono_bitset_test_fast (fixed_var_ext->live_out_bblocks, cur_liveness.bb_dfs_index)) + return TRUE; + + GSList *bb_liveness = fixed_var_ext->live_limit_bblocks; + while (bb_liveness) { + InterpLivenessPosition *liveness_limit = (InterpLivenessPosition*)bb_liveness->data; + if (cur_liveness.bb_dfs_index == liveness_limit->bb_dfs_index) { + if (cur_liveness.ins_index <= liveness_limit->ins_index) + return TRUE; + else + return FALSE; + } else { + bb_liveness = bb_liveness->next; + } + } - local_ref_count [sreg]++; - local_defs [sreg].ref_count++; - if (local_defs [sreg].type == VAR_VALUE_OTHER_VAR) { - int cprop_local = local_defs [sreg].var; + return FALSE; +} - // We are trying to replace sregs [i] with its def local (cprop_local), but cprop_local has since been - // modified, so we can't use it. - if (local_defs [cprop_local].ins != NULL && local_defs [cprop_local].def_index > local_defs [sreg].def_index) - return; +// We are attempting to extend liveness of var to cur_liveness (propagate its use). +// We know that var was still alive at the point of original_liveness. +// cur_liveness is in td->cbb +static gboolean +can_extend_var_liveness (TransformData *td, int var, InterpLivenessPosition original_liveness, InterpLivenessPosition cur_liveness) +{ + if (var_is_ssa_form (td, var)) { + // If var is fixed ssa, we can extend liveness if it doesn't overlap with other renamed + // vars. If var is normal ssa, we can extend its liveness with no constraints + return can_extend_ssa_var_liveness (td, var, cur_liveness); + } else { + gboolean original_in_curbb = original_liveness.bb_dfs_index == td->cbb->dfs_index; + if (!original_in_curbb) { + // var is not in ssa form and we only track its value within a single bblock. + // The original liveness information is not in cbb and, by the time we get to cbb, + // its value could be different so we can't use it. + return FALSE; + } else { + InterpVarValue *var_val = get_var_value (td, var); + if (!var_val) { + // We know that var is alive at original_liveness, which is in cbb, and that + // the var has not been defined yet in cbb, meaning its value was not overwritten + // and we can use it. + return TRUE; + } else { + // We know that var is alive at original_liveness, which is in cbb, and that + // the var has been redefined in cbb. We can extend its liveness to cur_liveness, + // only if it hasn't been redefined between original and cur liveness. + g_assert (var_val->liveness.bb_dfs_index == original_liveness.bb_dfs_index); + return var_val->liveness.ins_index < original_liveness.ins_index; + } + } + } +} +static void +replace_svar_use (TransformData *td, int *pvar, gpointer data) +{ + int *var_pair = (int*)data; + int old_var = var_pair [0]; + if (*pvar == old_var) { + int new_var = var_pair [1]; + td->var_values [old_var].ref_count--; + td->var_values [new_var].ref_count++; + *pvar = new_var; if (td->verbose_level) - g_print ("cprop %d -> %d:\n\t", sreg, cprop_local); - local_ref_count [sreg]--; - *psreg = cprop_local; - local_ref_count [cprop_local]++; - if (td->verbose_level) - interp_dump_ins (ins, td->data_items); - } else if (!local_defs [sreg].ins) { - td->vars [sreg].unknown_use = TRUE; + g_print ("\treplace svar use: %d -> %d\n", old_var, new_var); } } static void -clear_local_defs (TransformData *td, int *pvar, void *data) +replace_svar_uses (TransformData *td, InterpInst *first, InterpInst *last, int old_var, int new_var) { - int var = *pvar; - InterpVarValue *local_defs = (InterpVarValue*) data; - local_defs [var].type = VAR_VALUE_NONE; - local_defs [var].ins = NULL; - local_defs [var].ref_count = 0; + int *var_pair = alloca (2 * sizeof (int)); + var_pair [0] = old_var; + var_pair [1] = new_var; + for (InterpInst *ins = first; ins != last; ins = ins->next) + interp_foreach_ins_svar (td, ins, var_pair, replace_svar_use); } static void -clear_unused_defs (TransformData *td, int *pvar, void *data) +cprop_svar (TransformData *td, InterpInst *ins, int *pvar, InterpLivenessPosition current_liveness) { int var = *pvar; - if (!td->vars [var].local_only) - return; - if (td->vars [var].indirects) + if (var_has_indirects (td, var)) return; - InterpVarValue *local_def = &((InterpVarValue*) data) [var]; - InterpInst *def_ins = local_def->ins; - if (!def_ins) - return; - if (local_def->ref_count) - return; + InterpVarValue *val = get_var_value (td, var); + if (val && val->type == VAR_VALUE_OTHER_VAR) { + // var <- cprop_var; + // .... + // use var; + int cprop_var = val->var; + if (td->vars [var].renamed_ssa_fixed && !td->vars [cprop_var].renamed_ssa_fixed) { + // ssa fixed vars are likely to live, keep using them + val->ref_count++; + } else if (can_extend_var_liveness (td, cprop_var, val->liveness, current_liveness)) { + if (td->verbose_level) + g_print ("cprop %d -> %d:\n\t", var, cprop_var); + td->var_values [cprop_var].ref_count++; + *pvar = cprop_var; + if (td->verbose_level) + interp_dump_ins (ins, td->data_items); + } else { + val->ref_count++; + } + } else { + td->var_values [var].ref_count++; + } - // This is a local only var that is defined in this bblock and its value is not used - // at all in this bblock. Clear the definition - if (MINT_NO_SIDE_EFFECTS (def_ins->opcode)) { - for (int i = 0; i < mono_interp_op_sregs [def_ins->opcode]; i++) - td->local_ref_count [def_ins->sregs [i]]--; - if (td->verbose_level) { - g_print ("kill unused local def:\n\t"); - interp_dump_ins (def_ins, td->data_items); + // Mark the last use for a renamable fixed var + var = *pvar; + if (td->vars [var].renamed_ssa_fixed) { + int ext_index = td->renamed_fixed_vars [td->vars [var].ext_index].renamable_var_ext_index; + td->renamable_vars [ext_index].last_use_liveness = current_liveness; + } +} + +static gboolean +can_cprop_dreg (TransformData *td, InterpInst *mov_ins) +{ + int dreg = mov_ins->dreg; + int sreg = mov_ins->sregs [0]; + + // sreg = def + // mov sreg -> dreg + + InterpVarValue *sreg_val = get_var_value (td, sreg); + if (!sreg_val) + return FALSE; + // We only apply this optimization if the definition is in the same bblock as this use + if (sreg_val->liveness.bb_dfs_index != td->cbb->dfs_index) + return FALSE; + if (td->var_values [sreg].def->opcode == MINT_DEF_ARG) + return FALSE; + if (sreg_val->def->flags & INTERP_INST_FLAG_PROTECTED_NEWOBJ) + return FALSE; + // reordering moves might break conversions + if (td->vars [dreg].mt != td->vars [sreg].mt) + return FALSE; + + if (var_is_ssa_form (td, sreg)) { + // check if dreg is a renamed ssa fixed var (likely to remain alive) + if (td->vars [dreg].renamed_ssa_fixed && !td->vars [sreg].renamed_ssa_fixed) { + InterpLivenessPosition last_use_liveness = td->renamable_vars [td->renamed_fixed_vars [td->vars [dreg].ext_index].renamable_var_ext_index].last_use_liveness; + if (last_use_liveness.bb_dfs_index != td->cbb->dfs_index || + sreg_val->liveness.ins_index >= last_use_liveness.ins_index) { + // No other conflicting renamed fixed vars (of dreg) are used in this bblock, or their + // last use predates the definition. This means we can tweak def of sreg to store directly + // into dreg and patch all intermediary instructions to use dreg instead. + return TRUE; + } } - interp_clear_ins (def_ins); + } else if (!var_is_ssa_form (td, dreg)) { + // Neither sreg nor dreg are in SSA form. IL globals are likely to remain alive + // We ensure that stores to no SSA vars, that are il globals, are not reordered. + // For simplicity, we apply the optimization only if the def and move are adjacent. + if (td->vars [dreg].il_global && !td->vars [sreg].il_global && mov_ins == interp_next_ins (sreg_val->def)) + return TRUE; } + + return FALSE; } static void interp_cprop (TransformData *td) { - InterpVarValue *local_defs = (InterpVarValue*) g_malloc (td->vars_size * sizeof (InterpVarValue)); - int *local_ref_count = (int*) g_malloc (td->vars_size * sizeof (int)); - InterpBasicBlock *bb; - gboolean needs_retry; - int ins_index; - int iteration_count = 0; - - td->local_ref_count = local_ref_count; -retry: - needs_retry = FALSE; - memset (local_ref_count, 0, td->vars_size * sizeof (int)); - if (td->verbose_level) - g_print ("\ncprop iteration %d\n", iteration_count++); - - for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { - InterpInst *ins; - ins_index = 0; + g_print ("\nCPROP:\n"); - // Set cbb since we do some instruction inserting below - td->cbb = bb; + // FIXME + // There is no need to zero, if we pay attention to phi args vars. They + // can be used before the definition. + td->var_values = (InterpVarValue*) g_malloc0 (td->vars_size * sizeof (InterpVarValue)); - for (ins = bb->first_ins; ins != NULL; ins = ins->next) - interp_foreach_ins_var (td, ins, local_defs, clear_local_defs); + // Traverse in dfs order. This guarantees that we always reach the definition first before the + // use of the var. Exception is only for phi nodes, where we don't care about the definition + // anyway. + for (int bb_dfs_index = 0; bb_dfs_index < td->bblocks_count_eh; bb_dfs_index++) { + InterpBasicBlock *bb = td->bblocks [bb_dfs_index]; if (td->verbose_level) { GString* bb_info = interp_get_bb_links (bb); @@ -1581,58 +2734,69 @@ interp_cprop (TransformData *td) g_string_free (bb_info, TRUE); } - for (ins = bb->first_ins; ins != NULL; ins = ins->next) { - int opcode = ins->opcode; - - if (opcode == MINT_NOP) + InterpLivenessPosition current_liveness; + current_liveness.bb_dfs_index = bb->dfs_index; + current_liveness.ins_index = 0; + // Set cbb since we do some instruction inserting below + td->cbb = bb; + for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { + int opcode, num_sregs, num_dregs; + gint32 *sregs; + gint32 dreg; + // LIVENESS_MARKER is set only for non-eh bblocks + if (bb->dfs_index >= td->bblocks_count_no_eh || bb->dfs_index == -1 || (ins->flags & INTERP_INST_FLAG_LIVENESS_MARKER)) + current_liveness.ins_index++; + + if (interp_ins_is_nop (ins)) continue; - int num_sregs = mono_interp_op_sregs [opcode]; - int num_dregs = mono_interp_op_dregs [opcode]; - gint32 *sregs = &ins->sregs [0]; - gint32 dreg = ins->dreg; +retry_instruction: + opcode = ins->opcode; + num_sregs = mono_interp_op_sregs [opcode]; + num_dregs = mono_interp_op_dregs [opcode]; + sregs = &ins->sregs [0]; + dreg = ins->dreg; - if (td->verbose_level && ins->opcode != MINT_NOP && ins->opcode != MINT_IL_SEQ_POINT) + if (td->verbose_level) interp_dump_ins (ins, td->data_items); - for (int i = 0; i < num_sregs; i++) { - if (sregs [i] == MINT_CALL_ARGS_SREG) { - if (ins->info.call_info && ins->info.call_info->call_args) { - int *call_args = ins->info.call_info->call_args; - while (*call_args != -1) { - cprop_sreg (td, ins, call_args, local_defs); - call_args++; + if (opcode == MINT_DEF_TIER_VAR) { + // We can't do any var propagation into this instruction since it will be deleted + // dreg and sreg should always be identical, a ssa fixed var. + td->var_values [sregs [0]].ref_count++; + } else if (num_sregs) { + for (int i = 0; i < num_sregs; i++) { + if (sregs [i] == MINT_CALL_ARGS_SREG) { + if (ins->info.call_info && ins->info.call_info->call_args) { + int *call_args = ins->info.call_info->call_args; + while (*call_args != -1) { + cprop_svar (td, ins, call_args, current_liveness); + call_args++; + } } + } else { + cprop_svar (td, ins, &sregs [i], current_liveness); } - } else { - cprop_sreg (td, ins, &sregs [i], local_defs); + } + } else if (opcode == MINT_PHI) { + // no cprop but add ref counts + int *args = ins->info.args; + while (*args != -1) { + td->var_values [*args].ref_count++; + args++; } } if (num_dregs) { - // Check if the previous definition of this var was used at all. - // If it wasn't we can just clear the instruction - // - // MINT_MOV_DST_OFF doesn't fully write to the var, so we special case it here - if (local_defs [dreg].ins != NULL && - local_defs [dreg].ref_count == 0 && - !td->vars [dreg].indirects && - opcode != MINT_MOV_DST_OFF) { - InterpInst *prev_def = local_defs [dreg].ins; - if (MINT_NO_SIDE_EFFECTS (prev_def->opcode)) { - for (int i = 0; i < mono_interp_op_sregs [prev_def->opcode]; i++) - local_ref_count [prev_def->sregs [i]]--; - interp_clear_ins (prev_def); - } - } - local_defs [dreg].type = VAR_VALUE_NONE; - local_defs [dreg].ins = ins; - local_defs [dreg].def_index = ins_index; + InterpVarValue *dval = &td->var_values [dreg]; + dval->type = VAR_VALUE_NONE; + dval->def = ins; + dval->liveness = current_liveness; } // We always store to the full i4, except as part of STIND opcodes. These opcodes can be // applied to a local var only if that var has LDLOCA applied to it - if ((opcode >= MINT_MOV_I4_I1 && opcode <= MINT_MOV_I4_U2) && !td->vars [sregs [0]].indirects) { + if ((opcode >= MINT_MOV_I4_I1 && opcode <= MINT_MOV_I4_U2) && !var_has_indirects (td, sregs [0])) { ins->opcode = MINT_MOV_4; opcode = MINT_MOV_4; } @@ -1643,140 +2807,138 @@ interp_cprop (TransformData *td) if (td->verbose_level) g_print ("clear redundant mov\n"); interp_clear_ins (ins); - local_ref_count [sreg]--; - } else if (td->vars [sreg].indirects || td->vars [dreg].indirects) { + td->var_values [sreg].ref_count--; + } else if (var_has_indirects (td, sreg) || var_has_indirects (td, dreg)) { // Don't bother with indirect locals - } else if (local_defs [sreg].type == VAR_VALUE_I4 || local_defs [sreg].type == VAR_VALUE_I8) { + } else if (get_var_value_type (td, sreg) == VAR_VALUE_I4 || get_var_value_type (td, sreg) == VAR_VALUE_I8) { // Replace mov with ldc - gboolean is_i4 = local_defs [sreg].type == VAR_VALUE_I4; - g_assert (!td->vars [sreg].indirects); - local_defs [dreg].type = local_defs [sreg].type; + gboolean is_i4 = td->var_values [sreg].type == VAR_VALUE_I4; + td->var_values [dreg].type = td->var_values [sreg].type; if (is_i4) { - int ct = local_defs [sreg].i; + int ct = td->var_values [sreg].i; ins = interp_get_ldc_i4_from_const (td, ins, ct, dreg); - local_defs [dreg].i = ct; + td->var_values [dreg].i = ct; } else { - gint64 ct = local_defs [sreg].l; + gint64 ct = td->var_values [sreg].l; ins = interp_inst_replace_with_i8_const (td, ins, ct); - local_defs [dreg].l = ct; + td->var_values [dreg].l = ct; } - local_defs [dreg].ins = ins; - local_ref_count [sreg]--; + td->var_values [dreg].def = ins; + td->var_values [sreg].ref_count--; if (td->verbose_level) { g_print ("cprop loc %d -> ct :\n\t", sreg); interp_dump_ins (ins, td->data_items); } - } else if (local_defs [sreg].ins != NULL && - td->vars [sreg].execution_stack && - !td->vars [dreg].execution_stack && - interp_prev_ins (ins) == local_defs [sreg].ins && - !(interp_prev_ins (ins)->flags & INTERP_INST_FLAG_PROTECTED_NEWOBJ)) { - // hackish temporary optimization that won't be necessary in the future - // We replace `local1 <- ?, local2 <- local1` with `local2 <- ?, local1 <- local2` - // if local1 is execution stack local and local2 is normal global local. This makes - // it more likely for `local1 <- local2` to be killed, while before we always needed - // to store to the global local, which is likely accessed by other instructions. - InterpInst *def = local_defs [sreg].ins; - int original_dreg = def->dreg; - - def->dreg = dreg; - ins->dreg = original_dreg; - sregs [0] = dreg; - - local_defs [dreg].type = VAR_VALUE_NONE; - local_defs [dreg].ins = def; - local_defs [dreg].def_index = local_defs [original_dreg].def_index; - local_defs [dreg].ref_count++; - local_defs [original_dreg].type = VAR_VALUE_OTHER_VAR; - local_defs [original_dreg].ins = ins; - local_defs [original_dreg].var = dreg; - local_defs [original_dreg].def_index = ins_index; - local_defs [original_dreg].ref_count--; - - local_ref_count [original_dreg]--; - local_ref_count [dreg]++; + } else if (can_cprop_dreg (td, ins)) { + int dreg_ref_count = td->var_values [dreg].ref_count; + td->var_values [dreg] = td->var_values [sreg]; + td->var_values [dreg].ref_count = dreg_ref_count; + td->var_values [dreg].def->dreg = dreg; if (td->verbose_level) { - g_print ("cprop dreg:\n\t"); - interp_dump_ins (def, td->data_items); + g_print ("cprop fixed dreg %d:\n\t", dreg); + interp_dump_ins (td->var_values [dreg].def, td->data_items); + } + // Overwrite all uses of sreg with dreg up to this point + replace_svar_uses (td, td->var_values [dreg].def->next, ins, sreg, dreg); + + // Transform `mov dreg <- sreg` into `mov sreg <- dreg` in case sreg is still used + ins->dreg = sreg; + ins->sregs [0] = dreg; + td->var_values [dreg].ref_count++; + td->var_values [sreg].ref_count--; + + td->var_values [sreg].def = ins; + td->var_values [sreg].type = VAR_VALUE_OTHER_VAR; + td->var_values [sreg].var = dreg; + td->var_values [sreg].liveness = current_liveness; + if (td->verbose_level) { g_print ("\t"); interp_dump_ins (ins, td->data_items); } } else { if (td->verbose_level) g_print ("local copy %d <- %d\n", dreg, sreg); - local_defs [dreg].type = VAR_VALUE_OTHER_VAR; - local_defs [dreg].var = sreg; + td->var_values [dreg].type = VAR_VALUE_OTHER_VAR; + td->var_values [dreg].var = sreg; } } else if (opcode == MINT_LDLOCA_S) { // The local that we are taking the address of is not a sreg but still referenced - local_ref_count [ins->sregs [0]]++; + td->var_values [ins->sregs [0]].ref_count++; } else if (MINT_IS_LDC_I4 (opcode)) { - local_defs [dreg].type = VAR_VALUE_I4; - local_defs [dreg].i = interp_get_const_from_ldc_i4 (ins); + td->var_values [dreg].type = VAR_VALUE_I4; + td->var_values [dreg].i = interp_get_const_from_ldc_i4 (ins); } else if (MINT_IS_LDC_I8 (opcode)) { - local_defs [dreg].type = VAR_VALUE_I8; - local_defs [dreg].l = interp_get_const_from_ldc_i8 (ins); + td->var_values [dreg].type = VAR_VALUE_I8; + td->var_values [dreg].l = interp_get_const_from_ldc_i8 (ins); } else if (opcode == MINT_LDC_R4) { guint32 val_u = READ32 (&ins->data [0]); float f = *(float*)(&val_u); - local_defs [dreg].type = VAR_VALUE_R4; - local_defs [dreg].f = f; + td->var_values [dreg].type = VAR_VALUE_R4; + td->var_values [dreg].f = f; } else if (ins->opcode == MINT_LDPTR) { #if SIZEOF_VOID_P == 8 - local_defs [dreg].type = VAR_VALUE_I8; - local_defs [dreg].l = (gint64)td->data_items [ins->data [0]]; + td->var_values [dreg].type = VAR_VALUE_I8; + td->var_values [dreg].l = (gint64)td->data_items [ins->data [0]]; #else - local_defs [dreg].type = VAR_VALUE_I4; - local_defs [dreg].i = (gint32)td->data_items [ins->data [0]]; + td->var_values [dreg].type = VAR_VALUE_I4; + td->var_values [dreg].i = (gint32)td->data_items [ins->data [0]]; #endif } else if (MINT_IS_UNOP (opcode)) { - ins = interp_fold_unop (td, local_defs, ins); + ins = interp_fold_unop (td, ins); } else if (MINT_IS_UNOP_CONDITIONAL_BRANCH (opcode)) { - ins = interp_fold_unop_cond_br (td, bb, local_defs, ins); + ins = interp_fold_unop_cond_br (td, bb, ins); } else if (MINT_IS_SIMD_CREATE (opcode)) { - ins = interp_fold_simd_create (td, bb, local_defs, ins); + ins = interp_fold_simd_create (td, bb, ins); } else if (MINT_IS_BINOP (opcode)) { gboolean folded; - ins = interp_fold_binop (td, local_defs, ins, &folded); + ins = interp_fold_binop (td, ins, &folded); if (!folded) { int sreg = -1; guint16 mov_op = 0; - if ((opcode == MINT_MUL_I4 || opcode == MINT_DIV_I4) && - local_defs [ins->sregs [1]].type == VAR_VALUE_I4 && - local_defs [ins->sregs [1]].i == 1) { - sreg = ins->sregs [0]; - mov_op = MINT_MOV_4; - } else if ((opcode == MINT_MUL_I8 || opcode == MINT_DIV_I8) && - local_defs [ins->sregs [1]].type == VAR_VALUE_I8 && - local_defs [ins->sregs [1]].l == 1) { - sreg = ins->sregs [0]; - mov_op = MINT_MOV_8; - } else if (opcode == MINT_MUL_I4 && - local_defs [ins->sregs [0]].type == VAR_VALUE_I4 && - local_defs [ins->sregs [0]].i == 1) { - sreg = ins->sregs [1]; - mov_op = MINT_MOV_4; - } else if (opcode == MINT_MUL_I8 && - local_defs [ins->sregs [0]].type == VAR_VALUE_I8 && - local_defs [ins->sregs [0]].l == 1) { - sreg = ins->sregs [1]; - mov_op = MINT_MOV_8; + InterpVarValue *vv0 = get_var_value (td, ins->sregs [0]); + InterpVarValue *vv1 = get_var_value (td, ins->sregs [1]); + if (vv1) { + if ((opcode == MINT_MUL_I4 || opcode == MINT_DIV_I4) && + vv1->type == VAR_VALUE_I4 && + vv1->i == 1) { + sreg = ins->sregs [0]; + mov_op = MINT_MOV_4; + } else if ((opcode == MINT_MUL_I8 || opcode == MINT_DIV_I8) && + vv1->type == VAR_VALUE_I8 && + vv1->l == 1) { + sreg = ins->sregs [0]; + mov_op = MINT_MOV_8; + } + } else if (vv0) { + if (opcode == MINT_MUL_I4 && + vv0->type == VAR_VALUE_I4 && + vv0->i == 1) { + sreg = ins->sregs [1]; + mov_op = MINT_MOV_4; + } else if (opcode == MINT_MUL_I8 && + vv0->type == VAR_VALUE_I8 && + vv0->l == 1) { + sreg = ins->sregs [1]; + mov_op = MINT_MOV_8; + } } if (sreg != -1) { + td->var_values [ins->sregs [0]].ref_count--; + td->var_values [ins->sregs [1]].ref_count--; ins->opcode = mov_op; ins->sregs [0] = sreg; if (td->verbose_level) { g_print ("Replace idempotent binop :\n\t"); interp_dump_ins (ins, td->data_items); } - needs_retry = TRUE; + goto retry_instruction; } } } else if (MINT_IS_BINOP_CONDITIONAL_BRANCH (opcode)) { - ins = interp_fold_binop_cond_br (td, bb, local_defs, ins); + ins = interp_fold_binop_cond_br (td, bb, ins); } else if (MINT_IS_LDIND (opcode)) { - InterpInst *ldloca = local_defs [sregs [0]].ins; + InterpInst *ldloca = get_var_value_def (td, sregs [0]); if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) { int local = ldloca->sregs [0]; int mt = td->vars [local].mt; @@ -1794,23 +2956,22 @@ interp_cprop (TransformData *td) ins->opcode = GINT_TO_OPCODE (interp_get_mov_for_type (ldind_mt, FALSE)); break; } - local_ref_count [sregs [0]]--; + td->var_values [sregs [0]].ref_count--; interp_ins_set_sreg (ins, local); if (td->verbose_level) { g_print ("Replace ldloca/ldind pair :\n\t"); interp_dump_ins (ins, td->data_items); } - needs_retry = TRUE; } } } else if (MINT_IS_LDFLD (opcode)) { - InterpInst *ldloca = local_defs [sregs [0]].ins; + InterpInst *ldloca = get_var_value_def (td, sregs [0]); if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) { int mt = ins->opcode - MINT_LDFLD_I1; int local = ldloca->sregs [0]; // Allow ldloca instruction to be killed - local_ref_count [sregs [0]]--; + td->var_values [sregs [0]].ref_count--; if (td->vars [local].mt == (ins->opcode - MINT_LDFLD_I1) && ins->data [0] == 0) { // Replace LDLOCA + LDFLD with LDLOC, when the loading field represents // the entire local. This is the case with loading the only field of an @@ -1835,16 +2996,16 @@ interp_cprop (TransformData *td) ins->data [2] = ldsize; interp_clear_ins (ins->prev); + td->var_values [ins->dreg].def = ins; } if (td->verbose_level) { g_print ("Replace ldloca/ldfld pair :\n\t"); interp_dump_ins (ins, td->data_items); } - needs_retry = TRUE; } } else if (opcode == MINT_INITOBJ) { - InterpInst *ldloca = local_defs [sregs [0]].ins; + InterpInst *ldloca = get_var_value_def (td, sregs [0]); if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) { int size = ins->data [0]; int local = ldloca->sregs [0]; @@ -1855,27 +3016,25 @@ interp_cprop (TransformData *td) ins->opcode = MINT_LDC_I8_0; else ins->opcode = MINT_INITLOCAL; - local_ref_count [sregs [0]]--; + td->var_values [sregs [0]].ref_count--; ins->dreg = local; if (td->verbose_level) { g_print ("Replace ldloca/initobj pair :\n\t"); interp_dump_ins (ins, td->data_items); } - needs_retry = TRUE; } } else if (opcode == MINT_LDOBJ_VT) { - InterpInst *ldloca = local_defs [sregs [0]].ins; + InterpInst *ldloca = get_var_value_def (td, sregs [0]); if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) { int ldsize = ins->data [0]; int local = ldloca->sregs [0]; - local_ref_count [sregs [0]]--; + td->var_values [sregs [0]].ref_count--; if (ldsize == td->vars [local].size) { // Replace LDLOCA + LDOBJ_VT with MOV_VT ins->opcode = MINT_MOV_VT; sregs [0] = local; - needs_retry = TRUE; } else { // This loads just a part of the local valuetype ins = interp_insert_ins (td, ins, MINT_MOV_SRC_OFF); @@ -1893,18 +3052,17 @@ interp_cprop (TransformData *td) } } } else if (opcode == MINT_STOBJ_VT || opcode == MINT_STOBJ_VT_NOREF) { - InterpInst *ldloca = local_defs [sregs [0]].ins; + InterpInst *ldloca = get_var_value_def (td, sregs [0]); if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) { int stsize = ins->data [0]; int local = ldloca->sregs [0]; if (stsize == td->vars [local].size) { // Replace LDLOCA + STOBJ_VT with MOV_VT - local_ref_count [sregs [0]]--; + td->var_values [sregs [0]].ref_count--; ins->opcode = MINT_MOV_VT; sregs [0] = sregs [1]; ins->dreg = local; - needs_retry = TRUE; if (td->verbose_level) { g_print ("Replace ldloca/stobj_vt pair :\n\t"); @@ -1913,13 +3071,13 @@ interp_cprop (TransformData *td) } } } else if (MINT_IS_STIND (opcode)) { - InterpInst *ldloca = local_defs [sregs [0]].ins; + InterpInst *ldloca = get_var_value_def (td, sregs [0]); if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) { int local = ldloca->sregs [0]; int mt = td->vars [local].mt; if (mt != MINT_TYPE_VT) { // We have an 8 byte local, just replace the stind with a mov - local_ref_count [sregs [0]]--; + td->var_values [sregs [0]].ref_count--; // We make the assumption that the STIND matches the local type ins->opcode = GINT_TO_OPCODE (interp_get_mov_for_type (mt, TRUE)); interp_ins_set_dreg (ins, local); @@ -1929,15 +3087,14 @@ interp_cprop (TransformData *td) g_print ("Replace ldloca/stind pair :\n\t"); interp_dump_ins (ins, td->data_items); } - needs_retry = TRUE; } } } else if (MINT_IS_STFLD (opcode)) { - InterpInst *ldloca = local_defs [sregs [0]].ins; + InterpInst *ldloca = get_var_value_def (td, sregs [0]); if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) { int mt = ins->opcode - MINT_STFLD_I1; int local = ldloca->sregs [0]; - local_ref_count [sregs [0]]--; + td->var_values [sregs [0]].ref_count--; // Allow ldloca instruction to be killed if (td->vars [local].mt == (ins->opcode - MINT_STFLD_I1) && ins->data [0] == 0) { ins->opcode = GINT_TO_OPCODE (interp_get_mov_for_type (mt, FALSE)); @@ -1962,57 +3119,49 @@ interp_cprop (TransformData *td) // This stores just to part of the dest valuetype ins = interp_insert_ins (td, ins, MINT_MOV_DST_OFF); interp_ins_set_dreg (ins, local); - interp_ins_set_sreg (ins, sregs [1]); + interp_ins_set_sregs2 (ins, sregs [1], local); ins->data [0] = GINT_TO_UINT16 (foffset); ins->data [1] = GINT_TO_UINT16 (mt); ins->data [2] = vtsize; interp_clear_ins (ins->prev); + + // MINT_MOV_DST_OFF doesn't work if dreg is allocated at the same location as the + // field value to be stored, because its behavior is not atomic in nature. We first + // copy the original whole vt, potentially overwritting the new field value. + ins = interp_insert_ins (td, ins, MINT_DUMMY_USE); + interp_ins_set_sreg (ins, sregs [1]); + td->var_values [sregs [1]].ref_count++; } if (td->verbose_level) { g_print ("Replace ldloca/stfld pair (off %p) :\n\t", (void *)(uintptr_t) ldloca->il_offset); interp_dump_ins (ins, td->data_items); } - needs_retry = TRUE; } } else if (opcode == MINT_GETITEM_SPAN) { - InterpInst *ldloca = local_defs [sregs [0]].ins; + InterpInst *ldloca = get_var_value_def (td, sregs [0]); if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) { int local = ldloca->sregs [0]; // Allow ldloca instruction to be killed - local_ref_count [sregs [0]]--; + td->var_values [sregs [0]].ref_count--; // Instead of loading from the indirect pointer pass directly the vt var ins->opcode = MINT_GETITEM_LOCALSPAN; sregs [0] = local; - needs_retry = TRUE; } } else if (opcode == MINT_CKNULL) { - InterpInst *def = local_defs [sregs [0]].ins; + InterpInst *def = get_var_value_def (td, sregs [0]); if (def && def->opcode == MINT_LDLOCA_S) { // CKNULL on LDLOCA is a NOP ins->opcode = MINT_MOV_P; - needs_retry = TRUE; + td->var_values [ins->sregs [0]].ref_count--; + goto retry_instruction; } } else if (opcode == MINT_BOX) { // TODO Add more relevant opcodes - local_defs [dreg].type = VAR_VALUE_NON_NULL; + td->var_values [dreg].type = VAR_VALUE_NON_NULL; } - - ins_index++; } - - for (ins = bb->first_ins; ins != NULL; ins = ins->next) - interp_foreach_ins_var (td, ins, local_defs, clear_unused_defs); } - - needs_retry |= interp_local_deadce (td); - if (mono_interp_opt & INTERP_OPT_BBLOCKS) - needs_retry |= interp_optimize_bblocks (td); - - if (needs_retry) - goto retry; - - g_free (local_defs); } void @@ -2024,8 +3173,13 @@ mono_test_interp_cprop (TransformData *td) static gboolean get_sreg_imm (TransformData *td, int sreg, gint16 *imm, int result_mt) { - InterpInst *def = td->vars [sreg].def; - if (def != NULL && td->local_ref_count [sreg] == 1) { + if (var_has_indirects (td, sreg)) + return FALSE; + InterpInst *def = get_var_value_def (td, sreg); + if (!def) + return FALSE; + InterpVarValue *sreg_val = &td->var_values [sreg]; + if (sreg_val->ref_count == 1) { gint64 ct; if (MINT_IS_LDC_I4 (def->opcode)) ct = interp_get_const_from_ldc_i4 (def); @@ -2133,42 +3287,79 @@ get_unop_condbr_sp (int opcode) } } +// We have the pattern of: +// +// var <- def (v1, v2, ..) +// ... +// use var +// +// We want to optimize out `var <- def` and replace `use var` with `use v1, v2, ...` in a super instruction. +// This can be done only if var is used only once (otherwise `var <- def` will remain alive and in the +// superinstruction we duplicate the calculation of var) and v1, v2, .. can have their liveness extended +// to the current liveness +static gboolean +can_propagate_var_def (TransformData *td, int var, InterpLivenessPosition cur_liveness) +{ + InterpVarValue *val = get_var_value (td, var); + if (!val) + return FALSE; + if (val->ref_count != 1) + return FALSE; + + InterpInst *def = val->def; + int num_sregs = mono_interp_op_sregs [def->opcode]; + + for (int i = 0; i < num_sregs; i++) { + int svar = def->sregs [i]; + if (svar == MINT_CALL_ARGS_SREG) + return FALSE; // We don't care for these in super instructions + + if (!can_extend_var_liveness (td, svar, val->liveness, cur_liveness)) + return FALSE; + } + return TRUE; +} + static void interp_super_instructions (TransformData *td) { - InterpBasicBlock *bb; - int *local_ref_count = td->local_ref_count; - interp_compute_native_offset_estimates (td); // Add some actual super instructions - for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { - InterpInst *ins; - int noe; + for (int bb_dfs_index = 0; bb_dfs_index < td->bblocks_count_eh; bb_dfs_index++) { + InterpBasicBlock *bb = td->bblocks [bb_dfs_index]; // Set cbb since we do some instruction inserting below td->cbb = bb; - noe = bb->native_offset_estimate; - for (ins = bb->first_ins; ins != NULL; ins = ins->next) { + int noe = bb->native_offset_estimate; + InterpLivenessPosition current_liveness; + current_liveness.bb_dfs_index = bb->dfs_index; + current_liveness.ins_index = 0; + for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { int opcode = ins->opcode; + if (bb->dfs_index >= td->bblocks_count_no_eh || bb->dfs_index == -1 || (ins->flags & INTERP_INST_FLAG_LIVENESS_MARKER)) + current_liveness.ins_index++; if (MINT_IS_NOP (opcode)) continue; - if (mono_interp_op_dregs [opcode] && !td->vars [ins->dreg].global) - td->vars [ins->dreg].def = ins; + if (mono_interp_op_dregs [opcode] && !var_is_ssa_form (td, ins->dreg) && !var_has_indirects (td, ins->dreg)) { + InterpVarValue *dval = &td->var_values [ins->dreg]; + dval->type = VAR_VALUE_NONE; + dval->def = ins; + dval->liveness = current_liveness; + } if (opcode == MINT_RET || (opcode >= MINT_RET_I1 && opcode <= MINT_RET_U2)) { // ldc + ret -> ret.imm int sreg = ins->sregs [0]; gint16 imm; if (get_sreg_imm (td, sreg, &imm, (opcode == MINT_RET) ? MINT_TYPE_I2 : opcode - MINT_RET_I1)) { - InterpInst *def = td->vars [sreg].def; + InterpInst *def = td->var_values [sreg].def; int ret_op = MINT_IS_LDC_I4 (def->opcode) ? MINT_RET_I4_IMM : MINT_RET_I8_IMM; InterpInst *new_inst = interp_insert_ins (td, ins, ret_op); new_inst->data [0] = imm; interp_clear_ins (def); interp_clear_ins (ins); - local_ref_count [sreg]--; - + td->var_values [sreg].ref_count--; // 0 if (td->verbose_level) { g_print ("superins: "); interp_dump_ins (new_inst, td->data_items); @@ -2199,9 +3390,10 @@ interp_super_instructions (TransformData *td) new_inst->dreg = ins->dreg; new_inst->sregs [0] = sreg; new_inst->data [0] = imm; - interp_clear_ins (td->vars [sreg_imm].def); + interp_clear_ins (td->var_values [sreg_imm].def); interp_clear_ins (ins); - local_ref_count [sreg_imm]--; + td->var_values [sreg_imm].ref_count--; // 0 + td->var_values [new_inst->dreg].def = new_inst; if (td->verbose_level) { g_print ("superins: "); interp_dump_ins (new_inst, td->data_items); @@ -2217,9 +3409,10 @@ interp_super_instructions (TransformData *td) new_inst->dreg = ins->dreg; new_inst->sregs [0] = ins->sregs [0]; new_inst->data [0] = -imm; - interp_clear_ins (td->vars [sreg_imm].def); + interp_clear_ins (td->var_values [sreg_imm].def); interp_clear_ins (ins); - local_ref_count [sreg_imm]--; + td->var_values [sreg_imm].ref_count--; // 0 + td->var_values [new_inst->dreg].def = new_inst; if (td->verbose_level) { g_print ("superins: "); interp_dump_ins (new_inst, td->data_items); @@ -2227,8 +3420,8 @@ interp_super_instructions (TransformData *td) } } else if (opcode == MINT_MUL_I4_IMM || opcode == MINT_MUL_I8_IMM) { int sreg = ins->sregs [0]; - InterpInst *def = td->vars [sreg].def; - if (def != NULL && td->local_ref_count [sreg] == 1) { + if (can_propagate_var_def (td, sreg, current_liveness)) { + InterpInst *def = get_var_value_def (td, sreg); gboolean is_i4 = opcode == MINT_MUL_I4_IMM; if ((is_i4 && def->opcode == MINT_ADD_I4_IMM) || (!is_i4 && def->opcode == MINT_ADD_I8_IMM)) { @@ -2239,7 +3432,8 @@ interp_super_instructions (TransformData *td) new_inst->data [1] = ins->data [0]; interp_clear_ins (def); interp_clear_ins (ins); - local_ref_count [sreg]--; + td->var_values [sreg].ref_count--; // 0 + td->var_values [new_inst->dreg].def = new_inst; if (td->verbose_level) { g_print ("superins: "); interp_dump_ins (new_inst, td->data_items); @@ -2256,17 +3450,18 @@ interp_super_instructions (TransformData *td) new_inst->dreg = ins->dreg; new_inst->sregs [0] = ins->sregs [0]; new_inst->data [0] = imm; - interp_clear_ins (td->vars [sreg_imm].def); + interp_clear_ins (td->var_values [sreg_imm].def); interp_clear_ins (ins); - local_ref_count [sreg_imm]--; + td->var_values [sreg_imm].ref_count--; // 0 + td->var_values [new_inst->dreg].def = new_inst; if (td->verbose_level) { g_print ("superins: "); interp_dump_ins (new_inst, td->data_items); } } else if (opcode == MINT_SHL_I4 || opcode == MINT_SHL_I8) { int amount_var = ins->sregs [1]; - InterpInst *amount_def = td->vars [amount_var].def; - if (amount_def != NULL && td->local_ref_count [amount_var] == 1 && amount_def->opcode == MINT_AND_I4) { + InterpInst *amount_def = get_var_value_def (td, amount_var); + if (amount_def != NULL && td->var_values [amount_var].ref_count == 1 && amount_def->opcode == MINT_AND_I4) { int mask_var = amount_def->sregs [1]; if (get_sreg_imm (td, mask_var, &imm, MINT_TYPE_I2)) { // ldc + and + shl -> shl_and_imm @@ -2282,10 +3477,11 @@ interp_super_instructions (TransformData *td) new_inst->sregs [0] = ins->sregs [0]; new_inst->sregs [1] = amount_def->sregs [0]; - local_ref_count [amount_var]--; - local_ref_count [mask_var]--; + td->var_values [amount_var].ref_count--; // 0 + td->var_values [mask_var].ref_count--; // 0 + td->var_values [new_inst->dreg].def = new_inst; - interp_clear_ins (td->vars [mask_var].def); + interp_clear_ins (td->var_values [mask_var].def); interp_clear_ins (amount_def); interp_clear_ins (ins); if (td->verbose_level) { @@ -2299,8 +3495,8 @@ interp_super_instructions (TransformData *td) } else if (opcode == MINT_DIV_UN_I4 || opcode == MINT_DIV_UN_I8) { // ldc + div.un -> shr.imm int sreg_imm = ins->sregs [1]; - InterpInst *def = td->vars [sreg_imm].def; - if (def != NULL && td->local_ref_count [sreg_imm] == 1) { + InterpInst *def = get_var_value_def (td, sreg_imm); + if (def != NULL && td->var_values [sreg_imm].ref_count == 1) { int power2 = -1; if (MINT_IS_LDC_I4 (def->opcode)) { guint32 ct = interp_get_const_from_ldc_i4 (def); @@ -2322,7 +3518,8 @@ interp_super_instructions (TransformData *td) interp_clear_ins (def); interp_clear_ins (ins); - local_ref_count [sreg_imm]--; + td->var_values [sreg_imm].ref_count--; + td->var_values [new_inst->dreg].def = new_inst; if (td->verbose_level) { g_print ("lower div.un: "); interp_dump_ins (new_inst, td->data_items); @@ -2331,8 +3528,8 @@ interp_super_instructions (TransformData *td) } } else if (MINT_IS_LDIND_INT (opcode)) { int sreg_base = ins->sregs [0]; - InterpInst *def = td->vars [sreg_base].def; - if (def != NULL && td->local_ref_count [sreg_base] == 1) { + if (can_propagate_var_def (td, sreg_base, current_liveness)) { + InterpInst *def = get_var_value_def (td, sreg_base); InterpInst *new_inst = NULL; if (def->opcode == MINT_ADD_P) { int ldind_offset_op = MINT_LDIND_OFFSET_I1 + (opcode - MINT_LDIND_I1); @@ -2350,7 +3547,8 @@ interp_super_instructions (TransformData *td) if (new_inst) { interp_clear_ins (def); interp_clear_ins (ins); - local_ref_count [sreg_base]--; + td->var_values [sreg_base].ref_count--; + td->var_values [new_inst->dreg].def = new_inst; if (td->verbose_level) { g_print ("superins: "); interp_dump_ins (new_inst, td->data_items); @@ -2359,8 +3557,8 @@ interp_super_instructions (TransformData *td) } } else if (MINT_IS_LDIND_OFFSET (opcode)) { int sreg_off = ins->sregs [1]; - InterpInst *def = td->vars [sreg_off].def; - if (def != NULL && td->local_ref_count [sreg_off] == 1) { + if (can_propagate_var_def (td, sreg_off, current_liveness)) { + InterpInst *def = get_var_value_def (td, sreg_off); if (def->opcode == MINT_MUL_P_IMM || def->opcode == MINT_ADD_P_IMM || def->opcode == MINT_ADD_MUL_P_IMM) { int ldind_offset_op = MINT_LDIND_OFFSET_ADD_MUL_IMM_I1 + (opcode - MINT_LDIND_OFFSET_I1); InterpInst *new_inst = interp_insert_ins (td, ins, ldind_offset_op); @@ -2386,17 +3584,18 @@ interp_super_instructions (TransformData *td) interp_clear_ins (def); interp_clear_ins (ins); - local_ref_count [sreg_off]--; + td->var_values [sreg_off].ref_count--; // 0 + td->var_values [new_inst->dreg].def = new_inst; if (td->verbose_level) { - g_print ("method %s:%s, superins: ", m_class_get_name (td->method->klass), td->method->name); + g_print ("superins: "); interp_dump_ins (new_inst, td->data_items); } } } } else if (MINT_IS_STIND_INT (opcode)) { int sreg_base = ins->sregs [0]; - InterpInst *def = td->vars [sreg_base].def; - if (def != NULL && td->local_ref_count [sreg_base] == 1) { + if (can_propagate_var_def (td, sreg_base, current_liveness)) { + InterpInst *def = get_var_value_def (td, sreg_base); InterpInst *new_inst = NULL; if (def->opcode == MINT_ADD_P) { int stind_offset_op = MINT_STIND_OFFSET_I1 + (opcode - MINT_STIND_I1); @@ -2414,7 +3613,7 @@ interp_super_instructions (TransformData *td) if (new_inst) { interp_clear_ins (def); interp_clear_ins (ins); - local_ref_count [sreg_base]--; + td->var_values [sreg_base].ref_count--; if (td->verbose_level) { g_print ("superins: "); interp_dump_ins (new_inst, td->data_items); @@ -2427,16 +3626,16 @@ interp_super_instructions (TransformData *td) // when inlining property accessors. We should have more advanced cknull removal // optimzations, so we can catch cases where instructions are not next to each other. int obj_sreg = ins->sregs [0]; - InterpInst *def = td->vars [obj_sreg].def; + InterpInst *def = get_var_value_def (td, obj_sreg); if (def != NULL && def->opcode == MINT_CKNULL && interp_prev_ins (ins) == def && - def->dreg == obj_sreg && local_ref_count [obj_sreg] == 1) { + def->dreg == obj_sreg && td->var_values [obj_sreg].ref_count == 1) { if (td->verbose_level) { - g_print ("remove redundant cknull (%s): ", td->method->name); + g_print ("remove redundant cknull: "); interp_dump_ins (def, td->data_items); } ins->sregs [0] = def->sregs [0]; interp_clear_ins (def); - local_ref_count [obj_sreg]--; + td->var_values [obj_sreg].ref_count--; } } else if (MINT_IS_BINOP_CONDITIONAL_BRANCH (opcode) && interp_is_short_offset (noe, ins->info.target_bb->native_offset_estimate)) { gint16 imm; @@ -2452,9 +3651,9 @@ interp_super_instructions (TransformData *td) new_ins->sregs [0] = ins->sregs [0]; new_ins->data [0] = imm; new_ins->info.target_bb = ins->info.target_bb; - interp_clear_ins (td->vars [sreg_imm].def); + interp_clear_ins (td->var_values [sreg_imm].def); interp_clear_ins (ins); - local_ref_count [sreg_imm]--; + td->var_values [sreg_imm].ref_count--; // 0 if (td->verbose_level) { g_print ("superins: "); interp_dump_ins (new_ins, td->data_items); @@ -2478,8 +3677,8 @@ interp_super_instructions (TransformData *td) if (opcode == MINT_BRFALSE_I4 || opcode == MINT_BRTRUE_I4) { gboolean negate = opcode == MINT_BRFALSE_I4; int cond_sreg = ins->sregs [0]; - InterpInst *def = td->vars [cond_sreg].def; - if (def != NULL && local_ref_count [cond_sreg] == 1) { + if (can_propagate_var_def (td, cond_sreg, current_liveness)) { + InterpInst *def = get_var_value_def (td, cond_sreg); int replace_opcode = -1; switch (def->opcode) { case MINT_CEQ_I4: replace_opcode = negate ? MINT_BNE_UN_I4 : MINT_BEQ_I4; break; @@ -2513,7 +3712,7 @@ interp_super_instructions (TransformData *td) if (def->opcode != MINT_CEQ0_I4) ins->sregs [1] = def->sregs [1]; interp_clear_ins (def); - local_ref_count [cond_sreg]--; + td->var_values [cond_sreg].ref_count--; if (td->verbose_level) { g_print ("superins: "); interp_dump_ins (ins, td->data_items); @@ -2538,8 +3737,8 @@ interp_super_instructions (TransformData *td) } } else if (opcode == MINT_STOBJ_VT_NOREF) { int sreg_src = ins->sregs [1]; - InterpInst *def = td->vars [sreg_src].def; - if (def != NULL && interp_prev_ins (ins) == def && def->opcode == MINT_LDOBJ_VT && ins->data [0] == def->data [0] && td->local_ref_count [sreg_src] == 1) { + InterpInst *def = get_var_value_def (td, sreg_src); + if (def != NULL && interp_prev_ins (ins) == def && def->opcode == MINT_LDOBJ_VT && ins->data [0] == def->data [0] && td->var_values [sreg_src].ref_count == 1) { InterpInst *new_inst = interp_insert_ins (td, ins, MINT_CPOBJ_VT_NOREF); new_inst->sregs [0] = ins->sregs [0]; // dst new_inst->sregs [1] = def->sregs [0]; // src @@ -2547,34 +3746,175 @@ interp_super_instructions (TransformData *td) interp_clear_ins (def); interp_clear_ins (ins); - local_ref_count [sreg_src]--; + td->var_values [sreg_src].ref_count--; if (td->verbose_level) { g_print ("superins: "); interp_dump_ins (new_inst, td->data_items); } } + } else if (opcode == MINT_MOV_4 || opcode == MINT_MOV_8 || opcode == MINT_MOV_VT) { + int sreg = ins->sregs [0]; + InterpInst *def = get_var_value_def (td, sreg); + if (def && td->var_values [sreg].ref_count == 1) { + // The svar is used only for this mov. Try to get the definition to store directly instead + if (def->opcode != MINT_DEF_ARG && def->opcode != MINT_PHI && def->opcode != MINT_DEF_TIER_VAR && + !(def->flags & INTERP_INST_FLAG_PROTECTED_NEWOBJ)) { + int dreg = ins->dreg; + // if var is not ssa or it is a renamed fixed, then we can't replace the dreg + // since there can be conflicting liveness, unless the instructions are adjacent + if ((var_is_ssa_form (td, dreg) && !td->vars [dreg].renamed_ssa_fixed) || + interp_prev_ins (ins) == def) { + def->dreg = dreg; + + // Copy var value, while keeping the ref count intact + int dreg_ref_count = td->var_values [dreg].ref_count; + td->var_values [dreg] = td->var_values [sreg]; + td->var_values [dreg].ref_count = dreg_ref_count; + + // clear the move + td->var_values [sreg].ref_count--; // 0 + interp_clear_ins (ins); + + if (td->verbose_level) { + g_print ("forward dreg: "); + interp_dump_ins (def, td->data_items); + } + } + } + } } noe += interp_get_ins_length (ins); } } } +static void +interp_prepare_no_ssa_opt (TransformData *td) +{ + for (unsigned int i = 0; i < td->vars_size; i++) { + td->vars [i].no_ssa = TRUE; + td->vars [i].has_indirects = (td->vars [i].indirects > 0) ? TRUE : FALSE; + } + + td->bblocks = (InterpBasicBlock**)mono_mempool_alloc0 (td->opt_mempool, sizeof (InterpBasicBlock*) * td->bb_count); + + int i = 0; + for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + td->bblocks [i] = bb; + bb->dfs_index = i; + i++; + } + td->bblocks_count_no_eh = 0; + td->bblocks_count_eh = i; +} + +static void +interp_remove_ins (InterpBasicBlock *bb, InterpInst *ins) +{ + if (ins->next) + ins->next->prev = ins->prev; + else + bb->last_ins = ins->prev; + + if (ins->prev) + ins->prev->next = ins->next; + else + bb->first_ins = ins->next; +} + +static void +interp_remove_nops (TransformData *td) +{ + InterpBasicBlock *bb; + for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + InterpInst *ins; + for (ins = bb->first_ins; ins != NULL; ins = ins->next) { + if (ins->opcode == MINT_NOP && ins->prev && + (ins->il_offset == -1 || + ins->prev->il_offset == ins->il_offset)) { + // This is a NOP instruction that has no relevant il_offset, actually remove it + interp_remove_ins (bb, ins); + } + + } + } +} + void interp_optimize_code (TransformData *td) { if (mono_interp_opt & INTERP_OPT_BBLOCKS) - interp_optimize_bblocks (td); + MONO_TIME_TRACK (mono_interp_stats.optimize_bblocks_time, interp_optimize_bblocks (td)); - if (mono_interp_opt & INTERP_OPT_CPROP) - MONO_TIME_TRACK (mono_interp_stats.cprop_time, interp_cprop (td)); + // Nothing to optimize if we don't have cprop enabled + if (!(mono_interp_opt & INTERP_OPT_CPROP)) + return; - // After this point control optimizations on control flow can no longer happen, so we can determine - // which vars are global. This helps speed up the super instructions pass, which only operates on - // single def, single use local vars. - initialize_global_vars (td); + if (!(mono_interp_opt & INTERP_OPT_SSA)) + td->disable_ssa = TRUE; + + gboolean ssa_enabled_retry = FALSE; + + if (!td->disable_ssa && td->bb_count > 1000) { + // We have ssa enabled but we are compiling a huge method. Do the first iteration + // in ssa disabled mode. This should greatly simplify the CFG and the code, so the + // following iteration with SSA transformation enabled is much faster. In general, + // for huge methods we end up doing multiple optimization iterations anyway. + ssa_enabled_retry = TRUE; + td->disable_ssa = TRUE; + if (td->verbose_level) + g_print ("Huge method. SSA disabled for first iteration\n"); + } +optimization_retry: + if (td->opt_mempool != NULL) + mono_mempool_destroy (td->opt_mempool); + if (td->var_values != NULL) { + g_free (td->var_values); + td->var_values = NULL; + } + td->opt_mempool = mono_mempool_new (); + + td->need_optimization_retry = FALSE; - if ((mono_interp_opt & INTERP_OPT_SUPER_INSTRUCTIONS) && - (mono_interp_opt & INTERP_OPT_CPROP)) + if (td->disable_ssa) + interp_prepare_no_ssa_opt (td); + else + MONO_TIME_TRACK (mono_interp_stats.ssa_compute_time, interp_compute_ssa (td)); + + MONO_TIME_TRACK (mono_interp_stats.cprop_time, interp_cprop (td)); + + interp_var_deadce (td); + + // We run this after var deadce to detect more single use vars. This pass will clear + // unnecessary instruction on the fly so deadce is no longer needed to run. + if (mono_interp_opt & INTERP_OPT_SUPER_INSTRUCTIONS) MONO_TIME_TRACK (mono_interp_stats.super_instructions_time, interp_super_instructions (td)); + + if (!td->disable_ssa) + interp_exit_ssa (td); + + interp_remove_nops (td); + + if (mono_interp_opt & INTERP_OPT_BBLOCKS) + MONO_TIME_TRACK (mono_interp_stats.optimize_bblocks_time, interp_optimize_bblocks (td)); + + if (ssa_enabled_retry) { + ssa_enabled_retry = FALSE; + td->disable_ssa = FALSE; + if (td->verbose_level) + g_print ("Retry optimization with SSA enabled\n"); + goto optimization_retry; + } else if (td->need_optimization_retry) { + if (td->verbose_level) + g_print ("Retry optimization\n"); + goto optimization_retry; + } + + mono_mempool_destroy (td->opt_mempool); + + if (td->verbose_level) { + g_print ("\nOptimized IR:\n"); + mono_interp_print_td_code (td); + } } diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 6f4a46e0b653bf..30488e00622fac 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -349,6 +349,64 @@ mono_mint_type (MonoType *type) return -1; } +// This marks the var as renamable, allocating space for additional data. +// The original var data (InterpVar) will have an index that points to this +// additional data. +int +interp_make_var_renamable (TransformData *td, int var) +{ + // Check if already allocated + if (td->vars [var].ext_index != -1) + return td->vars [var].ext_index; + + if (td->renamable_vars_size == td->renamable_vars_capacity) { + td->renamable_vars_capacity *= 2; + if (td->renamable_vars_capacity == 0) + td->renamable_vars_capacity = 2; + td->renamable_vars = (InterpRenamableVar*) g_realloc (td->renamable_vars, td->renamable_vars_capacity * sizeof (InterpRenamableVar)); + } + + int ext_index = td->renamable_vars_size; + InterpRenamableVar *ext = &td->renamable_vars [ext_index]; + memset (ext, 0, sizeof (InterpRenamableVar)); + ext->var_index = var; + + td->vars [var].ext_index = ext_index; + + td->renamable_vars_size++; + + return ext_index; +} + +// This doesn't allocate a new var, rather additional information for fixed renamed vars +int +interp_create_renamed_fixed_var (TransformData *td, int var_index, int renamable_var_index) +{ + g_assert (td->vars [renamable_var_index].ext_index != -1); + g_assert (td->vars [var_index].ext_index == -1); + g_assert (td->vars [var_index].renamed_ssa_fixed); + + if (td->renamed_fixed_vars_size == td->renamed_fixed_vars_capacity) { + td->renamed_fixed_vars_capacity *= 2; + if (td->renamed_fixed_vars_capacity == 0) + td->renamed_fixed_vars_capacity = 2; + td->renamed_fixed_vars = (InterpRenamedFixedVar*) g_realloc (td->renamed_fixed_vars, td->renamed_fixed_vars_capacity * sizeof (InterpRenamedFixedVar)); + } + + int ext_index = td->renamed_fixed_vars_size; + InterpRenamedFixedVar *ext = &td->renamed_fixed_vars [ext_index]; + + ext->var_index = var_index; + ext->renamable_var_ext_index = td->vars [renamable_var_index].ext_index; + ext->live_out_bblocks = NULL; + ext->live_limit_bblocks = NULL; + + td->vars [var_index].ext_index = ext_index; + + td->renamed_fixed_vars_size++; + + return ext_index; +} /* * These are additional locals that can be allocated as we transform the code. @@ -377,7 +435,7 @@ interp_create_var_explicit (TransformData *td, MonoType *type, int size) local->size = size; local->live_start = -1; local->bb_index = -1; - local->def = NULL; + local->ext_index = -1; td->vars_size++; return td->vars_size - 1; @@ -659,8 +717,7 @@ handle_branch (TransformData *td, int long_op, int offset) if (offset < 0 && td->sp == td->stack && !td->inlined_method) { // Backwards branch inside unoptimized method where the IL stack is empty // This is candidate for a patchpoint - if (!td->optimized) - target_bb->emit_patchpoint = TRUE; + target_bb->patchpoint_bb = TRUE; if (mono_interp_tiering_enabled () && !target_bb->patchpoint_data && td->optimized) { // The optimized imethod will store mapping from bb index to native offset so it // can resume execution in the optimized method, once we tier up in patchpoint @@ -673,9 +730,12 @@ handle_branch (TransformData *td, int long_op, int offset) if (offset > 0) init_bb_stack_state (td, target_bb); - if (td->cbb->no_inlining && long_op != MINT_CALL_HANDLER) - target_bb->jump_targets--; - interp_link_bblocks (td, td->cbb, target_bb); + if (long_op != MINT_CALL_HANDLER) { + if (td->cbb->no_inlining) + target_bb->jump_targets--; + // We don't link finally blocks into the cfg (or other handler blocks for that matter) + interp_link_bblocks (td, td->cbb, target_bb); + } interp_add_ins (td, long_op); td->last_ins->info.target_bb = target_bb; @@ -1023,6 +1083,7 @@ store_local (TransformData *td, int local) { int mt = td->vars [local].mt; CHECK_STACK_RET_VOID (td, 1); + #if SIZEOF_VOID_P == 8 // nint and int32 can be used interchangeably. Add implicit conversions. if (td->sp [-1].type == STACK_TYPE_I4 && stack_type [mt] == STACK_TYPE_I8) @@ -1441,7 +1502,14 @@ interp_dump_ins (InterpInst *ins, gpointer *data_items) else g_string_append_printf (str, " [nil <-"); - if (mono_interp_op_sregs [opcode] > 0) { + if (opcode == MINT_PHI) { + int *args = ins->info.args; + while (*args != -1) { + g_string_append_printf (str, " %d", *args); + args++; + } + g_string_append_printf (str, "],"); + } else if (mono_interp_op_sregs [opcode] > 0) { for (int i = 0; i < mono_interp_op_sregs [opcode]; i++) { if (ins->sregs [i] == MINT_CALL_ARGS_SREG) { g_string_append_printf (str, " c:"); @@ -1507,7 +1575,6 @@ mono_interp_print_code (InterpMethod *imethod) void mono_interp_print_td_code (TransformData *td) { - g_print ("Unoptimized IR:\n"); for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) interp_dump_bb (bb, td->data_items); } @@ -1880,23 +1947,41 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas if (ldloca2 != NULL && ldloca2->opcode == MINT_LDLOCA_S) { InterpInst *ldloca1 = interp_prev_ins (ldloca2); if (ldloca1 != NULL && ldloca1->opcode == MINT_LDLOCA_S) { - interp_add_ins (td, MINT_INTRINS_MARVIN_BLOCK); - td->last_ins->sregs [0] = ldloca1->sregs [0]; - td->last_ins->sregs [1] = ldloca2->sregs [0]; - - // This intrinsic would normally receive two local refs, however, we try optimizing - // away both ldlocas for better codegen. This means that this intrinsic will instead - // modify the values of both sregs. In order to not overcomplicate the optimization - // passes and offset allocator with support for modifiable sregs or multi dregs, we - // just redefine both sregs after the intrinsic. - interp_add_ins (td, MINT_DEF); - td->last_ins->dreg = ldloca1->sregs [0]; - interp_add_ins (td, MINT_DEF); - td->last_ins->dreg = ldloca2->sregs [0]; + int var1 = ldloca1->sregs [0]; + int var2 = ldloca2->sregs [0]; + if (!td->optimized) { + interp_add_ins (td, MINT_INTRINS_MARVIN_BLOCK); + td->last_ins->sregs [0] = var1; + td->last_ins->sregs [1] = var2; + td->last_ins->data [0] = GINT_TO_UINT16 (var1); + td->last_ins->data [1] = GINT_TO_UINT16 (var2); + } else { + // Convert this instruction to SSA form by splitting it into 2 different + // single dreg instructions. When we generate final code, we will couple them + // together. + int result1 = interp_create_var (td, m_class_get_byval_arg (mono_defaults.uint32_class)); + int result2 = interp_create_var (td, m_class_get_byval_arg (mono_defaults.uint32_class)); + interp_add_ins (td, MINT_INTRINS_MARVIN_BLOCK_SSA1); + td->last_ins->sregs [0] = var1; + td->last_ins->sregs [1] = var2; + td->last_ins->dreg = result1; + + interp_add_ins (td, MINT_INTRINS_MARVIN_BLOCK_SSA2); + td->last_ins->sregs [0] = var1; + td->last_ins->sregs [1] = var2; + td->last_ins->dreg = result2; + + interp_add_ins (td, MINT_MOV_4); + td->last_ins->sregs [0] = result1; + td->last_ins->dreg = var1; + interp_add_ins (td, MINT_MOV_4); + td->last_ins->sregs [0] = result2; + td->last_ins->dreg = var2; + } // Remove the ldlocas - td->vars [ldloca1->sregs [0]].indirects--; - td->vars [ldloca2->sregs [0]].indirects--; + td->vars [var1].indirects--; + td->vars [var2].indirects--; interp_clear_ins (ldloca1); interp_clear_ins (ldloca2); td->sp -= 2; @@ -2734,6 +2819,11 @@ interp_method_check_inlining (TransformData *td, MonoMethod *method, MonoMethodS if (td->cbb->no_inlining) return FALSE; + // Exception handlers are always uncommon, with the exception of finally. + int inner_clause = td->clause_indexes [td->current_il_offset]; + if (inner_clause != -1 && td->header->clauses [inner_clause].flags != MONO_EXCEPTION_CLAUSE_FINALLY) + return FALSE; + if (method->flags & METHOD_ATTRIBUTE_REQSECOBJ) /* Used to mark methods containing StackCrawlMark locals */ return FALSE; @@ -2962,11 +3052,9 @@ interp_inline_newobj (TransformData *td, MonoMethod *target_method, MonoMethodSi dreg = interp_create_var (td, get_type_from_stack (stack_type [ret_mt], klass)); - // For valuetypes, we need to control the lifetime of the valuetype. - // MINT_NEWOBJ_VT_INLINED takes the address of this reg and we should keep - // the vt alive until the inlining is completed. - interp_add_ins (td, MINT_DEF); + interp_add_ins (td, MINT_INITLOCAL); interp_ins_set_dreg (td->last_ins, dreg); + td->last_ins->data [0] = GINT_TO_UINT16 (vtsize); } else { dreg = interp_create_var (td, get_type_from_stack (stack_type [ret_mt], klass)); } @@ -2985,11 +3073,10 @@ interp_inline_newobj (TransformData *td, MonoMethod *target_method, MonoMethodSi td->sp += csignature->param_count; if (is_vt) { - // Receives the valuetype allocated with MINT_DEF, and returns its address - newobj_fast = interp_add_ins (td, MINT_NEWOBJ_VT_INLINED); + newobj_fast = interp_add_ins (td, MINT_LDLOCA_S); interp_ins_set_dreg (newobj_fast, this_reg); interp_ins_set_sreg (newobj_fast, dreg); - newobj_fast->data [0] = GUINTPTR_TO_UINT16 (ALIGN_TO (vtsize, MINT_STACK_SLOT_SIZE)); + td->vars [dreg].indirects++; } else { MonoVTable *vtable = mono_class_vtable_checked (klass, error); goto_if_nok (error, fail); @@ -3007,11 +3094,6 @@ interp_inline_newobj (TransformData *td, MonoMethod *target_method, MonoMethodSi if (!interp_inline_method (td, target_method, mheader, error)) goto fail; - if (is_vt) { - interp_add_ins (td, MINT_DUMMY_USE); - interp_ins_set_sreg (td->last_ins, dreg); - } - push_var (td, dreg); return TRUE; fail: @@ -4220,6 +4302,10 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet td->vars = (InterpVar*)g_malloc0 (num_locals * sizeof (InterpVar)); td->vars_size = num_locals; td->vars_capacity = td->vars_size; + + td->renamable_vars = (InterpRenamableVar*)g_malloc (num_locals * sizeof (InterpRenamableVar)); + td->renamable_vars_size = 0; + td->renamable_vars_capacity = num_locals; offset = 0; /* @@ -4236,9 +4322,10 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet int mt = mono_mint_type (type); td->vars [i].type = type; td->vars [i].global = TRUE; + td->vars [i].il_global = TRUE; td->vars [i].indirects = 0; td->vars [i].mt = mt; - td->vars [i].def = NULL; + td->vars [i].ext_index = -1; size = mono_interp_type_size (type, mt, &align); td->vars [i].size = size; offset = ALIGN_TO (offset, align); @@ -4263,9 +4350,10 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet td->vars [index].type = header->locals [i]; td->vars [index].offset = offset; td->vars [index].global = TRUE; + td->vars [index].il_global = TRUE; td->vars [index].indirects = 0; td->vars [index].mt = mono_mint_type (header->locals [i]); - td->vars [index].def = NULL; + td->vars [index].ext_index = -1; td->vars [index].size = size; // Every local takes a MINT_STACK_SLOT_SIZE so IL locals have same behavior as execution locals offset += size; @@ -4534,21 +4622,22 @@ initialize_clause_bblocks (TransformData *td) for (guint i = 0; i < header->num_clauses; i++) { MonoExceptionClause *c = header->clauses + i; - InterpBasicBlock *bb; + InterpBasicBlock *try_bb, *bb; for (uint32_t j = c->handler_offset; j < c->handler_offset + c->handler_len; j++) { if (td->clause_indexes [j] == -1) td->clause_indexes [j] = i; } - bb = td->offset_to_bb [c->try_offset]; - g_assert (bb); - bb->eh_block = TRUE; + try_bb = td->offset_to_bb [c->try_offset]; + g_assert (try_bb); + try_bb->preserve = TRUE; /* We never inline methods with clauses, so we can hard code stack heights */ bb = td->offset_to_bb [c->handler_offset]; g_assert (bb); - bb->eh_block = TRUE; + bb->preserve = TRUE; + bb->try_bblock = try_bb; if (c->flags == MONO_EXCEPTION_CLAUSE_FINALLY) { bb->stack_height = 0; @@ -4564,7 +4653,9 @@ initialize_clause_bblocks (TransformData *td) if (c->flags == MONO_EXCEPTION_CLAUSE_FILTER) { bb = td->offset_to_bb [c->data.filter_offset]; g_assert (bb); - bb->eh_block = TRUE; + bb->preserve = TRUE; + bb->try_bblock = try_bb; + bb->stack_height = 1; bb->stack_state = (StackInfo*) mono_mempool_alloc0 (td->mempool, sizeof (StackInfo)); bb->stack_state [0].type = STACK_TYPE_O; @@ -4705,20 +4796,16 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, td->in_start = td->ip = header->code; end = td->ip + header->code_size; - td->cbb = td->entry_bb = (InterpBasicBlock*)mono_mempool_alloc0 (td->mempool, sizeof (InterpBasicBlock)); + td->cbb = td->entry_bb = interp_alloc_bb (td); if (td->gen_sdb_seq_points) td->basic_blocks = g_list_prepend_mempool (td->mempool, td->basic_blocks, td->cbb); - td->cbb->index = td->bb_count++; - td->cbb->native_offset = -1; td->cbb->stack_height = GPTRDIFF_TO_INT (td->sp - td->stack); - if (inlining) { - exit_bb = (InterpBasicBlock*)mono_mempool_alloc0 (td->mempool, sizeof (InterpBasicBlock)); - exit_bb->index = td->bb_count++; - exit_bb->native_offset = -1; - exit_bb->stack_height = -1; - } + if (inlining) + exit_bb = interp_alloc_bb (td); + else + td->entry_bb->il_offset = 0; il_targets = mono_bitset_mem_new ( mono_mempool_alloc0 (td->mempool, mono_bitset_alloc_size (header->code_size, 0)), @@ -4790,6 +4877,14 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, g_free (name); } + if (td->optimized && !td->disable_ssa) { + // Add arg defining instructions for SSA machinery + for (int i = 0; i < num_args; i++) { + interp_add_ins (td, MINT_DEF_ARG); + interp_ins_set_dreg (td->last_ins, i); + } + } + if (rtm->vararg) { // vararg calls are identical to normal calls on the call site. However, the // first instruction in a vararg method needs to copy the variable arguments @@ -4805,23 +4900,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, td->has_localloc = TRUE; } - /* - * We initialize the locals regardless of the presence of the init_locals - * flag. Locals holding references need to be zeroed so we don't risk - * crashing the GC if they end up being stored in an object. - * - * FIXME - * Track values of locals over multiple basic blocks. This would enable - * us to kill the MINT_INITLOCALS instruction if all locals are initialized - * before use. We also don't need this instruction if the init locals flag - * is not set and there are no locals holding references. - */ - if (header->num_locals) { - interp_add_ins (td, MINT_INITLOCALS); - td->last_ins->data [0] = GUINT_TO_UINT16 (td->il_locals_offset); - td->last_ins->data [1] = GUINT_TO_UINT16 (td->il_locals_size); - } - guint16 enter_profiling = 0; if (mono_jit_trace_calls != NULL && mono_trace_eval (method)) enter_profiling |= TRACING_FLAG; @@ -4865,11 +4943,32 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, } local_locals = (guint32*) g_malloc (header->num_locals * sizeof (guint32)); - /* Allocate locals to store inlined method args from stack */ for (int i = 0; i < header->num_locals; i++) local_locals [i] = interp_create_var (td, header->locals [i]); } + /* + * We initialize the locals regardless of the presence of the init_locals + * flag. Locals holding references need to be zeroed so we don't risk + * crashing the GC if they end up being stored in an object. + */ + if (header->num_locals) { + if (td->optimized) { + // Add individual initlocal for each IL local. These should + // all be optimized out by SSA cprop/deadce optimizations. + for (int i = 0; i < header->num_locals; i++) { + interp_add_ins (td, MINT_INITLOCAL); + int local_var = inlining ? local_locals [i] : (num_args + i); + td->last_ins->dreg = local_var; + td->last_ins->data [0] = GINT_TO_UINT16 (td->vars [local_var].size); + } + } else { + interp_add_ins (td, MINT_INITLOCALS); + td->last_ins->data [0] = GUINT_TO_UINT16 (td->il_locals_offset); + td->last_ins->data [1] = GUINT_TO_UINT16 (td->il_locals_size); + } + } + td->dont_inline = g_list_prepend (td->dont_inline, method); while (td->ip < end) { // Check here for every opcode to avoid code bloat @@ -6127,6 +6226,11 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, // First arg is dummy var, it is null when passed to the ctor call_args [0] = interp_create_var (td, get_type_from_stack (stack_type [ret_mt], NULL)); + if (!td->disable_ssa) { + // Make sure this arg is defined for SSA optimizations + interp_add_ins (td, MINT_DEF); + } + td->last_ins->dreg = call_args [0]; for (int i = 0; i < csignature->param_count; i++) { call_args [i + 1] = td->sp [i].var; } @@ -6267,6 +6371,9 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, push_type (td, stack_type [ret_mt], klass); push_type (td, stack_type [ret_mt], klass); } + // Make sure this arg is defined for SSA optimizations + interp_add_ins (td, MINT_DEF); + td->last_ins->dreg = td->sp [-1].var; int dreg = td->sp [-2].var; // Push back the params to top of stack. The original vars are maintained. @@ -6432,6 +6539,16 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, case CEE_THROW: if (!td->aggressive_inlining) INLINE_FAILURE; + if (!inlining) { + guint32 il_offset = GINT_TO_UINT32(td->current_il_offset); + for (unsigned int i = 0; i < td->header->num_clauses; i++) { + MonoExceptionClause *clause = &td->header->clauses [i]; + // If we throw during try and then catch we don't have the bblocks + // properly linked, just disable ssa for now + if (clause->flags == MONO_EXCEPTION_CLAUSE_NONE && (clause->try_offset <= il_offset) && (il_offset < (clause->try_offset + clause->try_len))) + td->disable_ssa = TRUE; + } + } CHECK_STACK (td, 1); interp_add_ins (td, MINT_THROW); interp_ins_set_sreg (td->last_ins, td->sp [-1].var); @@ -7437,7 +7554,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, } else { handle_branch (td, MINT_BR, target_offset); } - td->last_ins->info.target_bb->eh_block = TRUE; + td->last_ins->info.target_bb->preserve = TRUE; if (*td->ip == CEE_LEAVE) td->ip += 5; @@ -8257,13 +8374,13 @@ interp_compute_native_offset_estimates (TransformData *td) for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { InterpInst *ins; bb->native_offset_estimate = noe; - if (bb->emit_patchpoint) + if (!td->optimized && bb->patchpoint_bb) noe += 2; for (ins = bb->first_ins; ins != NULL; ins = ins->next) { int opcode = ins->opcode; // Skip dummy opcodes for more precise offset computation - if (MINT_IS_NOP (opcode)) + if (MINT_IS_EMIT_NOP (opcode)) continue; noe += interp_get_ins_length (ins); if (!td->optimized) @@ -8312,10 +8429,10 @@ get_short_brop (int opcode) } static int -get_local_offset (TransformData *td, int local) +get_var_offset (TransformData *td, int var) { - if (td->vars [local].offset != -1) - return td->vars [local].offset; + if (td->vars [var].offset != -1) + return td->vars [var].offset; // FIXME Some vars might end up with unitialized offset because they are not declared at all in the code. // This can happen if the bblock declaring the var gets removed, while other unreachable bblocks, that access @@ -8327,10 +8444,10 @@ get_local_offset (TransformData *td, int local) // If we use the optimized offset allocator, all locals should have had their offsets already allocated g_assert (!td->optimized); // The only remaining locals to allocate are the ones from the execution stack - g_assert (td->vars [local].execution_stack); + g_assert (td->vars [var].execution_stack); - td->vars [local].offset = td->total_locals_size + td->vars [local].stack_offset; - return td->vars [local].offset; + td->vars [var].offset = td->total_locals_size + td->vars [var].stack_offset; + return td->vars [var].offset; } static guint16* @@ -8351,13 +8468,13 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in g_array_append_val (td->line_numbers, lne); } - if (opcode == MINT_NOP || opcode == MINT_DEF || opcode == MINT_DUMMY_USE) + if (MINT_IS_EMIT_NOP (opcode)) return ip; *ip++ = opcode; if (opcode == MINT_SWITCH) { int labels = READ32 (&ins->data [0]); - *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [0])); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->sregs [0])); // Write number of switch labels *ip++ = ins->data [0]; *ip++ = ins->data [1]; @@ -8385,7 +8502,7 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in const int br_offset = GPTRDIFF_TO_INT (start_ip - td->new_code); gboolean has_imm = opcode >= MINT_BEQ_I4_IMM_SP && opcode <= MINT_BLT_UN_I8_IMM_SP; for (int i = 0; i < mono_interp_op_sregs [opcode]; i++) - *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [i])); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->sregs [i])); if (has_imm) *ip++ = ins->data [0]; @@ -8404,9 +8521,14 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in ip--; } else { // If the estimate offset is short, then surely the real offset is short - gboolean is_short = interp_is_short_offset (br_offset, ins->info.target_bb->native_offset_estimate); + // otherwise we conservatively have to use long branch opcodes + int cur_estimation_error = td->cbb->native_offset_estimate - td->cbb->native_offset; + int target_bb_estimated_offset = ins->info.target_bb->native_offset_estimate - cur_estimation_error; + gboolean is_short = interp_is_short_offset (br_offset, target_bb_estimated_offset); if (is_short) *start_ip = GINT_TO_OPCODE (get_short_brop (opcode)); + else + g_assert (!MINT_IS_SUPER_BRANCH (opcode)); // FIXME missing handling for long branch // We don't know the in_offset of the target, add a reloc Reloc *reloc = (Reloc*)mono_mempool_alloc0 (td->mempool, sizeof (Reloc)); @@ -8452,9 +8574,18 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in guint16 foff = ins->data [0]; guint16 mt = ins->data [1]; guint16 fsize = ins->data [2]; + ip--; + + if (opcode == MINT_MOV_DST_OFF && get_var_offset (td, ins->dreg) != get_var_offset (td, ins->sregs [1])) { + // We are no longer storing a field into the same valuetype. Copy also the whole vt. + *ip++ = MINT_MOV_VT; + *ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->dreg)); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->sregs [1])); + *ip++ = GINT_TO_UINT16 (td->vars [ins->dreg].size); + } - int dest_off = get_local_offset (td, ins->dreg); - int src_off = get_local_offset (td, ins->sregs [0]); + int dest_off = get_var_offset (td, ins->dreg); + int src_off = get_var_offset (td, ins->sregs [0]); if (opcode == MINT_MOV_SRC_OFF) src_off += foff; else @@ -8478,8 +8609,7 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in } } } - // Replace MINT_MOV_OFF with the real instruction - ip [-1] = opcode; + *ip++ = opcode; *ip++ = GINT_TO_UINT16 (dest_off); *ip++ = GINT_TO_UINT16 (src_off); if (opcode == MINT_MOV_VT) @@ -8513,17 +8643,54 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in // actually vars. Resolve their offset int num_vars = mono_interp_oplen [opcode] - 1; for (int i = 0; i < num_vars; i++) - *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->data [i])); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->data [i])); } else if (opcode == MINT_MOV_STACK_UNOPT) { g_assert (!td->optimized); // ins->data [0] represents the stack offset of the call args (within the execution stack) *ip++ = GINT_TO_UINT16 (td->param_area_offset + ins->data [0]); *ip++ = GINT_TO_UINT16 (ins->data [1]); *ip++ = GINT_TO_UINT16 (ins->data [2]); + } else if (opcode == MINT_INTRINS_MARVIN_BLOCK) { + // Generated only in unoptimized code + int var0 = ins->sregs [0]; + int var1 = ins->sregs [1]; + g_assert (var0 == ins->data [0]); + g_assert (var1 == ins->data [1]); + + *ip++ = GINT_TO_UINT16 (get_var_offset (td, var0)); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, var1)); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, var0)); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, var1)); + } else if (opcode == MINT_INTRINS_MARVIN_BLOCK_SSA1) { + int var0 = ins->sregs [0]; + int var1 = ins->sregs [1]; + g_assert (ins->next->opcode == MINT_INTRINS_MARVIN_BLOCK_SSA2); + g_assert (var0 == ins->next->sregs [0]); + g_assert (var1 == ins->next->sregs [1]); + int dvar0 = ins->dreg; + int dvar1 = ins->next->dreg; + ip [-1] = MINT_INTRINS_MARVIN_BLOCK; + *ip++ = GINT_TO_UINT16 (get_var_offset (td, var0)); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, var1)); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, dvar0)); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, dvar1)); + + ins->next->opcode = MINT_NOP; + InterpInst *next = interp_next_ins (ins); + // We ensure that next->sregs [0] is not used again, it will no longer be set by intrinsic + if (next->opcode == MINT_MOV_4 && td->var_values && td->var_values [next->sregs [0]].ref_count == 1) { + if (next->sregs [0] == dvar0) { + ip [-2] = GINT_TO_UINT16 (get_var_offset (td, next->dreg)); + next->opcode = MINT_NOP; + } else if (next->sregs [0] == dvar1) { + ip [-1] = GINT_TO_UINT16 (get_var_offset (td, next->dreg)); + next->opcode = MINT_NOP; + } + } } else { opcode_emit: if (mono_interp_op_dregs [opcode]) - *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->dreg)); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->dreg)); if (mono_interp_op_sregs [opcode]) { for (int i = 0; i < mono_interp_op_sregs [opcode]; i++) { @@ -8531,12 +8698,12 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in int offset = td->param_area_offset + ins->info.call_info->call_offset; *ip++ = GINT_TO_UINT16 (offset); } else { - *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [i])); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->sregs [i])); } } } else if (opcode == MINT_LDLOCA_S) { // This opcode receives a local but it is not viewed as a sreg since we don't load the value - *ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [0])); + *ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->sregs [0])); } int left = interp_get_ins_length (ins) - GPTRDIFF_TO_INT(ip - start_ip); @@ -8592,6 +8759,7 @@ generate_compacted_code (InterpMethod *rtm, TransformData *td) for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { InterpInst *ins = bb->first_ins; bb->native_offset = GPTRDIFF_TO_INT (ip - td->new_code); + g_assert (bb->native_offset <= bb->native_offset_estimate); td->cbb = bb; #if HOST_BROWSER @@ -8603,7 +8771,7 @@ generate_compacted_code (InterpMethod *rtm, TransformData *td) if (bb->patchpoint_data) patchpoint_data_index = add_patchpoint_data (td, patchpoint_data_index, bb->native_offset, bb->index); - if (bb->emit_patchpoint) { + if (!td->optimized && bb->patchpoint_bb) { // Add patchpoint in unoptimized method *ip++ = MINT_TIER_PATCHPOINT; *ip++ = (guint16)bb->index; @@ -8694,6 +8862,40 @@ interp_fix_localloc_ret (TransformData *td) } } +static void +interp_squash_initlocals (TransformData *td) +{ + InterpInst *last_initlocal = NULL; + int last_start = 0, last_end = 0; + + for (InterpInst *ins = td->entry_bb->first_ins; ins != NULL; ins = ins->next) { + // Once we reach the real method code, we are finished with this pass + if (ins->il_offset != -1) + break; + if (ins->opcode == MINT_INITLOCAL) { + if (!last_initlocal) { + last_initlocal = ins; + last_start = get_var_offset (td, ins->dreg); + last_end = last_start + (int)ins->data [0]; + } else { + int new_start = get_var_offset (td, ins->dreg); + // We allow a maximum of 64 bytes of redundant memset when squashing initlocals + if (new_start >= last_end && new_start <= (last_end + 64)) { + last_initlocal->opcode = MINT_INITLOCALS; + last_initlocal->data [0] = GINT_TO_UINT16 (last_start); + last_end = new_start + ins->data [0]; + last_initlocal->data [1] = GINT_TO_UINT16 (last_end - last_start); + interp_clear_ins (ins); + } else { + last_initlocal = ins; + last_start = get_var_offset (td, ins->dreg); + last_end = last_start + ins->data [0]; + } + } + } + } +} + static int get_native_offset (TransformData *td, int il_offset) { @@ -8803,12 +9005,21 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG if (td->has_localloc) interp_fix_localloc_ret (td); - if (td->verbose_level) + if (td->verbose_level) { + g_print ("\nUnoptimized IR:\n"); mono_interp_print_td_code (td); + } if (td->optimized) { - interp_optimize_code (td); + MONO_TIME_TRACK (mono_interp_stats.optimize_time, interp_optimize_code (td)); interp_alloc_offsets (td); + // Offset allocator uses computed ref counts from var values. We have to free this + // table later here. + if (td->var_values != NULL) { + g_free (td->var_values); + td->var_values = NULL; + } + interp_squash_initlocals (td); #if HOST_BROWSER if (mono_interp_opt & INTERP_OPT_JITERPRETER) jiterp_insert_entry_points (rtm, td); diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index 1949698ca4a7a8..a10a517e99ebc5 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -14,6 +14,9 @@ #define INTERP_INST_FLAG_ACTIVE_CALL 64 // This instruction is protected by a clause #define INTERP_INST_FLAG_PROTECTED_NEWOBJ 128 +// This instruction bumps the liveness index. Enables liveness checks as new instructions +// are added in the code, since new instructions won't have this flag set. +#define INTERP_INST_FLAG_LIVENESS_MARKER 256 typedef struct _InterpInst InterpInst; typedef struct _InterpBasicBlock InterpBasicBlock; @@ -41,11 +44,15 @@ typedef struct #define VAR_VALUE_I8 3 #define VAR_VALUE_R4 4 #define VAR_VALUE_NON_NULL 5 +#define VAR_VALUE_COUNT 6 -// LocalValue contains data to construct an InterpInst that is equivalent with the contents -// of the stack slot / local / argument. typedef struct { - // Indicates the type of the stored information. It can be another local or a constant + guint32 bb_dfs_index; + guint32 ins_index; +} InterpLivenessPosition; + +typedef struct { + // Indicates the type of the stored information. It can be another var or a constant int type; // Holds the local index or the actual constant value union { @@ -55,9 +62,11 @@ typedef struct { float f; }; // The instruction that writes this local. - InterpInst *ins; - int def_index; - // ref count for ins->dreg + InterpInst *def; + // Liveness marker of the definition + InterpLivenessPosition liveness; + // The number of times this var is referenced. After optimizations + // this can become 0, in which case we can clear the def instruction. int ref_count; } InterpVarValue; @@ -78,6 +87,8 @@ struct _InterpInst { InterpBasicBlock *target_bb; InterpBasicBlock **target_bb_table; InterpCallInfo *call_info; + int *args; // for variable number of args, used only for phi + MonoBitSet *dead_phi_vars; // only for MINT_DEAD_PHI } info; // Variable data immediately following the dreg/sreg information. This is represented exactly // in the final code stream as in this array. @@ -98,6 +109,21 @@ struct _InterpBasicBlock { gint16 out_count; InterpBasicBlock **out_bb; + /* Index into td->bblocks */ + int dfs_index; + + /* Dominance frontier for this bblock */ + MonoBitSet *dfrontier; + + /* List of bblocks that are immediately dominated by this bblock */ + GSList *dominated; + + /* Live variable analysis, for vars in locals_ext */ + MonoBitSet *gen_set; + MonoBitSet *kill_set; + MonoBitSet *live_in_set; + MonoBitSet *live_out_set; + /* The real native offset of this bblock, computed when emitting the instructions in the code stream */ int native_offset; /* @@ -119,13 +145,15 @@ struct _InterpBasicBlock { int index; int jump_targets; + InterpBasicBlock *try_bblock; + // This will hold a list of last sequence points of incoming basic blocks SeqPoint **pred_seq_points; guint num_pred_seq_points; guint reachable : 1; // This block has special semantics and it shouldn't be optimized away - guint eh_block : 1; + guint preserve : 1; guint dead: 1; // This bblock is detectead early as being dead, we don't inline into it guint no_inlining: 1; @@ -133,7 +161,7 @@ struct _InterpBasicBlock { // InterpMethod. In the unoptimized method we will map from native offset to the bb_index while in the // optimized method we will map the bb_index to the corresponding native offset. guint patchpoint_data: 1; - guint emit_patchpoint: 1; + guint patchpoint_bb: 1; // used by jiterpreter guint backwards_branch_target: 1; guint contains_call_instruction: 1; @@ -174,6 +202,8 @@ typedef struct { int indirects; int offset; int size; + int ext_index; + GSList *declare_bbs; union { // live_start and live_end are used by the offset allocator for optimized code int live_start; @@ -187,21 +217,50 @@ typedef struct { // If var is INTERP_LOCAL_FLAG_CALL_ARGS, this is the call instruction using it. // Only used during var offset allocator InterpInst *call; - // For local vars, this represents the instruction declaring it. - // Only used during super instruction pass. - InterpInst *def; }; - guint dead : 1; guint execution_stack : 1; guint call_args : 1; guint global : 1; guint no_call_args : 1; - guint unknown_use : 1; - guint local_only : 1; guint simd : 1; // We use this flag to avoid addition of align field in InterpVar, for now + guint eh_var : 1; // This var is used inside a clause handler. It will not be in ssa form. + guint no_ssa : 1; // Var is not in ssa form, not subject to all optimizations + guint has_indirects : 1; // Var had ldloca applied to it, not subject to optimizations + guint il_global : 1; // Args and IL locals + guint renamed_ssa_fixed : 1; // If true, ext_index points to InterpRenamedVar, otherwise to InterpRenamableVar + guint def_arg : 1; // Var is a result of MINT_DEF_ARG. This var will have to be renamed back to the original arg var } InterpVar; +typedef struct { + int var_index; + GSList *ssa_stack; + // This liveness is bblock only. It is used during cprop to determine whether we + // can move the definition of a renamed fixed var earlier (if there are no conflicts with + // other renamed vars from the same var) + InterpLivenessPosition last_use_liveness; + + // Var that is global and might take part in phi opcodes + guint ssa_global : 1; + // IL locals/args. Vars included in phi opcodes. All renamed vars are allocated + // to the same offset. Optimizations need to ensure there is no overlapping liveness + guint ssa_fixed : 1; +} InterpRenamableVar; + +// In addition to InterpRenamableVar information, this stores liveness information that enables us +// to ensure that the liveness of the corresponding var is not overlapping with the other renamed vars, +// after optimization. +typedef struct { + int var_index; + int renamable_var_ext_index; + // Bit set of bblocks where the renamed var is live at the bb end + // This means that within these bblocks we can freely increase the var liveness + MonoBitSet *live_out_bblocks; + // This is a list of InterpLivenessPosition*, that indicates that in bblock with + // index bb_index, the var can have its liveness extended to at most inst_index + GSList *live_limit_bblocks; +} InterpRenamedFixedVar; + typedef struct { MonoMethod *method; @@ -235,6 +294,19 @@ typedef struct unsigned int vars_size; unsigned int vars_capacity; + // Additional information for vars that are renamable + InterpRenamableVar *renamable_vars; + unsigned int renamable_vars_size; + unsigned int renamable_vars_capacity; + + // Newly created, renamed vars of fixed vars. We compute liveness on this subset + // of vars so we ensure we don't have conflicting liveness. + unsigned int renamed_fixed_vars_size; + unsigned int renamed_fixed_vars_capacity; + InterpRenamedFixedVar *renamed_fixed_vars; + + InterpVarValue *var_values; + int n_data_items; int max_data_items; void **data_items; @@ -250,8 +322,13 @@ typedef struct GPtrArray *seq_points; InterpBasicBlock **offset_to_bb; InterpBasicBlock *entry_bb, *cbb; + InterpBasicBlock **bblocks; // ordering of bblocks in reverse postorder dfs + int bblocks_count_no_eh; + int bblocks_count_eh; + InterpBasicBlock **idoms; // immediate dominator for each bblock, index from reverse postorder dfs int bb_count; MonoMemPool *mempool; + MonoMemPool *opt_mempool; MonoMemoryManager *mem_manager; GList *basic_blocks; GPtrArray *relocs; @@ -273,6 +350,9 @@ typedef struct guint optimized : 1; guint has_invalid_code : 1; guint has_inlined_one_call : 1; + guint need_optimization_retry : 1; + guint disable_ssa : 1; + guint eh_vars_computed : 1; } TransformData; #define STACK_TYPE_I4 0 @@ -449,6 +529,12 @@ interp_alloc_global_var_offset (TransformData *td, int var); int interp_create_var (TransformData *td, MonoType *type); +int +interp_make_var_renamable (TransformData *td, int var); + +int +interp_create_renamed_fixed_var (TransformData *td, int var_index, int renamable_var_index); + void interp_foreach_ins_var (TransformData *td, InterpInst *ins, gpointer data, void (*callback)(TransformData*, int*, gpointer));