Exclude function-rooted traces from global_State->lasttrace

The variable global_State.lasttrace records the trace that is "to blame" by the profiler when the bytecode interpreter is running. This had previously been defined as the trace that most recently exited to the interpreter. Now it is restricted to exclude function-rooted traces and instead to always blame the most recently exited loop-rooted tace. This is based on the assumption that calling function-rooted traces is usually a consequence of running interpreted rather than the root cause. First a looping trace fails to compile (e.g. "fallback to interpreter" side-exit) and then the interpreter may execute a series of function-rooted traces before it enters a loop again. This makes the profiler information more actionable in common cases. The profiler will count time spent in the interpreter to the trace that most likely needs to be changed if you want to eliminate that interpreter time. Here is an example program: function save(x) saved = x end for i = 1, 1e4 do for i = 1, 1e3 do save(i) end for i = 1, 1e3 do save(function() end) end end This code runs in the interpreter because it includes an NYI i.e. creation of a closure with 'function() end'. This causes the second loop to run interpreted, which causes save(x) to get hot and be compiled. Previously RaptorJIT would have blamed the VM time on the trace for 'function save(x)' since that is the most recently executed trace when the profiler wakes up. However, with this change the profiler will assume that the function-rooted trace is a consequence of running interpreted rather than the root cause and so it will instead "blame" the exit from the first loop -- which is good because this is the point in the source code where we will find the root cause for ending up running interpreted (we will see the relevant trace aborts there.)
eugeneia · Aug 1, 2018 · 2c5faa8 · 2c5faa8
1 parent 9abd48a
commit 2c5faa8
Show file tree

Hide file tree

Showing 4 changed files with 28 additions and 8 deletions.
diff --git a/src/lj_asm.c b/src/lj_asm.c
@@ -1953,6 +1953,23 @@ static void asm_setup_regsp(ASMState *as)
 
 /* -- Assembler core ------------------------------------------------------ */
 
+/* Do we want the profiler to attribute VM time to this trace?
+ *
+ * Not if the root of this trace is a Lua function. We assume that the
+ * root cause of running the interpreter is a loop that failed to
+ * compile somewhere and that entry/exit through function traces is
+ * only noise that should be filtered out.
+ *
+ * This helps the profiler to point out the code that needs to be
+ * changed to reduce time spent in the interpreter.
+ */
+static int asm_should_profile_exit(jit_State *J, GCtrace *T)
+{
+ GCtrace *root = traceref(J, T->root ? T->root : T->traceno);
+ BCOp op = bc_op(root->startins);
+ return op != BC_FUNCF && op != BC_FUNCV;
+}
+
 /* Assemble a trace. */
 void lj_asm_trace(jit_State *J, GCtrace *T)
 {
@@ -2103,7 +2120,8 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
  T->mcode = as->mcp;
  T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
  if (!as->loopref)
- asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
+ /* Note: this may change as->mctop! */
+ asm_tail_fixup(as, T->link, asm_should_profile_exit(J, T));
  T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
  lj_mcode_sync(T->mcode, origtop);
 }

diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
@@ -2273,7 +2273,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
 /* -- Tail of trace ------------------------------------------------------- */
 
 /* Fixup the tail code. */
-static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+static void asm_tail_fixup(ASMState *as, TraceNo lnk, int track)
 {
  /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */
  MCode *p = as->mctop;
@@ -2304,7 +2304,9 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
  }
  }
  /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
+ target = (lnk ? traceref(as->J, lnk)->mcode :
+ (track ? (MCode *)lj_vm_exit_interp :
+ (MCode *)lj_vm_exit_interp_notrack));
  *(int32_t *)(p-4) = jmprel(p, target);
  p[-5] = XI_JMP;
  /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */

diff --git a/src/lj_vm.h b/src/lj_vm.h
@@ -38,6 +38,7 @@ LJ_ASMF void lj_vm_callhook(void);
 /* Trace exit handling. */
 LJ_ASMF void lj_vm_exit_handler(void);
 LJ_ASMF void lj_vm_exit_interp(void);
+LJ_ASMF void lj_vm_exit_interp_notrack(void);
 
 /* Internal math helper functions. */
 LJ_ASMF double lj_vm_floor(double);

diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
@@ -2046,14 +2046,13 @@ static void build_subroutines(BuildCtx *ctx)
  | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
  | jmp >1
  |->vm_exit_interp:
+ | // Record which trace exited to the interpreter.
+ | mov TMPRd, dword [DISPATCH+DISPATCH_GL(vmstate)]
+ | mov dword [DISPATCH+DISPATCH_GL(lasttrace)], TMPRd
+ |->vm_exit_interp_notrack:
  | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
  | // Restore additional callee-save registers only used in compiled code.
  | lea RA, [rsp+16]
- | // Record which trace exited to the interpreter (if called from a trace)
- | mov TMPRd, dword [DISPATCH+DISPATCH_GL(vmstate)]
- | cmp TMPRd, 1
- | jb >1
- | mov dword [DISPATCH+DISPATCH_GL(lasttrace)], TMPRd
  |1:
  | mov r13, [RA-8]
  | mov r12, [RA]