Skip to content

Commit

Permalink
Improve noinline use
Browse files Browse the repository at this point in the history
- introduce macros NOINLINE and NOINLINE_DECL
- adjust existing code in task.c
- rewrite change for jl_egal using NOINLINE
- add extensive comment explaining jl_egal change
  • Loading branch information
drepper committed Jan 12, 2015
1 parent 7251bff commit df443d3
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 43 deletions.
36 changes: 24 additions & 12 deletions src/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,27 @@ static int bits_equal(void *a, void *b, int sz)
}
}

#if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_)
static int __declspec(noinline)
#else
static int __attribute__((noinline))
#endif
compare_tuple(jl_value_t *a, jl_value_t *b)
// jl_egal
// The frequently used jl_egal function deserves special attention when it
// comes to performance, which is made challenging by the fact that the
// function has to handle quite a few different cases and because it is
// called recursively. To optimize performance, many special cases are
// handled with separate comparisons, which can dramatically reduce the run
// time of the function. The compiler can translate these simple tests
// with little effort, e.g., few registers are used.
//
// The complex cases require more effort and more registers to be translated
// efficiently. The affected cases include comparing tuples and fields. If
// the code to perform these operations were inlined into the jl_egal
// function, then the compiler would generate, at or close to the top of
// the function, a prologue which saves all the callee-saved registers and,
// at the end, the matching epilogue. The result is that even the fast cases
// are slowed down.
//
// The solution is to keep the code in jl_egal simple and split out the
// (more) complex cases into their own functions which are marked with
// NOINLINE.
static int NOINLINE compare_tuple(jl_value_t *a, jl_value_t *b)
{
size_t l = jl_tuple_len(a);
if (l != jl_tuple_len(b))
Expand All @@ -222,12 +237,9 @@ compare_tuple(jl_value_t *a, jl_value_t *b)
return 1;
}

#if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_)
static int __declspec(noinline)
#else
static int __attribute__((noinline))
#endif
compare_fields(jl_value_t *a, jl_value_t *b, jl_datatype_t *dt, size_t nf)
// See comment above for an explanation of NOINLINE.
static int NOINLINE compare_fields(jl_value_t *a, jl_value_t *b,
jl_datatype_t *dt, size_t nf)
{
for (size_t f=0; f < nf; f++) {
size_t offs = dt->fields[f].offset;
Expand Down
12 changes: 10 additions & 2 deletions src/support/dtypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,16 @@
# define STATIC_INLINE static __inline
# define INLINE __inline
#else
# define STATIC_INLINE static inline
# define INLINE inline
# define STATIC_INLINE static inline
# define INLINE inline
#endif

// NOINLINE / NOINLINE_DECL: compiler-independent spelling of the
// "do not inline this function" annotation.
//
// NOINLINE expands to the bare annotation and is written between the return
// type and the function name of a definition, e.g.
//     static int NOINLINE foo(int x) { ... }
//
// NOINLINE_DECL(f) wraps a whole prototype `f`. The two branches place the
// annotation differently: the __declspec form is emitted in front of the
// declarator, the __attribute__ form after it, e.g.
//     static void NOINLINE_DECL(foo(int x));
//
// The __declspec branch is selected for Windows builds that do not use MinGW;
// every other configuration uses the __attribute__ spelling.
#if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_)
# define NOINLINE __declspec(noinline)
# define NOINLINE_DECL(f) __declspec(noinline) f
#else
# define NOINLINE __attribute__((noinline))
# define NOINLINE_DECL(f) f __attribute__((noinline))
#endif

typedef int bool_t;
Expand Down
34 changes: 5 additions & 29 deletions src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,7 @@ static void boundlow(struct _probe_data *p)
}

// we need this function to exist so we can measure its stack frame!
#if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_)
static void __declspec(noinline) fill(struct _probe_data *p);
#else
static void fill(struct _probe_data *p) __attribute__ ((noinline));
#endif
static void NOINLINE_DECL(fill(struct _probe_data *p));

static void fill(struct _probe_data *p)
{
Expand Down Expand Up @@ -166,12 +162,7 @@ void *jl_stackbase;
static jl_jmp_buf jl_base_ctx; // base context of stack
#endif

#if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_)
static void __declspec(noinline)
#else
static void __attribute__((noinline))
#endif
save_stack(jl_task_t *t)
static void NOINLINE save_stack(jl_task_t *t)
{
if (t->state == done_sym || t->state == failed_sym)
return;
Expand All @@ -190,12 +181,7 @@ save_stack(jl_task_t *t)
memcpy(buf, (char*)&_x, nb);
}

#if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_)
void __declspec(noinline)
#else
void __attribute__((noinline))
#endif
restore_stack(jl_task_t *t, jl_jmp_buf *where, char *p)
void NOINLINE restore_stack(jl_task_t *t, jl_jmp_buf *where, char *p)
{
char *_x = (char*)jl_stackbase - t->ssize;
if (!p) {
Expand Down Expand Up @@ -235,12 +221,7 @@ static void NORETURN finish_task(jl_task_t *t, jl_value_t *resultval)
abort();
}

#if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_)
static void __declspec(noinline)
#else
static void __attribute__((noinline))
#endif
NORETURN start_task()
static void NOINLINE NORETURN start_task()
{
// this runs the first time we switch to a task
jl_task_t *t = jl_current_task;
Expand All @@ -251,12 +232,7 @@ NORETURN start_task()
}

#ifndef ASM_COPY_STACKS
#if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_)
static void __declspec(noinline)
#else
static void __attribute__((noinline))
#endif
set_base_ctx(char *__stk)
static void NOINLINE set_base_ctx(char *__stk)
{
if (jl_setjmp(jl_base_ctx, 1)) {
start_task();
Expand Down

1 comment on commit df443d3

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 i was just thinking of commenting that the NOINLINE pattern is used frequently enough it should probably have been abstracted into a macro

Please sign in to comment.