Skip to content

Commit

Permalink
Add profiling of already running tasks via SIGINFO/SIGUSR1 (JuliaLang…
Browse files Browse the repository at this point in the history
  • Loading branch information
IanButterworth authored Feb 15, 2022
1 parent 07f0fdb commit a9aad97
Show file tree
Hide file tree
Showing 10 changed files with 277 additions and 41 deletions.
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,14 +146,17 @@ Standard library changes
* Now uses `textwidth` for formatting `%s` and `%c` widths ([#41085]).

#### Profile
* Profiling now records sample metadata including thread and task. `Profile.print()` has a new `groupby` kwarg that allows
* CPU profiling now records sample metadata including thread and task. `Profile.print()` has a new `groupby` kwarg that allows
grouping by thread, task, or nested thread/task, task/thread, and `threads` and `tasks` kwargs to allow filtering.
Further, percent utilization is now reported as a total or per-thread, based on whether the thread is idle or not at
each sample. `Profile.fetch()` by default strips out the new metadata to ensure backwards compatibility with external
profiling data consumers, but can be included with the `include_meta` kwarg. ([#41742])
* The new `Profile.Allocs` module allows memory allocations to be profiled. The stack trace, type, and size of each
allocation is recorded, and a `sample_rate` argument allows a tunable amount of allocations to be skipped,
reducing performance overhead. ([#42768])
* A fixed duration cpu profile can now be triggered by the user during running tasks without `Profile` being loaded
first and the report will show during execution. On MacOS & FreeBSD press `ctrl-t` or raise a `SIGINFO`.
For other platforms raise a `SIGUSR1` i.e. `% kill -USR1 $julia_pid`. Not currently available on windows. ([#43179])

#### Random

Expand Down
1 change: 1 addition & 0 deletions contrib/generate_precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ Profile = get(Base.loaded_modules,
Base.PkgId(Base.UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile"),
nothing)
if Profile !== nothing
repl_script *= Profile.precompile_script
hardcoded_precompile_statements *= """
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt})
precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}})
Expand Down
4 changes: 4 additions & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ int jl_running_under_rr(int recheck) JL_NOTSAFEPOINT;
// Returns time in nanosec
JL_DLLEXPORT uint64_t jl_hrtime(void) JL_NOTSAFEPOINT;

JL_DLLEXPORT void jl_set_peek_cond(uintptr_t);
JL_DLLEXPORT double jl_get_profile_peek_duration(void);
JL_DLLEXPORT void jl_set_profile_peek_duration(double);

// number of cycles since power-on
static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT
{
Expand Down
29 changes: 29 additions & 0 deletions src/signal-handling.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,35 @@ static size_t jl_safe_read_mem(const volatile char *ptr, char *out, size_t len)
return i;
}

static double profile_autostop_time = -1.0;
static double profile_peek_duration = 1.0; // seconds

double jl_get_profile_peek_duration(void) {
return profile_peek_duration;
}
void jl_set_profile_peek_duration(double t) {
profile_peek_duration = t;
return;
}

uintptr_t profile_show_peek_cond_loc;
JL_DLLEXPORT void jl_set_peek_cond(uintptr_t cond)
{
profile_show_peek_cond_loc = cond;
return;
}

static void jl_check_profile_autostop(void) {
if ((profile_autostop_time != -1.0) && (jl_hrtime() > profile_autostop_time)) {
profile_autostop_time = -1.0;
jl_profile_stop_timer();
jl_safe_printf("\n==============================================================\n");
jl_safe_printf("Profile collected. A report will print at the next yield point\n");
jl_safe_printf("==============================================================\n\n");
uv_async_send((uv_async_t*)profile_show_peek_cond_loc);
}
}

#if defined(_WIN32)
#include "signals-win.c"
#else
Expand Down
1 change: 1 addition & 0 deletions src/signals-mach.c
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,7 @@ void *mach_profile_listener(void *arg)
}
jl_unlock_profile_mach(0, keymgr_locked);
if (running) {
jl_check_profile_autostop();
// Reset the alarm
kern_return_t ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port);
HANDLE_MACH_ERROR("clock_alarm", ret)
Expand Down
35 changes: 29 additions & 6 deletions src/signals-unix.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ bt_context_t *jl_to_bt_context(void *sigctx)
#endif
}


static int thread0_exit_count = 0;
static void jl_exit_thread0(int exitstate, jl_bt_element_t *bt_data, size_t bt_size);

Expand Down Expand Up @@ -519,17 +518,22 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
sigprof.sigev_notify = SIGEV_SIGNAL;
sigprof.sigev_signo = SIGUSR1;
sigprof.sigev_value.sival_ptr = &timerprof;
if (timer_create(CLOCK_REALTIME, &sigprof, &timerprof) == -1)
// Because SIGUSR1 is multipurpose, set `running` before so that we know that the first SIGUSR1 came from the timer
running = 1;
if (timer_create(CLOCK_REALTIME, &sigprof, &timerprof) == -1) {
running = 0;
return -2;
}

// Start the timer
itsprof.it_interval.tv_sec = 0;
itsprof.it_interval.tv_nsec = 0;
itsprof.it_value.tv_sec = nsecprof / GIGA;
itsprof.it_value.tv_nsec = nsecprof % GIGA;
if (timer_settime(timerprof, 0, &itsprof, NULL) == -1)
if (timer_settime(timerprof, 0, &itsprof, NULL) == -1) {
running = 0;
return -3;
running = 1;
}
return 0;
}

Expand Down Expand Up @@ -648,6 +652,18 @@ static void kqueue_signal(int *sigqueue, struct kevent *ev, int sig)
}
#endif

void trigger_profile_peek(void)
{
jl_safe_printf("\n======================================================================================\n");
jl_safe_printf("Information request received. A stacktrace will print followed by a %.1f second profile\n", profile_peek_duration);
jl_safe_printf("======================================================================================\n");
bt_size_cur = 0; // clear profile buffer
if (jl_profile_start_timer() < 0)
jl_safe_printf("ERROR: Could not start profile timer\n");
else
profile_autostop_time = jl_hrtime() + (profile_peek_duration * 1e9);
}

static void *signal_listener(void *arg)
{
static jl_bt_element_t bt_data[JL_MAX_BT_SIZE + 1];
Expand Down Expand Up @@ -753,11 +769,17 @@ static void *signal_listener(void *arg)

int doexit = critical;
#ifdef SIGINFO
if (sig == SIGINFO)
if (sig == SIGINFO) {
if (running != 1)
trigger_profile_peek();
doexit = 0;
}
#else
if (sig == SIGUSR1)
if (sig == SIGUSR1) {
if (running != 1)
trigger_profile_peek();
doexit = 0;
}
#endif

bt_size = 0;
Expand Down Expand Up @@ -835,6 +857,7 @@ static void *signal_listener(void *arg)
}
#ifndef HAVE_MACH
if (profile && running) {
jl_check_profile_autostop();
#if defined(HAVE_TIMER)
timer_settime(timerprof, 0, &itsprof, NULL);
#elif defined(HAVE_ITIMER)
Expand Down
1 change: 1 addition & 0 deletions src/signals-win.c
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
jl_gc_debug_critical_error();
abort();
}
jl_check_profile_autostop();
}
}
}
Expand Down
75 changes: 75 additions & 0 deletions stdlib/Profile/docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,79 @@

## CPU Profiling

There are two main approaches to CPU profiling julia code:

## Via `@profile`

Where profiling is enabled for a given call via the `@profile` macro.

```julia-repl
julia> using Profile
julia> @profile foo()
julia> Profile.print()
Overhead ╎ [+additional indent] Count File:Line; Function
=========================================================
╎147 @Base/client.jl:506; _start()
╎ 147 @Base/client.jl:318; exec_options(opts::Base.JLOptions)
...
```

## Triggered During Execution

Tasks that are already running can also be profiled for a fixed time period at any user-triggered time.

To trigger the profiling:
- MacOS & FreeBSD (BSD-based platforms): Use `ctrl-t` or pass a `SIGINFO` signal to the julia process i.e. `% kill -INFO $julia_pid`
- Linux: Pass a `SIGUSR1` signal to the julia process i.e. `% kill -USR1 $julia_pid`
- Windows: Not currently supported.

First, a single stack trace at the instant that the signal was thrown is shown, then a 1 second profile is collected,
followed by the profile report at the next yield point, which may be at task completion for code without yield points
e.g. tight loops.

```julia-repl
julia> foo()
##== the user sends a trigger while foo is running ==##
load: 2.53 cmd: julia 88903 running 6.16u 0.97s
======================================================================================
Information request received. A stacktrace will print followed by a 1.0 second profile
======================================================================================
signal (29): Information request: 29
__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
_pthread_cond_wait at /usr/lib/system/libsystem_pthread.dylib (unknown line)
...
======================================================================
Profile collected. A report will print if the Profile module is loaded
======================================================================
Overhead ╎ [+additional indent] Count File:Line; Function
=========================================================
Thread 1 Task 0x000000011687c010 Total snapshots: 572. Utilization: 100%
╎147 @Base/client.jl:506; _start()
╎ 147 @Base/client.jl:318; exec_options(opts::Base.JLOptions)
...
Thread 2 Task 0x0000000116960010 Total snapshots: 572. Utilization: 0%
╎572 @Base/task.jl:587; task_done_hook(t::Task)
╎ 572 @Base/task.jl:879; wait()
...
```

### Customization

The duration of the profiling can be adjusted via [`Profile.set_peek_duration`](@ref)

The profile report is broken down by thread and task. Pass a no-arg function to `Profile.peek_report[]` to override this.
i.e. `Profile.peek_report[] = () -> Profile.print()` to remove any grouping. This could also be overridden by an external
profile data consumer.

## Reference

```@docs
Profile.@profile
```
Expand All @@ -16,6 +89,8 @@ Profile.fetch
Profile.retrieve
Profile.callers
Profile.clear_malloc_data
Profile.get_peek_duration
Profile.set_peek_duration
```

## Memory profiling
Expand Down
Loading

0 comments on commit a9aad97

Please sign in to comment.