diff --git a/NEWS.md b/NEWS.md index 529a084fac6b9..b7e09e25deebe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,8 @@ Compiler/Runtime improvements Command-line option changes --------------------------- +* In Linux and Windows, `--threads=auto` now tries to infer the usable number of CPUs from the + process affinity, which is typically set in HPC and cloud environments ([#42340]). Multi-threading changes ----------------------- diff --git a/doc/man/julia.1 b/doc/man/julia.1 index 0b008619014e1..552c45eb131a2 100644 --- a/doc/man/julia.1 +++ b/doc/man/julia.1 @@ -103,7 +103,13 @@ Load immediately on all processors .TP -t, --threads -Enable n threads +Enable n threads; "auto" tries to infer a useful default number +of threads to use but the exact behavior might change in the future. +Currently, "auto" uses the number of CPUs assigned to this julia +process based on the OS-specific affinity assignment interface, if +supported (Linux and Windows). If this is not supported (macOS) or +process affinity is not configured, it uses the number of CPU +threads. .TP -p, --procs diff --git a/doc/src/manual/command-line-options.md b/doc/src/manual/command-line-options.md index f3ad39a6aed16..387c0d9d896bd 100644 --- a/doc/src/manual/command-line-options.md +++ b/doc/src/manual/command-line-options.md @@ -89,7 +89,7 @@ The following is a complete list of command-line switches available when launchi |`-e`, `--eval ` |Evaluate ``| |`-E`, `--print ` |Evaluate `` and display the result| |`-L`, `--load ` |Load `` immediately on all processors| -|`-t`, `--threads {N\|auto`} |Enable N threads; `auto` currently sets N to the number of local CPU threads but this might change in the future| +|`-t`, `--threads {N\|auto`} |Enable N threads; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future.
Currently, `auto` uses the number of CPUs assigned to this julia process based on the OS-specific affinity assignment interface, if supported (Linux and Windows). If this is not supported (macOS) or process affinity is not configured, it uses the number of CPU threads.| |`-p`, `--procs {N\|auto`} |Integer value N launches N additional local worker processes; `auto` launches as many workers as the number of local CPU threads (logical cores)| |`--machine-file ` |Run processes on hosts listed in ``| |`-i` |Interactive mode; REPL runs and `isinteractive()` is true| @@ -111,6 +111,8 @@ The following is a complete list of command-line switches available when launchi |`--track-allocation={none\|user\|all}` |Count bytes allocated by each source line| |`--track-allocation` |equivalent to `--track-allocation=user`| + + !!! compat "Julia 1.1" In Julia 1.0, the default `--project=@.` option did not search up from the root directory of a Git repository for the `Project.toml` file. From Julia 1.1 forward, it diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md index 14d42013ae2ff..48607ed5fa7e6 100644 --- a/doc/src/manual/multi-threading.md +++ b/doc/src/manual/multi-threading.md @@ -19,8 +19,8 @@ The number of execution threads is controlled either by using the specified, then `-t`/`--threads` takes precedence. The number of threads can either be specified as an integer (`--threads=4`) or as `auto` -(`--threads=auto`), where `auto` sets the number of threads to the number of local CPU -threads. +(`--threads=auto`), where `auto` tries to infer a useful default number of threads to use +(see [Command-line Options](@ref command-line-options) for more details). !!! compat "Julia 1.5" The `-t`/`--threads` command line argument requires at least Julia 1.5.
diff --git a/src/jloptions.c b/src/jloptions.c index 1ff4da7c5c10b..529b0c4dcad02 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -108,8 +108,13 @@ static const char opts[] = " -L, --load Load immediately on all processors\n\n" // parallel options - " -t, --threads {N|auto} Enable N threads; \"auto\" currently sets N to the number of local\n" - " CPU threads but this might change in the future\n" + " -t, --threads {N|auto} Enable N threads; \"auto\" tries to infer a useful default number\n" + " of threads to use but the exact behavior might change in the future.\n" + " Currently, \"auto\" uses the number of CPUs assigned to this julia\n" + " process based on the OS-specific affinity assignment interface, if\n" + " supported (Linux and Windows). If this is not supported (macOS) or\n" + " process affinity is not configured, it uses the number of CPU\n" + " threads.\n" " -p, --procs {N|auto} Integer value N launches N additional local worker processes\n" " \"auto\" launches as many workers as the number of local CPU threads (logical cores)\n" " --machine-file Run processes on hosts listed in \n\n" @@ -441,7 +446,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) case 'p': // procs errno = 0; if (!strcmp(optarg,"auto")) { - jl_options.nprocs = jl_cpu_threads(); + jl_options.nprocs = jl_effective_threads(); } else { long nprocs = strtol(optarg, &endptr, 10); diff --git a/src/julia_internal.h b/src/julia_internal.h index 6ec9c52348b07..0fe527e2041a8 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -766,6 +766,7 @@ extern JL_DLLEXPORT ssize_t jl_tls_offset; extern JL_DLLEXPORT const int jl_tls_elf_support; void jl_init_threading(void); void jl_start_threads(void); +int jl_effective_threads(void); // Whether the GC is running extern char *jl_safepoint_pages; diff --git a/src/sys.c b/src/sys.c index 2538eaf62163c..39682b24ef0b0 100644 --- a/src/sys.c +++ b/src/sys.c @@ -661,6 +661,29 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT 
#endif
 }

+// Thread count for "auto": CPUs in the calling thread's affinity mask, capped at
+// jl_cpu_threads(); falls back to jl_cpu_threads() under rr or if the mask is unavailable.
+int jl_effective_threads(void) JL_NOTSAFEPOINT
+{
+    int cpu = jl_cpu_threads();
+    int masksize = uv_cpumask_size();
+    if (masksize < 0 || jl_running_under_rr(0)) // negative size is a libuv error code
+        return cpu;
+    uv_thread_t tid = uv_thread_self();
+    char *cpumask = (char *)calloc(masksize, sizeof(char)); // summed below, one byte per CPU
+    int err = cpumask ? uv_thread_getaffinity(&tid, cpumask, masksize) : UV_ENOMEM;
+    if (err) { // also taken when calloc failed, so libuv never sees a NULL mask
+        free(cpumask);
+        jl_safe_printf("WARNING: failed to get thread affinity (%s %d)\n", uv_err_name(err), err);
+        return cpu;
+    }
+    int n = 0;
+    for (int i = 0; i < masksize; i++) // int index: masksize is a signed int
+        n += cpumask[i];
+    free(cpumask);
+    return n < cpu ? n : cpu;
+}
+
 // -- high resolution timers --

 // Returns time in nanosec
diff --git a/src/threading.c b/src/threading.c
index 2f50783dafaf0..ab04100083d35 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -454,7 +454,7 @@ void jl_init_threading(void)
     // how many threads available, usable
     jl_n_threads = JULIA_NUM_THREADS;
     if (jl_options.nthreads < 0) { // --threads=auto
-        jl_n_threads = jl_cpu_threads();
+        jl_n_threads = jl_effective_threads();
     }
     else if (jl_options.nthreads > 0) { // --threads=N
         jl_n_threads = jl_options.nthreads;
@@ -463,7 +463,7 @@ void jl_init_threading(void)
         if (strcmp(cp, "auto"))
             jl_n_threads = (uint64_t)strtol(cp, NULL, 10); // ENV[NUM_THREADS_NAME] == "N"
         else
-            jl_n_threads = jl_cpu_threads(); // ENV[NUM_THREADS_NAME] == "auto"
+            jl_n_threads = jl_effective_threads(); // ENV[NUM_THREADS_NAME] == "auto"
     }
     if (jl_n_threads <= 0)
         jl_n_threads = 1;
diff --git a/test/threads.jl b/test/threads.jl
index 718358f847dd5..dde50590ae08b 100644
--- a/test/threads.jl
+++ b/test/threads.jl
@@ -93,13 +93,42 @@ else
 end
 # Note also that libuv does not support affinity in macOS and it is known to
 # hang in FreeBSD.
So, it's tested only in Linux and Windows:
-if Sys.islinux() || Sys.iswindows()
-    if Sys.CPU_THREADS > 1 && !running_under_rr()
+const AFFINITY_SUPPORTED = (Sys.islinux() || Sys.iswindows()) && !running_under_rr()
+
+if AFFINITY_SUPPORTED
+    if Sys.CPU_THREADS > 1
         @test run_with_affinity([2]) == "2"
         @test run_with_affinity([1, 2]) == "1,2"
     end
 end

+function get_nthreads(options = ``; cpus = nothing)  # Threads.nthreads() of a child julia
+    cmd = `$(Base.julia_cmd()) --startup-file=no $(options)`
+    cmd = `$cmd -e "print(Threads.nthreads())"`
+    cmd = addenv(cmd, "JULIA_EXCLUSIVE" => "0", "JULIA_NUM_THREADS" => "auto")  # "auto" via the environment
+    if cpus !== nothing
+        cmd = setcpuaffinity(cmd, cpus)  # restrict the child to the given CPU list
+    end
+    return parse(Int, read(cmd, String))  # the child prints only the thread count
+end
+
+@testset "nthreads determined based on CPU affinity" begin
+    if AFFINITY_SUPPORTED && Sys.CPU_THREADS ≥ 2
+        @test get_nthreads() ≥ 2  # no affinity requested for the child
+        @test get_nthreads(cpus = [1]) == 1
+        @test get_nthreads(cpus = [2]) == 1
+        @test get_nthreads(cpus = [1, 2]) == 2
+        @test get_nthreads(`-t1`, cpus = [1]) == 1  # explicit -t1: expect 1 regardless of cpus
+        @test get_nthreads(`-t1`, cpus = [2]) == 1
+        @test get_nthreads(`-t1`, cpus = [1, 2]) == 1
+
+        if Sys.CPU_THREADS ≥ 3  # CPU 3 is referenced below
+            @test get_nthreads(cpus = [1, 3]) == 2
+            @test get_nthreads(cpus = [2, 3]) == 2
+        end
+    end
+end
+
 # issue #34769
 function idle_callback(handle)
     idle = @Base.handle_as handle UvTestIdle