From 4c73cd67ca9a251175f32acdd690bc2d92a9ef21 Mon Sep 17 00:00:00 2001 From: David 'Digit' Turner Date: Fri, 13 Sep 2024 16:46:46 +0200 Subject: [PATCH] Implement jobserver pool in Ninja. This allows Ninja to implement a jobserver-style pool of job slots, to better coordinate parallel jobs between spawned processes which compete for CPU cores/threads. With this feature, Ninja no longer needs to be invoked from GNU Make or a script like misc/jobserver_pool.py. NOTE: This implementation is basic and doesn't support broken protocol clients that release more tokens than they acquired. If your build includes these, expect severe build performance degradation. To enable this, use --jobserver or --jobserver=MODE on the command-line, where MODE is one of the following values: 0 Do not enable the feature (the default) 1 Enable the feature, using the best mode for the current system. pipe Implement the pool with an anonymous pipe (Posix only). fifo Implement the pool with a FIFO file (Posix only). sem Implement the pool with a Win32 semaphore (Windows only). NOTE: The `fifo` mode is only supported by GNU Make 4.4 and later, and many older clients may not support it. Alternatively, set the NINJA_JOBSERVER environment variable to one of these values to activate it without a command-line option. Note that if MAKEFLAGS is set in the environment, Ninja assumes that it is already running in the context of another jobserver and will not try to create its own pool. 
--- .github/workflows/linux.yml | 3 + doc/manual.asciidoc | 47 ++++++++++----- misc/jobserver_test.py | 62 ++++++++++++++++++++ src/build.h | 1 + src/ninja.cc | 113 ++++++++++++++++++++++++++++++++---- 5 files changed, 201 insertions(+), 25 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 9cbc8a79cd..e4f8c461d7 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -27,6 +27,7 @@ jobs: run: | ./ninja_test ../../misc/output_test.py + ../../misc/jobserver_test.py - name: Build release ninja run: ninja -f build-Release.ninja working-directory: build @@ -35,6 +36,7 @@ jobs: run: | ./ninja_test ../../misc/output_test.py + ../../misc/jobserver_test.py build: runs-on: [ubuntu-latest] @@ -170,6 +172,7 @@ jobs: ./ninja all python3 misc/ninja_syntax_test.py ./misc/output_test.py + ./misc/jobserver_test.py build-aarch64: name: Build Linux ARM64 diff --git a/doc/manual.asciidoc b/doc/manual.asciidoc index 6ab50c8c71..8319424206 100644 --- a/doc/manual.asciidoc +++ b/doc/manual.asciidoc @@ -190,31 +190,49 @@ you don't need to pass `-j`.) GNU Jobserver support ~~~~~~~~~~~~~~~~~~~~~ -Since version 1.13., Ninja builds can follow the +Since version 1.13., Ninja builds support the https://https://www.gnu.org/software/make/manual/html_node/Job-Slots.html[GNU Make jobserver] -client protocol (on Posix systems). This is useful when Ninja -is invoked as part of a larger build system controlled by a top-level -GNU Make instance, as it allows better coordination between -concurrent build tasks. +protocol (on Posix systems). It supports both client and +server modes. -This feature is automatically enabled under the following -conditions: +Client mode is useful when Ninja is invoked as part of a larger +build system controlled by a top-level GNU Make instance, as it +allows better coordination between concurrent build tasks. 
+ +Server mode is useful when Ninja is the top-level build tool that +invokes sub-builds recursively in a similar setup. + +To enable server mode, use `--jobserver` or `--jobserver=MODE` +on the command line, or set `NINJA_JOBSERVER=MODE` in your +environment, where `MODE` can be one of the following values: + +`0`: Do not enable the feature (the default) +`1`: Enable the feature, using the best mode for the current system. +`pipe`: Enable the feature, implemented with an anonymous pipe (Posix only). +`fifo`: Enable the feature, implemented with a FIFO file path (Posix only). +`sem`: Enable the feature, implemented with a Win32 semaphore (Windows only). + +Note that `--jobserver` is equivalent to `--jobserver=1`. + +Otherwise, the client feature is automatically enabled for builds +(not tools) under the following conditions: - Dry-run (i.e. `-n` or `--dry-run`) is not enabled. -- Neither `-j1` (no parallelism) or `-j0` (infinite parallelism) - are specified on the Ninja command line. +- `-j1` (no parallelism) is not used on the command line. + Note that `-j0` means "infinite" parallelism and does not + disable client mode. - The `MAKEFLAGS` environment variable is defined and describes a valid jobserver mode using `--jobserver-auth` or even `--jobserver-fds`. -In this case, Ninja will use the jobserver pool of job slots +In this case, Ninja will use the shared pool of job slots to control parallelism, instead of its default implementation of `-j`. -Note that load-average limitations (i.e. when using `-l`) -are still being enforced in this mode. +Note that other parallelism limitations (such as `-l`) are *still* +being enforced in this mode. Environment variables ~~~~~~~~~~~~~~~~~~~~~ @@ -244,9 +262,8 @@ The default progress status is `"[%f/%t] "` (note the trailing space to separate from the build rule). Another example of possible progress status could be `"[%u/%r/%f] "`. 
-If `MAKEFLAGS` is defined in the environment, if may alter how -Ninja dispatches parallel build commands. See the GNU Jobserver support -section for details. +`NINJA_JOBSERVER` and `MAKEFLAGS` may impact how Ninja dispatches +parallel jobs, as described in the "GNU Jobserver support" section. Extra tools ~~~~~~~~~~~ diff --git a/misc/jobserver_test.py b/misc/jobserver_test.py index ccde8e1a0e..172349f9fc 100755 --- a/misc/jobserver_test.py +++ b/misc/jobserver_test.py @@ -241,6 +241,68 @@ def test_client_passes_MAKEFLAGS(self): prefix_args=[sys.executable, "-S", _JOBSERVER_POOL_SCRIPT, "--check"] ) + def _run_pool_test(self, mode: str) -> None: + task_count = 10 + build_plan = generate_build_plan(task_count) + extra_env = {"NINJA_JOBSERVER": mode} + with BuildDir(build_plan) as b: + # First, run the full 10 tasks with 10 tokens, this should allow all + # tasks to run in parallel. + b.ninja_run([f"-j{task_count}", "all"], extra_env=extra_env) + max_overlaps = compute_max_overlapped_spans(b.path, task_count) + self.assertEqual(max_overlaps, 10) + + # Second, use 4 tokens only, and verify that this was enforced by Ninja. + b.ninja_clean() + b.ninja_run(["-j4", "all"], extra_env=extra_env) + max_overlaps = compute_max_overlapped_spans(b.path, task_count) + self.assertEqual(max_overlaps, 4) + + # Finally, verify that -j1 serializes all tasks. 
+ b.ninja_clean() + b.ninja_run(["-j1", "all"], extra_env=extra_env) + max_overlaps = compute_max_overlapped_spans(b.path, task_count) + self.assertEqual(max_overlaps, 1) + + def test_jobserver_pool_with_default_mode(self): + self._run_pool_test("1") + + def test_server_passes_MAKEFLAGS(self): + self._test_MAKEFLAGS_value(ninja_args=["--jobserver"]) + + def _verify_NINJA_JOBSERVER_value( + self, expected_value, ninja_args=[], env_vars={}, msg=None + ): + build_plan = r""" +rule print + command = echo NINJA_JOBSERVER="[$$NINJA_JOBSERVER]" + +build all: print +""" + env = dict(os.environ) + env.update(env_vars) + + with BuildDir(build_plan) as b: + extra_env = {"NINJA_JOBSERVER": "1"} + ret = b.ninja_spawn(["--quiet"] + ninja_args + ["all"], extra_env=extra_env) + self.assertEqual(ret.returncode, 0) + self.assertEqual( + ret.stdout.strip(), f"NINJA_JOBSERVER=[{expected_value}]", msg=msg + ) + + def test_server_unsets_NINJA_JOBSERVER(self): + env_jobserver_1 = {"NINJA_JOBSERVER": "1"} + self._verify_NINJA_JOBSERVER_value("", env_vars=env_jobserver_1) + self._verify_NINJA_JOBSERVER_value("", ninja_args=["--jobserver"]) + + @unittest.skipIf(_PLATFORM_IS_WINDOWS, "These test methods do not work on Windows") + def test_jobserver_pool_with_posix_pipe(self): + self._run_pool_test("pipe") + + @unittest.skipIf(_PLATFORM_IS_WINDOWS, "These test methods do not work on Windows") + def test_jobserver_pool_with_posix_fifo(self): + self._run_pool_test("fifo") + if __name__ == "__main__": unittest.main() diff --git a/src/build.h b/src/build.h index 9264b71ae0..fb511e21e4 100644 --- a/src/build.h +++ b/src/build.h @@ -184,6 +184,7 @@ struct BuildConfig { /// means that we do not have any limit. double max_load_average; DepfileParserOptions depfile_parser_options; + Jobserver::Config::Mode jobserver_mode = Jobserver::Config::kModeNone; }; /// Builder wraps the build process: starting commands, updating status. 
diff --git a/src/ninja.cc b/src/ninja.cc index e65eec8a80..944994a630 100644 --- a/src/ninja.cc +++ b/src/ninja.cc @@ -1372,8 +1372,8 @@ int NinjaMain::RunBuild(int argc, char** argv, Status* status) { Builder builder(&state_, config_, &build_log_, &deps_log_, &disk_interface_, status, start_time_millis_); - // Detect jobserver context and inject Jobserver::Client into the builder - // if needed. + // If MAKEFLAGS is set, only setup a Jobserver client if needed. + // (this means that an empty MAKEFLAGS value disables the feature). std::unique_ptr jobserver_client; // Determine whether to use a Jobserver client in this build. @@ -1502,15 +1502,16 @@ int ReadFlags(int* argc, char*** argv, Options* options, BuildConfig* config) { DeferGuessParallelism deferGuessParallelism(config); - enum { OPT_VERSION = 1, OPT_QUIET = 2 }; - const option kLongOptions[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, OPT_VERSION }, - { "verbose", no_argument, NULL, 'v' }, - { "quiet", no_argument, NULL, OPT_QUIET }, - { NULL, 0, NULL, 0 } - }; + enum { OPT_VERSION = 1, OPT_QUIET = 2, OPT_JOBSERVER = 3 }; + const option kLongOptions[] = { { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, OPT_VERSION }, + { "verbose", no_argument, NULL, 'v' }, + { "quiet", no_argument, NULL, OPT_QUIET }, + { "jobserver", optional_argument, NULL, + OPT_JOBSERVER }, + { NULL, 0, NULL, 0 } }; + const char* jobserver_mode = nullptr; int opt; while (!options->tool && (opt = getopt_long(*argc, *argv, "d:f:j:k:l:nt:vw:C:h", kLongOptions, @@ -1579,6 +1580,9 @@ int ReadFlags(int* argc, char*** argv, case OPT_VERSION: printf("%s\n", kNinjaVersion); return 0; + case OPT_JOBSERVER: + jobserver_mode = optarg ? 
optarg : "1"; + break; case 'h': default: deferGuessParallelism.Refresh(); @@ -1589,6 +1593,29 @@ int ReadFlags(int* argc, char*** argv, *argv += optind; *argc -= optind; + // If an explicit --jobserver has not been used, lookup the NINJA_JOBSERVER + // environment variable. Ignore it if parallelism was set explicitly on the + // command line though (and warn about it). + if (jobserver_mode == nullptr) { + jobserver_mode = getenv("NINJA_JOBSERVER"); + if (jobserver_mode && !deferGuessParallelism.needGuess) { + if (!config->dry_run && config->verbosity > BuildConfig::QUIET) + Warning( + "Explicit parallelism (-j), ignoring NINJA_JOBSERVER environment " + "variable."); + jobserver_mode = nullptr; + } + } + if (jobserver_mode) { + auto ret = Jobserver::Config::ModeFromString(jobserver_mode); + config->jobserver_mode = ret.second; + if (!ret.first && !config->dry_run && + config->verbosity > BuildConfig::QUIET) { + Warning("Invalid jobserver mode '%s': Must be one of: %s", jobserver_mode, + Jobserver::Config::GetValidModesListAsString(", ").c_str()); + } + } + return -1; } @@ -1628,6 +1655,72 @@ NORETURN void real_main(int argc, char** argv) { exit((ninja.*options.tool->func)(&options, argc, argv)); } + // Determine whether to setup a Jobserver pool. This depends on + // --jobserver or --jobserver=MODE being passed on the command-line, + // or NINJA_JOBSERVER=MODE being set in the environment. + // + // This must be ignored if a tool is being used, or no/infinite + // parallelism is being asked. + // + // At the moment, this overrides any MAKEFLAGS definition in + // the environment. + std::unique_ptr jobserver_pool; + + do { + if (options.tool) // Do not setup pool when a tool is used. + break; + + if (config.parallelism == 1 || config.parallelism == INT_MAX) { + // No-parallelism (-j1) or infinite parallelism (-j0) was specified. 
+ break; + } + + if (config.jobserver_mode == Jobserver::Config::kModeNone) { + // --jobserver was not used, and NINJA_JOBSERVER is not set. + break; + } + + if (config.verbosity >= BuildConfig::VERBOSE) + status->Info("Creating jobserver pool for %d parallel jobs", + config.parallelism); + + std::string err; + jobserver_pool = Jobserver::Pool::Create( + static_cast(config.parallelism), config.jobserver_mode, &err); + if (!jobserver_pool.get()) { + if (config.verbosity > BuildConfig::QUIET) + status->Warning("Jobserver pool creation failed: %s", err.c_str()); + break; + } + + std::string makeflags = jobserver_pool->GetEnvMakeFlagsValue(); + + // Set or override the MAKEFLAGS environment variable in + // the current process. This ensures it is passed to sub-commands + // as well. +#ifdef _WIN32 + // TODO(digit): Verify that this works correctly on Win32. + // this code assumes that _putenv(), unlike Posix putenv() + // does create a copy of the input string, and that the + // resulting environment is passed to processes launched + // with CreateProcess (the documentation only mentions + // _spawn() and _exec()). + std::string env = "MAKEFLAGS=" + makeflags; + _putenv(env.c_str()); +#else // !_WIN32 + setenv("MAKEFLAGS", makeflags.c_str(), 1); +#endif // !_WIN32 + + } while (0); + + // Unset NINJA_JOBSERVER unconditionally in subprocesses + // to avoid multiple sub-pools to be started by mistake. +#ifdef _WIN32 + _putenv("NINJA_JOBSERVER="); +#else // !_WIN32 + unsetenv("NINJA_JOBSERVER"); +#endif // !_WIN32 + // Limit number of rebuilds, to prevent infinite loops. const int kCycleLimit = 100; for (int cycle = 1; cycle <= kCycleLimit; ++cycle) {