diff --git a/docs/recipes.md b/docs/recipes.md
index 8cf715fc..26db4a5f 100644
--- a/docs/recipes.md
+++ b/docs/recipes.md
@@ -144,10 +144,12 @@ For example, in the recipe below, only `netcdf-fortran` will be built with the `
 
 ### MPI
 
-Stackinator can configure cray-mpich (CUDA, ROCM, or non-GPU aware) on a per-environment basis, by setting the `mpi` field in an environment.
+Stackinator can configure cray-mpich (CUDA, ROCM, or non-GPU aware) or OpenMPI (with or without CUDA) on a per-environment basis, by setting the `mpi` field in an environment.
 
 !!! note
-    Future versions of Stackinator will support OpenMPI, MPICH and MVAPICH when (and if) they develop robust support for HPE SlingShot 11 interconnect.
+    Future versions of Stackinator will fully support OpenMPI, MPICH and MVAPICH when (and if) they develop robust support for the HPE SlingShot 11 interconnect.
+
+    Current OpenMPI support has been only lightly tested and is not guaranteed to be production ready: only OpenMPI@5.x.x is supported (the default is @5.0.6 at the time of writing, 2025.03.04). CUDA is supported; ROCM has not yet been tested.
 
 If the `mpi` field is not set, or is set to `null`, MPI will not be configured in an environment:
 ```yaml title="environments.yaml: no MPI"
@@ -157,12 +159,18 @@
 serial-env:
   mpi:
   # ...
 ```
 
 To configure MPI without GPU support, set the `spec` field with an optional version:
-```yaml title="environments.yaml: MPI without GPU support"
+```yaml title="environments.yaml: Cray MPICH without GPU support"
 host-env:
   mpi:
     spec: cray-mpich@8.1.23
   # ...
 ```
+```yaml title="environments.yaml: OpenMPI without GPU support"
+host-env:
+  mpi:
+    spec: openmpi
+  # ...
+```
 
 GPU-aware MPI can be configured by setting the optional `gpu` field to specify whether to support `cuda` or `rocm` GPUs:
 ```yaml title="environments.yaml: GPU aware MPI"
 cuda-env:
   mpi:
     spec: cray-mpich
     gpu: cuda
   # ...
@@ -176,7 +184,66 @@
 rocm-env:
   mpi:
     spec: cray-mpich
     gpu: rocm
   # ...
+ompi-cuda-env:
+  mpi:
+    spec: openmpi
+    gpu: cuda
+  # ...
 ```
+#### Experimental libfabric 2.x support with cray-mpich
+
+HPE has open-sourced the libfabric/cxi provider (and related drivers), and these can be built into cray-mpich by adding a dependency on a newer version of libfabric.
+The system default is libfabric@1.15.2, which can be changed by adding a `depends` field to the yaml. A non-default version (newer than 1.15.2) will trigger a build of libfabric using libcxi, cxi-driver and cassini-headers.
+
+This syntax applies to both cray-mpich and openmpi builds.
+```yaml title="environments1.yaml: cray-mpich using new libfabric/cxi stack"
+mpich-cxi-env:
+  mpi:
+    spec: cray-mpich
+    gpu: cuda
+    depends: [libfabric@main]
+    # on release of libfabric@2, we recommend using @2 in preference to @main ...
+```
+```yaml title="environments2.yaml: openmpi using new libfabric/cxi stack"
+openmpi-cxi-env:
+  mpi:
+    spec: openmpi
+    gpu: cuda
+    depends: [libfabric@main]
+    # on release of libfabric@2, we recommend using @2 in preference to @main ...
+```
+!!! note
+    Currently the performance of OpenMPI on Alps clusters might not be optimal; work is ongoing to fine-tune it, especially for intra-node performance.
+
+#### Custom MPI builds
+
+If an experimental version of OpenMPI is required, the yaml syntax supports additional options to enable this. The `xspec` tag may be used to supply extra spack `spec` options.
+To illustrate usage, consider this example: a build of openmpi using a particular git branch named `mpi-continue-5.0.6`, which in this case has a variant `+continuations` that is not available on the `main` or released branches.
+The `xspec` tag allows the user to supply arbitrary spack variants and options, which replace the defaults that stackinator would otherwise use.
+```yaml title="custom-openmpi.yaml: custom build of openmpi"
+openmpi-custom-env:
+  mpi:
+    spec: openmpi@git.mpi-continue-5.0.6=main
+    xspec: +continuations +internal-pmix fabrics=cma,ofi,xpmem schedulers=slurm +cray-xpmem
+    gpu: cuda
+    depends: ["libfabric@main"]
+```
+In this example, we must tell spack to fetch our custom git branch from a repository that is different from the default openmpi GitHub repository, by adding the following to our recipe `packages.yaml`:
+```yaml title="custom-packages.yaml: custom repo for openmpi"
+  # mpi-continue-5.0.6 branch available from here
+  openmpi:
+    package_attributes:
+      git: https://github.com/your-username/ompi
+```
+
+!!! note
+    To build using a specific git commit, use the syntax:
+    ```
+    spec: openmpi@git.9a4079916dd13d4190fe224102b57757789c13da=main
+    ```
+Using these combinations of settings it is therefore possible to build arbitrary versions of MPI from custom branches, with custom options and dependencies.
+
+### Version info
 
 !!! alps
 
@@ -207,7 +274,7 @@ The list of software packages to install is configured in the `spec:` field of a
 The `deprecated: ` field controls if Spack should consider versions marked as deprecated, and can be set to `true` or `false` (for considering or not considering deprecated versions, respectively).
 
 The `unify:` field controls the Spack concretiser, and can be set to three values `true`, `false` or `when_possible`.
-The 
+The
 
 ```yaml
 cuda-env:
diff --git a/stackinator/builder.py b/stackinator/builder.py
index 61d0e3ad..61929da0 100644
--- a/stackinator/builder.py
+++ b/stackinator/builder.py
@@ -353,21 +353,77 @@ def generate(self, recipe):
         with dst.open("w") as fid:
             fid.write(cache.generate_mirrors_yaml(recipe.mirror))
 
+        packages_data = {}
+        # append network packages to packages.yaml
+        network_config_path = system_config_path / "network.yaml"
+        if not network_config_path.is_file():
+            raise FileNotFoundError(f"The network configuration file '{network_config_path}' does not exist")
+        with network_config_path.open() as fid:
+            network_config = yaml.load(fid, Loader=yaml.Loader)
+        packages_data.update(network_config.get("packages", {}))
+
         # append recipe packages to packages.yaml
         if recipe.packages:
             system_packages = system_config_path / "packages.yaml"
-            packages_data = {}
             if system_packages.is_file():
                 # load system yaml
                 with system_packages.open() as fid:
                     raw = yaml.load(fid, Loader=yaml.Loader)
-                    packages_data = raw["packages"]
+                    packages_data.update(raw["packages"])
             packages_data.update(recipe.packages["packages"])
             packages_yaml = yaml.dump({"packages": packages_data})
             packages_path = config_path / "packages.yaml"
             with packages_path.open("w") as fid:
                 fid.write(packages_yaml)
 
+        # validate the recipe mpi selection
+        for name, config in recipe.environments.items():
+            # config[mpi] holds the user-specified settings from environments.yaml
+            if config["mpi"]:
+                mpi = config["mpi"]
+                user_mpi_spec = mpi["spec"]
+                user_mpi_gpu = mpi["gpu"]
+                user_mpi_xspec = mpi["xspec"] if "xspec" in mpi else None
+                user_mpi_deps = mpi["depends"] if "depends" in mpi else None
+                self._logger.debug(
+                    f"User MPI selection: spec={user_mpi_spec}, gpu={user_mpi_gpu}, "
+                    f"xspec={user_mpi_xspec}, deps={user_mpi_deps}"
+                )
+
+                if user_mpi_spec:
+                    try:
+                        mpi_impl, mpi_ver = user_mpi_spec.strip().split(sep="@", maxsplit=1)
+                    except ValueError:
+                        mpi_impl = user_mpi_spec.strip()
+                        mpi_ver = None
+
+                    # network_config holds the system-specified settings from the cluster config (network.yaml)
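+                    # Illustrative sketch only (not a schema): the lookups below assume that
+                    # network.yaml provides roughly the following keys, with the concrete
+                    # versions, specs and dependencies chosen by the cluster configuration:
+                    #   mpi_supported: [cray-mpich, openmpi]
+                    #   cray-mpich:
+                    #     version: "8.1.23"        # example value
+                    #     depends: ["libfabric@1.15.2"]
+                    #   openmpi:
+                    #     version: "5.0.6"         # example value
+                    #     spec: "+internal-pmix fabrics=cma,ofi,xpmem schedulers=slurm"
+                    #     depends: ["libfabric@main"]
+                    #   packages: {...}            # optional, merged into packages.yaml above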
+                    if mpi_impl in network_config["mpi_supported"]:
+                        default_ver = network_config[mpi_impl]["version"]
+                        default_spec = network_config[mpi_impl]["spec"] if "spec" in network_config[mpi_impl] else ""
+                        default_deps = (
+                            network_config[mpi_impl]["depends"] if "depends" in network_config[mpi_impl] else ""
+                        )
+                        self._logger.debug(
+                            f"System MPI selection: spec={mpi_impl}@{default_ver} {default_spec}, deps={default_deps}"
+                        )
+
+                        # select the user's version, or the default version if the user did not specify one
+                        version_opt = f"@{mpi_ver or default_ver}"
+                        # create the full spec based on the user-provided spec or the default spec
+                        spec_opt = f"{mpi_impl}{version_opt} {user_mpi_xspec or default_spec}"
+                        if user_mpi_gpu and user_mpi_gpu not in spec_opt:
+                            spec_opt = f"{spec_opt} +{user_mpi_gpu}"
+                        deps_opt = user_mpi_deps or default_deps
+                        for dep in deps_opt:
+                            spec_opt = f"{spec_opt} ^{dep}"
+                        self._logger.debug(f"Final MPI selection: spec={spec_opt}")
+
+                        recipe.environments[name]["specs"].append(spec_opt)
+                    else:
+                        # TODO: Create a custom exception type
+                        raise Exception(f"Unsupported mpi: {mpi_impl}")
+
         # Add custom spack package recipes, configured via Spack repos.
         # Step 1: copy Spack repos to store_path where they will be used to
         # build the stack, and then be part of the upstream provided
diff --git a/stackinator/recipe.py b/stackinator/recipe.py
index 66293147..4f5b565b 100644
--- a/stackinator/recipe.py
+++ b/stackinator/recipe.py
@@ -8,15 +8,6 @@ class Recipe:
-    valid_mpi_specs = {
-        "cray-mpich": (None, None),
-        "mpich": ("4.1", "device=ch4 netmod=ofi +slurm"),
-        "mvapich2": (
-            "3.0a",
-            "+xpmem fabrics=ch4ofi ch4_max_vcis=4 process_managers=slurm",
-        ),
-        "openmpi": ("5", "+internal-pmix +legacylaunchers +orterunprefix fabrics=cma,ofi,xpmem schedulers=slurm"),
-    }
 
     @property
     def path(self):
@@ -280,43 +271,19 @@ def generate_environment_specs(self, raw):
         for _, config in environments.items():
             config["exclude_from_cache"] = ["cuda", "nvhpc", "perl"]
 
-        # check the environment descriptions and ammend where features are missing
+        # check the environment descriptions and amend where features are missing
         for name, config in environments.items():
             if ("specs" not in config) or (config["specs"] is None):
                 environments[name]["specs"] = []
 
             if "mpi" not in config:
-                environments[name]["mpi"] = {"spec": None, "gpu": None}
+                environments[name]["mpi"] = {"spec": None, "gpu": None, "network": {"spec": None}}
 
-        # complete configuration of MPI in each environment
-        for name, config in environments.items():
-            if config["mpi"]:
-                mpi = config["mpi"]
-                mpi_spec = mpi["spec"]
-                mpi_gpu = mpi["gpu"]
-                if mpi_spec:
-                    try:
-                        mpi_impl, mpi_ver = mpi_spec.strip().split(sep="@", maxsplit=1)
-                    except ValueError:
-                        mpi_impl = mpi_spec.strip()
-                        mpi_ver = None
-
-                    if mpi_impl in Recipe.valid_mpi_specs:
-                        default_ver, options = Recipe.valid_mpi_specs[mpi_impl]
-                        if mpi_ver:
-                            version_opt = f"@{mpi_ver}"
-                        else:
-                            version_opt = f"@{default_ver}" if default_ver else ""
-
-                        spec = f"{mpi_impl}{version_opt} {options or ''}".strip()
-
-                        if mpi_gpu:
-                            spec = f"{spec} +{mpi_gpu}"
-
-                        environments[name]["specs"].append(spec)
-                    else:
-                        # TODO: Create a custom exception type
-                        raise Exception(f"Unsupported mpi: {mpi_impl}")
+            if "network" not in config["mpi"]:
+                environments[name]["mpi"]["network"] = {"spec": ""}
+
+        # we have not loaded the system configs yet, so mpi information will be generated
+        # during the builder phase. We will validate the mpi information then.
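+        # For illustration (hypothetical values): a user entry such as
+        #   mpi: {"spec": "openmpi", "gpu": "cuda", "depends": ["libfabric@main"]}
+        # leaves this normalisation step as
+        #   {"spec": "openmpi", "gpu": "cuda", "depends": ["libfabric@main"], "network": {"spec": ""}}
+        # while an environment with no mpi field becomes
+        #   {"spec": None, "gpu": None, "network": {"spec": None}}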
 
         # set constraints that ensure the the main compiler is always used to build packages
         # that do not explicitly request a compiler.
diff --git a/stackinator/schema/environments.json b/stackinator/schema/environments.json
index 9faefbd6..1c80c4a3 100644
--- a/stackinator/schema/environments.json
+++ b/stackinator/schema/environments.json
@@ -47,8 +47,14 @@
                 "gpu": {
                     "enum": ["cuda", "rocm", null, false],
                     "default": null
-                }
-            }
+                },
+                "xspec": {"type": "string"},
+                "depends": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "default": []
+                }
+            }
         },
         {"enum": [null, false]}
     ],
@@ -87,7 +93,7 @@
         },
         "uenv": {
             "type": "object",
-            "additionalPropertis": false,
+            "additionalProperties": false,
             "properties": {
                 "add_compilers": { "type": "boolean" },
                 "prefix_paths": {
@@ -109,4 +115,3 @@
             }
         }
     }
-
diff --git a/unittests/test_schema.py b/unittests/test_schema.py
index 9bc03264..37d0b326 100644
--- a/unittests/test_schema.py
+++ b/unittests/test_schema.py
@@ -133,7 +133,7 @@ def test_environments_yaml(yaml_path):
         # test defaults were set correctly
         assert env["unify"] == "when_possible"
         assert env["packages"] == ["perl", "git"]
-        assert env["mpi"] == {"spec": "cray-mpich", "gpu": "cuda"}
+        assert env["mpi"] == {"depends": [], "spec": "cray-mpich", "gpu": "cuda"}
         assert env["variants"] == ["+mpi", "+cuda"]
         assert env["views"] == {"default": None}