Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/detect perlmutter login nodes #1391

Merged
merged 3 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions libensemble/resources/platforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
option or the environment variable ``LIBE_PLATFORM``.
"""

import logging
import os
import subprocess
from typing import Optional
Expand All @@ -16,6 +17,10 @@

from libensemble.utils.misc import specs_dump

logger = logging.getLogger(__name__)
# To change logging level for just this module
# logger.setLevel(logging.DEBUG)


class PlatformException(Exception):
"""Platform module exception"""
Expand Down Expand Up @@ -269,6 +274,7 @@ class Known_platforms(BaseModel):
generic_rocm: GenericROCm = GenericROCm()
crusher: Crusher = Crusher()
frontier: Frontier = Frontier()
perlmutter: Perlmutter = Perlmutter()
perlmutter_c: PerlmutterCPU = PerlmutterCPU()
perlmutter_g: PerlmutterGPU = PerlmutterGPU()
polaris: Polaris = Polaris()
Expand All @@ -292,10 +298,15 @@ def known_envs():
"""Detect system by environment variables"""
name = None
if os.environ.get("NERSC_HOST") == "perlmutter":
if "gpu_" in os.environ.get("SLURM_JOB_PARTITION"):
name = "perlmutter_g"
partition = os.environ.get("SLURM_JOB_PARTITION")
if partition:
if "gpu_" in partition:
name = "perlmutter_g"
else:
name = "perlmutter_c"
else:
name = "perlmutter_c"
name = "perlmutter"
logger.manager_warning("Perlmutter detected, but no compute partition detected. Are you on login nodes?")
return name


Expand Down
53 changes: 44 additions & 9 deletions libensemble/tests/unit_tests/test_platform.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import pytest

from libensemble.resources.platforms import Known_platforms, PlatformException, get_platform, known_system_detect
from libensemble.resources.platforms import (
Known_platforms,
PlatformException,
get_platform,
known_envs,
known_system_detect,
)
from libensemble.utils.misc import specs_dump

my_spec = {
Expand All @@ -20,8 +26,12 @@
}


def test_platform_empty():
def test_platform_empty(monkeypatch):
"""Test no platform options supplied"""

# Ensure NERSC_HOST not set
monkeypatch.delenv("NERSC_HOST", raising=False)

exp = {}
libE_specs = {}
platform_info = get_platform(libE_specs)
Expand Down Expand Up @@ -55,10 +65,13 @@ def test_platform_known():
assert platform_info == exp, f"platform_info does not match expected: {platform_info}"


def test_platform_specs():
def test_platform_specs(monkeypatch):
"""Test known platform and platform_specs supplied"""
from libensemble.specs import LibeSpecs

# Ensure NERSC_HOST not set
monkeypatch.delenv("NERSC_HOST", raising=False)

exp = my_spec
libE_specs = {"platform_specs": my_spec}
platform_info = get_platform(libE_specs)
Expand All @@ -81,7 +94,12 @@ def test_platform_specs():
assert specs_dump(LS.platform_specs, exclude_none=True) == exp, "Conversion isn't as expected"


def test_known_sys_detect():
def test_known_sys_detect(monkeypatch):
"""Test detection of known system"""

# Ensure NERSC_HOST not set
monkeypatch.delenv("NERSC_HOST", raising=False)

known_platforms = specs_dump(Known_platforms(), exclude_none=True)
get_sys_cmd = "echo summit.olcf.ornl.gov" # Overrides default "hostname -d"
name = known_system_detect(cmd=get_sys_cmd)
Expand All @@ -94,9 +112,26 @@ def test_known_sys_detect():
assert name is None, f"Expected known_system_detect to return None ({name})"


def test_env_sys_detect(monkeypatch):
    """Check the platform name known_envs() derives from environment variables.

    Exercises four environments in turn: a NERSC host other than Perlmutter,
    Perlmutter with a GPU partition, Perlmutter with a CPU partition, and
    Perlmutter with no Slurm partition variable set.
    """
    # A NERSC host that is not Perlmutter should yield no platform name.
    monkeypatch.setenv("NERSC_HOST", "other_host")
    monkeypatch.setenv("SLURM_JOB_PARTITION", "cpu_test_partition")
    assert known_envs() is None

    monkeypatch.setenv("NERSC_HOST", "perlmutter")

    # On Perlmutter the partition name selects the GPU or CPU platform.
    partition_to_platform = (
        ("gpu_test_partition", "perlmutter_g"),
        ("cpu_test_partition", "perlmutter_c"),
    )
    for partition, expected in partition_to_platform:
        monkeypatch.setenv("SLURM_JOB_PARTITION", partition)
        assert known_envs() == expected

    # With the partition variable removed, the generic name is expected.
    monkeypatch.delenv("SLURM_JOB_PARTITION", raising=False)
    assert known_envs() == "perlmutter"


if __name__ == "__main__":
test_platform_empty()
test_unknown_platform()
test_platform_known()
test_platform_specs()
test_known_sys_detect()
pytest.main([__file__])
Loading