From e9b395b3e644ea23342b74ad128f3c782f474555 Mon Sep 17 00:00:00 2001 From: Toni Harzendorf Date: Wed, 6 Apr 2022 18:57:58 +0200 Subject: [PATCH 01/28] Rework the Job-API --- .gitignore | 3 + pyslurm/__init__.py | 40 + pyslurm/api.pxd | 25 + pyslurm/api.pyx | 45 + pyslurm/core/__init__.pxd | 0 pyslurm/core/__init__.py | 0 pyslurm/core/common/__init__.pxd | 28 + pyslurm/core/common/__init__.pyx | 347 +++++++ pyslurm/core/common/cstr.pxd | 37 + pyslurm/core/common/cstr.pyx | 279 ++++++ pyslurm/core/common/ctime.pxd | 30 + pyslurm/core/common/ctime.pyx | 211 +++++ pyslurm/core/common/uint.pxd | 41 + pyslurm/core/common/uint.pyx | 179 ++++ pyslurm/core/error.pyx | 88 ++ pyslurm/core/job/__init__.pxd | 0 pyslurm/core/job/__init__.py | 3 + pyslurm/core/job/job.pxd | 88 ++ pyslurm/core/job/job.pyx | 1517 ++++++++++++++++++++++++++++++ pyslurm/core/job/sbatch_opts.pyx | 203 ++++ pyslurm/core/job/step.pxd | 62 ++ pyslurm/core/job/step.pyx | 530 +++++++++++ pyslurm/core/job/submission.pxd | 776 +++++++++++++++ pyslurm/core/job/submission.pyx | 663 +++++++++++++ pyslurm/core/job/util.pyx | 616 ++++++++++++ pyslurm/pyslurm.pyx | 23 - pyslurm/slurm/__init__.pxd | 1 - pyslurm/slurm/extra.pxi | 107 ++- pyslurm/slurm/other.pxi | 67 ++ pyslurm/slurm/xmalloc.h | 117 +++ setup.cfg | 6 + setup.py | 1 - tests/new_api/conftest.py | 50 + tests/new_api/test_common.py | 330 +++++++ tests/new_api/test_job.py | 143 +++ tests/new_api/test_job_steps.py | 181 ++++ tests/new_api/test_job_submit.py | 306 ++++++ 37 files changed, 7080 insertions(+), 63 deletions(-) create mode 100644 pyslurm/api.pxd create mode 100644 pyslurm/api.pyx create mode 100644 pyslurm/core/__init__.pxd create mode 100644 pyslurm/core/__init__.py create mode 100644 pyslurm/core/common/__init__.pxd create mode 100644 pyslurm/core/common/__init__.pyx create mode 100644 pyslurm/core/common/cstr.pxd create mode 100644 pyslurm/core/common/cstr.pyx create mode 100644 pyslurm/core/common/ctime.pxd create mode 100644 pyslurm/core/common/ctime.pyx create mode 100644 pyslurm/core/common/uint.pxd create mode 100644 pyslurm/core/common/uint.pyx create mode 100644 pyslurm/core/error.pyx create mode 100644 pyslurm/core/job/__init__.pxd create mode 100644 pyslurm/core/job/__init__.py create mode 100644 pyslurm/core/job/job.pxd create mode 100644 pyslurm/core/job/job.pyx create mode 100644 pyslurm/core/job/sbatch_opts.pyx create mode 100644 pyslurm/core/job/step.pxd create mode 100644 pyslurm/core/job/step.pyx create mode 100644 pyslurm/core/job/submission.pxd create mode 100644 pyslurm/core/job/submission.pyx create mode 100644 pyslurm/core/job/util.pyx create mode 100644 pyslurm/slurm/other.pxi create mode 100644 pyslurm/slurm/xmalloc.h create mode 100644 tests/new_api/conftest.py create mode 100644 tests/new_api/test_common.py create mode 100644 tests/new_api/test_job.py create mode 100644 tests/new_api/test_job_steps.py create mode 100644 tests/new_api/test_job_submit.py diff --git a/.gitignore b/.gitignore index f79b3369..d534a00e 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,8 @@ pyslurm/*.pxi~ pyslurm/*.pxd~ pyslurm/*.so pyslurm/*.c +pyslurm/**/*.c +pyslurm/**/__pycache__ # Ignore vim swap files *.swp @@ -25,6 +27,7 @@ tests/*.pyc # Ignore pycache (Python 3) */__pycache__ +*/**/__pycache__ # Ignore job output files *.out diff --git a/pyslurm/__init__.py b/pyslurm/__init__.py index 177bf7cb..b3bedd61 100644 --- a/pyslurm/__init__.py +++ b/pyslurm/__init__.py @@ -16,6 +16,46 @@ from .pyslurm import * from .__version__ import __version__ +from 
pyslurm.core.job import ( + Job, + Jobs, + JobStep, + JobSteps, + JobSubmitDescription, +) + +import pyslurm.core.error +from pyslurm.core.error import ( + RPCError, +) + +# Utility time functions +from pyslurm.core.common.ctime import ( + timestr_to_secs, + timestr_to_mins, + secs_to_timestr, + mins_to_timestr, + date_to_timestamp, + timestamp_to_date, +) + +# General utility functions +from pyslurm.core.common import ( + uid_to_name, + gid_to_name, + user_to_uid, + group_to_gid, + expand_range_str, + humanize, + dehumanize, + nodelist_from_range_str, + nodelist_to_range_str, +) + +# Initialize slurm api +from pyslurm.api import slurm_init, slurm_fini +slurm_init() + def version(): return __version__ diff --git a/pyslurm/api.pxd b/pyslurm/api.pxd new file mode 100644 index 00000000..7afe0752 --- /dev/null +++ b/pyslurm/api.pxd @@ -0,0 +1,25 @@ +######################################################################### +# api.pxd - pyslurm core API +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +# cython: embedsignature=True + +from pyslurm cimport slurm +from pyslurm.core.common cimport cstr diff --git a/pyslurm/api.pyx b/pyslurm/api.pyx new file mode 100644 index 00000000..716943b5 --- /dev/null +++ b/pyslurm/api.pyx @@ -0,0 +1,45 @@ +######################################################################### +# api.pyx - pyslurm core API +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +# cython: embedsignature=True + + +def slurm_init(config_path=None): + """ + Initialize the Slurm API. + + This function must be called first before certain RPC functions can be + executed. slurm_init is automatically called when the pyslurm module is + loaded. + + Args: + config_path (str, optional): + An absolute path to the slurm config file to use. 
The default is + None, so libslurm will automatically detect its config. + """ + slurm.slurm_init(cstr.from_unicode(config_path)) + + +def slurm_fini(): + """ + Clean up data structures previously allocated through slurm_init. + """ + slurm.slurm_fini() diff --git a/pyslurm/core/__init__.pxd b/pyslurm/core/__init__.pxd new file mode 100644 index 00000000..e69de29b diff --git a/pyslurm/core/__init__.py b/pyslurm/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pyslurm/core/common/__init__.pxd b/pyslurm/core/common/__init__.pxd new file mode 100644 index 00000000..160345ad --- /dev/null +++ b/pyslurm/core/common/__init__.pxd @@ -0,0 +1,28 @@ +######################################################################### +# common/__init__.pxd - common/utility functions +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport xfree, try_xmalloc, xmalloc, xfree_ptr +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t +from pyslurm.core.common cimport cstr +from libc.stdlib cimport free + diff --git a/pyslurm/core/common/__init__.pyx b/pyslurm/core/common/__init__.pyx new file mode 100644 index 00000000..3e9e98bb --- /dev/null +++ b/pyslurm/core/common/__init__.pyx @@ -0,0 +1,347 @@ +######################################################################### +# common/__init__.pyx - common/utility functions +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
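A brief, hedged usage sketch for the slurm_init/slurm_fini functions shown above. slurm_init() is already called automatically when pyslurm is imported, so doing this manually is only needed in special setups; the config path below is purely illustrative.

    >>> from pyslurm.api import slurm_init, slurm_fini
    >>> slurm_fini()                            # drop the auto-loaded config
    >>> slurm_init("/etc/slurm/slurm.conf")     # re-init with an explicit path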
+# +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from grp import getgrgid, getgrnam, getgrall +from pwd import getpwuid, getpwnam, getpwall +from os import getuid, getgid +from itertools import chain +import re +import signal + + +MEMORY_UNITS = { + "K": 2**10.0, + "M": 2**20.0, + "G": 2**30.0, + "T": 2**40.0, + "P": 2**50.0, + "E": 2**60.0, + "Z": 2**70.0 +} + + +cpdef uid_to_name(uint32_t uid, err_on_invalid=True, dict lookup={}): + """Translate UID to a User-Name.""" + if uid == slurm.NO_VAL: + return None + + if lookup: + try: + name = lookup[uid] + return name + except KeyError as e: + if err_on_invalid: + raise e + else: + try: + name = getpwuid(uid).pw_name + return name + except KeyError as e: + if err_on_invalid: + raise e + + return None + + +cpdef gid_to_name(uint32_t gid, err_on_invalid=True, dict lookup={}): + """Translate a uid to a Group-Name.""" + if gid == slurm.NO_VAL: + return None + + if lookup: + try: + name = lookup[gid] + return name + except KeyError as e: + if err_on_invalid: + raise e + else: + try: + name = getgrgid(gid).gr_name + return name + except KeyError as e: + if err_on_invalid: + raise e + + return None + + +def user_to_uid(user, err_on_invalid=True): + """Translate User-Name to a uid.""" + if user is None: + return slurm.NO_VAL + + try: + if isinstance(user, str): + return getpwnam(user).pw_uid + + return getpwuid(user).pw_uid + except KeyError as e: + if err_on_invalid: + raise e + + return getuid() + + +def group_to_gid(group, err_on_invalid=True): + """Translate a Group-Name to a gid.""" + if group is None: + return slurm.NO_VAL + + try: + if isinstance(group, str): + return getgrnam(group).gr_gid + + return getgrgid(group).gr_gid + except KeyError as e: + if err_on_invalid: + raise e + + return getgid() + + +def _getgrall_to_dict(): + cdef list groups = getgrall() + cdef dict grp_info = {item.gr_gid: item.gr_name for item in groups} + return grp_info + + +def _getpwall_to_dict(): + cdef list passwd = getpwall() + cdef dict pw_info = {item.pw_uid: item.pw_name for item in passwd} + return pw_info + + +def expand_range_str(range_str): + """Expand a ranged string of numbers to a list of unique values. + + Args: + range_str (str): + A range string, which can for example look like this: + "1,2,3-10,11,15-20" + + Returns: + list: List of unique values + """ + ret = [] + for mrange in range_str.split(","): + start, sep, end = mrange.partition("-") + start = int(start) + + if sep: + ret += range(start, int(end)+1) + else: + ret.append(start) + + return ret + + +def nodelist_from_range_str(nodelist): + """Convert a bracketed nodelist str with ranges to a list. + + Args: + nodelist (Union[str, list]): + Comma-seperated str or list with potentially bracketed hostnames + and ranges. + + Returns: + list: List of all nodenames or None on failure + """ + if isinstance(nodelist, list): + nodelist = ",".join(nodelist) + + cdef: + char *nl = nodelist + slurm.hostlist_t hl + char *hl_unranged = NULL + + hl = slurm.slurm_hostlist_create(nl) + if not hl: + return None + + hl_unranged = slurm.slurm_hostlist_deranged_string_malloc(hl) + out = cstr.to_list(hl_unranged) + + free(hl_unranged) + slurm.slurm_hostlist_destroy(hl) + + return out + + +def nodelist_to_range_str(nodelist): + """Convert a list of nodes to a bracketed str with ranges. + + Args: + nodelist (Union[str, list]): + Comma-seperated str or list with unique, unbracketed nodenames. + + Returns: + str: Bracketed, ranged nodelist or None on failure. 
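    Examples:
        A small, hedged sketch; hostnames and the exact ranged form are
        illustrative and depend on Slurm's hostlist functions:

        >>> from pyslurm import nodelist_to_range_str, nodelist_from_range_str
        >>> nodelist_to_range_str(["node1", "node2", "node3"])
        'node[1-3]'
        >>> nodelist_from_range_str("node[1-3]")
        ['node1', 'node2', 'node3']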
+ """ + if isinstance(nodelist, list): + nodelist = ",".join(nodelist) + + cdef: + char *nl = nodelist + slurm.hostlist_t hl + char *hl_ranged = NULL + + hl = slurm.slurm_hostlist_create(nl) + if not hl: + return None + + hl_ranged = slurm.slurm_hostlist_ranged_string_malloc(hl) + out = cstr.to_unicode(hl_ranged) + + free(hl_ranged) + slurm.slurm_hostlist_destroy(hl) + + return out + + +def humanize(num, decimals=1): + """Humanize a number. + + This will convert the number to a string and add appropriate suffixes like + M,G,T,P,... + + Args: + num (int): + Number to humanize + decimals (int, optional): + Amount of decimals the humanized string should have. + + Returns: + str: Humanized number with appropriate suffix. + """ + if num is None or num == "unlimited": + return num + + num = int(num) + for unit in ["M", "G", "T", "P", "E", "Z"]: + if abs(num) < 1024.0: + return f"{num:3.{decimals}f}{unit}" + num /= 1024.0 + + return f"{num:.{decimals}f}Y" + + +def dehumanize(humanized_str, target="M", decimals=0): + """Dehumanize a previously humanized value. + + Args: + humanized_str (str): + A humanized str, for example "5M" or "10T" + target (str): + Target unit. The default is "M" (Mebibytes). Allowed values are + K,M,G,T,P,E,Z + decimals (int): + Amount of decimal places the result should have. Default is 0 + + Returns: + int: Dehumanized value + """ + if not humanized_str: + return None + + units_str = " ".join(MEMORY_UNITS.keys()) + splitted = re.split(f'([{units_str}])', str(humanized_str)) + + if len(splitted) == 1: + try: + return int(humanized_str) + except ValueError as e: + raise ValueError(f"Invalid value specified: {humanized_str}") + + val = float(splitted[0]) + unit = splitted[1] + + val_in_bytes = val * MEMORY_UNITS[unit] + val_in_target_size = float(val_in_bytes / MEMORY_UNITS[target]) + + if not decimals: + return round(val_in_target_size) + else: + return float(f"{val_in_target_size:.{decimals}f}") + + +def signal_to_num(sig): + if not sig: + return None + + try: + if str(sig).isnumeric(): + _sig = signal.Signals(int(sig)).value + else: + _sig = signal.Signals[sig].value + except Exception: + raise ValueError(f"Invalid Signal: {sig}.") from None + + return _sig + + +def cpubind_to_num(cpu_bind): + cdef uint32_t flags = 0 + + if not cpu_bind: + return flags + + cpu_bind = cpu_bind.casefold().split(",") + + if "none" in cpu_bind: + flags |= slurm.CPU_BIND_NONE + elif "sockets" in cpu_bind: + flags |= slurm.CPU_BIND_TO_SOCKETS + elif "ldoms" in cpu_bind: + flags |= slurm.CPU_BIND_TO_LDOMS + elif "cores" in cpu_bind: + flags |= slurm.CPU_BIND_TO_CORES + elif "threads" in cpu_bind: + flags |= slurm.CPU_BIND_TO_THREADS + elif "off" in cpu_bind: + flags |= slurm.CPU_BIND_OFF + if "verbose" in cpu_bind: + flags |= slurm.CPU_BIND_VERBOSE + + return flags + + +def instance_to_dict(inst): + cdef dict out = {} + for attr in dir(inst): + val = getattr(inst, attr) + if attr.startswith("_") or callable(val): + # Ignore everything starting with "_" and all functions. 
+ continue + out[attr] = val + + return out + + +def _sum_prop(obj, name, startval=0): + val = startval + for n in obj.values(): + v = name.__get__(n) + if v is not None: + val += v + + return val diff --git a/pyslurm/core/common/cstr.pxd b/pyslurm/core/common/cstr.pxd new file mode 100644 index 00000000..133edf22 --- /dev/null +++ b/pyslurm/core/common/cstr.pxd @@ -0,0 +1,37 @@ +######################################################################### +# common/cstr.pxd - slurm string functions +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport xfree, try_xmalloc, xmalloc, xfree_ptr +from libc.string cimport memcpy, strlen + +cdef char *from_unicode(s) +cdef to_unicode(char *s, default=*) +cdef fmalloc(char **old, val) +cdef fmalloc2(char **old, char **old2, val) +cdef free_array(char **arr, count) +cdef list to_list(char *str_list) +cdef from_list(char **old, vals, delim=*) +cdef dict to_dict(char *str_dict, str delim1=*, str delim2=*) +cdef dict from_dict(char **old, vals, prepend=*, str delim1=*, str delim2=*) +cdef to_gres_dict(char *gres) +cdef from_gres_dict(vals, typ=*) diff --git a/pyslurm/core/common/cstr.pyx b/pyslurm/core/common/cstr.pyx new file mode 100644 index 00000000..0a824f3c --- /dev/null +++ b/pyslurm/core/common/cstr.pyx @@ -0,0 +1,279 @@ +######################################################################### +# common/cstr.pyx - pyslurm string functions +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +import re + +cdef bytes NULL_BYTE = "\0".encode("ascii") +cdef bytes NONE_BYTE = "None".encode("ascii") + +cdef char *from_unicode(s): + """Convert Python3 str (unicode) to char* (no malloc) + + Note + The lifetime of this char* depends on the lifetime of the equivalent + python-object passed in. 
If the python-object is gone, the char* cannot + be used safely anymore. + """ + if not s: + return NULL + + _s = str(s) + return _s + + +cdef inline to_unicode(char *_str, default=None): + """Convert a char* to Python3 str (unicode)""" + if _str and _str[0] != NULL_BYTE: + if _str == NONE_BYTE: + return None + + return _str + else: + return default + + +cdef fmalloc2(char **old, char **old2, val): + """Like fmalloc, but copies the value to 2 char pointers. + + Memory will only be allocated once. + "old" and "old2" will both share this same pointer. + """ + fmalloc(old, val) + old2[0] = old[0] + + +cdef fmalloc(char **old, val): + """Try to free first and then create xmalloc'ed char* from str. + + Also see: + https://github.com/SchedMD/slurm/blob/master/src/common/xstring.c#L454 + + This function is essentially like xstrdup from Slurm, but also tries to free + the previous allocation if needed. + + Uses Slurm's try_xmalloc for routine for allocating memory. try_xmalloc will + return NULL if the allocation failed. We can check this and raise a + MemoryError. + + Just using the normal xmalloc would call abort() if allocation failed (for + example when OOM). + """ + # TODO: Consider doing some size checks on the input by having an extra + # argument like "max_size" which is configurable. Otherwise infinitely huge + # strings could just be passed in and consume a lot of memory which would + # allow for a denial of service attack on services that use pyslurm. + cdef: + const char *tmp = NULL + size_t siz + + # Free the previous allocation (if neccessary) + xfree(old[0]) + + # Consider: Maybe every string containing a \0 should just + # be rejected with an Exception instead of silently cutting + # everything after \0 off? + + if val and val[0] != "\0": + # Let Cython convert the Python-string to a char* + # which will be NUL-terminated. + tmp = val + + # Get the length of the char*, include space for NUL character + siz = strlen(tmp) + 1 + + old[0] = slurm.try_xmalloc(siz) + if not old[0]: + raise MemoryError("xmalloc failed for char*") + + memcpy(old[0], tmp, siz) + else: + old[0] = NULL + + +cdef list to_list(char *str_list): + """Convert C-String to a list.""" + cdef str ret = to_unicode(str_list) + + if not ret: + return [] + + return ret.split(",") + + +cdef from_list(char **old, vals, delim=","): + """Convert list to a C-String.""" + cdef object final = vals + + if vals and not isinstance(vals, str): + final = delim.join(vals) + + fmalloc(old, final) + + +cdef dict to_dict(char *str_dict, str delim1=",", str delim2="="): + """Convert a char* key=value pair to dict. + + With a char* Slurm represents key-values pairs usually in the form of: + key1=value1,key2=value2 + which can easily be converted to a dict. + """ + cdef: + str _str_dict = to_unicode(str_dict) + str key, val + dict out = {} + + if not _str_dict: + return out + + for kv in _str_dict.split(delim1): + if delim2 in kv: + key, val = kv.split(delim2) + out[key] = val + + return out + + +cdef dict from_dict(char **old, vals, prepend=None, str delim1=",", str delim2="="): + """Convert a dict (or str) to Slurm Key-Value pair. + + Slurm predominantly uses a format of: + key1=value1,key2=value2,... + + for Key/Value type things, which can be easily created from a dict. + + A String which already has this form can also be passed in. The correct + format of this string will the be validated. 
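    Example:
        As a rough sketch (keys and values are illustrative), an input of
        {"a": "b", "c": "d"} - or the equivalent string "a=b,c=d" - is
        rendered into the char* as "a=b,c=d".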
+ """ + cdef: + out = {} if not vals else vals + list tmp = [] + + if vals and isinstance(vals, str): + out = {} + for kv in vals.split(delim1): + if delim2 in kv: + k, v = kv.split(delim2) + out[k] = v + else: + raise ValueError( + f"Invalid format for key-value pair {kv}. " + f"Expected {delim2} as seperator." + ) + + for k, v in out.items(): + if ((delim1 in k or delim2 in k) or + delim1 in v or delim2 in v): + raise ValueError( + f"Key or Value cannot contain either {delim1} or {delim2}. " + f"Got Key: {k} and Value: {v}." + ) + + tmp.append(f"{'' if not prepend else prepend}{k}{delim2}{v}") + + fmalloc(old, delim1.join(tmp)) + + return out + + +cdef to_gres_dict(char *gres): + """Parse a GRES string.""" + cdef: + dict output = {} + str gres_str = to_unicode(gres) + + if not gres_str or gres_str == "(null)": + return {} + + for item in re.split(",(?=[^,]+?:)", gres_str): + + # Remove the additional "gres" specifier if it exists + if "gres:" in item: + item = item.replace("gres:", "") + + gres_splitted = re.split( + ":(?=[^:]+?)", + item.replace("(", ":", 1).replace(")", "") + ) + + name, typ, cnt = gres_splitted[0], gres_splitted[1], 0 + + # Check if we have a gres type. + if typ.isdigit(): + cnt = typ + typ = None + else: + cnt = gres_splitted[2] + + # Dict Key-Name depends on if we have a gres type or not + name_and_typ = f"{name}:{typ}" if typ else name + + if not "IDX" in gres_splitted: + # Check if we need to parse the exact GRES index when coming from + # job_resources_t. + output[name_and_typ] = int(cnt) + else: + # Cover cases with IDX + idx = gres_splitted[3] if not typ else gres_splitted[4] + output[name_and_typ] = { + "count": cnt, + "indexes": idx, + } + + return output + + +cdef from_gres_dict(vals, typ=""): + final = [] + gres_dict = vals + + if not vals: + return None + + if isinstance(vals, str) and not vals.isdigit(): + gres_dict = {} + + gres_list = vals.replace("gres:", "") + for gres_str in gres_list.split(","): + gres_and_type, cnt = gres_str.rsplit(":", 1) + gres_dict.update({gres_and_type: int(cnt)}) + elif isinstance(vals, dict): + for gres_and_type, cnt in gres_dict.items(): + # Error immediately on specifications that contain more than one + # semicolon, as it is wrong. + if len(gres_and_type.split(":")) > 2: + raise ValueError(f"Invalid specifier: '{gres_and_type}'") + + if typ not in gres_and_type: + gres_and_type = f"{gres_and_type}:{typ}" + + final.append(f"gres:{gres_and_type}:{int(cnt)}") + else: + return f"gres:{typ}:{int(vals)}" + + return ",".join(final) + + +cdef free_array(char **arr, count): + for i in range(count): + xfree(arr[i]) + + xfree(arr) diff --git a/pyslurm/core/common/ctime.pxd b/pyslurm/core/common/ctime.pxd new file mode 100644 index 00000000..ee0600cb --- /dev/null +++ b/pyslurm/core/common/ctime.pxd @@ -0,0 +1,30 @@ +######################################################################### +# ctime.pxd - wrappers around slurm time functions +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.core.common cimport cstr +from libc.stdint cimport uint32_t + +cdef extern from 'time.h' nogil: + ctypedef long time_t + double difftime(time_t time1, time_t time2) + time_t time(time_t *t) diff --git a/pyslurm/core/common/ctime.pyx b/pyslurm/core/common/ctime.pyx new file mode 100644 index 00000000..e77b18a1 --- /dev/null +++ b/pyslurm/core/common/ctime.pyx @@ -0,0 +1,211 @@ +######################################################################### +# ctime.pyx - wrappers around slurm time functions +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +import datetime + + +def timestr_to_secs(timestr): + """Convert Slurm Timestring to seconds + + Args: + timestr (str): + A Timestring compatible with Slurms time functions. + + Returns: + int: Amount of time in seconds + """ + cdef: + char *tmp = NULL + uint32_t secs + + if timestr is None: + return slurm.NO_VAL + elif timestr == "unlimited": + return slurm.INFINITE + + if str(timestr).isdigit(): + timestr = "00:00:{}".format(timestr) + + tmp = cstr.from_unicode(timestr) + secs = slurm.slurm_time_str2secs(tmp) + + if secs == slurm.NO_VAL: + raise ValueError(f"Invalid Time Specification: {timestr}.") + + return secs + + +def timestr_to_mins(timestr): + """Convert Slurm Timestring to minutes + + Args: + timestr (str): + A Timestring compatible with Slurms time functions. 
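            For example "2-00:00:00" (two days, i.e. 2880 minutes) or "30"
            (thirty minutes); values shown here are purely illustrative.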
+ + Returns: + int: Amount of time in minutes + """ + cdef: + char *tmp = NULL + uint32_t mins + + if timestr is None: + return slurm.NO_VAL + elif timestr == "unlimited": + return slurm.INFINITE + + tmp = cstr.from_unicode(timestr) + mins = slurm.slurm_time_str2mins(tmp) + + if mins == slurm.NO_VAL: + raise ValueError(f"Invalid Time Specification: {timestr}.") + + return mins + + +def secs_to_timestr(secs, default=None): + """Parse time in seconds to Slurm Timestring + + Args: + secs (int): + Amount of seconds to convert + + Returns: + str: A Slurm timestring + """ + cdef char time_line[32] + + if secs == slurm.NO_VAL or secs is None: + return default + elif secs != slurm.INFINITE: + slurm.slurm_secs2time_str( + secs, + time_line, + sizeof(time_line) + ) + + tmp = cstr.to_unicode(time_line) + if tmp == "00:00:00": + return None + else: + return tmp + else: + return "unlimited" + + +def mins_to_timestr(mins, default=None): + """Parse time in minutes to Slurm Timestring + + Args: + mins (int): + Amount of minutes to convert + + Returns: + str: A Slurm timestring + """ + cdef char time_line[32] + + if mins == slurm.NO_VAL or mins is None: + return default + elif mins != slurm.INFINITE: + slurm.slurm_mins2time_str( + mins, + time_line, + sizeof(time_line) + ) + + tmp = cstr.to_unicode(time_line) + if tmp == "00:00:00": + return None + else: + return tmp + else: + return "unlimited" + + +def date_to_timestamp(date, on_nodate=0): + """Parse Date to Unix timestamp + + Args: + date (Union[str, int, datetime.datetime]): + A date to convert to a Unix timestamp. + + Returns: + int: A unix timestamp + """ + cdef: + time_t tmp_time + char* tmp_char = NULL + + if not date: + # time_t of 0, so the option will be ignored by slurmctld + return on_nodate + elif str(date).isdigit(): + # Allow the user to pass a timestamp directly. + return int(date) + elif isinstance(date, datetime.datetime): + # Allow the user to pass a datetime.datetime object. + return int(date.timestamp()) + + tmp_char = cstr.from_unicode(date) + tmp_time = slurm.slurm_parse_time(tmp_char, 0) + + if not tmp_time: + raise ValueError(f"Invalid Time Specification: {date}") + + return tmp_time + + +def timestamp_to_date(timestamp): + """Parse Unix timestamp to Slurm Date-string + + Args: + timestamp (int): + A Unix timestamp that should be converted. + + Returns: + str: A Slurm date timestring + """ + cdef: + char time_str[32] + time_t _time = timestamp + + if _time == slurm.NO_VAL: + return None + + # slurm_make_time_str returns 'Unknown' if 0 or slurm.INFINITE + slurm.slurm_make_time_str(&_time, time_str, sizeof(time_str)) + + ret = cstr.to_unicode(time_str) + if ret == "Unknown": + return None + + return ret + + +def _raw_time(time): + if (time == slurm.NO_VAL or + time == 0 or + time == slurm.INFINITE): + return None + + return time diff --git a/pyslurm/core/common/uint.pxd b/pyslurm/core/common/uint.pxd new file mode 100644 index 00000000..aa9f8dd2 --- /dev/null +++ b/pyslurm/core/common/uint.pxd @@ -0,0 +1,41 @@ +######################################################################### +# common/uint.pxd - functions dealing with parsing uint types +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
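A brief, hedged sketch of the time conversion helpers defined above (values are illustrative; the result of date_to_timestamp depends on the local timezone):

    >>> from pyslurm import timestr_to_secs, secs_to_timestr, date_to_timestamp
    >>> timestr_to_secs("1-00:00:00")
    86400
    >>> secs_to_timestr(86400)
    '1-00:00:00'
    >>> ts = date_to_timestamp("2022-04-06T18:57:58")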
+ +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from pyslurm cimport slurm +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t + +cpdef u8(val, inf=*, noval=*, on_noval=*, zero_is_noval=*) +cpdef u16(val, inf=*, noval=*, on_noval=*, zero_is_noval=*) +cpdef u32(val, inf=*, noval=*, on_noval=*, zero_is_noval=*) +cpdef u64(val, inf=*, noval=*, on_noval=*, zero_is_noval=*) +cpdef u8_parse(uint8_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) +cpdef u16_parse(uint16_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) +cpdef u32_parse(uint32_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) +cpdef u64_parse(uint64_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) +cdef u8_bool(val) +cdef u16_bool(val) +cdef u8_parse_bool(uint8_t val) +cdef u16_parse_bool(uint16_t val) +cdef u64_parse_bool_flag(uint64_t flags, flag) +cdef u64_set_bool_flag(uint64_t *flags, boolean, flag_val) +cdef u16_parse_bool_flag(uint16_t flags, flag) +cdef u16_set_bool_flag(uint16_t *flags, boolean, flag_val) diff --git a/pyslurm/core/common/uint.pyx b/pyslurm/core/common/uint.pyx new file mode 100644 index 00000000..8c1146df --- /dev/null +++ b/pyslurm/core/common/uint.pyx @@ -0,0 +1,179 @@ +######################################################################### +# common/uint.pyx - functions dealing with parsing uint types +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
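A small, hedged illustration of the u*_parse helpers declared above. These are internal helpers, so the import path is only an assumption about how the compiled module is reached; values are examples:

    >>> from pyslurm.core.common.uint import u16_parse, u32_parse
    >>> u32_parse(42)
    42
    >>> u16_parse(0)            # 0 is treated as "no value" and yields None
    >>> u32_parse(0, on_noval=0)
    0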
+# +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + + +cpdef u8(val, inf=False, noval=slurm.NO_VAL8, on_noval=slurm.NO_VAL8, zero_is_noval=True): + """Try to convert arbitrary 'val' to uint8_t""" + if val is None or (val == 0 and zero_is_noval) or val == noval: + return on_noval + elif inf and val == "unlimited": + return slurm.INFINITE8 + else: + if isinstance(val, str) and val.isdigit(): + return int(val) + + return val + + +cpdef u8_parse(uint8_t val, on_inf="unlimited", on_noval=None, noval=slurm.NO_VAL8, zero_is_noval=True): + """Convert uint8_t to Python int (with a few situational parameters)""" + if val == noval or (val == 0 and zero_is_noval): + return on_noval + elif val == slurm.INFINITE8: + return on_inf + else: + return val + + +cpdef u16(val, inf=False, noval=slurm.NO_VAL16, on_noval=slurm.NO_VAL16, zero_is_noval=True): + """Try to convert arbitrary 'val' to uint16_t""" + if val is None or (val == 0 and zero_is_noval) or val == noval: + return on_noval + elif inf and val == "unlimited": + return slurm.INFINITE16 + else: + if isinstance(val, str) and val.isdigit(): + return int(val) + + return val + + +cpdef u16_parse(uint16_t val, on_inf="unlimited", on_noval=None, noval=slurm.NO_VAL16, zero_is_noval=True): + """Convert uint16_t to Python int (with a few situational parameters)""" + if val == noval or (val == 0 and zero_is_noval): + return on_noval + elif val == slurm.INFINITE16: + return on_inf + else: + return val + + +cpdef u32(val, inf=False, noval=slurm.NO_VAL, on_noval=slurm.NO_VAL, zero_is_noval=True): + """Try to convert arbitrary 'val' to uint32_t""" + if val is None or (val == 0 and zero_is_noval) or val == noval: + return on_noval + elif inf and val == "unlimited": + return slurm.INFINITE + else: + if isinstance(val, str) and val.isdigit(): + return int(val) + + return val + + +cpdef u32_parse(uint32_t val, on_inf="unlimited", on_noval=None, noval=slurm.NO_VAL, zero_is_noval=True): + """Convert uint32_t to Python int (with a few situational parameters)""" + if val == noval or (val == 0 and zero_is_noval): + return on_noval + elif val == slurm.INFINITE: + return on_inf + else: + return val + + +cpdef u64(val, inf=False, noval=slurm.NO_VAL64, on_noval=slurm.NO_VAL64, zero_is_noval=True): + """Try to convert arbitrary 'val' to uint64_t""" + if val is None or (val == 0 and zero_is_noval) or val == noval: + return on_noval + elif inf and val == "unlimited": + return slurm.INFINITE64 + else: + if isinstance(val, str) and val.isdigit(): + return int(val) + + return val + + +cpdef u64_parse(uint64_t val, on_inf="unlimited", on_noval=None, noval=slurm.NO_VAL64, zero_is_noval=True): + """Convert uint64_t to Python int (with a few situational parameters)""" + if val == noval or (val == 0 and zero_is_noval): + return on_noval + elif val == slurm.INFINITE64: + return on_inf + else: + return val + + +cdef u8_bool(val): + if val is None: + return slurm.NO_VAL8 + elif val: + return 1 + else: + return 0 + + +cdef u16_bool(val): + if val is None: + return slurm.NO_VAL16 + elif val: + return 1 + else: + return 0 + + +cdef u8_parse_bool(uint8_t val): + if not val or val == slurm.NO_VAL8: + return False + + return True + + +cdef u16_parse_bool(uint16_t val): + if not val or val == slurm.NO_VAL16: + return False + + return True + + +cdef u64_set_bool_flag(uint64_t *flags, boolean, flag_val): + if boolean: + flags[0] |= flag_val + else: + flags[0] &= ~flag_val + + +cdef u64_parse_bool_flag(uint64_t flags, flag): + if flags == slurm.NO_VAL: + return 
False + + if flags & flag: + return True + else: + return False + + +cdef u16_set_bool_flag(uint16_t *flags, boolean, flag_val): + if boolean: + flags[0] |= flag_val + else: + flags[0] &= ~flag_val + + +cdef u16_parse_bool_flag(uint16_t flags, flag): + if flags == slurm.NO_VAL16: + return False + + if flags & flag: + return True + else: + return False diff --git a/pyslurm/core/error.pyx b/pyslurm/core/error.pyx new file mode 100644 index 00000000..72ec8389 --- /dev/null +++ b/pyslurm/core/error.pyx @@ -0,0 +1,88 @@ +######################################################################### +# error.pyx - pyslurm error utilities +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm.core.common cimport cstr +from pyslurm cimport slurm +from pyslurm.slurm cimport slurm_get_errno + + +def slurm_strerror(errno): + """Convert a slurm errno to a string. + + Args: + errno (int): The error number for which the string representation + should be returned. + + Returns: + str: String representation of errno. + """ + return cstr.to_unicode(slurm.slurm_strerror(errno)) + + +def slurm_errno(): + """Get the current slurm errno. + + Returns: + int: Current slurm errno + """ + return slurm_get_errno() + + +def get_last_slurm_error(): + """Get the last slurm error that occured as a tuple of errno and string. + + Returns: + tuple: The errno and its string representation -> (errno, str) + """ + errno = slurm_errno() + + if errno == slurm.SLURM_SUCCESS: + return (errno, 'Success') + else: + return (errno, slurm_strerror(errno)) + + +class RPCError(Exception): + """Exception for handling Slurm RPC errors. + + Args: + errno (int): A slurm error number returned by RPC functions. Default + is None, which will get the last slurm error automatically. + msg (str): An optional, custom error description. If this is set, the + errno will not be translated to its string representation. 
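    Examples:
        A minimal, hedged sketch of catching this exception (the job id is
        illustrative):

        >>> from pyslurm import Job, RPCError
        >>> try:
        ...     job = Job(9999).reload()
        ... except RPCError as e:
        ...     print(f"Loading the Job failed: {e.msg}")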
+ """ + def __init__(self, errno=slurm.SLURM_ERROR, msg=None): + self.msg = msg + self.errno = errno + + if not msg: + if errno == slurm.SLURM_ERROR: + self.errno, self.msg = get_last_slurm_error() + else: + self.msg = slurm_strerror(errno) + + super().__init__(self.msg) + + +def verify_rpc(errno): + if errno != slurm.SLURM_SUCCESS: + raise RPCError(errno) diff --git a/pyslurm/core/job/__init__.pxd b/pyslurm/core/job/__init__.pxd new file mode 100644 index 00000000..e69de29b diff --git a/pyslurm/core/job/__init__.py b/pyslurm/core/job/__init__.py new file mode 100644 index 00000000..ccc396e2 --- /dev/null +++ b/pyslurm/core/job/__init__.py @@ -0,0 +1,3 @@ +from .job import Job, Jobs +from .step import JobStep, JobSteps +from .submission import JobSubmitDescription diff --git a/pyslurm/core/job/job.pxd b/pyslurm/core/job/job.pxd new file mode 100644 index 00000000..bb9dde6c --- /dev/null +++ b/pyslurm/core/job/job.pxd @@ -0,0 +1,88 @@ +######################################################################### +# job.pyx - interface to retrieve slurm job informations +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: embedsignature=True +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from pyslurm.core.common cimport cstr, ctime +from pyslurm.core.common.uint cimport * +from pyslurm.core.common.ctime cimport time_t + +from libc.string cimport memcpy, memset +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t, int64_t +from libc.stdlib cimport free + +from pyslurm.core.job.submission cimport JobSubmitDescription +from pyslurm.core.job.step cimport JobSteps, JobStep + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + working_cluster_rec, + slurm_msg_t, + job_id_msg_t, + slurm_msg_t_init, + return_code_msg_t, + slurm_send_recv_controller_msg, + slurm_free_return_code_msg, + slurm_free_job_info_msg, + slurm_free_job_info, + slurm_load_job, + slurm_load_jobs, + job_info_msg_t, + slurm_job_info_t, + slurm_job_state_string, + slurm_job_reason_string, + slurm_job_share_string, + slurm_job_batch_script, + slurm_get_job_stdin, + slurm_get_job_stdout, + slurm_get_job_stderr, + slurm_signal_job, + slurm_kill_job, + slurm_resume, + slurm_suspend, + slurm_update_job, + slurm_notify_job, + slurm_requeue, + xfree, + try_xmalloc, +) + + +cdef class Jobs(dict): + + cdef: + job_info_msg_t *info + slurm_job_info_t tmp_info + + +cdef class Job: + + cdef: + slurm_job_info_t *ptr + dict passwd + dict groups + + cdef alloc(self) + cdef time_t _calc_run_time(self) + + @staticmethod + cdef Job from_ptr(slurm_job_info_t *in_ptr) + diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx new file mode 100644 index 00000000..e705c755 --- /dev/null +++ b/pyslurm/core/job/job.pyx @@ -0,0 +1,1517 @@ +######################################################################### +# job.pyx - interface to retrieve slurm job informations +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: embedsignature=True +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from os import WIFSIGNALED, WIFEXITED, WTERMSIG, WEXITSTATUS +import re +from typing import Union +from pyslurm.core.common import cstr, ctime +from pyslurm.core.common.uint import * +from pyslurm.core.job.util import * +from pyslurm.core.error import ( + RPCError, + verify_rpc, + slurm_errno, +) +from pyslurm.core.common.ctime import ( + secs_to_timestr, + mins_to_timestr, + timestamp_to_date, + _raw_time, +) +from pyslurm.core.common import ( + uid_to_name, + gid_to_name, + humanize, + signal_to_num, + _getgrall_to_dict, + _getpwall_to_dict, + nodelist_from_range_str, + nodelist_to_range_str, + instance_to_dict, +) + + +cdef class Jobs(dict): + """A collection of :obj:`Job` objects. + + By creating a new :obj:`Jobs` instance, all Jobs in the system will be + fetched from the slurmctld. 
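    Examples:
        A brief, hedged usage sketch (requires a reachable slurmctld):

        >>> from pyslurm import Jobs
        >>> jobs = Jobs()
        >>> for job_id, job in jobs.items():
        ...     print(job_id, job.state)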
+ """ + def __dealloc__(self): + slurm_free_job_info_msg(self.info) + + def __init__(self, preload_passwd_info=False): + """Initialize a Jobs collection + + Args: + preload_passwd_info (bool): + Decides whether to query passwd and groups information from + the system. + Could potentially speed up access to attributes of the Job + where a UID/GID is translated to a name. If True, the + information will fetched and stored in each of the Job + instances. The default is False. + + Raises: + RPCError: When getting all the Jobs from the slurmctld failed. + MemoryError: If malloc fails to allocate memory. + """ + cdef: + dict passwd = {} + dict groups = {} + int flags = slurm.SHOW_ALL | slurm.SHOW_DETAIL + Job job + + self.info = NULL + verify_rpc(slurm_load_jobs(0, &self.info, flags)) + + # If requested, preload the passwd and groups database to potentially + # speedup lookups for an attribute in a Job, e.g. user_name or + # group_name. + if preload_passwd_info: + passwd = _getpwall_to_dict() + groups = _getgrall_to_dict() + + # zero-out a dummy job_step_info_t + memset(&self.tmp_info, 0, sizeof(slurm_job_info_t)) + + # Put each job pointer into its own "Job" instance. + for cnt in range(self.info.record_count): + job = Job.from_ptr(&self.info.job_array[cnt]) + + # Prevent double free if xmalloc fails mid-loop and a MemoryError + # is raised by replacing it with a zeroed-out slurm_job_info_t. + self.info.job_array[cnt] = self.tmp_info + + if preload_passwd_info: + job.passwd = passwd + job.groups = groups + + self[job.id] = job + + # At this point we memcpy'd all the memory for the Jobs. Setting this + # to 0 will prevent the slurm job free function to deallocate the + # memory for the individual jobs. This should be fine, because they + # are free'd automatically in __dealloc__ since the lifetime of each + # job-pointer is tied to the lifetime of its corresponding "Job" + # instance. + self.info.record_count = 0 + + def load_steps(self): + """Load all Job steps for this collection of Jobs. + + Note: + Pending Jobs will be ignored, since they don't have any Steps yet. + + Raises: + RPCError: When retrieving the Job information for all the Steps + failed. + + Returns: + dict: JobSteps information for each JobID. + """ + cdef: + Job job + dict step_info = JobSteps.load_all() + dict out + + # Ignore any Steps from Jobs which do not exist in this collection. + out = {jid: step_info[jid] for jid in self if jid in step_info} + return out + + def as_list(self): + """Format the information as list of Job objects. + + Returns: + list: List of Job objects + """ + return list(self.values()) + + +cdef class Job: + """A Slurm Job. + + All attributes in this class are read-only. + + Args: + job_id (int): + An Integer representing a Job-ID. + + Raises: + MemoryError: If malloc fails to allocate memory. + """ + def __init__(self, int job_id): + self.alloc() + self.ptr.job_id = job_id + self.passwd = {} + self.groups = {} + + cdef alloc(self): + self.ptr = try_xmalloc(sizeof(slurm_job_info_t)) + if not self.ptr: + raise MemoryError("xmalloc failed for job_info_t") + + def __dealloc__(self): + slurm_free_job_info(self.ptr) + self.ptr = NULL + + def __eq__(self, other): + return isinstance(other, Job) and self.id == other.id + + def reload(self): + """(Re)load information for a job. + + Implements the slurm_load_job RPC. + + Note: + You can call this function repeatedly to refresh the information + of an instance. Using the Job object returned is optional. 
+ + Returns: + Job: This function returns the current Job-instance object itself. + + Raises: + RPCError: If requesting the Job information from the slurmctld was + not successful. + MemoryError: If malloc failed to allocate memory. + + Examples: + >>> from pyslurm import Job + >>> job = Job(9999) + >>> job.reload() + >>> + >>> # You can also write this in one-line: + >>> job = Job(9999).reload() + """ + cdef: + job_info_msg_t *info = NULL + + try: + verify_rpc(slurm_load_job(&info, self.id, slurm.SHOW_DETAIL)) + + if info and info.record_count: + # Cleanup the old info + slurm_free_job_info(self.ptr) + + # Copy new info + self.alloc() + memcpy(self.ptr, &info.job_array[0], sizeof(slurm_job_info_t)) + info.record_count = 0 + except Exception as e: + raise e + finally: + slurm_free_job_info_msg(info) + + return self + + @staticmethod + cdef Job from_ptr(slurm_job_info_t *in_ptr): + cdef Job wrap = Job.__new__(Job) + wrap.alloc() + wrap.passwd = {} + wrap.groups = {} + memcpy(wrap.ptr, in_ptr, sizeof(slurm_job_info_t)) + + return wrap + + def as_dict(self): + """Job information formatted as a dictionary. + + Returns: + dict: Job information as dict + """ + return instance_to_dict(self) + + def send_signal(self, signal, steps="children", hurry=False): + """Send a signal to a running Job. + + Implements the slurm_signal_job RPC. + + Args: + signal (Union[str, int]): + Any valid signal which will be sent to the Job. Can be either + a str like 'SIGUSR1', or simply an int. + steps (str): + Selects which steps should be signaled. Valid values for this + are: "all", "batch" and "children". The default value is + "children", where all steps except the batch-step will be + signaled. + The value "batch" in contrast means, that only the batch-step + will be signaled. With "all" every step is signaled. + hurry (bool): + If True, no burst buffer data will be staged out. The default + value is False. + + Raises: + RPCError: When sending the signal was not successful. + + Examples: + Specifying the signal as a string: + + >>> from pyslurm import Job + >>> Job(9999).send_signal("SIGUSR1") + + or passing in a numeric signal: + + >>> Job(9999).send_signal(9) + """ + cdef uint16_t flags = 0 + + if steps.casefold() == "all": + flags |= slurm.KILL_FULL_JOB + elif steps.casefold() == "batch": + flags |= slurm.KILL_JOB_BATCH + + if hurry: + flags |= slurm.KILL_HURRY + + sig = signal_to_num(signal) + slurm_kill_job(self.id, sig, flags) + + # Ignore errors when the Job is already done or when SIGKILL was + # specified and the job id is already purged from slurmctlds memory. + errno = slurm_errno() + if (errno == slurm.ESLURM_ALREADY_DONE + or errno == slurm.ESLURM_INVALID_JOB_ID and sig == 9): + pass + else: + verify_rpc(errno) + + def cancel(self): + """Cancel a Job. + + Implements the slurm_kill_job RPC. + + Raises: + RPCError: When cancelling the Job was not successful. + + Examples: + >>> from pyslurm import Job + >>> Job(9999).cancel() + """ + self.send_signal(9) + + def suspend(self): + """Suspend a running Job. + + Implements the slurm_suspend RPC. + + Raises: + RPCError: When suspending the Job was not successful. 
+ + Examples: + >>> from pyslurm import Job + >>> Job(9999).suspend() + """ + # TODO: Report as a misbehaviour to schedmd that slurm_suspend is not + # correctly returning error code when it cannot find the job in + # _slurm_rpc_suspend it should return ESLURM_INVALID_JOB_ID, but + # returns -1 + # https://github.com/SchedMD/slurm/blob/master/src/slurmctld/proc_req.c#L4693 + verify_rpc(slurm_suspend(self.id)) + + def unsuspend(self): + """Unsuspend a currently suspended Job. + + Implements the slurm_resume RPC. + + Raises: + RPCError: When unsuspending the Job was not successful. + + Examples: + >>> from pyslurm import Jobs + >>> Job(9999).unsuspend() + """ + # Same problem as described in suspend() + verify_rpc(slurm_resume(self.id)) + + def modify(self, JobSubmitDescription changes): + """Modify a Job. + + Implements the slurm_update_job RPC. + + Args: + changes (JobSubmitDescription): + A JobSubmitDescription object which contains all the + modifications that should be done on the Job. + + Raises: + RPCError: When updating the Job was not successful. + + Examples: + >>> from pyslurm import Job, JobSubmitDescription + >>> + >>> # Setting the new time-limit to 20 days + >>> changes = JobSubmitDescription(time_limit="20-00:00:00") + >>> Job(9999).modify(changes) + """ + changes._create_job_submit_desc(is_update=True) + changes.ptr.job_id = self.id + verify_rpc(slurm_update_job(changes.ptr)) + + def hold(self, mode=None): + """Hold a currently pending Job, preventing it from being scheduled. + + Args: + mode (str): + Determines in which mode the Job should be held. Possible + values are "user" or "admin". By default, the Job is held in + "admin" mode, meaning only an Administrator will be able to + release the Job again. If you specify the mode as "user", the + User will also be able to release the job. + + Note: + Uses the modify() function to set the Job's priority to 0. + + Raises: + RPCError: When holding the Job was not successful. + + Examples: + >>> from pyslurm import Job + >>> + >>> # Holding a Job (in "admin" mode by default) + >>> Job(9999).hold() + >>> + >>> # Holding a Job in "user" mode + >>> Job(9999).hold(mode="user") + """ + cdef JobSubmitDescription job_sub = JobSubmitDescription(priority=0) + + if mode and mode.casefold() == "user": + job_sub.ptr.alloc_sid = slurm.ALLOC_SID_USER_HOLD + + self.modify(job_sub) + + def release(self): + """Release a currently held Job, allowing it to be scheduled again. + + Note: + Uses the modify() function to reset the priority back to + be controlled by the slurmctld's priority calculation routine. + + Raises: + RPCError: When releasing a held Job was not successful. + + Examples: + >>> from pyslurm import Job + >>> Job(9999).release() + """ + self.modify(JobSubmitDescription(priority=slurm.INFINITE)) + + def requeue(self, hold=False): + """Requeue a currently running Job. + + Implements the slurm_requeue RPC. + + Args: + hold (bool): + Controls whether the Job should be put in a held state or not. + Default for this is 'False', so it will not be held. + + Raises: + RPCError: When requeing the Job was not successful. 
+ + Examples: + >>> from pyslurm import Job + >>> + >>> # Requeing a Job while allowing it to be + >>> # scheduled again immediately + >>> Job(9999).requeue() + >>> + >>> # Requeing a Job while putting it in a held state + >>> Job(9999).requeue(hold=True) + """ + cdef uint32_t flags = 0 + + if hold: + flags |= slurm.JOB_REQUEUE_HOLD + + verify_rpc(slurm_requeue(self.id, flags)) + + def notify(self, msg): + """Sends a message to the Jobs stdout. + + Implements the slurm_notify_job RPC. + + Args: + msg (str): + The message that should be sent. + + Raises: + RPCError: When sending the message to the Job was not successful. + + Examples: + >>> from pyslurm import Job + >>> Job(9999).notify("Hello Friends!") + """ + verify_rpc(slurm_notify_job(self.id, msg)) + + def get_batch_script(self): + """Return the content of the script for a Batch-Job. + + Note: + The string returned also includes all the "\n" characters + (new-line). + + Returns: + str: The content of the batch script. + + Raises: + RPCError: When retrieving the Batch-Script for the Job was not + successful. + + Examples: + >>> from pyslurm import Job + >>> script = Job(9999).get_batch_script() + """ + # This reimplements the slurm_job_batch_script API call. Otherwise we + # would have to parse back the FILE* ptr we get from it back into a + # char* which would be a bit silly. + # Source: https://github.com/SchedMD/slurm/blob/7162f15af8deaf02c3bbf940d59e818cdeb5c69d/src/api/job_info.c#L1319 + cdef: + job_id_msg_t msg + slurm_msg_t req + slurm_msg_t resp + int rc = slurm.SLURM_SUCCESS + str script = None + + slurm_msg_t_init(&req) + slurm_msg_t_init(&resp) + + memset(&msg, 0, sizeof(msg)) + msg.job_id = self.id + req.msg_type = slurm.REQUEST_BATCH_SCRIPT + req.data = &msg + + rc = slurm_send_recv_controller_msg(&req, &resp, working_cluster_rec) + verify_rpc(rc) + + if resp.msg_type == slurm.RESPONSE_BATCH_SCRIPT: + script = cstr.to_unicode(resp.data) + xfree(resp.data) + elif resp.msg_type == slurm.RESPONSE_SLURM_RC: + rc = ( resp.data).return_code + slurm_free_return_code_msg(resp.data) + verify_rpc(rc) + else: + verify_rpc(slurm.SLURM_ERROR) + + return script + + @property + def name(self): + """str: Name of the Job""" + return cstr.to_unicode(self.ptr.name) + + @property + def id(self): + """int: Unique Job-ID""" + return self.ptr.job_id + + @property + def association_id(self): + """int: ID of the Association this Job is run under.""" + return u32_parse(self.ptr.assoc_id) + + @property + def account(self): + """str: Name of the Account this Job is run under.""" + return cstr.to_unicode(self.ptr.account) + + @property + def uid(self): + """int: UID of the User who submitted the Job.""" + return u32_parse(self.ptr.user_id, zero_is_noval=False) + + @property + def user(self): + """str: Name of the User who submitted the Job.""" + return uid_to_name(self.ptr.user_id, lookup=self.passwd) + + @property + def gid(self): + """int: GID of the Group that Job runs under.""" + return u32_parse(self.ptr.group_id, zero_is_noval=False) + + @property + def group(self): + """str: Name of the Group this Job runs under.""" + return gid_to_name(self.ptr.group_id, lookup=self.groups) + + @property + def priority(self): + """int: Priority of the Job.""" + return u32_parse(self.ptr.priority, zero_is_noval=False) + + @property + def nice(self): + """int: Nice Value of the Job.""" + if self.ptr.nice == slurm.NO_VAL: + return None + + return self.ptr.nice - slurm.NICE_OFFSET + + @property + def qos(self): + """str: QOS Name of the Job.""" + return 
cstr.to_unicode(self.ptr.qos) + + @property + def min_cpus_per_node(self): + """int: Minimum Amount of CPUs per Node the Job requested.""" + return u32_parse(self.ptr.pn_min_cpus) + + # I don't think this is used anymore - there is no way in sbatch to ask + # for a "maximum cpu" count, so it will always be empty. + # @property + # def max_cpus(self): + # """Maximum Amount of CPUs the Job requested.""" + # return u32_parse(self.ptr.max_cpus) + + @property + def state(self): + """str: State this Job is currently in.""" + return cstr.to_unicode(slurm_job_state_string(self.ptr.job_state)) + + @property + def state_reason(self): + """str: A Reason explaining why the Job is in its current state.""" + if self.ptr.state_desc: + return cstr.to_unicode(self.ptr.state_desc) + + return cstr.to_unicode(slurm_job_reason_string(self.ptr.state_reason)) + + @property + def is_requeueable(self): + """bool: Whether the Job is requeuable or not.""" + return u16_parse_bool(self.ptr.requeue) + + @property + def requeue_count(self): + """int: Amount of times the Job has been requeued.""" + return u16_parse(self.ptr.restart_cnt, on_noval=0) + + @property + def is_batch_job(self): + """bool: Whether the Job is a batch job or not.""" + return u16_parse_bool(self.ptr.batch_flag) + + @property + def reboot_nodes(self): + """bool: Whether the Job requires the Nodes to be rebooted first.""" + return u8_parse_bool(self.ptr.reboot) + + @property + def dependencies(self): + """dict: Dependencies the Job has to other Jobs.""" + out = { + "after": [], + "afterany": [], + "afterburstbuffer": [], + "aftercorr": [], + "afternotok": [], + "afterok": [], + "singleton": False, + "satisfy": "all", + } + dep = cstr.to_unicode(self.ptr.dependency, default=[]) + + if not dep: + return out + + delim = "," + if "?" in dep: + delim = "?" + out["satisfy"] = "any" + + for item in dep.split(delim): + if item == "singleton": + out["singleton"] = True + + dep_and_job = item.split(":", 1) + if len(dep_and_job) != 2: + continue + + dep_name, jobs = dep_and_job[0], dep_and_job[1].split(":") + if dep_name not in out: + continue + + for job in jobs: + out[dep_name].append(int(job) if job.isdigit() else job) + + return out + + @property + def time_limit_raw(self): + """int: Time-Limit for this Job. (Unix timestamp)""" + return _raw_time(self.ptr.time_limit) + + @property + def time_limit(self): + """str: Time-Limit for this Job. (formatted)""" + return mins_to_timestr(self.ptr.time_limit, "PartitionLimit") + + @property + def time_limit_min_raw(self): + """int: Minimum Time-Limit for this Job (Unix timestamp)""" + return _raw_time(self.ptr.time_min) + + @property + def time_limit_min(self): + """str: Minimum Time-limit acceptable for this Job (formatted)""" + return mins_to_timestr(self.ptr.time_min) + + @property + def submit_time_raw(self): + """int: Time the Job was submitted. (Unix timestamp)""" + return _raw_time(self.ptr.submit_time) + + @property + def submit_time(self): + """str: Time the Job was submitted. (formatted)""" + return timestamp_to_date(self.ptr.submit_time) + + @property + def eligible_time_raw(self): + """int: Time the Job is eligible to start. (Unix timestamp)""" + return _raw_time(self.ptr.eligible_time) + + @property + def eligible_time(self): + """str: Time the Job is eligible to start. 
(formatted)""" + return timestamp_to_date(self.ptr.eligible_time) + + @property + def accrue_time_raw(self): + """int: Job accrue time (Unix timestamp)""" + return _raw_time(self.ptr.accrue_time) + + @property + def accrue_time(self): + """str: Job accrue time (formatted)""" + return timestamp_to_date(self.ptr.accrue_time) + + @property + def start_time_raw(self): + """int: Time this Job has started execution. (Unix timestamp)""" + return _raw_time(self.ptr.start_time) + + @property + def start_time(self): + """str: Time this Job has started execution. (formatted)""" + return timestamp_to_date(self.ptr.start_time) + + @property + def resize_time_raw(self): + """int: Time the job was resized. (Unix timestamp)""" + return _raw_time(self.ptr.resize_time) + + @property + def resize_time(self): + """str: Time the job was resized. (formatted)""" + return timestamp_to_date(self.ptr.resize_time) + + @property + def deadline_time_raw(self): + """int: Time when a pending Job will be cancelled. (Unix timestamp)""" + return _raw_time(self.ptr.deadline) + + @property + def deadline_time(self): + """str: Time at which a pending Job will be cancelled. (formatted)""" + return timestamp_to_date(self.ptr.deadline) + + @property + def preempt_eligible_time_raw(self): + """int: Time the Job is eligible for preemption. (Unix timestamp)""" + return _raw_time(self.ptr.preemptable_time) + + @property + def preempt_eligible_time(self): + """str: Time when the Job is eligible for preemption. (formatted)""" + return timestamp_to_date(self.ptr.preemptable_time) + + @property + def preempt_time_raw(self): + """int: Time the Job was signaled for preemption. (Unix timestamp)""" + return _raw_time(self.ptr.preempt_time) + + @property + def preempt_time(self): + """str: Time the Job was signaled for preemption. (formatted)""" + return timestamp_to_date(self.ptr.preempt_time) + + @property + def suspend_time_raw(self): + """int: Last Time the Job was suspended. (Unix timestamp)""" + return _raw_time(self.ptr.suspend_time) + + @property + def suspend_time(self): + """str: Last Time the Job was suspended. (formatted)""" + return timestamp_to_date(self.ptr.suspend_time) + + @property + def last_sched_eval_time_raw(self): + """int: Last time evaluated for Scheduling. (Unix timestamp)""" + return _raw_time(self.ptr.last_sched_eval) + + @property + def last_sched_eval_time(self): + """str: Last Time evaluated for Scheduling. (formatted)""" + return timestamp_to_date(self.ptr.last_sched_eval) + + @property + def pre_suspension_time_raw(self): + """int: Amount of seconds the Job ran prior to suspension.""" + return _raw_time(self.ptr.pre_sus_time) + + @property + def pre_suspension_time(self): + """str: Time the Job ran prior to suspension. 
(formatted)""" + return secs_to_timestr(self.ptr.pre_sus_time) + + @property + def mcs_label(self): + """str: MCS Label for the Job""" + return cstr.to_unicode(self.ptr.mcs_label) + + @property + def partition(self): + """str: Name of the Partition the Job runs in.""" + return cstr.to_unicode(self.ptr.partition) + + @property + def submit_host(self): + """str: Name of the Host this Job was submitted from.""" + return cstr.to_unicode(self.ptr.alloc_node) + + @property + def batch_host(self): + """str: Name of the Host where the Batch-Script is executed.""" + return cstr.to_unicode(self.ptr.batch_host) + + @property + def min_nodes(self): + """int: Minimum amount of Nodes the Job has requested.""" + return u32_parse(self.ptr.num_nodes) + + @property + def max_nodes(self): + """int: Maximum amount of Nodes the Job has requested.""" + return u32_parse(self.ptr.max_nodes) + + @property + def alloc_nodes(self): + """str: Nodes the Job is using. + + This is the formatted string of Nodes as shown by scontrol. + For example, it can look like this: + + "node001,node[005-010]" + + If you want to expand this string into a list of nodenames you can + use the "pyslurm.nodelist_from_range_str" function. + + Note: + This is only valid when the Job is running. If the Job is pending, + it will always return an empty list. + """ + return cstr.to_unicode(self.ptr.nodes) + + @property + def required_nodes(self): + """str: Nodes the Job is explicitly requiring to run on. + + This is the formatted string of Nodes as shown by scontrol. + For example, it can look like this: + + "node001,node[005-010]" + + If you want to expand this string into a list of nodenames you can + use the "pyslurm.nodelist_from_range_str" function. + """ + return cstr.to_unicode(self.ptr.req_nodes) + + @property + def excluded_nodes(self): + """str: Nodes that are explicitly excluded for execution. + + This is the formatted string of Nodes as shown by scontrol. + For example, it can look like this: + + "node001,node[005-010]" + + If you want to expand this string into a list of nodenames you can + use the "pyslurm.nodelist_from_range_str" function. + """ + return cstr.to_unicode(self.ptr.exc_nodes) + + @property + def scheduled_nodes(self): + """str: Nodes the Job is scheduled on by the slurm controller. + + This is the formatted string of Nodes as shown by scontrol. + For example, it can look like this: + + "node001,node[005-010]" + + If you want to expand this string into a list of nodenames you can + use the "pyslurm.nodelist_from_range_str" function. 
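+
+        Examples:
+            >>> from pyslurm import Job, nodelist_from_range_str
+            >>> nodes = nodelist_from_range_str(Job(9999).scheduled_nodes)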
+ """ + return cstr.to_unicode(self.ptr.sched_nodes) + + @property + def derived_exit_code(self): + """int: The derived exit code for the Job.""" + if (self.ptr.derived_ec == slurm.NO_VAL + or not WIFEXITED(self.ptr.derived_ec)): + return None + + return WEXITSTATUS(self.ptr.derived_ec) + + @property + def derived_exit_code_signal(self): + """int: Signal for the derived exit code.""" + if (self.ptr.derived_ec == slurm.NO_VAL + or not WIFSIGNALED(self.ptr.derived_ec)): + return None + + return WTERMSIG(self.ptr.derived_ec) + + @property + def exit_code(self): + """int: Code with which the Job has exited.""" + if (self.ptr.exit_code == slurm.NO_VAL + or not WIFEXITED(self.ptr.exit_code)): + return None + + return WEXITSTATUS(self.ptr.exit_code) + + @property + def exit_code_signal(self): + """int: The signal which has led to the exit code of the Job.""" + if (self.ptr.exit_code == slurm.NO_VAL + or not WIFSIGNALED(self.ptr.exit_code)): + return None + + return WTERMSIG(self.ptr.exit_code) + + @property + def batch_constraints(self): + """list: Features that node(s) should have for the batch script. + + Controls where it is possible to execute the batch-script of the job. + Also see 'constraints' + """ + return cstr.to_list(self.ptr.batch_features) + + @property + def federation_origin(self): + """str: Federation Origin""" + return cstr.to_unicode(self.ptr.fed_origin_str) + + @property + def federation_siblings_active(self): + """str: Federation siblings active""" + return u64_parse(self.ptr.fed_siblings_active) + + @property + def federation_siblings_viable(self): + """str: Federation siblings viable""" + return u64_parse(self.ptr.fed_siblings_viable) + + @property + def alloc_cpus(self): + """int: Total amount of CPUs the Job is using. + + If the Job is still pending, this will be None. + """ + return u32_parse(self.ptr.num_cpus) + + @property + def cpus_per_task(self): + """int: Number of CPUs per Task used.""" + if self.ptr.cpus_per_tres: + return None + + return u16_parse(self.ptr.cpus_per_task, on_noval=1) + + @property + def cpus_per_gpu(self): + """int: Number of CPUs per GPU used.""" + if (not self.ptr.cpus_per_tres + or self.ptr.cpus_per_task != slurm.NO_VAL16): + return None + + # TODO: Make a function that, given a GRES type, safely extracts its + # value from the string. 
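+        # Assumption based on the split below: cpus_per_tres looks roughly
+        # like "gres:gpu:<count>", so the count is the third ":"-field.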
+ val = cstr.to_unicode(self.ptr.cpus_per_tres).split(":")[2] + return u16_parse(val) + + @property + def boards_per_node(self): + """int: Number of boards per Node.""" + return u16_parse(self.ptr.boards_per_node) + + @property + def sockets_per_board(self): + """int: Number of sockets per board.""" + return u16_parse(self.ptr.sockets_per_board) + + @property + def sockets_per_node(self): + """int: Number of sockets per node.""" + return u16_parse(self.ptr.sockets_per_node) + + @property + def cores_per_socket(self): + """int: Number of cores per socket.""" + return u16_parse(self.ptr.cores_per_socket) + + @property + def threads_per_core(self): + """int: Number of threads per core.""" + return u16_parse(self.ptr.threads_per_core) + + @property + def ntasks(self): + """int: Number of parallel processes.""" + return u32_parse(self.ptr.num_tasks, on_noval=1) + + @property + def ntasks_per_node(self): + """int: Number of parallel processes per node.""" + return u16_parse(self.ptr.ntasks_per_node) + + @property + def ntasks_per_board(self): + """int: Number of parallel processes per board.""" + return u16_parse(self.ptr.ntasks_per_board) + + @property + def ntasks_per_socket(self): + """int: Number of parallel processes per socket.""" + return u16_parse(self.ptr.ntasks_per_socket) + + @property + def ntasks_per_core(self): + """int: Number of parallel processes per core.""" + return u16_parse(self.ptr.ntasks_per_core) + + @property + def ntasks_per_gpu(self): + """int: Number of parallel processes per GPU.""" + return u16_parse(self.ptr.ntasks_per_tres) + + @property + def delay_boot_time_raw(self): + """int: https://slurm.schedmd.com/sbatch.html#OPT_delay-boot""" + return _raw_time(self.ptr.delay_boot) + + @property + def delay_boot_time(self): + """str: https://slurm.schedmd.com/sbatch.html#OPT_delay-boot""" + return secs_to_timestr(self.ptr.delay_boot) + + @property + def constraints(self): + """list: A list of features the Job requires nodes to have. + + In contrast, the 'batch_constraints' option only focuses on the + initial batch-script placement. + + This option however means features to restrict the list of nodes a + job is able to execute on in general beyond the initial batch-script. 
+ """ + return cstr.to_list(self.ptr.features) + + @property + def cluster(self): + """str: Name of the cluster the job is executing on.""" + return cstr.to_unicode(self.ptr.cluster) + + @property + def cluster_constraints(self): + """list: A List of features that a cluster should have.""" + return cstr.to_list(self.ptr.cluster_features) + + @property + def reservation(self): + """str: Name of the reservation this Job uses.""" + return cstr.to_unicode(self.ptr.resv_name) + + @property + def resource_sharing(self): + """str: Mode controlling how a job shares resources with others.""" + return cstr.to_unicode(slurm_job_share_string(self.ptr.shared)) + + @property + def contiguous(self): + """bool: Whether the Job requires a set of contiguous nodes.""" + return u16_parse_bool(self.ptr.contiguous) + + @property + def licenses(self): + """list: List of licenses the Job needs.""" + return cstr.to_list(self.ptr.licenses) + + @property + def network(self): + """str: Network specification for the Job.""" + return cstr.to_unicode(self.ptr.network) + + @property + def command(self): + """str: The command that is executed for the Job.""" + return cstr.to_unicode(self.ptr.command) + + @property + def work_dir(self): + """str: Path to the working directory for this Job.""" + return cstr.to_unicode(self.ptr.work_dir) + + @property + def admin_comment(self): + """str: An arbitrary comment set by an administrator for the Job.""" + return cstr.to_unicode(self.ptr.admin_comment) + + @property + def system_comment(self): + """str: An arbitrary comment set by the slurmctld for the Job.""" + return cstr.to_unicode(self.ptr.system_comment) + + @property + def container(self): + """str: The container this Job uses.""" + return cstr.to_unicode(self.ptr.container) + + @property + def comment(self): + """str: An arbitrary comment set for the Job.""" + return cstr.to_unicode(self.ptr.comment) + + @property + def stdin(self): + """str: The path to the file for stdin.""" + cdef char tmp[1024] + slurm_get_job_stdin(tmp, sizeof(tmp), self.ptr) + return cstr.to_unicode(tmp) + + @property + def stdout(self): + """str: The path to the log file for stdout.""" + cdef char tmp[1024] + slurm_get_job_stdout(tmp, sizeof(tmp), self.ptr) + return cstr.to_unicode(tmp) + + @property + def stderr(self): + """The path to the log file for stderr.""" + cdef char tmp[1024] + slurm_get_job_stderr(tmp, sizeof(tmp), self.ptr) + return cstr.to_unicode(tmp) + + @property + def num_switches(self): + """int: Number of switches requested.""" + return u32_parse(self.ptr.req_switch) + + @property + def max_wait_time_switches_raw(self): + """int: Amount of seconds to wait for the switches.""" + return _raw_time(self.ptr.wait4switch) + + @property + def max_wait_time_switches(self): + """str: Amount of seconds to wait for the switches. 
(formatted)""" + return secs_to_timestr(self.ptr.wait4switch) + + @property + def burst_buffer(self): + """str: Burst buffer specification""" + return cstr.to_unicode(self.ptr.burst_buffer) + + @property + def burst_buffer_state(self): + """str: Burst buffer state""" + return cstr.to_unicode(self.ptr.burst_buffer_state) + + @property + def cpu_freq_min(self): + """Union[str, int]: Minimum CPU-Frequency requested.""" + return cpufreq_to_str(self.ptr.cpu_freq_min) + + @property + def cpu_freq_max(self): + """Union[str, int]: Maximum CPU-Frequency requested.""" + return cpufreq_to_str(self.ptr.cpu_freq_max) + + @property + def cpu_freq_governor(self): + """Union[str, int]: CPU-Frequency Governor requested.""" + return cpufreq_to_str(self.ptr.cpu_freq_gov) + + # @property + # def tres_bindings(self): + # """str: ?""" + # # TODO: Find out how it works + # return cstr.to_unicode(self.ptr.tres_bind) + + # @property + # def tres_frequency(self): + # """?""" + # # TODO: Find out how it works + # return cstr.to_unicode(self.ptr.tres_freq) + + @property + def wckey(self): + """str: Name of the WCKey this Job uses.""" + return cstr.to_unicode(self.ptr.wckey) + + @property + def mail_user(self): + """list: Users that should receive Mails for this Job.""" + return cstr.to_list(self.ptr.mail_user) + + @property + def mail_types(self): + """list: Mail Flags specified by the User.""" + return get_mail_type(self.ptr.mail_type) + + @property + def hetjob_id(self): + """int: Heterogeneous ID""" + return u32_parse(self.ptr.het_job_id, noval=0) + + @property + def hetjob_offset(self): + """int: Heterogeneous Job offset""" + return u32_parse(self.ptr.het_job_offset, noval=0) + + # @property + # def hetjob_component_ids(self): + # """str: ?""" + # # TODO: Find out how to parse it in a more proper way? + # return cstr.to_unicode(self.ptr.het_job_id_set) + + @property + def tmp_disk_per_node_raw(self): + """int: Temporary disk space available per Node. (in Mebibytes)""" + return u32_parse(self.ptr.pn_min_tmp_disk) + + @property + def tmp_disk_per_node(self): + """str: Amount of temporary disk space available per Node. + + The output for this value is already in a human readable format, + with appropriate unit suffixes like K|M|G|T. + """ + return humanize(self.tmp_disk_per_node_raw) + + @property + def array_job_id(self): + """int: The master Array-Job ID.""" + return u32_parse(self.ptr.array_job_id) + + @property + def array_tasks_parallel(self): + """int: Number of array tasks allowed to run in simultaneously.""" + return u32_parse(self.ptr.array_max_tasks) + + @property + def array_task_id(self): + """int: The Task-ID if the Job is an Array-Job.""" + return u32_parse(self.ptr.array_task_id) + + @property + def array_tasks_waiting(self): + """str: Array Tasks that are still waiting. + + This is the formatted string of Task-IDs as shown by scontrol. + For example, it can look like this: + + "1-3,5-7,8,9" + + If you want to expand this string including the ranges into a + list, you can use the "pyslurm.expand_range_str" function. + """ + task_str = cstr.to_unicode(self.ptr.array_task_str) + if not task_str: + return None + + if "%" in task_str: + # We don't want this % character and everything after it + # in here, so remove it. + task_str = task_str[:task_str.rindex("%")] + + return task_str + + @property + def end_time(self): + """int: Time at which this Job has ended. (Unix timestamp)""" + return _raw_time(self.ptr.end_time) + + @property + def end_time(self): + """str: Time at which this Job has ended. 
(formatted)""" + return timestamp_to_date(self.ptr.end_time) + + # https://github.com/SchedMD/slurm/blob/d525b6872a106d32916b33a8738f12510ec7cf04/src/api/job_info.c#L480 + cdef _calc_run_time(self): + cdef time_t rtime + cdef time_t etime + + if slurm.IS_JOB_PENDING(self.ptr): + return None + elif slurm.IS_JOB_SUSPENDED(self.ptr): + return self.pre_suspension_time + else: + if slurm.IS_JOB_RUNNING(self.ptr) or self.ptr.end_time == 0: + etime = ctime.time(NULL) + else: + etime = self.ptr.end_time + + if self.ptr.suspend_time: + rtime = ctime.difftime( + etime, + self.ptr.suspend_time + self.ptr.pre_sus_time) + else: + rtime = ctime.difftime(etime, self.ptr.start_time) + + return u64_parse(rtime) + + @property + def run_time_raw(self): + """int: Amount of seconds the Job has been running. (Unix timestamp)""" + return _raw_time(self._calc_run_time()) + + @property + def run_time(self): + """str: Amount of seconds the Job has been running. (formatted)""" + return secs_to_timestr(self._calc_run_time()) + + @property + def cores_reserved_for_system(self): + """int: Amount of cores reserved for System use only.""" + if self.ptr.core_spec != slurm.NO_VAL16: + if not self.ptr.core_spec & slurm.CORE_SPEC_THREAD: + return self.ptr.core_spec + + @property + def threads_reserved_for_system(self): + """int: Amount of Threads reserved for System use only.""" + if self.ptr.core_spec != slurm.NO_VAL16: + if self.ptr.core_spec & slurm.CORE_SPEC_THREAD: + return self.ptr.core_spec & (~slurm.CORE_SPEC_THREAD) + + @property + def mem_per_cpu_raw(self): + """int: Amount of Memory per CPU this Job has. (in Mebibytes)""" + if self.ptr.pn_min_memory != slurm.NO_VAL64: + if self.ptr.pn_min_memory & slurm.MEM_PER_CPU: + mem = self.ptr.pn_min_memory & (~slurm.MEM_PER_CPU) + return u64_parse(mem) + else: + return None + + @property + def mem_per_cpu(self): + """str: Humanized amount of Memory per CPU this Job has.""" + return humanize(self.mem_per_cpu_raw) + + @property + def mem_per_node_raw(self): + """int: Amount of Memory per Node this Job has. (in Mebibytes)""" + if self.ptr.pn_min_memory != slurm.NO_VAL64: + if not self.ptr.pn_min_memory & slurm.MEM_PER_CPU: + return u64_parse(self.ptr.pn_min_memory) + else: + return None + + @property + def mem_per_node(self): + """str: Humanized amount of Memory per Node this Job has.""" + return humanize(self.mem_per_node_raw) + + @property + def mem_per_gpu_raw(self): + """int: Amount of Memory per GPU this Job has. (in Mebibytes)""" + if self.ptr.mem_per_tres and self.ptr.pn_min_memory == slurm.NO_VAL64: + # TODO: Make a function that, given a GRES type, safely extracts + # its value from the string. + mem = int(cstr.to_unicode(self.ptr.mem_per_tres).split(":")[2]) + return u64_parse(mem) + else: + return None + + @property + def mem_per_gpu(self): + """str: Humanized amount of Memory per GPU this Job has.""" + return humanize(self.mem_per_gpu_raw) + + @property + def gres_per_node(self): + """dict: GRES (e.g. GPU) this Job is using per Node.""" + return cstr.to_gres_dict(self.ptr.tres_per_node) + + @property + def acct_gather_profile(self): + """list: Options that control gathering of Accounting information.""" + return get_acctg_profile(self.ptr.profile) + + @property + def gres_binding(self): + """str: Binding Enforcement of a GRES resource (e.g. 
GPU).""" + if self.ptr.bitflags & slurm.GRES_ENFORCE_BIND: + return "enforce" + elif self.ptr.bitflags & slurm.GRES_DISABLE_BIND: + return "disable" + else: + return None + + @property + def kill_on_invalid_dep(self): + """bool: Whether the Job should be killed on an invalid dependency.""" + return u64_parse_bool_flag(self.ptr.bitflags, slurm.KILL_INV_DEP) + + @property + def spread_job(self): + """bool: Whether the Job should be spread accross the nodes.""" + return u64_parse_bool_flag(self.ptr.bitflags, slurm.SPREAD_JOB) + + @property + def power(self): + """list: Options for Power Management.""" + return get_power_type(self.ptr.power_flags) + + @property + def is_cronjob(self): + """bool: Whether this Job is a cronjob.""" + return u64_parse_bool_flag(self.ptr.bitflags, slurm.CRON_JOB) + + @property + def cronjob_time(self): + """str: The time specification for the Cronjob.""" + return cstr.to_unicode(self.ptr.cronspec) + + def get_resource_layout_per_node(self): + """Retrieve the resource layout of this Job on each node. + + This contains the following information: + * cpus (int) + * gres (dict) + * memory (str) - Humanized Memory str + * memory_raw (int) - Value in Mebibytes + + Returns: + dict: Resource layout + """ + # TODO: Explain the structure of the return value a bit more. + cdef: + slurm.job_resources *resources = self.ptr.job_resrcs + slurm.hostlist_t hl + uint32_t rel_node_inx + int bit_inx = 0 + int bit_reps = 0 + int sock_inx = 0 + uint32_t sock_reps = 0 + int i = 0, j + uint32_t k = 0 + char *host + char *gres = NULL + slurm.bitstr_t *cpu_bitmap + char cpu_bitmap_str[128] + uint32_t threads + dict output = {} + + if not resources or not resources.core_bitmap: + return output + + hl = slurm.slurm_hostlist_create(resources.nodes) + if not hl: + raise ValueError("Unable to create hostlist.") + + for rel_node_inx in range(resources.nhosts): + # Check how many consecutive nodes have the same cpu allocation + # layout. + if sock_reps >= resources.sock_core_rep_count[sock_inx]: + sock_inx += 1 + sock_reps = 0 + sock_reps += 1 + + # Get the next node from the list of nodenames + host = slurm.slurm_hostlist_shift(hl) + + # How many rounds we have to do in order to calculate the complete + # cpu bitmap. + bit_reps = (resources.sockets_per_node[sock_inx] + * resources.cores_per_socket[sock_inx]) + + # Calculate the amount of threads per core this job has on the + # specific host. + threads = _threads_per_core(host) + + # Allocate a new, big enough cpu bitmap + cpu_bitmap = slurm.slurm_bit_alloc(bit_reps * threads) + + # Calculate the cpu bitmap for this host. 
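+            # Each set bit in core_bitmap is a core allocated to this job on
+            # the current host; expand every such core into its threads.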
+ for j in range(bit_reps): + if slurm.slurm_bit_test(resources.core_bitmap, bit_inx): + for k in range(threads): + slurm.slurm_bit_set(cpu_bitmap, (j*threads)+k) + bit_inx += 1 + + # Extract the cpu bitmap into a char *cpu_bitmap_str + slurm.slurm_bit_fmt(cpu_bitmap_str, + sizeof(cpu_bitmap_str), cpu_bitmap) + slurm.slurm_bit_free(&cpu_bitmap) + + nodename = cstr.to_unicode(host) + cpu_ids = cstr.to_unicode(cpu_bitmap_str) + mem = None + + if rel_node_inx < self.ptr.gres_detail_cnt: + gres = self.ptr.gres_detail_str[rel_node_inx] + + if resources.memory_allocated: + mem = u64_parse(resources.memory_allocated[rel_node_inx]) + + if nodename: + output[nodename] = { + "cpus": cpu_ids, + "gres": cstr.to_gres_dict(gres), + "memory": humanize(mem), + "memory_raw": mem, + } + + free(host) + + slurm.slurm_hostlist_destroy(hl) + return output + + +# https://github.com/SchedMD/slurm/blob/d525b6872a106d32916b33a8738f12510ec7cf04/src/api/job_info.c#L99 +cdef _threads_per_core(char *host): + # TODO + return 1 diff --git a/pyslurm/core/job/sbatch_opts.pyx b/pyslurm/core/job/sbatch_opts.pyx new file mode 100644 index 00000000..35d77eb1 --- /dev/null +++ b/pyslurm/core/job/sbatch_opts.pyx @@ -0,0 +1,203 @@ +######################################################################### +# sbatch_opt.pyx - utilities to parse #SBATCH options +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: embedsignature=True +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +import re +from pathlib import Path + +SBATCH_MAGIC = "#SBATCH" + + +class _SbatchOpt(): + def __init__(self, short_opt, long_opt, + our_attr_name, attr_param=None, is_boolean=False, + has_optional_args=False): + self.short_opt = short_opt + self.long_opt = long_opt + self.our_attr_name = our_attr_name + self.attr_param = attr_param + self.is_boolean = is_boolean + self.has_optional_args = has_optional_args + + +# Sorted by occurence in the sbatch manpage - keep in order. 
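+# Each entry maps an sbatch option (short and long form) to the
+# corresponding JobSubmitDescription attribute. attr_param optionally holds
+# a fixed value to set when the option is a plain flag without arguments.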
+SBATCH_OPTIONS = [ + _SbatchOpt("A", "account", "account"), + _SbatchOpt(None, "acctg-freq", "accounting_gather_freq"), + _SbatchOpt("a", "array", "array"), + _SbatchOpt(None, "batch", "batch_constraints"), + _SbatchOpt(None, "bb", "burst_buffer"), + _SbatchOpt(None, "bbf", "burst_buffer_file"), + _SbatchOpt("b", "begin", "begin_time"), + _SbatchOpt("D", "chdir", "work_dir"), + _SbatchOpt(None, "cluster-constraint", "cluster_constraints"), + _SbatchOpt("M", "clusters", "clusters"), + _SbatchOpt(None, "comment","comment"), + _SbatchOpt("C", "constraint", "constraints"), + _SbatchOpt(None, "container", "container"), + _SbatchOpt(None, "contiguous", "contiguous"), + _SbatchOpt("S", "core-spec", "cores_reserved_for_system"), + _SbatchOpt(None, "cores-per-socket", "cores_per_socket"), + _SbatchOpt(None, "cpu-freq", "cpu_freq"), + _SbatchOpt(None, "cpus-per-gpu", "cpus_per_gpu"), + _SbatchOpt("c", "cpus-per-task", "cpus_per_task"), + _SbatchOpt(None, "deadline", "deadline"), + _SbatchOpt(None, "delay-boot", "delay_boot"), + _SbatchOpt("d", "dependency", "dependencies"), + _SbatchOpt("m", "distribution", "distribution"), + _SbatchOpt("e", "error", "stderr"), + _SbatchOpt("x", "exclude", "excluded_nodes"), + _SbatchOpt(None, "exclusive", "resource_sharing", "no"), + _SbatchOpt(None, "export", "environment"), + _SbatchOpt(None, "export-file", None), + _SbatchOpt("B", "extra-node-info", None), + _SbatchOpt(None, "get-user-env", "get_user_environment"), + _SbatchOpt(None, "gid", "gid"), + _SbatchOpt(None, "gpu-bind", "gpu_binding"), + _SbatchOpt(None, "gpu-freq", None), + _SbatchOpt("G", "gpus", "gpus"), + _SbatchOpt(None, "gpus-per-node", "gpus_per_node"), + _SbatchOpt(None, "gpus-per-socket", "gpus_per_socket"), + _SbatchOpt(None, "gpus-per-socket", "gpus_per_task"), + _SbatchOpt(None, "gres", "gres_per_node"), + _SbatchOpt(None, "gres-flags", "gres_enforce_binding"), + _SbatchOpt(None, "hint", None), + _SbatchOpt("H", "hold", "priority", 0), + _SbatchOpt(None, "ignore-pbs", None), + _SbatchOpt("i", "input", "stdin"), + _SbatchOpt("J", "job-name", "name"), + _SbatchOpt(None, "kill-on-invalid-dep", "kill_on_invalid_dependency"), + _SbatchOpt("L", "licenses", "licenses"), + _SbatchOpt(None, "mail-type", "mail_type"), + _SbatchOpt(None, "mail-user", "mail_user"), + _SbatchOpt(None, "mcs-label", "mcs_label"), + _SbatchOpt(None, "mem", "mem_per_node"), + _SbatchOpt(None, "mem-bind", None), + _SbatchOpt(None, "mem-per-cpu", "mem_per_cpu"), + _SbatchOpt(None, "mem-per-gpu", "mem_per_gpu"), + _SbatchOpt(None, "mincpus", "min_cpus_per_node"), + _SbatchOpt(None, "network", "network"), + _SbatchOpt(None, "nice", "nice"), + _SbatchOpt("k", "no-kill", "kill_on_node_fail", False), + _SbatchOpt(None, "no-requeue", "is_requeueable", False), + _SbatchOpt("F", "nodefile", None), + _SbatchOpt("w", "nodelist", "required_nodes"), + _SbatchOpt("N", "nodes", "nodes"), + _SbatchOpt("n", "ntasks", "ntasks"), + _SbatchOpt(None, "ntasks-per-core", "ntasks_per_core"), + _SbatchOpt(None, "ntasks-per-gpu", "ntasks_per_gpu"), + _SbatchOpt(None, "ntasks-per-node", "ntasks_per_node"), + _SbatchOpt(None, "ntasks-per-socket", "ntasks_per_socket"), + _SbatchOpt(None, "open-mode", "log_files_open_mode"), + _SbatchOpt("o", "output", "stdout"), + _SbatchOpt("O", "overcommit", "overcommit", True), + _SbatchOpt("s", "oversubscribe", "resource_sharing", "yes"), + _SbatchOpt("p", "partition", "partition"), + _SbatchOpt(None, "power", "power_type"), + _SbatchOpt(None, "prefer", None), + _SbatchOpt(None, "priority", "priority"), + 
_SbatchOpt(None, "profile", "profile"), + _SbatchOpt(None, "propagate", None), + _SbatchOpt("q", "qos", "qos"), + _SbatchOpt(None, "reboot", "reboot_nodes", True), + _SbatchOpt(None, "requeue", "is_requeueable", True), + _SbatchOpt(None, "reservation", "reservations"), + _SbatchOpt(None, "signal", "signal"), + _SbatchOpt(None, "sockets-per-node", "sockets_per_node"), + _SbatchOpt(None, "spread-job", "spread_job", True), + _SbatchOpt(None, "switches", "switches"), + _SbatchOpt(None, "thread-spec", "threads_reserved_for_system"), + _SbatchOpt(None, "threads-per-core", "threads_per_core"), + _SbatchOpt("t", "time", "time_limit"), + _SbatchOpt(None, "time-min", "time_limit_min"), + _SbatchOpt(None, "tmp", "tmp_disk_per_node"), + _SbatchOpt(None, "uid", "uid"), + _SbatchOpt(None, "use-min-nodes", "use_min_nodes", True), + _SbatchOpt(None, "wait-all-nodes", "wait_all_nodes", True), + _SbatchOpt(None, "wckey", "wckey"), +] + + +def _parse_line(line): + # Remove the #SBATCH from the start + opts = line[len("#SBATCH"):] + + # Ignore possible comments after the options + opts = opts.split("#")[0].strip() + + # Now the line can be in these forms for example: + # * -t20 or -t 20 + # * --time=20 or --time 20 or --time20 + if "=" in opts: + # -t=21 or --time=20 + opts = "=".join(opts.replace("=", " ").split()) + opt, val = opts.split("=") + elif " " in opts: + # --time 20 or -t 20 + opts = "=".join(opts.split()) + opt, val = opts.split("=") + elif any(el.isdigit() for el in opts): + # -t20 or --time20 + opt, val = list(filter(None, re.split(r'(\d+)', opts))) + else: + # Probably a boolean flag, like --exclusive or -O + opt, val = opts, None + + # Remove "-" or "--" at the front. + opt = opt[1:] + if opt[0] == "-": + # Found second dash. + opt = opt[1:] + + return opt, val + + +def _find_opt(opt): + for sbopt in SBATCH_OPTIONS: + # Check if we can find the option in our predefined mapping. + if opt == sbopt.short_opt or opt == sbopt.long_opt: + return sbopt + + return None + + +def _parse_opts_from_batch_script(desc, script, overwrite): + flags_and_vals = {} + + if not script or not Path(script).is_file(): + return None + + script = Path(script).read_text() + for line in script.splitlines(): + line = line.lstrip() + + if line.startswith(SBATCH_MAGIC): + flag, val = _parse_line(line) + opt = _find_opt(flag) + + if not opt or opt.our_attr_name is None: + # Not supported + continue + + if getattr(desc, opt.our_attr_name) is None or overwrite: + val = opt.attr_param if val is None else val + setattr(desc, opt.our_attr_name, val) diff --git a/pyslurm/core/job/step.pxd b/pyslurm/core/job/step.pxd new file mode 100644 index 00000000..a82cdd4b --- /dev/null +++ b/pyslurm/core/job/step.pxd @@ -0,0 +1,62 @@ +######################################################################### +# job/step.pxd - interface to retrieve slurm job step informations +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: embedsignature=True +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t +from .job cimport Job + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + job_step_info_t, + slurm_get_job_steps, + job_step_info_response_msg_t, + step_update_request_msg_t, + slurm_free_job_step_info_response_msg, + slurm_init_update_step_msg, + slurm_free_update_step_msg, + slurm_free_job_step_info_response_msg, + slurm_free_job_step_info_members, + slurm_update_step, + slurm_signal_job_step, + slurm_kill_job_step, + slurm_job_state_string, + xfree, + try_xmalloc, +) + +cdef class JobSteps(dict): + + cdef: + job_step_info_response_msg_t *info + job_step_info_t tmp_info + + cdef dict _load(self, uint32_t job_id, int flags) + + +cdef class JobStep: + + cdef: + job_step_info_t *ptr + step_update_request_msg_t *umsg + + @staticmethod + cdef JobStep from_ptr(job_step_info_t *in_ptr) diff --git a/pyslurm/core/job/step.pyx b/pyslurm/core/job/step.pyx new file mode 100644 index 00000000..7cfa2e35 --- /dev/null +++ b/pyslurm/core/job/step.pyx @@ -0,0 +1,530 @@ +######################################################################### +# job/step.pyx - interface to retrieve slurm job step informations +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: embedsignature=True +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from libc.string cimport memcpy, memset +from pyslurm.core.common cimport cstr, ctime +from pyslurm.core.common import cstr, ctime +from pyslurm.core.common.uint cimport * +from pyslurm.core.common.uint import * +from pyslurm.core.common.ctime cimport time_t +from pyslurm.core.error import RPCError, verify_rpc +from pyslurm.core.common import ( + signal_to_num, + instance_to_dict, + uid_to_name, +) +from pyslurm.core.job.util import ( + cpufreq_to_str, + get_task_dist, +) +from pyslurm.core.common.ctime import ( + secs_to_timestr, + mins_to_timestr, + timestr_to_mins, + timestamp_to_date, + _raw_time, +) + + +cdef class JobSteps(dict): + """A collection of :obj:`JobStep` objects for a given Job.""" + def __dealloc__(self): + slurm_free_job_step_info_response_msg(self.info) + + def __cinit__(self): + self.info = NULL + + def __init__(self, job): + """Initialize a JobSteps collection + + Args: + job (Union[Job, int]): + A Job for which the Steps should be loaded. + + Raises: + RPCError: When getting the Job steps from the slurmctld failed. 
+            MemoryError: If malloc fails to allocate memory.
+        """
+        cdef Job _job
+
+        # Reload the Job in order to have updated information about its state.
+        _job = job.reload() if isinstance(job, Job) else Job(job).reload()
+
+        step_info = self._load(_job.id, slurm.SHOW_ALL)
+        if not step_info and not slurm.IS_JOB_PENDING(_job.ptr):
+            msg = f"Failed to load step info for Job {_job.id}."
+            raise RPCError(msg=msg)
+
+        # No super().__init__() needed? Cython probably already initialized
+        # the dict automatically.
+        self.update(step_info[_job.id])
+
+    cdef dict _load(self, uint32_t job_id, int flags):
+        cdef:
+            JobStep step
+            JobSteps steps
+            uint32_t cnt = 0
+            dict out = {}
+
+        rc = slurm_get_job_steps(0, job_id, slurm.NO_VAL, &self.info,
+                                 flags)
+        verify_rpc(rc)
+
+        # Zero-out a dummy job_step_info_t.
+        memset(&self.tmp_info, 0, sizeof(job_step_info_t))
+
+        # Put each job-step pointer into its own "JobStep" instance.
+        for cnt in range(self.info.job_step_count):
+            step = JobStep.from_ptr(&self.info.job_steps[cnt])
+
+            # Replace the entry with the zeroed-out job_step_info_t, so we
+            # don't double-free if xmalloc fails mid-loop and a MemoryError
+            # is raised.
+            self.info.job_steps[cnt] = self.tmp_info
+
+            if step.job_id not in out:
+                steps = JobSteps.__new__(JobSteps)
+                out[step.job_id] = steps
+
+            out[step.job_id].update({step.id: step})
+
+        # At this point we memcpy'd all the memory for the Steps. Setting this
+        # to 0 prevents the slurm step free function from deallocating the
+        # memory for the individual steps. This should be fine, because they
+        # are free'd automatically in __dealloc__, since the lifetime of each
+        # step-pointer is tied to the lifetime of its corresponding JobStep
+        # instance.
+        self.info.job_step_count = 0
+
+        return out
+
+    @staticmethod
+    def load_all():
+        """Loads and returns all the steps in the system.
+
+        Returns:
+            dict: A dict that maps every Job-ID (key) to an instance of its
+                JobSteps collection (value).
+        """
+        cdef JobSteps steps = JobSteps.__new__(JobSteps)
+        return steps._load(slurm.NO_VAL, slurm.SHOW_ALL)
+
+
+cdef class JobStep:
+    """A Slurm Jobstep"""
+    def __cinit__(self):
+        self.ptr = NULL
+        self.umsg = NULL
+
+    def __init__(self, job=0, step=0, **kwargs):
+        """Initialize the JobStep instance.
+
+        Args:
+            job (Union[Job, int]):
+                The Job this Step belongs to.
+            step (Union[int, str]):
+                Step-ID for this JobStep object.
+
+        Raises:
+            MemoryError: If malloc fails to allocate memory.
+        """
+        self._alloc_impl()
+        self.job_id = job.id if isinstance(job, Job) else job
+        self.id = step
+
+        # Initialize attributes, if any were provided.
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+
+    def _alloc_info(self):
+        if not self.ptr:
+            self.ptr = <job_step_info_t*>try_xmalloc(
+                    sizeof(job_step_info_t))
+            if not self.ptr:
+                raise MemoryError("xmalloc failed for job_step_info_t")
+
+    def _alloc_umsg(self):
+        if not self.umsg:
+            self.umsg = <step_update_request_msg_t*>try_xmalloc(
+                    sizeof(step_update_request_msg_t))
+            if not self.umsg:
+                raise MemoryError("xmalloc failed for "
+                                  "step_update_request_msg_t")
+            slurm_init_update_step_msg(self.umsg)
+
+    def _alloc_impl(self):
+        self._alloc_info()
+        self._alloc_umsg()
+
+    def __dealloc__(self):
+        self._dealloc_impl()
+
+    def _dealloc_impl(self):
+        slurm_free_job_step_info_members(self.ptr)
+        xfree(self.ptr)
+        slurm_free_update_step_msg(self.umsg)
+        self.umsg = NULL
+
+    def __setattr__(self, name, val):
+        # When a user wants to set attributes on a JobStep instance that was
+        # created by calling JobSteps(), the "umsg" pointer is not yet allocated.
+ # We only allocate memory for it by the time the user actually wants + # to modify something. + self._alloc_umsg() + # Call descriptors __set__ directly + JobStep.__dict__[name].__set__(self, val) + + def reload(self): + """(Re)load information for a specific job step. + + Implements the slurm_get_job_steps RPC. + + Note: + You can call this function repeatedly to refresh the information + of an instance. Using the JobStep object returned is optional. + + Raises: + RPCError: When retrieving Step information from the slurmctld was + not successful. + MemoryError: If malloc failed to allocate memory. + + Returns: + JobStep: This function returns the current JobStep-instance object + itself. + + Examples: + >>> from pyslurm import JobStep + >>> jobstep = JobStep(9999, 1) + >>> jobstep.reload() + >>> + >>> # You can also write this in one-line: + >>> jobstep = JobStep(9999, 1).reload() + """ + cdef: + job_step_info_response_msg_t *info = NULL + uint32_t save_jid = self.job_id + uint32_t save_sid = self.ptr.step_id.step_id + + rc = slurm_get_job_steps(0, save_jid, save_sid, + &info, slurm.SHOW_ALL) + verify_rpc(rc) + + if info.job_step_count == 1: + # Cleanup the old info. + self._dealloc_impl() + + # Copy new info + self._alloc_impl() + memcpy(self.ptr, &info.job_steps[0], sizeof(job_step_info_t)) + info.job_step_count = 0 + slurm_free_job_step_info_response_msg(info) + else: + slurm_free_job_step_info_response_msg(info) + + sid = self._xlate_from_id(save_sid) + msg = f"Step {sid} of Job {save_jid} not found." + raise RPCError(msg=msg) + + return self + + @staticmethod + cdef JobStep from_ptr(job_step_info_t *in_ptr): + cdef JobStep wrap = JobStep.__new__(JobStep) + wrap._alloc_info() + memcpy(wrap.ptr, in_ptr, sizeof(job_step_info_t)) + return wrap + + def send_signal(self, signal): + """Send a signal to a running Job step. + + Implements the slurm_signal_job_step RPC. + + Args: + signal (Union[str, int]): + Any valid signal which will be sent to the Job. Can be either + a str like 'SIGUSR1', or simply an int. + + Raises: + RPCError: When sending the signal was not successful. + + Examples: + Specifying the signal as a string: + + >>> from pyslurm import JobStep + >>> JobStep(9999, 1).send_signal("SIGUSR1") + + or passing in a numeric signal: + + >>> JobStep(9999, 1).send_signal(9) + """ + step_id = self.ptr.step_id.step_id + sig = signal_to_num(signal) + verify_rpc(slurm_signal_job_step(self.job_id, step_id, sig)) + + def cancel(self): + """Cancel a Job step. + + Implements the slurm_kill_job_step RPC. + + Raises: + RPCError: When cancelling the Job was not successful. + + Examples: + >>> from pyslurm import JobStep + >>> JobStep(9999, 1).cancel() + """ + step_id = self.ptr.step_id.step_id + verify_rpc(slurm_kill_job_step(self.job_id, step_id, 9)) + + def modify(self, step=None, **kwargs): + """Modify a job step. + + Implements the slurm_update_step RPC. + + Args: + step (JobStep): + Another JobStep object which contains all the changes that + should be applied to this instance. + **kwargs: + You can also specify all the changes as keyword arguments. + Allowed values are only attributes which can actually be set + on a JobStep instance. If a step is explicitly specified as + parameter, all **kwargs will be ignored. + + Raises: + RPCError: When updating the JobStep was not successful. 
+ + Examples: + >>> from pyslurm import JobStep + >>> + >>> # Setting the new time-limit to 20 days + >>> changes = JobStep(time_limit="20-00:00:00") + >>> JobStep(9999, 1).modify(changes) + >>> + >>> # Or by specifying the changes directly to the modify function + >>> JobStep(9999, 1).modify(time_limit="20-00:00:00") + """ + cdef JobStep js = self + + # Allow the user to both specify changes via object and **kwargs. + if step and isinstance(step, JobStep): + js = step + elif kwargs: + js = JobStep(**kwargs) + + js._alloc_umsg() + js.umsg.step_id = self.ptr.step_id.step_id + js.umsg.job_id = self.ptr.step_id.job_id + verify_rpc(slurm_update_step(js.umsg)) + + def _xlate_from_id(self, sid): + if sid == slurm.SLURM_BATCH_SCRIPT: + return "batch" + elif sid == slurm.SLURM_EXTERN_CONT: + return "extern" + elif sid == slurm.SLURM_INTERACTIVE_STEP: + return "interactive" + elif sid == slurm.SLURM_PENDING_STEP: + return "pending" + else: + return sid + + def _xlate_to_id(self, sid): + if sid == "batch": + return slurm.SLURM_BATCH_SCRIPT + elif sid == "extern": + return slurm.SLURM_EXTERN_CONT + elif sid == "interactive": + return slurm.SLURM_INTERACTIVE_STEP + elif sid == "pending": + return slurm.SLURM_PENDING_STEP + else: + return int(sid) + + def as_dict(self): + """JobStep information formatted as a dictionary. + + Returns: + dict: JobStep information as dict + """ + return instance_to_dict(self) + + @property + def id(self): + """Union[str, int]: The id for this step.""" + return self._xlate_from_id(self.ptr.step_id.step_id) + + @id.setter + def id(self, val): + self.ptr.step_id.step_id = self._xlate_to_id(val) + + @property + def job_id(self): + """int: The id for the Job this step belongs to.""" + return self.ptr.step_id.job_id + + @job_id.setter + def job_id(self, val): + self.ptr.step_id.job_id = int(val) + + @property + def name(self): + """str: Name of the step.""" + return cstr.to_unicode(self.ptr.name) + + @property + def uid(self): + """int: User ID who owns this step.""" + return u32_parse(self.ptr.user_id, zero_is_noval=False) + + @property + def user(self): + """str: Name of the User who owns this step.""" + return uid_to_name(self.ptr.user_id) + + @property + def time_limit_raw(self): + """int: Time limit in Minutes for this step.""" + return _raw_time(self.ptr.time_limit) + + @property + def time_limit(self): + """str: Time limit for this step. 
(formatted)""" + return mins_to_timestr(self.ptr.time_limit) + + @time_limit.setter + def time_limit(self, val): + self.umsg.time_limit=self.ptr.time_limit = timestr_to_mins(val) + + @property + def network(self): + """str: Network specification for the step.""" + return cstr.to_unicode(self.ptr.network) + + @property + def cpu_freq_min(self): + """Union[str, int]: Minimum CPU-Frequency requested.""" + return cpufreq_to_str(self.ptr.cpu_freq_min) + + @property + def cpu_freq_max(self): + """Union[str, int]: Maximum CPU-Frequency requested.""" + return cpufreq_to_str(self.ptr.cpu_freq_max) + + @property + def cpu_freq_governor(self): + """Union[str, int]: CPU-Frequency Governor requested.""" + return cpufreq_to_str(self.ptr.cpu_freq_gov) + + @property + def reserved_ports(self): + """str: Reserved ports for the step.""" + return cstr.to_unicode(self.ptr.resv_ports) + + @property + def cluster(self): + """str: Name of the cluster this step runs on.""" + return cstr.to_unicode(self.ptr.cluster) + + @property + def srun_host(self): + """str: Name of the host srun was executed on.""" + return cstr.to_unicode(self.ptr.srun_host) + + @property + def srun_pid(self): + """int: PID of the srun command.""" + return u32_parse(self.ptr.srun_pid) + + @property + def container(self): + """str: Path to the container OCI.""" + return cstr.to_unicode(self.ptr.container) + + @property + def alloc_nodes(self): + """str: Nodes the Job is using. + + This is the formatted string of Nodes as shown by scontrol. + For example, it can look like this: + + "node001,node[005-010]" + + If you want to expand this string into a list of nodenames you can + use the pyslurm.nodelist_from_range_str function. + """ + return cstr.to_list(self.ptr.nodes) + + @property + def start_time_raw(self): + """int: Time this step started. (Unix timestamp)""" + return _raw_time(self.ptr.start_time) + + @property + def start_time(self): + """str: Time this step started. (formatted)""" + return timestamp_to_date(self.ptr.start_time) + + @property + def run_time_raw(self): + """int: Seconds this step has been running for.""" + return _raw_time(self.ptr.run_time) + + @property + def run_time(self): + """str: Seconds this step has been running for. 
(formatted)""" + return secs_to_timestr(self.ptr.run_time) + + @property + def partition(self): + """str: Name of the partition this step runs in.""" + return cstr.to_unicode(self.ptr.partition) + + @property + def state(self): + """str: State the step is in.""" + return cstr.to_unicode(slurm_job_state_string(self.ptr.state)) + + @property + def alloc_cpus(self): + """int: Number of CPUs this step uses in total.""" + return u32_parse(self.ptr.num_cpus) + + @property + def ntasks(self): + """int: Number of tasks this step uses.""" + return u32_parse(self.ptr.num_tasks) + + @property + def distribution(self): + """dict: Task distribution specification for the step.""" + return get_task_dist(self.ptr.task_dist) + + @property + def command(self): + """str: Command that was specified with srun.""" + return cstr.to_unicode(self.ptr.submit_line) + + @property + def protocol_version(self): + """int: Slurm protocol version in use.""" + return u32_parse(self.ptr.start_protocol_ver) diff --git a/pyslurm/core/job/submission.pxd b/pyslurm/core/job/submission.pxd new file mode 100644 index 00000000..25505723 --- /dev/null +++ b/pyslurm/core/job/submission.pxd @@ -0,0 +1,776 @@ +######################################################################### +# submission.pxd - interface for submitting slurm jobs +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + job_desc_msg_t, + slurm_init_job_desc_msg, + slurm_free_job_desc_msg, + submit_response_msg_t, + slurm_submit_batch_job, + slurm_free_submit_response_response_msg, + slurm_env_array_free, + slurm_env_array_create, + slurm_env_array_merge, + slurm_env_array_overwrite, + slurm_job_share_string, + xfree, + try_xmalloc, +) + + +cdef class JobSubmitDescription: + cdef: + slurm.job_desc_msg_t *ptr + is_update + + cdef public: + parse_sbatch_options + + name + """str: Name of the Job. + + This is the same as -J/--job-name from sbatch. + """ + + account + """str: Account this Job should run under. + + This is the same as -A/--account from sbatch. + """ + + uid + """Union[str, int]: Under which user the job will be executed. + + For setting this value, you can both specify the name or numeric + uid of the User. + + This is the same as --uid from sbatch. + """ + + gid + """Union[str, int]: Under which group the job will be executed. + + For setting this value, you can both specify the name or numeric + gid of the Group. of the User. + + This is the same as --gid from sbatch. + """ + + priority + """int: A specific Priority the Job will receive. + + You can achieve the behaviour of sbatch's --hold option by + specifying a priority of 0. 
+ + This is the same as --priority from sbatch. + """ + + site_factor + """int: Site Factor for the Job. + + This is only used for updating an already existing Job. It will + not be honored in the job submission. + """ + + wckey + """str: WCKey to be used with the Job. + + This is the same as --wckey from sbatch. + """ + + array + """str: An Array specification for the Job + + This is the same as -a/--array from sbatch. + """ + + batch_constraints + """str: Batch Features for a Job + + This is the same as --batch from sbatch. + """ + + begin_time + """str: Defer allocation until the specified time. + + This is the same as --begin from sbatch. + """ + + clusters + """Union[list, str]: Clusters the job may run on. + + This is the same as -M/--clusters from sbatch. + """ + + cluster_constraints + """str: Comma-separated str with cluster constraints for the job. + + This is the same as --cluster-constraint from sbatch. + """ + + comment + """str: An arbitrary comment for the job. + + This is the same as --comment from sbatch. + """ + + admin_comment + """str: An arbitrary admin-comment for the job + + This is only used when updating an already existing Job. Setting + it for new Submissions does nothing. + """ + + contiguous + """bool: Whether allocated Nodes should form a contiguous set + + This is the same as --contiguous from sbatch. + """ + + cores_reserved_for_system + """int: Count of cores reserved for system not usable by the Job. + + This is the same as -S/--core-spec from sbatch. + This is mutually exclusive with `threads_reserved_for_system`. + """ + + threads_reserved_for_system + """int: Count of threads reserved for system not usable by the Job. + + This is the same as --thread-spec from sbatch. + This is mutually exclusive with `cores_reserved_for_system`. + """ + + work_dir + """str: Work directory for the job. Default is current work-dir. + + This is the same as -D/--chdir from sbatch. + """ + + cpu_freq + """Union[dict, str]: Specify the CPU Frequency for the Job. + + This is the same as --cpu-freq from sbatch. + + Examples: + Specifying it as a dict: + + cpu_freq = { + "min": "Low", + "max": "High", + "governor": "UserSpace" + } + + or like in sbatch with a string. For more info on that, check out + the sbatch documentation for --cpu-freq. + + If you only want to set a Governor without any min or max, you can + simply specify it as a standalone string: + + cpu_freq = "Performance" + or + cpu_freq = {"governor": "Performance"} + + If you want to set a specific, fixed frequency, you can do: + + cpu_freq = + or either + cpu_freq = {"max": } or cpu_freq = {"min": } + """ + + nodes + """Union[dict, str, int]: Amount of nodes needed for the job. + + This is the same as -N/--nodes from sbatch. + + Examples: + Providing min/max nodes as a dict: + + nodes = { + "min": 3, + "max": 6 + } + + When no range is needed, you can also simply specify it as int: + + nodes = 3 + + Other than that, a range can also be specified in a str like with + sbatch: + + nodes = "1-5" + """ + + deadline + """str: Deadline specification for the Job. + + This is the same as --deadline from sbatch. + """ + + delay_boot + """Union[str, int]: Delay boot specification for the Job. + + This is the same as --delay-boot from sbatch. + """ + + dependencies + """Union[dict, str]: Dependencies for the Job. + + This is the same as -d/--dependency from sbatch. + """ + + excluded_nodes + """Union[list, str]: Exclude specific nodes for this Job. + + This is the same as -x/--exclude from sbatch. 
+ """ + + required_nodes + """Union[list, str]: Specific list of nodes required for the Job. + + This is the same as -w/--nodelist from sbatch. + """ + + constraints + """str: Required node features for the Job. + + This is the same as -C/--constraint from sbatch. + """ + + kill_on_node_fail + """bool: Should the job get killed if one of the Nodes fails? + + This is the same as -k/--no-kill from sbatch. + """ + + licenses + """Union[list, str]: A list of licenses for the Job. + + This is the same as -L/--licenses from sbatch. + """ + + mail_user + """Union[list, str]: List of email addresses for notifications. + + This is the same as --mail-user from sbatch. + """ + + mail_type + """Union[list, str]: List of mail flags. + + This is the same as --mail-type from sbatch. + """ + + mcs_label + """str: An MCS Label for the Job. + + This is the same as --mcs-label from sbatch. + """ + + mem_per_cpu + """Union[str, int]: Memory required per allocated CPU. + + The default unit is in Mebibytes. You are also able to specify unit + suffixes like K|M|G|T. + This is the same as --mem-per-cpu from sbatch. + This is mutually exclusive with mem_per_node and mem_per_gpu. + + Examples: + # 1 MiB + mem_per_cpu = 1024 + + # 3 GiB + mem_per_cpu = "3G" + """ + + mem_per_node + """Union[str, int]: Memory required per whole node. + + The default unit is in Mebibytes. You are also able to specify unit + suffixes like K|M|G|T. + This is the same as --mem from sbatch. + This is mutually exclusive with mem_per_cpu and mem_per_gpu. + + Examples: + # 1 MiB + mem_per_node = 1024 + + # 3 GiB + mem_per_node = "3G" + """ + + mem_per_gpu + """Union[str, int]: Memory required per GPU. + + The default unit is in Mebibytes. You are also able to specify unit + suffixes like K|M|G|T. + This is the same as --mem-per-gpu from sbatch. + This is mutually exclusive with mem_per_node and mem_per_cpu. + + Examples: + # 1 MiB + mem_per_gpu = 1024 + + # 3 GiB + mem_per_gpu = "3G" + """ + + network + """str: Network types for the Job. + + This is the same as --network from sbatch. + """ + + nice + """int: Adjusted scheduling priority for the Job. + + This is the same as --nice from sbatch. + """ + + log_files_open_mode + """str: Mode in which stdout and stderr log files should be opened. + + Valid options are: + * append + * truncate + + This is the same as --open-mode from sbatch. + """ + + overcommit + """bool: If the resources should be overcommitted. + + This is the same as -O/--overcommit from sbatch. + """ + + partitions + """Union[list, str]: A list of partitions the Job may use. + + This is the same as -p/--partition from sbatch. + """ + + power_options + """list: A list of power management plugin options for the Job. + + This is the same as --power from sbatch. + """ + + profile + """list: List of types for the acct_gather_profile plugin. + + This is the same as --profile from sbatch. + """ + + accounting_gather_freq + """Union[dict, str]: Interval for accounting info to be gathered. + + This is the same as --acctg-freq from sbatch. + + Examples: + Specifying it as a dict: + + accounting_gather_freq = { + energy=60, + network=20, + } + + or as a single string: + + accounting_gather_freq = "energy=60,network=20" + """ + + qos + """str: Quality of Service for the Job. + + This is the same as -q/--qos from sbatch. + """ + + reboot_nodes + """bool: Force the allocated nodes to reboot before the job starts. + + This is the same --reboot from sbatch. + """ + + is_requeueable + """bool: If the Job is eligible for requeuing. 
+ + This is the same as --requeue from sbatch. + """ + + reservations + """Union[list, str]: A list of possible reservations the Job can use. + + This is the same as --reservation from sbatch. + """ + + script + """str: Absolute Path or content of the batch script. + + You can specify either a path to a script which will be loaded, or + you can pass the script as a string. + If the script is passed as a string, providing arguments to it + (see "script_args") is not supported. + """ + + script_args + """str: Arguments passed to the batch script. + + You can only set arguments if a file path was specified for "script". + """ + + environment + """Union[dict, str]: Environment variables to be set for the Job. + + This is the same as --export from sbatch. + """ + + resource_sharing + """str: Controls the resource sharing with other Jobs. + + This property combines functionality of --oversubscribe and + --exclusive from sbatch. + + Allowed values are are: + + * "oversubscribe" or "yes": + The Job allows resources to be shared with other running Jobs. + + * "user" + Only sharing resources with other Jobs that have the "user" option + set is allowed + + * "mcs" + Only sharing resources with other Jobs that have the "mcs" option + set is allowed. + + * "no" or "exclusive" + No sharing of resources is allowed. (--exclusive from sbatch) + """ + + distribution + """TODO""" + + time_limit + """str: The time limit for the job. + + This is the same as -t/--time from sbatch. + """ + + time_limit_min + """str: A minimum time limit for the Job. + + This is the same as --time-min from sbatch. + """ + + container + """str: Path to an OCI container bundle. + + This is the same as --container from sbatch. + """ + + cpus_per_task + """int: The amount of cpus required for each task. + + This is the same as -c/--cpus-per-task from sbatch. + This is mutually exclusive with cpus_per_gpu. + """ + + cpus_per_gpu + """int: The amount of cpus required for each allocated GPU. + + This is the same as --cpus-per-gpu from sbatch. + This is mutually exclusive with cpus_per_task. + """ + + sockets_per_node + """int: Restrict Job to nodes with atleast this many sockets. + + This is the same as --sockets-per-node from sbatch. + """ + + cores_per_socket + """int: Restrict Job to nodes with atleast this many cores per socket + + This is the same as --cores-per-socket from sbatch. + """ + + threads_per_core + """int: Restrict Job to nodes with atleast this many threads per socket + + This is the same as --threads-per-core from sbatch. + """ + + gpus + """Union[dict, str, int]: GPUs for the Job to be allocated in total. + + This is the same as -G/--gpus from sbatch. + Specifying the type of the GPU is optional. + + Examples: + Specifying the GPU counts as a dict: + + gpus = { + "tesla": 1, + "volta": 5, + } + + Or, for example, in string format: + + gpus = "tesla:1,volta:5" + + Or, if you don't care about the type of the GPU: + + gpus = 6 + """ + + gpus_per_socket + """Union[dict, str, int]: GPUs for the Job to be allocated per socket. + + This is the same as --gpus-per-socket from sbatch. + + Specifying the type of the GPU is optional. Note that setting + gpus_per_socket requires to also specify sockets_per_node. 
+ + Examples: + Specifying it as a dict: + + gpus_per_socket = { + "tesla": 1, + "volta": 5, + } + + Or, for example, in string format: + + gpus_per_socket = "tesla:1,volta:5" + + Or, if you don't care about the type of the GPU: + + gpus_per_socket = 6 + """ + + gpus_per_task + """Union[dict, str, int]: GPUs for the Job to be allocated per task. + + This is the same as --gpus-per-task from sbatch. + + Specifying the type of the GPU is optional. Note that setting + "gpus_per_task" requires to also specify either one of "ntasks" or + "gpus". + + Examples: + Specifying it as a dict: + + gpus_per_task = { + "tesla": 1, + "volta": 5, + } + + Or, for example, in string format: + + gpus_per_task = "tesla:1,volta:5" + + Or, if you don't care about the type of the GPU: + + gpus_per_task = 6 + """ + + gres_per_node + """Union[dict, str]: Generic resources to be allocated per node. + + This is the same as --gres from sbatch. You should also use this + option if you want to specify GPUs per node (--gpus-per-node). + Specifying the type (by seperating GRES name and type with a + semicolon) is optional. + + Examples: + Specifying it as a dict: + + gres_per_node = { + "gpu:tesla": 1, + "gpu:volta": 5, + } + + Or, for example, in string format: + + gres_per_node = "gpu:tesla:1,gpu:volta:5" + + GPU Gres without a specific type: + + gres_per_node = "gpu:6" + """ + + gpu_binding + """str: Specify GPU binding for the Job. + + This is the same as --gpu-bind from sbatch. + """ + + ntasks + """int: Maximum amount of tasks for the Job. + + This is the same as -n/--ntasks from sbatch. + """ + + ntasks_per_node + """int: Amount of tasks to be invoked on each node. + + This is the same as --ntasks-per-node from sbatch. + """ + + ntasks_per_socket + """int: Maximum amount of tasks to be invoked on each socket. + + This is the same as --ntasks-per-socket from sbatch. + """ + + ntasks_per_core + """int: Maximum amount of tasks to be invoked on each core. + + This is the same as --ntasks-per-core from sbatch. + """ + + ntasks_per_gpu + """int: Amount of tasks to be invoked per GPU. + + This is the same as --ntasks-per-socket from sbatch. + """ + + switches + """Union[dict, str, int]: Maximum amount of leaf switches desired. + + This can also optionally include a maximum waiting time for these + switches. + This is the same as --switches from sbatch. + + Examples: + Specifying it as a dict: + + switches = { "count": 5, "max_wait_time": "00:10:00" } + + Or as a single string (sbatch-style): + + switches = "5@00:10:00" + """ + + signal + """Union[dict, str]: Warn signal to be sent to the Job. + + This is the same as --signal from sbatch. + The signal can both be specified with its name, e.g. "SIGKILL", or + as a number, e.g. 9 + + Examples: + Specifying it as a dict: + + signal = { + "signal": "SIGKILL", + "time": 120 + } + + The above will send a "SIGKILL" signal 120 seconds before the + Jobs' time limit is reached. + + Or, specifying it as a string (sbatch-style): + + signal = "SIGKILL@120" + """ + + stdin + """str: Path to a File acting as stdin for the batch-script. + + This is the same as -i/--input from sbatch. + """ + + stdout + """str: Path to a File to write the Jobs stdout. + + This is the same as -o/--output from sbatch. + """ + + stderr + """str: Path to a File to write the Jobs stderr. + + This is the same as -e/--error from sbatch. + """ + + kill_on_invalid_dependency + """bool: Kill the job if it has an invalid dependency. + + This is the same as --kill-on-invalid-dep from sbatch. 
+ """ + + spread_job + """bool: Spread the Job over as many nodes as possible. + + This is the same as --spread-job from sbatch. + """ + + use_min_nodes + """bool: Prefer the minimum amount of nodes specified. + + This is the same as --use-min-nodes from sbatch. + """ + + gres_flags + """str: Generic resource task binding options. + + This is the --gres-flags option from sbatch. + + Possible values are: + * "enforce-binding" + * "disable-binding" + """ + + tmp_disk_per_node + """Union[str, int]: Amount of temporary disk space needed per node. + + This is the same as --tmp from sbatch. You can specify units like + K|M|G|T (multiples of 1024). + If no unit is specified, the value will be assumed as Mebibytes. + + Examples: + # 2048 MiB + tmp_disk_per_node = "2G" + + # 1024 MiB + tmp_disk_per_node = 1024 + """ + + get_user_environment + """TODO""" + + min_cpus_per_node + """str: Set the minimum amount of CPUs required per Node. + + This is the same as --mincpus from sbatch. + """ + + wait_all_nodes + """bool: Controls when the execution of the command begins. + + A value of True means that the Job should begin execution only after + all nodes in the allocation are ready. Setting it to False, the + default, means that it is not waited for the nodes to be ready. (i.e + booted) + """ + diff --git a/pyslurm/core/job/submission.pyx b/pyslurm/core/job/submission.pyx new file mode 100644 index 00000000..47eb627b --- /dev/null +++ b/pyslurm/core/job/submission.pyx @@ -0,0 +1,663 @@ +######################################################################### +# submission.pyx - interface for submitting slurm jobs +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: embedsignature=True +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from os import getcwd +from os import environ as pyenviron +import re +import typing +import shlex +from pathlib import Path +from pyslurm.core.common cimport cstr, ctime +from pyslurm.core.common.uint cimport * +from pyslurm.core.common.uint import * +from pyslurm.core.common.ctime cimport time_t +from pyslurm.core.job.util import * +from pyslurm.core.error import RPCError, verify_rpc +from pyslurm.core.job.sbatch_opts import _parse_opts_from_batch_script +from pyslurm.core.common.ctime import ( + secs_to_timestr, + timestr_to_secs, + mins_to_timestr, + timestr_to_mins, + timestamp_to_date, + date_to_timestamp, +) + +from pyslurm.core.common import ( + humanize, + dehumanize, + signal_to_num, + user_to_uid, + group_to_gid, + uid_to_name, + gid_to_name, +) + + +cdef class JobSubmitDescription: + """Slurm Job Submission""" + def __cinit__(self): + self.ptr = NULL + + def __init__(self, **kwargs): + # Initialize explicitly provided attributes, if any. 
+ for k, v in kwargs.items(): + setattr(self, k, v) + + def __dealloc__(self): + slurm_free_job_desc_msg(self.ptr) + + def _alloc_and_init(self): + slurm_free_job_desc_msg(self.ptr) + + self.ptr = try_xmalloc(sizeof(job_desc_msg_t)) + if not self.ptr: + raise MemoryError("xmalloc for job_desc_msg_t failed.") + + slurm_init_job_desc_msg(self.ptr) + + def submit(self): + """Submit a batch job description. + + Returns: + int: The ID of the submitted Job. + + Raises: + RPCError: When the job submission was not successful. + MemoryError: If malloc failed to allocate enough memory. + + Examples: + >>> desc = JobSubmitDescription( + >>> name="test-job", + >>> cpus_per_task=1, + >>> time_limit="10-00:00:00") + >>> + >>> job_id = desc.submit() + """ + cdef submit_response_msg_t *resp = NULL + + self._create_job_submit_desc() + verify_rpc(slurm_submit_batch_job(self.ptr, &resp)) + + job_id = resp.job_id + slurm_free_submit_response_response_msg(resp) + + return job_id + + def load_environment(self, overwrite=False): + """Load values of attributes provided through the environment. + + Args: + overwrite (bool): + If set to True, the value from an option found in the + environment will override its current value. Default is False + """ + self._parse_env(overwrite) + + def load_sbatch_options(self, overwrite=False): + """Load values from #SBATCH options in the batch script. + + Args: + overwrite (bool): + If set to True, the value from an option found in the in the + batch script will override its current value. Default is False + """ + _parse_opts_from_batch_script(self, self.script, overwrite) + + def _parse_env(self, overwrite=False): + for attr in dir(self): + if attr.startswith("_") or callable(attr): + # Ignore everything starting with "_" and all functions. + # Arguments directly specified upon object creation will + # always have precedence. + continue + + spec = attr.upper() + val = pyenviron.get(f"PYSLURM_JOBDESC_{spec)}") + if (val is not None + and (getattr(self, attr) is None or overwrite)): + + # Just convert literal true/false strings to bool. 
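As a usage sketch of the environment lookup above: each public attribute is read from a PYSLURM_JOBDESC_<ATTRIBUTE> variable (the values below are invented for the example):

    import os
    from pyslurm import JobSubmitDescription

    os.environ["PYSLURM_JOBDESC_ACCOUNT"] = "dev-account"
    os.environ["PYSLURM_JOBDESC_SPREAD_JOB"] = "true"   # converted to bool here

    desc = JobSubmitDescription(script="/path/to/batch_script.sh")
    desc.load_environment()
    # Attributes that are still unset get filled from the environment;
    # pass overwrite=True to let the environment win over existing values.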
+ tmp = val.casefold() + if tmp == "true": + val = True + elif tmp == "false": + val = False + + setattr(self, attr, val) + + def _create_job_submit_desc(self, is_update=False): + self.is_update = is_update + self._alloc_and_init() + cdef slurm.job_desc_msg_t *ptr = self.ptr + + if not self.is_update: + self._validate_options() + self._set_defaults() + + if self.nice: + ptr.nice = slurm.NICE_OFFSET + int(self.nice) + + if self.site_factor: + ptr.site_factor = slurm.NICE_OFFSET + int(self.site_factor) + + cstr.fmalloc(&ptr.name, self.name) + cstr.fmalloc(&ptr.account, self.account) + cstr.fmalloc(&ptr.wckey, self.wckey) + cstr.fmalloc(&ptr.array_inx, self.array) + cstr.fmalloc(&ptr.batch_features, self.batch_constraints) + cstr.fmalloc(&ptr.cluster_features, self.cluster_constraints) + cstr.fmalloc(&ptr.comment, self.comment) + cstr.fmalloc(&ptr.work_dir, self.work_dir) + cstr.fmalloc(&ptr.features, self.constraints) + cstr.fmalloc(&ptr.mail_user, self.mail_user) + cstr.fmalloc(&ptr.mcs_label, self.mcs_label) + cstr.fmalloc(&ptr.work_dir, self.work_dir) + cstr.fmalloc(&ptr.network, self.network) + cstr.fmalloc(&ptr.qos, self.qos) + cstr.fmalloc(&ptr.container, self.container) + cstr.fmalloc(&ptr.std_in, self.stdin) + cstr.fmalloc(&ptr.std_out, self.stdout) + cstr.fmalloc(&ptr.std_err, self.stderr) + cstr.fmalloc(&ptr.tres_per_job, cstr.from_gres_dict(self.gpus, "gpu")) + cstr.fmalloc(&ptr.tres_per_socket, + cstr.from_gres_dict(self.gpus_per_socket, "gpu")) + cstr.fmalloc(&ptr.tres_per_task, + cstr.from_gres_dict(self.gpus_per_task, "gpu")) + cstr.fmalloc(&ptr.tres_per_node, + cstr.from_gres_dict(self.gres_per_node)) + cstr.fmalloc(&ptr.cpus_per_tres, + cstr.from_gres_dict(self.cpus_per_gpu, "gpu")) + cstr.fmalloc(&ptr.admin_comment, self.admin_comment) + + cstr.from_list(&ptr.clusters, self.clusters) + cstr.from_list(&ptr.exc_nodes, self.excluded_nodes) + cstr.from_list(&ptr.req_nodes, self.required_nodes) + cstr.from_list(&ptr.licenses, self.licenses) + cstr.from_list(&ptr.partition, self.partitions) + cstr.from_list(&ptr.reservation, self.reservations) + cstr.from_dict(&ptr.acctg_freq, self.accounting_gather_freq) + + ptr.deadline = date_to_timestamp(self.deadline) + ptr.begin_time = date_to_timestamp(self.begin_time) + ptr.delay_boot = timestr_to_secs(self.delay_boot) + ptr.time_limit = timestr_to_mins(self.time_limit) + ptr.time_min = timestr_to_mins(self.time_limit_min) + + ptr.user_id = user_to_uid(self.uid) + ptr.group_id = group_to_gid(self.gid) + ptr.priority = u32(self.priority, zero_is_noval=False) + ptr.num_tasks = u32(self.ntasks) + ptr.pn_min_tmp_disk = u32(dehumanize(self.tmp_disk_per_node)) + ptr.cpus_per_task = u16(self.cpus_per_task) + ptr.sockets_per_node = u16(self.sockets_per_node) + ptr.cores_per_socket = u16(self.cores_per_socket) + ptr.ntasks_per_socket = u16(self.ntasks_per_socket) + ptr.ntasks_per_tres = u16(self.ntasks_per_gpu) + ptr.ntasks_per_node = u16(self.ntasks_per_node) + ptr.threads_per_core = u16(self.threads_per_core) + ptr.ntasks_per_core = u16(self.ntasks_per_core) + u64_set_bool_flag(&ptr.bitflags, self.spread_job, slurm.SPREAD_JOB) + u64_set_bool_flag(&ptr.bitflags, self.kill_on_invalid_dependency, + slurm.KILL_INV_DEP) + u64_set_bool_flag(&ptr.bitflags, self.use_min_nodes, + slurm.USE_MIN_NODES) + ptr.contiguous = u16_bool(self.contiguous) + ptr.kill_on_node_fail = u16_bool(self.kill_on_node_fail) + ptr.overcommit = u8_bool(self.overcommit) + ptr.reboot = u16_bool(self.reboot_nodes) + ptr.requeue = u16_bool(self.is_requeueable) + ptr.wait_all_nodes 
= u16_bool(self.wait_all_nodes) + + ptr.mail_type = parse_mail_type(self.mail_type) + ptr.power_flags = parse_power_type(self.power_options) + ptr.profile = parse_acctg_profile(self.profile) + ptr.shared = parse_shared_type(self.resource_sharing) + + self._set_cpu_frequency() + self._set_nodes() + self._set_dependencies() + self._set_memory() + self._set_open_mode() + self._set_script() + self._set_script_args() + self._set_environment() + self._set_distribution() + self._set_gpu_binding() + self._set_min_cpus() + + # TODO + # burst_buffer + # mem_bind, mem_bind_type? + # gpu_freq + # --hint + # spank_env + # --propagate for rlimits + + def _set_defaults(self): + if not self.ntasks: + self.ntasks = 1 + if not self.cpus_per_task: + self.cpus_per_task = 1 + if not self.work_dir: + self.work_dir = str(getcwd()) + if not self.environment: + # By default, sbatch also exports everything in the users env. + self.environment = "ALL" + + def _validate_options(self): + if not self.script: + raise ValueError("You need to provide a batch script.") + + if (self.mem_per_node and self.mem_per_cpu + or self.mem_per_gpu and self.mem_per_cpu + or self.mem_per_node and self.mem_per_gpu): + raise ValueError("Only one of mem_per_cpu, mem_per_node or " + "mem_per_gpu can be set.") + + if (self.ntasks_per_gpu and + (self.ptr.min_nodes != u32(None) or self.nodes + or self.gpus_per_task or self.gpus_per_socket + or self.ntasks_per_node)): + raise ValueError("ntasks_per_gpu is mutually exclusive with " + "nodes, gpus_per_task, gpus_per_socket and " + "ntasks_per_node.") + + if self.cpus_per_gpu and self.cpus_per_task: + raise ValueError("cpus_per_task and cpus_per_gpu " + "are mutually exclusive.") + + if (self.cores_reserved_for_system + and self.threads_reserved_for_system): + raise ValueError("cores_reserved_for_system is mutually " + " exclusive with threads_reserved_for_system.") + + def _set_core_spec(self): + if self.cores_reserved_for_system: + self.ptr.core_spec = u16(self.cores_reserved_for_system) + elif self.threads_reserved_for_system: + self.ptr.core_spec = u16(self.threads_reserved_for_system) + self.ptr.core_spec |= slurm.CORE_SPEC_THREAD + + def _set_cpu_frequency(self): + if not self.cpu_freq: + return None + + freq = self.cpu_freq + have_no_range = False + + # Alternatively support sbatch-like --cpu-freq setting. + if not isinstance(freq, dict): + freq_splitted = re.split("[-:]+", str(freq)) + freq_len = len(freq_splitted) + freq = {} + + # Transform cpu-freq string to the individual components. + if freq_splitted[0].isdigit(): + freq["max"] = freq_splitted[0] + else: + if freq_len > 1: + raise ValueError( + "Invalid cpu_freq format: {kwargs}." + "Governor must be provided as single element or " + "as last element in the form of min-max:governor. 
" + ) + freq["governor"] = freq_splitted[0] + + if freq_len >= 2: + freq["min"] = freq["max"] + freq["max"] = freq_splitted[1] + + if freq_len == 3: + freq["governor"] = freq_splitted[2] + + freq_min = parse_cpufreq(freq.get("min")) + freq_max = parse_cpufreq(freq.get("max")) + freq_gov = parse_cpu_gov(freq.get("governor")) + + if freq_min != u32(None): + if freq_max == u32(None): + freq_max = freq_min + freq_min = u32(None) + have_no_range = True + elif freq_max < freq_min: + raise ValueError( + f"min cpu-freq ({freq_min}) must be smaller " + f"than max cpu-freq ({freq_max})" + ) + elif freq_max != u32(None) and freq_min == u32(None): + have_no_range = True + + if have_no_range and freq_gov != u32(None): + raise ValueError( + "Setting Governor when specifying only either one " + "of min or max is not allowed." + ) + + self.ptr.cpu_freq_min = freq_min + self.ptr.cpu_freq_max = freq_max + self.ptr.cpu_freq_gov = freq_gov + + def _set_nodes(self): + vals = self.nodes + nmin=nmax = 1 + + if self.is_update: + return None + + # Support input like --nodes from sbatch (min-[max]) + if isinstance(vals, dict): + nmin = u32(vals.get("min", 1), on_noval=1) + nmax = u32(vals.get("max", 1), on_noval=nmin) + elif vals is not None: + v = str(vals).split("-", 1) + nmin = int(v[0]) + if nmin == 0: + nmin = 1 + if "-" in str(vals): + nmax = int(v[1]) + else: + nmax = nmin + + if not nmax: + nmax = nmin + if nmax < nmin: + raise ValueError("Max Nodecount cannot be " + "less than minimum nodecount.") + + self.ptr.min_nodes = nmin + self.ptr.max_nodes = nmax + + def _set_dependencies(self): + val = self.dependencies + final = None + + if isinstance(val, str): + # TODO: Even though everything is checked in the slurmctld, maybe + # still do some sanity checks here on the input when a string + # is provided. + final = val + elif val is not None: + satisfy = val.pop("satisfy", "all").casefold() + + if satisfy == "any": + delim = "?" + else: + delim = "," + + final = [] + for k, v in val.items(): + if k == "singleton" and bool(v): + final.append("singleton") + continue + + if not isinstance(v, list): + raise TypeError(f"Values for {k} must be list, " + f"got {type(v)}.") + # Convert everything to strings and add it to the dependency + # list. + v[:] = [str(s) for s in v] + final.append(f"{k}:{':'.join(v)}") + + final = delim.join(final) + + cstr.fmalloc(&self.ptr.dependency, final) + + def _set_memory(self): + if self.mem_per_cpu: + self.ptr.pn_min_memory = u64(dehumanize(self.mem_per_cpu)) + self.ptr.pn_min_memory |= slurm.MEM_PER_CPU + elif self.mem_per_node: + self.ptr.pn_min_memory = u64(dehumanize(self.mem_per_node)) + elif self.mem_per_gpu: + mem_gpu = u64(dehumanize(val)) + cstr.fmalloc(&self.ptr.mem_per_tres, f"gres:gpu:{mem_gpu}") + + def _set_open_mode(self): + val = self.log_files_open_mode + if val == "append": + self.ptr.open_mode = slurm.OPEN_MODE_APPEND + elif val == "truncate": + self.ptr.open_mode = slurm.OPEN_MODE_TRUNCATE + + def _set_script(self): + sfile = self.script + sbody = None + + if self.is_update: + return None + + if Path(sfile).is_file(): + # First assume the caller is passing a path to a script and we try + # to load it. + sbody = Path(sfile).read_text() + else: + # Otherwise assume that the script content is passed directly. 
+ sbody = sfile + if self.script_args: + raise ValueError("Passing arguments to a script is only allowed " + "if it was loaded from a file.") + + # Validate the script + if not sbody or not len(sbody): + raise ValueError("Batch script is empty or none was provided.") + elif sbody.isspace(): + raise ValueError("Batch script contains only whitespace.") + elif not sbody.startswith("#!"): + msg = "Not a valid Batch script. " + msg += "First line must start with '#!'," + msg += "followed by the path to an interpreter" + raise ValueError(msg) + elif "\0" in sbody: + msg = "The Slurm Controller does not allow scripts that " + msg += "contain a NULL character: '\\0'." + raise ValueError(msg) + elif "\r\n" in sbody: + msg = "Batch script contains DOS line breaks (\\r\\n) " + msg += "instead of expected UNIX line breaks (\\n)." + raise ValueError(msg) + + cstr.fmalloc(&self.ptr.script, sbody) + + def _set_script_args(self): + args = self.script_args + if not args: + return None + + if isinstance(args, str): + sargs = shlex.split(args) + else: + sargs = list(args) + + # Script should always first in argv. + if sargs[0] != self.script: + sargs.insert(0, self.script) + + self.ptr.argc = len(sargs) + self.ptr.argv = try_xmalloc(self.ptr.argc * sizeof(char*)) + if not self.ptr.argv: + raise MemoryError("xmalloc failed for script_args") + + for idx, opt in enumerate(sargs): + cstr.fmalloc(&self.ptr.argv[idx], opt) + + def _set_environment(self): + if self.is_update: + return None + + vals = self.environment + get_user_env = self.get_user_environment + + # Clear any previous environment set for the Job. + slurm_env_array_free(self.ptr.environment) + self.ptr.env_size = 0 + + # Allocate a new environment. + self.ptr.environment = slurm_env_array_create() + + if isinstance(vals, str) or vals is None: + if vals is None or vals.casefold() == "all": + # This is the default. Export all current environment + # variables into the Job. + slurm_env_array_merge(&self.ptr.environment, + slurm.environ) + elif vals.casefold() == "none": + # Only env variables starting with "SLURM_" will be exported. + for var, val in pyenviron.items(): + if var.startswith("SLURM_"): + slurm_env_array_overwrite(&self.ptr.environment, + var, str(val)) + get_user_env = True + else: + # Assume Env-vars were provided sbatch style like a string. + # Setup all 'SLURM' env vars found first. + for var, val in pyenviron.items(): + if var.startswith("SLURM_"): + slurm_env_array_overwrite(&self.ptr.environment, + var, str(val)) + + # Merge the provided environment variables from the string in. + for idx, item in enumerate(vals.split(",")): + if idx == 0 and item.casefold() == "all": + slurm_env_array_merge(&self.ptr.environment, + slurm.environ) + continue + + if not "=" in item: + continue + + var, val = item.split("=", 1) + slurm_env_array_overwrite(&self.ptr.environment, + var, str(val)) + get_user_env = True + else: + # Here, the user provided an actual dictionary as Input. + # Setup all 'SLURM' env vars first. + for var, val in pyenviron.items(): + if var.startswith("SLURM_"): + slurm_env_array_overwrite(&self.ptr.environment, + var, str(val)) + + # Setup all User selected env vars. 
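The branches above mirror sbatch's --export handling; roughly, the accepted inputs look like this (variable names and values are illustrative only):

    desc.environment = "ALL"              # default: forward the caller's whole environment
    desc.environment = "NONE"             # forward only SLURM_* variables
    desc.environment = "ALL,MY_VAR=foo"   # sbatch-style string
    desc.environment = {"MY_VAR": "foo",  # explicit dict; values are stringified
                        "OTHER_VAR": 42}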
+ for var, val in vals.items(): + slurm_env_array_overwrite(&self.ptr.environment, + var, str(val)) + + if get_user_env: + slurm_env_array_overwrite(&self.ptr.environment, + "SLURM_GET_USER_ENV", "1") + + # Calculate Environment size + while self.ptr.environment and self.ptr.environment[self.ptr.env_size]: + self.ptr.env_size+=1 + + def _set_distribution(self): + dist, plane = parse_task_dist(self.distribution) + if plane: + self.ptr.plane_size = plane + self.ptr.task_dist = slurm.SLURM_DIST_PLANE + elif self.distribution is not None: + self.ptr.task_dist = dist + + def _set_gpu_binding(self): + binding = self.gpu_binding + + if not binding: + if self.ptr.ntasks_per_tres != u16(None): + # Set gpu bind implicit to single:ntasks_per_gpu + binding = f"single:{self.ntasks_per_gpu}" + else: + binding = self.gpu_binding.replace("verbose,", "") \ + .replace("gpu:", "") + if "verbose" in self.gpu_binding: + binding = f"verbose,gpu:{binding}" + + cstr.fmalloc(&self.ptr.tres_bind, binding) + + def _set_min_cpus(self): + if self.min_cpus_per_node: + self.ptr.min_cpus = u16(self.min_cpus_per_node) + elif not self.is_update: + if self.overcommit: + self.ptr.min_cpus = max(self.ptr.min_nodes, 1) + + self.ptr.min_cpus = self.ptr.cpus_per_task * self.ptr.num_tasks + + def _set_switches(self): + kwargs = self.switches + if isinstance(kwargs, dict): + self.ptr.req_switch = u32(kwargs.get("count")) + self.ptr.wait4switch = timestr_to_secs(kwargs.get("max_wait_time")) + elif kwargs is not None: + vals = str(kwargs.split("@")) + if len(vals) > 1: + self.ptr.wait4switch = timestr_to_secs(vals[1]) + self.ptr.req_switch = u32(vals[0]) + + def _set_signal(self): + vals = self.signal + if not vals: + return None + + info = vals + # This supports input like the --signal option from sbatch + if vals and not isinstance(vals, dict): + info = {} + val_list = re.split("[:@]+", str(vals)) + + if len(val_list): + if ":" in str(vals): + flags = val_list.pop(0).casefold() + + if "r" in flags: + info["allow_reservation_overlap"] = True + + if "b" in flags: + info["batch_only"] = True + + if "@" in str(vals): + info["time"] = val_list[1] + + info["signal"] = val_list[0] + + # Parse values first to catch bad input + w_signal = u16(signal_to_num(info.get("signal"))) + w_time = u16(info.get("time"), on_noval=60) + batch_only = bool(info.get("batch_only")) + allow_resv_overlap = bool(info.get("allow_reservation_overlap")) + + # Then set it. At this point we can be sure that the input is correct. 
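As a sketch of the signal and switches inputs parsed above (dict and sbatch-style forms are meant to be equivalent; values are illustrative):

    # Send SIGUSR1 300 seconds before the time limit, batch shell only.
    desc.signal = "B:SIGUSR1@300"
    desc.signal = {"signal": "SIGUSR1", "time": 300, "batch_only": True}

    # Request at most 2 leaf switches, waiting up to one hour for them
    # (the sbatch-style string "2@01:00:00" is documented as equivalent).
    desc.switches = {"count": 2, "max_wait_time": "01:00:00"}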
+ self.ptr.warn_signal = w_signal + self.ptr.warn_time = w_time + u16_set_bool_flag(&self.ptr.warn_flags, + batch_only, slurm.KILL_JOB_BATCH) + u16_set_bool_flag(&self.ptr.warn_flags, + allow_resv_overlap, slurm.KILL_JOB_RESV) + + def _set_gres_flags(self): + if not self.gres_flags: + return None + elif self.gres_flags.casefold() == "enforce-binding": + self.ptr.bitflags |= slurm.GRES_ENFORCE_BIND + elif self.gres_flags.casefold() == "disable-binding": + self.ptr.bitflags |= slurm.GRES_DISABLE_BIND diff --git a/pyslurm/core/job/util.pyx b/pyslurm/core/job/util.pyx new file mode 100644 index 00000000..fd34f6c7 --- /dev/null +++ b/pyslurm/core/job/util.pyx @@ -0,0 +1,616 @@ +######################################################################### +# parse_types.pyx - utility functions used to parse various job flags +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t +from pyslurm cimport slurm +from pyslurm.core.common.uint import * +from pyslurm.core.common.uint cimport * + + +def parse_mail_type(mail_types): + """Convert a str or list of mail types to a uint16_t.""" + cdef uint16_t flags = 0 + types = mail_types + + if not types or "None" == types: + return slurm.NO_VAL16 + + if isinstance(types, str): + types = types.split(",") + + for typ in mail_types: + typ = typ.casefold() + + if "array_tasks" == typ: + flags |= slurm.MAIL_ARRAY_TASKS + + elif "begin" == typ: + flags |= slurm.MAIL_JOB_BEGIN + + elif "end" == typ: + flags |= slurm.MAIL_JOB_END + + elif "fail" == typ: + flags |= slurm.MAIL_JOB_FAIL + + # elif "invalid_depend" == typ: + # flags |= slurm.MAIL_INVALID_DEPEND + + elif "requeue" == typ: + flags |= slurm.MAIL_JOB_REQUEUE + + elif "stage_out" == typ: + flags |= slurm.MAIL_JOB_STAGE_OUT + + elif "time_limit" == typ: + flags |= slurm.MAIL_JOB_TIME100 + + elif "time_limit_90" == typ: + flags |= slurm.MAIL_JOB_TIME90 + + elif "time_limit_80" == typ: + flags |= slurm.MAIL_JOB_TIME80 + + elif "time_limit_50" == typ: + flags |= slurm.MAIL_JOB_TIME50 + + elif "all" == typ: + flags |= (slurm.MAIL_JOB_BEGIN + | slurm.MAIL_JOB_END + | slurm.MAIL_JOB_FAIL + | slurm.MAIL_JOB_REQUEUE + | slurm.MAIL_JOB_STAGE_OUT) + else: + raise ValueError("Invalid Mail type: {typ}.") + + return flags + + +def get_mail_type(uint16_t typ): + """Convert uint16_t to a list of mail types.""" + types = [] + + if typ == 0: + return types + + if typ & slurm.MAIL_ARRAY_TASKS: + types.append("array_tasks") + +# if typ & slurm.MAIL_INVALID_DEPEND: +# types.append("invalid_depend") + + if typ & slurm.MAIL_JOB_BEGIN: + types.append("begin") + + if typ & slurm.MAIL_JOB_END: + types.append("end") + + if typ & 
slurm.MAIL_JOB_FAIL: + types.append("fail") + + if typ & slurm.MAIL_JOB_REQUEUE: + types.append("requeue") + + if typ & slurm.MAIL_JOB_STAGE_OUT: + types.append("stage_out") + + if typ & slurm.MAIL_JOB_TIME50: + types.append("time_limit_50") + + if typ & slurm.MAIL_JOB_TIME80: + types.append("time_limit_80") + + if typ & slurm.MAIL_JOB_TIME90: + types.append("time_limit_90") + + if typ & slurm.MAIL_JOB_TIME100: + types.append("time_limit_100") + + return types + + +def parse_acctg_profile(acctg_profiles): + """Convert a str or list of accounting gather profiles to uin32_t.""" + cdef uint32_t profile = 0 + profiles = acctg_profiles + + if not acctg_profiles: + return slurm.NO_VAL + + if "none" in acctg_profiles: + return slurm.ACCT_GATHER_PROFILE_NONE + elif "all" in acctg_profiles: + return slurm.ACCT_GATHER_PROFILE_ALL + + if "energy" in acctg_profiles: + profile |= slurm.ACCT_GATHER_PROFILE_ENERGY + + if "task" in acctg_profiles: + profile |= slurm.ACCT_GATHER_PROFILE_TASK + + if "lustre" in acctg_profiles: + profile |= slurm.ACCT_GATHER_PROFILE_LUSTRE + + if "network" in acctg_profiles: + profile |= slurm.ACCT_GATHER_PROFILE_NETWORK + + return profile + + +def get_acctg_profile(flags): + """Convert uin32_t accounting gather profiles to a list of strings.""" + profiles = [] + + if flags == 0 or flags == slurm.NO_VAL: + return ["none"] + + if flags == slurm.ACCT_GATHER_PROFILE_ALL: + return ["all"] + elif flags == slurm.ACCT_GATHER_PROFILE_NONE: + return ["none"] + + if flags & slurm.ACCT_GATHER_PROFILE_ENERGY: + profiles.append("energy") + + if flags & slurm.ACCT_GATHER_PROFILE_TASK: + profiles.append("task") + + if flags & slurm.ACCT_GATHER_PROFILE_LUSTRE: + profiles.append("lustre") + + if flags & slurm.ACCT_GATHER_PROFILE_NETWORK: + profiles.append("network") + + return profiles + + +def parse_power_type(power_types): + """Convert a str or list of str with power types to uint8_t.""" + cdef uint8_t flags = 0 + + if not power_types: + return slurm.NO_VAL8 + + if "level" in power_types: + flags |= slurm.SLURM_POWER_FLAGS_LEVEL + + +def get_power_type(flags): + """Convert uint8_t power type flags to a list of strings.""" + types = [] + + if flags & slurm.SLURM_POWER_FLAGS_LEVEL: + types.append("level") + + return types + + +def parse_shared_type(typ): + """Convert a job-sharing type str to its numerical representation.""" + if not typ: + return slurm.NO_VAL16 + + typ = typ.casefold() + if typ == "oversubscribe" or typ == "yes": + return slurm.JOB_SHARED_OK + elif typ == "user": + return slurm.JOB_SHARED_USER + elif typ == "mcs": + return slurm.JOB_SHARED_MCS + elif typ == "no" or typ == "exclusive": + return slurm.JOB_SHARED_NONE + else: + raise ValueError(f"Invalid resource_sharing type: {typ}.") + + +# https://github.com/SchedMD/slurm/blob/510ba4f17dfa559b579aa054cb8a415dcc224abc/src/common/proc_args.c#L319 +def get_task_dist(dist): + """Get the task distribution of a step as a dictionary.""" + out = { + "nodes": None, + "sockets": None, + "cores": None, + "plane": None, + "pack": None, + } + + if int(dist) <= 0 or dist == slurm.SLURM_DIST_UNKNOWN: + return None + + if (dist & slurm.SLURM_DIST_STATE_BASE) != slurm.SLURM_DIST_UNKNOWN: + state = dist & slurm.SLURM_DIST_STATE_BASE + + if state == slurm.SLURM_DIST_BLOCK: + out["nodes"] = "block" + elif state == slurm.SLURM_DIST_CYCLIC: + out["nodes"] = "cyclic" + elif state == slurm.SLURM_DIST_PLANE: + pass + elif state == slurm.SLURM_DIST_ARBITRARY: + out["nodes"] = "arbitrary" + elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC: + out["nodes"] 
= "cyclic" + out["sockets"] = "cyclic" + elif state == slurm.SLURM_DIST_CYCLIC_BLOCK: + out["nodes"] = "cyclic" + out["sockets"] = "block" + elif state == slurm.SLURM_DIST_CYCLIC_CFULL: + out["nodes"] = "cyclic" + out["sockets"] = "fcyclic" + elif state == slurm.SLURM_DIST_BLOCK_CYCLIC: + out["nodes"] = "block" + out["sockets"] = "cyclic" + elif state == slurm.SLURM_DIST_BLOCK_BLOCK: + out["nodes"] = "block" + out["sockets"] = "block" + elif state == slurm.SLURM_DIST_BLOCK_CFULL: + out["nodes"] = "block" + out["sockets"] = "fcyclic" + elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC_CYCLIC: + out["nodes"] = "cyclic" + out["sockets"] = "cyclic" + out["cores"] = "cyclic" + elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC_BLOCK: + out["nodes"] = "cyclic" + out["sockets"] = "cyclic" + out["cores"] = "block" + elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC_CFULL: + out["nodes"] = "cyclic" + out["sockets"] = "cyclic" + out["cores"] = "fcyclic" + elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_CYCLIC: + out["nodes"] = "cyclic" + out["sockets"] = "block" + out["cores"] = "cyclic" + elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_CYCLIC: + out["nodes"] = "cyclic" + out["sockets"] = "block" + out["cores"] = "cyclic" + elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_BLOCK: + out["nodes"] = "cyclic" + out["sockets"] = "block" + out["cores"] = "block" + elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_CFULL: + out["nodes"] = "cyclic" + out["sockets"] = "block" + out["cores"] = "fcyclic" + elif state == slurm.SLURM_DIST_CYCLIC_CFULL_CYCLIC: + out["nodes"] = "cyclic" + out["sockets"] = "fcyclic" + out["cores"] = "cyclic" + elif state == slurm.SLURM_DIST_CYCLIC_CFULL_BLOCK: + out["nodes"] = "cyclic" + out["sockets"] = "fcyclic" + out["cores"] = "block" + elif state == slurm.SLURM_DIST_CYCLIC_CFULL_CFULL: + out["nodes"] = "cyclic" + out["sockets"] = "fcyclic" + out["cores"] = "fcyclic" + elif state == slurm.SLURM_DIST_BLOCK_CYCLIC_CYCLIC: + out["nodes"] = "block" + out["sockets"] = "cyclic" + out["cores"] = "cyclic" + elif state == slurm.SLURM_DIST_BLOCK_CYCLIC_BLOCK: + out["nodes"] = "block" + out["sockets"] = "cyclic" + out["cores"] = "block" + elif state == slurm.SLURM_DIST_BLOCK_CYCLIC_CFULL: + out["nodes"] = "block" + out["sockets"] = "cyclic" + out["cores"] = "fcyclic" + elif state == slurm.SLURM_DIST_BLOCK_BLOCK_CYCLIC: + out["nodes"] = "block" + out["sockets"] = "block" + out["cores"] = "cyclic" + elif state == slurm.SLURM_DIST_BLOCK_BLOCK_BLOCK: + out["nodes"] = "block" + out["sockets"] = "block" + out["cores"] = "block" + elif state == slurm.SLURM_DIST_BLOCK_BLOCK_CFULL: + out["nodes"] = "block" + out["sockets"] = "block" + out["cores"] = "fcyclic" + elif state == slurm.SLURM_DIST_BLOCK_CFULL_CYCLIC: + out["nodes"] = "block" + out["sockets"] = "fcyclic" + out["cores"] = "cyclic" + elif state == slurm.SLURM_DIST_BLOCK_CFULL_BLOCK: + out["nodes"] = "block" + out["sockets"] = "fcyclic" + out["cores"] = "block" + elif state == slurm.SLURM_DIST_BLOCK_CFULL_CFULL: + out["nodes"] = "block" + out["sockets"] = "fcyclic" + out["cores"] = "fcyclic" + else: + out = None + + if out is not None: + dist_flag = dist & slurm.SLURM_DIST_STATE_FLAGS + if dist_flag == slurm.SLURM_DIST_PACK_NODES: + out["pack"] = True + elif dist_flag == slurm.SLURM_DIST_NO_PACK_NODES: + out["pack"] = False + + return out + + +def parse_task_dist(dist): + """Parse a distribution str or dict to its numerical representation.""" + cdef slurm.task_dist_states_t dist_state = slurm.SLURM_DIST_UNKNOWN + + if not dist: + return dist_state, None + + # Assume the user 
meant to specify the plane size. + if isinstance(dist, int): + return None, u16(dist) + + # Support sbatch-style string input. + # Parse the string and fill in the dist_dict above. + if isinstance(dist, str): + dist_str = dist + + # Plane method - return early because nothing else can be + # specified when this is set. + if "plane" in dist_str: + return None, u16(dist_str.split("=", 1)[1]) + + dist = { + "nodes": None, + "sockets": None, + "cores": None, + "plane": None, + "pack": None, + } + + # [0] = distribution method for nodes:sockets:cores + # [1] = pack/nopack specification (true or false) + dist_items = dist_str.split(",", 1) + + # Parse the different methods and fill in the dist_dict. + dist_methods = dist_items[0].split(":") + if len(dist_methods) and dist_methods[0] != "*": + dist["nodes"] = dist_methods[0] + + if len(dist_methods) > 2 and dist_methods[1] != "*": + dist["sockets"] = dist_methods[1] + + if len(dist_methods) >= 3: + if dist_methods[2] == "*": + dist["cores"] = dist_dict["sockets"] + else: + dist["cores"] = dist_methods[2] + + if len(dist_items) > 1: + if dist_items[1].casefold() == "pack": + dist["pack"] = True + elif dist_items[1].casefold() == "nopack": + dist["pack"] = False + + # Plane method - return early because nothing else can be + # specified when this is set. + if dist.get("plane") is not None: + return None, u16(dist['plane']) + + dist_str = "" + sockets_dist = None + + # Join the dist_dict distribution methods into a dist_str + # for easier comparison to check which distribution state + # is needed (see below). + nodes = dist.get("nodes") + if nodes is not None and nodes != "*": + dist_str = f"{nodes}" + else: + dist_str = "block" + + sockets = dist.get("sockets") + if sockets is not None and sockets != "*": + dist_str = f"{dist_str}:{sockets}" + else: + dist_str = f"{dist_str}:cyclic" + + cores = dist.get("cores") + if cores is not None and cores != "*": + dist_str = f"{dist_str}:{cores}" + else: + dist_str = f"{dist_str}:{sockets}" + + # Select the correct distribution method according to dist_str. 
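This parser backs the distribution attribute of JobSubmitDescription; a rough sketch of the inputs it is meant to accept (assuming a JobSubmitDescription instance named desc):

    desc.distribution = "block:cyclic:block"          # nodes:sockets:cores, sbatch-style
    desc.distribution = "block:fcyclic:fcyclic,Pack"  # additionally pack tasks tightly onto nodes
    desc.distribution = {"plane": 4}                  # plane method with a plane size of 4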
+ if dist_str == "cyclic": + dist_state = slurm.SLURM_DIST_CYCLIC + elif dist_str == "block": + dist_state = slurm.SLURM_DIST_BLOCK + elif dist_str == "arbitrary" or dist_str == "hostfile": + dist_state = slurm.SLURM_DIST_ARBITRARY + elif dist_str == "cyclic:cyclic": + dist_state = slurm.SLURM_DIST_CYCLIC_CYCLIC + elif dist_str == "cyclic:block": + dist_state = slurm.SLURM_DIST_CYCLIC_BLOCK + elif dist_str == "block:block": + dist_state = slurm,SLURM_DIST_BLOCK_BLOCK + elif dist_str == "block:cyclic": + dist_state = slurm.SLURM_DIST_BLOCK_CYCLIC + elif dist_str == "block:fcyclic": + dist_state = slurm.SLURM_DIST_BLOCK_CFULL + elif dist_str == "cyclic:fcyclic": + dist_state = slurm.SLURM_DIST_CYCLIC_CFULL + elif dist_str == "cyclic:cyclic:cyclic": + dist_state = slurm.SLURM_DIST_CYCLIC_CYCLIC_CYCLIC + elif dist_str == "cyclic:cyclic:block": + dist_state = slurm.SLURM_DIST_CYCLIC_CYCLIC_BLOCK + elif dist_str == "cyclic:cyclic:fcyclic": + dist_state = slurm.SLURM_DIST_CYCLIC_CYCLIC_CFULL + elif dist_str == "cyclic:block:cyclic": + dist_state = slurm.SLURM_DIST_CYCLIC_BLOCK_CYCLIC + elif dist_str == "cyclic:block:block": + dist_state = slurm.SLURM_DIST_CYCLIC_BLOCK_BLOCK + elif dist_str == "cyclic:block:fcyclic": + dist_state = slurm.SLURM_DIST_CYCLIC_BLOCK_CFULL + elif dist_str == "cyclic:fcyclic:cyclic": + dist_state = slurm.SLURM_DIST_CYCLIC_CFULL_CYCLIC + elif dist_str == "cyclic:fcyclic:block": + dist_state = slurm.SLURM_DIST_CYCLIC_CFULL_BLOCK + elif dist_str == "cyclic:fcyclic:fcyclic": + dist_state = slurm.SLURM_DIST_CYCLIC_CFULL_CFULL + elif dist_str == "block:cyclic:cyclic": + dist_state = slurm.SLURM_DIST_BLOCK_CYCLIC_CYCLIC + elif dist_str == "block:cyclic:block": + dist_state = slurm.SLURM_DIST_BLOCK_CYCLIC_BLOCK + elif dist_str == "block:cyclic:fcyclic": + dist_state = slurm.SLURM_DIST_BLOCK_CYCLIC_CFULL + elif dist_str == "block:block:cyclic": + dist_state = slurm.SLURM_DIST_BLOCK_BLOCK_CYCLIC + elif dist_str == "block:block:block": + dist_state = slurm.SLURM_DIST_BLOCK_BLOCK_BLOCK + elif dist_str == "block:block:fcyclic": + dist_state = slurm.SLURM_DIST_BLOCK_BLOCK_CFULL + elif dist_str == "block:fcyclic:cyclic": + dist_state = slurm.SLURM_DIST_BLOCK_CFULL_CYCLIC + elif dist_str == "block:fcyclic:block": + dist_state = slurm.SLURM_DIST_BLOCK_CFULL_BLOCK + elif dist_str == "block:fcyclic:fcyclic": + dist_state = slurm.SLURM_DIST_BLOCK_CFULL_CFULL + else: + raise ValueError(f"Invalid distribution specification: {dist}") + + # Check for Pack/NoPack + # Don't do anything if dist["pack"] is None + if dist["pack"]: + dist_state = (dist_state | slurm.SLURM_DIST_PACK_NODES) + elif dist["pack"] is not None and not dist["pack"]: + dist_state = (dist_state | slurm.SLURM_DIST_NO_PACK_NODES) + + return dist_state, None + + +def parse_cpu_gov(gov): + """Convert a cpu governor str to is numerical representation.""" + if not gov: + return u32(None) + + gov = gov.casefold() + rc = 0 + + if gov == "conservative": + rc = slurm.CPU_FREQ_CONSERVATIVE + elif gov == "ondemand": + rc = slurm.CPU_FREQ_ONDEMAND + elif gov == "performance": + rc = slurm.CPU_FREQ_PERFORMANCE + elif gov == "powersave": + rc = slurm.CPU_FREQ_POWERSAVE + elif gov == "userspace": + rc = slurm.CPU_FREQ_USERSPACE + elif gov == "schedutil": + rc = slurm.CPU_FREQ_SCHEDUTIL + else: + raise ValueError("Invalid cpu gov type: {}".format(gov)) + + return rc | slurm.CPU_FREQ_RANGE_FLAG + + +def parse_cpufreq(freq): + """Convert a cpu-frequency str to its numerical representation.""" + if not freq: + return u32(None) + + if 
isinstance(freq, str) and not freq.isdigit(): + freq = freq.casefold() + + if freq == "low": + return slurm.CPU_FREQ_LOW + elif freq == "highm1": + return slurm.CPU_FREQ_HIGHM1 + elif freq == "high": + return slurm.CPU_FREQ_HIGH + elif freq == "medium": + return slurm.CPU_FREQ_MEDIUM + else: + fr = u32(int(freq)) + if fr != slurm.NO_VAL: + return fr + + raise ValueError(f"Invalid cpu freq value: {freq}.") + + +def cpufreq_to_str(freq): + """Convert a numerical cpufreq value to its string representation.""" + if freq == slurm.CPU_FREQ_LOW: + return "Low" + elif freq == slurm.CPU_FREQ_MEDIUM: + return "Medium" + elif freq == slurm.CPU_FREQ_HIGHM1: + return "Highm1" + elif freq == slurm.CPU_FREQ_HIGH: + return "High" + elif freq == slurm.CPU_FREQ_CONSERVATIVE: + return "Conservative" + elif freq == slurm.CPU_FREQ_PERFORMANCE: + return "Performance" + elif freq == slurm.CPU_FREQ_POWERSAVE: + return "PowerSave" + elif freq == slurm.CPU_FREQ_USERSPACE: + return "UserSpace" + elif freq == slurm.CPU_FREQ_ONDEMAND: + return "OnDemand" + elif freq == slurm.CPU_FREQ_SCHEDUTIL: + return "SchedUtil" + elif freq & slurm.CPU_FREQ_RANGE_FLAG: + return None + elif freq == slurm.NO_VAL or freq == 0: + return None + else: + # This is in kHz + return freq + + +def make_gres_str(vals, typ=""): + final = [] + gres_dict = vals + + if not vals: + return None + + if isinstance(vals, str) and not vals.isdigit(): + gres_dict = {} + + gres_list = vals.replace("gres:", "") + for gres_str in gres_list.split(","): + gres_and_type, cnt = gres_str.rsplit(":", 1) + gres_dict.update({gres_and_type: int(cnt)}) + elif isinstance(vals, dict): + for gres_and_type, cnt in gres_dict.items(): + # Error immediately on specifications that contain more than one + # semicolon, as it is wrong. + if len(gres_and_type.split(":")) > 2: + raise ValueError(f"Invalid specifier: '{gres_and_type}'") + + if typ not in gres_and_type: + gres_and_type = f"{gres_and_type}:{typ}" + + final.append(f"gres:{gres_and_type}:{int(cnt)}") + else: + return f"gres:{typ}:{int(vals)}" + + return ",".join(final) diff --git a/pyslurm/pyslurm.pyx b/pyslurm/pyslurm.pyx index adbed03e..89b226a2 100644 --- a/pyslurm/pyslurm.pyx +++ b/pyslurm/pyslurm.pyx @@ -373,26 +373,6 @@ def slurm_load_slurmd_status(): return Status -def slurm_init(conf_file=None): - """Initialize the Slurm API internal structures. - - This function MUST be called before any internal API calls to ensure - Slurm's internal configuration structures have been populated. - - Args: - conf_file (str, optional): Absolute path to the configuration file. If - None (default value), libslurm automatically locates its own - configuration. 
- """ - if conf_file: - slurm.slurm_init(conf_file.encode('UTF-8')) - else: - slurm.slurm_init(NULL) - -def slurm_fini(): - """Cleanup Slurm internal configuration structures.""" - slurm.slurm_fini() - # # Slurm Config Class # @@ -6758,6 +6738,3 @@ cdef class licenses: else: apiError = slurm.slurm_get_errno() raise ValueError(slurm.stringOrNone(slurm.slurm_strerror(apiError), ''), apiError) - -# Automatically load Slurm configuration data structure at pyslurm module load -slurm_init() diff --git a/pyslurm/slurm/__init__.pxd b/pyslurm/slurm/__init__.pxd index f1fbdd6f..f29bfc00 100644 --- a/pyslurm/slurm/__init__.pxd +++ b/pyslurm/slurm/__init__.pxd @@ -61,7 +61,6 @@ cdef extern from '' nogil: cdef extern from *: ctypedef struct slurm_job_credential ctypedef struct switch_jobinfo - ctypedef struct job_resources ctypedef struct select_jobinfo ctypedef struct select_nodeinfo ctypedef struct jobacctinfo diff --git a/pyslurm/slurm/extra.pxi b/pyslurm/slurm/extra.pxi index 50fccb23..0c0f11ed 100644 --- a/pyslurm/slurm/extra.pxi +++ b/pyslurm/slurm/extra.pxi @@ -1,12 +1,3 @@ -# -# Structs that are not in the Slurm headers, which need to be redefined -# in order to implement certain features. -# -# For example: to communicate with the slurmctld directly in order -# to retrieve the actual batch-script as a string. -# - -# https://github.com/SchedMD/slurm/blob/26abe9188ea8712ba1eab4a8eb6322851f06a108/src/common/slurm_persist_conn.h#L51 ctypedef enum persist_conn_type_t: PERSIST_TYPE_NONE = 0 PERSIST_TYPE_DBD @@ -15,7 +6,7 @@ ctypedef enum persist_conn_type_t: PERSIST_TYPE_HA_DBD PERSIST_TYPE_ACCT_UPDATE -# https://github.com/SchedMD/slurm/blob/26abe9188ea8712ba1eab4a8eb6322851f06a108/src/common/slurm_persist_conn.h#L59 +# https://github.com/SchedMD/slurm/blob/master/src/common/slurm_persist_conn.h ctypedef struct persist_msg_t: void *conn void *data @@ -23,9 +14,9 @@ ctypedef struct persist_msg_t: uint16_t msg_type ctypedef int (*_slurm_persist_conn_t_callback_proc) (void *arg, persist_msg_t *msg, buf_t **out_buffer, uint32_t *uid) + ctypedef void (*_slurm_persist_conn_t_callback_fini)(void *arg) -# https://github.com/SchedMD/slurm/blob/26abe9188ea8712ba1eab4a8eb6322851f06a108/src/common/slurm_persist_conn.h#L66 ctypedef struct slurm_persist_conn_t: void *auth_cred _slurm_persist_conn_t_callback_proc callback_proc @@ -46,7 +37,7 @@ ctypedef struct slurm_persist_conn_t: slurm_trigger_callbacks_t trigger_callbacks; uint16_t version -# https://github.com/SchedMD/slurm/blob/20e2b354168aeb0f76d67f80122d80925c2ef32b/src/common/pack.h#L68 +# https://github.com/SchedMD/slurm/blob/master/src/common/pack.h#L68 ctypedef struct buf_t: uint32_t magic char *head @@ -54,24 +45,20 @@ ctypedef struct buf_t: uint32_t processed bool mmaped -# https://github.com/SchedMD/slurm/blob/20e2b354168aeb0f76d67f80122d80925c2ef32b/src/common/pack.h#L68 +# https://github.com/SchedMD/slurm/blob/master/src/common/slurm_protocol_defs.h ctypedef struct return_code_msg_t: uint32_t return_code -# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L650 ctypedef struct job_id_msg_t: uint32_t job_id uint16_t show_flags -# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L216 -# Only partially defined - not everything needed at the moment. 
ctypedef enum slurm_msg_type_t: REQUEST_SHARE_INFO = 2022 REQUEST_BATCH_SCRIPT = 2051 RESPONSE_BATCH_SCRIPT = 2052 RESPONSE_SLURM_RC = 8001 -# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L469 ctypedef struct forward_t: uint16_t cnt uint16_t init @@ -79,7 +66,6 @@ ctypedef struct forward_t: uint32_t timeout uint16_t tree_width -# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L491 ctypedef struct forward_struct_t: char *buf int buf_len @@ -89,7 +75,6 @@ ctypedef struct forward_struct_t: List ret_list uint32_t timeout -# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L514 ctypedef struct slurm_msg_t: slurm_addr_t address void *auth_cred @@ -113,40 +98,52 @@ ctypedef struct slurm_msg_t: slurm_addr_t orig_addr List ret_list -# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.c#L865 +# Slurm Protocol stuff cdef extern void slurm_free_return_code_msg(return_code_msg_t *msg) - -# https://github.com/SchedMD/slurm/blob/2d2e83674b59410a7ed8ab6fc8d8acfcfa8beaf9/src/common/slurm_protocol_api.c#L2401 cdef extern int slurm_send_recv_controller_msg(slurm_msg_t *request_msg, slurm_msg_t *response_msg, slurmdb_cluster_rec_t *working_cluster_rec) -# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.c#L168 cdef extern void slurm_msg_t_init(slurm_msg_t *msg) +# https://github.com/SchedMD/slurm/blob/master/src/common/job_resources.h +ctypedef struct job_resources: + bitstr_t *core_bitmap + bitstr_t *core_bitmap_used + uint32_t cpu_array_cnt + uint16_t *cpu_array_value + uint32_t *cpu_array_reps + uint16_t *cpus + uint16_t *cpus_used + uint16_t *cores_per_socket + uint16_t cr_type + uint64_t *memory_allocated + uint64_t *memory_used + uint32_t nhosts + bitstr_t *node_bitmap + uint32_t node_req + char *nodes + uint32_t ncpus + uint32_t *sock_core_rep_count + uint16_t *sockets_per_node + uint16_t *tasks_per_node + uint16_t threads_per_core + uint8_t whole_node -# Global Environment +# Global Environment cdef extern char **environ # # Slurm Memory routines +# We simply use the macros from xmalloc.h - more convenient # -cdef extern void slurm_xfree (void **) -cdef extern void *slurm_xcalloc(size_t, size_t, bool, bool, const char *, int, const char *) - -cdef inline xfree(void *__p): - slurm_xfree(&__p) - -cdef inline void *xmalloc(size_t __sz): - return slurm_xcalloc(1, __sz, True, False, __FILE__, __LINE__, __FUNCTION__) - -cdef inline void *try_xmalloc(size_t __sz): - return slurm_xcalloc(1, __sz, True, True, __FILE__, __LINE__, __FUNCTION__) - -cdef inline void xfree_ptr(void *__p): - slurm_xfree(&__p) +cdef extern from "pyslurm/slurm/xmalloc.h" nogil: + void xfree(void *__p) + void *xmalloc(size_t __sz) + void *try_xmalloc(size_t __sz) + void xfree_ptr(void *ptr) # # Slurm xstring functions @@ -177,6 +174,15 @@ cdef extern void slurm_free_job_step_info_members(job_step_info_t *msg) cdef extern char *slurm_job_state_string(uint16_t inx) cdef extern char *slurm_job_reason_string(int inx) cdef extern char *slurm_job_share_string(uint16_t shared) +cdef extern void slurm_free_update_step_msg(step_update_request_msg_t *msg) + +# +# Slurm Node functions +# + +cdef extern int slurm_get_select_nodeinfo(dynamic_plugin_data_t *nodeinfo, select_nodedata_type data_type, node_states state, void *data) +cdef 
extern char *slurm_node_state_string_complete(uint32_t inx) +cdef extern void slurm_free_update_node_msg(update_node_msg_t *msg) # # Slurm environment functions @@ -199,3 +205,28 @@ cdef extern int slurm_addto_char_list_with_case(List char_list, char *names, boo cdef extern int slurm_addto_step_list(List step_list, char *names) cdef extern int slurmdb_report_set_start_end_time(time_t *start, time_t *end) cdef extern uint16_t slurm_get_track_wckey() +cdef extern void slurm_sprint_cpu_bind_type(char *str, cpu_bind_type_t cpu_bind_type) + +# Slurm bit functions + +cdef extern bitstr_t *slurm_bit_alloc(bitoff_t nbits) +cdef extern void slurm_bit_set(bitstr_t *b, bitoff_t bit) +cdef extern int slurm_bit_test(bitstr_t *b, bitoff_t bit) +cdef extern char *slurm_bit_fmt(char *str, int32_t len, bitstr_t *b) +cdef extern void slurm_bit_free(bitstr_t **b) + + +cdef extern from *: + """ + #define bit_free(__b) slurm_bit_free((bitstr_t **)&(__b)) + #define FREE_NULL_BITMAP(_X) \ + do { \ + if (_X) \ + bit_free(_X); \ + _X = NULL; \ + } while(0) \ + """ + void bit_free(bitstr_t *_X) + void FREE_NULL_BITMAP(bitstr_t *_X) + +cdef extern char *slurm_hostlist_deranged_string_malloc(hostlist_t hl) diff --git a/pyslurm/slurm/other.pxi b/pyslurm/slurm/other.pxi new file mode 100644 index 00000000..79d212f2 --- /dev/null +++ b/pyslurm/slurm/other.pxi @@ -0,0 +1,67 @@ +# Global Environment +cdef extern char **environ + +# +# Slurm Memory routines +# + +cdef extern void slurm_xfree (void **) +cdef extern void *slurm_xcalloc(size_t, size_t, bool, bool, const char *, int, const char *) + +cdef inline xfree(void **item): + slurm_xfree(item) + +cdef inline void *xmalloc(size_t size): + return slurm_xcalloc(1, size, True, False, __FILE__, __LINE__, __FUNCTION__) + +cdef inline void *try_xmalloc(size_t size): + return slurm_xcalloc(1, size, True, True, __FILE__, __LINE__, __FUNCTION__) + +cdef inline void xfree_ptr(void *ptr): + slurm_xfree(&ptr) + +# +# Slurm xstring functions +# + +cdef extern char *slurm_xstrdup(const char *str) + + +# +# Slurm time functions +# + + +cdef extern void slurm_secs2time_str(time_t time, char *string, int size) +cdef extern void slurm_mins2time_str(time_t time, char *string, int size) +cdef extern int slurm_time_str2mins(const char *string) +cdef extern int slurm_time_str2secs(const char *string) +cdef extern void slurm_make_time_str(time_t *time, char *string, int size) +cdef extern time_t slurm_parse_time(char *time_str, int past) + +# +# Slurm Job functions +# + + +cdef extern void slurm_free_job_desc_msg(job_desc_msg_t *msg) +cdef extern void slurm_free_job_info(job_info_t *job) +cdef extern void slurm_free_job_info_members(job_info_t *job) +cdef extern void slurm_free_job_step_info_response_msg(job_step_info_response_msg_t *msg) +cdef extern void slurm_free_job_step_info_members(job_step_info_t *msg) +cdef extern char *slurm_job_state_string(uint16_t inx) +cdef extern char *slurm_job_reason_string(int inx) +cdef extern char *slurm_job_share_string(uint16_t shared) + +# +# Slurm environment functions +# + +cdef extern void slurm_env_array_merge(char ***dest_array, const char **src_array) +cdef extern char **slurm_env_array_create() +cdef extern int slurm_env_array_overwrite(char ***array_ptr, const char *name, const char *value) +cdef extern void slurm_env_array_free(char **env_array) +# cdef extern void slurm_env_array_merge_slurm(char ***dest_array, const char **src_array) + + +cdef extern int slurm_select_fini() diff --git a/pyslurm/slurm/xmalloc.h b/pyslurm/slurm/xmalloc.h new 
file mode 100644 index 00000000..f1db7b5f --- /dev/null +++ b/pyslurm/slurm/xmalloc.h @@ -0,0 +1,117 @@ +/*****************************************************************************\ + * xmalloc.h - enhanced malloc routines for slurm + * - default: never return if errors are encountered. + * - attempt to report file, line, and calling function on assertion failure + * - use configurable slurm log facility for reporting errors + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Jim Garlick and + * Mark Grondona + * CODE-OCEC-09-009. All rights reserved. + * + * This file is part of Slurm, a resource management program. + * For details, see . + * Please also read the included file: DISCLAIMER. + * + * Slurm is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Slurm; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + ***************************************************************************** + * Description: + * + * void *xmalloc(size_t size); + * void xrealloc(void *p, size_t newsize); + * void xfree(void *p); + * int xsize(void *p); + * + * xmalloc(size) allocates size bytes and returns a pointer to the allocated + * memory. The memory is set to zero. xmalloc() will not return unless + * there are no errors. The memory must be freed using xfree(). + * + * xrealloc(p, newsize) changes the size of the block pointed to by p to the + * value of newsize. Newly allocated memory is zeroed. If p is NULL, + * xrealloc() performs the same function as `p = xmalloc(newsize)'. If p + * is not NULL, it is required to have been initialized with a call to + * [try_]xmalloc() or [try_]xrealloc(). + * + * xfree(p) frees the memory block pointed to by p. The memory must have been + * initialized with a call to [try_]xmalloc() or [try_]xrealloc(). + * + * xsize(p) returns the current size of the memory allocation pointed to by + * p. The memory must have been allocated with [try_]xmalloc() or + * [try_]xrealloc(). 
+ * +\*****************************************************************************/ + +#ifndef _XMALLOC_H +#define _XMALLOC_H + +#include +#include + +#define xcalloc(__cnt, __sz) \ + slurm_xcalloc(__cnt, __sz, true, false, __FILE__, __LINE__, __func__) + +#define try_xcalloc(__cnt, __sz) \ + slurm_xcalloc(__cnt, __sz, true, true, __FILE__, __LINE__, __func__) + +#define xcalloc_nz(__cnt, __sz) \ + slurm_xcalloc(__cnt, __sz, false, false, __FILE__, __LINE__, __func__) + +#define xmalloc(__sz) \ + slurm_xcalloc(1, __sz, true, false, __FILE__, __LINE__, __func__) + +#define try_xmalloc(__sz) \ + slurm_xcalloc(1, __sz, true, true, __FILE__, __LINE__, __func__) + +#define xmalloc_nz(__sz) \ + slurm_xcalloc(1, __sz, false, false, __FILE__, __LINE__, __func__) + +#define xfree(__p) slurm_xfree((void **)&(__p)) + +#define xfree_array(__p) slurm_xfree_array((void ***)&(__p)) + +#define xrecalloc(__p, __cnt, __sz) \ + slurm_xrecalloc((void **)&(__p), __cnt, __sz, true, false, __FILE__, __LINE__, __func__) + +#define xrealloc(__p, __sz) \ + slurm_xrecalloc((void **)&(__p), 1, __sz, true, false, __FILE__, __LINE__, __func__) + +#define try_xrealloc(__p, __sz) \ + slurm_xrecalloc((void **)&(__p), 1, __sz, true, true, __FILE__, __LINE__, __func__) + +#define xrealloc_nz(__p, __sz) \ + slurm_xrecalloc((void **)&(__p), 1, __sz, false, false, __FILE__, __LINE__, __func__) + +void *slurm_xcalloc(size_t, size_t, bool, bool, const char *, int, const char *); +void slurm_xfree(void **); +void slurm_xfree_array(void ***); +void *slurm_xrecalloc(void **, size_t, size_t, bool, bool, const char *, int, const char *); + +size_t xsize(void *item); + +void xfree_ptr(void *); + +#endif /* !_XMALLOC_H */ diff --git a/setup.cfg b/setup.cfg index 17a6e9f3..78d52108 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,9 @@ +[options] +packages = find: + +[options.packages.find] +include = pyslurm, pyslurm.* + [bdist_rpm] release = 1 packager = Giovanni Torres diff --git a/setup.py b/setup.py index 796faa6a..7b96fdc8 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,6 @@ url="https://github.com/PySlurm/pyslurm", platforms=["Linux"], keywords=["HPC", "Batch Scheduler", "Resource Manager", "Slurm", "Cython"], - packages=["pyslurm"], classifiers=[ "Development Status :: 5 - Production/Stable", "Environment :: Console", diff --git a/tests/new_api/conftest.py b/tests/new_api/conftest.py new file mode 100644 index 00000000..7b49db69 --- /dev/null +++ b/tests/new_api/conftest.py @@ -0,0 +1,50 @@ +import pytest +from pyslurm import ( + Job, + JobSubmitDescription, +) + + +def create_job_script(): + job_script = """\ +#!/bin/bash + +echo "Got args: $@" + +/usr/bin/env + +sleep 500\ + +""" + return job_script + + +def create_simple_job_desc(script=None, **kwargs): + job = JobSubmitDescription(**kwargs) + + job.name = "test_job" + job.stdout = "/tmp/slurm-test-%j.out" + job.mem_per_cpu = "1G" + job.ntasks = 2 + job.cpus_per_task = 3 + job.script = create_job_script() if not script else script + job.time_limit = "1-00:00:00" + + return job + + +@pytest.fixture +def submit_job(): + + jobs = [] + def _job(script=None, **kwargs): + job_desc = create_simple_job_desc(script, **kwargs) + job = Job(job_desc.submit()) + + jobs.append(job) + return job + + yield _job + + for j in jobs: + j.cancel() diff --git a/tests/new_api/test_common.py b/tests/new_api/test_common.py new file mode 100644 index 00000000..5d502b90 --- /dev/null +++ b/tests/new_api/test_common.py @@ -0,0 +1,330 @@ +"""test_common.py - Test the most commonly used helper 
functions.""" + +import pyslurm +import pytest +import datetime +from pyslurm import Job, JobSubmitDescription, Node +from pyslurm.core.common.ctime import ( + timestr_to_mins, + timestr_to_secs, + mins_to_timestr, + secs_to_timestr, + date_to_timestamp, + timestamp_to_date, +) +from pyslurm.core.common.uint import ( + u8, + u16, + u32, + u64, + u8_parse, + u16_parse, + u32_parse, + u64_parse, +) +from pyslurm.core.common import ( + uid_to_name, + gid_to_name, + user_to_uid, + group_to_gid, + expand_range_str, + humanize, + dehumanize, + signal_to_num, + cpubind_to_num, + nodelist_from_range_str, + nodelist_to_range_str, +) + + +class TestTypes: + + def test_strings(self): + n = Node() + + n.name = "Testing fmalloc string routines." + assert n.name == "Testing fmalloc string routines." + + n.name = None + assert n.name == None + + # Everything after a \0 will be cut off + n.name = "test1\0test2" + assert n.name == "test1" + + n.name = "\0" + assert n.name == None + + def test_lists(self): + n = Node() + input_as_list = ["test1", "test2", "test3", "test4"] + input_as_str = ",".join(input_as_list) + + n.available_features = input_as_list + assert n.available_features == input_as_list + + n.available_features = input_as_str + assert n.available_features == input_as_list + + n.available_features = [] + assert n.available_features == [] + + n.available_features = "" + assert n.available_features == [] + + n.available_features = None + assert n.available_features == [] + + def test_dicts(self): + js = JobSubmitDescription() + input_as_dict = {"key1": "value1", "key2": "value2"} + input_as_str = "key1=value1,key2=value2" + + js.accounting_gather_freq = input_as_dict + assert js.accounting_gather_freq == input_as_dict + + js.accounting_gather_freq = input_as_str + assert js.accounting_gather_freq == input_as_dict + + js.accounting_gather_freq = {} + assert js.accounting_gather_freq == {} + + js.accounting_gather_freq = "" + assert js.accounting_gather_freq == {} + + js.accounting_gather_freq = None + assert js.accounting_gather_freq == {} + + def _uint_impl(self, func_set, func_get, typ): + val = func_set(2**typ-2) + assert func_get(val) == None + + val = func_set(None) + assert func_get(val) == None + + val = func_set(str(2**typ-2)) + assert func_get(val) == None + + val = func_set("unlimited", inf=True) + assert func_get(val) == "unlimited" + + val = func_set(0) + assert func_get(val) == None + + val = func_set(0, zero_is_noval=False) + assert func_get(val, zero_is_noval=False) == 0 + + with pytest.raises(TypeError, + match="an integer is required"): + val = func_set("unlimited") + + with pytest.raises(OverflowError, + match=r"can't convert negative value to*"): + val = func_set(-1) + + with pytest.raises(OverflowError, + match=r"value too large to convert to*|" + "Python int too large*"): + val = func_set(2**typ) + + def test_u8(self): + self._uint_impl(u8, u8_parse, 8) + + def test_u16(self): + self._uint_impl(u16, u16_parse, 16) + + def test_u32(self): + self._uint_impl(u32, u32_parse, 32) + + def test_u64(self): + self._uint_impl(u64, u64_parse, 64) + + def _uint_bool_impl(self, arg): + js = JobSubmitDescription() + + setattr(js, arg, True) + assert getattr(js, arg) == True + + setattr(js, arg, False) + assert getattr(js, arg) == False + + # Set to true again to make sure toggling actually works. 
+ setattr(js, arg, True) + assert getattr(js, arg) == True + + setattr(js, arg, None) + assert getattr(js, arg) == False + + def test_u8_bool(self): + self._uint_bool_impl("overcommit") + + def test_u16_bool(self): + self._uint_bool_impl("contiguous") + + def test_u64_bool_flag(self): + self._uint_bool_impl("kill_on_invalid_dependency") + + +class TestTime: + + def test_parse_minutes(self): + mins = 60 + mins_str = "01:00:00" + + assert timestr_to_mins(mins_str) == mins + assert timestr_to_mins("unlimited") == 2**32-1 + assert timestr_to_mins(None) == 2**32-2 + + assert mins_to_timestr(mins) == mins_str + assert mins_to_timestr(2**32-1) == "unlimited" + assert mins_to_timestr(2**32-2) == None + assert mins_to_timestr(0) == None + + with pytest.raises(ValueError, + match="Invalid Time Specification: invalid_val."): + timestr_to_mins("invalid_val") + + def test_parse_seconds(self): + secs = 3600 + secs_str = "01:00:00" + + assert timestr_to_secs(secs_str) == secs + assert timestr_to_secs("unlimited") == 2**32-1 + assert timestr_to_secs(None) == 2**32-2 + + assert secs_to_timestr(secs) == secs_str + assert secs_to_timestr(2**32-1) == "unlimited" + assert secs_to_timestr(2**32-2) == None + assert secs_to_timestr(0) == None + + with pytest.raises(ValueError, + match="Invalid Time Specification: invalid_val."): + timestr_to_secs("invalid_val") + + def test_parse_date(self): + timestamp = 1667938097 + date = "2022-11-08T21:08:17" + datetime_date = datetime.datetime(2022, 11, 8, 21, 8, 17) + + assert date_to_timestamp(date) == timestamp + assert date_to_timestamp(timestamp) == timestamp + assert date_to_timestamp(datetime_date) == timestamp + + assert timestamp_to_date(timestamp) == date + assert timestamp_to_date(0) == None + assert timestamp_to_date(2**32-1) == None + assert timestamp_to_date(2**32-2) == None + + with pytest.raises(ValueError, + match="Invalid Time Specification: 2022-11-08T21"): + date_to_timestamp("2022-11-08T21") + +class TestMiscUtil: + + def test_parse_uid(self): + name = uid_to_name(0) + assert name == "root" + + lookup = {0: "root"} + name = uid_to_name(0, lookup=lookup) + assert name == "root" + + uid = user_to_uid("root") + assert uid == 0 + + with pytest.raises(KeyError): + name = uid_to_name(2**32-5) + + with pytest.raises(KeyError): + name = user_to_uid("invalid_user") + + def test_parse_gid(self): + name = gid_to_name(0) + assert name == "root" + + lookup = {0: "root"} + name = gid_to_name(0, lookup=lookup) + assert name == "root" + + gid = group_to_gid("root") + assert gid == 0 + + with pytest.raises(KeyError): + name = gid_to_name(2**32-5) + + with pytest.raises(KeyError): + name = group_to_gid("invalid_group") + + def test_expand_range_str(self): + r = expand_range_str("1-5,6,7,10-11") + assert r == [1, 2, 3, 4, 5, 6, 7, 10, 11] + + def test_humanize(self): + val = humanize(1024) + assert val == "1.0G" + + val = humanize(2**20) + assert val == "1.0T" + + val = humanize(800) + assert val == "800.0M" + + val = humanize("unlimited") + assert val == "unlimited" + + val = humanize(None) + assert val == None + + with pytest.raises(ValueError): + val = humanize("invalid_val") + + def test_dehumanize(self): + # Note: default target unit for dehumanize is "M". 
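        # Each unit step is a factor of 1024 relative to the target unit, so
        # "10G" becomes 10 * 1024 = 10240 (MiB) and "10T" becomes 10 * 2**20,
        # while "10T" with target="G" is only 10 * 2**10. Plain integers pass
        # through unchanged, as the assertions below show.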
+ val = dehumanize(1024) + assert val == 1024 + + val = dehumanize("2M") + assert val == 2 + + val = dehumanize("10G") + assert val == 10240 + + val = dehumanize("9.6G") + assert val == round(1024*9.6) + + val = dehumanize("10T") + assert val == 10*(2**20) + + val = dehumanize("10T", target="G") + assert val == 10*(2**10) + + with pytest.raises(ValueError, + match="Invalid value specified: 10L"): + val = dehumanize("10L") + + with pytest.raises(ValueError, + match="could not convert string to float: 'invalid_val'"): + val = dehumanize("invalid_valM") + + def test_signal_to_num(self): + sig = signal_to_num("SIGKILL") + assert sig == 9 + + sig = signal_to_num(7) + assert sig == 7 + + with pytest.raises(ValueError): + sig = signal_to_num("invalid_sig") + + def test_nodelist_from_range_str(self): + nodelist = ["node001", "node007", "node008", "node009"] + nodelist_str = ",".join(nodelist) + assert nodelist == nodelist_from_range_str("node[001,007-009]") + assert nodelist_from_range_str("node[001,007:009]") is None + + def test_nodelist_to_range_str(self): + nodelist = ["node001", "node007", "node008", "node009"] + nodelist_str = ",".join(nodelist) + assert "node[001,007-009]" == nodelist_to_range_str(nodelist) + assert "node[001,007-009]" == nodelist_to_range_str(nodelist_str) + diff --git a/tests/new_api/test_job.py b/tests/new_api/test_job.py new file mode 100644 index 00000000..4056b5f1 --- /dev/null +++ b/tests/new_api/test_job.py @@ -0,0 +1,143 @@ +"""test_job.py - Test the job api functions.""" + +import sys +import time +import pytest +import pyslurm +import tempfile +import os +from os import environ as pyenviron +from conftest import create_simple_job_desc +from pyslurm import ( + Job, + Jobs, + JobSubmitDescription, + RPCError, +) + + +def test_reload(submit_job): + job = submit_job() + jid = job.id + + # Nothing has been loaded at this point, just make sure everything is + # on default values. + assert job.ntasks == 1 + assert job.cpus_per_task == 1 + assert job.time_limit == None + + # Now load the job info + job.reload() + + assert job.id == jid + assert job.ntasks == 2 + assert job.cpus_per_task == 3 + assert job.time_limit == "1-00:00:00" + + with pytest.raises(RPCError): + Job(99999).reload() + + +def test_cancel(submit_job): + job = submit_job() + + job.cancel() + + # make sure the job is actually cancelled + time.sleep(0.5) + assert job.reload().state == "CANCELLED" + + +def test_parse_all(submit_job): + job = submit_job() + + # Use the as_dict() function to test if parsing works for all + # properties on a simple Job without error. + job.reload().as_dict() + + +def test_send_signal(submit_job): + job = submit_job() + + time.sleep(1) + assert job.reload().state == "RUNNING" + + # Send a SIGKILL (basically cancelling the Job) + job.send_signal(9) + + # make sure the job is actually cancelled + time.sleep(1) + assert job.reload().state == "CANCELLED" + + +def test_suspend_unsuspend(submit_job): + job = submit_job() + + time.sleep(1) + job.suspend() + assert job.reload().state == "SUSPENDED" + + job.unsuspend() + # make sure the job is actually running again + time.sleep(1) + assert job.reload().state == "RUNNING" + + +# Don't need to test hold/resume, since it uses just job.modify() to set +# priority to 0/INFINITE. 
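# For illustration only: a hold/release round-trip would then presumably be a
# thin wrapper around modify(), roughly like the sketch below (hypothetical;
# whether "priority" is accepted as a JobSubmitDescription attribute is an
# assumption, not something this test suite verifies):
#
#   job.modify(JobSubmitDescription(priority=0))           # hold
#   job.modify(JobSubmitDescription(priority=2**32 - 1))   # release (INFINITE)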
+def test_modify(submit_job): + job = submit_job(priority=0) + job = job.reload() + + changes = JobSubmitDescription( + time_limit = "2-00:00:00", + ntasks = 5, + cpus_per_task = 4, + ) + + job.modify(changes) + job.reload() + + assert job.time_limit == "2-00:00:00" + assert job.ntasks == 5 + assert job.cpus_per_task == 4 + + +def test_requeue(submit_job): + job = submit_job() + job.reload() + + assert job.requeue_count == 0 + + time.sleep(1.5) + job.requeue() + job.reload() + + assert job.requeue_count == 1 + + +def test_notify(submit_job): + job = submit_job() + time.sleep(1) + + # Could check the logfile, but we just assume for now + # that when this function raises no Exception, everything worked. + job.notify("Hello Friends!") + + +def test_get_batch_script(submit_job): + script_body = create_simple_job_desc().script + job = submit_job() + + assert script_body == job.get_batch_script() + + +def test_get_job_queue(submit_job): + # Submit 10 jobs, gather the job_ids in a list + job_list = [submit_job() for i in range(10)] + + jobs = Jobs() + for job in job_list: + # Check to see if all the Jobs we submitted exist + assert job.id in jobs + assert isinstance(jobs[job.id], Job) diff --git a/tests/new_api/test_job_steps.py b/tests/new_api/test_job_steps.py new file mode 100644 index 00000000..1d3fbe75 --- /dev/null +++ b/tests/new_api/test_job_steps.py @@ -0,0 +1,181 @@ +"""test_job_steps.py - Test the job steps api functions.""" + +import pytest +import time +from pyslurm import ( + JobStep, + JobSteps, + RPCError, +) + + +def create_job_script_multi_step(steps=None): + default = f""" + srun -n1 -N1 -c2 \ + -J step_zero --distribution=block:cyclic:block,Pack \ + sleep 300 & + srun -n1 -N1 -c3 \ + -t 10 -J step_one --distribution=block:cyclic:block,Pack \ + sleep 300 &""" + + job_script = f"""\ +#!/bin/bash + +echo "Got args: $@" + +/usr/bin/env + +{default if steps is None else steps} +wait +""" + return job_script + + +def test_reload(submit_job): + job = submit_job(script=create_job_script_multi_step()) + step = JobStep(job, "batch") + + # Nothing has been loaded at this point, just make sure everything is + # on default values. + assert step.name is None + assert step.ntasks is None + assert step.time_limit is None + + # Now load the step info, waiting one second to make sure the Step + # actually exists. + time.sleep(1) + step.reload() + + assert step.id == "batch" + assert step.job_id == job.id + assert step.name == "batch" + # Job was submitted with ntasks=2, but the batch step always has just 1. + assert step.ntasks == 1 + # Job was submitted with a time-limit of 1 day, but it seems this doesn't + # propagate through for the steps if not set explicitly. + assert step.time_limit == "unlimited" + + # Now try to load the first and second Step started by srun + step_zero = JobStep(job, 0).reload() + step_one = JobStep(job, 1).reload() + + # It is possible that the srun executed as the second command will + # become the Step with ID '0' - so we just swap it. 
+ if step_zero.name == "step_one": + tmp = step_zero + step_zero = step_one + step_one = tmp + + assert step_one.id == 0 + assert step_zero.id == 1 + + step = step_zero + assert step.job_id == job.id + assert step.name == "step_zero" + assert step.ntasks == 1 + assert step.alloc_cpus == 2 + assert step.time_limit == "unlimited" + + step = step_one + assert step.job_id == job.id + assert step.name == "step_one" + assert step.ntasks == 1 + assert step.alloc_cpus == 3 + assert step.time_limit == "00:10:00" + + +def test_collection(submit_job): + job = submit_job(script=create_job_script_multi_step()) + + time.sleep(1) + steps = JobSteps(job) + + assert steps != {} + # We have 3 Steps: batch, 0 and 1 + assert len(steps) == 3 + assert ("batch" in steps and + 0 in steps and + 1 in steps) + + +def test_distribution(submit_job): + job = submit_job(script=create_job_script_multi_step()) + step = JobStep(job, 0) + + assert step.distribution is None + + time.sleep(1) + step.reload() + + assert step.distribution == {"nodes": "block" , "sockets": "cyclic", + "cores": "block", "plane": None ,"pack": True} + + +def test_cancel(submit_job): + job = submit_job(script=create_job_script_multi_step()) + + time.sleep(1) + steps = JobSteps(job) + assert len(steps) == 3 + assert ("batch" in steps and + 0 in steps and + 1 in steps) + + steps[0].cancel() + + time.sleep(0.5) + steps = JobSteps(job) + assert len(steps) == 2 + assert ("batch" in steps and + 1 in steps) + + +def test_modify(submit_job): + steps = "srun -t 20 sleep 100" + job = submit_job(script=create_job_script_multi_step(steps)) + + time.sleep(1) + step = JobStep(job, 0).reload() + assert step.time_limit == "00:20:00" + + step.modify(JobStep(time_limit="00:05:00")) + assert step.reload().time_limit == "00:05:00" + + step.modify(time_limit="00:15:00") + assert step.reload().time_limit == "00:15:00" + + +def test_send_signal(submit_job): + steps = "srun -t 10 sleep 100" + job = submit_job(script=create_job_script_multi_step(steps)) + step = JobStep(job, 0) + + time.sleep(1) + assert step.reload().state == "RUNNING" + + # Send a SIGTERM (basically cancelling the Job) + step.send_signal(15) + + # Make sure the job is actually cancelled. + # If a RPCError is raised, this means the Step got cancelled. + time.sleep(1) + with pytest.raises(RPCError): + step.reload() + + +def test_reload_with_wrong_step_id(submit_job): + job = submit_job() + step = JobStep(job, 3) + + with pytest.raises(RPCError): + step.reload() + + +def test_parse_all(submit_job): + job = submit_job() + step = JobStep(job, "batch") + + # Use the as_dict() function to test if parsing works for all + # properties on a simple JobStep without error. 
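    # The exact contents are not asserted; the keys simply mirror the JobStep
    # properties, so the result is expected to look roughly like
    # {"id": "batch", "name": "batch", "ntasks": 1, ...} (shape illustrative
    # only, not verified here).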
+ time.sleep(1) + step.reload().as_dict() diff --git a/tests/new_api/test_job_submit.py b/tests/new_api/test_job_submit.py new file mode 100644 index 00000000..ff1d2858 --- /dev/null +++ b/tests/new_api/test_job_submit.py @@ -0,0 +1,306 @@ +"""test_job_submit.py - Test the job submit api functions.""" + +import sys +import time +import pytest +import pyslurm +import tempfile +import os +from os import environ as pyenviron +from conftest import create_simple_job_desc, create_job_script +from pyslurm import ( + Job, + Jobs, + JobSubmitDescription, + RPCError, +) + +def job_desc(**kwargs): + return JobSubmitDescription(script=create_job_script(), **kwargs) + + +def test_environment(): + job = job_desc() + + # Everything in the current environment will be exported + job.environment = "ALL" + job._create_job_submit_desc() + + # Only SLURM_* Vars from the current env will be exported + job.environment = "NONE" + job._create_job_submit_desc() + + # TODO: more test cases + # Test explicitly set vars as dict +# job.environment = { +# "PYSLURM_TEST_VAR_1": 2, +# "PYSLURM_TEST_VAR_2": "test-value", +# } + + +def test_cpu_frequency(): + job = job_desc() + job._create_job_submit_desc() + + job.cpu_freq = "Performance" + job._create_job_submit_desc() + + job.cpu_freq = {"governor": "Performance"} + job._create_job_submit_desc() + + job.cpu_freq = 1000000 + job._create_job_submit_desc() + + job.cpu_freq = {"max": 1000000} + job._create_job_submit_desc() + + job.cpu_freq = "1000000-3700000" + job._create_job_submit_desc() + + job.cpu_freq = {"min": 1000000, "max": 3700000} + job._create_job_submit_desc() + + job.cpu_freq = "1000000-3700000:Performance" + job._create_job_submit_desc() + + job.cpu_freq = {"min": 1000000, "max": 3700000, + "governor": "Performance"} + job._create_job_submit_desc() + + with pytest.raises(ValueError, + match=r"Invalid cpu_freq format*"): + job.cpu_freq = "Performance:3700000" + job._create_job_submit_desc() + + with pytest.raises(ValueError, + match=r"min cpu-freq*"): + job.cpu_freq = "4000000-3700000" + job._create_job_submit_desc() + + with pytest.raises(ValueError, + match=r"Invalid cpu freq value*"): + job.cpu_freq = "3700000:Performance" + job._create_job_submit_desc() + + with pytest.raises(ValueError, + match=r"Setting Governor when specifying*"): + job.cpu_freq = {"max": 3700000, "governor": "Performance"} + job._create_job_submit_desc() + + with pytest.raises(ValueError, + match=r"Setting Governor when specifying*"): + job.cpu_freq = {"min": 3700000, "governor": "Performance"} + job._create_job_submit_desc() + + +def test_nodes(): + job = job_desc() + job._create_job_submit_desc() + + job.nodes = "5" + job._create_job_submit_desc() + + job.nodes = {"min": 5, "max": 5} + job._create_job_submit_desc() + + job.nodes = "5-10" + job._create_job_submit_desc() + + job.nodes = {"min": 5, "max": 10} + job._create_job_submit_desc() + + with pytest.raises(ValueError, + match=r"Max Nodecount cannot be less than*"): + job.nodes = {"min": 10, "max": 5} + job._create_job_submit_desc() + + +def test_script(): + job = job_desc() + script = create_job_script() + job._create_job_submit_desc() + + job.script = script + assert job.script == script + assert job.script_args is None + + # Try passing in a path to a script. 
+ fd, path = tempfile.mkstemp() + try: + with os.fdopen(fd, 'w') as tmp: + tmp.write(script) + + job.script = path + job.script_args = "-t 10 input.csv" + job._create_job_submit_desc() + finally: + os.remove(path) + + with pytest.raises(ValueError, + match=r"Passing arguments to a script*"): + job.script = "#!/bin/bash\nsleep 10" + job.script_args = "-t 10" + job._create_job_submit_desc() + + with pytest.raises(ValueError, + match=r"The Slurm Controller does not allow*"): + job.script = script + "\0" + job.script_args = None + job._create_job_submit_desc() + + with pytest.raises(ValueError, + match="You need to provide a batch script."): + job.script = "" + job.script_args = None + job._create_job_submit_desc() + + with pytest.raises(ValueError, + match=r"Batch script contains DOS line breaks*"): + job.script = script + "\r\n" + job.script_args = None + job._create_job_submit_desc() + + +def test_dependencies(): + job = job_desc() + job._create_job_submit_desc() + + job.dependencies = "after:70:90:60+30,afterok:80" + job._create_job_submit_desc() + + job.dependencies = "after:70:90:60?afterok:80" + job._create_job_submit_desc() + + job.dependencies = { + "afterany": [40, 30, 20], + "afternotok": [100], + "satisfy": "any", + "singleton": True, + } + job._create_job_submit_desc() + + +def test_cpus(): + job = job_desc() + job._create_job_submit_desc() + + job.cpus_per_task = 5 + job._create_job_submit_desc() + + with pytest.raises(ValueError, + match="cpus_per_task and cpus_per_gpu are mutually exclusive."): + job.cpus_per_gpu = 5 + job._create_job_submit_desc() + + job.cpus_per_task = None + job.cpus_per_gpu = 5 + job._create_job_submit_desc() + + with pytest.raises(ValueError, + match="cpus_per_task and cpus_per_gpu are mutually exclusive."): + job.cpus_per_task = 5 + job._create_job_submit_desc() + + +def test_gres_per_node(): + job = job_desc() + job._create_job_submit_desc() + + job.gres_per_node = "gpu:tesla:1,gpu:volta:5" + job._create_job_submit_desc() + + job.gres_per_node = {"gpu:tesla": 1, "gpu:volta": 1} + job._create_job_submit_desc() + + +def test_signal(): + job = job_desc() + job._create_job_submit_desc() + + job.signal = 7 + job._create_job_submit_desc() + + job.signal = {"batch_only": True} + job._create_job_submit_desc() + + job.signal = "7@120" + job._create_job_submit_desc() + + job.signal = "RB:8@180" + job._create_job_submit_desc() + + +def test_distribution(): + job = job_desc() + job._create_job_submit_desc() + + job.distribution = "cyclic:cyclic:cyclic" + job._create_job_submit_desc() + + job.distribution = {"nodes": "cyclic", "sockets": "block", "pack": True} + job._create_job_submit_desc() + + job.distribution = "*:*:fcyclic,NoPack" + job._create_job_submit_desc() + + job.distribution = 10 + job._create_job_submit_desc() + + job.distribution = {"plane": 20} + job._create_job_submit_desc() + + +def test_setting_attrs_with_env_vars(): + pyenviron["PYSLURM_JOBDESC_ACCOUNT"] = "account1" + pyenviron["PYSLURM_JOBDESC_NAME"] = "jobname" + pyenviron["PYSLURM_JOBDESC_WCKEY"] = "wckey" + pyenviron["PYSLURM_JOBDESC_CLUSTERS"] = "cluster1,cluster2" + pyenviron["PYSLURM_JOBDESC_COMMENT"] = "A simple job comment" + pyenviron["PYSLURM_JOBDESC_CONTIGUOUS"] = "True" + pyenviron["PYSLURM_JOBDESC_WORK_DIR"] = "/work/user1" + + job = job_desc(work_dir="/work/user2") + job.load_environment() + + assert job.account == "account1" + assert job.name == "jobname" + assert job.wckey == "wckey" + assert job.clusters == "cluster1,cluster2" + assert job.comment == "A simple job comment" + 
assert job.work_dir == "/work/user2" + assert job.contiguous == True + job._create_job_submit_desc() + + +def test_parsing_sbatch_options_from_script(): + job = job_desc(work_dir="/work/user2") + + fd, path = tempfile.mkstemp() + try: + with os.fdopen(fd, 'w') as tmp: + tmp.write( + """#!/bin/bash + + #SBATCH --time 20 + #SBATCH --mem-per-cpu =1G + #SBATCH -G 1 + #SBATCH --exclusive + #SBATCH --ntasks = 2 + #SBATCH -c=3 # inline-comments should be ignored + + sleep 1000 + """ + ) + + job.script = path + job.load_sbatch_options() + assert job.time_limit == "20" + assert job.mem_per_cpu == "1G" + assert job.gpus == "1" + assert job.resource_sharing == "no" + assert job.ntasks == "2" + assert job.cpus_per_task == "3" + job._create_job_submit_desc() + finally: + os.remove(path) + From 0017301b853bdfb2149df9f81e3f8c4883c5a437 Mon Sep 17 00:00:00 2001 From: tazend Date: Thu, 23 Feb 2023 20:59:10 +0100 Subject: [PATCH 02/28] Rework the Node-API --- pyslurm/__init__.py | 2 + pyslurm/core/common/__init__.pxd | 3 + pyslurm/core/common/cstr.pxd | 3 +- pyslurm/core/common/cstr.pyx | 17 +- pyslurm/core/error.pyx | 21 +- pyslurm/core/job/job.pxd | 2 +- pyslurm/core/job/util.pyx | 29 -- pyslurm/core/node.pxd | 69 +++ pyslurm/core/node.pyx | 826 +++++++++++++++++++++++++++++++ pyslurm/slurm/extra.pxi | 1 + tests/new_api/test_node.py | 56 +++ 11 files changed, 984 insertions(+), 45 deletions(-) create mode 100644 pyslurm/core/node.pxd create mode 100644 pyslurm/core/node.pyx create mode 100644 tests/new_api/test_node.py diff --git a/pyslurm/__init__.py b/pyslurm/__init__.py index b3bedd61..0181892e 100644 --- a/pyslurm/__init__.py +++ b/pyslurm/__init__.py @@ -24,6 +24,8 @@ JobSubmitDescription, ) +from pyslurm.core.node import Node, Nodes + import pyslurm.core.error from pyslurm.core.error import ( RPCError, diff --git a/pyslurm/core/common/__init__.pxd b/pyslurm/core/common/__init__.pxd index 160345ad..284f5acb 100644 --- a/pyslurm/core/common/__init__.pxd +++ b/pyslurm/core/common/__init__.pxd @@ -17,6 +17,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# +# cython: embedsignature=True # cython: c_string_type=unicode, c_string_encoding=utf8 # cython: language_level=3 @@ -26,3 +27,5 @@ from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t from pyslurm.core.common cimport cstr from libc.stdlib cimport free +cpdef uid_to_name(uint32_t uid, err_on_invalid=*, dict lookup=*) +cpdef gid_to_name(uint32_t gid, err_on_invalid=*, dict lookup=*) diff --git a/pyslurm/core/common/cstr.pxd b/pyslurm/core/common/cstr.pxd index 133edf22..f32c60f6 100644 --- a/pyslurm/core/common/cstr.pxd +++ b/pyslurm/core/common/cstr.pxd @@ -27,10 +27,11 @@ from libc.string cimport memcpy, strlen cdef char *from_unicode(s) cdef to_unicode(char *s, default=*) cdef fmalloc(char **old, val) -cdef fmalloc2(char **old, char **old2, val) +cdef fmalloc2(char **p1, char **p2, val) cdef free_array(char **arr, count) cdef list to_list(char *str_list) cdef from_list(char **old, vals, delim=*) +cdef from_list2(char **p1, char **p2, vals, delim=*) cdef dict to_dict(char *str_dict, str delim1=*, str delim2=*) cdef dict from_dict(char **old, vals, prepend=*, str delim1=*, str delim2=*) cdef to_gres_dict(char *gres) diff --git a/pyslurm/core/common/cstr.pyx b/pyslurm/core/common/cstr.pyx index 0a824f3c..8ea08186 100644 --- a/pyslurm/core/common/cstr.pyx +++ b/pyslurm/core/common/cstr.pyx @@ -51,14 +51,10 @@ cdef inline to_unicode(char *_str, default=None): return default -cdef fmalloc2(char **old, char **old2, val): - """Like fmalloc, but copies the value to 2 char pointers. - - Memory will only be allocated once. - "old" and "old2" will both share this same pointer. - """ - fmalloc(old, val) - old2[0] = old[0] +cdef fmalloc2(char **p1, char **p2, val): + """Like fmalloc, but copies the value to 2 char pointers.""" + fmalloc(p1, val) + fmalloc(p2, val) cdef fmalloc(char **old, val): @@ -129,6 +125,11 @@ cdef from_list(char **old, vals, delim=","): fmalloc(old, final) +cdef from_list2(char **p1, char **p2, vals, delim=","): + from_list(p1, vals, delim) + from_list(p2, vals, delim) + + cdef dict to_dict(char *str_dict, str delim1=",", str delim2="="): """Convert a char* key=value pair to dict. diff --git a/pyslurm/core/error.pyx b/pyslurm/core/error.pyx index 72ec8389..649d1c55 100644 --- a/pyslurm/core/error.pyx +++ b/pyslurm/core/error.pyx @@ -29,8 +29,9 @@ def slurm_strerror(errno): """Convert a slurm errno to a string. Args: - errno (int): The error number for which the string representation - should be returned. + errno (int): + The error number for which the string representation should be + returned. Returns: str: String representation of errno. @@ -65,10 +66,12 @@ class RPCError(Exception): """Exception for handling Slurm RPC errors. Args: - errno (int): A slurm error number returned by RPC functions. Default - is None, which will get the last slurm error automatically. - msg (str): An optional, custom error description. If this is set, the - errno will not be translated to its string representation. + errno (int): + A slurm error number returned by RPC functions. Default is None, + which will get the last slurm error automatically. + msg (str): + An optional, custom error description. If this is set, the errno + will not be translated to its string representation. 
""" def __init__(self, errno=slurm.SLURM_ERROR, msg=None): self.msg = msg @@ -84,5 +87,11 @@ class RPCError(Exception): def verify_rpc(errno): + """Verify a Slurm RPC + + Args: + errno (int): + A Slurm error value + """ if errno != slurm.SLURM_SUCCESS: raise RPCError(errno) diff --git a/pyslurm/core/job/job.pxd b/pyslurm/core/job/job.pxd index bb9dde6c..e9bd9867 100644 --- a/pyslurm/core/job/job.pxd +++ b/pyslurm/core/job/job.pxd @@ -81,7 +81,7 @@ cdef class Job: dict groups cdef alloc(self) - cdef time_t _calc_run_time(self) + cdef _calc_run_time(self) @staticmethod cdef Job from_ptr(slurm_job_info_t *in_ptr) diff --git a/pyslurm/core/job/util.pyx b/pyslurm/core/job/util.pyx index fd34f6c7..f7a95892 100644 --- a/pyslurm/core/job/util.pyx +++ b/pyslurm/core/job/util.pyx @@ -585,32 +585,3 @@ def cpufreq_to_str(freq): return freq -def make_gres_str(vals, typ=""): - final = [] - gres_dict = vals - - if not vals: - return None - - if isinstance(vals, str) and not vals.isdigit(): - gres_dict = {} - - gres_list = vals.replace("gres:", "") - for gres_str in gres_list.split(","): - gres_and_type, cnt = gres_str.rsplit(":", 1) - gres_dict.update({gres_and_type: int(cnt)}) - elif isinstance(vals, dict): - for gres_and_type, cnt in gres_dict.items(): - # Error immediately on specifications that contain more than one - # semicolon, as it is wrong. - if len(gres_and_type.split(":")) > 2: - raise ValueError(f"Invalid specifier: '{gres_and_type}'") - - if typ not in gres_and_type: - gres_and_type = f"{gres_and_type}:{typ}" - - final.append(f"gres:{gres_and_type}:{int(cnt)}") - else: - return f"gres:{typ}:{int(vals)}" - - return ",".join(final) diff --git a/pyslurm/core/node.pxd b/pyslurm/core/node.pxd new file mode 100644 index 00000000..2f2a32ec --- /dev/null +++ b/pyslurm/core/node.pxd @@ -0,0 +1,69 @@ +######################################################################### +# node.pxd - interface to work with nodes in slurm +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: embedsignature=True +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from libc.string cimport memcpy, memset +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + node_info_t, + node_info_msg_t, + update_node_msg_t, + partition_info_msg_t, + slurm_load_node, + slurm_load_node_single, + slurm_update_node, + slurm_delete_node, + slurm_create_node, + slurm_load_partitions, + slurm_free_update_node_msg, + slurm_init_update_node_msg, + slurm_populate_node_partitions, + slurm_free_node_info_msg, + slurm_free_node_info_members, + slurm_free_update_node_msg, + slurm_free_partition_info_msg, + slurm_get_select_nodeinfo, + slurm_sprint_cpu_bind_type, + slurm_node_state_string_complete, + slurm_node_state_string, + cpu_bind_type_t, +) + + +cdef class Nodes(dict): + + cdef: + node_info_msg_t *info + partition_info_msg_t *part_info + node_info_t tmp_info + + +cdef class Node: + cdef: + node_info_t *info + update_node_msg_t *umsg + dict passwd + dict groups + + @staticmethod + cdef Node from_ptr(node_info_t *in_ptr) + diff --git a/pyslurm/core/node.pyx b/pyslurm/core/node.pyx new file mode 100644 index 00000000..63254ec0 --- /dev/null +++ b/pyslurm/core/node.pyx @@ -0,0 +1,826 @@ +######################################################################### +# node.pyx - interface to work with nodes in slurm +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: embedsignature=True +# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: language_level=3 + +from pyslurm.slurm cimport xfree, try_xmalloc +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t +from pyslurm.core.common cimport cstr +from pyslurm.core.common import cstr +from pyslurm.core.common cimport ctime +from pyslurm.core.common import ctime +from pyslurm.core.common.ctime cimport time_t +from pyslurm.core.common.uint cimport * +from pyslurm.core.common.uint import * +from pyslurm.core.error import RPCError, verify_rpc +from pyslurm.core.common.ctime import timestamp_to_date, _raw_time +from pyslurm.core.common import ( + uid_to_name, + gid_to_name, + humanize, + _getgrall_to_dict, + _getpwall_to_dict, + cpubind_to_num, + instance_to_dict, + _sum_prop, +) + + +cdef class Nodes(dict): + """A collection of Node objects. + + By creating a new Nodes instance, all Nodes in the system will be + fetched from the slurmctld. + + Args: + preload_passwd_info (bool): + Decides whether to query passwd and groups information from the + system. + Could potentially speed up access to attributes of the Node where + a UID/GID is translated to a name. + If True, the information will fetched and stored in each of the + Node instances. The default is False. 
+ + Raises: + RPCError: When getting all the Nodes from the slurmctld failed. + MemoryError: If malloc fails to allocate memory. + """ + def __dealloc__(self): + slurm_free_node_info_msg(self.info) + slurm_free_partition_info_msg(self.part_info) + + def __init__(self, preload_passwd_info=False): + cdef: + dict passwd = {} + dict groups = {} + int flags = slurm.SHOW_ALL + Node node + + self.info = NULL + self.part_info = NULL + + # If requested, preload the passwd and groups database to potentially + # speedup lookups for an attribute in a node, e.g "owner". + if preload_passwd_info: + passwd = _getpwall_to_dict() + groups = _getgrall_to_dict() + + verify_rpc(slurm_load_node(0, &self.info, slurm.SHOW_ALL)) + verify_rpc(slurm_load_partitions(0, &self.part_info, slurm.SHOW_ALL)) + slurm_populate_node_partitions(self.info, self.part_info) + + # zero-out a dummy node_info_t + memset(&self.tmp_info, 0, sizeof(node_info_t)) + + # Put each node pointer into its own "Node" instance. + for cnt in range(self.info.record_count): + node = Node.from_ptr(&self.info.node_array[cnt]) + + # Prevent double free if xmalloc fails mid-loop and a MemoryError + # is raised by replacing it with a zeroed-out node_info_t. + self.info.node_array[cnt] = self.tmp_info + + if preload_passwd_info: + node.passwd = passwd + node.groups = groups + + self[node.name] = node + + # At this point we memcpy'd all the memory for the Nodes. Setting this + # to 0 will prevent the slurm node free function to deallocate the + # memory for the individual nodes. This should be fine, because they + # are free'd automatically in __dealloc__ since the lifetime of each + # node-pointer is tied to the lifetime of its corresponding "Node" + # instance. + self.info.record_count = 0 + + def as_list(self): + """Format the information as list of Node objects. + + Returns: + list: List of Node objects + """ + return list(self.values()) + + @property + def free_memory_raw(self): + """int: Amount of free memory in this node collection. (Mebibytes)""" + return _sum_prop(self, Node.free_memory) + + @property + def free_memory(self): + """str: Humanized amount of free memory in this node collection.""" + return humanize(self.free_memory_raw, 2) + + @property + def real_memory_raw(self): + """int: Amount of real memory in this node collection. (Mebibytes)""" + return _sum_prop(self, Node.real_memory) + + @property + def real_memory(self): + """str: Humanized amount of real memory in this node collection.""" + return humanize(self.real_memory_raw, 2) + + @property + def alloc_memory_raw(self): + """int: Amount of alloc Memory in this node collection. 
(Mebibytes)""" + return _sum_prop(self, Node.alloc_memory) + + @property + def alloc_memory(self): + """str: Total amount of allocated Memory in this node collection.""" + return humanize(self.alloc_memory_raw, 2) + + @property + def total_cpus(self): + """int: Total amount of CPUs in this node collection.""" + return _sum_prop(self, Node.total_cpus) + + @property + def idle_cpus(self): + """int: Total amount of idle CPUs in this node collection.""" + return _sum_prop(self, Node.idle_cpus) + + @property + def alloc_cpus(self): + """int: Total amount of allocated CPUs in this node collection.""" + return _sum_prop(self, Node.alloc_cpus) + + @property + def effective_cpus(self): + """int: Total amount of effective CPUs in this node collection.""" + return _sum_prop(self, Node.effective_cpus) + + @property + def current_watts(self): + """int: Total amount of Watts consumed in this node collection.""" + return _sum_prop(self, Node.current_watts) + + @property + def average_watts(self): + """int: Amount of average watts consumed in this node collection.""" + return _sum_prop(self, Node.average_watts) + + +cdef class Node: + """A Slurm node.""" + + def __cinit__(self): + self.info = NULL + self.umsg = NULL + + def __init__(self, str name=None, **kwargs): + """Initialize a Node instance + + Args: + name (str): + Name of a node + **kwargs: + Any writable property. Writable attributes include: + * name + * configured_gres + * address + * hostname + * extra + * comment + * weight + * available_features + * active_features + * cpu_binding + * state + """ + self._alloc_impl() + self.name = name + for k, v in kwargs.items(): + setattr(self, k, v) + + def _alloc_impl(self): + self._alloc_info() + self._alloc_umsg() + + def _alloc_info(self): + if not self.info: + self.info = try_xmalloc(sizeof(node_info_t)) + if not self.info: + raise MemoryError("xmalloc failed for node_info_t") + + def _alloc_umsg(self): + if not self.umsg: + self.umsg = try_xmalloc(sizeof(update_node_msg_t)) + if not self.umsg: + raise MemoryError("xmalloc failed for update_node_msg_t") + slurm_init_update_node_msg(self.umsg) + + def _dealloc_impl(self): + slurm_free_update_node_msg(self.umsg) + self.umsg = NULL + slurm_free_node_info_members(self.info) + xfree(self.info) + + def __dealloc__(self): + self._dealloc_impl() + + def __setattr__(self, name, val): + # When a user wants to set attributes on a Node instance that was + # created by calling Nodes(), the "umsg" pointer is not yet allocated. + # We only allocate memory for it by the time the user actually wants + # to modify something. + self._alloc_umsg() + # Call descriptors __set__ directly + Node.__dict__[name].__set__(self, val) + + def __eq__(self, other): + return isinstance(other, Node) and self.name == other.name + + @staticmethod + cdef Node from_ptr(node_info_t *in_ptr): + cdef Node wrap = Node.__new__(Node) + wrap._alloc_info() + wrap.passwd = {} + wrap.groups = {} + memcpy(wrap.info, in_ptr, sizeof(node_info_t)) + return wrap + + def reload(self): + """(Re)load information for a node. + + Implements the slurm_load_node_single RPC. + + Note: + You can call this function repeatedly to refresh the information + of an instance. Using the Node object returned is optional. + + Returns: + Node: This function returns the current Node-instance object + itself. + + Raises: + RPCError: If requesting the Node information from the slurmctld + was not successful. + MemoryError: If malloc failed to allocate memory. 
+ + Examples: + >>> from pyslurm import Node + >>> node = Node("localhost") + >>> node.reload() + >>> + >>> # You can also write this in one-line: + >>> node = Node("localhost").reload() + """ + cdef: + node_info_msg_t *node_info = NULL + partition_info_msg_t *part_info = NULL + + if not self.name: + raise ValueError("You need to set a node name first") + + try: + verify_rpc(slurm_load_node_single(&node_info, + self.name, slurm.SHOW_ALL)) + verify_rpc(slurm_load_partitions(0, &part_info, slurm.SHOW_ALL)) + slurm_populate_node_partitions(node_info, part_info) + + save_name = self.name + if node_info and node_info.record_count: + # Cleanup the old info. + self._dealloc_impl() + # Copy new info + self._alloc_impl() + memcpy(self.info, &node_info.node_array[0], sizeof(node_info_t)) + node_info.record_count = 0 + + # Need to do this, because while testing even when specifying + # a node name that doesn't exist, it still returned the + # "localhost" node in my Test-setup. Why? + if self.name != save_name: + raise RPCError(msg=f"Node '{save_name}' does not exist") + except Exception as e: + raise e + finally: + slurm_free_node_info_msg(node_info) + slurm_free_partition_info_msg(part_info) + + return self + + def create(self, state="future"): + """Create a node. + + Implements the slurm_create_node RPC. + + Args: + future (str, optional): + An optional state the created Node should have. Allowed values + are "future" and "cloud". "future" is the default. + + Returns: + Node: This function returns the current Node-instance object + itself. + + Raises: + RPCError: If creating the Node was not successful. + MemoryError: If malloc failed to allocate memory. + + Examples: + >>> from pyslurm import Node + >>> node = Node("testnode").create() + """ + if not self.name: + raise ValueError("You need to set a node name first.") + + self._alloc_umsg() + cstr.fmalloc(&self.umsg.extra, + f"NodeName={self.name} State={state}") + verify_rpc(slurm_create_node(self.umsg)) + + return self + + def modify(self, node=None, **kwargs): + """Modify a node. + + Implements the slurm_update_node RPC. + + Args: + node (JobStep): + Another Node object which contains all the changes that + should be applied to this instance. + **kwargs: + You can also specify all the changes as keyword arguments. + Allowed values are only attributes which can actually be set + on a Node instance. If a node is explicitly specified as + parameter, all **kwargs will be ignored. + + Raises: + RPCError: When updating the Node was not successful. + + Examples: + >>> from pyslurm import Node + >>> + >>> # Setting a new weight for the Node + >>> changes = Node(weight=100) + >>> Node("localhost").modify(changes) + >>> + >>> # Or by specifying the changes directly to the modify function + >>> Node("localhost").modify(weight=100) + """ + cdef Node n = self + + # Allow the user to both specify changes via a Node instance or + # **kwargs. + if node and isinstance(node, Node): + n = node + elif kwargs: + n = Node(**kwargs) + + n._alloc_umsg() + cstr.fmalloc(&n.umsg.node_names, self.name) + verify_rpc(slurm_update_node(n.umsg)) + + def delete(self): + """Delete a node. + + Implements the slurm_delete_node RPC. + + Raises: + RPCError: If deleting the Node was not successful. + MemoryError: If malloc failed to allocate memory. + + Examples: + >>> from pyslurm import Node + >>> Node("localhost").delete() + """ + self._alloc_umsg() + verify_rpc(slurm_delete_node(self.umsg)) + + def as_dict(self): + """Node information formatted as a dictionary. 
+ + Returns: + dict: Node information as dict + """ + return instance_to_dict(self) + + @property + def name(self): + """str: Name of the node.""" + return cstr.to_unicode(self.info.name) + + @name.setter + def name(self, val): + cstr.fmalloc2(&self.info.name, &self.umsg.node_names, val) + + @property + def architecture(self): + """str: Architecture of the node (e.g. x86_64)""" + return cstr.to_unicode(self.info.arch) + + @property + def configured_gres(self): + """dict: Generic Resources this Node is configured with.""" + return cstr.to_gres_dict(self.info.gres) + + @configured_gres.setter + def configured_gres(self, val): + cstr.fmalloc2(&self.info.gres, &self.umsg.gres, + cstr.from_gres_dict(val)) + + @property + def owner(self): + """str: User that owns the Node.""" + return uid_to_name(self.info.owner, lookup=self.passwd) + + @property + def address(self): + """str: Address of the node.""" + return cstr.to_unicode(self.info.node_addr) + + @address.setter + def address(self, val): + cstr.fmalloc2(&self.info.node_addr, &self.umsg.node_addr, val) + + @property + def hostname(self): + """str: Hostname of the node.""" + return cstr.to_unicode(self.info.node_hostname) + + @hostname.setter + def hostname(self, val): + cstr.fmalloc2(&self.info.node_hostname, &self.umsg.node_hostname, val) + + @property + def extra(self): + """str: Arbitrary string attached to the Node.""" + return cstr.to_unicode(self.info.extra) + + @extra.setter + def extra(self, val): + cstr.fmalloc2(&self.info.extra, &self.umsg.extra, val) + + @property + def reason(self): + """str: Reason why this node is in its current state.""" + return cstr.to_unicode(self.info.reason) + + @property + def reason_user(self): + """str: Name of the User who set the reason.""" + return uid_to_name(self.info.reason_uid, lookup=self.passwd) + + @property + def comment(self): + """str: Arbitrary node comment.""" + return cstr.to_unicode(self.info.comment) + + @comment.setter + def comment(self, val): + cstr.fmalloc2(&self.info.comment, &self.umsg.comment, val) + + @property + def bcast_address(self): + """str: Address of the node for sbcast.""" + return cstr.to_unicode(self.info.bcast_address) + + @property + def slurm_version(self): + """str: Version of slurm this node is running on.""" + return cstr.to_unicode(self.info.version) + + @property + def operating_system(self): + """str: Name of the operating system installed.""" + return cstr.to_unicode(self.info.os) + + @property + def alloc_gres(self): + """dict: Generic Resources currently in use on the node.""" + return cstr.to_gres_dict(self.info.gres_used) + + @property + def mcs_label(self): + """str: MCS label for the node.""" + return cstr.to_unicode(self.info.mcs_label) + + @property + def alloc_memory_raw(self): + """int: Memory allocated on the node. (Mebibytes)""" + cdef uint64_t alloc_memory = 0 + if self.info.select_nodeinfo: + slurm_get_select_nodeinfo( + self.info.select_nodeinfo, + slurm.SELECT_NODEDATA_MEM_ALLOC, + slurm.NODE_STATE_ALLOCATED, + &alloc_memory) + return u64_parse(alloc_memory) + + @property + def alloc_memory(self): + """str: Memory allocated on the node.""" + return humanize(self.alloc_memory_raw, 2) + + @property + def real_memory_raw(self): + """int: Real Memory configured for this node. 
(Mebibytes)""" + return u64_parse(self.info.real_memory) + + @property + def real_memory(self): + """str: Humanized Real Memory configured for this node.""" + return humanize(self.real_memory_raw, 2) + + @property + def free_memory_raw(self): + """int: Free Memory on the node. (Mebibytes)""" + return u64_parse(self.info.free_mem) + + @property + def free_memory(self): + """str: Humanized Free Memory on the node.""" + return humanize(self.free_memory_raw, 2) + + @property + def memory_reserved_for_system_raw(self): + """int: Memory reserved for the System not usable by Jobs.""" + return u64_parse(self.info.mem_spec_limit) + + @property + def memory_reserved_for_system(self): + """str: Memory reserved for the System not usable by Jobs.""" + return humanize(self.memory_reserved_for_system_raw, 2) + + @property + def tmp_disk_space_raw(self): + """int: Amount of temporary disk space this node has. (Mebibytes)""" + return u32_parse(self.info.tmp_disk) + + @property + def tmp_disk_space(self): + """str: Amount of temporary disk space this node has.""" + return humanize(self.tmp_disk_space_raw) + + @property + def weight(self): + """int: Weight of the node in scheduling.""" + return u32_parse(self.info.weight) + + @weight.setter + def weight(self, val): + self.info.weight=self.umsg.weight = u32(val) + + @property + def effective_cpus(self): + """int: Number of effective CPUs the node has.""" + return u16_parse(self.info.cpus_efctv) + + @property + def total_cpus(self): + """int: Total amount of CPUs the node has.""" + return u16_parse(self.info.cpus) + + @property + def sockets(self): + """int: Number of sockets the node has.""" + return u16_parse(self.info.sockets) + + @property + def cores_reserved_for_system(self): + """int: Number of cores reserved for the System not usable by Jobs.""" + return u16_parse(self.info.core_spec_cnt) + + @property + def boards(self): + """int: Number of boards the node has.""" + return u16_parse(self.info.boards) + + @property + def cores_per_socket(self): + """int: Number of cores per socket configured for the node.""" + return u16_parse(self.info.cores) + + @property + def threads_per_core(self): + """int: Number of threads per core configured for the node.""" + return u16_parse(self.info.threads) + + @property + def available_features(self): + """list: List of features available on the node.""" + return cstr.to_list(self.info.features) + + @available_features.setter + def available_features(self, val): + cstr.from_list2(&self.info.features, &self.umsg.features, val) + + @property + def active_features(self): + """list: List of features on the node.""" + return cstr.to_list(self.info.features_act) + + @active_features.setter + def active_features(self, val): + cstr.from_list2(&self.info.features_act, &self.umsg.features_act, val) + + @property + def partitions(self): + """list: List of partitions this Node is in.""" + return cstr.to_list(self.info.partitions) + + @property + def boot_time_raw(self): + """int: Time the node has booted. (Unix timestamp)""" + return _raw_time(self.info.boot_time) + + @property + def boot_time(self): + """str: Time the node has booted. (formatted)""" + return timestamp_to_date(self.info.boot_time) + + @property + def slurmd_start_time_raw(self): + """int: Time the slurmd has started on the Node. (Unix timestamp)""" + return _raw_time(self.info.slurmd_start_time) + + @property + def slurmd_start_time(self): + """str: Time the slurmd has started on the Node. 
(formatted)""" + return timestamp_to_date(self.info.slurmd_start_time) + + @property + def last_busy_time_raw(self): + """int: Time this node was last busy. (Unix timestamp)""" + return _raw_time(self.info.last_busy) + + @property + def last_busy_time(self): + """str: Time this node was last busy. (formatted)""" + return timestamp_to_date(self.info.last_busy) + + @property + def reason_time_raw(self): + """int: Time the reason was set for the node. (Unix timestamp)""" + return _raw_time(self.info.reason_time) + + @property + def reason_time(self): + """str: Time the reason was set for the node. (formatted)""" + return timestamp_to_date(self.info.reason_time) + +# @property +# def tres_configured(self): +# """dict: TRES that are configured on the node.""" +# return cstr.to_dict(self.info.tres_fmt_str) + +# @property +# def tres_alloc(self): +# cdef char *alloc_tres = NULL +# if self.info.select_nodeinfo: +# slurm_get_select_nodeinfo( +# self.info.select_nodeinfo, +# slurm.SELECT_NODEDATA_TRES_ALLOC_FMT_STR, +# slurm.NODE_STATE_ALLOCATED, +# &alloc_tres +# ) +# return cstr.to_gres_dict(alloc_tres) + + @property + def alloc_cpus(self): + """int: Number of allocated CPUs on the node.""" + cdef uint16_t alloc_cpus = 0 + if self.info.select_nodeinfo: + slurm_get_select_nodeinfo( + self.info.select_nodeinfo, + slurm.SELECT_NODEDATA_SUBCNT, + slurm.NODE_STATE_ALLOCATED, + &alloc_cpus + ) + return alloc_cpus + + @property + def idle_cpus(self): + """int: Number of idle CPUs.""" + efctv = self.effective_cpus + if not efctv: + return None + + return efctv - self.alloc_cpus + + @property + def cpu_binding(self): + """str: Default CPU-Binding on the node.""" + cdef char cpu_bind[128] + slurm_sprint_cpu_bind_type(cpu_bind, + self.info.cpu_bind) + if cpu_bind == "(null type)": + return None + + return cstr.to_unicode(cpu_bind) + + @cpu_binding.setter + def cpu_binding(self, val): + self.info.cpu_bind=self.umsg.cpu_bind = cpubind_to_num(val) + + @property + def cap_watts(self): + """int: Node cap watts.""" + if not self.info.power: + return None + return u32_parse(self.info.power.cap_watts) + + @property + def current_watts(self): + """int: Current amount of watts consumed on the node.""" + if not self.info.energy: + return None + return u32_parse(self.info.energy.current_watts) + + @property + def average_watts(self): + """int: Average amount of watts consumed on the node.""" + if not self.info.energy: + return None + return u32_parse(self.info.energy.ave_watts) + + @property + def external_sensors(self): + """ + dict: External Sensor info for the Node. 
+ + The dict returned contains the following information: + * joules_total (int) + * current_watts (int) + * temperature (int) + """ + if not self.info.ext_sensors: + return {} + + return { + "joules_total": u64_parse(self.info.ext_sensors.consumed_energy), + "current_watts": u32_parse(self.info.ext_sensors.current_watts), + "temperature": u32_parse(self.info.ext_sensors.temperature) + } + + @property + def state(self): + """str: State the node is currently in.""" + cdef char* state = slurm_node_state_string_complete( + self.info.node_state) + state_str = cstr.to_unicode(state) + xfree(state) + return state_str + + @property + def next_state(self): + """str: Next state the node will be in.""" + if ((self.info.next_state != slurm.NO_VAL) + and (self.info.node_state & slurm.NODE_STATE_REBOOT_REQUESTED + or self.info.node_state & slurm.NODE_STATE_REBOOT_ISSUED)): + return cstr.to_unicode( + slurm_node_state_string(self.info.next_state)) + else: + return None + + @state.setter + def state(self, val): + self.umsg.node_state=self.info.node_state = _node_state_from_str(val) + + @property + def cpu_load(self): + """float: CPU Load on the Node.""" + load = u32_parse(self.info.cpu_load) + return load / 100.0 if load is not None else None + + @property + def port(self): + """int: Port the slurmd is listening on the node.""" + return u16_parse(self.info.port) + + +def _node_state_from_str(state, err_on_invalid=True): + if not state: + return slurm.NO_VAL + + for i in range(slurm.NODE_STATE_END): + if state == slurm_node_state_string(i): + return i + + if err_on_invalid: + raise ValueError(f"Invalid Node state: {state}") + else: + return slurm.NO_VAL diff --git a/pyslurm/slurm/extra.pxi b/pyslurm/slurm/extra.pxi index 0c0f11ed..c9294339 100644 --- a/pyslurm/slurm/extra.pxi +++ b/pyslurm/slurm/extra.pxi @@ -183,6 +183,7 @@ cdef extern void slurm_free_update_step_msg(step_update_request_msg_t *msg) cdef extern int slurm_get_select_nodeinfo(dynamic_plugin_data_t *nodeinfo, select_nodedata_type data_type, node_states state, void *data) cdef extern char *slurm_node_state_string_complete(uint32_t inx) cdef extern void slurm_free_update_node_msg(update_node_msg_t *msg) +cdef extern void slurm_free_node_info_members(node_info_t *node) # # Slurm environment functions diff --git a/tests/new_api/test_node.py b/tests/new_api/test_node.py new file mode 100644 index 00000000..d3e81481 --- /dev/null +++ b/tests/new_api/test_node.py @@ -0,0 +1,56 @@ +"""test_node.py - Test the node api functions.""" + +import sys +import time +import pytest +import pyslurm +import os +from pyslurm import Node, Nodes, RPCError + + +def test_reload(): + node = Node(Nodes().as_list()[0].name) + + # Nothing has been loaded at this point, just make sure everything is + # on default values. 
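+    # (A freshly constructed Node only carries the name passed in; its
+    # internal info buffer starts out empty, so parsed attributes such as
+    # weight or slurm_version stay None until reload() copies the record
+    # fetched from the slurmctld.)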
+ assert node.weight is None + assert node.slurm_version is None + # Now load the node info + node.reload() + assert node.name == "localhost" + assert node.weight is not None + assert node.slurm_version is not None + + with pytest.raises(RPCError, + match=f"Node 'nonexistent' does not exist"): + Node("nonexistent").reload() + + +def test_create(): + node = Node("testhostpyslurm") + node.create() + + with pytest.raises(RPCError, + match=f"Invalid node state specified"): + Node("testhostpyslurm2").create("idle") + + +# def test_delete(): +# node = Node("testhost1").delete() + + +def test_modify(): + node = Node(Nodes().as_list()[0].name) + + node.modify(weight=10000) + assert node.reload().weight == 10000 + + node.modify(Node(weight=20000)) + assert node.reload().weight == 20000 + + node.modify(Node(weight=5000)) + assert node.reload().weight == 5000 + + +def test_parse_all(): + Node(Nodes().as_list()[0].name).reload().as_dict() From fa04ffa8c01516839ebfc9514c2e6bc034e042b7 Mon Sep 17 00:00:00 2001 From: tazend Date: Sat, 4 Mar 2023 17:42:40 +0100 Subject: [PATCH 03/28] Add valgrind suppression file --- valgrind-pyslurm.supp | 544 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 544 insertions(+) create mode 100644 valgrind-pyslurm.supp diff --git a/valgrind-pyslurm.supp b/valgrind-pyslurm.supp new file mode 100644 index 00000000..d7243f44 --- /dev/null +++ b/valgrind-pyslurm.supp @@ -0,0 +1,544 @@ +# Initial suppression file taken from here: +# https://github.com/python/cpython/blob/77a3196b7cc17d90a8aae5629aa71ff183b9266a/Misc/valgrind-python.supp +# Extended with Slurm specific suppressions + +{ + Python _PyFunction_Vectorcall + Memcheck:Leak + match-leak-kinds: possible + fun:malloc + fun:_PyObject_GC_NewVar + obj:/usr/bin/python3.10 + fun:_PyFunction_Vectorcall + fun:_PyEval_EvalFrameDefault + fun:_PyFunction_Vectorcall + fun:_PyEval_EvalFrameDefault + fun:_PyFunction_Vectorcall + fun:_PyEval_EvalFrameDefault + fun:_PyFunction_Vectorcall + fun:_PyEval_EvalFrameDefault + fun:_PyFunction_Vectorcall +} + +### +### IGNORE POSSIBLE LEAKS CAUSED BY SOME INIT FUNCTIONS IN libslurm +### + +{ + Slurm select_g_init + Memcheck:Leak + match-leak-kinds: possible + ... + fun:select_g_init + ... +} + +{ + Slurm slurm_auth_init + Memcheck:Leak + match-leak-kinds: possible + ... + fun:slurm_auth_init + ... +} + +{ + Slurm slurm_conf_init/slurm_init + Memcheck:Leak + match-leak-kinds: possible + ... + fun:slurm_conf_init + fun:slurm_init + ... +} + +{ + Slurm hash_g_init + Memcheck:Leak + match-leak-kinds: possible + ... + fun:hash_g_init + ... +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Addr4 + fun:address_in_range +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Value4 + fun:address_in_range +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 8 (x86_64 aka amd64) + Memcheck:Value8 + fun:address_in_range +} + +{ + ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value + Memcheck:Cond + fun:address_in_range +} + +# +# Leaks (including possible leaks) +# Hmmm, I wonder if this masks some real leaks. I think it does. +# Will need to fix that. +# + +{ + Suppress leaking the GIL. Happens once per process, see comment in ceval.c. + Memcheck:Leak + fun:malloc + fun:PyThread_allocate_lock + fun:PyEval_InitThreads +} + +{ + Suppress leaking the GIL after a fork. + Memcheck:Leak + fun:malloc + fun:PyThread_allocate_lock + fun:PyEval_ReInitThreads +} + +{ + Suppress leaking the autoTLSkey. This looks like it shouldn't leak though. 
+ Memcheck:Leak + fun:malloc + fun:PyThread_create_key + fun:_PyGILState_Init + fun:Py_InitializeEx + fun:Py_Main +} + +{ + Hmmm, is this a real leak or like the GIL? + Memcheck:Leak + fun:malloc + fun:PyThread_ReInitTLS +} + +{ + Handle PyMalloc confusing valgrind (possibly leaked) + Memcheck:Leak + fun:realloc + fun:_PyObject_GC_Resize + fun:COMMENT_THIS_LINE_TO_DISABLE_LEAK_WARNING +} + +{ + Handle PyMalloc confusing valgrind (possibly leaked) + Memcheck:Leak + fun:malloc + fun:_PyObject_GC_New + fun:COMMENT_THIS_LINE_TO_DISABLE_LEAK_WARNING +} + +{ + Handle PyMalloc confusing valgrind (possibly leaked) + Memcheck:Leak + fun:malloc + fun:_PyObject_GC_NewVar + fun:COMMENT_THIS_LINE_TO_DISABLE_LEAK_WARNING +} + +# +# Non-python specific leaks +# + +{ + Handle pthread issue (possibly leaked) + Memcheck:Leak + fun:calloc + fun:allocate_dtv + fun:_dl_allocate_tls_storage + fun:_dl_allocate_tls +} + +{ + Handle pthread issue (possibly leaked) + Memcheck:Leak + fun:memalign + fun:_dl_allocate_tls_storage + fun:_dl_allocate_tls +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Addr4 + fun:_PyObject_Free +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Value4 + fun:_PyObject_Free +} + +{ + ADDRESS_IN_RANGE/Use of uninitialised value of size 8 + Memcheck:Addr8 + fun:_PyObject_Free +} + +{ + ADDRESS_IN_RANGE/Use of uninitialised value of size 8 + Memcheck:Value8 + fun:_PyObject_Free +} + +{ + ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value + Memcheck:Cond + fun:_PyObject_Free +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Addr4 + fun:_PyObject_Realloc +} + +{ + ADDRESS_IN_RANGE/Invalid read of size 4 + Memcheck:Value4 + fun:_PyObject_Realloc +} + +{ + ADDRESS_IN_RANGE/Use of uninitialised value of size 8 + Memcheck:Addr8 + fun:_PyObject_Realloc +} + +{ + ADDRESS_IN_RANGE/Use of uninitialised value of size 8 + Memcheck:Value8 + fun:_PyObject_Realloc +} + +{ + ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value + Memcheck:Cond + fun:_PyObject_Realloc +} + +### +### All the suppressions below are for errors that occur within libraries +### that Python uses. The problems to not appear to be related to Python's +### use of the libraries. 
+### + +{ + Generic ubuntu ld problems + Memcheck:Addr8 + obj:/lib/ld-2.4.so + obj:/lib/ld-2.4.so + obj:/lib/ld-2.4.so + obj:/lib/ld-2.4.so +} + +{ + Generic gentoo ld problems + Memcheck:Cond + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so + obj:/lib/ld-2.3.4.so +} + +{ + DBM problems, see test_dbm + Memcheck:Param + write(buf) + fun:write + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + fun:dbm_close +} + +{ + DBM problems, see test_dbm + Memcheck:Value8 + fun:memmove + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + fun:dbm_store + fun:dbm_ass_sub +} + +{ + DBM problems, see test_dbm + Memcheck:Cond + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + fun:dbm_store + fun:dbm_ass_sub +} + +{ + DBM problems, see test_dbm + Memcheck:Cond + fun:memmove + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + obj:/usr/lib/libdb1.so.2 + fun:dbm_store + fun:dbm_ass_sub +} + +{ + GDBM problems, see test_gdbm + Memcheck:Param + write(buf) + fun:write + fun:gdbm_open + +} + +{ + Uninitialised byte(s) false alarm, see bpo-35561 + Memcheck:Param + epoll_ctl(event) + fun:epoll_ctl + fun:pyepoll_internal_ctl +} + +{ + ZLIB problems, see test_gzip + Memcheck:Cond + obj:/lib/libz.so.1.2.3 + obj:/lib/libz.so.1.2.3 + fun:deflate +} + +{ + Avoid problems w/readline doing a putenv and leaking on exit + Memcheck:Leak + fun:malloc + fun:xmalloc + fun:sh_set_lines_and_columns + fun:_rl_get_screen_size + fun:_rl_init_terminal_io + obj:/lib/libreadline.so.4.3 + fun:rl_initialize +} + +# Valgrind emits "Conditional jump or move depends on uninitialised value(s)" +# false alarms on GCC builtin strcmp() function. The GCC code is correct. +# +# Valgrind bug: https://bugs.kde.org/show_bug.cgi?id=264936 +{ + bpo-38118: Valgrind emits false alarm on GCC builtin strcmp() + Memcheck:Cond + fun:PyUnicode_Decode +} + + +### +### These occur from somewhere within the SSL, when running +### test_socket_sll. They are too general to leave on by default. 
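A suppression file like this only takes effect when it is handed to Valgrind at run time; the entries themselves change nothing in pyslurm. One way to run the new test suite under Valgrind with these suppressions loaded, sketched here as a small Python wrapper (the paths assume the repository root as the working directory):

    import subprocess

    # Run the new-API tests under Valgrind; --suppressions filters out the
    # known-benign CPython/Slurm reports collected in valgrind-pyslurm.supp.
    subprocess.run(
        ["valgrind", "--leak-check=full",
         "--suppressions=valgrind-pyslurm.supp",
         "python", "-m", "pytest", "tests/new_api"],
        check=True,
    )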
+### +###{ +### somewhere in SSL stuff +### Memcheck:Cond +### fun:memset +###} +###{ +### somewhere in SSL stuff +### Memcheck:Value4 +### fun:memset +###} +### +###{ +### somewhere in SSL stuff +### Memcheck:Cond +### fun:MD5_Update +###} +### +###{ +### somewhere in SSL stuff +### Memcheck:Value4 +### fun:MD5_Update +###} + +# Fedora's package "openssl-1.0.1-0.1.beta2.fc17.x86_64" on x86_64 +# See http://bugs.python.org/issue14171 +{ + openssl 1.0.1 prng 1 + Memcheck:Cond + fun:bcmp + fun:fips_get_entropy + fun:FIPS_drbg_instantiate + fun:RAND_init_fips + fun:OPENSSL_init_library + fun:SSL_library_init + fun:init_hashlib +} + +{ + openssl 1.0.1 prng 2 + Memcheck:Cond + fun:fips_get_entropy + fun:FIPS_drbg_instantiate + fun:RAND_init_fips + fun:OPENSSL_init_library + fun:SSL_library_init + fun:init_hashlib +} + +{ + openssl 1.0.1 prng 3 + Memcheck:Value8 + fun:_x86_64_AES_encrypt_compact + fun:AES_encrypt +} + +# +# All of these problems come from using test_socket_ssl +# +{ + from test_socket_ssl + Memcheck:Cond + fun:BN_bin2bn +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:BN_num_bits_word +} + +{ + from test_socket_ssl + Memcheck:Value4 + fun:BN_num_bits_word +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:BN_mod_exp_mont_word +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:BN_mod_exp_mont +} + +{ + from test_socket_ssl + Memcheck:Param + write(buf) + fun:write + obj:/usr/lib/libcrypto.so.0.9.7 +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:RSA_verify +} + +{ + from test_socket_ssl + Memcheck:Value4 + fun:RSA_verify +} + +{ + from test_socket_ssl + Memcheck:Value4 + fun:DES_set_key_unchecked +} + +{ + from test_socket_ssl + Memcheck:Value4 + fun:DES_encrypt2 +} + +{ + from test_socket_ssl + Memcheck:Cond + obj:/usr/lib/libssl.so.0.9.7 +} + +{ + from test_socket_ssl + Memcheck:Value4 + obj:/usr/lib/libssl.so.0.9.7 +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:BUF_MEM_grow_clean +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:memcpy + fun:ssl3_read_bytes +} + +{ + from test_socket_ssl + Memcheck:Cond + fun:SHA1_Update +} + +{ + from test_socket_ssl + Memcheck:Value4 + fun:SHA1_Update +} + +{ + test_buffer_non_debug + Memcheck:Addr4 + fun:PyUnicodeUCS2_FSConverter +} + +{ + test_buffer_non_debug + Memcheck:Addr4 + fun:PyUnicode_FSConverter +} + +{ + wcscmp_false_positive + Memcheck:Addr8 + fun:wcscmp + fun:_PyOS_GetOpt + fun:Py_Main + fun:main +} + +# Additional suppressions for the unified decimal tests: +{ + test_decimal + Memcheck:Addr4 + fun:PyUnicodeUCS2_FSConverter +} + +{ + test_decimal2 + Memcheck:Addr4 + fun:PyUnicode_FSConverter +} + From 672cbd5c9fc591b463f6143f77691891ba97984c Mon Sep 17 00:00:00 2001 From: tazend Date: Mon, 6 Mar 2023 22:11:59 +0100 Subject: [PATCH 04/28] wip dependencies --- pyslurm/core/job/job.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx index e705c755..b4a980c4 100644 --- a/pyslurm/core/job/job.pyx +++ b/pyslurm/core/job/job.pyx @@ -624,6 +624,10 @@ cdef class Job: @property def dependencies(self): """dict: Dependencies the Job has to other Jobs.""" + dep = cstr.to_unicode(self.ptr.dependency, default=[]) + if not dep: + return None + out = { "after": [], "afterany": [], @@ -634,10 +638,6 @@ cdef class Job: "singleton": False, "satisfy": "all", } - dep = cstr.to_unicode(self.ptr.dependency, default=[]) - - if not dep: - return out delim = "," if "?" 
in dep: @@ -1376,7 +1376,7 @@ cdef class Job: return cstr.to_gres_dict(self.ptr.tres_per_node) @property - def acct_gather_profile(self): + def accounting_gather_profile(self): """list: Options that control gathering of Accounting information.""" return get_acctg_profile(self.ptr.profile) From 3a2b47b45064331707bd001a8d0f2b910d2d879a Mon Sep 17 00:00:00 2001 From: tazend Date: Thu, 9 Mar 2023 20:26:43 +0100 Subject: [PATCH 05/28] Fix docstrings for mkdocs --- pyslurm/core/node.pxd | 51 +++++++++++++++++++++++++++++++++++++++++++ pyslurm/core/node.pyx | 43 +----------------------------------- 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/pyslurm/core/node.pxd b/pyslurm/core/node.pxd index 2f2a32ec..14779d20 100644 --- a/pyslurm/core/node.pxd +++ b/pyslurm/core/node.pxd @@ -50,7 +50,30 @@ from pyslurm.slurm cimport ( cdef class Nodes(dict): + """A collection of Node objects. + By creating a new Nodes instance, all Nodes in the system will be + fetched from the slurmctld. + + Args: + preload_passwd_info (bool): + Decides whether to query passwd and groups information from the + system. + Could potentially speed up access to attributes of the Node where + a UID/GID is translated to a name. + If True, the information will fetched and stored in each of the + Node instances. The default is False. + + Attributes: + free_memory_raw (int): + Amount of free memory in this node collection. (Mebibytes) + free_memory (str): + Humanized amount of free memory in this node collection. + + Raises: + RPCError: When getting all the Nodes from the slurmctld failed. + MemoryError: If malloc fails to allocate memory. + """ cdef: node_info_msg_t *info partition_info_msg_t *part_info @@ -58,6 +81,34 @@ cdef class Nodes(dict): cdef class Node: + """A Slurm node. + + Args: + name (str): + Name of a node + **kwargs: + Any writable property. Writable attributes include: + * name + * configured_gres + * address + * hostname + * extra + * comment + * weight + * available_features + * active_features + * cpu_binding + * state + + Attributes: + name (str): + Name of the node. + architecture (str): + Architecture of the node (e.g. x86_64) + + Raises: + MemoryError: If malloc fails to allocate memory. + """ cdef: node_info_t *info update_node_msg_t *umsg diff --git a/pyslurm/core/node.pyx b/pyslurm/core/node.pyx index 63254ec0..e2605841 100644 --- a/pyslurm/core/node.pyx +++ b/pyslurm/core/node.pyx @@ -45,24 +45,7 @@ from pyslurm.core.common import ( cdef class Nodes(dict): - """A collection of Node objects. - - By creating a new Nodes instance, all Nodes in the system will be - fetched from the slurmctld. - - Args: - preload_passwd_info (bool): - Decides whether to query passwd and groups information from the - system. - Could potentially speed up access to attributes of the Node where - a UID/GID is translated to a name. - If True, the information will fetched and stored in each of the - Node instances. The default is False. - - Raises: - RPCError: When getting all the Nodes from the slurmctld failed. - MemoryError: If malloc fails to allocate memory. - """ + def __dealloc__(self): slurm_free_node_info_msg(self.info) slurm_free_partition_info_msg(self.part_info) @@ -122,12 +105,10 @@ cdef class Nodes(dict): @property def free_memory_raw(self): - """int: Amount of free memory in this node collection. 
(Mebibytes)""" return _sum_prop(self, Node.free_memory) @property def free_memory(self): - """str: Humanized amount of free memory in this node collection.""" return humanize(self.free_memory_raw, 2) @property @@ -182,32 +163,12 @@ cdef class Nodes(dict): cdef class Node: - """A Slurm node.""" def __cinit__(self): self.info = NULL self.umsg = NULL def __init__(self, str name=None, **kwargs): - """Initialize a Node instance - - Args: - name (str): - Name of a node - **kwargs: - Any writable property. Writable attributes include: - * name - * configured_gres - * address - * hostname - * extra - * comment - * weight - * available_features - * active_features - * cpu_binding - * state - """ self._alloc_impl() self.name = name for k, v in kwargs.items(): @@ -420,7 +381,6 @@ cdef class Node: @property def name(self): - """str: Name of the node.""" return cstr.to_unicode(self.info.name) @name.setter @@ -429,7 +389,6 @@ cdef class Node: @property def architecture(self): - """str: Architecture of the node (e.g. x86_64)""" return cstr.to_unicode(self.info.arch) @property From 6a1114f55aa5afaee80bec053f8892da7b30079e Mon Sep 17 00:00:00 2001 From: tazend Date: Thu, 16 Mar 2023 23:34:25 +0100 Subject: [PATCH 06/28] Fix docstring layout and rename some attributes --- pyslurm/core/common/ctime.pyx | 4 +- pyslurm/core/error.pyx | 7 +- pyslurm/core/job/job.pxd | 268 ++++++- pyslurm/core/job/job.pyx | 422 ++--------- pyslurm/core/job/sbatch_opts.pyx | 37 +- pyslurm/core/job/step.pxd | 74 +- pyslurm/core/job/step.pyx | 107 +-- pyslurm/core/job/submission.pxd | 1167 +++++++++++++----------------- pyslurm/core/job/submission.pyx | 69 +- pyslurm/core/node.pxd | 116 ++- pyslurm/core/node.pyx | 165 +---- tests/new_api/conftest.py | 4 +- tests/new_api/test_common.py | 22 +- tests/new_api/test_job.py | 4 +- tests/new_api/test_job_steps.py | 12 +- tests/new_api/test_job_submit.py | 44 +- 16 files changed, 1150 insertions(+), 1372 deletions(-) diff --git a/pyslurm/core/common/ctime.pyx b/pyslurm/core/common/ctime.pyx index e77b18a1..faf5a7a1 100644 --- a/pyslurm/core/common/ctime.pyx +++ b/pyslurm/core/common/ctime.pyx @@ -202,10 +202,10 @@ def timestamp_to_date(timestamp): return ret -def _raw_time(time): +def _raw_time(time, default=None): if (time == slurm.NO_VAL or time == 0 or time == slurm.INFINITE): - return None + return default return time diff --git a/pyslurm/core/error.pyx b/pyslurm/core/error.pyx index 649d1c55..4ba24277 100644 --- a/pyslurm/core/error.pyx +++ b/pyslurm/core/error.pyx @@ -34,7 +34,7 @@ def slurm_strerror(errno): returned. Returns: - str: String representation of errno. + (str): String representation of errno. """ return cstr.to_unicode(slurm.slurm_strerror(errno)) @@ -43,7 +43,7 @@ def slurm_errno(): """Get the current slurm errno. Returns: - int: Current slurm errno + (int): Current slurm errno """ return slurm_get_errno() @@ -52,7 +52,8 @@ def get_last_slurm_error(): """Get the last slurm error that occured as a tuple of errno and string. Returns: - tuple: The errno and its string representation -> (errno, str) + errno (int): The error number + errno_str (str): The errno converted to a String """ errno = slurm_errno() diff --git a/pyslurm/core/job/job.pxd b/pyslurm/core/job/job.pxd index e9bd9867..65b829a2 100644 --- a/pyslurm/core/job/job.pxd +++ b/pyslurm/core/job/job.pxd @@ -17,7 +17,6 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
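The return-value documentation adjusted above for pyslurm/core/error.pyx describes plain module-level helpers; a short sketch of how they combine, assuming some previous Slurm call has failed and left an error code behind:

    >>> from pyslurm.core.error import get_last_slurm_error, slurm_strerror
    >>> errno, msg = get_last_slurm_error()    # (error number, readable string)
    >>> print(errno, msg)
    >>> print(slurm_strerror(errno))           # same text for any given errno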
# -# cython: embedsignature=True # cython: c_string_type=unicode, c_string_encoding=utf8 # cython: language_level=3 @@ -67,14 +66,281 @@ from pyslurm.slurm cimport ( cdef class Jobs(dict): + """A collection of :obj:`Job` objects. + By creating a new :obj:`Jobs` instance, all Jobs in the system will be + fetched from the slurmctld. + + Args: + preload_passwd_info (bool, optional): + Decides whether to query passwd and groups information from + the system. + Could potentially speed up access to attributes of the Job + where a UID/GID is translated to a name. If True, the + information will fetched and stored in each of the Job + instances. The default is False. + + Raises: + RPCError: When getting all the Jobs from the slurmctld failed. + MemoryError: If malloc fails to allocate memory. + """ cdef: job_info_msg_t *info slurm_job_info_t tmp_info cdef class Job: + """A Slurm Job. + + All attributes in this class are read-only. + + Args: + job_id (int): + An Integer representing a Job-ID. + + Raises: + MemoryError: If malloc fails to allocate memory. + Attributes: + name (str): + Name of the Job + id (int): + Unique ID of the Job. + association_id (int): + ID of the Association this Job runs with. + account (str): + Name of the Account this Job is runs with. + user_id (int): + UID of the User who submitted the Job. + user_name (str): + Name of the User who submitted the Job. + group_id (int): + GID of the Group that Job runs under. + group_name (str): + Name of the Group this Job runs under. + priority (int): + Priority of the Job. + nice (int): + Nice Value of the Job. + qos (str): + QOS Name of the Job. + min_cpus_per_node (int): + Minimum Amount of CPUs per Node the Job requested. + state (str): + State this Job is currently in. + state_reason (str): + A Reason explaining why the Job is in its current state. + is_requeueable (bool): + Whether the Job is requeuable or not. + requeue_count (int): + Amount of times the Job has been requeued. + is_batch_job (bool): + Whether the Job is a batch job or not. + node_reboot_required (bool): + Whether the Job requires the Nodes to be rebooted first. + dependencies (dict): + Dependencies the Job has to other Jobs. + time_limit (int): + Time-Limit, in minutes, for this Job. + time_limit_min (int): + Minimum Time-Limit in minutes for this Job. + submit_time (int): + Time the Job was submitted, as unix timestamp. + eligible_time (int): + Time the Job is eligible to start, as unix timestamp. + accrue_time (int): + Job accrue time, as unix timestamp + start_time (int): + Time this Job has started execution, as unix timestamp. + resize_time (int): + Time the job was resized, as unix timestamp. + deadline (int): + Time when a pending Job will be cancelled, as unix timestamp. + preempt_eligible_time (int): + Time the Job is eligible for preemption, as unix timestamp. + preempt_time_raw (int): + Time the Job was signaled for preemption, as unix timestamp. + suspend_time (int): + Last Time the Job was suspended, as unix timestamp. + last_sched_evaluation_time (int): + Last time evaluated for Scheduling, as unix timestamp. + pre_suspension_time (int): + Amount of seconds the Job ran prior to suspension, as unix + timestamp + mcs_label (str): + MCS Label for the Job + partition (str): + Name of the Partition the Job runs in. + submit_host (str): + Name of the Host this Job was submitted from. + batch_host (str): + Name of the Host where the Batch-Script is executed. + min_nodes (int): + Minimum amount of Nodes the Job has requested. 
+ max_nodes (int): + Maximum amount of Nodes the Job has requested. + allocated_nodes (str): + Nodes the Job is currently using. + This is only valid when the Job is running. If the Job is pending, + it will always return None. + required_nodes (str): + Nodes the Job is explicitly requiring to run on. + excluded_nodes (str): + Nodes that are explicitly excluded for execution. + scheduled_nodes (str): + Nodes the Job is scheduled on by the slurm controller. + derived_exit_code (int): + The derived exit code for the Job. + derived_exit_code_signal (int): + Signal for the derived exit code. + exit_code (int): + Code with which the Job has exited. + exit_code_signal (int): + The signal which has led to the exit code of the Job. + batch_constraints (list): + Features that node(s) should have for the batch script. + Controls where it is possible to execute the batch-script of the + job. Also see 'constraints' + federation_origin (str): + Federation Origin + federation_siblings_active (int): + Federation siblings active + federation_siblings_viable (int): + Federation siblings viable + allocated_cpus (int): + Total amount of CPUs the Job is using. + If the Job is still pending, this will be None. + cpus_per_task (int): + Number of CPUs per Task used. + cpus_per_gpu (int): + Number of CPUs per GPU used. + boards_per_node (int): + Number of boards per Node. + sockets_per_board (int): + Number of sockets per board. + sockets_per_node (int): + Number of sockets per node. + cores_per_socket (int): + Number of cores per socket. + threads_per_core (int): + Number of threads per core. + ntasks (int): + Number of parallel processes. + ntasks_per_node (int): + Number of parallel processes per node. + ntasks_per_board (int): + Number of parallel processes per board. + ntasks_per_socket (int): + Number of parallel processes per socket. + ntasks_per_core (int): + Number of parallel processes per core. + ntasks_per_gpu (int): + Number of parallel processes per GPU. + delay_boot_time (int): + https://slurm.schedmd.com/sbatch.html#OPT_delay-boot, in minutes + constraints (list): + A list of features the Job requires nodes to have. + In contrast, the 'batch_constraints' option only focuses on the + initial batch-script placement. This option however means features + to restrict the list of nodes a job is able to execute on in + general beyond the initial batch-script. + cluster (str): + Name of the cluster the job is executing on. + cluster_constraints (list): + A List of features that a cluster should have. + reservation (str): + Name of the reservation this Job uses. + resource_sharing (str): + Mode controlling how a job shares resources with others. + requires_contiguous_nodes (bool): + Whether the Job has allocated a set of contiguous nodes. + licenses (list): + List of licenses the Job needs. + network (str): + Network specification for the Job. + command (str): + The command that is executed for the Job. + working_directory (str): + Path to the working directory for this Job. + admin_comment (str): + An arbitrary comment set by an administrator for the Job. + system_comment (str): + An arbitrary comment set by the slurmctld for the Job. + container (str): + The container this Job uses. + comment (str): + An arbitrary comment set for the Job. + standard_input (str): + The path to the file for the standard input stream. + standard_output (str): + The path to the log file for the standard output stream. + standard_error (str): + The path to the log file for the standard error stream. 
+ required_switches (int): + Number of switches required. + max_wait_time_switches (int): + Amount of seconds to wait for the switches. + burst_buffer (str): + Burst buffer specification + burst_buffer_state (str): + Burst buffer state + cpu_frequency_min (Union[str, int]): + Minimum CPU-Frequency requested. + cpu_frequency_max (Union[str, int]): + Maximum CPU-Frequency requested. + cpu_frequency_governor (Union[str, int]): + CPU-Frequency Governor requested. + wckey (str): + Name of the WCKey this Job uses. + mail_user (list): + Users that should receive Mails for this Job. + mail_types (list): + Mail Flags specified by the User. + heterogeneous_id (int): + Heterogeneous job id. + heterogeneous_offset (int): + Heterogeneous job offset. + temporary_disk_per_node (int): + Temporary disk space in Mebibytes available per Node. + array_id (int): + The master Array-Job ID. + array_tasks_parallel (int): + Max number of array tasks allowed to run simultaneously. + array_task_id (int): + Array Task ID of this Job if it is an Array-Job. + array_tasks_waiting (str): + Array Tasks that are still waiting. + end_time (int): + Time at which this Job will end, as unix timestamp. + run_time (int): + Amount of seconds the Job has been running. + cores_reserved_for_system (int): + Amount of cores reserved for System use only. + threads_reserved_for_system (int): + Amount of Threads reserved for System use only. + memory_per_cpu (int): + Amount of Memory per CPU this Job has, in Mebibytes + memory_per_node (int): + Amount of Memory per Node this Job has, in Mebibytes + memory_per_gpu (int): + Amount of Memory per GPU this Job has, in Mebibytes + gres_per_node (dict): + Generic Resources (e.g. GPU) this Job is using per Node. + profile_types (list): + Types for which detailed accounting data is collected. + gres_binding (str): + Binding Enforcement of a Generic Resource (e.g. GPU). + kill_on_invalid_dependency (bool): + Whether the Job should be killed on an invalid dependency. + spreads_over_nodes (bool): + Whether the Job should be spreaded over as many nodes as possible. + power_options (list): + Options set for Power Management. + is_cronjob (bool): + Whether this Job is a cronjob. + cronjob_time (str): + The time specification for the Cronjob. + """ cdef: slurm_job_info_t *ptr dict passwd diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx index b4a980c4..2510cc67 100644 --- a/pyslurm/core/job/job.pyx +++ b/pyslurm/core/job/job.pyx @@ -17,7 +17,6 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: embedsignature=True # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 @@ -52,30 +51,11 @@ from pyslurm.core.common import ( cdef class Jobs(dict): - """A collection of :obj:`Job` objects. - By creating a new :obj:`Jobs` instance, all Jobs in the system will be - fetched from the slurmctld. - """ def __dealloc__(self): slurm_free_job_info_msg(self.info) def __init__(self, preload_passwd_info=False): - """Initialize a Jobs collection - - Args: - preload_passwd_info (bool): - Decides whether to query passwd and groups information from - the system. - Could potentially speed up access to attributes of the Job - where a UID/GID is translated to a name. If True, the - information will fetched and stored in each of the Job - instances. The default is False. - - Raises: - RPCError: When getting all the Jobs from the slurmctld failed. 
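With the attribute documentation now living on the class in job.pxd, the names listed there form the public interface, and several were renamed in this commit (uid becomes user_id, alloc_cpus becomes allocated_cpus, stdout becomes standard_output, and the separate *_raw time variants are folded into the plain names). A short read-only sketch, assuming a job with the hypothetical ID 1234 exists on the cluster:

    >>> from pyslurm import Job
    >>> job = Job(1234).reload()
    >>> print(job.state, job.user_name, job.partition)
    >>> print(job.allocated_cpus, job.time_limit)   # time_limit is raw minutes now
    >>> print(job.standard_output)                  # path of the stdout log file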
- MemoryError: If malloc fails to allocate memory. - """ cdef: dict passwd = {} dict groups = {} @@ -128,7 +108,7 @@ cdef class Jobs(dict): failed. Returns: - dict: JobSteps information for each JobID. + (dict): JobSteps information for each JobID. """ cdef: Job job @@ -143,23 +123,13 @@ cdef class Jobs(dict): """Format the information as list of Job objects. Returns: - list: List of Job objects + (list): List of Job objects """ return list(self.values()) cdef class Job: - """A Slurm Job. - - All attributes in this class are read-only. - - Args: - job_id (int): - An Integer representing a Job-ID. - Raises: - MemoryError: If malloc fails to allocate memory. - """ def __init__(self, int job_id): self.alloc() self.ptr.job_id = job_id @@ -188,7 +158,8 @@ cdef class Job: of an instance. Using the Job object returned is optional. Returns: - Job: This function returns the current Job-instance object itself. + (Job): This function returns the current Job-instance object + itself. Raises: RPCError: If requesting the Job information from the slurmctld was @@ -238,7 +209,7 @@ cdef class Job: """Job information formatted as a dictionary. Returns: - dict: Job information as dict + (dict): Job information as dict """ return instance_to_dict(self) @@ -474,7 +445,7 @@ cdef class Job: (new-line). Returns: - str: The content of the batch script. + (str): The content of the batch script. Raises: RPCError: When retrieving the Batch-Script for the Job was not @@ -520,52 +491,42 @@ cdef class Job: @property def name(self): - """str: Name of the Job""" return cstr.to_unicode(self.ptr.name) @property def id(self): - """int: Unique Job-ID""" return self.ptr.job_id @property def association_id(self): - """int: ID of the Association this Job is run under.""" return u32_parse(self.ptr.assoc_id) @property def account(self): - """str: Name of the Account this Job is run under.""" return cstr.to_unicode(self.ptr.account) @property - def uid(self): - """int: UID of the User who submitted the Job.""" + def user_id(self): return u32_parse(self.ptr.user_id, zero_is_noval=False) @property - def user(self): - """str: Name of the User who submitted the Job.""" + def user_name(self): return uid_to_name(self.ptr.user_id, lookup=self.passwd) @property - def gid(self): - """int: GID of the Group that Job runs under.""" + def group_id(self): return u32_parse(self.ptr.group_id, zero_is_noval=False) @property - def group(self): - """str: Name of the Group this Job runs under.""" + def group_name(self): return gid_to_name(self.ptr.group_id, lookup=self.groups) @property def priority(self): - """int: Priority of the Job.""" return u32_parse(self.ptr.priority, zero_is_noval=False) @property def nice(self): - """int: Nice Value of the Job.""" if self.ptr.nice == slurm.NO_VAL: return None @@ -573,12 +534,10 @@ cdef class Job: @property def qos(self): - """str: QOS Name of the Job.""" return cstr.to_unicode(self.ptr.qos) @property def min_cpus_per_node(self): - """int: Minimum Amount of CPUs per Node the Job requested.""" return u32_parse(self.ptr.pn_min_cpus) # I don't think this is used anymore - there is no way in sbatch to ask @@ -590,12 +549,10 @@ cdef class Job: @property def state(self): - """str: State this Job is currently in.""" return cstr.to_unicode(slurm_job_state_string(self.ptr.job_state)) @property def state_reason(self): - """str: A Reason explaining why the Job is in its current state.""" if self.ptr.state_desc: return cstr.to_unicode(self.ptr.state_desc) @@ -603,27 +560,22 @@ cdef class Job: @property def 
is_requeueable(self): - """bool: Whether the Job is requeuable or not.""" return u16_parse_bool(self.ptr.requeue) @property def requeue_count(self): - """int: Amount of times the Job has been requeued.""" return u16_parse(self.ptr.restart_cnt, on_noval=0) @property def is_batch_job(self): - """bool: Whether the Job is a batch job or not.""" return u16_parse_bool(self.ptr.batch_flag) @property - def reboot_nodes(self): - """bool: Whether the Job requires the Nodes to be rebooted first.""" + def requires_node_reboot(self): return u8_parse_bool(self.ptr.reboot) @property def dependencies(self): - """dict: Dependencies the Job has to other Jobs.""" dep = cstr.to_unicode(self.ptr.dependency, default=[]) if not dep: return None @@ -661,229 +613,100 @@ cdef class Job: return out - @property - def time_limit_raw(self): - """int: Time-Limit for this Job. (Unix timestamp)""" - return _raw_time(self.ptr.time_limit) - @property def time_limit(self): - """str: Time-Limit for this Job. (formatted)""" - return mins_to_timestr(self.ptr.time_limit, "PartitionLimit") - - @property - def time_limit_min_raw(self): - """int: Minimum Time-Limit for this Job (Unix timestamp)""" - return _raw_time(self.ptr.time_min) + return _raw_time(self.ptr.time_limit) @property def time_limit_min(self): - """str: Minimum Time-limit acceptable for this Job (formatted)""" - return mins_to_timestr(self.ptr.time_min) - - @property - def submit_time_raw(self): - """int: Time the Job was submitted. (Unix timestamp)""" - return _raw_time(self.ptr.submit_time) + return _raw_time(self.ptr.time_min) @property def submit_time(self): - """str: Time the Job was submitted. (formatted)""" - return timestamp_to_date(self.ptr.submit_time) - - @property - def eligible_time_raw(self): - """int: Time the Job is eligible to start. (Unix timestamp)""" - return _raw_time(self.ptr.eligible_time) + return _raw_time(self.ptr.submit_time) @property def eligible_time(self): - """str: Time the Job is eligible to start. (formatted)""" - return timestamp_to_date(self.ptr.eligible_time) - - @property - def accrue_time_raw(self): - """int: Job accrue time (Unix timestamp)""" - return _raw_time(self.ptr.accrue_time) + return _raw_time(self.ptr.eligible_time) @property def accrue_time(self): - """str: Job accrue time (formatted)""" - return timestamp_to_date(self.ptr.accrue_time) - - @property - def start_time_raw(self): - """int: Time this Job has started execution. (Unix timestamp)""" - return _raw_time(self.ptr.start_time) + return _raw_time(self.ptr.accrue_time) @property def start_time(self): - """str: Time this Job has started execution. (formatted)""" - return timestamp_to_date(self.ptr.start_time) - - @property - def resize_time_raw(self): - """int: Time the job was resized. (Unix timestamp)""" - return _raw_time(self.ptr.resize_time) + return _raw_time(self.ptr.start_time) @property def resize_time(self): - """str: Time the job was resized. (formatted)""" - return timestamp_to_date(self.ptr.resize_time) + return _raw_time(self.ptr.resize_time) @property - def deadline_time_raw(self): - """int: Time when a pending Job will be cancelled. (Unix timestamp)""" + def deadline(self): return _raw_time(self.ptr.deadline) - @property - def deadline_time(self): - """str: Time at which a pending Job will be cancelled. (formatted)""" - return timestamp_to_date(self.ptr.deadline) - - @property - def preempt_eligible_time_raw(self): - """int: Time the Job is eligible for preemption. 
(Unix timestamp)""" - return _raw_time(self.ptr.preemptable_time) - @property def preempt_eligible_time(self): - """str: Time when the Job is eligible for preemption. (formatted)""" - return timestamp_to_date(self.ptr.preemptable_time) - - @property - def preempt_time_raw(self): - """int: Time the Job was signaled for preemption. (Unix timestamp)""" - return _raw_time(self.ptr.preempt_time) + return _raw_time(self.ptr.preemptable_time) @property def preempt_time(self): - """str: Time the Job was signaled for preemption. (formatted)""" - return timestamp_to_date(self.ptr.preempt_time) - - @property - def suspend_time_raw(self): - """int: Last Time the Job was suspended. (Unix timestamp)""" - return _raw_time(self.ptr.suspend_time) + return _raw_time(self.ptr.preempt_time) @property def suspend_time(self): - """str: Last Time the Job was suspended. (formatted)""" - return timestamp_to_date(self.ptr.suspend_time) + return _raw_time(self.ptr.suspend_time) @property - def last_sched_eval_time_raw(self): - """int: Last time evaluated for Scheduling. (Unix timestamp)""" + def last_sched_evaluation_time(self): return _raw_time(self.ptr.last_sched_eval) - @property - def last_sched_eval_time(self): - """str: Last Time evaluated for Scheduling. (formatted)""" - return timestamp_to_date(self.ptr.last_sched_eval) - - @property - def pre_suspension_time_raw(self): - """int: Amount of seconds the Job ran prior to suspension.""" - return _raw_time(self.ptr.pre_sus_time) - @property def pre_suspension_time(self): - """str: Time the Job ran prior to suspension. (formatted)""" - return secs_to_timestr(self.ptr.pre_sus_time) + return _raw_time(self.ptr.pre_sus_time) @property def mcs_label(self): - """str: MCS Label for the Job""" return cstr.to_unicode(self.ptr.mcs_label) @property def partition(self): - """str: Name of the Partition the Job runs in.""" return cstr.to_unicode(self.ptr.partition) @property def submit_host(self): - """str: Name of the Host this Job was submitted from.""" return cstr.to_unicode(self.ptr.alloc_node) @property def batch_host(self): - """str: Name of the Host where the Batch-Script is executed.""" return cstr.to_unicode(self.ptr.batch_host) @property def min_nodes(self): - """int: Minimum amount of Nodes the Job has requested.""" return u32_parse(self.ptr.num_nodes) @property def max_nodes(self): - """int: Maximum amount of Nodes the Job has requested.""" return u32_parse(self.ptr.max_nodes) @property - def alloc_nodes(self): - """str: Nodes the Job is using. - - This is the formatted string of Nodes as shown by scontrol. - For example, it can look like this: - - "node001,node[005-010]" - - If you want to expand this string into a list of nodenames you can - use the "pyslurm.nodelist_from_range_str" function. - - Note: - This is only valid when the Job is running. If the Job is pending, - it will always return an empty list. - """ + def allocated_nodes(self): return cstr.to_unicode(self.ptr.nodes) @property def required_nodes(self): - """str: Nodes the Job is explicitly requiring to run on. - - This is the formatted string of Nodes as shown by scontrol. - For example, it can look like this: - - "node001,node[005-010]" - - If you want to expand this string into a list of nodenames you can - use the "pyslurm.nodelist_from_range_str" function. - """ return cstr.to_unicode(self.ptr.req_nodes) @property def excluded_nodes(self): - """str: Nodes that are explicitly excluded for execution. - - This is the formatted string of Nodes as shown by scontrol. 
- For example, it can look like this: - - "node001,node[005-010]" - - If you want to expand this string into a list of nodenames you can - use the "pyslurm.nodelist_from_range_str" function. - """ return cstr.to_unicode(self.ptr.exc_nodes) @property def scheduled_nodes(self): - """str: Nodes the Job is scheduled on by the slurm controller. - - This is the formatted string of Nodes as shown by scontrol. - For example, it can look like this: - - "node001,node[005-010]" - - If you want to expand this string into a list of nodenames you can - use the "pyslurm.nodelist_from_range_str" function. - """ return cstr.to_unicode(self.ptr.sched_nodes) @property def derived_exit_code(self): - """int: The derived exit code for the Job.""" if (self.ptr.derived_ec == slurm.NO_VAL or not WIFEXITED(self.ptr.derived_ec)): return None @@ -892,7 +715,6 @@ cdef class Job: @property def derived_exit_code_signal(self): - """int: Signal for the derived exit code.""" if (self.ptr.derived_ec == slurm.NO_VAL or not WIFSIGNALED(self.ptr.derived_ec)): return None @@ -901,7 +723,6 @@ cdef class Job: @property def exit_code(self): - """int: Code with which the Job has exited.""" if (self.ptr.exit_code == slurm.NO_VAL or not WIFEXITED(self.ptr.exit_code)): return None @@ -910,7 +731,6 @@ cdef class Job: @property def exit_code_signal(self): - """int: The signal which has led to the exit code of the Job.""" if (self.ptr.exit_code == slurm.NO_VAL or not WIFSIGNALED(self.ptr.exit_code)): return None @@ -919,39 +739,26 @@ cdef class Job: @property def batch_constraints(self): - """list: Features that node(s) should have for the batch script. - - Controls where it is possible to execute the batch-script of the job. - Also see 'constraints' - """ return cstr.to_list(self.ptr.batch_features) @property def federation_origin(self): - """str: Federation Origin""" return cstr.to_unicode(self.ptr.fed_origin_str) @property def federation_siblings_active(self): - """str: Federation siblings active""" return u64_parse(self.ptr.fed_siblings_active) @property def federation_siblings_viable(self): - """str: Federation siblings viable""" return u64_parse(self.ptr.fed_siblings_viable) @property - def alloc_cpus(self): - """int: Total amount of CPUs the Job is using. - - If the Job is still pending, this will be None. 
- """ + def allocated_cpus(self): return u32_parse(self.ptr.num_cpus) @property def cpus_per_task(self): - """int: Number of CPUs per Task used.""" if self.ptr.cpus_per_tres: return None @@ -959,7 +766,6 @@ cdef class Job: @property def cpus_per_gpu(self): - """int: Number of CPUs per GPU used.""" if (not self.ptr.cpus_per_tres or self.ptr.cpus_per_task != slurm.NO_VAL16): return None @@ -971,205 +777,152 @@ cdef class Job: @property def boards_per_node(self): - """int: Number of boards per Node.""" return u16_parse(self.ptr.boards_per_node) @property def sockets_per_board(self): - """int: Number of sockets per board.""" return u16_parse(self.ptr.sockets_per_board) @property def sockets_per_node(self): - """int: Number of sockets per node.""" return u16_parse(self.ptr.sockets_per_node) @property def cores_per_socket(self): - """int: Number of cores per socket.""" return u16_parse(self.ptr.cores_per_socket) @property def threads_per_core(self): - """int: Number of threads per core.""" return u16_parse(self.ptr.threads_per_core) @property def ntasks(self): - """int: Number of parallel processes.""" return u32_parse(self.ptr.num_tasks, on_noval=1) @property def ntasks_per_node(self): - """int: Number of parallel processes per node.""" return u16_parse(self.ptr.ntasks_per_node) @property def ntasks_per_board(self): - """int: Number of parallel processes per board.""" return u16_parse(self.ptr.ntasks_per_board) @property def ntasks_per_socket(self): - """int: Number of parallel processes per socket.""" return u16_parse(self.ptr.ntasks_per_socket) @property def ntasks_per_core(self): - """int: Number of parallel processes per core.""" return u16_parse(self.ptr.ntasks_per_core) @property def ntasks_per_gpu(self): - """int: Number of parallel processes per GPU.""" return u16_parse(self.ptr.ntasks_per_tres) - @property - def delay_boot_time_raw(self): - """int: https://slurm.schedmd.com/sbatch.html#OPT_delay-boot""" - return _raw_time(self.ptr.delay_boot) - @property def delay_boot_time(self): - """str: https://slurm.schedmd.com/sbatch.html#OPT_delay-boot""" - return secs_to_timestr(self.ptr.delay_boot) + return _raw_time(self.ptr.delay_boot) @property def constraints(self): - """list: A list of features the Job requires nodes to have. - - In contrast, the 'batch_constraints' option only focuses on the - initial batch-script placement. - - This option however means features to restrict the list of nodes a - job is able to execute on in general beyond the initial batch-script. 
- """ return cstr.to_list(self.ptr.features) @property def cluster(self): - """str: Name of the cluster the job is executing on.""" return cstr.to_unicode(self.ptr.cluster) @property def cluster_constraints(self): - """list: A List of features that a cluster should have.""" return cstr.to_list(self.ptr.cluster_features) @property def reservation(self): - """str: Name of the reservation this Job uses.""" return cstr.to_unicode(self.ptr.resv_name) @property def resource_sharing(self): - """str: Mode controlling how a job shares resources with others.""" return cstr.to_unicode(slurm_job_share_string(self.ptr.shared)) @property - def contiguous(self): - """bool: Whether the Job requires a set of contiguous nodes.""" + def requires_contiguous_nodes(self): return u16_parse_bool(self.ptr.contiguous) @property def licenses(self): - """list: List of licenses the Job needs.""" return cstr.to_list(self.ptr.licenses) @property def network(self): - """str: Network specification for the Job.""" return cstr.to_unicode(self.ptr.network) @property def command(self): - """str: The command that is executed for the Job.""" return cstr.to_unicode(self.ptr.command) @property - def work_dir(self): - """str: Path to the working directory for this Job.""" + def working_directory(self): return cstr.to_unicode(self.ptr.work_dir) @property def admin_comment(self): - """str: An arbitrary comment set by an administrator for the Job.""" return cstr.to_unicode(self.ptr.admin_comment) @property def system_comment(self): - """str: An arbitrary comment set by the slurmctld for the Job.""" return cstr.to_unicode(self.ptr.system_comment) @property def container(self): - """str: The container this Job uses.""" return cstr.to_unicode(self.ptr.container) @property def comment(self): - """str: An arbitrary comment set for the Job.""" return cstr.to_unicode(self.ptr.comment) @property - def stdin(self): - """str: The path to the file for stdin.""" + def standard_input(self): cdef char tmp[1024] slurm_get_job_stdin(tmp, sizeof(tmp), self.ptr) return cstr.to_unicode(tmp) @property - def stdout(self): - """str: The path to the log file for stdout.""" + def standard_output(self): cdef char tmp[1024] slurm_get_job_stdout(tmp, sizeof(tmp), self.ptr) return cstr.to_unicode(tmp) @property - def stderr(self): - """The path to the log file for stderr.""" + def standard_error(self): cdef char tmp[1024] slurm_get_job_stderr(tmp, sizeof(tmp), self.ptr) return cstr.to_unicode(tmp) @property - def num_switches(self): - """int: Number of switches requested.""" + def required_switches(self): return u32_parse(self.ptr.req_switch) - @property - def max_wait_time_switches_raw(self): - """int: Amount of seconds to wait for the switches.""" - return _raw_time(self.ptr.wait4switch) - @property def max_wait_time_switches(self): - """str: Amount of seconds to wait for the switches. 
(formatted)""" - return secs_to_timestr(self.ptr.wait4switch) + return _raw_time(self.ptr.wait4switch) @property def burst_buffer(self): - """str: Burst buffer specification""" return cstr.to_unicode(self.ptr.burst_buffer) @property def burst_buffer_state(self): - """str: Burst buffer state""" return cstr.to_unicode(self.ptr.burst_buffer_state) @property - def cpu_freq_min(self): - """Union[str, int]: Minimum CPU-Frequency requested.""" + def cpu_frequency_min(self): return cpufreq_to_str(self.ptr.cpu_freq_min) @property - def cpu_freq_max(self): - """Union[str, int]: Maximum CPU-Frequency requested.""" + def cpu_frequency_max(self): return cpufreq_to_str(self.ptr.cpu_freq_max) @property - def cpu_freq_governor(self): - """Union[str, int]: CPU-Frequency Governor requested.""" + def cpu_frequency_governor(self): return cpufreq_to_str(self.ptr.cpu_freq_gov) # @property @@ -1186,27 +939,22 @@ cdef class Job: @property def wckey(self): - """str: Name of the WCKey this Job uses.""" return cstr.to_unicode(self.ptr.wckey) @property def mail_user(self): - """list: Users that should receive Mails for this Job.""" return cstr.to_list(self.ptr.mail_user) @property def mail_types(self): - """list: Mail Flags specified by the User.""" return get_mail_type(self.ptr.mail_type) @property - def hetjob_id(self): - """int: Heterogeneous ID""" + def heterogeneous_id(self): return u32_parse(self.ptr.het_job_id, noval=0) @property - def hetjob_offset(self): - """int: Heterogeneous Job offset""" + def heterogeneous_offset(self): return u32_parse(self.ptr.het_job_offset, noval=0) # @property @@ -1216,46 +964,23 @@ cdef class Job: # return cstr.to_unicode(self.ptr.het_job_id_set) @property - def tmp_disk_per_node_raw(self): - """int: Temporary disk space available per Node. (in Mebibytes)""" + def temporary_disk_per_node(self): return u32_parse(self.ptr.pn_min_tmp_disk) @property - def tmp_disk_per_node(self): - """str: Amount of temporary disk space available per Node. - - The output for this value is already in a human readable format, - with appropriate unit suffixes like K|M|G|T. - """ - return humanize(self.tmp_disk_per_node_raw) - - @property - def array_job_id(self): - """int: The master Array-Job ID.""" + def array_id(self): return u32_parse(self.ptr.array_job_id) @property def array_tasks_parallel(self): - """int: Number of array tasks allowed to run in simultaneously.""" return u32_parse(self.ptr.array_max_tasks) @property def array_task_id(self): - """int: The Task-ID if the Job is an Array-Job.""" return u32_parse(self.ptr.array_task_id) @property def array_tasks_waiting(self): - """str: Array Tasks that are still waiting. - - This is the formatted string of Task-IDs as shown by scontrol. - For example, it can look like this: - - "1-3,5-7,8,9" - - If you want to expand this string including the ranges into a - list, you can use the "pyslurm.expand_range_str" function. - """ task_str = cstr.to_unicode(self.ptr.array_task_str) if not task_str: return None @@ -1269,14 +994,8 @@ cdef class Job: @property def end_time(self): - """int: Time at which this Job has ended. (Unix timestamp)""" return _raw_time(self.ptr.end_time) - @property - def end_time(self): - """str: Time at which this Job has ended. 
(formatted)""" - return timestamp_to_date(self.ptr.end_time) - # https://github.com/SchedMD/slurm/blob/d525b6872a106d32916b33a8738f12510ec7cf04/src/api/job_info.c#L480 cdef _calc_run_time(self): cdef time_t rtime @@ -1301,33 +1020,24 @@ cdef class Job: return u64_parse(rtime) - @property - def run_time_raw(self): - """int: Amount of seconds the Job has been running. (Unix timestamp)""" - return _raw_time(self._calc_run_time()) - @property def run_time(self): - """str: Amount of seconds the Job has been running. (formatted)""" - return secs_to_timestr(self._calc_run_time()) + return _raw_time(self._calc_run_time()) @property def cores_reserved_for_system(self): - """int: Amount of cores reserved for System use only.""" if self.ptr.core_spec != slurm.NO_VAL16: if not self.ptr.core_spec & slurm.CORE_SPEC_THREAD: return self.ptr.core_spec @property def threads_reserved_for_system(self): - """int: Amount of Threads reserved for System use only.""" if self.ptr.core_spec != slurm.NO_VAL16: if self.ptr.core_spec & slurm.CORE_SPEC_THREAD: return self.ptr.core_spec & (~slurm.CORE_SPEC_THREAD) @property - def mem_per_cpu_raw(self): - """int: Amount of Memory per CPU this Job has. (in Mebibytes)""" + def memory_per_cpu(self): if self.ptr.pn_min_memory != slurm.NO_VAL64: if self.ptr.pn_min_memory & slurm.MEM_PER_CPU: mem = self.ptr.pn_min_memory & (~slurm.MEM_PER_CPU) @@ -1336,13 +1046,7 @@ cdef class Job: return None @property - def mem_per_cpu(self): - """str: Humanized amount of Memory per CPU this Job has.""" - return humanize(self.mem_per_cpu_raw) - - @property - def mem_per_node_raw(self): - """int: Amount of Memory per Node this Job has. (in Mebibytes)""" + def memory_per_node(self): if self.ptr.pn_min_memory != slurm.NO_VAL64: if not self.ptr.pn_min_memory & slurm.MEM_PER_CPU: return u64_parse(self.ptr.pn_min_memory) @@ -1350,13 +1054,7 @@ cdef class Job: return None @property - def mem_per_node(self): - """str: Humanized amount of Memory per Node this Job has.""" - return humanize(self.mem_per_node_raw) - - @property - def mem_per_gpu_raw(self): - """int: Amount of Memory per GPU this Job has. (in Mebibytes)""" + def memory_per_gpu(self): if self.ptr.mem_per_tres and self.ptr.pn_min_memory == slurm.NO_VAL64: # TODO: Make a function that, given a GRES type, safely extracts # its value from the string. @@ -1365,54 +1063,41 @@ cdef class Job: else: return None - @property - def mem_per_gpu(self): - """str: Humanized amount of Memory per GPU this Job has.""" - return humanize(self.mem_per_gpu_raw) - @property def gres_per_node(self): - """dict: GRES (e.g. GPU) this Job is using per Node.""" return cstr.to_gres_dict(self.ptr.tres_per_node) @property - def accounting_gather_profile(self): - """list: Options that control gathering of Accounting information.""" + def profile_types(self): return get_acctg_profile(self.ptr.profile) @property def gres_binding(self): - """str: Binding Enforcement of a GRES resource (e.g. 
GPU).""" if self.ptr.bitflags & slurm.GRES_ENFORCE_BIND: - return "enforce" + return "enforce-binding" elif self.ptr.bitflags & slurm.GRES_DISABLE_BIND: - return "disable" + return "disable-binding" else: return None @property - def kill_on_invalid_dep(self): - """bool: Whether the Job should be killed on an invalid dependency.""" + def kill_on_invalid_dependency(self): return u64_parse_bool_flag(self.ptr.bitflags, slurm.KILL_INV_DEP) @property - def spread_job(self): - """bool: Whether the Job should be spread accross the nodes.""" + def spreads_over_nodes(self): return u64_parse_bool_flag(self.ptr.bitflags, slurm.SPREAD_JOB) @property - def power(self): - """list: Options for Power Management.""" + def power_options(self): return get_power_type(self.ptr.power_flags) @property def is_cronjob(self): - """bool: Whether this Job is a cronjob.""" return u64_parse_bool_flag(self.ptr.bitflags, slurm.CRON_JOB) @property def cronjob_time(self): - """str: The time specification for the Cronjob.""" return cstr.to_unicode(self.ptr.cronspec) def get_resource_layout_per_node(self): @@ -1425,7 +1110,7 @@ cdef class Job: * memory_raw (int) - Value in Mebibytes Returns: - dict: Resource layout + (dict): Resource layout """ # TODO: Explain the structure of the return value a bit more. cdef: @@ -1501,8 +1186,7 @@ cdef class Job: output[nodename] = { "cpus": cpu_ids, "gres": cstr.to_gres_dict(gres), - "memory": humanize(mem), - "memory_raw": mem, + "memory": mem, } free(host) diff --git a/pyslurm/core/job/sbatch_opts.pyx b/pyslurm/core/job/sbatch_opts.pyx index 35d77eb1..9f0495cd 100644 --- a/pyslurm/core/job/sbatch_opts.pyx +++ b/pyslurm/core/job/sbatch_opts.pyx @@ -17,7 +17,6 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: embedsignature=True # cython: c_string_type=unicode, c_string_encoding=utf8 # cython: language_level=3 @@ -42,29 +41,29 @@ class _SbatchOpt(): # Sorted by occurence in the sbatch manpage - keep in order. 
SBATCH_OPTIONS = [ _SbatchOpt("A", "account", "account"), - _SbatchOpt(None, "acctg-freq", "accounting_gather_freq"), + _SbatchOpt(None, "acctg-freq", "accounting_gather_frequency"), _SbatchOpt("a", "array", "array"), _SbatchOpt(None, "batch", "batch_constraints"), _SbatchOpt(None, "bb", "burst_buffer"), _SbatchOpt(None, "bbf", "burst_buffer_file"), _SbatchOpt("b", "begin", "begin_time"), - _SbatchOpt("D", "chdir", "work_dir"), + _SbatchOpt("D", "chdir", "working_directory"), _SbatchOpt(None, "cluster-constraint", "cluster_constraints"), _SbatchOpt("M", "clusters", "clusters"), _SbatchOpt(None, "comment","comment"), _SbatchOpt("C", "constraint", "constraints"), _SbatchOpt(None, "container", "container"), - _SbatchOpt(None, "contiguous", "contiguous"), + _SbatchOpt(None, "contiguous", "requires_contiguous_nodes"), _SbatchOpt("S", "core-spec", "cores_reserved_for_system"), _SbatchOpt(None, "cores-per-socket", "cores_per_socket"), - _SbatchOpt(None, "cpu-freq", "cpu_freq"), + _SbatchOpt(None, "cpu-freq", "cpu_frequency"), _SbatchOpt(None, "cpus-per-gpu", "cpus_per_gpu"), _SbatchOpt("c", "cpus-per-task", "cpus_per_task"), _SbatchOpt(None, "deadline", "deadline"), - _SbatchOpt(None, "delay-boot", "delay_boot"), + _SbatchOpt(None, "delay-boot", "delay_boot_time"), _SbatchOpt("d", "dependency", "dependencies"), _SbatchOpt("m", "distribution", "distribution"), - _SbatchOpt("e", "error", "stderr"), + _SbatchOpt("e", "error", "standard_error"), _SbatchOpt("x", "exclude", "excluded_nodes"), _SbatchOpt(None, "exclusive", "resource_sharing", "no"), _SbatchOpt(None, "export", "environment"), @@ -79,21 +78,21 @@ SBATCH_OPTIONS = [ _SbatchOpt(None, "gpus-per-socket", "gpus_per_socket"), _SbatchOpt(None, "gpus-per-socket", "gpus_per_task"), _SbatchOpt(None, "gres", "gres_per_node"), - _SbatchOpt(None, "gres-flags", "gres_enforce_binding"), + _SbatchOpt(None, "gres-flags", "gres_binding"), _SbatchOpt(None, "hint", None), _SbatchOpt("H", "hold", "priority", 0), _SbatchOpt(None, "ignore-pbs", None), - _SbatchOpt("i", "input", "stdin"), + _SbatchOpt("i", "input", "standard_in"), _SbatchOpt("J", "job-name", "name"), _SbatchOpt(None, "kill-on-invalid-dep", "kill_on_invalid_dependency"), _SbatchOpt("L", "licenses", "licenses"), - _SbatchOpt(None, "mail-type", "mail_type"), + _SbatchOpt(None, "mail-type", "mail_types"), _SbatchOpt(None, "mail-user", "mail_user"), _SbatchOpt(None, "mcs-label", "mcs_label"), - _SbatchOpt(None, "mem", "mem_per_node"), + _SbatchOpt(None, "mem", "memory_per_node"), _SbatchOpt(None, "mem-bind", None), - _SbatchOpt(None, "mem-per-cpu", "mem_per_cpu"), - _SbatchOpt(None, "mem-per-gpu", "mem_per_gpu"), + _SbatchOpt(None, "mem-per-cpu", "memory_per_cpu"), + _SbatchOpt(None, "mem-per-gpu", "memory_per_gpu"), _SbatchOpt(None, "mincpus", "min_cpus_per_node"), _SbatchOpt(None, "network", "network"), _SbatchOpt(None, "nice", "nice"), @@ -108,28 +107,28 @@ SBATCH_OPTIONS = [ _SbatchOpt(None, "ntasks-per-node", "ntasks_per_node"), _SbatchOpt(None, "ntasks-per-socket", "ntasks_per_socket"), _SbatchOpt(None, "open-mode", "log_files_open_mode"), - _SbatchOpt("o", "output", "stdout"), + _SbatchOpt("o", "output", "standard_output"), _SbatchOpt("O", "overcommit", "overcommit", True), _SbatchOpt("s", "oversubscribe", "resource_sharing", "yes"), _SbatchOpt("p", "partition", "partition"), - _SbatchOpt(None, "power", "power_type"), + _SbatchOpt(None, "power", "power_options"), _SbatchOpt(None, "prefer", None), _SbatchOpt(None, "priority", "priority"), - _SbatchOpt(None, "profile", "profile"), + 
_SbatchOpt(None, "profile", "profile_types"), _SbatchOpt(None, "propagate", None), _SbatchOpt("q", "qos", "qos"), - _SbatchOpt(None, "reboot", "reboot_nodes", True), + _SbatchOpt(None, "reboot", "requires_node_reboot", True), _SbatchOpt(None, "requeue", "is_requeueable", True), _SbatchOpt(None, "reservation", "reservations"), _SbatchOpt(None, "signal", "signal"), _SbatchOpt(None, "sockets-per-node", "sockets_per_node"), - _SbatchOpt(None, "spread-job", "spread_job", True), + _SbatchOpt(None, "spread-job", "spreads_over_nodes", True), _SbatchOpt(None, "switches", "switches"), _SbatchOpt(None, "thread-spec", "threads_reserved_for_system"), _SbatchOpt(None, "threads-per-core", "threads_per_core"), _SbatchOpt("t", "time", "time_limit"), _SbatchOpt(None, "time-min", "time_limit_min"), - _SbatchOpt(None, "tmp", "tmp_disk_per_node"), + _SbatchOpt(None, "tmp", "temporary_disk_per_node"), _SbatchOpt(None, "uid", "uid"), _SbatchOpt(None, "use-min-nodes", "use_min_nodes", True), _SbatchOpt(None, "wait-all-nodes", "wait_all_nodes", True), diff --git a/pyslurm/core/job/step.pxd b/pyslurm/core/job/step.pxd index a82cdd4b..9b42368c 100644 --- a/pyslurm/core/job/step.pxd +++ b/pyslurm/core/job/step.pxd @@ -17,7 +17,6 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: embedsignature=True # cython: c_string_type=unicode, c_string_encoding=utf8 # cython: language_level=3 @@ -44,6 +43,16 @@ from pyslurm.slurm cimport ( ) cdef class JobSteps(dict): + """A collection of :obj:`JobStep` objects for a given Job. + + Args: + job (Union[Job, int]): + A Job for which the Steps should be loaded. + + Raises: + RPCError: When getting the Job steps from the slurmctld failed. + MemoryError: If malloc fails to allocate memory. + """ cdef: job_step_info_response_msg_t *info @@ -53,6 +62,69 @@ cdef class JobSteps(dict): cdef class JobStep: + """A Slurm Jobstep + + Args: + job (Union[Job, int]): + The Job this Step belongs to. + step (Union[int, str]): + Step-ID for this JobStep object. + + Raises: + MemoryError: If malloc fails to allocate memory. + + Attributes: + id (Union[str, int]): + The id for this step. + job_id (int): + The id for the Job this step belongs to. + name (str): + Name of the step. + user_id (int): + User ID who owns this step. + user_name (str): + Name of the User who owns this step. + time_limit (int): + Time limit in Minutes for this step. + network (str): + Network specification for the step. + cpu_frequency_min (Union[str, int]): + Minimum CPU-Frequency requested. + cpu_frequency_max (Union[str, int]): + Maximum CPU-Frequency requested. + cpu_frequency_governor (Union[str, int]): + CPU-Frequency Governor requested. + reserved_ports (str): + Reserved ports for the step. + cluster (str): + Name of the cluster this step runs on. + srun_host (str): + Name of the host srun was executed on. + srun_process_id (int): + Process ID of the srun command. + container (str): + Path to the container OCI. + allocated_nodes (str): + Nodes the Job is using. + start_time (int): + Time this step started, as unix timestamp. + run_time (int): + Seconds this step has been running for. + partition (str): + Name of the partition this step runs in. + state (str): + State the step is in. + allocated_cpus (int): + Number of CPUs this step uses in total. + ntasks (int): + Number of tasks this step uses. + distribution (dict): + Task distribution specification for the step. + command (str): + Command that was specified with srun. 
+ slurm_protocol_version (int): + Slurm protocol version in use. + """ cdef: job_step_info_t *ptr diff --git a/pyslurm/core/job/step.pyx b/pyslurm/core/job/step.pyx index 7cfa2e35..8710abd2 100644 --- a/pyslurm/core/job/step.pyx +++ b/pyslurm/core/job/step.pyx @@ -17,7 +17,6 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: embedsignature=True # cython: c_string_type=unicode, c_string_encoding=utf8 # cython: language_level=3 @@ -47,7 +46,7 @@ from pyslurm.core.common.ctime import ( cdef class JobSteps(dict): - """A collection of :obj:`JobStep` objects for a given Job.""" + def __dealloc__(self): slurm_free_job_step_info_response_msg(self.info) @@ -55,16 +54,6 @@ cdef class JobSteps(dict): self.info = NULL def __init__(self, job): - """Initialize a JobSteps collection - - Args: - job (Union[Job, int]): - A Job for which the Steps should be loaded. - - Raises: - RPCError: When getting the Job steps from the slurmctld failed. - MemoryError: If malloc fails to allocate memory. - """ cdef Job _job # Reload the Job in order to have updated information about its state. @@ -122,31 +111,20 @@ cdef class JobSteps(dict): """Loads and returns all the steps in the system. Returns: - dict: A dict where every JobID (key) is mapped with an instance of - its JobSteps (value). + (dict): A dict where every JobID (key) is mapped with an instance + of its JobSteps (value). """ cdef JobSteps steps = JobSteps.__new__(JobSteps) return steps._load(slurm.NO_VAL, slurm.SHOW_ALL) cdef class JobStep: - """A Slurm Jobstep""" + def __cinit__(self): self.ptr = NULL self.umsg = NULL def __init__(self, job=0, step=0, **kwargs): - """Initialize the JobStep instance - - Args: - job (Union[Job, int]): - The Job this Step belongs to. - step (Union[int, str]): - Step-ID for this JobStep object. - - Raises: - MemoryError: If malloc fails to allocate memory. - """ self._alloc_impl() self.job_id = job.id if isinstance(job, Job) else job self.id = step @@ -208,8 +186,8 @@ cdef class JobStep: MemoryError: If malloc failed to allocate memory. Returns: - JobStep: This function returns the current JobStep-instance object - itself. + (JobStep): This function returns the current JobStep-instance + object itself. Examples: >>> from pyslurm import JobStep @@ -364,13 +342,12 @@ cdef class JobStep: """JobStep information formatted as a dictionary. Returns: - dict: JobStep information as dict + (dict): JobStep information as dict """ return instance_to_dict(self) @property def id(self): - """Union[str, int]: The id for this step.""" return self._xlate_from_id(self.ptr.step_id.step_id) @id.setter @@ -379,7 +356,6 @@ cdef class JobStep: @property def job_id(self): - """int: The id for the Job this step belongs to.""" return self.ptr.step_id.job_id @job_id.setter @@ -388,28 +364,19 @@ cdef class JobStep: @property def name(self): - """str: Name of the step.""" return cstr.to_unicode(self.ptr.name) @property - def uid(self): - """int: User ID who owns this step.""" + def user_id(self): return u32_parse(self.ptr.user_id, zero_is_noval=False) @property - def user(self): - """str: Name of the User who owns this step.""" + def user_name(self): return uid_to_name(self.ptr.user_id) - @property - def time_limit_raw(self): - """int: Time limit in Minutes for this step.""" - return _raw_time(self.ptr.time_limit) - @property def time_limit(self): - """str: Time limit for this step. 
(formatted)""" - return mins_to_timestr(self.ptr.time_limit) + return _raw_time(self.ptr.time_limit) @time_limit.setter def time_limit(self, val): @@ -417,114 +384,76 @@ cdef class JobStep: @property def network(self): - """str: Network specification for the step.""" return cstr.to_unicode(self.ptr.network) @property - def cpu_freq_min(self): - """Union[str, int]: Minimum CPU-Frequency requested.""" + def cpu_frequency_min(self): return cpufreq_to_str(self.ptr.cpu_freq_min) @property - def cpu_freq_max(self): - """Union[str, int]: Maximum CPU-Frequency requested.""" + def cpu_frequency_max(self): return cpufreq_to_str(self.ptr.cpu_freq_max) @property - def cpu_freq_governor(self): - """Union[str, int]: CPU-Frequency Governor requested.""" + def cpu_frequency_governor(self): return cpufreq_to_str(self.ptr.cpu_freq_gov) @property def reserved_ports(self): - """str: Reserved ports for the step.""" return cstr.to_unicode(self.ptr.resv_ports) @property def cluster(self): - """str: Name of the cluster this step runs on.""" return cstr.to_unicode(self.ptr.cluster) @property def srun_host(self): - """str: Name of the host srun was executed on.""" return cstr.to_unicode(self.ptr.srun_host) @property - def srun_pid(self): - """int: PID of the srun command.""" + def srun_process_id(self): return u32_parse(self.ptr.srun_pid) @property def container(self): - """str: Path to the container OCI.""" return cstr.to_unicode(self.ptr.container) @property - def alloc_nodes(self): - """str: Nodes the Job is using. - - This is the formatted string of Nodes as shown by scontrol. - For example, it can look like this: - - "node001,node[005-010]" - - If you want to expand this string into a list of nodenames you can - use the pyslurm.nodelist_from_range_str function. - """ + def allocated_nodes(self): return cstr.to_list(self.ptr.nodes) - @property - def start_time_raw(self): - """int: Time this step started. (Unix timestamp)""" - return _raw_time(self.ptr.start_time) - @property def start_time(self): - """str: Time this step started. (formatted)""" - return timestamp_to_date(self.ptr.start_time) - - @property - def run_time_raw(self): - """int: Seconds this step has been running for.""" - return _raw_time(self.ptr.run_time) + return _raw_time(self.ptr.start_time) @property def run_time(self): - """str: Seconds this step has been running for. 
(formatted)""" - return secs_to_timestr(self.ptr.run_time) + return _raw_time(self.ptr.run_time) @property def partition(self): - """str: Name of the partition this step runs in.""" return cstr.to_unicode(self.ptr.partition) @property def state(self): - """str: State the step is in.""" return cstr.to_unicode(slurm_job_state_string(self.ptr.state)) @property def alloc_cpus(self): - """int: Number of CPUs this step uses in total.""" return u32_parse(self.ptr.num_cpus) @property def ntasks(self): - """int: Number of tasks this step uses.""" return u32_parse(self.ptr.num_tasks) @property def distribution(self): - """dict: Task distribution specification for the step.""" return get_task_dist(self.ptr.task_dist) @property def command(self): - """str: Command that was specified with srun.""" return cstr.to_unicode(self.ptr.submit_line) @property - def protocol_version(self): - """int: Slurm protocol version in use.""" + def slurm_protocol_version(self): return u32_parse(self.ptr.start_protocol_ver) diff --git a/pyslurm/core/job/submission.pxd b/pyslurm/core/job/submission.pxd index 25505723..f10a24d4 100644 --- a/pyslurm/core/job/submission.pxd +++ b/pyslurm/core/job/submission.pxd @@ -39,738 +39,579 @@ from pyslurm.slurm cimport ( cdef class JobSubmitDescription: - cdef: - slurm.job_desc_msg_t *ptr - is_update + """Description of a Slurm Job. + + Attributes: + name (str): + Name of the Job, same as -J/--job-name from sbatch. + account (str): + Account of the job, same as -A/--account from sbatch. + uid (Union[str, int]): + Run the job as a different User, same as --uid from sbatch. + This requires root privileges. + You can both specify the name or numeric uid of the User. + gid (Union[str, int]): + Run the job as a different Group, same as --gid from sbatch. + This requires root privileges. + You can both specify the name or numeric gid of the User. + priority (int): + Specific priority the Job will receive. + Same as --priority from sbatch. + You can achieve the behaviour of sbatch's --hold option by + specifying a priority of 0. + site_factor (int): + Site Factor of the Job. Only used when updating an existing Job. + wckey (str): + WCKey to use with the Job, same as --wckey from sbatch. + array (str): + Job Array specification, same as -a/--array from sbatch. + batch_constraints (str): + Batch Features of a Job, same as --batch from sbatch. + begin_time (str): + Defer allocation until the specified time, same as --begin from + sbatch. + clusters (Union[list, str]): + Clusters the job may run on, same as -M/--clusters from sbatch. + cluster_constraints (str): + Comma-separated str with cluster constraints for the job. + This is the same as --cluster-constraint from sbatch. + comment (str): + Arbitrary job comment, same as --comment from sbatch. + admin_comment (str): + Arbitrary job admin comment. + Only used when updating an existing job. + requires_contiguous_nodes (bool): + Whether allocated Nodes are required to form a contiguous set. + Same as --contiguous from sbatch. + cores_reserved_for_system (int): + Count of cores reserved for system not usable by the Job. + Same as -S/--core-spec from sbatch. + Mutually exclusive with `threads_reserved_for_system`. + threads_reserved_for_system (int): + Count of threads reserved for system not usable by the Job. + Same as --thread-spec from sbatch. + Mutually exclusive with `cores_reserved_for_system`. + working_directory (str): + Work directory for the Job. Default is current work-dir from where + the job was submitted. 
+ Same as -D/--chdir from sbatch. + cpu_frequency (Union[dict, str]): + CPU Frequency for the Job, same as --cpu-freq from sbatch. + + Examples: + Specifying it as a dict: + + cpu_frequency = { + "min": "Low", + "max": "High", + "governor": "UserSpace" + } + + or like in sbatch with a string. For more info on that, check + out the sbatch documentation for --cpu-freq. + + If you only want to set a Governor without any min or max, you + can simply specify it as a standalone string: + + cpu_frequency = "Performance" + or + cpu_frequency = {"governor": "Performance"} + + If you want to set a specific, fixed frequency, you can do: + + cpu_frequency = + or either + cpu_frequency = {"max": } or cpu_freq = {"min": } + nodes (Union[dict, str, int]): + Amount of nodes needed for the job. + This is the same as -N/--nodes from sbatch. + + Examples: + Providing min/max nodes as a dict: + + nodes = { + "min": 3, + "max": 6 + } + + When no range is needed, you can also simply specify it as + int: + + nodes = 3 + + Other than that, a range can also be specified in a str like + with sbatch: + + nodes = "1-5" + deadline (str): + Deadline specification for the Job, same as --deadline from + sbatch. + delay_boot_time (Union[str, int]): + Delay boot specification for the Job, same as --delay-boot from + sbatch. + dependencies (Union[dict, str]): + Dependencies for the Job, same as -d/--dependency from sbatch. + excluded_nodes (Union[list, str]): + Exclude specific nodes for this Job. + This is the same as -x/--exclude from sbatch. + required_nodes (Union[list, str]): + Specific list of nodes required for the Job. + This is the same as -w/--nodelist from sbatch. + constraints (str): + Required node features for the Job. + This is the same as -C/--constraint from sbatch. + kill_on_node_fail (bool): + Should the job get killed if one of the Nodes fails? + This is the same as -k/--no-kill from sbatch. + licenses (Union[list, str]): + A list of licenses for the Job. + This is the same as -L/--licenses from sbatch. + mail_user (Union[list, str]): + List of email addresses for notifications. + This is the same as --mail-user from sbatch. + mail_types (Union[list, str]): + List of mail flags. + This is the same as --mail-type from sbatch. + mcs_label (str): + An MCS Label for the Job. + This is the same as --mcs-label from sbatch. + memory_per_cpu (Union[str, int]): + Memory required per allocated CPU. + + The default unit is in Mebibytes. You are also able to specify + unit suffixes like K|M|G|T. + This is the same as --mem-per-cpu from sbatch. This is mutually + exclusive with memory_per_node and memory_per_gpu. + + Examples: + # 1 MiB + memory_per_cpu = 1024 + + # 3 GiB + memory_per_cpu = "3G" + memory_per_node (Union[str, int]): + Memory required per whole node. + + The default unit is in Mebibytes. You are also able to specify + unit suffixes like K|M|G|T. + This is the same as --mem from sbatch. This is mutually exclusive + with memory_per_cpu and memory_per_gpu. + + Examples: + # 1 MiB + memory_per_node = 1024 + + # 3 GiB + memory_per_node = "3G" + memory_per_gpu (Union[str, int]): + Memory required per GPU. + + The default unit is in Mebibytes. You are also able to specify + unit suffixes like K|M|G|T. + This is the same as --mem-per-gpu from sbatch. This is mutually + exclusive with memory_per_node and memory_per_cpu. + + Examples: + # 1 MiB + memory_per_gpu = 1024 + + # 3 GiB + memory_per_gpu = "3G" + network (str): + Network types for the Job. + This is the same as --network from sbatch. 
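# A hedged sketch of the dict/str attribute forms documented above, assuming
# a JobSubmitDescription can be default-constructed and filled in
# attribute-by-attribute; every value here is a placeholder.
from pyslurm import JobSubmitDescription

desc = JobSubmitDescription()
desc.working_directory = "/tmp"                   # -D/--chdir
desc.nodes = {"min": 1, "max": 2}                 # -N/--nodes=1-2
desc.cpu_frequency = {"governor": "Performance"}  # --cpu-freq
desc.memory_per_node = "3G"                       # --mem=3G (3 GiB)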
+ nice (int): + Adjusted scheduling priority for the Job. + This is the same as --nice from sbatch. + log_files_open_mode (str): + Mode in which standard_output and standard_error log files should be opened. + + Valid options are: + * append + * truncate + + This is the same as --open-mode from sbatch. + overcommit (bool): + If the resources should be overcommitted. + This is the same as -O/--overcommit from sbatch. + partitions (Union[list, str]): + A list of partitions the Job may use. + This is the same as -p/--partition from sbatch. + power_options (list): + A list of power management plugin options for the Job. + This is the same as --power from sbatch. + accounting_gather_frequency (Union[dict, str]): + Interval for accounting info to be gathered. + This is the same as --acctg-freq from sbatch. + + Examples: + Specifying it as a dict: + + accounting_gather_frequency = { + energy=60, + network=20, + } + + or as a single string: + + accounting_gather_frequency = "energy=60,network=20" + qos (str): + Quality of Service for the Job. + This is the same as -q/--qos from sbatch. + requires_node_reboot (bool): + Force the allocated nodes to reboot before the job starts. + This is the same --reboot from sbatch. + is_requeueable (bool): + If the Job is eligible for requeuing. + This is the same as --requeue from sbatch. + reservations (Union[list, str]): + A list of possible reservations the Job can use. + This is the same as --reservation from sbatch. + script (str): + Absolute Path or content of the batch script. + + You can specify either a path to a script which will be loaded, or + you can pass the script as a string. + If the script is passed as a string, providing arguments to it + (see "script_args") is not supported. + script_args (str): + Arguments passed to the batch script. + You can only set arguments if a file path was specified for + "script". + environment (Union[dict, str]): + Environment variables to be set for the Job. + This is the same as --export from sbatch. + resource_sharing (str): + Controls the resource sharing with other Jobs. + + This property combines functionality of --oversubscribe and + --exclusive from sbatch. - cdef public: - parse_sbatch_options + Allowed values are are: - name - """str: Name of the Job. + * "oversubscribe" or "yes": + The Job allows resources to be shared with other running Jobs. + + * "user" + Only sharing resources with other Jobs that have the "user" + option set is allowed - This is the same as -J/--job-name from sbatch. - """ + * "mcs" + Only sharing resources with other Jobs that have the "mcs" + option set is allowed. - account - """str: Account this Job should run under. + * "no" or "exclusive" + No sharing of resources is allowed. (--exclusive from sbatch) + distribution (Union[dict, str]): + TODO + time_limit (str): + The time limit for the job. + This is the same as -t/--time from sbatch. + time_limit_min (str): + A minimum time limit for the Job. + This is the same as --time-min from sbatch. + container (str): + Path to an OCI container bundle. + This is the same as --container from sbatch. + cpus_per_task (int): + The amount of cpus required for each task. - This is the same as -A/--account from sbatch. - """ + This is the same as -c/--cpus-per-task from sbatch. + This is mutually exclusive with cpus_per_gpu. + cpus_per_gpu (int): + The amount of cpus required for each allocated GPU. - uid - """Union[str, int]: Under which user the job will be executed. + This is the same as --cpus-per-gpu from sbatch. 
+ This is mutually exclusive with cpus_per_task. + sockets_per_node (int): + Restrict Job to nodes with atleast this many sockets. + This is the same as --sockets-per-node from sbatch. + cores_per_socket (int): + Restrict Job to nodes with atleast this many cores per socket + This is the same as --cores-per-socket from sbatch. + threads_per_core (int): + Restrict Job to nodes with atleast this many threads per socket + This is the same as --threads-per-core from sbatch. + gpus (Union[dict, str, int]): + GPUs for the Job to be allocated in total. - For setting this value, you can both specify the name or numeric - uid of the User. + This is the same as -G/--gpus from sbatch. + Specifying the type of the GPU is optional. + + Examples: + Specifying the GPU counts as a dict: + + gpus = { + "tesla": 1, + "volta": 5, + } + + Or, for example, in string format: + + gpus = "tesla:1,volta:5" + + Or, if you don't care about the type of the GPU: + + gpus = 6 + gpus_per_socket (Union[dict, str, int]): + GPUs for the Job to be allocated per socket. + + This is the same as --gpus-per-socket from sbatch. + + Specifying the type of the GPU is optional. Note that setting + gpus_per_socket requires to also specify sockets_per_node. + + Examples: + Specifying it as a dict: + + gpus_per_socket = { + "tesla": 1, + "volta": 5, + } + + Or, for example, in string format: - This is the same as --uid from sbatch. - """ + gpus_per_socket = "tesla:1,volta:5" + + Or, if you don't care about the type of the GPU: - gid - """Union[str, int]: Under which group the job will be executed. - - For setting this value, you can both specify the name or numeric - gid of the Group. of the User. + gpus_per_socket = 6 + gpus_per_task (Union[dict, str, int]): + GPUs for the Job to be allocated per task. - This is the same as --gid from sbatch. - """ + This is the same as --gpus-per-task from sbatch. + + Specifying the type of the GPU is optional. Note that setting + "gpus_per_task" requires to also specify either one of "ntasks" or + "gpus". + + Examples: + Specifying it as a dict: + + gpus_per_task = { + "tesla": 1, + "volta": 5, + } + + Or, for example, in string format: + + gpus_per_task = "tesla:1,volta:5" + + Or, if you don't care about the type of the GPU: + + gpus_per_task = 6 + gres_per_node (Union[dict, str]): + Generic resources to be allocated per node. + + This is the same as --gres from sbatch. You should also use this + option if you want to specify GPUs per node (--gpus-per-node). + Specifying the type (by seperating GRES name and type with a + semicolon) is optional. + + Examples: + Specifying it as a dict: + + gres_per_node = { + "gpu:tesla": 1, + "gpu:volta": 5, + } + + Or, for example, in string format: + + gres_per_node = "gpu:tesla:1,gpu:volta:5" + + GPU Gres without a specific type: + + gres_per_node = "gpu:6" + gpu_binding (str): + Specify GPU binding for the Job. + This is the same as --gpu-bind from sbatch. + ntasks (int): + Maximum amount of tasks for the Job. + This is the same as -n/--ntasks from sbatch. + ntasks_per_node (int): + Amount of tasks to be invoked on each node. + This is the same as --ntasks-per-node from sbatch. + ntasks_per_socket (int): + Maximum amount of tasks to be invoked on each socket. + This is the same as --ntasks-per-socket from sbatch. + ntasks_per_core (int): + Maximum amount of tasks to be invoked on each core. + This is the same as --ntasks-per-core from sbatch. + ntasks_per_gpu (int): + Amount of tasks to be invoked per GPU. 
+ This is the same as --ntasks-per-socket from sbatch. + switches (Union[dict, str, int]): + Maximum amount of leaf switches and wait time desired. + + This can also optionally include a maximum waiting time for these + switches. + This is the same as --switches from sbatch. + + Examples: + Specifying it as a dict: + + switches = { "count": 5, "max_wait_time": "00:10:00" } + + Or as a single string (sbatch-style): + + switches = "5@00:10:00" + signal (Union[dict, str]): + Warn signal to be sent to the Job. + + This is the same as --signal from sbatch. + The signal can both be specified with its name, e.g. "SIGKILL", or + as a number, e.g. 9 + + Examples: + Specifying it as a dict: + + signal = { + "signal": "SIGKILL", + "time": 120 + } + + The above will send a "SIGKILL" signal 120 seconds before the + Jobs' time limit is reached. + + Or, specifying it as a string (sbatch-style): + + signal = "SIGKILL@120" + standard_in (str): + Path to a File acting as standard_in for the batch-script. + This is the same as -i/--input from sbatch. + standard_in (str): + Path to a File acting as standard_in for the batch-script. + This is the same as -i/--input from sbatch. + standard_output (str): + Path to a File to write the Jobs standard_output. + This is the same as -o/--output from sbatch. + kill_on_invalid_dependency (bool): + Kill the job if it has an invalid dependency. + This is the same as --kill-on-invalid-dep from sbatch. + spreads_over_nodes (bool): + Spread the Job over as many nodes as possible. + This is the same as --spread-job from sbatch. + use_min_nodes (bool): + Prefer the minimum amount of nodes specified. + This is the same as --use-min-nodes from sbatch. + gres_binding (str): + Generic resource task binding options. + This is the --gres-flags option from sbatch. + + Possible values are: + * "enforce-binding" + * "disable-binding" + temporary_disk_per_node (Union[str, int]): + Amount of temporary disk space needed per node. + + This is the same as --tmp from sbatch. You can specify units like + K|M|G|T (multiples of 1024). + If no unit is specified, the value will be assumed as Mebibytes. + + Examples: + # 2048 MiB + tmp_disk_per_node = "2G" + + # 1024 MiB + tmp_disk_per_node = 1024 + get_user_environment (Union[str, bool, int]): + TODO + min_cpus_per_node (str): + Set the minimum amount of CPUs required per Node. + This is the same as --mincpus from sbatch. + wait_all_nodes (bool): + Controls when the execution of the command begins. + + A value of True means that the Job should begin execution only + after all nodes in the allocation are ready. Setting it to False, + the default, means that it is not waited for the nodes to be + ready. (i.e booted) + """ + cdef: + slurm.job_desc_msg_t *ptr + is_update + cdef public: + name + account + uid + gid priority - """int: A specific Priority the Job will receive. - - You can achieve the behaviour of sbatch's --hold option by - specifying a priority of 0. - - This is the same as --priority from sbatch. - """ - site_factor - """int: Site Factor for the Job. - - This is only used for updating an already existing Job. It will - not be honored in the job submission. - """ - wckey - """str: WCKey to be used with the Job. - - This is the same as --wckey from sbatch. - """ - array - """str: An Array specification for the Job - - This is the same as -a/--array from sbatch. - """ - batch_constraints - """str: Batch Features for a Job - - This is the same as --batch from sbatch. - """ - begin_time - """str: Defer allocation until the specified time. 
- - This is the same as --begin from sbatch. - """ - clusters - """Union[list, str]: Clusters the job may run on. - - This is the same as -M/--clusters from sbatch. - """ - cluster_constraints - """str: Comma-separated str with cluster constraints for the job. - - This is the same as --cluster-constraint from sbatch. - """ - comment - """str: An arbitrary comment for the job. - - This is the same as --comment from sbatch. - """ - admin_comment - """str: An arbitrary admin-comment for the job - - This is only used when updating an already existing Job. Setting - it for new Submissions does nothing. - """ - - contiguous - """bool: Whether allocated Nodes should form a contiguous set - - This is the same as --contiguous from sbatch. - """ - + requires_contiguous_nodes cores_reserved_for_system - """int: Count of cores reserved for system not usable by the Job. - - This is the same as -S/--core-spec from sbatch. - This is mutually exclusive with `threads_reserved_for_system`. - """ - threads_reserved_for_system - """int: Count of threads reserved for system not usable by the Job. - - This is the same as --thread-spec from sbatch. - This is mutually exclusive with `cores_reserved_for_system`. - """ - - work_dir - """str: Work directory for the job. Default is current work-dir. - - This is the same as -D/--chdir from sbatch. - """ - - cpu_freq - """Union[dict, str]: Specify the CPU Frequency for the Job. - - This is the same as --cpu-freq from sbatch. - - Examples: - Specifying it as a dict: - - cpu_freq = { - "min": "Low", - "max": "High", - "governor": "UserSpace" - } - - or like in sbatch with a string. For more info on that, check out - the sbatch documentation for --cpu-freq. - - If you only want to set a Governor without any min or max, you can - simply specify it as a standalone string: - - cpu_freq = "Performance" - or - cpu_freq = {"governor": "Performance"} - - If you want to set a specific, fixed frequency, you can do: - - cpu_freq = - or either - cpu_freq = {"max": } or cpu_freq = {"min": } - """ - + working_directory + cpu_frequency nodes - """Union[dict, str, int]: Amount of nodes needed for the job. - - This is the same as -N/--nodes from sbatch. - - Examples: - Providing min/max nodes as a dict: - - nodes = { - "min": 3, - "max": 6 - } - - When no range is needed, you can also simply specify it as int: - - nodes = 3 - - Other than that, a range can also be specified in a str like with - sbatch: - - nodes = "1-5" - """ - deadline - """str: Deadline specification for the Job. - - This is the same as --deadline from sbatch. - """ - - delay_boot - """Union[str, int]: Delay boot specification for the Job. - - This is the same as --delay-boot from sbatch. - """ - + delay_boot_time dependencies - """Union[dict, str]: Dependencies for the Job. - - This is the same as -d/--dependency from sbatch. - """ - excluded_nodes - """Union[list, str]: Exclude specific nodes for this Job. - - This is the same as -x/--exclude from sbatch. - """ - required_nodes - """Union[list, str]: Specific list of nodes required for the Job. - - This is the same as -w/--nodelist from sbatch. - """ - constraints - """str: Required node features for the Job. - - This is the same as -C/--constraint from sbatch. - """ - kill_on_node_fail - """bool: Should the job get killed if one of the Nodes fails? - - This is the same as -k/--no-kill from sbatch. - """ - licenses - """Union[list, str]: A list of licenses for the Job. - - This is the same as -L/--licenses from sbatch. 
- """ - mail_user - """Union[list, str]: List of email addresses for notifications. - - This is the same as --mail-user from sbatch. - """ - - mail_type - """Union[list, str]: List of mail flags. - - This is the same as --mail-type from sbatch. - """ - + mail_types mcs_label - """str: An MCS Label for the Job. - - This is the same as --mcs-label from sbatch. - """ - - mem_per_cpu - """Union[str, int]: Memory required per allocated CPU. - - The default unit is in Mebibytes. You are also able to specify unit - suffixes like K|M|G|T. - This is the same as --mem-per-cpu from sbatch. - This is mutually exclusive with mem_per_node and mem_per_gpu. - - Examples: - # 1 MiB - mem_per_cpu = 1024 - - # 3 GiB - mem_per_cpu = "3G" - """ - - mem_per_node - """Union[str, int]: Memory required per whole node. - - The default unit is in Mebibytes. You are also able to specify unit - suffixes like K|M|G|T. - This is the same as --mem from sbatch. - This is mutually exclusive with mem_per_cpu and mem_per_gpu. - - Examples: - # 1 MiB - mem_per_node = 1024 - - # 3 GiB - mem_per_node = "3G" - """ - - mem_per_gpu - """Union[str, int]: Memory required per GPU. - - The default unit is in Mebibytes. You are also able to specify unit - suffixes like K|M|G|T. - This is the same as --mem-per-gpu from sbatch. - This is mutually exclusive with mem_per_node and mem_per_cpu. - - Examples: - # 1 MiB - mem_per_gpu = 1024 - - # 3 GiB - mem_per_gpu = "3G" - """ - + memory_per_cpu + memory_per_node + memory_per_gpu network - """str: Network types for the Job. - - This is the same as --network from sbatch. - """ - nice - """int: Adjusted scheduling priority for the Job. - - This is the same as --nice from sbatch. - """ - log_files_open_mode - """str: Mode in which stdout and stderr log files should be opened. - - Valid options are: - * append - * truncate - - This is the same as --open-mode from sbatch. - """ - overcommit - """bool: If the resources should be overcommitted. - - This is the same as -O/--overcommit from sbatch. - """ - partitions - """Union[list, str]: A list of partitions the Job may use. - - This is the same as -p/--partition from sbatch. - """ - power_options - """list: A list of power management plugin options for the Job. - - This is the same as --power from sbatch. - """ - - profile - """list: List of types for the acct_gather_profile plugin. - - This is the same as --profile from sbatch. - """ - - accounting_gather_freq - """Union[dict, str]: Interval for accounting info to be gathered. - - This is the same as --acctg-freq from sbatch. - - Examples: - Specifying it as a dict: - - accounting_gather_freq = { - energy=60, - network=20, - } - - or as a single string: - - accounting_gather_freq = "energy=60,network=20" - """ - + profile_types + accounting_gather_frequency qos - """str: Quality of Service for the Job. - - This is the same as -q/--qos from sbatch. - """ - - reboot_nodes - """bool: Force the allocated nodes to reboot before the job starts. - - This is the same --reboot from sbatch. - """ - + requires_node_reboot is_requeueable - """bool: If the Job is eligible for requeuing. - - This is the same as --requeue from sbatch. - """ - reservations - """Union[list, str]: A list of possible reservations the Job can use. - - This is the same as --reservation from sbatch. - """ - script - """str: Absolute Path or content of the batch script. - - You can specify either a path to a script which will be loaded, or - you can pass the script as a string. 
- If the script is passed as a string, providing arguments to it - (see "script_args") is not supported. - """ - script_args - """str: Arguments passed to the batch script. - - You can only set arguments if a file path was specified for "script". - """ - environment - """Union[dict, str]: Environment variables to be set for the Job. - - This is the same as --export from sbatch. - """ - resource_sharing - """str: Controls the resource sharing with other Jobs. - - This property combines functionality of --oversubscribe and - --exclusive from sbatch. - - Allowed values are are: - - * "oversubscribe" or "yes": - The Job allows resources to be shared with other running Jobs. - - * "user" - Only sharing resources with other Jobs that have the "user" option - set is allowed - - * "mcs" - Only sharing resources with other Jobs that have the "mcs" option - set is allowed. - - * "no" or "exclusive" - No sharing of resources is allowed. (--exclusive from sbatch) - """ - distribution - """TODO""" - time_limit - """str: The time limit for the job. - - This is the same as -t/--time from sbatch. - """ - time_limit_min - """str: A minimum time limit for the Job. - - This is the same as --time-min from sbatch. - """ - container - """str: Path to an OCI container bundle. - - This is the same as --container from sbatch. - """ - cpus_per_task - """int: The amount of cpus required for each task. - - This is the same as -c/--cpus-per-task from sbatch. - This is mutually exclusive with cpus_per_gpu. - """ - cpus_per_gpu - """int: The amount of cpus required for each allocated GPU. - - This is the same as --cpus-per-gpu from sbatch. - This is mutually exclusive with cpus_per_task. - """ - sockets_per_node - """int: Restrict Job to nodes with atleast this many sockets. - - This is the same as --sockets-per-node from sbatch. - """ - cores_per_socket - """int: Restrict Job to nodes with atleast this many cores per socket - - This is the same as --cores-per-socket from sbatch. - """ - threads_per_core - """int: Restrict Job to nodes with atleast this many threads per socket - - This is the same as --threads-per-core from sbatch. - """ - gpus - """Union[dict, str, int]: GPUs for the Job to be allocated in total. - - This is the same as -G/--gpus from sbatch. - Specifying the type of the GPU is optional. - - Examples: - Specifying the GPU counts as a dict: - - gpus = { - "tesla": 1, - "volta": 5, - } - - Or, for example, in string format: - - gpus = "tesla:1,volta:5" - - Or, if you don't care about the type of the GPU: - - gpus = 6 - """ - gpus_per_socket - """Union[dict, str, int]: GPUs for the Job to be allocated per socket. - - This is the same as --gpus-per-socket from sbatch. - - Specifying the type of the GPU is optional. Note that setting - gpus_per_socket requires to also specify sockets_per_node. - - Examples: - Specifying it as a dict: - - gpus_per_socket = { - "tesla": 1, - "volta": 5, - } - - Or, for example, in string format: - - gpus_per_socket = "tesla:1,volta:5" - - Or, if you don't care about the type of the GPU: - - gpus_per_socket = 6 - """ - gpus_per_task - """Union[dict, str, int]: GPUs for the Job to be allocated per task. - - This is the same as --gpus-per-task from sbatch. - - Specifying the type of the GPU is optional. Note that setting - "gpus_per_task" requires to also specify either one of "ntasks" or - "gpus". 
- - Examples: - Specifying it as a dict: - - gpus_per_task = { - "tesla": 1, - "volta": 5, - } - - Or, for example, in string format: - - gpus_per_task = "tesla:1,volta:5" - - Or, if you don't care about the type of the GPU: - - gpus_per_task = 6 - """ - gres_per_node - """Union[dict, str]: Generic resources to be allocated per node. - - This is the same as --gres from sbatch. You should also use this - option if you want to specify GPUs per node (--gpus-per-node). - Specifying the type (by seperating GRES name and type with a - semicolon) is optional. - - Examples: - Specifying it as a dict: - - gres_per_node = { - "gpu:tesla": 1, - "gpu:volta": 5, - } - - Or, for example, in string format: - - gres_per_node = "gpu:tesla:1,gpu:volta:5" - - GPU Gres without a specific type: - - gres_per_node = "gpu:6" - """ - gpu_binding - """str: Specify GPU binding for the Job. - - This is the same as --gpu-bind from sbatch. - """ - ntasks - """int: Maximum amount of tasks for the Job. - - This is the same as -n/--ntasks from sbatch. - """ - ntasks_per_node - """int: Amount of tasks to be invoked on each node. - - This is the same as --ntasks-per-node from sbatch. - """ - ntasks_per_socket - """int: Maximum amount of tasks to be invoked on each socket. - - This is the same as --ntasks-per-socket from sbatch. - """ - ntasks_per_core - """int: Maximum amount of tasks to be invoked on each core. - - This is the same as --ntasks-per-core from sbatch. - """ - ntasks_per_gpu - """int: Amount of tasks to be invoked per GPU. - - This is the same as --ntasks-per-socket from sbatch. - """ - switches - """Union[dict, str, int]: Maximum amount of leaf switches desired. - - This can also optionally include a maximum waiting time for these - switches. - This is the same as --switches from sbatch. - - Examples: - Specifying it as a dict: - - switches = { "count": 5, "max_wait_time": "00:10:00" } - - Or as a single string (sbatch-style): - - switches = "5@00:10:00" - """ - signal - """Union[dict, str]: Warn signal to be sent to the Job. - - This is the same as --signal from sbatch. - The signal can both be specified with its name, e.g. "SIGKILL", or - as a number, e.g. 9 - - Examples: - Specifying it as a dict: - - signal = { - "signal": "SIGKILL", - "time": 120 - } - - The above will send a "SIGKILL" signal 120 seconds before the - Jobs' time limit is reached. - - Or, specifying it as a string (sbatch-style): - - signal = "SIGKILL@120" - """ - - stdin - """str: Path to a File acting as stdin for the batch-script. - - This is the same as -i/--input from sbatch. - """ - - stdout - """str: Path to a File to write the Jobs stdout. - - This is the same as -o/--output from sbatch. - """ - - stderr - """str: Path to a File to write the Jobs stderr. - - This is the same as -e/--error from sbatch. - """ - + standard_in + standard_output + standard_error kill_on_invalid_dependency - """bool: Kill the job if it has an invalid dependency. - - This is the same as --kill-on-invalid-dep from sbatch. - """ - - spread_job - """bool: Spread the Job over as many nodes as possible. - - This is the same as --spread-job from sbatch. - """ - + spreads_over_nodes use_min_nodes - """bool: Prefer the minimum amount of nodes specified. - - This is the same as --use-min-nodes from sbatch. - """ - - gres_flags - """str: Generic resource task binding options. - - This is the --gres-flags option from sbatch. 
- - Possible values are: - * "enforce-binding" - * "disable-binding" - """ - - tmp_disk_per_node - """Union[str, int]: Amount of temporary disk space needed per node. - - This is the same as --tmp from sbatch. You can specify units like - K|M|G|T (multiples of 1024). - If no unit is specified, the value will be assumed as Mebibytes. - - Examples: - # 2048 MiB - tmp_disk_per_node = "2G" - - # 1024 MiB - tmp_disk_per_node = 1024 - """ - + gres_binding + temporary_disk_per_node get_user_environment - """TODO""" - min_cpus_per_node - """str: Set the minimum amount of CPUs required per Node. - - This is the same as --mincpus from sbatch. - """ - wait_all_nodes - """bool: Controls when the execution of the command begins. - - A value of True means that the Job should begin execution only after - all nodes in the allocation are ready. Setting it to False, the - default, means that it is not waited for the nodes to be ready. (i.e - booted) - """ - diff --git a/pyslurm/core/job/submission.pyx b/pyslurm/core/job/submission.pyx index 47eb627b..c466dba7 100644 --- a/pyslurm/core/job/submission.pyx +++ b/pyslurm/core/job/submission.pyx @@ -17,7 +17,6 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: embedsignature=True # cython: c_string_type=unicode, c_string_encoding=utf8 # cython: language_level=3 @@ -55,7 +54,6 @@ from pyslurm.core.common import ( cdef class JobSubmitDescription: - """Slurm Job Submission""" def __cinit__(self): self.ptr = NULL @@ -80,7 +78,7 @@ cdef class JobSubmitDescription: """Submit a batch job description. Returns: - int: The ID of the submitted Job. + (int): The ID of the submitted Job. Raises: RPCError: When the job submission was not successful. 
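# A hedged end-to-end sketch of the submit() call documented above. It
# assumes a default-constructed JobSubmitDescription is filled in
# attribute-by-attribute; the script content, name and limits are
# placeholders, not defaults of the new API.
from pyslurm import JobSubmitDescription, RPCError

desc = JobSubmitDescription()
desc.name = "rework-demo"
desc.ntasks = 1
desc.cpus_per_task = 2
desc.time_limit = "00:10:00"                # -t/--time
desc.standard_output = "rework-demo.out"    # -o/--output
desc.script = "#!/bin/bash\nsrun hostname"  # batch script passed as a string

try:
    job_id = desc.submit()                  # returns the new job id (int)
    print("submitted job", job_id)
except RPCError as err:
    print("submission failed:", err)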
@@ -168,17 +166,16 @@ cdef class JobSubmitDescription: cstr.fmalloc(&ptr.batch_features, self.batch_constraints) cstr.fmalloc(&ptr.cluster_features, self.cluster_constraints) cstr.fmalloc(&ptr.comment, self.comment) - cstr.fmalloc(&ptr.work_dir, self.work_dir) + cstr.fmalloc(&ptr.work_dir, self.working_directory) cstr.fmalloc(&ptr.features, self.constraints) cstr.fmalloc(&ptr.mail_user, self.mail_user) cstr.fmalloc(&ptr.mcs_label, self.mcs_label) - cstr.fmalloc(&ptr.work_dir, self.work_dir) cstr.fmalloc(&ptr.network, self.network) cstr.fmalloc(&ptr.qos, self.qos) cstr.fmalloc(&ptr.container, self.container) - cstr.fmalloc(&ptr.std_in, self.stdin) - cstr.fmalloc(&ptr.std_out, self.stdout) - cstr.fmalloc(&ptr.std_err, self.stderr) + cstr.fmalloc(&ptr.std_in, self.standard_in) + cstr.fmalloc(&ptr.std_out, self.standard_output) + cstr.fmalloc(&ptr.std_err, self.standard_error) cstr.fmalloc(&ptr.tres_per_job, cstr.from_gres_dict(self.gpus, "gpu")) cstr.fmalloc(&ptr.tres_per_socket, cstr.from_gres_dict(self.gpus_per_socket, "gpu")) @@ -196,11 +193,11 @@ cdef class JobSubmitDescription: cstr.from_list(&ptr.licenses, self.licenses) cstr.from_list(&ptr.partition, self.partitions) cstr.from_list(&ptr.reservation, self.reservations) - cstr.from_dict(&ptr.acctg_freq, self.accounting_gather_freq) + cstr.from_dict(&ptr.acctg_freq, self.accounting_gather_frequency) ptr.deadline = date_to_timestamp(self.deadline) ptr.begin_time = date_to_timestamp(self.begin_time) - ptr.delay_boot = timestr_to_secs(self.delay_boot) + ptr.delay_boot = timestr_to_secs(self.delay_boot_time) ptr.time_limit = timestr_to_mins(self.time_limit) ptr.time_min = timestr_to_mins(self.time_limit_min) @@ -208,7 +205,7 @@ cdef class JobSubmitDescription: ptr.group_id = group_to_gid(self.gid) ptr.priority = u32(self.priority, zero_is_noval=False) ptr.num_tasks = u32(self.ntasks) - ptr.pn_min_tmp_disk = u32(dehumanize(self.tmp_disk_per_node)) + ptr.pn_min_tmp_disk = u32(dehumanize(self.temporary_disk_per_node)) ptr.cpus_per_task = u16(self.cpus_per_task) ptr.sockets_per_node = u16(self.sockets_per_node) ptr.cores_per_socket = u16(self.cores_per_socket) @@ -217,21 +214,22 @@ cdef class JobSubmitDescription: ptr.ntasks_per_node = u16(self.ntasks_per_node) ptr.threads_per_core = u16(self.threads_per_core) ptr.ntasks_per_core = u16(self.ntasks_per_core) - u64_set_bool_flag(&ptr.bitflags, self.spread_job, slurm.SPREAD_JOB) + u64_set_bool_flag(&ptr.bitflags, self.spreads_over_nodes, + slurm.SPREAD_JOB) u64_set_bool_flag(&ptr.bitflags, self.kill_on_invalid_dependency, slurm.KILL_INV_DEP) u64_set_bool_flag(&ptr.bitflags, self.use_min_nodes, slurm.USE_MIN_NODES) - ptr.contiguous = u16_bool(self.contiguous) + ptr.contiguous = u16_bool(self.requires_contiguous_nodes) ptr.kill_on_node_fail = u16_bool(self.kill_on_node_fail) ptr.overcommit = u8_bool(self.overcommit) - ptr.reboot = u16_bool(self.reboot_nodes) + ptr.reboot = u16_bool(self.requires_node_reboot) ptr.requeue = u16_bool(self.is_requeueable) ptr.wait_all_nodes = u16_bool(self.wait_all_nodes) - ptr.mail_type = parse_mail_type(self.mail_type) + ptr.mail_type = parse_mail_type(self.mail_types) ptr.power_flags = parse_power_type(self.power_options) - ptr.profile = parse_acctg_profile(self.profile) + ptr.profile = parse_acctg_profile(self.profile_types) ptr.shared = parse_shared_type(self.resource_sharing) self._set_cpu_frequency() @@ -244,6 +242,7 @@ cdef class JobSubmitDescription: self._set_environment() self._set_distribution() self._set_gpu_binding() + self._set_gres_binding() 
self._set_min_cpus() # TODO @@ -259,8 +258,8 @@ cdef class JobSubmitDescription: self.ntasks = 1 if not self.cpus_per_task: self.cpus_per_task = 1 - if not self.work_dir: - self.work_dir = str(getcwd()) + if not self.working_directory: + self.working_directory = str(getcwd()) if not self.environment: # By default, sbatch also exports everything in the users env. self.environment = "ALL" @@ -269,11 +268,11 @@ cdef class JobSubmitDescription: if not self.script: raise ValueError("You need to provide a batch script.") - if (self.mem_per_node and self.mem_per_cpu - or self.mem_per_gpu and self.mem_per_cpu - or self.mem_per_node and self.mem_per_gpu): - raise ValueError("Only one of mem_per_cpu, mem_per_node or " - "mem_per_gpu can be set.") + if (self.memory_per_node and self.memory_per_cpu + or self.memory_per_gpu and self.memory_per_cpu + or self.memory_per_node and self.memory_per_gpu): + raise ValueError("Only one of memory_per_cpu, memory_per_node or " + "memory_per_gpu can be set.") if (self.ntasks_per_gpu and (self.ptr.min_nodes != u32(None) or self.nodes @@ -300,10 +299,10 @@ cdef class JobSubmitDescription: self.ptr.core_spec |= slurm.CORE_SPEC_THREAD def _set_cpu_frequency(self): - if not self.cpu_freq: + if not self.cpu_frequency: return None - freq = self.cpu_freq + freq = self.cpu_frequency have_no_range = False # Alternatively support sbatch-like --cpu-freq setting. @@ -318,7 +317,7 @@ cdef class JobSubmitDescription: else: if freq_len > 1: raise ValueError( - "Invalid cpu_freq format: {kwargs}." + "Invalid cpu_frequency format: {kwargs}." "Governor must be provided as single element or " "as last element in the form of min-max:governor. " ) @@ -424,12 +423,12 @@ cdef class JobSubmitDescription: cstr.fmalloc(&self.ptr.dependency, final) def _set_memory(self): - if self.mem_per_cpu: - self.ptr.pn_min_memory = u64(dehumanize(self.mem_per_cpu)) + if self.memory_per_cpu: + self.ptr.pn_min_memory = u64(dehumanize(self.memory_per_cpu)) self.ptr.pn_min_memory |= slurm.MEM_PER_CPU - elif self.mem_per_node: - self.ptr.pn_min_memory = u64(dehumanize(self.mem_per_node)) - elif self.mem_per_gpu: + elif self.memory_per_node: + self.ptr.pn_min_memory = u64(dehumanize(self.memory_per_node)) + elif self.memory_per_gpu: mem_gpu = u64(dehumanize(val)) cstr.fmalloc(&self.ptr.mem_per_tres, f"gres:gpu:{mem_gpu}") @@ -654,10 +653,10 @@ cdef class JobSubmitDescription: u16_set_bool_flag(&self.ptr.warn_flags, allow_resv_overlap, slurm.KILL_JOB_RESV) - def _set_gres_flags(self): - if not self.gres_flags: + def _set_gres_binding(self): + if not self.gres_binding: return None - elif self.gres_flags.casefold() == "enforce-binding": + elif self.gres_binding.casefold() == "enforce-binding": self.ptr.bitflags |= slurm.GRES_ENFORCE_BIND - elif self.gres_flags.casefold() == "disable-binding": + elif self.gres_binding.casefold() == "disable-binding": self.ptr.bitflags |= slurm.GRES_DISABLE_BIND diff --git a/pyslurm/core/node.pxd b/pyslurm/core/node.pxd index 14779d20..770a797d 100644 --- a/pyslurm/core/node.pxd +++ b/pyslurm/core/node.pxd @@ -17,7 +17,6 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: embedsignature=True # cython: c_string_type=unicode, c_string_encoding=utf8 # cython: language_level=3 @@ -65,10 +64,24 @@ cdef class Nodes(dict): Node instances. The default is False. Attributes: - free_memory_raw (int): - Amount of free memory in this node collection. 
(Mebibytes) - free_memory (str): - Humanized amount of free memory in this node collection. + free_memory (int): + Amount of free memory in this node collection. (in Mebibytes) + real_memory (int): + Amount of real memory in this node collection. (in Mebibytes) + allocated_memory (int): + Amount of alloc Memory in this node collection. (in Mebibytes) + total_cpus (int): + Total amount of CPUs in this node collection. + idle_cpus (int): + Total amount of idle CPUs in this node collection. + allocated_cpus (int): + Total amount of allocated CPUs in this node collection. + effective_cpus (int): + Total amount of effective CPUs in this node collection. + current_watts (int): + Total amount of Watts consumed in this node collection. + average_watts (int): + Amount of average watts consumed in this node collection. Raises: RPCError: When getting all the Nodes from the slurmctld failed. @@ -105,6 +118,99 @@ cdef class Node: Name of the node. architecture (str): Architecture of the node (e.g. x86_64) + configured_gres (dict): + Generic Resources this Node is configured with. + owner (str): + User that owns the Node. + address (str): + Address of the node. + hostname (str): + Hostname of the node. + extra (str): + Arbitrary string attached to the Node. + reason (str): + Reason why this node is in its current state. + reason_user (str): + Name of the User who set the reason. + comment (str): + Arbitrary node comment. + bcast_address (str): + Address of the node for sbcast. + slurm_version (str): + Version of slurm this node is running on. + operating_system (str): + Name of the operating system installed. + allocated_gres (dict): + Generic Resources currently in use on the node. + mcs_label (str): + MCS label for the node. + allocated_memory (int): + Memory in Mebibytes allocated on the node. + real_memory (int): + Real Memory in Mebibytes configured for this node. + free_memory (int): + Free Memory in Mebibytes on the node. + memory_reserved_for_system (int): + Raw Memory in Mebibytes reserved for the System not usable by + Jobs. + temporary_disk_space_per_node (int): + Amount of temporary disk space this node has, in Mebibytes. + weight (int): + Weight of the node in scheduling. + effective_cpus (int): + Number of effective CPUs the node has. + total_cpus (int): + Total amount of CPUs the node has. + sockets (int): + Number of sockets the node has. + cores_reserved_for_system (int): + Number of cores reserved for the System not usable by Jobs. + boards (int): + Number of boards the node has. + cores_per_socket (int): + Number of cores per socket configured for the node. + threads_per_core (int): + Number of threads per core configured for the node. + available_features (list): + List of features available on the node. + active_features (list): + List of features on the node. + partitions (list): + List of partitions this Node is part of. + boot_time (int): + Time the node has booted, as unix timestamp. + slurmd_start_time (int): + Time the slurmd has started on the Node, as unix timestamp. + last_busy_time (int): + Time this node was last busy, as unix timestamp. + reason_time (int): + Time the reason was set for the node, as unix timestamp. + allocated_cpus (int): + Number of allocated CPUs on the node. + idle_cpus (int): + Number of idle CPUs. + cpu_binding (str): + Default CPU-Binding on the node. + cap_watts (int): + Node cap watts. + current_watts (int): + Current amount of watts consumed on the node. + average_watts (int): + Average amount of watts consumed on the node. 
+ external_sensors (dict): + External Sensor info for the Node. + The dict returned contains the following information: + * joules_total (int) + * current_watts (int) + * temperature (int) + state (str): + State the node is currently in. + next_state (str): + Next state the node will be in. + cpu_load (float): + CPU Load on the Node. + slurmd_port (int): + Port the slurmd is listening on the node. Raises: MemoryError: If malloc fails to allocate memory. diff --git a/pyslurm/core/node.pyx b/pyslurm/core/node.pyx index e2605841..00601d0e 100644 --- a/pyslurm/core/node.pyx +++ b/pyslurm/core/node.pyx @@ -17,7 +17,6 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: embedsignature=True # cython: c_string_type=unicode, c_string_encoding=utf8 # cython: language_level=3 @@ -99,66 +98,44 @@ cdef class Nodes(dict): """Format the information as list of Node objects. Returns: - list: List of Node objects + (list): List of Node objects """ return list(self.values()) - @property - def free_memory_raw(self): - return _sum_prop(self, Node.free_memory) - @property def free_memory(self): - return humanize(self.free_memory_raw, 2) - - @property - def real_memory_raw(self): - """int: Amount of real memory in this node collection. (Mebibytes)""" - return _sum_prop(self, Node.real_memory) + return _sum_prop(self, Node.free_memory) @property def real_memory(self): - """str: Humanized amount of real memory in this node collection.""" - return humanize(self.real_memory_raw, 2) - - @property - def alloc_memory_raw(self): - """int: Amount of alloc Memory in this node collection. (Mebibytes)""" - return _sum_prop(self, Node.alloc_memory) + return _sum_prop(self, Node.real_memory) @property - def alloc_memory(self): - """str: Total amount of allocated Memory in this node collection.""" - return humanize(self.alloc_memory_raw, 2) + def allocated_memory(self): + return _sum_prop(self, Node.allocated_memory) @property def total_cpus(self): - """int: Total amount of CPUs in this node collection.""" return _sum_prop(self, Node.total_cpus) @property def idle_cpus(self): - """int: Total amount of idle CPUs in this node collection.""" return _sum_prop(self, Node.idle_cpus) @property - def alloc_cpus(self): - """int: Total amount of allocated CPUs in this node collection.""" - return _sum_prop(self, Node.alloc_cpus) + def allocated_cpus(self): + return _sum_prop(self, Node.allocated_cpus) @property def effective_cpus(self): - """int: Total amount of effective CPUs in this node collection.""" return _sum_prop(self, Node.effective_cpus) @property def current_watts(self): - """int: Total amount of Watts consumed in this node collection.""" return _sum_prop(self, Node.current_watts) @property def average_watts(self): - """int: Amount of average watts consumed in this node collection.""" return _sum_prop(self, Node.average_watts) @@ -231,7 +208,7 @@ cdef class Node: of an instance. Using the Node object returned is optional. Returns: - Node: This function returns the current Node-instance object + (Node): This function returns the current Node-instance object itself. Raises: @@ -293,7 +270,7 @@ cdef class Node: are "future" and "cloud". "future" is the default. Returns: - Node: This function returns the current Node-instance object + (Node): This function returns the current Node-instance object itself. Raises: @@ -375,7 +352,7 @@ cdef class Node: """Node information formatted as a dictionary. 
Returns: - dict: Node information as dict + (dict): Node information as dict """ return instance_to_dict(self) @@ -393,7 +370,6 @@ cdef class Node: @property def configured_gres(self): - """dict: Generic Resources this Node is configured with.""" return cstr.to_gres_dict(self.info.gres) @configured_gres.setter @@ -403,12 +379,10 @@ cdef class Node: @property def owner(self): - """str: User that owns the Node.""" return uid_to_name(self.info.owner, lookup=self.passwd) @property def address(self): - """str: Address of the node.""" return cstr.to_unicode(self.info.node_addr) @address.setter @@ -417,7 +391,6 @@ cdef class Node: @property def hostname(self): - """str: Hostname of the node.""" return cstr.to_unicode(self.info.node_hostname) @hostname.setter @@ -426,7 +399,6 @@ cdef class Node: @property def extra(self): - """str: Arbitrary string attached to the Node.""" return cstr.to_unicode(self.info.extra) @extra.setter @@ -435,17 +407,14 @@ cdef class Node: @property def reason(self): - """str: Reason why this node is in its current state.""" return cstr.to_unicode(self.info.reason) @property def reason_user(self): - """str: Name of the User who set the reason.""" return uid_to_name(self.info.reason_uid, lookup=self.passwd) @property def comment(self): - """str: Arbitrary node comment.""" return cstr.to_unicode(self.info.comment) @comment.setter @@ -454,32 +423,26 @@ cdef class Node: @property def bcast_address(self): - """str: Address of the node for sbcast.""" return cstr.to_unicode(self.info.bcast_address) @property def slurm_version(self): - """str: Version of slurm this node is running on.""" return cstr.to_unicode(self.info.version) @property def operating_system(self): - """str: Name of the operating system installed.""" return cstr.to_unicode(self.info.os) @property - def alloc_gres(self): - """dict: Generic Resources currently in use on the node.""" + def allocated_gres(self): return cstr.to_gres_dict(self.info.gres_used) @property def mcs_label(self): - """str: MCS label for the node.""" return cstr.to_unicode(self.info.mcs_label) @property - def alloc_memory_raw(self): - """int: Memory allocated on the node. (Mebibytes)""" + def allocated_memory(self): cdef uint64_t alloc_memory = 0 if self.info.select_nodeinfo: slurm_get_select_nodeinfo( @@ -489,54 +452,24 @@ cdef class Node: &alloc_memory) return u64_parse(alloc_memory) - @property - def alloc_memory(self): - """str: Memory allocated on the node.""" - return humanize(self.alloc_memory_raw, 2) - - @property - def real_memory_raw(self): - """int: Real Memory configured for this node. (Mebibytes)""" - return u64_parse(self.info.real_memory) - @property def real_memory(self): - """str: Humanized Real Memory configured for this node.""" - return humanize(self.real_memory_raw, 2) - - @property - def free_memory_raw(self): - """int: Free Memory on the node. 
(Mebibytes)""" - return u64_parse(self.info.free_mem) + return u64_parse(self.info.real_memory) @property def free_memory(self): - """str: Humanized Free Memory on the node.""" - return humanize(self.free_memory_raw, 2) - - @property - def memory_reserved_for_system_raw(self): - """int: Memory reserved for the System not usable by Jobs.""" - return u64_parse(self.info.mem_spec_limit) + return u64_parse(self.info.free_mem) @property def memory_reserved_for_system(self): - """str: Memory reserved for the System not usable by Jobs.""" - return humanize(self.memory_reserved_for_system_raw, 2) + return u64_parse(self.info.mem_spec_limit) @property - def tmp_disk_space_raw(self): - """int: Amount of temporary disk space this node has. (Mebibytes)""" + def temporary_disk_space(self): return u32_parse(self.info.tmp_disk) - @property - def tmp_disk_space(self): - """str: Amount of temporary disk space this node has.""" - return humanize(self.tmp_disk_space_raw) - @property def weight(self): - """int: Weight of the node in scheduling.""" return u32_parse(self.info.weight) @weight.setter @@ -545,42 +478,34 @@ cdef class Node: @property def effective_cpus(self): - """int: Number of effective CPUs the node has.""" return u16_parse(self.info.cpus_efctv) @property def total_cpus(self): - """int: Total amount of CPUs the node has.""" return u16_parse(self.info.cpus) @property def sockets(self): - """int: Number of sockets the node has.""" return u16_parse(self.info.sockets) @property def cores_reserved_for_system(self): - """int: Number of cores reserved for the System not usable by Jobs.""" return u16_parse(self.info.core_spec_cnt) @property def boards(self): - """int: Number of boards the node has.""" return u16_parse(self.info.boards) @property def cores_per_socket(self): - """int: Number of cores per socket configured for the node.""" return u16_parse(self.info.cores) @property def threads_per_core(self): - """int: Number of threads per core configured for the node.""" return u16_parse(self.info.threads) @property def available_features(self): - """list: List of features available on the node.""" return cstr.to_list(self.info.features) @available_features.setter @@ -589,7 +514,6 @@ cdef class Node: @property def active_features(self): - """list: List of features on the node.""" return cstr.to_list(self.info.features_act) @active_features.setter @@ -598,48 +522,23 @@ cdef class Node: @property def partitions(self): - """list: List of partitions this Node is in.""" return cstr.to_list(self.info.partitions) - @property - def boot_time_raw(self): - """int: Time the node has booted. (Unix timestamp)""" - return _raw_time(self.info.boot_time) - @property def boot_time(self): - """str: Time the node has booted. (formatted)""" - return timestamp_to_date(self.info.boot_time) - - @property - def slurmd_start_time_raw(self): - """int: Time the slurmd has started on the Node. (Unix timestamp)""" - return _raw_time(self.info.slurmd_start_time) + return _raw_time(self.info.boot_time) @property def slurmd_start_time(self): - """str: Time the slurmd has started on the Node. (formatted)""" - return timestamp_to_date(self.info.slurmd_start_time) - - @property - def last_busy_time_raw(self): - """int: Time this node was last busy. (Unix timestamp)""" - return _raw_time(self.info.last_busy) + return _raw_time(self.info.slurmd_start_time) @property def last_busy_time(self): - """str: Time this node was last busy. 
(formatted)""" - return timestamp_to_date(self.info.last_busy) - - @property - def reason_time_raw(self): - """int: Time the reason was set for the node. (Unix timestamp)""" - return _raw_time(self.info.reason_time) + return _raw_time(self.info.last_busy) @property def reason_time(self): - """str: Time the reason was set for the node. (formatted)""" - return timestamp_to_date(self.info.reason_time) + return _raw_time(self.info.reason_time) # @property # def tres_configured(self): @@ -659,8 +558,7 @@ cdef class Node: # return cstr.to_gres_dict(alloc_tres) @property - def alloc_cpus(self): - """int: Number of allocated CPUs on the node.""" + def allocated_cpus(self): cdef uint16_t alloc_cpus = 0 if self.info.select_nodeinfo: slurm_get_select_nodeinfo( @@ -673,16 +571,14 @@ cdef class Node: @property def idle_cpus(self): - """int: Number of idle CPUs.""" efctv = self.effective_cpus if not efctv: return None - return efctv - self.alloc_cpus + return efctv - self.allocated_cpus @property def cpu_binding(self): - """str: Default CPU-Binding on the node.""" cdef char cpu_bind[128] slurm_sprint_cpu_bind_type(cpu_bind, self.info.cpu_bind) @@ -697,35 +593,24 @@ cdef class Node: @property def cap_watts(self): - """int: Node cap watts.""" if not self.info.power: return None return u32_parse(self.info.power.cap_watts) @property def current_watts(self): - """int: Current amount of watts consumed on the node.""" if not self.info.energy: return None return u32_parse(self.info.energy.current_watts) @property def average_watts(self): - """int: Average amount of watts consumed on the node.""" if not self.info.energy: return None return u32_parse(self.info.energy.ave_watts) @property def external_sensors(self): - """ - dict: External Sensor info for the Node. - - The dict returned contains the following information: - * joules_total (int) - * current_watts (int) - * temperature (int) - """ if not self.info.ext_sensors: return {} @@ -737,7 +622,6 @@ cdef class Node: @property def state(self): - """str: State the node is currently in.""" cdef char* state = slurm_node_state_string_complete( self.info.node_state) state_str = cstr.to_unicode(state) @@ -746,7 +630,6 @@ cdef class Node: @property def next_state(self): - """str: Next state the node will be in.""" if ((self.info.next_state != slurm.NO_VAL) and (self.info.node_state & slurm.NODE_STATE_REBOOT_REQUESTED or self.info.node_state & slurm.NODE_STATE_REBOOT_ISSUED)): @@ -761,13 +644,11 @@ cdef class Node: @property def cpu_load(self): - """float: CPU Load on the Node.""" load = u32_parse(self.info.cpu_load) return load / 100.0 if load is not None else None @property - def port(self): - """int: Port the slurmd is listening on the node.""" + def slurmd_port(self): return u16_parse(self.info.port) diff --git a/tests/new_api/conftest.py b/tests/new_api/conftest.py index 7b49db69..ad195fb9 100644 --- a/tests/new_api/conftest.py +++ b/tests/new_api/conftest.py @@ -23,8 +23,8 @@ def create_simple_job_desc(script=None, **kwargs): job = JobSubmitDescription(**kwargs) job.name = "test_job" - job.stdout = "/tmp/slurm-test-%j.out" - job.mem_per_cpu = "1G" + job.standard_output = "/tmp/slurm-test-%j.out" + job.memory_per_cpu = "1G" job.ntasks = 2 job.cpus_per_task = 3 job.script = create_job_script() if not script else script diff --git a/tests/new_api/test_common.py b/tests/new_api/test_common.py index 5d502b90..eb4ea227 100644 --- a/tests/new_api/test_common.py +++ b/tests/new_api/test_common.py @@ -80,20 +80,20 @@ def test_dicts(self): input_as_dict = {"key1": 
"value1", "key2": "value2"} input_as_str = "key1=value1,key2=value2" - js.accounting_gather_freq = input_as_dict - assert js.accounting_gather_freq == input_as_dict + js.accounting_gather_frequency = input_as_dict + assert js.accounting_gather_frequency == input_as_dict - js.accounting_gather_freq = input_as_str - assert js.accounting_gather_freq == input_as_dict + js.accounting_gather_frequency = input_as_str + assert js.accounting_gather_frequency == input_as_dict - js.accounting_gather_freq = {} - assert js.accounting_gather_freq == {} + js.accounting_gather_frequency = {} + assert js.accounting_gather_frequency == {} - js.accounting_gather_freq = "" - assert js.accounting_gather_freq == {} + js.accounting_gather_frequency = "" + assert js.accounting_gather_frequency == {} - js.accounting_gather_freq = None - assert js.accounting_gather_freq == {} + js.accounting_gather_frequency = None + assert js.accounting_gather_frequency == {} def _uint_impl(self, func_set, func_get, typ): val = func_set(2**typ-2) @@ -159,7 +159,7 @@ def test_u8_bool(self): self._uint_bool_impl("overcommit") def test_u16_bool(self): - self._uint_bool_impl("contiguous") + self._uint_bool_impl("requires_contiguous_nodes") def test_u64_bool_flag(self): self._uint_bool_impl("kill_on_invalid_dependency") diff --git a/tests/new_api/test_job.py b/tests/new_api/test_job.py index 4056b5f1..24f5f63d 100644 --- a/tests/new_api/test_job.py +++ b/tests/new_api/test_job.py @@ -32,7 +32,7 @@ def test_reload(submit_job): assert job.id == jid assert job.ntasks == 2 assert job.cpus_per_task == 3 - assert job.time_limit == "1-00:00:00" + assert job.time_limit == 1440 with pytest.raises(RPCError): Job(99999).reload() @@ -98,7 +98,7 @@ def test_modify(submit_job): job.modify(changes) job.reload() - assert job.time_limit == "2-00:00:00" + assert job.time_limit == 2880 assert job.ntasks == 5 assert job.cpus_per_task == 4 diff --git a/tests/new_api/test_job_steps.py b/tests/new_api/test_job_steps.py index 1d3fbe75..f9cf54bb 100644 --- a/tests/new_api/test_job_steps.py +++ b/tests/new_api/test_job_steps.py @@ -53,7 +53,7 @@ def test_reload(submit_job): assert step.ntasks == 1 # Job was submitted with a time-limit of 1 day, but it seems this doesn't # propagate through for the steps if not set explicitly. 
- assert step.time_limit == "unlimited" + assert step.time_limit == None # Now try to load the first and second Step started by srun step_zero = JobStep(job, 0).reload() @@ -74,14 +74,14 @@ def test_reload(submit_job): assert step.name == "step_zero" assert step.ntasks == 1 assert step.alloc_cpus == 2 - assert step.time_limit == "unlimited" + assert step.time_limit == None step = step_one assert step.job_id == job.id assert step.name == "step_one" assert step.ntasks == 1 assert step.alloc_cpus == 3 - assert step.time_limit == "00:10:00" + assert step.time_limit == 10 def test_collection(submit_job): @@ -136,13 +136,13 @@ def test_modify(submit_job): time.sleep(1) step = JobStep(job, 0).reload() - assert step.time_limit == "00:20:00" + assert step.time_limit == 20 step.modify(JobStep(time_limit="00:05:00")) - assert step.reload().time_limit == "00:05:00" + assert step.reload().time_limit == 5 step.modify(time_limit="00:15:00") - assert step.reload().time_limit == "00:15:00" + assert step.reload().time_limit == 15 def test_send_signal(submit_job): diff --git a/tests/new_api/test_job_submit.py b/tests/new_api/test_job_submit.py index ff1d2858..c5c5039a 100644 --- a/tests/new_api/test_job_submit.py +++ b/tests/new_api/test_job_submit.py @@ -38,58 +38,58 @@ def test_environment(): # } -def test_cpu_frequency(): +def test_cpu_frequencyuency(): job = job_desc() job._create_job_submit_desc() - job.cpu_freq = "Performance" + job.cpu_frequency = "Performance" job._create_job_submit_desc() - job.cpu_freq = {"governor": "Performance"} + job.cpu_frequency = {"governor": "Performance"} job._create_job_submit_desc() - job.cpu_freq = 1000000 + job.cpu_frequency = 1000000 job._create_job_submit_desc() - job.cpu_freq = {"max": 1000000} + job.cpu_frequency = {"max": 1000000} job._create_job_submit_desc() - job.cpu_freq = "1000000-3700000" + job.cpu_frequency = "1000000-3700000" job._create_job_submit_desc() - job.cpu_freq = {"min": 1000000, "max": 3700000} + job.cpu_frequency = {"min": 1000000, "max": 3700000} job._create_job_submit_desc() - job.cpu_freq = "1000000-3700000:Performance" + job.cpu_frequency = "1000000-3700000:Performance" job._create_job_submit_desc() - job.cpu_freq = {"min": 1000000, "max": 3700000, + job.cpu_frequency = {"min": 1000000, "max": 3700000, "governor": "Performance"} job._create_job_submit_desc() with pytest.raises(ValueError, - match=r"Invalid cpu_freq format*"): - job.cpu_freq = "Performance:3700000" + match=r"Invalid cpu_frequency format*"): + job.cpu_frequency = "Performance:3700000" job._create_job_submit_desc() with pytest.raises(ValueError, match=r"min cpu-freq*"): - job.cpu_freq = "4000000-3700000" + job.cpu_frequency = "4000000-3700000" job._create_job_submit_desc() with pytest.raises(ValueError, match=r"Invalid cpu freq value*"): - job.cpu_freq = "3700000:Performance" + job.cpu_frequency = "3700000:Performance" job._create_job_submit_desc() with pytest.raises(ValueError, match=r"Setting Governor when specifying*"): - job.cpu_freq = {"max": 3700000, "governor": "Performance"} + job.cpu_frequency = {"max": 3700000, "governor": "Performance"} job._create_job_submit_desc() with pytest.raises(ValueError, match=r"Setting Governor when specifying*"): - job.cpu_freq = {"min": 3700000, "governor": "Performance"} + job.cpu_frequency = {"min": 3700000, "governor": "Performance"} job._create_job_submit_desc() @@ -256,10 +256,10 @@ def test_setting_attrs_with_env_vars(): pyenviron["PYSLURM_JOBDESC_WCKEY"] = "wckey" pyenviron["PYSLURM_JOBDESC_CLUSTERS"] = "cluster1,cluster2" 
pyenviron["PYSLURM_JOBDESC_COMMENT"] = "A simple job comment" - pyenviron["PYSLURM_JOBDESC_CONTIGUOUS"] = "True" - pyenviron["PYSLURM_JOBDESC_WORK_DIR"] = "/work/user1" + pyenviron["PYSLURM_JOBDESC_REQUIRES_CONTIGUOUS_NODES"] = "True" + pyenviron["PYSLURM_JOBDESC_WORKING_DIRECTORY"] = "/work/user1" - job = job_desc(work_dir="/work/user2") + job = job_desc(working_directory="/work/user2") job.load_environment() assert job.account == "account1" @@ -267,13 +267,13 @@ def test_setting_attrs_with_env_vars(): assert job.wckey == "wckey" assert job.clusters == "cluster1,cluster2" assert job.comment == "A simple job comment" - assert job.work_dir == "/work/user2" - assert job.contiguous == True + assert job.working_directory == "/work/user2" + assert job.requires_contiguous_nodes == True job._create_job_submit_desc() def test_parsing_sbatch_options_from_script(): - job = job_desc(work_dir="/work/user2") + job = job_desc(working_directory="/work/user2") fd, path = tempfile.mkstemp() try: @@ -295,7 +295,7 @@ def test_parsing_sbatch_options_from_script(): job.script = path job.load_sbatch_options() assert job.time_limit == "20" - assert job.mem_per_cpu == "1G" + assert job.memory_per_cpu == "1G" assert job.gpus == "1" assert job.resource_sharing == "no" assert job.ntasks == "2" From 3a63d1f5f56e007924c68f65738524239eb58607 Mon Sep 17 00:00:00 2001 From: tazend Date: Fri, 17 Mar 2023 23:50:40 +0100 Subject: [PATCH 07/28] Some changes to the Jobs class, add some new attributes --- pyslurm/core/job/job.pxd | 46 ++++++++++-- pyslurm/core/job/job.pyx | 143 ++++++++++++++++++++++++++++++++------ tests/new_api/test_job.py | 2 +- 3 files changed, 164 insertions(+), 27 deletions(-) diff --git a/pyslurm/core/job/job.pxd b/pyslurm/core/job/job.pxd index 65b829a2..d7e7971b 100644 --- a/pyslurm/core/job/job.pxd +++ b/pyslurm/core/job/job.pxd @@ -78,16 +78,42 @@ cdef class Jobs(dict): Could potentially speed up access to attributes of the Job where a UID/GID is translated to a name. If True, the information will fetched and stored in each of the Job - instances. The default is False. + instances. + freeze (bool, optional): + Decide whether this collection of Jobs should be "frozen". Raises: RPCError: When getting all the Jobs from the slurmctld failed. MemoryError: If malloc fails to allocate memory. + + Attributes: + memory (int): + Total amount of memory for all Jobs in this collection, in + Mebibytes + cpus (int): + Total amount of cpus for all Jobs in this collection. + ntasks (int): + Total amount of tasks for all Jobs in this collection. + cpu_time (int): + Total amount of CPU-Time used by all the Jobs in the collection. + This is the result of multiplying the run_time with the amount of + cpus for each job. + freeze (bool): + If this is set to True and the reload() method is called, then + *ONLY* Jobs that already exist in this collection will be + reloaded. New Jobs that are discovered will not be added to this + collection, but old Jobs which have already been purged from the + Slurm controllers memory will not be removed either. + The default is False, so old jobs will be removed, and new Jobs + will be added - basically the same behaviour as doing Jobs.get(). """ cdef: job_info_msg_t *info slurm_job_info_t tmp_info + cdef public: + freeze + cdef class Job: """A Slurm Job. @@ -175,8 +201,8 @@ cdef class Job: Name of the Host this Job was submitted from. batch_host (str): Name of the Host where the Batch-Script is executed. - min_nodes (int): - Minimum amount of Nodes the Job has requested. 
+ num_nodes (int): + Amount of Nodes the Job has requested or allocated. max_nodes (int): Maximum amount of Nodes the Job has requested. allocated_nodes (str): @@ -207,9 +233,10 @@ cdef class Job: Federation siblings active federation_siblings_viable (int): Federation siblings viable - allocated_cpus (int): + cpus (int): Total amount of CPUs the Job is using. - If the Job is still pending, this will be None. + If the Job is still pending, this will be the amount of requested + CPUs. cpus_per_task (int): Number of CPUs per Task used. cpus_per_gpu (int): @@ -318,6 +345,8 @@ cdef class Job: Amount of cores reserved for System use only. threads_reserved_for_system (int): Amount of Threads reserved for System use only. + memory (int): + Total Amount of Memory this Job has, in Mebibytes memory_per_cpu (int): Amount of Memory per CPU this Job has, in Mebibytes memory_per_node (int): @@ -340,6 +369,10 @@ cdef class Job: Whether this Job is a cronjob. cronjob_time (str): The time specification for the Cronjob. + cpu_time (int): + Amount of CPU-Time used by the Job so far. + This is the result of multiplying the run_time with the amount of + cpus. """ cdef: slurm_job_info_t *ptr @@ -349,6 +382,9 @@ cdef class Job: cdef alloc(self) cdef _calc_run_time(self) + @staticmethod + cdef _swap_data(Job dst, Job src) + @staticmethod cdef Job from_ptr(slurm_job_info_t *in_ptr) diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx index 2510cc67..67c98638 100644 --- a/pyslurm/core/job/job.pyx +++ b/pyslurm/core/job/job.pyx @@ -31,39 +31,48 @@ from pyslurm.core.error import ( verify_rpc, slurm_errno, ) -from pyslurm.core.common.ctime import ( - secs_to_timestr, - mins_to_timestr, - timestamp_to_date, - _raw_time, -) +from pyslurm.core.common.ctime import _raw_time from pyslurm.core.common import ( uid_to_name, gid_to_name, - humanize, signal_to_num, _getgrall_to_dict, _getpwall_to_dict, - nodelist_from_range_str, - nodelist_to_range_str, instance_to_dict, + _sum_prop, ) cdef class Jobs(dict): + def __cinit__(self): + self.info = NULL + def __dealloc__(self): slurm_free_job_info_msg(self.info) - def __init__(self, preload_passwd_info=False): + def __init__(self, jobs=None, freeze=False): + self.freeze = freeze + + if isinstance(jobs, dict): + self.update(jobs) + elif jobs is not None: + for job in jobs: + if isinstance(job, int): + self[job] = Job(job) + else: + self[job.id] = job + + @staticmethod + def get(preload_passwd_info=False, freeze=False): cdef: dict passwd = {} dict groups = {} - int flags = slurm.SHOW_ALL | slurm.SHOW_DETAIL + Jobs jobs = Jobs.__new__(Jobs) + int flags = slurm.SHOW_ALL | slurm.SHOW_DETAIL Job job - self.info = NULL - verify_rpc(slurm_load_jobs(0, &self.info, flags)) + verify_rpc(slurm_load_jobs(0, &jobs.info, flags)) # If requested, preload the passwd and groups database to potentially # speedup lookups for an attribute in a Job, e.g. user_name or @@ -73,21 +82,21 @@ cdef class Jobs(dict): groups = _getgrall_to_dict() # zero-out a dummy job_step_info_t - memset(&self.tmp_info, 0, sizeof(slurm_job_info_t)) + memset(&jobs.tmp_info, 0, sizeof(slurm_job_info_t)) # Put each job pointer into its own "Job" instance. - for cnt in range(self.info.record_count): - job = Job.from_ptr(&self.info.job_array[cnt]) + for cnt in range(jobs.info.record_count): + job = Job.from_ptr(&jobs.info.job_array[cnt]) # Prevent double free if xmalloc fails mid-loop and a MemoryError # is raised by replacing it with a zeroed-out slurm_job_info_t. 
- self.info.job_array[cnt] = self.tmp_info + jobs.info.job_array[cnt] = jobs.tmp_info if preload_passwd_info: job.passwd = passwd job.groups = groups - self[job.id] = job + jobs[job.id] = job # At this point we memcpy'd all the memory for the Jobs. Setting this # to 0 will prevent the slurm job free function to deallocate the @@ -95,7 +104,27 @@ cdef class Jobs(dict): # are free'd automatically in __dealloc__ since the lifetime of each # job-pointer is tied to the lifetime of its corresponding "Job" # instance. - self.info.record_count = 0 + jobs.info.record_count = 0 + + jobs.freeze = freeze + return jobs + + def reload(self): + cdef Jobs reloaded_jobs = Jobs.get() + + for jid in list(self.keys()): + if jid in reloaded_jobs: + # Put the new data in our instance. + Job._swap_data(self[jid], reloaded_jobs[jid]) + elif not self.freeze: + # Remove this instance from the current collection, as the Job + # doesn't exist anymore. + del self[jid] + + if not self.freeze: + for jid in reloaded_jobs: + if jid not in self: + self[jid] = reloaded_jobs[jid] def load_steps(self): """Load all Job steps for this collection of Jobs. @@ -127,9 +156,28 @@ cdef class Jobs(dict): """ return list(self.values()) + @property + def memory(self): + return _sum_prop(self, Job.memory) + + @property + def cpus(self): + return _sum_prop(self, Job.cpus) + + @property + def ntasks(self): + return _sum_prop(self, Job.ntasks) + + @property + def cpu_time(self): + return _sum_prop(self, Job.cpu_time) + cdef class Job: + def __cinit__(self): + self.ptr = NULL + def __init__(self, int job_id): self.alloc() self.ptr.job_id = job_id @@ -205,6 +253,13 @@ cdef class Job: return wrap + cdef _swap_data(Job dst, Job src): + cdef slurm_job_info_t *tmp = NULL + if dst.ptr and src.ptr: + tmp = dst.ptr + dst.ptr = src.ptr + src.ptr = tmp + def as_dict(self): """Job information formatted as a dictionary. @@ -682,7 +737,7 @@ cdef class Job: return cstr.to_unicode(self.ptr.batch_host) @property - def min_nodes(self): + def num_nodes(self): return u32_parse(self.ptr.num_nodes) @property @@ -754,7 +809,7 @@ cdef class Job: return u64_parse(self.ptr.fed_siblings_viable) @property - def allocated_cpus(self): + def cpus(self): return u32_parse(self.ptr.num_cpus) @property @@ -1036,6 +1091,32 @@ cdef class Job: if self.ptr.core_spec & slurm.CORE_SPEC_THREAD: return self.ptr.core_spec & (~slurm.CORE_SPEC_THREAD) + @property + def memory(self): + mem_cpu = self.memory_per_cpu + if mem_cpu is not None: + total_cpus = self.cpus + if total_cpus is not None: + mem_cpu *= total_cpus + return mem_cpu + + mem_node = self.memory_per_node + if mem_node is not None: + num_nodes = self.min_nodes + if num_nodes is not None: + mem_node *= num_nodes + return mem_cpu + + # TODO + # mem_gpu = self.memory_per_gpu + # if mem_gpu is not None: + # num_nodes = self.min_nodes + # if num_nodes is not None: + # mem_node *= num_nodes + # return mem_cpu + + return None + @property def memory_per_cpu(self): if self.ptr.pn_min_memory != slurm.NO_VAL64: @@ -1100,6 +1181,26 @@ cdef class Job: def cronjob_time(self): return cstr.to_unicode(self.ptr.cronspec) + @property + def cpu_time(self): + run_time = self.run_time + if run_time: + cpus = self.cpus + if cpus is not None: + return cpus * run_time + + return 0 + + @property + def pending_time(self): + # TODO + return None + + @property + def run_time_left(self): + # TODO + return None + def get_resource_layout_per_node(self): """Retrieve the resource layout of this Job on each node. 
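A short sketch of the reworked Jobs collection as of this patch (the classmethod is still called get() here; it is renamed to load() in a later patch of this series):

    # Fetch all Jobs and use the new aggregate properties.
    jobs = Jobs.get()
    print(jobs.cpus, jobs.memory, jobs.ntasks, jobs.cpu_time)

    # A frozen collection keeps its current set of Jobs when reloaded.
    watched = Jobs.get(freeze=True)
    watched.reload()
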
diff --git a/tests/new_api/test_job.py b/tests/new_api/test_job.py index 24f5f63d..91fbfc2b 100644 --- a/tests/new_api/test_job.py +++ b/tests/new_api/test_job.py @@ -136,7 +136,7 @@ def test_get_job_queue(submit_job): # Submit 10 jobs, gather the job_ids in a list job_list = [submit_job() for i in range(10)] - jobs = Jobs() + jobs = Jobs.get() for job in job_list: # Check to see if all the Jobs we submitted exist assert job.id in jobs From 21ee25cf7e11d80ad1ea98e3c397ac035b58ce91 Mon Sep 17 00:00:00 2001 From: tazend Date: Sun, 19 Mar 2023 00:00:19 +0100 Subject: [PATCH 08/28] wip --- pyslurm/core/job/job.pxd | 9 +++- pyslurm/core/job/job.pyx | 36 ++++++++------ pyslurm/core/job/step.pxd | 5 +- pyslurm/core/job/step.pyx | 34 ++++++++----- pyslurm/core/node.pxd | 16 ++---- pyslurm/core/node.pyx | 87 +++++++++++++++++++++++++++------ tests/new_api/test_job.py | 2 +- tests/new_api/test_job_steps.py | 6 +-- tests/new_api/test_node.py | 6 +-- 9 files changed, 139 insertions(+), 62 deletions(-) diff --git a/pyslurm/core/job/job.pxd b/pyslurm/core/job/job.pxd index d7e7971b..fceeecdb 100644 --- a/pyslurm/core/job/job.pxd +++ b/pyslurm/core/job/job.pxd @@ -105,7 +105,7 @@ cdef class Jobs(dict): collection, but old Jobs which have already been purged from the Slurm controllers memory will not be removed either. The default is False, so old jobs will be removed, and new Jobs - will be added - basically the same behaviour as doing Jobs.get(). + will be added - basically the same behaviour as doing Jobs.load(). """ cdef: job_info_msg_t *info @@ -128,6 +128,11 @@ cdef class Job: MemoryError: If malloc fails to allocate memory. Attributes: + steps (JobSteps): + Steps this Job has. + Before you can access the Steps data for a Job, you have to call + the reload() method of a Job instance or the load_steps() method + of a Jobs collection. name (str): Name of the Job id (int): @@ -379,6 +384,8 @@ cdef class Job: dict passwd dict groups + cdef public JobSteps steps + cdef alloc(self) cdef _calc_run_time(self) diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx index 67c98638..357d9495 100644 --- a/pyslurm/core/job/job.pyx +++ b/pyslurm/core/job/job.pyx @@ -64,7 +64,7 @@ cdef class Jobs(dict): self[job.id] = job @staticmethod - def get(preload_passwd_info=False, freeze=False): + def load(preload_passwd_info=False, freeze=False): cdef: dict passwd = {} dict groups = {} @@ -110,12 +110,12 @@ cdef class Jobs(dict): return jobs def reload(self): - cdef Jobs reloaded_jobs = Jobs.get() + cdef Jobs reloaded_jobs = Jobs.load() for jid in list(self.keys()): if jid in reloaded_jobs: - # Put the new data in our instance. - Job._swap_data(self[jid], reloaded_jobs[jid]) + # Put the new data in. + self[jid] = reloaded_jobs[jid] elif not self.freeze: # Remove this instance from the current collection, as the Job # doesn't exist anymore. @@ -129,24 +129,23 @@ cdef class Jobs(dict): def load_steps(self): """Load all Job steps for this collection of Jobs. + This function fills in the "steps" attribute for all Jobs in the + collection. + Note: Pending Jobs will be ignored, since they don't have any Steps yet. Raises: RPCError: When retrieving the Job information for all the Steps failed. - - Returns: - (dict): JobSteps information for each JobID. """ - cdef: - Job job - dict step_info = JobSteps.load_all() - dict out + cdef dict step_info = JobSteps.load_all() - # Ignore any Steps from Jobs which do not exist in this collection. 
- out = {jid: step_info[jid] for jid in self if jid in step_info} - return out + for jid in self: + # Ignore any Steps from Jobs which do not exist in this + # collection. + if jid in step_info: + self[jid].steps = step_info[jid] def as_list(self): """Format the information as list of Job objects. @@ -183,6 +182,7 @@ cdef class Job: self.ptr.job_id = job_id self.passwd = {} self.groups = {} + self.steps = JobSteps.__new__(JobSteps) cdef alloc(self): self.ptr = try_xmalloc(sizeof(slurm_job_info_t)) @@ -236,6 +236,13 @@ cdef class Job: self.alloc() memcpy(self.ptr, &info.job_array[0], sizeof(slurm_job_info_t)) info.record_count = 0 + + # Just ignore if the steps couldn't be loaded here. + try: + if not slurm.IS_JOB_PENDING(self.ptr): + self.steps = JobSteps._load(self) + except RPCError: + pass except Exception as e: raise e finally: @@ -249,6 +256,7 @@ cdef class Job: wrap.alloc() wrap.passwd = {} wrap.groups = {} + wrap.steps = JobSteps.__new__(JobSteps) memcpy(wrap.ptr, in_ptr, sizeof(slurm_job_info_t)) return wrap diff --git a/pyslurm/core/job/step.pxd b/pyslurm/core/job/step.pxd index 9b42368c..0482d0b2 100644 --- a/pyslurm/core/job/step.pxd +++ b/pyslurm/core/job/step.pxd @@ -58,7 +58,10 @@ cdef class JobSteps(dict): job_step_info_response_msg_t *info job_step_info_t tmp_info - cdef dict _load(self, uint32_t job_id, int flags) + @staticmethod + cdef JobSteps _load(Job job) + + cdef dict _get_info(self, uint32_t job_id, int flags) cdef class JobStep: diff --git a/pyslurm/core/job/step.pyx b/pyslurm/core/job/step.pyx index 8710abd2..834c999b 100644 --- a/pyslurm/core/job/step.pyx +++ b/pyslurm/core/job/step.pyx @@ -53,22 +53,30 @@ cdef class JobSteps(dict): def __cinit__(self): self.info = NULL - def __init__(self, job): - cdef Job _job + def __init__(self): + pass - # Reload the Job in order to have updated information about its state. + @staticmethod + def load(job): + cdef Job _job _job = job.reload() if isinstance(job, Job) else Job(job).reload() + return JobSteps._load(_job) - step_info = self._load(_job.id, slurm.SHOW_ALL) - if not step_info and not slurm.IS_JOB_PENDING(_job.ptr): - msg = f"Failed to load step info for Job {_job.id}." + @staticmethod + cdef JobSteps _load(Job job): + cdef JobSteps steps = JobSteps.__new__(JobSteps) + + step_info = steps._get_info(job.id, slurm.SHOW_ALL) + if not step_info and not slurm.IS_JOB_PENDING(job.ptr): + msg = f"Failed to load step info for Job {job.id}." raise RPCError(msg=msg) # No super().__init__() needed? Cython probably already initialized # the dict automatically. - self.update(step_info[_job.id]) - - cdef dict _load(self, uint32_t job_id, int flags): + steps.update(step_info[job.id]) + return steps + + cdef dict _get_info(self, uint32_t job_id, int flags): cdef: JobStep step JobSteps steps @@ -108,14 +116,14 @@ cdef class JobSteps(dict): @staticmethod def load_all(): - """Loads and returns all the steps in the system. + """Loads all the steps in the system. Returns: (dict): A dict where every JobID (key) is mapped with an instance of its JobSteps (value). """ cdef JobSteps steps = JobSteps.__new__(JobSteps) - return steps._load(slurm.NO_VAL, slurm.SHOW_ALL) + return steps._get_info(slurm.NO_VAL, slurm.SHOW_ALL) cdef class JobStep: @@ -163,8 +171,8 @@ cdef class JobStep: self.umsg = NULL def __setattr__(self, name, val): - # When a user wants to set attributes on a Node instance that was - # created by calling Nodes(), the "umsg" pointer is not yet allocated. 
+ # When a user wants to set attributes on a instance that was created + # by calling JobSteps.load(), the "umsg" pointer is not yet allocated. # We only allocate memory for it by the time the user actually wants # to modify something. self._alloc_umsg() diff --git a/pyslurm/core/node.pxd b/pyslurm/core/node.pxd index 770a797d..dc878bf0 100644 --- a/pyslurm/core/node.pxd +++ b/pyslurm/core/node.pxd @@ -51,17 +51,9 @@ from pyslurm.slurm cimport ( cdef class Nodes(dict): """A collection of Node objects. - By creating a new Nodes instance, all Nodes in the system will be - fetched from the slurmctld. - Args: - preload_passwd_info (bool): - Decides whether to query passwd and groups information from the - system. - Could potentially speed up access to attributes of the Node where - a UID/GID is translated to a name. - If True, the information will fetched and stored in each of the - Node instances. The default is False. + nodes (Union[list, dict, str], optional): + Nodes to initialize this collection with. Attributes: free_memory (int): @@ -84,7 +76,6 @@ cdef class Nodes(dict): Amount of average watts consumed in this node collection. Raises: - RPCError: When getting all the Nodes from the slurmctld failed. MemoryError: If malloc fails to allocate memory. """ cdef: @@ -221,6 +212,9 @@ cdef class Node: dict passwd dict groups + @staticmethod + cdef _swap_data(Node dst, Node src) + @staticmethod cdef Node from_ptr(node_info_t *in_ptr) diff --git a/pyslurm/core/node.pyx b/pyslurm/core/node.pyx index 00601d0e..e25e8cb0 100644 --- a/pyslurm/core/node.pyx +++ b/pyslurm/core/node.pyx @@ -40,6 +40,7 @@ from pyslurm.core.common import ( cpubind_to_num, instance_to_dict, _sum_prop, + nodelist_from_range_str, ) @@ -49,15 +50,53 @@ cdef class Nodes(dict): slurm_free_node_info_msg(self.info) slurm_free_partition_info_msg(self.part_info) - def __init__(self, preload_passwd_info=False): + def __cinit__(self): + self.info = NULL + self.part_info = NULL + + def __init__(self, nodes=None): + if isinstance(nodes, dict): + self.update(nodes) + elif isinstance(nodes, str): + nodelist = nodelist_from_range_str(nodes) + self.update({node: Node(node) for node in nodelist}) + elif nodes is not None: + for node in nodes: + if isinstance(node, str): + self[node] = Node(node) + else: + self[node.name] = node + + @staticmethod + def load(preload_passwd_info=False): + """Load all nodes in the system. + + Args: + preload_passwd_info (bool): + Decides whether to query passwd and groups information from + the system. + Could potentially speed up access to attributes of the Node + where a UID/GID is translated to a name. + If True, the information will fetched and stored in each of + the Node instances. The default is False. + + Returns: + (Nodes): Collection of node objects. + + Raises: + RPCError: When getting all the Nodes from the slurmctld failed. + MemoryError: If malloc fails to allocate memory. + """ cdef: dict passwd = {} dict groups = {} - int flags = slurm.SHOW_ALL + Nodes nodes = Nodes.__new__(Nodes) + int flags = slurm.SHOW_ALL Node node - self.info = NULL - self.part_info = NULL + verify_rpc(slurm_load_node(0, &nodes.info, flags)) + verify_rpc(slurm_load_partitions(0, &nodes.part_info, flags)) + slurm_populate_node_partitions(nodes.info, nodes.part_info) # If requested, preload the passwd and groups database to potentially # speedup lookups for an attribute in a node, e.g "owner". 
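A sketch of the two ways a Nodes collection can now be built; the hostname range is a placeholder:

    # Fetch every Node known to the slurmctld, optionally caching passwd/group
    # data for faster attribute lookups.
    all_nodes = Nodes.load(preload_passwd_info=True)

    # Or create a collection only for specific hosts from a range string.
    subset = Nodes("node[01-04]")
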
@@ -65,26 +104,22 @@ cdef class Nodes(dict): passwd = _getpwall_to_dict() groups = _getgrall_to_dict() - verify_rpc(slurm_load_node(0, &self.info, slurm.SHOW_ALL)) - verify_rpc(slurm_load_partitions(0, &self.part_info, slurm.SHOW_ALL)) - slurm_populate_node_partitions(self.info, self.part_info) - # zero-out a dummy node_info_t - memset(&self.tmp_info, 0, sizeof(node_info_t)) + memset(&nodes.tmp_info, 0, sizeof(node_info_t)) # Put each node pointer into its own "Node" instance. - for cnt in range(self.info.record_count): - node = Node.from_ptr(&self.info.node_array[cnt]) + for cnt in range(nodes.info.record_count): + node = Node.from_ptr(&nodes.info.node_array[cnt]) # Prevent double free if xmalloc fails mid-loop and a MemoryError # is raised by replacing it with a zeroed-out node_info_t. - self.info.node_array[cnt] = self.tmp_info + nodes.info.node_array[cnt] = nodes.tmp_info if preload_passwd_info: node.passwd = passwd node.groups = groups - self[node.name] = node + nodes[node.name] = node # At this point we memcpy'd all the memory for the Nodes. Setting this # to 0 will prevent the slurm node free function to deallocate the @@ -92,7 +127,22 @@ cdef class Nodes(dict): # are free'd automatically in __dealloc__ since the lifetime of each # node-pointer is tied to the lifetime of its corresponding "Node" # instance. - self.info.record_count = 0 + nodes.info.record_count = 0 + + return nodes + + def reload(self): + cdef Nodes reloaded_nodes + our_nodes = list(self.keys()) + + if not our_nodes: + return None + + reloaded_nodes = Nodes.load() + for node in list(self.keys()): + if node in reloaded_nodes: + # Put the new data in. + self[node] = reloaded_nodes[node] def as_list(self): """Format the information as list of Node objects. @@ -145,7 +195,7 @@ cdef class Node: self.info = NULL self.umsg = NULL - def __init__(self, str name=None, **kwargs): + def __init__(self, name=None, **kwargs): self._alloc_impl() self.name = name for k, v in kwargs.items(): @@ -198,6 +248,13 @@ cdef class Node: memcpy(wrap.info, in_ptr, sizeof(node_info_t)) return wrap + cdef _swap_data(Node dst, Node src): + cdef node_info_t *tmp = NULL + if dst.info and src.info: + tmp = dst.info + dst.info = src.info + src.info = tmp + def reload(self): """(Re)load information for a node. 
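Continuing the sketch above: reload() only refreshes Nodes that are already part of the collection, so it works naturally on such a subset (hostnames remain placeholders):

    subset = Nodes("node[01-04]")
    subset.reload()
    print(subset.idle_cpus, subset.free_memory)
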
diff --git a/tests/new_api/test_job.py b/tests/new_api/test_job.py index 91fbfc2b..b82b763e 100644 --- a/tests/new_api/test_job.py +++ b/tests/new_api/test_job.py @@ -136,7 +136,7 @@ def test_get_job_queue(submit_job): # Submit 10 jobs, gather the job_ids in a list job_list = [submit_job() for i in range(10)] - jobs = Jobs.get() + jobs = Jobs.load() for job in job_list: # Check to see if all the Jobs we submitted exist assert job.id in jobs diff --git a/tests/new_api/test_job_steps.py b/tests/new_api/test_job_steps.py index f9cf54bb..f22fe2fe 100644 --- a/tests/new_api/test_job_steps.py +++ b/tests/new_api/test_job_steps.py @@ -88,7 +88,7 @@ def test_collection(submit_job): job = submit_job(script=create_job_script_multi_step()) time.sleep(1) - steps = JobSteps(job) + steps = JobSteps.load(job) assert steps != {} # We have 3 Steps: batch, 0 and 1 @@ -115,7 +115,7 @@ def test_cancel(submit_job): job = submit_job(script=create_job_script_multi_step()) time.sleep(1) - steps = JobSteps(job) + steps = JobSteps.load(job) assert len(steps) == 3 assert ("batch" in steps and 0 in steps and @@ -124,7 +124,7 @@ def test_cancel(submit_job): steps[0].cancel() time.sleep(0.5) - steps = JobSteps(job) + steps = JobSteps.load(job) assert len(steps) == 2 assert ("batch" in steps and 1 in steps) diff --git a/tests/new_api/test_node.py b/tests/new_api/test_node.py index d3e81481..614460ff 100644 --- a/tests/new_api/test_node.py +++ b/tests/new_api/test_node.py @@ -9,7 +9,7 @@ def test_reload(): - node = Node(Nodes().as_list()[0].name) + node = Node(Nodes.load().as_list()[0].name) # Nothing has been loaded at this point, just make sure everything is # on default values. @@ -40,7 +40,7 @@ def test_create(): def test_modify(): - node = Node(Nodes().as_list()[0].name) + node = Node(Nodes.load().as_list()[0].name) node.modify(weight=10000) assert node.reload().weight == 10000 @@ -53,4 +53,4 @@ def test_modify(): def test_parse_all(): - Node(Nodes().as_list()[0].name).reload().as_dict() + Node(Nodes.load().as_list()[0].name).reload().as_dict() From 32554545b2b45273131f02ab15167f9fcf7afebe Mon Sep 17 00:00:00 2001 From: tazend Date: Thu, 23 Mar 2023 18:37:50 +0100 Subject: [PATCH 09/28] wip --- pyslurm/core/common/__init__.pxd | 1 - pyslurm/core/job/job.pxd | 21 +++++---------------- pyslurm/core/job/job.pyx | 27 +++++++++++++++++++++++++++ pyslurm/core/node.pyx | 11 +++++++++++ 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/pyslurm/core/common/__init__.pxd b/pyslurm/core/common/__init__.pxd index 284f5acb..5d4735eb 100644 --- a/pyslurm/core/common/__init__.pxd +++ b/pyslurm/core/common/__init__.pxd @@ -17,7 +17,6 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: embedsignature=True # cython: c_string_type=unicode, c_string_encoding=utf8 # cython: language_level=3 diff --git a/pyslurm/core/job/job.pxd b/pyslurm/core/job/job.pxd index fceeecdb..b9a4231c 100644 --- a/pyslurm/core/job/job.pxd +++ b/pyslurm/core/job/job.pxd @@ -66,25 +66,14 @@ from pyslurm.slurm cimport ( cdef class Jobs(dict): - """A collection of :obj:`Job` objects. - - By creating a new :obj:`Jobs` instance, all Jobs in the system will be - fetched from the slurmctld. + """A collection of Job objects. Args: - preload_passwd_info (bool, optional): - Decides whether to query passwd and groups information from - the system. - Could potentially speed up access to attributes of the Job - where a UID/GID is translated to a name. 
If True, the - information will fetched and stored in each of the Job - instances. + nodes (Union[list, dict], optional): + Jobs to initialize this collection with. freeze (bool, optional): - Decide whether this collection of Jobs should be "frozen". - - Raises: - RPCError: When getting all the Jobs from the slurmctld failed. - MemoryError: If malloc fails to allocate memory. + Control whether this collection is "frozen" when reloading Job + information. Attributes: memory (int): diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx index 357d9495..6bfb7208 100644 --- a/pyslurm/core/job/job.pyx +++ b/pyslurm/core/job/job.pyx @@ -65,6 +65,26 @@ cdef class Jobs(dict): @staticmethod def load(preload_passwd_info=False, freeze=False): + """Retrieve all Jobs from the Slurm controller + + Args: + preload_passwd_info (bool, optional): + Decides whether to query passwd and groups information from + the system. + Could potentially speed up access to attributes of the Job + where a UID/GID is translated to a name. If True, the + information will fetched and stored in each of the Job + instances. + freeze (bool, optional): + Decide whether this collection of Jobs should be "frozen". + + Returns: + (Jobs): A collection of Job objects. + + Raises: + RPCError: When getting all the Jobs from the slurmctld failed. + MemoryError: If malloc fails to allocate memory. + """ cdef: dict passwd = {} dict groups = {} @@ -110,6 +130,11 @@ cdef class Jobs(dict): return jobs def reload(self): + """Reload the information for jobs in a collection. + + Raises: + RPCError: When getting the Jobs from the slurmctld failed. + """ cdef Jobs reloaded_jobs = Jobs.load() for jid in list(self.keys()): @@ -126,6 +151,8 @@ cdef class Jobs(dict): if jid not in self: self[jid] = reloaded_jobs[jid] + return self + def load_steps(self): """Load all Job steps for this collection of Jobs. diff --git a/pyslurm/core/node.pyx b/pyslurm/core/node.pyx index e25e8cb0..46c19fbe 100644 --- a/pyslurm/core/node.pyx +++ b/pyslurm/core/node.pyx @@ -132,6 +132,15 @@ cdef class Nodes(dict): return nodes def reload(self): + """Reload the information for nodes in a collection. + + Note: + Only information for nodes which are already in the collection at + the time of calling this method will be reloaded. + + Raises: + RPCError: When getting the Nodes from the slurmctld failed. + """ cdef Nodes reloaded_nodes our_nodes = list(self.keys()) @@ -144,6 +153,8 @@ cdef class Nodes(dict): # Put the new data in. self[node] = reloaded_nodes[node] + return self + def as_list(self): """Format the information as list of Node objects. From 4ab16586734ada56ebca08a8fbccd2d68c901c0e Mon Sep 17 00:00:00 2001 From: tazend Date: Sat, 1 Apr 2023 10:58:57 +0200 Subject: [PATCH 10/28] wip --- .gitignore | 1 + pyslurm/core/job/job.pxd | 2 +- pyslurm/core/job/job.pyx | 3 +-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index d534a00e..ef44eef6 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ pyslurm/*.pxd~ pyslurm/*.so pyslurm/*.c pyslurm/**/*.c +pyslurm/**/*.so pyslurm/**/__pycache__ # Ignore vim swap files diff --git a/pyslurm/core/job/job.pxd b/pyslurm/core/job/job.pxd index b9a4231c..1be664ec 100644 --- a/pyslurm/core/job/job.pxd +++ b/pyslurm/core/job/job.pxd @@ -178,7 +178,7 @@ cdef class Job: Time when a pending Job will be cancelled, as unix timestamp. preempt_eligible_time (int): Time the Job is eligible for preemption, as unix timestamp. 
- preempt_time_raw (int): + preempt_time (int): Time the Job was signaled for preemption, as unix timestamp. suspend_time (int): Last Time the Job was suspended, as unix timestamp. diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx index 6bfb7208..3bd33a65 100644 --- a/pyslurm/core/job/job.pyx +++ b/pyslurm/core/job/job.pyx @@ -204,7 +204,7 @@ cdef class Job: def __cinit__(self): self.ptr = NULL - def __init__(self, int job_id): + def __init__(self, job_id): self.alloc() self.ptr.job_id = job_id self.passwd = {} @@ -1243,7 +1243,6 @@ cdef class Job: * cpus (int) * gres (dict) * memory (str) - Humanized Memory str - * memory_raw (int) - Value in Mebibytes Returns: (dict): Resource layout From cbe77e03ad9d9b238d2b660cab044e1892feb362 Mon Sep 17 00:00:00 2001 From: tazend Date: Wed, 1 Mar 2023 23:12:14 +0100 Subject: [PATCH 11/28] Rework slurmdbd Job API --- pyslurm/core/common/__init__.pxd | 2 +- pyslurm/core/common/cstr.pxd | 2 +- pyslurm/core/db/__init__.pxd | 0 pyslurm/core/db/__init__.py | 0 pyslurm/core/db/connection.pxd | 35 ++++++ pyslurm/core/db/connection.pyx | 49 ++++++++ pyslurm/core/db/job.pxd | 74 +++++++++++++ pyslurm/core/db/job.pyx | 185 +++++++++++++++++++++++++++++++ pyslurm/core/db/util.pxd | 66 +++++++++++ pyslurm/core/db/util.pyx | 133 ++++++++++++++++++++++ pyslurm/slurm/extra.pxi | 9 +- 11 files changed, 552 insertions(+), 3 deletions(-) create mode 100644 pyslurm/core/db/__init__.pxd create mode 100644 pyslurm/core/db/__init__.py create mode 100644 pyslurm/core/db/connection.pxd create mode 100644 pyslurm/core/db/connection.pyx create mode 100644 pyslurm/core/db/job.pxd create mode 100644 pyslurm/core/db/job.pyx create mode 100644 pyslurm/core/db/util.pxd create mode 100644 pyslurm/core/db/util.pyx diff --git a/pyslurm/core/common/__init__.pxd b/pyslurm/core/common/__init__.pxd index 5d4735eb..b4ea27fa 100644 --- a/pyslurm/core/common/__init__.pxd +++ b/pyslurm/core/common/__init__.pxd @@ -21,7 +21,7 @@ # cython: language_level=3 from pyslurm cimport slurm -from pyslurm.slurm cimport xfree, try_xmalloc, xmalloc, xfree_ptr +from pyslurm.slurm cimport xfree, try_xmalloc, xmalloc from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t from pyslurm.core.common cimport cstr from libc.stdlib cimport free diff --git a/pyslurm/core/common/cstr.pxd b/pyslurm/core/common/cstr.pxd index f32c60f6..0c6b96c3 100644 --- a/pyslurm/core/common/cstr.pxd +++ b/pyslurm/core/common/cstr.pxd @@ -21,7 +21,7 @@ # cython: language_level=3 from pyslurm cimport slurm -from pyslurm.slurm cimport xfree, try_xmalloc, xmalloc, xfree_ptr +from pyslurm.slurm cimport xfree, try_xmalloc, xmalloc from libc.string cimport memcpy, strlen cdef char *from_unicode(s) diff --git a/pyslurm/core/db/__init__.pxd b/pyslurm/core/db/__init__.pxd new file mode 100644 index 00000000..e69de29b diff --git a/pyslurm/core/db/__init__.py b/pyslurm/core/db/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pyslurm/core/db/connection.pxd b/pyslurm/core/db/connection.pxd new file mode 100644 index 00000000..b6e7e5ff --- /dev/null +++ b/pyslurm/core/db/connection.pxd @@ -0,0 +1,35 @@ +######################################################################### +# connection.pyx - pyslurm slurmdbd database connection +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software 
Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +# cython: embedsignature=True + +from pyslurm cimport slurm +from libc.stdint cimport uint16_t +from pyslurm.slurm cimport ( + slurmdb_connection_get, + slurmdb_connection_close, +) + + +cdef class Connection: + cdef: + void *conn + uint16_t conn_flags diff --git a/pyslurm/core/db/connection.pyx b/pyslurm/core/db/connection.pyx new file mode 100644 index 00000000..b686065c --- /dev/null +++ b/pyslurm/core/db/connection.pyx @@ -0,0 +1,49 @@ +######################################################################### +# connection.pyx - pyslurm slurmdbd database connection +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +# cython: embedsignature=True + +from pyslurm.core.error import RPCError + + +cdef class Connection: + def __cinit__(self): + self.conn = NULL + self.conn_flags = 0 + + def __init__(self): + self.open() + + def open(self): + if not self.conn: + self.conn = slurmdb_connection_get(&self.conn_flags) + if not self.conn: + raise RPCError(msg="Failed to open Connection to slurmdbd") + + def close(self): + slurmdb_connection_close(&self.conn) + self.conn = NULL + + def is_open(self): + if self.conn: + return True + else: + return False diff --git a/pyslurm/core/db/job.pxd b/pyslurm/core/db/job.pxd new file mode 100644 index 00000000..d6843831 --- /dev/null +++ b/pyslurm/core/db/job.pxd @@ -0,0 +1,74 @@ +######################################################################### +# job.pxd - pyslurm slurmdbd job api +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +# cython: embedsignature=True + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + slurmdb_job_rec_t, + slurmdb_job_cond_t, + slurmdb_step_rec_t, + slurmdb_jobs_get, + slurmdb_destroy_job_cond, + slurmdb_destroy_job_rec, + slurmdb_destroy_step_rec, + try_xmalloc, + slurmdb_job_cond_def_start_end, +) +from pyslurm.core.db.util cimport SlurmList, SlurmListItem +from pyslurm.core.db.connection cimport Connection +from pyslurm.core.common cimport cstr + + +cdef class JobConditions: + cdef slurmdb_job_cond_t *ptr + + cdef public: + start_time + end_time + accounts + association_ids + clusters + constraints + + +cdef class JobSteps(dict): + pass + + +cdef class JobStep: + cdef slurmdb_step_rec_t *ptr + + @staticmethod + cdef JobStep from_ptr(slurmdb_step_rec_t *step) + + +cdef class Jobs(dict): + cdef SlurmList info + + +cdef class Job: + cdef slurmdb_job_rec_t *ptr + cdef public JobSteps steps + + @staticmethod + cdef Job from_ptr(slurmdb_job_rec_t *in_ptr) + diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx new file mode 100644 index 00000000..92f759f5 --- /dev/null +++ b/pyslurm/core/db/job.pyx @@ -0,0 +1,185 @@ +######################################################################### +# job.pyx - pyslurm slurmdbd job api +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +# cython: embedsignature=True + + +from pyslurm.core.error import RPCError +from pyslurm.core.common.ctime import date_to_timestamp +from pyslurm.core.common.uint import * + +# Maybe prefix these classes with something like "DB" to avoid name collision +# with the other classes from pyslurm/core/job ? 
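# A rough sketch of how that collision looks from the caller's side today,
# purely illustrative (the job id and account name below are made up, and
# this sketch is not part of the patch itself):
#
#     from pyslurm.core.job import Job             # slurmctld (live) job
#     from pyslurm.core.db import job as db        # this module
#
#     live_job = Job(1234)
#     acct_jobs = db.Jobs(accounts=["acme"])
#
# Without a "DB" prefix, callers have to disambiguate through the module
# path or an import alias as above.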
+ +cdef class JobStep: + + def __cinit__(self): + self.ptr = NULL + + def __dealloc__(self): + slurmdb_destroy_step_rec(self.ptr) + self.ptr = NULL + + @staticmethod + cdef JobStep from_ptr(slurmdb_step_rec_t *step): + cdef JobStep wrap = JobStep.__new__(JobStep) + wrap.ptr = step + return wrap + + def _xlate_from_id(self, sid): + if sid == slurm.SLURM_BATCH_SCRIPT: + return "batch" + elif sid == slurm.SLURM_EXTERN_CONT: + return "extern" + elif sid == slurm.SLURM_INTERACTIVE_STEP: + return "interactive" + elif sid == slurm.SLURM_PENDING_STEP: + return "pending" + else: + return sid + + def _xlate_to_id(self, sid): + if sid == "batch": + return slurm.SLURM_BATCH_SCRIPT + elif sid == "extern": + return slurm.SLURM_EXTERN_CONT + elif sid == "interactive": + return slurm.SLURM_INTERACTIVE_STEP + elif sid == "pending": + return slurm.SLURM_PENDING_STEP + else: + return int(sid) + + @property + def id(self): + return self._xlate_from_id(self.ptr.step_id.step_id) + + @property + def job_id(self): + return self.ptr.step_id.job_id + + @property + def name(self): + return cstr.to_unicode(self.ptr.stepname) + + +cdef class JobConditions: + + def __cinit__(self): + self.ptr = NULL + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + def __dealloc__(self): + self._dealloc() + + def _dealloc(self): + slurmdb_destroy_job_cond(self.ptr) + self.ptr = NULL + + def _alloc(self): + self._dealloc() + self.ptr = try_xmalloc(sizeof(slurmdb_job_cond_t)) + if not self.ptr: + raise MemoryError("xmalloc failed for slurmdb_job_cond_t") + + self.ptr.db_flags = slurm.SLURMDB_JOB_FLAG_NOTSET + self.ptr.flags |= slurm.JOBCOND_FLAG_NO_TRUNC + + def _create_job_cond(self): + self._alloc() + cdef slurmdb_job_cond_t *ptr = self.ptr + + ptr.usage_start = date_to_timestamp(self.start_time) + ptr.usage_end = date_to_timestamp(self.end_time) + slurmdb_job_cond_def_start_end(ptr) + SlurmList.to_char_list(&ptr.acct_list, self.accounts) + SlurmList.to_char_list(&ptr.associd_list, self.association_ids) + SlurmList.to_char_list(&ptr.cluster_list, self.clusters) + SlurmList.to_char_list(&ptr.constraint_list, self.constraints) + + +cdef class Jobs(dict): + + def __init__(self, *args, **kwargs): + cdef: + Job job + JobStep step + Connection db_conn + JobConditions job_cond + SlurmListItem job_ptr + SlurmListItem step_ptr + SlurmList step_list + + # Allow the user to both specify search conditions via a JobConditions + # instance or **kwargs. + if args and isinstance(args[0], JobConditions): + job_cond = args[0] + else: + job_cond = JobConditions(**kwargs) + + job_cond._create_job_cond() + # TODO: Have a single, global DB connection in pyslurm internally? 
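        # For reference, the two equivalent call styles accepted above,
        # with purely illustrative filter values:
        #
        #     cond = JobConditions(start_time="2023-03-01", accounts=["acme"])
        #     jobs = Jobs(cond)
        #
        #     jobs = Jobs(start_time="2023-03-01", accounts=["acme"])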
+ db_conn = Connection() + self.info = SlurmList.wrap(slurmdb_jobs_get(db_conn.conn, + job_cond.ptr)) + if self.info.is_null(): + raise RPCError(msg="Failed to get Jobs from slurmdbd") + + # TODO: For multi-cluster support, remove duplicate federation jobs + for job_ptr in SlurmList.iter_and_pop(self.info): + job = Job.from_ptr(job_ptr.data) + self[job.id] = job + + step_list = SlurmList.wrap(job.ptr.steps, owned=False) + for step_ptr in SlurmList.iter_and_pop(step_list): + step = JobStep.from_ptr(step_ptr.data) + job.steps[step.id] = step + + +cdef class Job: + + def __cinit__(self): + self.ptr = NULL + + def __init__(self, int job_id): + pass + + def __dealloc__(self): + slurmdb_destroy_job_rec(self.ptr) + self.ptr = NULL + + @staticmethod + cdef Job from_ptr(slurmdb_job_rec_t *in_ptr): + cdef Job wrap = Job.__new__(Job) + wrap.ptr = in_ptr + wrap.steps = JobSteps.__new__(JobSteps) + return wrap + + @property + def account(self): + return cstr.to_unicode(self.ptr.account) + + @property + def id(self): + return self.ptr.jobid diff --git a/pyslurm/core/db/util.pxd b/pyslurm/core/db/util.pxd new file mode 100644 index 00000000..44be9b15 --- /dev/null +++ b/pyslurm/core/db/util.pxd @@ -0,0 +1,66 @@ +######################################################################### +# util.pxd - pyslurm slurmdbd util functions +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +# cython: embedsignature=True + +from pyslurm cimport slurm +from pyslurm.core.common cimport cstr +from pyslurm.slurm cimport ( + ListIterator, + List, + slurm_list_iterator_create, + slurm_list_iterator_destroy, + slurm_list_iterator_reset, + slurm_list_count, + slurm_list_next, + slurm_list_destroy, + slurm_list_create, + slurm_list_pop, + slurm_xfree_ptr, +) + + +cdef class SlurmListItem: + cdef void *data + + @staticmethod + cdef SlurmListItem from_ptr(void *item) + + +cdef class SlurmList: + cdef: + List info + int cnt + ListIterator itr + int itr_cnt + owned + + @staticmethod + cdef SlurmList wrap(List, owned=*) + + @staticmethod + cdef SlurmList create(slurm.ListDelF delf) + + @staticmethod + cdef to_char_list(List *in_list, vals) + + @staticmethod + cdef to_str_pylist(List in_list) diff --git a/pyslurm/core/db/util.pyx b/pyslurm/core/db/util.pyx new file mode 100644 index 00000000..6956c458 --- /dev/null +++ b/pyslurm/core/db/util.pyx @@ -0,0 +1,133 @@ +######################################################################### +# util.pxd - pyslurm slurmdbd util functions +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +# cython: embedsignature=True + + +cdef class SlurmListItem: + + def __cinit__(self): + self.data = NULL + + @staticmethod + cdef SlurmListItem from_ptr(void *item): + cdef SlurmListItem wrap = SlurmListItem.__new__(SlurmListItem) + wrap.data = item + return wrap + + +cdef class SlurmList: + """ + Convenience Wrapper around slurms List type + """ + def __cinit__(self): + self.info = NULL + self.itr = NULL + self.itr_cnt = 0 + self.cnt = 0 + self.owned = True + + def __dealloc__(self): + if self.owned: + if self.itr: + slurm_list_iterator_destroy(self.itr) + + if self.info: + slurm_list_destroy(self.info) + + def __iter__(self): + return self + + def __next__(self): + if self.itr_cnt < self.cnt: + self.itr_cnt += 1 + return SlurmListItem.from_ptr(slurm_list_next(self.itr)) + + slurm_list_iterator_reset(self.itr) + self.itr_cnt = 0 + raise StopIteration + + @staticmethod + def iter_and_pop(SlurmList li): + cnt = 0 + while cnt < li.cnt: + yield SlurmListItem.from_ptr(slurm_list_pop(li.info)) + cnt += 1 + + @staticmethod + cdef SlurmList create(slurm.ListDelF delfunc): + cdef SlurmList wrapper = SlurmList.__new__(SlurmList) + wrapper.info = slurm_list_create(delfunc) + wrapper.itr = slurm_list_iterator_create(wrapper.info) + return wrapper + + @staticmethod + cdef SlurmList wrap(List li, owned=True): + if not li: + raise ValueError("List is NULL") + + cdef SlurmList wrapper = SlurmList.__new__(SlurmList) + wrapper.info = li + wrapper.cnt = slurm_list_count(li) + wrapper.itr = slurm_list_iterator_create(wrapper.info) + wrapper.owned = owned + return wrapper + + @staticmethod + cdef to_str_pylist(List in_list): + cdef: + ListIterator itr = slurm_list_iterator_create(in_list) + char* entry = NULL + list out = [] + + for i in range(slurm_list_count(in_list)): + entry = slurm_list_next(itr) + pystr = cstr.to_unicode(entry) + if pystr: + out.append(pystr) + + slurm_list_iterator_destroy(itr) + return out + + @staticmethod + cdef to_char_list(List *in_list, vals): + cdef: + List li = in_list[0] + char *entry = NULL + + if in_list[0]: + slurm_list_destroy(li) + in_list[0] = NULL + + if not vals: + in_list[0] = NULL + else: + in_list[0] = slurm_list_create(slurm_xfree_ptr) + for val in vals: + if val: + cstr.fmalloc(&entry, val) + slurm.slurm_list_append(in_list[0], entry) + + def is_null(self): + if not self.info: + return True + else: + return False diff --git a/pyslurm/slurm/extra.pxi b/pyslurm/slurm/extra.pxi index c9294339..a0ac15dc 100644 --- a/pyslurm/slurm/extra.pxi +++ b/pyslurm/slurm/extra.pxi @@ -143,7 +143,8 @@ cdef extern from "pyslurm/slurm/xmalloc.h" nogil: void xfree(void *__p) void *xmalloc(size_t __sz) void *try_xmalloc(size_t __sz) - void xfree_ptr(void *ptr) + +cdef extern void slurm_xfree_ptr(void *) # # Slurm xstring functions @@ -231,3 +232,9 @@ cdef extern from *: void FREE_NULL_BITMAP(bitstr_t *_X) cdef extern char *slurm_hostlist_deranged_string_malloc(hostlist_t hl) + +# +# Slurmdbd functions +# + +cdef extern void slurmdb_job_cond_def_start_end(slurmdb_job_cond_t *job_cond) From 87992302fe7bc8167713b64d8527d5dde990296e Mon Sep 17 00:00:00 2001 From: tazend Date: Fri, 3 Mar 2023 19:33:46 +0100 Subject: [PATCH 12/28] wip slurmdbd job api --- pyslurm/core/common/__init__.pyx | 4 +- pyslurm/core/db/job.pxd | 2 + pyslurm/core/db/job.pyx | 295 ++++++++++++++++++++++++++++++- pyslurm/core/db/tres.pxd | 41 +++++ pyslurm/core/db/tres.pyx | 61 +++++++ pyslurm/slurm/extra.pxi | 14 ++ 6 files 
changed, 414 insertions(+), 3 deletions(-) create mode 100644 pyslurm/core/db/tres.pxd create mode 100644 pyslurm/core/db/tres.pyx diff --git a/pyslurm/core/common/__init__.pyx b/pyslurm/core/common/__init__.pyx index 3e9e98bb..0bf4e7d2 100644 --- a/pyslurm/core/common/__init__.pyx +++ b/pyslurm/core/common/__init__.pyx @@ -41,7 +41,7 @@ MEMORY_UNITS = { cpdef uid_to_name(uint32_t uid, err_on_invalid=True, dict lookup={}): """Translate UID to a User-Name.""" - if uid == slurm.NO_VAL: + if uid == slurm.NO_VAL or uid == slurm.INFINITE: return None if lookup: @@ -64,7 +64,7 @@ cpdef uid_to_name(uint32_t uid, err_on_invalid=True, dict lookup={}): cpdef gid_to_name(uint32_t gid, err_on_invalid=True, dict lookup={}): """Translate a uid to a Group-Name.""" - if gid == slurm.NO_VAL: + if gid == slurm.NO_VAL or gid == slurm.INFINITE: return None if lookup: diff --git a/pyslurm/core/db/job.pxd b/pyslurm/core/db/job.pxd index d6843831..18fe28b6 100644 --- a/pyslurm/core/db/job.pxd +++ b/pyslurm/core/db/job.pxd @@ -32,6 +32,8 @@ from pyslurm.slurm cimport ( slurmdb_destroy_step_rec, try_xmalloc, slurmdb_job_cond_def_start_end, + slurm_job_state_string, + slurm_job_reason_string, ) from pyslurm.core.db.util cimport SlurmList, SlurmListItem from pyslurm.core.db.connection cimport Connection diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index 92f759f5..7b05e929 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -22,9 +22,22 @@ # cython: embedsignature=True +from os import WIFSIGNALED, WIFEXITED, WTERMSIG, WEXITSTATUS from pyslurm.core.error import RPCError -from pyslurm.core.common.ctime import date_to_timestamp +from pyslurm.core.db.tres cimport TrackableResources, TrackableResource from pyslurm.core.common.uint import * +from pyslurm.core.common.ctime import ( + date_to_timestamp, + secs_to_timestr, + timestamp_to_date, + mins_to_timestr, +) +from pyslurm.core.common import ( + gid_to_name, + uid_to_name, + humanize, + instance_to_dict, +) # Maybe prefix these classes with something like "DB" to avoid name collision # with the other classes from pyslurm/core/job ? @@ -68,6 +81,30 @@ cdef class JobStep: else: return int(sid) + @property + def container(self): + return cstr.to_unicode(self.ptr.container) + + @property + def elapsed_time(self): + return secs_to_timestr(self.ptr.elapsed) + + @property + def end_time(self): + return timestamp_to_date(self.ptr.end) + + @property + def exit_code(self): + return None + + @property + def nodes_count(self): + return None + + @property + def nodes(self): + return None + @property def id(self): return self._xlate_from_id(self.ptr.step_id.step_id) @@ -143,9 +180,15 @@ cdef class Jobs(dict): db_conn = Connection() self.info = SlurmList.wrap(slurmdb_jobs_get(db_conn.conn, job_cond.ptr)) + if self.info.is_null(): raise RPCError(msg="Failed to get Jobs from slurmdbd") + # TODO: also get trackable resources with slurmdb_tres_get and store + # it in each job instance. tres_alloc_str and tres_req_str only + # contain the numeric tres ids, but it probably makes more sense to + # convert them to its type name for the user in advance. 
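        # For reference, those TRES strings are comma separated "id=count"
        # pairs, e.g. "1=16,2=32768,4=2" -- with the tres_types_t values
        # added in this series that reads as 16 CPUs, a memory count of
        # 32768 and 2 nodes. Individual counts can already be picked out
        # by id (example values made up):
        #
        #     TrackableResources.find_count_in_str(job.ptr.tres_alloc_str,
        #                                          slurm.TRES_MEM)  # 32768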
+ # TODO: For multi-cluster support, remove duplicate federation jobs for job_ptr in SlurmList.iter_and_pop(self.info): job = Job.from_ptr(job_ptr.data) @@ -176,10 +219,260 @@ cdef class Job: wrap.steps = JobSteps.__new__(JobSteps) return wrap + def as_dict(self): + return instance_to_dict(self) + @property def account(self): return cstr.to_unicode(self.ptr.account) + @property + def admin_comment(self): + return cstr.to_unicode(self.ptr.admin_comment) + + @property + def allocated_nodes(self): + return u32_parse(self.ptr.alloc_nodes) + + @property + def array_job_id(self): + return u32_parse(self.ptr.array_job_id) + + @property + def array_parallel_tasks(self): + return u32_parse(self.ptr.array_max_tasks) + + @property + def array_task_id(self): + return u32_parse(self.ptr.array_task_id) + + @property + def array_tasks_waiting(self): + task_str = cstr.to_unicode(self.ptr.array_task_str) + if not task_str: + return None + + if "%" in task_str: + # We don't want this % character and everything after it + # in here, so remove it. + task_str = task_str[:task_str.rindex("%")] + + return task_str + + @property + def association_id(self): + return u32_parse(self.ptr.associd) + + @property + def block_id(self): + return cstr.to_unicode(self.ptr.blockid) + + @property + def cluster(self): + return cstr.to_unicode(self.ptr.cluster) + + @property + def constraints(self): + return cstr.to_list(self.ptr.constraints) + + @property + def container(self): + return cstr.to_list(self.ptr.container) + + @property + def db_index(self): + return u64_parse(self.ptr.db_index) + + @property + def derived_exit_code(self): + """int: The derived exit code for the Job.""" + if not WIFEXITED(self.ptr.derived_ec): + return None + + return WEXITSTATUS(self.ptr.derived_ec) + + @property + def derived_exit_code_signal(self): + """int: Signal for the derived exit code.""" + if not WIFSIGNALED(self.ptr.derived_ec): + return None + + return WTERMSIG(self.ptr.derived_ec) + + @property + def comment(self): + return cstr.to_unicode(self.ptr.derived_es) + + @property + def elapsed_time(self): + return secs_to_timestr(self.ptr.elapsed) + + @property + def eligible_time(self): + return timestamp_to_date(self.ptr.eligible) + + @property + def end_time(self): + return timestamp_to_date(self.ptr.end) + + @property + def exit_code(self): + pass + + # uint32_t flags + + def gid(self): + return gid_to_name(self.ptr.gid) + + # uint32_t het_job_id + # uint32_t het_job_offset + @property def id(self): return self.ptr.jobid + + @property + def name(self): + return cstr.to_unicode(self.ptr.jobname) + + # uint32_t lft + + @property + def mcs_label(self): + return cstr.to_unicode(self.ptr.mcs_label) + + @property + def nodelist(self): + return cstr.to_list(self.ptr.nodes) + + @property + def partition(self): + return cstr.to_unicode(self.ptr.partition) + + @property + def priority(self): + return u32_parse(self.ptr.priority, zero_is_noval=False) + + @property + def quality_of_service(self): + # Need to convert the raw uint32_t qosid to a name, by calling + # slurmdb_qos_get. To avoid doing this repeatedly, we'll probably need + # to also get the qos list when calling slurmdb_jobs_get and store it + # in each job instance. 
+ return None + + @property + def requested_cpus(self): + return u32_parse(self.ptr.req_cpus) + + @property + def requested_mem(self): + val = TrackableResources.find_count_in_str(self.ptr.tres_req_str, + slurm.TRES_MEM) + return humanize(val, decimals=2) + + @property + def allocated_cpus(self): + pass + + @property + def reservation(self): + return cstr.to_unicode(self.ptr.resv_name) + + @property + def reservation_id(self): + return u32_parse(self.ptr.resvid) + + @property + def script(self): + return cstr.to_unicode(self.ptr.script) + + # uint32_t show_full + + @property + def start_time(self): + return timestamp_to_date(self.ptr.start) + + @property + def state(self): + """str: State this Job is in.""" + return cstr.to_unicode(slurm_job_state_string(self.ptr.state)) + + @property + def state_reason(self): + return cstr.to_unicode(slurm_job_reason_string + (self.ptr.state_reason_prev)) + + @property + def cancelled_by(self): + return uid_to_name(self.ptr.requid) + + @property + def submit_time(self): + return timestamp_to_date(self.ptr.submit) + + @property + def submit_line(self): + return cstr.to_unicode(self.ptr.submit_line) + + @property + def suspended_time(self): + return secs_to_timestr(self.ptr.elapsed) + + @property + def system_comment(self): + return cstr.to_unicode(self.ptr.system_comment) + + @property + def system_cpu_time(self): + # uint32_t sys_cpu_sec + # uint32_t sys_cpu_usec + pass + + @property + def time_limit(self): + return mins_to_timestr(self.ptr.timelimit, "PartitionLimit") + + @property + def cpu_time(self): + pass + + @property + def total_cpu_time(self): + # uint32_t tot_cpu_sec + # uint32_t tot_cpu_usec + pass + + @property + def uid(self): + # Theres also a ptr->user + # https://github.com/SchedMD/slurm/blob/6365a8b7c9480c48678eeedef99864d8d3b6a6b5/src/sacct/print.c#L1946 + return uid_to_name(self.ptr.uid) + + # TODO: used gres + + @property + def user_cpu_time(self): + # uint32_t user_cpu_sec + # uint32_t user_cpu_usec + pass + + @property + def wckey(self): + return cstr.to_unicode(self.ptr.wckey) + + @property + def wckey_id(self): + return u32_parse(self.ptr.wckeyid) + + @property + def work_dir(self): + return cstr.to_unicode(self.ptr.work_dir) + + @property + def tres_allocated(self): + return TrackableResources.from_str(self.ptr.tres_alloc_str) + + @property + def tres_requested(self): + return TrackableResources.from_str(self.ptr.tres_req_str) diff --git a/pyslurm/core/db/tres.pxd b/pyslurm/core/db/tres.pxd new file mode 100644 index 00000000..e05644b8 --- /dev/null +++ b/pyslurm/core/db/tres.pxd @@ -0,0 +1,41 @@ +######################################################################### +# tres.pxd - pyslurm slurmdbd tres api +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +# cython: embedsignature=True + +from pyslurm cimport slurm +from pyslurm.core.common cimport cstr +from libc.stdint cimport uint64_t +from pyslurm.slurm cimport ( + slurmdb_tres_rec_t, + slurmdb_find_tres_count_in_string, +) + + +cdef class TrackableResources(dict): + cdef public raw_str + + @staticmethod + cdef TrackableResources from_str(char *tres_str) + + +cdef class TrackableResource: + cdef slurmdb_tres_rec_t *ptr diff --git a/pyslurm/core/db/tres.pyx b/pyslurm/core/db/tres.pyx new file mode 100644 index 00000000..f73507c6 --- /dev/null +++ b/pyslurm/core/db/tres.pyx @@ -0,0 +1,61 @@ +######################################################################### +# tres.pyx - pyslurm slurmdbd tres api +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 +# cython: embedsignature=True + +from pyslurm.core.common.uint import * + + +cdef class TrackableResources(dict): + + def __init__(self): + pass + + @staticmethod + cdef TrackableResources from_str(char *tres_str): + cdef: + TrackableResources tres + str raw_str = cstr.to_unicode(tres_str) + dict tres_dict + + tres = TrackableResources.__new__(TrackableResources) + if not raw_str: + return tres + + tres.raw_str = raw_str + tres_dict = cstr.to_dict(tres_str) + for tres_id, val in tres_dict.items(): + # TODO: resolve ids to type name + pass + + return tres + + @staticmethod + def find_count_in_str(tres_str, typ): + cdef uint64_t tmp + tmp = slurmdb_find_tres_count_in_string(tres_str, typ) + return u64_parse(tmp) + + +cdef class TrackableResource: + + def __cinit__(self): + self.ptr = NULL diff --git a/pyslurm/slurm/extra.pxi b/pyslurm/slurm/extra.pxi index a0ac15dc..88b8e89a 100644 --- a/pyslurm/slurm/extra.pxi +++ b/pyslurm/slurm/extra.pxi @@ -130,6 +130,19 @@ ctypedef struct job_resources: uint16_t threads_per_core uint8_t whole_node +# +# TRES +# +ctypedef enum tres_types_t: + TRES_CPU = 1 + TRES_MEM + TRES_ENERGY + TRES_NODE + TRES_BILLING + TRES_FS_DISK + TRES_VMEM + TRES_PAGES + TRES_STATIC_CNT # Global Environment cdef extern char **environ @@ -238,3 +251,4 @@ cdef extern char *slurm_hostlist_deranged_string_malloc(hostlist_t hl) # cdef extern void slurmdb_job_cond_def_start_end(slurmdb_job_cond_t *job_cond) +cdef extern uint64_t slurmdb_find_tres_count_in_string(char *tres_str_in, int id) From 04a2ac2e8929fc24de5e1fa9bce78756114eb8f8 Mon Sep 17 00:00:00 2001 From: tazend Date: Fri, 10 Mar 2023 22:30:58 +0100 Subject: [PATCH 13/28] wip --- pyslurm/core/db/job.pyx | 61 ++++++++++++++++++++++++++++++++++++--- pyslurm/core/db/stats.pxd | 31 ++++++++++++++++++++ pyslurm/core/db/stats.pyx | 27 
+++++++++++++++++ pyslurm/core/db/tres.pyx | 3 ++ 4 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 pyslurm/core/db/stats.pxd create mode 100644 pyslurm/core/db/stats.pyx diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index 7b05e929..a1d35b23 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -19,7 +19,6 @@ # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 -# cython: embedsignature=True from os import WIFSIGNALED, WIFEXITED, WTERMSIG, WEXITSTATUS @@ -31,6 +30,7 @@ from pyslurm.core.common.ctime import ( secs_to_timestr, timestamp_to_date, mins_to_timestr, + _raw_time, ) from pyslurm.core.common import ( gid_to_name, @@ -81,6 +81,44 @@ cdef class JobStep: else: return int(sid) + @property + def alloc_cpus(self): + return self.requested_cpus + + @property + def alloc_nodes(self): + nnodes = u32_parse(self.ptr.nnodes) + if not nnodes and self.ptr.tres_alloc_str: + return TrackableResources.find_count_in_str( + self.ptr.tres_alloc_str, slurm.TRES_NODE) + else: + return nnodes + + @property + def requested_cpus(self): + req_cpus = TrackableResources.find_count_in_str( + self.ptr.tres_alloc_str, slurm.TRES_CPU) + + if req_cpus == slurm.INFINITE64 and step.job_ptr: + tres_alloc_str = cstr.to_unicode(step.job_ptr.tres_alloc_str) + req_cpus = TrackableResources.find_count_in_str(tres_alloc_str, + slurm.TRES_CPU) + if not req_cpus: + tres_req_str = cstr.to_unicode(step.job_ptr.tres_req_str) + req_cpus = TrackableResources.find_count_in_str(tres_req_str, + slurm.TRES_CPU) + else: + req_cpus = 0 + + return req_cpus + + # Only in Parent Job available: + # association_id + # admin_comment + + + # ACT_CPUFREQ + @property def container(self): return cstr.to_unicode(self.ptr.container) @@ -89,6 +127,10 @@ cdef class JobStep: def elapsed_time(self): return secs_to_timestr(self.ptr.elapsed) + @property + def end_time_raw(self): + return _raw_time(self.ptr.end) + @property def end_time(self): return timestamp_to_date(self.ptr.end) @@ -167,6 +209,8 @@ cdef class Jobs(dict): SlurmListItem job_ptr SlurmListItem step_ptr SlurmList step_list + int cpu_tres_rec_count = 0 + int step_cpu_tres_rec_count = 0 # Allow the user to both specify search conditions via a JobConditions # instance or **kwargs. @@ -184,6 +228,9 @@ cdef class Jobs(dict): if self.info.is_null(): raise RPCError(msg="Failed to get Jobs from slurmdbd") + tres_alloc_str = cstr.to_unicode() + cpu_tres_rec_count + # TODO: also get trackable resources with slurmdb_tres_get and store # it in each job instance. 
tres_alloc_str and tres_req_str only # contain the numeric tres ids, but it probably makes more sense to @@ -231,7 +278,7 @@ cdef class Job: return cstr.to_unicode(self.ptr.admin_comment) @property - def allocated_nodes(self): + def alloc_nodes(self): return u32_parse(self.ptr.alloc_nodes) @property @@ -286,7 +333,8 @@ cdef class Job: @property def derived_exit_code(self): """int: The derived exit code for the Job.""" - if not WIFEXITED(self.ptr.derived_ec): + if (self.ptr.derived_ec == slurm.NO_VAL + or not WIFEXITED(self.ptr.derived_ec)): return None return WEXITSTATUS(self.ptr.derived_ec) @@ -294,7 +342,8 @@ cdef class Job: @property def derived_exit_code_signal(self): """int: Signal for the derived exit code.""" - if not WIFSIGNALED(self.ptr.derived_ec): + if (self.ptr.derived_ec == slurm.NO_VAL + or not WIFSIGNALED(self.ptr.derived_ec)): return None return WTERMSIG(self.ptr.derived_ec) @@ -303,6 +352,10 @@ cdef class Job: def comment(self): return cstr.to_unicode(self.ptr.derived_es) + @property + def elapsed_time_raw(self): + return _raw_time(self.ptr.elapsed) + @property def elapsed_time(self): return secs_to_timestr(self.ptr.elapsed) diff --git a/pyslurm/core/db/stats.pxd b/pyslurm/core/db/stats.pxd new file mode 100644 index 00000000..ff7b0570 --- /dev/null +++ b/pyslurm/core/db/stats.pxd @@ -0,0 +1,31 @@ +######################################################################### +# stats.pxd - pyslurm slurmdbd job stats +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + try_xmalloc, + slurmdb_stats_t, +) + + +cdef class JobStats: + cdef slurmdb_stats_t *ptr diff --git a/pyslurm/core/db/stats.pyx b/pyslurm/core/db/stats.pyx new file mode 100644 index 00000000..a2f31008 --- /dev/null +++ b/pyslurm/core/db/stats.pyx @@ -0,0 +1,27 @@ +######################################################################### +# stats.pyx - pyslurm slurmdbd job stats +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + + +cdef class JobStats: + pass + + diff --git a/pyslurm/core/db/tres.pyx b/pyslurm/core/db/tres.pyx index f73507c6..e0e7c698 100644 --- a/pyslurm/core/db/tres.pyx +++ b/pyslurm/core/db/tres.pyx @@ -50,6 +50,9 @@ cdef class TrackableResources(dict): @staticmethod def find_count_in_str(tres_str, typ): + if not tres_str: + return None + cdef uint64_t tmp tmp = slurmdb_find_tres_count_in_string(tres_str, typ) return u64_parse(tmp) From 437ce10ccdf7f6d41bfdac7e27a1a1be3bbaff52 Mon Sep 17 00:00:00 2001 From: tazend Date: Mon, 13 Mar 2023 19:55:03 +0100 Subject: [PATCH 14/28] wip --- pyslurm/core/db/job.pxd | 15 +-- pyslurm/core/db/job.pyx | 127 +----------------------- pyslurm/core/db/stats.pxd | 5 +- pyslurm/core/db/stats.pyx | 122 ++++++++++++++++++++++- pyslurm/core/db/step.pxd | 50 ++++++++++ pyslurm/core/db/step.pyx | 203 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 384 insertions(+), 138 deletions(-) create mode 100644 pyslurm/core/db/step.pxd create mode 100644 pyslurm/core/db/step.pyx diff --git a/pyslurm/core/db/job.pxd b/pyslurm/core/db/job.pxd index 18fe28b6..6e983c8d 100644 --- a/pyslurm/core/db/job.pxd +++ b/pyslurm/core/db/job.pxd @@ -19,7 +19,6 @@ # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 -# cython: embedsignature=True from pyslurm cimport slurm from pyslurm.slurm cimport ( @@ -36,6 +35,8 @@ from pyslurm.slurm cimport ( slurm_job_reason_string, ) from pyslurm.core.db.util cimport SlurmList, SlurmListItem +from pyslurm.core.db.step cimport JobStep, JobSteps +from pyslurm.core.db.stats cimport JobStats from pyslurm.core.db.connection cimport Connection from pyslurm.core.common cimport cstr @@ -52,17 +53,6 @@ cdef class JobConditions: constraints -cdef class JobSteps(dict): - pass - - -cdef class JobStep: - cdef slurmdb_step_rec_t *ptr - - @staticmethod - cdef JobStep from_ptr(slurmdb_step_rec_t *step) - - cdef class Jobs(dict): cdef SlurmList info @@ -73,4 +63,3 @@ cdef class Job: @staticmethod cdef Job from_ptr(slurmdb_job_rec_t *in_ptr) - diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index a1d35b23..1762bc71 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -20,7 +20,6 @@ # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 - from os import WIFSIGNALED, WIFEXITED, WTERMSIG, WEXITSTATUS from pyslurm.core.error import RPCError from pyslurm.core.db.tres cimport TrackableResources, TrackableResource @@ -42,124 +41,6 @@ from pyslurm.core.common import ( # Maybe prefix these classes with something like "DB" to avoid name collision # with the other classes from pyslurm/core/job ? 
-cdef class JobStep: - - def __cinit__(self): - self.ptr = NULL - - def __dealloc__(self): - slurmdb_destroy_step_rec(self.ptr) - self.ptr = NULL - - @staticmethod - cdef JobStep from_ptr(slurmdb_step_rec_t *step): - cdef JobStep wrap = JobStep.__new__(JobStep) - wrap.ptr = step - return wrap - - def _xlate_from_id(self, sid): - if sid == slurm.SLURM_BATCH_SCRIPT: - return "batch" - elif sid == slurm.SLURM_EXTERN_CONT: - return "extern" - elif sid == slurm.SLURM_INTERACTIVE_STEP: - return "interactive" - elif sid == slurm.SLURM_PENDING_STEP: - return "pending" - else: - return sid - - def _xlate_to_id(self, sid): - if sid == "batch": - return slurm.SLURM_BATCH_SCRIPT - elif sid == "extern": - return slurm.SLURM_EXTERN_CONT - elif sid == "interactive": - return slurm.SLURM_INTERACTIVE_STEP - elif sid == "pending": - return slurm.SLURM_PENDING_STEP - else: - return int(sid) - - @property - def alloc_cpus(self): - return self.requested_cpus - - @property - def alloc_nodes(self): - nnodes = u32_parse(self.ptr.nnodes) - if not nnodes and self.ptr.tres_alloc_str: - return TrackableResources.find_count_in_str( - self.ptr.tres_alloc_str, slurm.TRES_NODE) - else: - return nnodes - - @property - def requested_cpus(self): - req_cpus = TrackableResources.find_count_in_str( - self.ptr.tres_alloc_str, slurm.TRES_CPU) - - if req_cpus == slurm.INFINITE64 and step.job_ptr: - tres_alloc_str = cstr.to_unicode(step.job_ptr.tres_alloc_str) - req_cpus = TrackableResources.find_count_in_str(tres_alloc_str, - slurm.TRES_CPU) - if not req_cpus: - tres_req_str = cstr.to_unicode(step.job_ptr.tres_req_str) - req_cpus = TrackableResources.find_count_in_str(tres_req_str, - slurm.TRES_CPU) - else: - req_cpus = 0 - - return req_cpus - - # Only in Parent Job available: - # association_id - # admin_comment - - - # ACT_CPUFREQ - - @property - def container(self): - return cstr.to_unicode(self.ptr.container) - - @property - def elapsed_time(self): - return secs_to_timestr(self.ptr.elapsed) - - @property - def end_time_raw(self): - return _raw_time(self.ptr.end) - - @property - def end_time(self): - return timestamp_to_date(self.ptr.end) - - @property - def exit_code(self): - return None - - @property - def nodes_count(self): - return None - - @property - def nodes(self): - return None - - @property - def id(self): - return self._xlate_from_id(self.ptr.step_id.step_id) - - @property - def job_id(self): - return self.ptr.step_id.job_id - - @property - def name(self): - return cstr.to_unicode(self.ptr.stepname) - - cdef class JobConditions: def __cinit__(self): @@ -228,8 +109,8 @@ cdef class Jobs(dict): if self.info.is_null(): raise RPCError(msg="Failed to get Jobs from slurmdbd") - tres_alloc_str = cstr.to_unicode() - cpu_tres_rec_count + # tres_alloc_str = cstr.to_unicode() + # cpu_tres_rec_count # TODO: also get trackable resources with slurmdb_tres_get and store # it in each job instance. 
tres_alloc_str and tres_req_str only @@ -396,7 +277,7 @@ cdef class Job: @property def nodelist(self): - return cstr.to_list(self.ptr.nodes) + return cstr.to_unicode(self.ptr.nodes) @property def partition(self): @@ -425,7 +306,7 @@ cdef class Job: return humanize(val, decimals=2) @property - def allocated_cpus(self): + def alloc_cpus(self): pass @property diff --git a/pyslurm/core/db/stats.pxd b/pyslurm/core/db/stats.pxd index ff7b0570..98d1bad1 100644 --- a/pyslurm/core/db/stats.pxd +++ b/pyslurm/core/db/stats.pxd @@ -24,8 +24,11 @@ from pyslurm cimport slurm from pyslurm.slurm cimport ( try_xmalloc, slurmdb_stats_t, + slurmdb_job_rec_t, ) cdef class JobStats: - cdef slurmdb_stats_t *ptr + cdef: + slurmdb_stats_t *ptr + slurmdb_job_rec_t *job diff --git a/pyslurm/core/db/stats.pyx b/pyslurm/core/db/stats.pyx index a2f31008..c04cdfe9 100644 --- a/pyslurm/core/db/stats.pyx +++ b/pyslurm/core/db/stats.pyx @@ -22,6 +22,126 @@ cdef class JobStats: - pass + @property + def consumed_energy(self): + return None + @property + def avg_cpu_time(self): + return None + + @property + def avg_cpu_freq(self): + return None + + @property + def cpu_time(self): + # Elapsed * alloc_cpus + # This is the time the Job has been using the allocated CPUs for. + # This is not the actual cpu-usage. + return None + + @property + def avg_disk_read(self): + return None + + @property + def avg_disk_write(self): + return None + + @property + def avg_pages(self): + return None + + @property + def avg_rss(self): + return None + + @property + def avg_vmsize(self): + return None + + @property + def max_disk_read(self): + return None + + @property + def max_disk_read_node(self): + return None + + @property + def max_disk_read_task(self): + return None + + @property + def max_disk_write(self): + return None + + @property + def max_disk_write_node(self): + return None + + @property + def max_disk_write_task(self): + return None + + @property + def max_pages(self): + return None + + @property + def max_pages_node(self): + return None + + @property + def max_pages_task(self): + return None + + @property + def max_rss(self): + return None + + @property + def max_rss_node(self): + return None + + @property + def max_rss_task(self): + return None + + @property + def max_vmsize(self): + return None + + @property + def max_vmsize_node(self): + return None + + @property + def max_vmsize_task(self): + return None + + @property + def min_cpu_time(self): + return None + + @property + def min_cpu_time_node(self): + return None + + @property + def min_cpu_time_task(self): + return None + + @property + def total_cpu_time(self): + return None + + @property + def user_cpu_time(self): + return None + + @property + def system_cpu_time(self): + return None diff --git a/pyslurm/core/db/step.pxd b/pyslurm/core/db/step.pxd new file mode 100644 index 00000000..132bed0b --- /dev/null +++ b/pyslurm/core/db/step.pxd @@ -0,0 +1,50 @@ +######################################################################### +# step.pxd - pyslurm slurmdbd step api +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + slurmdb_job_rec_t, + slurmdb_job_cond_t, + slurmdb_step_rec_t, + slurmdb_jobs_get, + slurmdb_destroy_job_cond, + slurmdb_destroy_job_rec, + slurmdb_destroy_step_rec, + try_xmalloc, + slurmdb_job_cond_def_start_end, + slurm_job_state_string, + slurm_job_reason_string, +) +from pyslurm.core.db.util cimport SlurmList, SlurmListItem +from pyslurm.core.db.connection cimport Connection +from pyslurm.core.common cimport cstr + + +cdef class JobSteps(dict): + pass + + +cdef class JobStep: + cdef slurmdb_step_rec_t *ptr + + @staticmethod + cdef JobStep from_ptr(slurmdb_step_rec_t *step) diff --git a/pyslurm/core/db/step.pyx b/pyslurm/core/db/step.pyx new file mode 100644 index 00000000..8d0d1f4b --- /dev/null +++ b/pyslurm/core/db/step.pyx @@ -0,0 +1,203 @@ +######################################################################### +# step.pyx - pyslurm slurmdbd step api +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from os import WIFSIGNALED, WIFEXITED, WTERMSIG, WEXITSTATUS +from pyslurm.core.error import RPCError +from pyslurm.core.db.tres cimport TrackableResources, TrackableResource +from pyslurm.core.common.uint import * +from pyslurm.core.common.ctime import ( + date_to_timestamp, + secs_to_timestr, + timestamp_to_date, + mins_to_timestr, + _raw_time, +) +from pyslurm.core.common import ( + gid_to_name, + uid_to_name, + humanize, + instance_to_dict, +) + + +cdef class JobStep: + + def __cinit__(self): + self.ptr = NULL + + def __dealloc__(self): + slurmdb_destroy_step_rec(self.ptr) + self.ptr = NULL + + @staticmethod + cdef JobStep from_ptr(slurmdb_step_rec_t *step): + cdef JobStep wrap = JobStep.__new__(JobStep) + wrap.ptr = step + return wrap + + def _xlate_from_id(self, sid): + if sid == slurm.SLURM_BATCH_SCRIPT: + return "batch" + elif sid == slurm.SLURM_EXTERN_CONT: + return "extern" + elif sid == slurm.SLURM_INTERACTIVE_STEP: + return "interactive" + elif sid == slurm.SLURM_PENDING_STEP: + return "pending" + else: + return sid + + def _xlate_to_id(self, sid): + if sid == "batch": + return slurm.SLURM_BATCH_SCRIPT + elif sid == "extern": + return slurm.SLURM_EXTERN_CONT + elif sid == "interactive": + return slurm.SLURM_INTERACTIVE_STEP + elif sid == "pending": + return slurm.SLURM_PENDING_STEP + else: + return int(sid) + + @property + def alloc_cpus(self): + return self.requested_cpus + + @property + def alloc_nodes(self): + nnodes = u32_parse(self.ptr.nnodes) + if not nnodes and self.ptr.tres_alloc_str: + return TrackableResources.find_count_in_str( + self.ptr.tres_alloc_str, slurm.TRES_NODE) + else: + return nnodes + + @property + def requested_cpus(self): + req_cpus = TrackableResources.find_count_in_str( + self.ptr.tres_alloc_str, slurm.TRES_CPU) + + if req_cpus == slurm.INFINITE64 and step.job_ptr: + tres_alloc_str = cstr.to_unicode(step.job_ptr.tres_alloc_str) + req_cpus = TrackableResources.find_count_in_str(tres_alloc_str, + slurm.TRES_CPU) + if not req_cpus: + tres_req_str = cstr.to_unicode(step.job_ptr.tres_req_str) + req_cpus = TrackableResources.find_count_in_str(tres_req_str, + slurm.TRES_CPU) + else: + req_cpus = 0 + + return req_cpus + + # Only in Parent Job available: + # resvcpu? 
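    # Note on requested_cpus above: the fallback to the parent job's TRES
    # strings is only taken when the step's own tres_alloc_str reports
    # INFINITE64 and a parent record is present; in every other case the
    # value is reset to 0. `step.job_ptr` presumably means
    # `self.ptr.job_ptr`, since no local name `step` exists inside that
    # property.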
+ + @property + def container(self): + return cstr.to_unicode(self.ptr.container) + + @property + def elapsed_time(self): + return secs_to_timestr(self.ptr.elapsed) + + @property + def end_time_raw(self): + return _raw_time(self.ptr.end) + + @property + def end_time(self): + return timestamp_to_date(self.ptr.end) + + @property + def eligible_time_raw(self): + return _raw_time(self.ptr.start) + + @property + def eligible_time(self): + return timestamp_to_date(self.ptr.start) + + @property + def start_time_raw(self): + return _raw_time(self.ptr.start) + + @property + def start_time(self): + return timestamp_to_date(self.ptr.start) + + @property + def exit_code(self): + return None + + @property + def ntasks(self): + return None + + @property + def requested_cpu_freq_min(self): + return None + + @property + def requested_cpu_freq_max(self): + return None + + @property + def requested_cpu_freq_gov(self): + return None + + @property + def nodelist(self): + return None + + @property + def id(self): + return self._xlate_from_id(self.ptr.step_id.step_id) + + @property + def job_id(self): + return self.ptr.step_id.job_id + + @property + def name(self): + return cstr.to_unicode(self.ptr.stepname) + + @property + def distribution(self): + # ptr.task_dist + pass + + @property + def state(self): + """str: State this Job step is in.""" + return cstr.to_unicode(slurm_job_state_string(self.ptr.state)) + + @property + def cancelled_by(self): + return uid_to_name(self.ptr.requid) + + @property + def submit_line(self): + return cstr.to_unicode(self.ptr.submit_line) + + @property + def suspended_time(self): + return secs_to_timestr(self.ptr.elapsed) From aea4f2d555aa3a1929ed564c18de3e0384d53bbd Mon Sep 17 00:00:00 2001 From: tazend Date: Mon, 13 Mar 2023 22:18:33 +0100 Subject: [PATCH 15/28] wip --- pyslurm/core/db/job.pyx | 75 ++++++++++++++++++++++----------------- pyslurm/core/db/stats.pyx | 8 +++++ pyslurm/core/db/step.pyx | 6 ++-- 3 files changed, 53 insertions(+), 36 deletions(-) diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index 1762bc71..1755e7f6 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -167,7 +167,7 @@ cdef class Job: return u32_parse(self.ptr.array_job_id) @property - def array_parallel_tasks(self): + def array_tasks_parallel(self): return u32_parse(self.ptr.array_max_tasks) @property @@ -241,10 +241,18 @@ cdef class Job: def elapsed_time(self): return secs_to_timestr(self.ptr.elapsed) + @property + def eligible_time_raw(self): + return _raw_time(self.ptr.eligible) + @property def eligible_time(self): return timestamp_to_date(self.ptr.eligible) + @property + def end_time_raw(self): + return _raw_time(self.ptr.end) + @property def end_time(self): return timestamp_to_date(self.ptr.end) @@ -256,6 +264,9 @@ cdef class Job: # uint32_t flags def gid(self): + return u32_parse(self.ptr.gid, zero_is_noval=False) + + def group(self): return gid_to_name(self.ptr.gid) # uint32_t het_job_id @@ -288,7 +299,7 @@ cdef class Job: return u32_parse(self.ptr.priority, zero_is_noval=False) @property - def quality_of_service(self): + def qos(self): # Need to convert the raw uint32_t qosid to a name, by calling # slurmdb_qos_get. 
To avoid doing this repeatedly, we'll probably need # to also get the qos list when calling slurmdb_jobs_get and store it @@ -313,9 +324,9 @@ cdef class Job: def reservation(self): return cstr.to_unicode(self.ptr.resv_name) - @property - def reservation_id(self): - return u32_parse(self.ptr.resvid) +# @property +# def reservation_id(self): +# return u32_parse(self.ptr.resvid) @property def script(self): @@ -323,6 +334,10 @@ cdef class Job: # uint32_t show_full + @property + def start_time_raw(self): + return _raw_time(self.ptr.start) + @property def start_time(self): return timestamp_to_date(self.ptr.start) @@ -341,6 +356,10 @@ cdef class Job: def cancelled_by(self): return uid_to_name(self.ptr.requid) + @property + def submit_time_raw(self): + return _raw_time(self.ptr.submit) + @property def submit_time(self): return timestamp_to_date(self.ptr.submit) @@ -349,6 +368,10 @@ cdef class Job: def submit_line(self): return cstr.to_unicode(self.ptr.submit_line) + @property + def suspended_time_raw(self): + return _raw_time(self.ptr.elapsed) + @property def suspended_time(self): return secs_to_timestr(self.ptr.elapsed) @@ -358,55 +381,41 @@ cdef class Job: return cstr.to_unicode(self.ptr.system_comment) @property - def system_cpu_time(self): - # uint32_t sys_cpu_sec - # uint32_t sys_cpu_usec - pass + def time_limit_raw(self): + return _raw_time(self.ptr.timelimit) @property def time_limit(self): return mins_to_timestr(self.ptr.timelimit, "PartitionLimit") @property - def cpu_time(self): - pass - - @property - def total_cpu_time(self): - # uint32_t tot_cpu_sec - # uint32_t tot_cpu_usec - pass + def uid(self): + return u32_parse(self.ptr.uid, zero_is_noval=False) @property - def uid(self): + def user(self): # Theres also a ptr->user # https://github.com/SchedMD/slurm/blob/6365a8b7c9480c48678eeedef99864d8d3b6a6b5/src/sacct/print.c#L1946 return uid_to_name(self.ptr.uid) # TODO: used gres - @property - def user_cpu_time(self): - # uint32_t user_cpu_sec - # uint32_t user_cpu_usec - pass - @property def wckey(self): return cstr.to_unicode(self.ptr.wckey) - @property - def wckey_id(self): - return u32_parse(self.ptr.wckeyid) +# @property +# def wckey_id(self): +# return u32_parse(self.ptr.wckeyid) @property def work_dir(self): return cstr.to_unicode(self.ptr.work_dir) - @property - def tres_allocated(self): - return TrackableResources.from_str(self.ptr.tres_alloc_str) +# @property +# def tres_allocated(self): +# return TrackableResources.from_str(self.ptr.tres_alloc_str) - @property - def tres_requested(self): - return TrackableResources.from_str(self.ptr.tres_req_str) +# @property +# def tres_requested(self): +# return TrackableResources.from_str(self.ptr.tres_req_str) diff --git a/pyslurm/core/db/stats.pyx b/pyslurm/core/db/stats.pyx index c04cdfe9..1ecd98d2 100644 --- a/pyslurm/core/db/stats.pyx +++ b/pyslurm/core/db/stats.pyx @@ -136,12 +136,20 @@ cdef class JobStats: @property def total_cpu_time(self): + # uint32_t tot_cpu_sec + # uint32_t tot_cpu_usec return None @property def user_cpu_time(self): + # Only available for Jobs from the Database, not sstat + # uint32_t user_cpu_sec + # uint32_t user_cpu_usec return None @property def system_cpu_time(self): + # Only available for Jobs from the Database, not sstat + # uint32_t sys_cpu_sec + # uint32_t sys_cpu_usec return None diff --git a/pyslurm/core/db/step.pyx b/pyslurm/core/db/step.pyx index 8d0d1f4b..83e3f16c 100644 --- a/pyslurm/core/db/step.pyx +++ b/pyslurm/core/db/step.pyx @@ -153,15 +153,15 @@ cdef class JobStep: return None @property - def 
requested_cpu_freq_min(self): + def cpu_freq_min(self): return None @property - def requested_cpu_freq_max(self): + def cpu_freq_max(self): return None @property - def requested_cpu_freq_gov(self): + def cpu_freq_gov(self): return None @property From 902a32e46617b659f59eb8df37fb9517279c789b Mon Sep 17 00:00:00 2001 From: tazend Date: Sat, 1 Apr 2023 17:36:41 +0200 Subject: [PATCH 16/28] wip db api --- pyslurm/__init__.py | 1 + pyslurm/core/db/__init__.py | 3 + pyslurm/core/db/connection.pxd | 2 +- pyslurm/core/db/connection.pyx | 17 ++- pyslurm/core/db/job.pxd | 40 +++++- pyslurm/core/db/job.pyx | 183 ++++++++++++++++--------- pyslurm/core/db/stats.pxd | 54 +++++++- pyslurm/core/db/stats.pyx | 243 +++++++++++++++------------------ pyslurm/core/db/step.pxd | 2 + pyslurm/core/db/step.pyx | 97 ++++++------- pyslurm/core/db/tres.pxd | 4 + pyslurm/core/db/tres.pyx | 54 ++++++-- pyslurm/core/db/util.pyx | 1 - 13 files changed, 436 insertions(+), 265 deletions(-) diff --git a/pyslurm/__init__.py b/pyslurm/__init__.py index 0181892e..da8c7830 100644 --- a/pyslurm/__init__.py +++ b/pyslurm/__init__.py @@ -24,6 +24,7 @@ JobSubmitDescription, ) +from pyslurm.core import db from pyslurm.core.node import Node, Nodes import pyslurm.core.error diff --git a/pyslurm/core/db/__init__.py b/pyslurm/core/db/__init__.py index e69de29b..f0f96747 100644 --- a/pyslurm/core/db/__init__.py +++ b/pyslurm/core/db/__init__.py @@ -0,0 +1,3 @@ +from pyslurm.core.db.job import Job, Jobs +from pyslurm.core.db.step import JobStep +from pyslurm.core.db.tres import TrackableResource, TrackableResources diff --git a/pyslurm/core/db/connection.pxd b/pyslurm/core/db/connection.pxd index b6e7e5ff..f75465c9 100644 --- a/pyslurm/core/db/connection.pxd +++ b/pyslurm/core/db/connection.pxd @@ -31,5 +31,5 @@ from pyslurm.slurm cimport ( cdef class Connection: cdef: - void *conn + void *ptr uint16_t conn_flags diff --git a/pyslurm/core/db/connection.pyx b/pyslurm/core/db/connection.pyx index b686065c..bd1d1062 100644 --- a/pyslurm/core/db/connection.pyx +++ b/pyslurm/core/db/connection.pyx @@ -25,25 +25,28 @@ from pyslurm.core.error import RPCError cdef class Connection: + def __cinit__(self): - self.conn = NULL + self.ptr = NULL self.conn_flags = 0 def __init__(self): self.open() def open(self): - if not self.conn: - self.conn = slurmdb_connection_get(&self.conn_flags) - if not self.conn: + if not self.ptr: + self.ptr = slurmdb_connection_get(&self.conn_flags) + if not self.ptr: raise RPCError(msg="Failed to open Connection to slurmdbd") def close(self): - slurmdb_connection_close(&self.conn) - self.conn = NULL + if self.is_open: + slurmdb_connection_close(&self.ptr) + self.ptr = NULL + @property def is_open(self): - if self.conn: + if self.ptr: return True else: return False diff --git a/pyslurm/core/db/job.pxd b/pyslurm/core/db/job.pxd index 6e983c8d..052d1a8d 100644 --- a/pyslurm/core/db/job.pxd +++ b/pyslurm/core/db/job.pxd @@ -54,12 +54,48 @@ cdef class JobConditions: cdef class Jobs(dict): - cdef SlurmList info + cdef: + SlurmList info + Connection db_conn cdef class Job: + """A Slurm Database Job. + + All attributes in this class are read-only. + + Args: + job_id (int): + An Integer representing a Job-ID. + + Raises: + MemoryError: If malloc fails to allocate memory. + + Attributes: + steps (pyslurm.db.JobSteps): + Steps this Job has. + account (str): + Account of the Job. + admin_comment (str): + Admin comment for the Job. 
+ num_nodes (int): + Amount of nodes this Job has allocated (if it is running) or + requested (if it is still pending). + array_id (int): + The master Array-Job ID. + array_tasks_parallel (int): + Max number of array tasks allowed to run simultaneously. + array_task_id (int): + Array Task ID of this Job if it is an Array-Job. + array_tasks_waiting (str): + Array Tasks that are still waiting. + name (str): + Name of the Job. + """ cdef slurmdb_job_rec_t *ptr - cdef public JobSteps steps + cdef public: + JobSteps steps + JobStats stats @staticmethod cdef Job from_ptr(slurmdb_job_rec_t *in_ptr) diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index 1755e7f6..0b4277d1 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -26,15 +26,11 @@ from pyslurm.core.db.tres cimport TrackableResources, TrackableResource from pyslurm.core.common.uint import * from pyslurm.core.common.ctime import ( date_to_timestamp, - secs_to_timestr, - timestamp_to_date, - mins_to_timestr, _raw_time, ) from pyslurm.core.common import ( gid_to_name, uid_to_name, - humanize, instance_to_dict, ) @@ -81,11 +77,13 @@ cdef class JobConditions: cdef class Jobs(dict): + def __dealloc__(self): + self.db_conn.close() + def __init__(self, *args, **kwargs): cdef: Job job JobStep step - Connection db_conn JobConditions job_cond SlurmListItem job_ptr SlurmListItem step_ptr @@ -102,8 +100,8 @@ cdef class Jobs(dict): job_cond._create_job_cond() # TODO: Have a single, global DB connection in pyslurm internally? - db_conn = Connection() - self.info = SlurmList.wrap(slurmdb_jobs_get(db_conn.conn, + self.db_conn = Connection() + self.info = SlurmList.wrap(slurmdb_jobs_get(self.db_conn.ptr, job_cond.ptr)) if self.info.is_null(): @@ -127,13 +125,15 @@ cdef class Jobs(dict): step = JobStep.from_ptr(step_ptr.data) job.steps[step.id] = step + job._sum_stats_from_steps() + cdef class Job: def __cinit__(self): self.ptr = NULL - def __init__(self, int job_id): + def __init__(self, job_id): pass def __dealloc__(self): @@ -145,10 +145,80 @@ cdef class Job: cdef Job wrap = Job.__new__(Job) wrap.ptr = in_ptr wrap.steps = JobSteps.__new__(JobSteps) + wrap.stats = JobStats() return wrap + def _sum_stats_from_steps(self): + cdef: + JobStats job_stats = self.stats + JobStats step_stats = None + + for step in self.steps.values(): + step_stats = step.stats + + job_stats.consumed_energy += step_stats.consumed_energy + job_stats.average_cpu_time += step_stats.average_cpu_time + job_stats.average_cpu_frequency += step_stats.average_cpu_frequency + job_stats.cpu_time += step_stats.cpu_time + job_stats.average_disk_read += step_stats.average_disk_read + job_stats.average_disk_write += step_stats.average_disk_write + job_stats.average_pages += step_stats.average_pages + job_stats.average_rss += step_stats.average_rss + job_stats.average_vmsize += step_stats.average_vmsize + + if step_stats.max_disk_read >= job_stats.max_disk_read: + job_stats.max_disk_read = step_stats.max_disk_read + job_stats.max_disk_read_node = step_stats.max_disk_read_node + job_stats.max_disk_read_task = step_stats.max_disk_read_task + + if step_stats.max_disk_write >= job_stats.max_disk_write: + job_stats.max_disk_write = step_stats.max_disk_write + job_stats.max_disk_write_node = step_stats.max_disk_write_node + job_stats.max_disk_write_task = step_stats.max_disk_write_task + + if step_stats.max_pages >= job_stats.max_pages: + job_stats.max_pages = step_stats.max_pages + job_stats.max_pages_node = step_stats.max_pages_node + job_stats.max_pages_task = 
step_stats.max_pages_task + + if step_stats.max_rss >= job_stats.max_rss: + job_stats.max_rss = step_stats.max_rss + job_stats.max_rss_node = step_stats.max_rss_node + job_stats.max_rss_task = step_stats.max_rss_task + + if step_stats.max_vmsize >= job_stats.max_vmsize: + job_stats.max_vmsize = step_stats.max_vmsize + job_stats.max_vmsize_node = step_stats.max_vmsize_node + job_stats.max_vmsize_task = step_stats.max_vmsize_task + + if step_stats.min_cpu_time >= job_stats.min_cpu_time: + job_stats.min_cpu_time = step_stats.min_cpu_time + job_stats.min_cpu_time_node = step_stats.min_cpu_time_node + job_stats.min_cpu_time_task = step_stats.min_cpu_time_task + + if self.ptr.tot_cpu_sec != slurm.NO_VAL64: + job_stats.total_cpu_time = self.ptr.tot_cpu_sec + + if self.ptr.user_cpu_sec != slurm.NO_VAL64: + job_stats.user_cpu_time = self.ptr.user_cpu_sec + + if self.ptr.sys_cpu_sec != slurm.NO_VAL64: + job_stats.system_cpu_time = self.ptr.sys_cpu_sec + + elapsed = self.elapsed_time if self.elapsed_time else 0 + cpus = self.cpus if self.cpus else 0 + job_stats.cpu_time = elapsed * cpus + job_stats.average_cpu_frequency /= len(self.steps) + def as_dict(self): - return instance_to_dict(self) + cdef dict out = instance_to_dict(self) + out["stats"] = self.stats.as_dict() + steps = out.pop("steps", {}) + + out["steps"] = {} + for step_id, step in steps.items(): + out["steps"][step_id] = step.as_dict() + return out @property def account(self): @@ -159,11 +229,21 @@ cdef class Job: return cstr.to_unicode(self.ptr.admin_comment) @property - def alloc_nodes(self): - return u32_parse(self.ptr.alloc_nodes) + def num_nodes(self): + val = TrackableResources.find_count_in_str(self.ptr.tres_alloc_str, + slurm.TRES_NODE) + if val is not None: + # Job is already running and has nodes allocated + return val + else: + # Job is still pending, so we return the number of requested nodes + # instead. + val = TrackableResources.find_count_in_str(self.ptr.tres_req_str, + slurm.TRES_NODE) + return val @property - def array_job_id(self): + def array_id(self): return u32_parse(self.ptr.array_job_id) @property @@ -233,40 +313,29 @@ cdef class Job: def comment(self): return cstr.to_unicode(self.ptr.derived_es) - @property - def elapsed_time_raw(self): - return _raw_time(self.ptr.elapsed) - @property def elapsed_time(self): - return secs_to_timestr(self.ptr.elapsed) - - @property - def eligible_time_raw(self): - return _raw_time(self.ptr.eligible) + return _raw_time(self.ptr.elapsed) @property def eligible_time(self): - return timestamp_to_date(self.ptr.eligible) - - @property - def end_time_raw(self): - return _raw_time(self.ptr.end) + return _raw_time(self.ptr.eligible) @property def end_time(self): - return timestamp_to_date(self.ptr.end) + return _raw_time(self.ptr.end) @property def exit_code(self): - pass + # TODO + return None # uint32_t flags - def gid(self): + def group_id(self): return u32_parse(self.ptr.gid, zero_is_noval=False) - def group(self): + def group_name(self): return gid_to_name(self.ptr.gid) # uint32_t het_job_id @@ -307,18 +376,22 @@ cdef class Job: return None @property - def requested_cpus(self): - return u32_parse(self.ptr.req_cpus) + def cpus(self): + val = TrackableResources.find_count_in_str(self.ptr.tres_alloc_str, + slurm.TRES_CPU) + if val is not None: + # Job is already running and has cpus allocated + return val + else: + # Job is still pending, so we return the number of requested cpus + # instead. 
+ return u32_parse(self.ptr.req_cpus) @property - def requested_mem(self): + def memory(self): val = TrackableResources.find_count_in_str(self.ptr.tres_req_str, slurm.TRES_MEM) - return humanize(val, decimals=2) - - @property - def alloc_cpus(self): - pass + return val @property def reservation(self): @@ -334,13 +407,9 @@ cdef class Job: # uint32_t show_full - @property - def start_time_raw(self): - return _raw_time(self.ptr.start) - @property def start_time(self): - return timestamp_to_date(self.ptr.start) + return _raw_time(self.ptr.start) @property def state(self): @@ -356,44 +425,36 @@ cdef class Job: def cancelled_by(self): return uid_to_name(self.ptr.requid) - @property - def submit_time_raw(self): - return _raw_time(self.ptr.submit) - @property def submit_time(self): - return timestamp_to_date(self.ptr.submit) + return _raw_time(self.ptr.submit) @property def submit_line(self): return cstr.to_unicode(self.ptr.submit_line) - @property - def suspended_time_raw(self): - return _raw_time(self.ptr.elapsed) - @property def suspended_time(self): - return secs_to_timestr(self.ptr.elapsed) + # seconds + return _raw_time(self.ptr.elapsed) @property def system_comment(self): return cstr.to_unicode(self.ptr.system_comment) - @property - def time_limit_raw(self): - return _raw_time(self.ptr.timelimit) - @property def time_limit(self): - return mins_to_timestr(self.ptr.timelimit, "PartitionLimit") + # minutes + # TODO: Perhaps we should just find out what the actual PartitionLimit + # is? + return _raw_time(self.ptr.timelimit, "PartitionLimit") @property - def uid(self): + def user_id(self): return u32_parse(self.ptr.uid, zero_is_noval=False) @property - def user(self): + def user_name(self): # Theres also a ptr->user # https://github.com/SchedMD/slurm/blob/6365a8b7c9480c48678eeedef99864d8d3b6a6b5/src/sacct/print.c#L1946 return uid_to_name(self.ptr.uid) @@ -409,7 +470,7 @@ cdef class Job: # return u32_parse(self.ptr.wckeyid) @property - def work_dir(self): + def working_directory(self): return cstr.to_unicode(self.ptr.work_dir) # @property diff --git a/pyslurm/core/db/stats.pxd b/pyslurm/core/db/stats.pxd index 98d1bad1..08b04e75 100644 --- a/pyslurm/core/db/stats.pxd +++ b/pyslurm/core/db/stats.pxd @@ -26,9 +26,57 @@ from pyslurm.slurm cimport ( slurmdb_stats_t, slurmdb_job_rec_t, ) +from pyslurm.core.db.tres cimport TrackableResources +from pyslurm.core.db.step cimport JobStep +from pyslurm.core.common cimport cstr cdef class JobStats: - cdef: - slurmdb_stats_t *ptr - slurmdb_job_rec_t *job + cdef slurmdb_job_rec_t *job + + cdef public: + consumed_energy + average_cpu_time + average_cpu_frequency + # Elapsed * alloc_cpus + # This is the time the Job has been using the allocated CPUs for. + # This is not the actual cpu-usage. 
+ cpu_time + average_disk_read + average_disk_write + average_pages + average_rss + average_vmsize + max_disk_read + max_disk_read_node + max_disk_read_task + max_disk_write + max_disk_write_node + max_disk_write_task + max_pages + max_pages_node + max_pages_task + max_rss + max_rss_node + max_rss_task + max_vmsize + max_vmsize_node + max_vmsize_task + min_cpu_time + min_cpu_time_node + min_cpu_time_task + # uint32_t tot_cpu_sec + # uint32_t tot_cpu_usec + total_cpu_time + # Only available for Jobs from the Database, not sstat + # uint32_t user_cpu_sec + # uint32_t user_cpu_usec + user_cpu_time + # Only available for Jobs from the Database, not sstat + # uint32_t sys_cpu_sec + # uint32_t sys_cpu_usec + system_cpu_time + + @staticmethod + cdef JobStats from_step(JobStep step) + diff --git a/pyslurm/core/db/stats.pyx b/pyslurm/core/db/stats.pyx index 1ecd98d2..facff0bc 100644 --- a/pyslurm/core/db/stats.pyx +++ b/pyslurm/core/db/stats.pyx @@ -20,136 +20,119 @@ # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 +from pyslurm.core.common import nodelist_from_range_str, instance_to_dict -cdef class JobStats: - - @property - def consumed_energy(self): - return None - @property - def avg_cpu_time(self): - return None - - @property - def avg_cpu_freq(self): - return None +cdef class JobStats: - @property - def cpu_time(self): - # Elapsed * alloc_cpus - # This is the time the Job has been using the allocated CPUs for. - # This is not the actual cpu-usage. - return None - - @property - def avg_disk_read(self): - return None - - @property - def avg_disk_write(self): - return None - - @property - def avg_pages(self): - return None - - @property - def avg_rss(self): - return None - - @property - def avg_vmsize(self): - return None - - @property - def max_disk_read(self): - return None - - @property - def max_disk_read_node(self): - return None - - @property - def max_disk_read_task(self): - return None - - @property - def max_disk_write(self): - return None - - @property - def max_disk_write_node(self): - return None - - @property - def max_disk_write_task(self): - return None - - @property - def max_pages(self): - return None - - @property - def max_pages_node(self): - return None - - @property - def max_pages_task(self): - return None - - @property - def max_rss(self): - return None - - @property - def max_rss_node(self): - return None - - @property - def max_rss_task(self): - return None - - @property - def max_vmsize(self): - return None - - @property - def max_vmsize_node(self): - return None - - @property - def max_vmsize_task(self): - return None - - @property - def min_cpu_time(self): - return None - - @property - def min_cpu_time_node(self): - return None - - @property - def min_cpu_time_task(self): - return None - - @property - def total_cpu_time(self): - # uint32_t tot_cpu_sec - # uint32_t tot_cpu_usec - return None - - @property - def user_cpu_time(self): - # Only available for Jobs from the Database, not sstat - # uint32_t user_cpu_sec - # uint32_t user_cpu_usec - return None - - @property - def system_cpu_time(self): - # Only available for Jobs from the Database, not sstat - # uint32_t sys_cpu_sec - # uint32_t sys_cpu_usec - return None + def __init__(self): + for attr, val in instance_to_dict(self).items(): + setattr(self, attr, 0) + + self.max_disk_read_node = None + self.max_disk_read_task = None + self.max_disk_write_node = None + self.max_disk_write_task = None + self.max_pages_node = None + self.max_pages_task = None + self.max_rss_node = None + 
self.max_rss_task = None + self.max_vmsize_node = None + self.max_vmsize_task = None + self.min_cpu_time_node = None + self.min_cpu_time_task = None + + def as_dict(self): + return instance_to_dict(self) + + @staticmethod + cdef JobStats from_step(JobStep step): + cdef JobStats wrap = JobStats() + if not &step.ptr.stats: + return wrap + + cdef: + list nodes = nodelist_from_range_str( + cstr.to_unicode(step.ptr.nodes)) + cpu_time_adj = 1000 + slurmdb_stats_t *ptr = &step.ptr.stats + + if ptr.consumed_energy != slurm.NO_VAL64: + wrap.consumed_energy = ptr.consumed_energy + + wrap.average_cpu_time = TrackableResources.find_count_in_str( + ptr.tres_usage_in_ave, slurm.TRES_CPU) / cpu_time_adj + # TODO + # wrap.cpu_time = elapsed * step_cpu_tres_rec + + ave_freq = int(ptr.act_cpufreq) + if ave_freq != slurm.NO_VAL: + wrap.average_cpu_frequency = ptr.act_cpufreq + + # Convert to MiB instead of raw bytes? + wrap.average_disk_read = TrackableResources.find_count_in_str( + ptr.tres_usage_in_ave, slurm.TRES_FS_DISK) + wrap.average_disk_write = TrackableResources.find_count_in_str( + ptr.tres_usage_out_ave, slurm.TRES_FS_DISK) + wrap.average_pages = TrackableResources.find_count_in_str( + ptr.tres_usage_in_ave, slurm.TRES_PAGES) + wrap.average_rss = TrackableResources.find_count_in_str( + ptr.tres_usage_in_ave, slurm.TRES_MEM) + wrap.average_vmsize = TrackableResources.find_count_in_str( + ptr.tres_usage_in_ave, slurm.TRES_VMEM) + + wrap.max_disk_read = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max, slurm.TRES_FS_DISK) + max_disk_read_nodeid = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_nodeid, slurm.TRES_FS_DISK) + if nodes: + wrap.max_disk_read_node = nodes[max_disk_read_nodeid] + wrap.max_disk_read_task = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_taskid, slurm.TRES_FS_DISK) + + wrap.max_disk_write = TrackableResources.find_count_in_str( + ptr.tres_usage_out_max, slurm.TRES_FS_DISK) + max_disk_write_nodeid = TrackableResources.find_count_in_str( + ptr.tres_usage_out_max_nodeid, slurm.TRES_FS_DISK) + if nodes: + wrap.max_disk_write_node = nodes[max_disk_write_nodeid] + wrap.max_disk_write_task = TrackableResources.find_count_in_str( + ptr.tres_usage_out_max_taskid, slurm.TRES_FS_DISK) + + wrap.max_rss = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max, slurm.TRES_MEM) + max_rss_nodeid = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_nodeid, slurm.TRES_MEM) + if nodes: + wrap.max_rss_node = nodes[max_rss_nodeid] + wrap.max_rss_task = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_taskid, slurm.TRES_MEM) + + wrap.max_vmsize = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max, slurm.TRES_VMEM) + max_vmsize_nodeid = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_nodeid, slurm.TRES_VMEM) + if nodes: + wrap.max_vmsize_node = nodes[max_vmsize_nodeid] + wrap.max_vmsize_task = TrackableResources.find_count_in_str( + ptr.tres_usage_in_max_taskid, slurm.TRES_VMEM) + + wrap.min_cpu_time = TrackableResources.find_count_in_str( + ptr.tres_usage_in_min, slurm.TRES_CPU) / cpu_time_adj + min_cpu_time_nodeid = TrackableResources.find_count_in_str( + ptr.tres_usage_in_min_nodeid, slurm.TRES_CPU) + if nodes: + wrap.min_cpu_time_node = nodes[min_cpu_time_nodeid] + wrap.min_cpu_time_task = TrackableResources.find_count_in_str( + ptr.tres_usage_in_min_taskid, slurm.TRES_CPU) + + wrap.total_cpu_time = TrackableResources.find_count_in_str( + ptr.tres_usage_in_tot, slurm.TRES_CPU) + + if 
step.ptr.user_cpu_sec != slurm.NO_VAL64: + wrap.user_cpu_time = step.ptr.user_cpu_sec + + if step.ptr.sys_cpu_sec != slurm.NO_VAL64: + wrap.system_cpu_time = step.ptr.sys_cpu_sec + + return wrap diff --git a/pyslurm/core/db/step.pxd b/pyslurm/core/db/step.pxd index 132bed0b..2bbe7f01 100644 --- a/pyslurm/core/db/step.pxd +++ b/pyslurm/core/db/step.pxd @@ -37,6 +37,7 @@ from pyslurm.slurm cimport ( from pyslurm.core.db.util cimport SlurmList, SlurmListItem from pyslurm.core.db.connection cimport Connection from pyslurm.core.common cimport cstr +from pyslurm.core.db.stats cimport JobStats cdef class JobSteps(dict): @@ -45,6 +46,7 @@ cdef class JobSteps(dict): cdef class JobStep: cdef slurmdb_step_rec_t *ptr + cdef public JobStats stats @staticmethod cdef JobStep from_ptr(slurmdb_step_rec_t *step) diff --git a/pyslurm/core/db/step.pyx b/pyslurm/core/db/step.pyx index 83e3f16c..990f737e 100644 --- a/pyslurm/core/db/step.pyx +++ b/pyslurm/core/db/step.pyx @@ -24,19 +24,13 @@ from os import WIFSIGNALED, WIFEXITED, WTERMSIG, WEXITSTATUS from pyslurm.core.error import RPCError from pyslurm.core.db.tres cimport TrackableResources, TrackableResource from pyslurm.core.common.uint import * -from pyslurm.core.common.ctime import ( - date_to_timestamp, - secs_to_timestr, - timestamp_to_date, - mins_to_timestr, - _raw_time, -) +from pyslurm.core.common.ctime import _raw_time from pyslurm.core.common import ( gid_to_name, uid_to_name, - humanize, instance_to_dict, ) +from pyslurm.core.job.util import cpufreq_to_str cdef class JobStep: @@ -52,8 +46,14 @@ cdef class JobStep: cdef JobStep from_ptr(slurmdb_step_rec_t *step): cdef JobStep wrap = JobStep.__new__(JobStep) wrap.ptr = step + wrap.stats = JobStats.from_step(wrap) return wrap + def as_dict(self): + cdef dict out = instance_to_dict(self) + out["stats"] = self.stats.as_dict() + return out + def _xlate_from_id(self, sid): if sid == slurm.SLURM_BATCH_SCRIPT: return "batch" @@ -79,11 +79,7 @@ cdef class JobStep: return int(sid) @property - def alloc_cpus(self): - return self.requested_cpus - - @property - def alloc_nodes(self): + def num_nodes(self): nnodes = u32_parse(self.ptr.nnodes) if not nnodes and self.ptr.tres_alloc_str: return TrackableResources.find_count_in_str( @@ -92,22 +88,28 @@ cdef class JobStep: return nnodes @property - def requested_cpus(self): + def cpus(self): req_cpus = TrackableResources.find_count_in_str( self.ptr.tres_alloc_str, slurm.TRES_CPU) - if req_cpus == slurm.INFINITE64 and step.job_ptr: - tres_alloc_str = cstr.to_unicode(step.job_ptr.tres_alloc_str) - req_cpus = TrackableResources.find_count_in_str(tres_alloc_str, - slurm.TRES_CPU) - if not req_cpus: - tres_req_str = cstr.to_unicode(step.job_ptr.tres_req_str) - req_cpus = TrackableResources.find_count_in_str(tres_req_str, - slurm.TRES_CPU) - else: - req_cpus = 0 + if req_cpus == slurm.INFINITE64: + return 0 return req_cpus +# if req_cpus == slurm.INFINITE64 and step.job_ptr: +# tres_alloc_str = cstr.to_unicode(step.job_ptr.tres_alloc_str) +# req_cpus = TrackableResources.find_count_in_str(tres_alloc_str, +# slurm.TRES_CPU) +# if not req_cpus: +# tres_req_str = cstr.to_unicode(step.job_ptr.tres_req_str) +# req_cpus = TrackableResources.find_count_in_str(tres_req_str, +# slurm.TRES_CPU) + + @property + def memory(self): + val = TrackableResources.find_count_in_str(self.ptr.tres_alloc_str, + slurm.TRES_MEM) + return val # Only in Parent Job available: # resvcpu? 
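# Sketch (editorial illustration, not part of this patch): a TRES string is a
# comma-separated list of "id=count" pairs, e.g. "1=8,2=16384,4=2", where id 1
# is usually cpu, 2 is mem and 4 is node. find_count_in_str wraps the C helper
# slurmdb_find_tres_count_in_string; a rough pure-Python equivalent of the
# lookup it performs could look like this:
def find_tres_count_sketch(tres_str, tres_id):
    """Return the count for tres_id in an "id=count,id=count,..." string."""
    for pair in tres_str.split(","):
        tres, _, count = pair.partition("=")
        if tres.isdigit() and int(tres) == tres_id:
            return int(count)
    return 0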
@@ -118,55 +120,45 @@ cdef class JobStep: @property def elapsed_time(self): - return secs_to_timestr(self.ptr.elapsed) - - @property - def end_time_raw(self): - return _raw_time(self.ptr.end) + # seconds + return _raw_time(self.ptr.elapsed) @property def end_time(self): - return timestamp_to_date(self.ptr.end) - - @property - def eligible_time_raw(self): - return _raw_time(self.ptr.start) + return _raw_time(self.ptr.end) @property def eligible_time(self): - return timestamp_to_date(self.ptr.start) - - @property - def start_time_raw(self): return _raw_time(self.ptr.start) @property def start_time(self): - return timestamp_to_date(self.ptr.start) + return _raw_time(self.ptr.start) @property def exit_code(self): + # TODO return None @property def ntasks(self): - return None + return u32_parse(self.ptr.ntasks) @property - def cpu_freq_min(self): - return None + def cpu_frequency_min(self): + return cpufreq_to_str(self.ptr.req_cpufreq_min) @property - def cpu_freq_max(self): - return None + def cpu_frequency_max(self): + return cpufreq_to_str(self.ptr.req_cpufreq_max) @property - def cpu_freq_gov(self): - return None + def cpu_frequency_governor(self): + return cpufreq_to_str(self.ptr.req_cpufreq_gov) @property def nodelist(self): - return None + return cstr.to_unicode(self.ptr.nodes) @property def id(self): @@ -180,10 +172,10 @@ cdef class JobStep: def name(self): return cstr.to_unicode(self.ptr.stepname) - @property - def distribution(self): - # ptr.task_dist - pass +# @property +# def distribution(self): +# # ptr.task_dist +# pass @property def state(self): @@ -200,4 +192,5 @@ cdef class JobStep: @property def suspended_time(self): - return secs_to_timestr(self.ptr.elapsed) + # seconds + return _raw_time(self.ptr.elapsed) diff --git a/pyslurm/core/db/tres.pxd b/pyslurm/core/db/tres.pxd index e05644b8..260f8477 100644 --- a/pyslurm/core/db/tres.pxd +++ b/pyslurm/core/db/tres.pxd @@ -27,6 +27,7 @@ from libc.stdint cimport uint64_t from pyslurm.slurm cimport ( slurmdb_tres_rec_t, slurmdb_find_tres_count_in_string, + try_xmalloc, ) @@ -39,3 +40,6 @@ cdef class TrackableResources(dict): cdef class TrackableResource: cdef slurmdb_tres_rec_t *ptr + + @staticmethod + cdef TrackableResource from_ptr(slurmdb_tres_rec_t *in_ptr) diff --git a/pyslurm/core/db/tres.pyx b/pyslurm/core/db/tres.pyx index e0e7c698..445954a2 100644 --- a/pyslurm/core/db/tres.pyx +++ b/pyslurm/core/db/tres.pyx @@ -19,7 +19,6 @@ # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 -# cython: embedsignature=True from pyslurm.core.common.uint import * @@ -32,19 +31,20 @@ cdef class TrackableResources(dict): @staticmethod cdef TrackableResources from_str(char *tres_str): cdef: - TrackableResources tres + TrackableResources tres_collection + TrackableResource tres str raw_str = cstr.to_unicode(tres_str) dict tres_dict - tres = TrackableResources.__new__(TrackableResources) + tres_collection = TrackableResources.__new__(TrackableResources) if not raw_str: - return tres + return tres_collection - tres.raw_str = raw_str + tres_collection.raw_str = raw_str tres_dict = cstr.to_dict(tres_str) for tres_id, val in tres_dict.items(): - # TODO: resolve ids to type name - pass + tres = TrackableResource(tres_id) + tres.ptr.count = val return tres @@ -55,10 +55,48 @@ cdef class TrackableResources(dict): cdef uint64_t tmp tmp = slurmdb_find_tres_count_in_string(tres_str, typ) - return u64_parse(tmp) + if tmp == slurm.NO_VAL64: + return 0 + else: + return tmp cdef class TrackableResource: def __cinit__(self): 
self.ptr = NULL + + def __init__(self, tres_id): + self._alloc() + self.ptr.id = tres_id + + def _alloc(self): + if not self.ptr: + self.ptr = try_xmalloc(sizeof(slurmdb_tres_rec_t)) + if not self.ptr: + raise MemoryError("xmalloc failed for slurmdb_tres_rec_t") + + @staticmethod + cdef TrackableResource from_ptr(slurmdb_tres_rec_t *in_ptr): + cdef TrackableResource wrap = TrackableResource.__new__(TrackableResource) + wrap.ptr = in_ptr + return wrap + + @property + def id(self): + return self.ptr.id + + @property + def name(self): + return cstr.to_unicode(self.ptr.name) + + @property + def type(self): + return cstr.to_unicode(self.ptr.type) + + @property + def count(self): + return u64_parse(self.ptr.count) + + # rec_count + # alloc_secs diff --git a/pyslurm/core/db/util.pyx b/pyslurm/core/db/util.pyx index 6956c458..df64286d 100644 --- a/pyslurm/core/db/util.pyx +++ b/pyslurm/core/db/util.pyx @@ -19,7 +19,6 @@ # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 -# cython: embedsignature=True cdef class SlurmListItem: From 607c6d36d492bbb04a48b5400f8923cfd6759369 Mon Sep 17 00:00:00 2001 From: tazend Date: Sat, 8 Apr 2023 10:11:41 +0200 Subject: [PATCH 17/28] wip --- pyslurm/core/db/job.pxd | 4 ++++ pyslurm/core/db/job.pyx | 25 +++++++++++++++++++++++-- pyslurm/core/db/stats.pyx | 17 +++++++---------- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/pyslurm/core/db/job.pxd b/pyslurm/core/db/job.pxd index 052d1a8d..95e0bfdb 100644 --- a/pyslurm/core/db/job.pxd +++ b/pyslurm/core/db/job.pxd @@ -51,6 +51,10 @@ cdef class JobConditions: association_ids clusters constraints + min_cpus + max_cpus + min_nodes + max_nodes cdef class Jobs(dict): diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index 0b4277d1..60809bbb 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -26,16 +26,16 @@ from pyslurm.core.db.tres cimport TrackableResources, TrackableResource from pyslurm.core.common.uint import * from pyslurm.core.common.ctime import ( date_to_timestamp, + timestr_to_mins, _raw_time, ) from pyslurm.core.common import ( gid_to_name, uid_to_name, + nodelist_to_range_str, instance_to_dict, ) -# Maybe prefix these classes with something like "DB" to avoid name collision -# with the other classes from pyslurm/core/job ? cdef class JobConditions: @@ -69,10 +69,31 @@ cdef class JobConditions: ptr.usage_start = date_to_timestamp(self.start_time) ptr.usage_end = date_to_timestamp(self.end_time) slurmdb_job_cond_def_start_end(ptr) + + ptr.cpus_min = u32(self.min_cpus) + ptr.cpus_max = u32(self.max_cpus) + ptr.nodes_min = u32(self.min_nodes) + ptr.nodes_max = u32(self.max_nodes) +# ptr.timelimit_min = +# ptr.timelimit_max = + SlurmList.to_char_list(&ptr.acct_list, self.accounts) SlurmList.to_char_list(&ptr.associd_list, self.association_ids) SlurmList.to_char_list(&ptr.cluster_list, self.clusters) SlurmList.to_char_list(&ptr.constraint_list, self.constraints) + SlurmList.to_char_list(&ptr.jobname_list, self.names) + SlurmList.to_char_list(&ptr.used_nodes, + nodelist_to_range_str(self.nodelist)) + + # TODO: Need to convert user/group names to their ids... + SlurmList.to_char_list(&ptr.groupid_list, self.groups) + SlurmList.to_char_list(&ptr.userid_list, self.users) + + SlurmList.to_char_list(&ptr.wckey_list, self.wckeys) + SlurmList.to_char_list(&ptr.partition_list, self.partitions) + + # TODO: Need to convert qos names to its id... 
+ SlurmList.to_char_list(&ptr.qos_list, self.qos) cdef class Jobs(dict): diff --git a/pyslurm/core/db/stats.pyx b/pyslurm/core/db/stats.pyx index facff0bc..4cebe21c 100644 --- a/pyslurm/core/db/stats.pyx +++ b/pyslurm/core/db/stats.pyx @@ -85,8 +85,6 @@ cdef class JobStats: ptr.tres_usage_in_max, slurm.TRES_FS_DISK) max_disk_read_nodeid = TrackableResources.find_count_in_str( ptr.tres_usage_in_max_nodeid, slurm.TRES_FS_DISK) - if nodes: - wrap.max_disk_read_node = nodes[max_disk_read_nodeid] wrap.max_disk_read_task = TrackableResources.find_count_in_str( ptr.tres_usage_in_max_taskid, slurm.TRES_FS_DISK) @@ -94,8 +92,6 @@ cdef class JobStats: ptr.tres_usage_out_max, slurm.TRES_FS_DISK) max_disk_write_nodeid = TrackableResources.find_count_in_str( ptr.tres_usage_out_max_nodeid, slurm.TRES_FS_DISK) - if nodes: - wrap.max_disk_write_node = nodes[max_disk_write_nodeid] wrap.max_disk_write_task = TrackableResources.find_count_in_str( ptr.tres_usage_out_max_taskid, slurm.TRES_FS_DISK) @@ -103,8 +99,6 @@ cdef class JobStats: ptr.tres_usage_in_max, slurm.TRES_MEM) max_rss_nodeid = TrackableResources.find_count_in_str( ptr.tres_usage_in_max_nodeid, slurm.TRES_MEM) - if nodes: - wrap.max_rss_node = nodes[max_rss_nodeid] wrap.max_rss_task = TrackableResources.find_count_in_str( ptr.tres_usage_in_max_taskid, slurm.TRES_MEM) @@ -112,8 +106,6 @@ cdef class JobStats: ptr.tres_usage_in_max, slurm.TRES_VMEM) max_vmsize_nodeid = TrackableResources.find_count_in_str( ptr.tres_usage_in_max_nodeid, slurm.TRES_VMEM) - if nodes: - wrap.max_vmsize_node = nodes[max_vmsize_nodeid] wrap.max_vmsize_task = TrackableResources.find_count_in_str( ptr.tres_usage_in_max_taskid, slurm.TRES_VMEM) @@ -121,14 +113,19 @@ cdef class JobStats: ptr.tres_usage_in_min, slurm.TRES_CPU) / cpu_time_adj min_cpu_time_nodeid = TrackableResources.find_count_in_str( ptr.tres_usage_in_min_nodeid, slurm.TRES_CPU) - if nodes: - wrap.min_cpu_time_node = nodes[min_cpu_time_nodeid] wrap.min_cpu_time_task = TrackableResources.find_count_in_str( ptr.tres_usage_in_min_taskid, slurm.TRES_CPU) wrap.total_cpu_time = TrackableResources.find_count_in_str( ptr.tres_usage_in_tot, slurm.TRES_CPU) + if nodes: + wrap.max_disk_write_node = nodes[max_disk_write_nodeid] + wrap.max_disk_read_node = nodes[max_disk_read_nodeid] + wrap.max_rss_node = nodes[max_rss_nodeid] + wrap.max_vmsize_node = nodes[max_vmsize_nodeid] + wrap.min_cpu_time_node = nodes[min_cpu_time_nodeid] + if step.ptr.user_cpu_sec != slurm.NO_VAL64: wrap.user_cpu_time = step.ptr.user_cpu_sec From 0200fbc2d0de23330b0528f52e5085eb32d3fc71 Mon Sep 17 00:00:00 2001 From: Toni Harzendorf Date: Sun, 9 Apr 2023 19:16:32 +0200 Subject: [PATCH 18/28] wip --- pyslurm/__init__.py | 2 + pyslurm/core/db/__init__.py | 5 + pyslurm/core/db/connection.pyx | 4 +- pyslurm/core/db/job.pxd | 26 ++++- pyslurm/core/db/job.pyx | 193 +++++++++++++++++++++++++-------- pyslurm/core/db/qos.pxd | 60 ++++++++++ pyslurm/core/db/qos.pyx | 117 ++++++++++++++++++++ pyslurm/core/db/tres.pyx | 2 +- pyslurm/core/db/util.pxd | 2 +- pyslurm/core/db/util.pyx | 9 +- pyslurm/core/slurmctld.pxd | 36 ++++++ pyslurm/core/slurmctld.pyx | 46 ++++++++ 12 files changed, 446 insertions(+), 56 deletions(-) create mode 100644 pyslurm/core/db/qos.pxd create mode 100644 pyslurm/core/db/qos.pyx create mode 100644 pyslurm/core/slurmctld.pxd create mode 100644 pyslurm/core/slurmctld.pyx diff --git a/pyslurm/__init__.py b/pyslurm/__init__.py index da8c7830..aa9e26c6 100644 --- a/pyslurm/__init__.py +++ b/pyslurm/__init__.py @@ -55,6 +55,8 @@ 
nodelist_to_range_str, ) +from pyslurm.core import slurmctld + # Initialize slurm api from pyslurm.api import slurm_init, slurm_fini slurm_init() diff --git a/pyslurm/core/db/__init__.py b/pyslurm/core/db/__init__.py index f0f96747..098304e9 100644 --- a/pyslurm/core/db/__init__.py +++ b/pyslurm/core/db/__init__.py @@ -1,3 +1,8 @@ from pyslurm.core.db.job import Job, Jobs from pyslurm.core.db.step import JobStep from pyslurm.core.db.tres import TrackableResource, TrackableResources +from pyslurm.core.db.qos import ( + QualitiesOfService, + QualityOfService, + QualityOfServiceConditions, +) diff --git a/pyslurm/core/db/connection.pyx b/pyslurm/core/db/connection.pyx index bd1d1062..8b068ddb 100644 --- a/pyslurm/core/db/connection.pyx +++ b/pyslurm/core/db/connection.pyx @@ -19,7 +19,6 @@ # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 -# cython: embedsignature=True from pyslurm.core.error import RPCError @@ -33,6 +32,9 @@ cdef class Connection: def __init__(self): self.open() + def __dealloc__(self): + self.close() + def open(self): if not self.ptr: self.ptr = slurmdb_connection_get(&self.conn_flags) diff --git a/pyslurm/core/db/job.pxd b/pyslurm/core/db/job.pxd index 95e0bfdb..b3539228 100644 --- a/pyslurm/core/db/job.pxd +++ b/pyslurm/core/db/job.pxd @@ -29,6 +29,10 @@ from pyslurm.slurm cimport ( slurmdb_destroy_job_cond, slurmdb_destroy_job_rec, slurmdb_destroy_step_rec, + slurm_destroy_selected_step, + slurm_selected_step_t, + slurm_list_create, + slurm_list_append, try_xmalloc, slurmdb_job_cond_def_start_end, slurm_job_state_string, @@ -39,6 +43,7 @@ from pyslurm.core.db.step cimport JobStep, JobSteps from pyslurm.core.db.stats cimport JobStats from pyslurm.core.db.connection cimport Connection from pyslurm.core.common cimport cstr +from pyslurm.core.db.qos cimport QualitiesOfService cdef class JobConditions: @@ -51,10 +56,22 @@ cdef class JobConditions: association_ids clusters constraints - min_cpus + cpus max_cpus - min_nodes + nodes max_nodes + qualities_of_service + names + partitions + groups + timelimit + max_timelimit + users + wckeys + nodelist + with_script + with_env + ids cdef class Jobs(dict): @@ -96,7 +113,10 @@ cdef class Job: name (str): Name of the Job. 
""" - cdef slurmdb_job_rec_t *ptr + cdef: + slurmdb_job_rec_t *ptr + QualitiesOfService qos_data + cdef public: JobSteps steps JobStats stats diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index 60809bbb..77c20bda 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -23,6 +23,7 @@ from os import WIFSIGNALED, WIFEXITED, WTERMSIG, WEXITSTATUS from pyslurm.core.error import RPCError from pyslurm.core.db.tres cimport TrackableResources, TrackableResource +from pyslurm.core import slurmctld from pyslurm.core.common.uint import * from pyslurm.core.common.ctime import ( date_to_timestamp, @@ -31,6 +32,8 @@ from pyslurm.core.common.ctime import ( ) from pyslurm.core.common import ( gid_to_name, + group_to_gid, + user_to_uid, uid_to_name, nodelist_to_range_str, instance_to_dict, @@ -62,72 +65,163 @@ cdef class JobConditions: self.ptr.db_flags = slurm.SLURMDB_JOB_FLAG_NOTSET self.ptr.flags |= slurm.JOBCOND_FLAG_NO_TRUNC - def _create_job_cond(self): + def _parse_qos(self): + if not self.qualities_of_service: + return None + + qos_id_list = [] + qos = QualitiesOfService.load() + for q in self.qualities_of_service: + if isinstance(q, int): + qos_id_list.append(q) + elif q in qos: + qos_id_list.append(str(qos[q].id)) + else: + raise ValueError(f"QoS {q} does not exist") + + return qos_id_list + + def _parse_groups(self): + if not self.groups: + return None + + gid_list = [] + for group in self.groups: + if isinstance(group, int): + gid_list.append(group) + else: + gid_list.append(group_to_gid(group)) + + return gid_list + + def _parse_users(self): + if not self.users: + return None + + uid_list = [] + for user in self.users: + if isinstance(user, int): + uid_list.append(user) + else: + uid_list.append(user_to_uid(user)) + + return uid_list + + def _parse_clusters(self): + if not self.clusters: + # Get the local cluster name + # This is a requirement for some other parameters to function + # correctly, like self.nodelist + slurm_conf = slurmctld.Config.load() + return [slurm_conf.cluster] + elif self.clusters == "all": + return None + else: + return self.clusters + + def _parse_state(self): + # TODO: implement + return None + + def _create(self): self._alloc() - cdef slurmdb_job_cond_t *ptr = self.ptr + cdef: + slurmdb_job_cond_t *ptr = self.ptr + slurm_selected_step_t *selected_step ptr.usage_start = date_to_timestamp(self.start_time) ptr.usage_end = date_to_timestamp(self.end_time) slurmdb_job_cond_def_start_end(ptr) - - ptr.cpus_min = u32(self.min_cpus) - ptr.cpus_max = u32(self.max_cpus) - ptr.nodes_min = u32(self.min_nodes) - ptr.nodes_max = u32(self.max_nodes) -# ptr.timelimit_min = -# ptr.timelimit_max = - + ptr.cpus_min = u32(self.cpus, on_noval=0) + ptr.cpus_max = u32(self.max_cpus, on_noval=0) + ptr.nodes_min = u32(self.nodes, on_noval=0) + ptr.nodes_max = u32(self.max_nodes, on_noval=0) + ptr.timelimit_min = u32(timestr_to_mins(self.timelimit), on_noval=0) + ptr.timelimit_max = u32(timestr_to_mins(self.max_timelimit), + on_noval=0) SlurmList.to_char_list(&ptr.acct_list, self.accounts) SlurmList.to_char_list(&ptr.associd_list, self.association_ids) - SlurmList.to_char_list(&ptr.cluster_list, self.clusters) + SlurmList.to_char_list(&ptr.cluster_list, self._parse_clusters()) SlurmList.to_char_list(&ptr.constraint_list, self.constraints) SlurmList.to_char_list(&ptr.jobname_list, self.names) - SlurmList.to_char_list(&ptr.used_nodes, - nodelist_to_range_str(self.nodelist)) - - # TODO: Need to convert user/group names to their ids... 
- SlurmList.to_char_list(&ptr.groupid_list, self.groups) - SlurmList.to_char_list(&ptr.userid_list, self.users) - + SlurmList.to_char_list(&ptr.groupid_list, self._parse_groups()) + SlurmList.to_char_list(&ptr.userid_list, self._parse_users()) SlurmList.to_char_list(&ptr.wckey_list, self.wckeys) SlurmList.to_char_list(&ptr.partition_list, self.partitions) - - # TODO: Need to convert qos names to its id... - SlurmList.to_char_list(&ptr.qos_list, self.qos) + SlurmList.to_char_list(&ptr.qos_list, self._parse_qos()) + SlurmList.to_char_list(&ptr.state_list, self._parse_state()) + + if self.nodelist: + cstr.fmalloc(&ptr.used_nodes, + nodelist_to_range_str(self.nodelist)) + + if self.ids: + # These are only allowed by the slurmdbd when specific jobs are + # requested. + if self.with_script: + ptr.flags |= slurm.JOBCOND_FLAG_SCRIPT + elif self.with_env: + # TODO: implement a new "envrironment" attribute in the job + # class + ptr.flags |= slurm.JOBCOND_FLAG_ENV + + ptr.step_list = slurm_list_create(slurm_destroy_selected_step) + already_added = [] + for i in self.ids: + job_id = u32(i) + + selected_step = NULL + selected_step = try_xmalloc( + sizeof(slurm_selected_step_t)) + if not selected_step: + raise MemoryError("xmalloc failed for slurm_selected_step_t") + + selected_step.array_task_id = slurm.NO_VAL + selected_step.het_job_offset = slurm.NO_VAL + selected_step.step_id.step_id = slurm.NO_VAL + selected_step.step_id.job_id = job_id + + if not job_id in already_added: + slurm_list_append(ptr.step_list, selected_step) cdef class Jobs(dict): - def __dealloc__(self): - self.db_conn.close() - def __init__(self, *args, **kwargs): + # TODO: ability to initialize with existing job objects + pass + + @staticmethod + def load(*args, **kwargs): cdef: + Jobs jobs = Jobs() Job job JobStep step - JobConditions job_cond + JobConditions cond SlurmListItem job_ptr SlurmListItem step_ptr SlurmList step_list + QualitiesOfService qos_data int cpu_tres_rec_count = 0 int step_cpu_tres_rec_count = 0 # Allow the user to both specify search conditions via a JobConditions # instance or **kwargs. if args and isinstance(args[0], JobConditions): - job_cond = args[0] + cond = args[0] else: - job_cond = JobConditions(**kwargs) + cond = JobConditions(**kwargs) - job_cond._create_job_cond() - # TODO: Have a single, global DB connection in pyslurm internally? - self.db_conn = Connection() - self.info = SlurmList.wrap(slurmdb_jobs_get(self.db_conn.ptr, - job_cond.ptr)) - - if self.info.is_null(): + cond._create() + jobs.db_conn = Connection() + jobs.info = SlurmList.wrap(slurmdb_jobs_get(jobs.db_conn.ptr, + cond.ptr)) + if jobs.info.is_null(): raise RPCError(msg="Failed to get Jobs from slurmdbd") + qos_data = QualitiesOfService.load(name_is_key=False, + db_connection=jobs.db_conn) + # tres_alloc_str = cstr.to_unicode() # cpu_tres_rec_count @@ -137,9 +231,10 @@ cdef class Jobs(dict): # convert them to its type name for the user in advance. 
# TODO: For multi-cluster support, remove duplicate federation jobs - for job_ptr in SlurmList.iter_and_pop(self.info): + for job_ptr in SlurmList.iter_and_pop(jobs.info): job = Job.from_ptr(job_ptr.data) - self[job.id] = job + job.qos_data = qos_data + jobs[job.id] = job step_list = SlurmList.wrap(job.ptr.steps, owned=False) for step_ptr in SlurmList.iter_and_pop(step_list): @@ -148,19 +243,29 @@ cdef class Jobs(dict): job._sum_stats_from_steps() - + return jobs + + cdef class Job: def __cinit__(self): self.ptr = NULL def __init__(self, job_id): - pass + self._alloc() + self.ptr.jobid = int(job_id) def __dealloc__(self): slurmdb_destroy_job_rec(self.ptr) self.ptr = NULL + def _alloc(self): + if not self.ptr: + self.ptr = try_xmalloc( + sizeof(slurmdb_job_rec_t)) + if not self.ptr: + raise MemoryError("xmalloc failed for slurmdb_job_rec_t") + @staticmethod cdef Job from_ptr(slurmdb_job_rec_t *in_ptr): cdef Job wrap = Job.__new__(Job) @@ -390,11 +495,11 @@ cdef class Job: @property def qos(self): - # Need to convert the raw uint32_t qosid to a name, by calling - # slurmdb_qos_get. To avoid doing this repeatedly, we'll probably need - # to also get the qos list when calling slurmdb_jobs_get and store it - # in each job instance. - return None + _qos = self.qos_data.get(self.ptr.qosid, None) + if _qos: + return _qos.name + else: + return None @property def cpus(self): @@ -426,8 +531,6 @@ cdef class Job: def script(self): return cstr.to_unicode(self.ptr.script) - # uint32_t show_full - @property def start_time(self): return _raw_time(self.ptr.start) diff --git a/pyslurm/core/db/qos.pxd b/pyslurm/core/db/qos.pxd new file mode 100644 index 00000000..b23325d0 --- /dev/null +++ b/pyslurm/core/db/qos.pxd @@ -0,0 +1,60 @@ +######################################################################### +# qos.pxd - pyslurm slurmdbd qos api +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + slurmdb_qos_rec_t, + slurmdb_qos_cond_t, + slurmdb_destroy_qos_rec, + slurmdb_destroy_qos_cond, + slurmdb_qos_get, + try_xmalloc, +) +from pyslurm.core.db.util cimport SlurmList, SlurmListItem +from pyslurm.core.db.connection cimport Connection +from pyslurm.core.db.qos cimport QualitiesOfService +from pyslurm.core.common cimport cstr + + +cdef class QualitiesOfService(dict): + cdef: + SlurmList info + Connection db_conn + + +cdef class QualityOfServiceConditions: + cdef slurmdb_qos_cond_t *ptr + + cdef public: + names + ids + descriptions + preempt_mode + with_deleted + + +cdef class QualityOfService: + cdef slurmdb_qos_rec_t *ptr + + @staticmethod + cdef QualityOfService from_ptr(slurmdb_qos_rec_t *in_ptr) diff --git a/pyslurm/core/db/qos.pyx b/pyslurm/core/db/qos.pyx new file mode 100644 index 00000000..5e704026 --- /dev/null +++ b/pyslurm/core/db/qos.pyx @@ -0,0 +1,117 @@ +######################################################################### +# qos.pyx - pyslurm slurmdbd qos api +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm.core.error import RPCError + + +cdef class QualitiesOfService(dict): + + def __init__(self): + pass + + @staticmethod + def load(*args, name_is_key=True, db_connection=None, **kwargs): + cdef: + QualitiesOfService qos_dict = QualitiesOfService() + QualityOfService qos + QualityOfServiceConditions cond + SlurmListItem qos_ptr + Connection conn = db_connection + + if args and isinstance(args[0], QualityOfServiceConditions): + cond = args[0] + else: + cond = QualityOfServiceConditions(**kwargs) + + cond._create() + qos_dict.db_conn = Connection() if not conn else conn + qos_dict.info = SlurmList.wrap(slurmdb_qos_get(qos_dict.db_conn.ptr, + cond.ptr)) + if qos_dict.info.is_null(): + raise RPCError(msg="Failed to get QoS from slurmdbd") + + for qos_ptr in SlurmList.iter_and_pop(qos_dict.info): + qos = QualityOfService.from_ptr(qos_ptr.data) + if name_is_key: + qos_dict[qos.name] = qos + else: + qos_dict[qos.id] = qos + + return qos_dict + + +cdef class QualityOfServiceConditions: + + def __cinit__(self): + self.ptr = NULL + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + def __dealloc__(self): + self._dealloc() + + def _dealloc(self): + slurmdb_destroy_qos_cond(self.ptr) + self.ptr = NULL + + def _alloc(self): + self._dealloc() + self.ptr = try_xmalloc(sizeof(slurmdb_qos_cond_t)) + if not self.ptr: + raise MemoryError("xmalloc failed for slurmdb_qos_cond_t") + + def _create(self): + self._alloc() + cdef slurmdb_qos_cond_t *ptr = self.ptr + + +cdef class QualityOfService: + + def __cinit__(self): + self.ptr = NULL + + def __init__(self, qos_id): + pass + + def __dealloc__(self): + slurmdb_destroy_qos_rec(self.ptr) + self.ptr = NULL + + @staticmethod + cdef QualityOfService from_ptr(slurmdb_qos_rec_t *in_ptr): + cdef QualityOfService wrap = QualityOfService.__new__(QualityOfService) + wrap.ptr = in_ptr + return wrap + + @property + def name(self): + return cstr.to_unicode(self.ptr.name) + + @property + def description(self): + return cstr.to_unicode(self.ptr.description) + + @property + def id(self): + return self.ptr.id diff --git a/pyslurm/core/db/tres.pyx b/pyslurm/core/db/tres.pyx index 445954a2..a827cddc 100644 --- a/pyslurm/core/db/tres.pyx +++ b/pyslurm/core/db/tres.pyx @@ -51,7 +51,7 @@ cdef class TrackableResources(dict): @staticmethod def find_count_in_str(tres_str, typ): if not tres_str: - return None + return 0 cdef uint64_t tmp tmp = slurmdb_find_tres_count_in_string(tres_str, typ) diff --git a/pyslurm/core/db/util.pxd b/pyslurm/core/db/util.pxd index 44be9b15..42b48a6f 100644 --- a/pyslurm/core/db/util.pxd +++ b/pyslurm/core/db/util.pxd @@ -19,7 +19,6 @@ # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 -# cython: embedsignature=True from pyslurm cimport slurm from pyslurm.core.common cimport cstr @@ -34,6 +33,7 @@ from pyslurm.slurm cimport ( slurm_list_destroy, slurm_list_create, slurm_list_pop, + slurm_list_append, slurm_xfree_ptr, ) diff --git a/pyslurm/core/db/util.pyx b/pyslurm/core/db/util.pyx index df64286d..7601f2b2 100644 --- a/pyslurm/core/db/util.pyx +++ b/pyslurm/core/db/util.pyx @@ -34,9 +34,7 @@ cdef class SlurmListItem: cdef class SlurmList: - """ - Convenience Wrapper around slurms List type - """ + """Convenience Wrapper around slurms List type""" def __cinit__(self): self.info = NULL self.itr = NULL @@ -122,8 +120,9 @@ cdef class SlurmList: in_list[0] = 
slurm_list_create(slurm_xfree_ptr) for val in vals: if val: - cstr.fmalloc(&entry, val) - slurm.slurm_list_append(in_list[0], entry) + entry = NULL + cstr.fmalloc(&entry, str(val)) + slurm_list_append(in_list[0], entry) def is_null(self): if not self.info: diff --git a/pyslurm/core/slurmctld.pxd b/pyslurm/core/slurmctld.pxd new file mode 100644 index 00000000..f93a600d --- /dev/null +++ b/pyslurm/core/slurmctld.pxd @@ -0,0 +1,36 @@ +######################################################################### +# slurmctld.pxd - pyslurm slurmctld api +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.slurm cimport ( + slurm_conf_t, + slurm_load_ctl_conf, + slurm_free_ctl_conf, + try_xmalloc, +) +from pyslurm.core.common cimport cstr +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t, int64_t +from pyslurm.core.common.uint cimport * + + +cdef class Config: + cdef slurm_conf_t *ptr diff --git a/pyslurm/core/slurmctld.pyx b/pyslurm/core/slurmctld.pyx new file mode 100644 index 00000000..f6a4559b --- /dev/null +++ b/pyslurm/core/slurmctld.pyx @@ -0,0 +1,46 @@ +######################################################################### +# slurmctld.pyx - pyslurm slurmctld api +######################################################################### +# Copyright (C) 2022 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm.core.error import verify_rpc, RPCError + + +cdef class Config: + + def __cinit__(self): + self.ptr = NULL + + def __init__(self, job_id): + raise RuntimeError("Cannot instantiate class directly") + + def __dealloc__(self): + slurm_free_ctl_conf(self.ptr) + self.ptr = NULL + + @staticmethod + def load(): + cdef Config conf = Config.__new__(Config) + verify_rpc(slurm_load_ctl_conf(0, &conf.ptr)) + return conf + + @property + def cluster(self): + return cstr.to_unicode(self.ptr.cluster_name) From 2d03a0fa3b728807fd665bb2934bfe529626cf14 Mon Sep 17 00:00:00 2001 From: tazend Date: Sun, 16 Apr 2023 18:51:50 +0200 Subject: [PATCH 19/28] wip --- pyslurm/core/db/__init__.py | 13 ++- pyslurm/core/db/connection.pxd | 8 +- pyslurm/core/db/connection.pyx | 28 ++++-- pyslurm/core/db/job.pxd | 160 ++++++++++++++++++++++++++++-- pyslurm/core/db/job.pyx | 174 ++++++++++++++++----------------- pyslurm/core/db/qos.pxd | 9 +- pyslurm/core/db/qos.pyx | 90 ++++++++++++++--- pyslurm/core/db/stats.pxd | 3 +- pyslurm/core/db/stats.pyx | 64 ++++++++++++ pyslurm/core/db/step.pxd | 44 +++++++++ pyslurm/core/db/step.pyx | 7 +- pyslurm/core/db/tres.pxd | 1 + pyslurm/core/db/tres.pyx | 16 ++- pyslurm/core/db/util.pyx | 2 +- pyslurm/core/node.pyx | 2 +- pyslurm/slurm/extra.pxi | 1 + 16 files changed, 488 insertions(+), 134 deletions(-) diff --git a/pyslurm/core/db/__init__.py b/pyslurm/core/db/__init__.py index 098304e9..60db7389 100644 --- a/pyslurm/core/db/__init__.py +++ b/pyslurm/core/db/__init__.py @@ -1,8 +1,15 @@ -from pyslurm.core.db.job import Job, Jobs from pyslurm.core.db.step import JobStep -from pyslurm.core.db.tres import TrackableResource, TrackableResources +from pyslurm.core.db.job import ( + Job, + Jobs, + JobSearchFilter, +) +from pyslurm.core.db.tres import ( + TrackableResource, + TrackableResources, +) from pyslurm.core.db.qos import ( QualitiesOfService, QualityOfService, - QualityOfServiceConditions, + QualityOfServiceSearchFilter, ) diff --git a/pyslurm/core/db/connection.pxd b/pyslurm/core/db/connection.pxd index f75465c9..635bdb14 100644 --- a/pyslurm/core/db/connection.pxd +++ b/pyslurm/core/db/connection.pxd @@ -30,6 +30,12 @@ from pyslurm.slurm cimport ( cdef class Connection: + """A connection to the slurmdbd. + + Attributes: + is_open (bool): + Whether the connection is open or closed. 
+ """ cdef: void *ptr - uint16_t conn_flags + uint16_t flags diff --git a/pyslurm/core/db/connection.pyx b/pyslurm/core/db/connection.pyx index 8b068ddb..70785c00 100644 --- a/pyslurm/core/db/connection.pyx +++ b/pyslurm/core/db/connection.pyx @@ -24,24 +24,36 @@ from pyslurm.core.error import RPCError cdef class Connection: - def __cinit__(self): self.ptr = NULL - self.conn_flags = 0 + self.flags = 0 def __init__(self): - self.open() + raise RuntimeError("A new connection should be created through " + "calling Connection.open()") def __dealloc__(self): self.close() - def open(self): - if not self.ptr: - self.ptr = slurmdb_connection_get(&self.conn_flags) - if not self.ptr: - raise RPCError(msg="Failed to open Connection to slurmdbd") + @staticmethod + def open(): + """Open a new connection to the slurmdbd + + Raises: + RPCError: When opening the connection fails + + Returns: + (Connection): Connection to slurmdbd + """ + cdef Connection conn = Connection.__new__(Connection) + conn.ptr = slurmdb_connection_get(&conn.flags) + if not conn.ptr: + raise RPCError(msg="Failed to open onnection to slurmdbd") + + return conn def close(self): + """Close the current connection.""" if self.is_open: slurmdb_connection_close(&self.ptr) self.ptr = NULL diff --git a/pyslurm/core/db/job.pxd b/pyslurm/core/db/job.pxd index b3539228..74640d81 100644 --- a/pyslurm/core/db/job.pxd +++ b/pyslurm/core/db/job.pxd @@ -1,7 +1,7 @@ ######################################################################### # job.pxd - pyslurm slurmdbd job api ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -46,10 +46,78 @@ from pyslurm.core.common cimport cstr from pyslurm.core.db.qos cimport QualitiesOfService -cdef class JobConditions: +cdef class JobSearchFilter: + """Search conditions for Slurm database Jobs. + + Args: + **kwargs: + Any valid attribute of the object. + + Attributes: + ids (list): + A list of Job ids to search for. + start_time (Union[str, int, datetime.datetime]): + Search for Jobs which started after this time. + end_time (Union[str, int, datetime.datetime]): + Search for Jobs which ended before this time. + accounts (list): + Search for Jobs with these account names. + association_ids (list): + Search for Jobs with these association ids. + clusters (list): + Search for Jobs running in these clusters. + constraints (list): + Search for Jobs with these constraints. + cpus (int): + Search for Jobs with exactly this many CPUs. + Note: If you also specify max_cpus, then this value will act as + the minimum. + max_cpus (int): + Search for Jobs with no more than this amount of CPUs. + Note: This value has no effect without also setting cpus. + nodes (int): + Search for Jobs with exactly this many nodes. + Note: If you also specify max_nodes, then this value will act as + the minimum. + max_nodes (int): + Search for Jobs with no more than this amount of nodes. + Note: This value has no effect without also setting nodes. + qos (list): + Search for Jobs with these Qualities of Service. + names (list): + Search for Jobs with these job names. + partitions (list): + Search for Jobs with these partition names. + groups (list): + Search for Jobs with these group names. You can both specify the + groups as string or by their GID. 
+ timelimit (Union[str, int]): + Search for Jobs with exactly this timelimit. + Note: If you also specify max_timelimit, then this value will act + as the minimum. + max_timelimit (Union[str, int]): + Search for Jobs which run no longer than this timelimit + Note: This value has no effect without also setting timelimit + users (list): + Search for Jobs with these user names. You can both specify the + users as string or by their UID. + wckeys (list): + Search for Jobs with these WCKeys + nodelist (list): + Search for Jobs that ran on any of these Nodes + with_script (bool): + Instruct the slurmdbd to also send the job script(s) + Note: This requires specifying explictiy job ids, and is mutually + exclusive with with_env + with_env (bool): + Instruct the slurmdbd to also send the job environment(s) + Note: This requires specifying explictiy job ids, and is mutually + exclusive with with_script + """ cdef slurmdb_job_cond_t *ptr cdef public: + ids start_time end_time accounts @@ -71,10 +139,10 @@ cdef class JobConditions: nodelist with_script with_env - ids cdef class Jobs(dict): + """A collection of Database Jobs.""" cdef: SlurmList info Connection db_conn @@ -83,8 +151,6 @@ cdef class Jobs(dict): cdef class Job: """A Slurm Database Job. - All attributes in this class are read-only. - Args: job_id (int): An Integer representing a Job-ID. @@ -95,6 +161,8 @@ cdef class Job: Attributes: steps (pyslurm.db.JobSteps): Steps this Job has. + stats (pyslurm.db.JobStats): + General Statistics of this Job account (str): Account of the Job. admin_comment (str): @@ -110,8 +178,88 @@ cdef class Job: Array Task ID of this Job if it is an Array-Job. array_tasks_waiting (str): Array Tasks that are still waiting. + association_id (int): + ID of the Association this job runs in. + block_id (str): + Name of the block used (for BlueGene Systems) + cluster (str): + Cluster this Job belongs to + constraints (str): + Constraints of the Job + container (str): + Path to OCI Container bundle + db_index (int): + Unique database index of the Job in the job table + derived_exit_code (int): + Highest exit code of all the Job steps + derived_exit_code_signal (int): + Signal of the derived exit code + comment (str): + Comment for the Job + elapsed_time (int): + Amount of seconds elapsed for the Job + eligible_time (int): + When the Job became eligible to run, as a unix timestamp + end_time (int): + When the Job ended, as a unix timestamp + exit_code (int): + Exit code of the job script or salloc. + exit_code_signal (int): + Signal of the exit code for this Job. + group_id (int): + ID of the group for this Job + group_name (str): + Name of the group for this Job + id (int): + ID of the Job name (str): - Name of the Job. + Name of the Job + mcs_label (str): + MCS Label of the Job + nodelist (str): + Nodes this Job is using + partition (str): + Name of the Partition for this Job + priority (int): + Priority for the Job + quality_of_service (str): + Name of the Quality of Service for the Job + cpus (int): + Amount of CPUs the Job has/had allocated, or, if the Job is still + pending, this will reflect the amount requested. + memory (int): + Amount of memory the Job requested in total + reservation (str): + Name of the Reservation for this Job + script (str): + The batch script for this Job. 
+ Note: Only available if the "with_script" condition was given + start_time (int): + Time when the Job started, as a unix timestamp + state (str): + State of the Job + state_reason (str): + Last reason a Job was blocked from running + cancelled_by (str): + Name of the User who cancelled this Job + submit_time (int): + Time the Job was submitted, as a unix timestamp + submit_command (str): + Full command issued to submit the Job + suspended_time (int): + Amount of seconds the Job was suspended + system_comment (str): + Arbitrary System comment for the Job + time_limit (int): + Time limit of the Job in minutes + user_id (int): + UID of the User this Job belongs to + user_name (str): + Name of the User this Job belongs to + wckey (str): + Name of the WCKey for this Job + working_directory (str): + Working directory of the Job """ cdef: slurmdb_job_rec_t *ptr diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index 77c20bda..bf6583b6 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -1,7 +1,7 @@ ######################################################################### # job.pyx - pyslurm slurmdbd job api ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -40,7 +40,7 @@ from pyslurm.core.common import ( ) -cdef class JobConditions: +cdef class JobSearchFilter: def __cinit__(self): self.ptr = NULL @@ -158,6 +158,10 @@ cdef class JobConditions: if self.ids: # These are only allowed by the slurmdbd when specific jobs are # requested. + if self.with_script and self.with_env: + raise ValueError("with_script and with_env are mutually " + "exclusive") + if self.with_script: ptr.flags |= slurm.JOBCOND_FLAG_SCRIPT elif self.with_env: @@ -192,28 +196,36 @@ cdef class Jobs(dict): pass @staticmethod - def load(*args, **kwargs): + def load(search_filter=None): + """Load Jobs from the Slurm Database + + Implements the slurmdb_jobs_get RPC. + + Args: + search_filter (pyslurm.db.JobSearchFilter): + A search filter that the slurmdbd will apply when retrieving + Jobs from the database. + + Raises: + RPCError: When getting the Jobs from the Database was not + sucessful + """ cdef: Jobs jobs = Jobs() Job job - JobStep step - JobConditions cond + JobSearchFilter cond SlurmListItem job_ptr - SlurmListItem step_ptr - SlurmList step_list QualitiesOfService qos_data int cpu_tres_rec_count = 0 int step_cpu_tres_rec_count = 0 - # Allow the user to both specify search conditions via a JobConditions - # instance or **kwargs. - if args and isinstance(args[0], JobConditions): - cond = args[0] + if search_filter: + cond = search_filter else: - cond = JobConditions(**kwargs) + cond = JobSearchFilter() cond._create() - jobs.db_conn = Connection() + jobs.db_conn = Connection.open() jobs.info = SlurmList.wrap(slurmdb_jobs_get(jobs.db_conn.ptr, cond.ptr)) if jobs.info.is_null(): @@ -231,18 +243,15 @@ cdef class Jobs(dict): # convert them to its type name for the user in advance. # TODO: For multi-cluster support, remove duplicate federation jobs + # TODO: How to handle the possibility of duplicate job ids that could + # appear if IDs on a cluster are resetted? 
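        # A minimal usage sketch of how this entry point is meant to be driven
        # (module paths assumed to be pyslurm.db.*; per JobSearchFilter, the
        # with_script flag only works when explicit job ids are requested):
        #
        #   search_filter = pyslurm.db.JobSearchFilter(ids=[1234], with_script=True)
        #   db_jobs = pyslurm.db.Jobs.load(search_filter)
        #   print(db_jobs[1234].state, db_jobs[1234].script)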
for job_ptr in SlurmList.iter_and_pop(jobs.info): job = Job.from_ptr(job_ptr.data) job.qos_data = qos_data + job._create_steps() + JobStats._sum_step_stats_for_job(job, job.steps) jobs[job.id] = job - step_list = SlurmList.wrap(job.ptr.steps, owned=False) - for step_ptr in SlurmList.iter_and_pop(step_list): - step = JobStep.from_ptr(step_ptr.data) - job.steps[step.id] = step - - job._sum_stats_from_steps() - return jobs @@ -252,14 +261,17 @@ cdef class Job: self.ptr = NULL def __init__(self, job_id): - self._alloc() + self._alloc_impl() self.ptr.jobid = int(job_id) def __dealloc__(self): + self._dealloc_impl() + + def _dealloc_impl(self): slurmdb_destroy_job_rec(self.ptr) self.ptr = NULL - def _alloc(self): + def _alloc_impl(self): if not self.ptr: self.ptr = try_xmalloc( sizeof(slurmdb_job_rec_t)) @@ -274,69 +286,51 @@ cdef class Job: wrap.stats = JobStats() return wrap - def _sum_stats_from_steps(self): + def reload(self): + """(Re)load the information for this Database Job. + + Note: + You can call this function repeatedly to refresh the information + of an instance. Using the object returned is optional. + + Returns: + (pyslurm.db.Job): Returns the current Job-instance itself + + Raises: + RPCError: If requesting the information for the database Job was + not sucessful. + """ + cdef Job job + jobs = Jobs.load(ids=[self.id]) + if not jobs or self.id not in jobs: + raise RPCError(msg=f"Job {self.id} does not exist") + + job = jobs[self.id] + self._dealloc_impl() + self.ptr = job.ptr + self.steps = job.steps + self.stats = job.stats + job.ptr = NULL + + return self + + def _create_steps(self): cdef: - JobStats job_stats = self.stats - JobStats step_stats = None - - for step in self.steps.values(): - step_stats = step.stats - - job_stats.consumed_energy += step_stats.consumed_energy - job_stats.average_cpu_time += step_stats.average_cpu_time - job_stats.average_cpu_frequency += step_stats.average_cpu_frequency - job_stats.cpu_time += step_stats.cpu_time - job_stats.average_disk_read += step_stats.average_disk_read - job_stats.average_disk_write += step_stats.average_disk_write - job_stats.average_pages += step_stats.average_pages - job_stats.average_rss += step_stats.average_rss - job_stats.average_vmsize += step_stats.average_vmsize - - if step_stats.max_disk_read >= job_stats.max_disk_read: - job_stats.max_disk_read = step_stats.max_disk_read - job_stats.max_disk_read_node = step_stats.max_disk_read_node - job_stats.max_disk_read_task = step_stats.max_disk_read_task - - if step_stats.max_disk_write >= job_stats.max_disk_write: - job_stats.max_disk_write = step_stats.max_disk_write - job_stats.max_disk_write_node = step_stats.max_disk_write_node - job_stats.max_disk_write_task = step_stats.max_disk_write_task - - if step_stats.max_pages >= job_stats.max_pages: - job_stats.max_pages = step_stats.max_pages - job_stats.max_pages_node = step_stats.max_pages_node - job_stats.max_pages_task = step_stats.max_pages_task - - if step_stats.max_rss >= job_stats.max_rss: - job_stats.max_rss = step_stats.max_rss - job_stats.max_rss_node = step_stats.max_rss_node - job_stats.max_rss_task = step_stats.max_rss_task - - if step_stats.max_vmsize >= job_stats.max_vmsize: - job_stats.max_vmsize = step_stats.max_vmsize - job_stats.max_vmsize_node = step_stats.max_vmsize_node - job_stats.max_vmsize_task = step_stats.max_vmsize_task - - if step_stats.min_cpu_time >= job_stats.min_cpu_time: - job_stats.min_cpu_time = step_stats.min_cpu_time - job_stats.min_cpu_time_node = step_stats.min_cpu_time_node - 
job_stats.min_cpu_time_task = step_stats.min_cpu_time_task - - if self.ptr.tot_cpu_sec != slurm.NO_VAL64: - job_stats.total_cpu_time = self.ptr.tot_cpu_sec - - if self.ptr.user_cpu_sec != slurm.NO_VAL64: - job_stats.user_cpu_time = self.ptr.user_cpu_sec - - if self.ptr.sys_cpu_sec != slurm.NO_VAL64: - job_stats.system_cpu_time = self.ptr.sys_cpu_sec - - elapsed = self.elapsed_time if self.elapsed_time else 0 - cpus = self.cpus if self.cpus else 0 - job_stats.cpu_time = elapsed * cpus - job_stats.average_cpu_frequency /= len(self.steps) + JobStep step + SlurmList step_list + SlurmListItem step_ptr + + step_list = SlurmList.wrap(self.ptr.steps, owned=False) + for step_ptr in SlurmList.iter_and_pop(step_list): + step = JobStep.from_ptr(step_ptr.data) + self.steps[step.id] = step def as_dict(self): + """Database Job information formatted as a dictionary. + + Returns: + (dict): Database Job information as dict + """ cdef dict out = instance_to_dict(self) out["stats"] = self.stats.as_dict() steps = out.pop("steps", {}) @@ -419,7 +413,6 @@ cdef class Job: @property def derived_exit_code(self): - """int: The derived exit code for the Job.""" if (self.ptr.derived_ec == slurm.NO_VAL or not WIFEXITED(self.ptr.derived_ec)): return None @@ -428,7 +421,6 @@ cdef class Job: @property def derived_exit_code_signal(self): - """int: Signal for the derived exit code.""" if (self.ptr.derived_ec == slurm.NO_VAL or not WIFSIGNALED(self.ptr.derived_ec)): return None @@ -454,7 +446,12 @@ cdef class Job: @property def exit_code(self): # TODO - return None + return 0 + + @property + def exit_code_signal(self): + # TODO + return 0 # uint32_t flags @@ -494,7 +491,7 @@ cdef class Job: return u32_parse(self.ptr.priority, zero_is_noval=False) @property - def qos(self): + def quality_of_service(self): _qos = self.qos_data.get(self.ptr.qosid, None) if _qos: return _qos.name @@ -537,7 +534,6 @@ cdef class Job: @property def state(self): - """str: State this Job is in.""" return cstr.to_unicode(slurm_job_state_string(self.ptr.state)) @property @@ -554,12 +550,11 @@ cdef class Job: return _raw_time(self.ptr.submit) @property - def submit_line(self): + def submit_command(self): return cstr.to_unicode(self.ptr.submit_line) @property def suspended_time(self): - # seconds return _raw_time(self.ptr.elapsed) @property @@ -568,7 +563,6 @@ cdef class Job: @property def time_limit(self): - # minutes # TODO: Perhaps we should just find out what the actual PartitionLimit # is? 
return _raw_time(self.ptr.timelimit, "PartitionLimit") diff --git a/pyslurm/core/db/qos.pxd b/pyslurm/core/db/qos.pxd index b23325d0..5ae8b8e3 100644 --- a/pyslurm/core/db/qos.pxd +++ b/pyslurm/core/db/qos.pxd @@ -1,7 +1,7 @@ ######################################################################### # qos.pxd - pyslurm slurmdbd qos api ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,7 +20,6 @@ # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 - from pyslurm cimport slurm from pyslurm.slurm cimport ( slurmdb_qos_rec_t, @@ -28,11 +27,11 @@ from pyslurm.slurm cimport ( slurmdb_destroy_qos_rec, slurmdb_destroy_qos_cond, slurmdb_qos_get, + slurm_preempt_mode_num, try_xmalloc, ) from pyslurm.core.db.util cimport SlurmList, SlurmListItem from pyslurm.core.db.connection cimport Connection -from pyslurm.core.db.qos cimport QualitiesOfService from pyslurm.core.common cimport cstr @@ -42,14 +41,14 @@ cdef class QualitiesOfService(dict): Connection db_conn -cdef class QualityOfServiceConditions: +cdef class QualityOfServiceSearchFilter: cdef slurmdb_qos_cond_t *ptr cdef public: names ids descriptions - preempt_mode + preempt_modes with_deleted diff --git a/pyslurm/core/db/qos.pyx b/pyslurm/core/db/qos.pyx index 5e704026..f7cbc1a8 100644 --- a/pyslurm/core/db/qos.pyx +++ b/pyslurm/core/db/qos.pyx @@ -1,7 +1,7 @@ ######################################################################### # qos.pyx - pyslurm slurmdbd qos api ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -29,25 +29,25 @@ cdef class QualitiesOfService(dict): pass @staticmethod - def load(*args, name_is_key=True, db_connection=None, **kwargs): + def load(search_filter=None, name_is_key=True, db_connection=None): cdef: QualitiesOfService qos_dict = QualitiesOfService() QualityOfService qos - QualityOfServiceConditions cond + QualityOfServiceSearchFilter cond SlurmListItem qos_ptr Connection conn = db_connection - if args and isinstance(args[0], QualityOfServiceConditions): - cond = args[0] + if search_filter: + cond = search_filter else: - cond = QualityOfServiceConditions(**kwargs) + cond = QualityOfServiceSearchFilter() cond._create() - qos_dict.db_conn = Connection() if not conn else conn + qos_dict.db_conn = Connection.open() if not conn else conn qos_dict.info = SlurmList.wrap(slurmdb_qos_get(qos_dict.db_conn.ptr, cond.ptr)) if qos_dict.info.is_null(): - raise RPCError(msg="Failed to get QoS from slurmdbd") + raise RPCError(msg="Failed to get QoS data from slurmdbd") for qos_ptr in SlurmList.iter_and_pop(qos_dict.info): qos = QualityOfService.from_ptr(qos_ptr.data) @@ -59,7 +59,7 @@ cdef class QualitiesOfService(dict): return qos_dict -cdef class QualityOfServiceConditions: +cdef class QualityOfServiceSearchFilter: def __cinit__(self): self.ptr = NULL @@ -81,33 +81,101 @@ cdef class QualityOfServiceConditions: if not self.ptr: raise MemoryError("xmalloc failed for slurmdb_qos_cond_t") + def _parse_preempt_modes(self): + if not self.preempt_modes: + return 0 + + if not isinstance(self.preempt_modes, list): + return 
int(self.preempt_modes) + + out = 0 + for mode in self.preempt_modes: + _mode = slurm_preempt_mode_num(mode) + if _mode == slurm.NO_VAL16: + raise ValueError(f"Unknown preempt mode: {mode}") + + if _mode == slurm.PREEMPT_MODE_OFF: + _mode = slurm.PREEMPT_MODE_COND_OFF + + out |= _mode + + return out + def _create(self): self._alloc() cdef slurmdb_qos_cond_t *ptr = self.ptr + SlurmList.to_char_list(&ptr.name_list, self.names) + SlurmList.to_char_list(&ptr.id_list, self.ids) + SlurmList.to_char_list(&ptr.description_list, self.descriptions) + ptr.preempt_mode = self._parse_preempt_modes() + ptr.with_deleted = 1 if bool(self.with_deleted) else 0 + cdef class QualityOfService: def __cinit__(self): self.ptr = NULL - def __init__(self, qos_id): - pass + def __init__(self, name=None): + self._alloc_impl() + self.name = name def __dealloc__(self): + self._dealloc_impl() + + def _dealloc_impl(self): slurmdb_destroy_qos_rec(self.ptr) self.ptr = NULL + def _alloc_impl(self): + if not self.ptr: + self.ptr = try_xmalloc( + sizeof(slurmdb_qos_rec_t)) + if not self.ptr: + raise MemoryError("xmalloc failed for slurmdb_qos_rec_t") + @staticmethod cdef QualityOfService from_ptr(slurmdb_qos_rec_t *in_ptr): cdef QualityOfService wrap = QualityOfService.__new__(QualityOfService) wrap.ptr = in_ptr return wrap + def reload(self): + """(Re)load the information for this Quality of Service. + + Note: + You can call this function repeatedly to refresh the information + of an instance. Using the object returned is optional. + + Returns: + (pyslurm.db.QualityOfService): Returns the current + QualityOfService-instance itself. + + Raises: + RPCError: If requesting the information from the database was not + sucessful. + """ + cdef QualityOfService qos + qos_data = QualitiesOfService.load(names=[self.name]) + if not qos_data or self.name not in qos_data: + raise RPCError(msg=f"QualityOfService {self.name} does not exist") + + qos = qos_data[self.name] + self._dealloc_impl() + self.ptr = qos.ptr + qos.ptr = NULL + + return self + @property def name(self): return cstr.to_unicode(self.ptr.name) + @name.setter + def name(self, val): + cstr.fmalloc(&self.ptr.name, val) + @property def description(self): return cstr.to_unicode(self.ptr.description) diff --git a/pyslurm/core/db/stats.pxd b/pyslurm/core/db/stats.pxd index 08b04e75..11f08342 100644 --- a/pyslurm/core/db/stats.pxd +++ b/pyslurm/core/db/stats.pxd @@ -27,7 +27,8 @@ from pyslurm.slurm cimport ( slurmdb_job_rec_t, ) from pyslurm.core.db.tres cimport TrackableResources -from pyslurm.core.db.step cimport JobStep +from pyslurm.core.db.step cimport JobStep, JobSteps +from pyslurm.core.db.job cimport Job from pyslurm.core.common cimport cstr diff --git a/pyslurm/core/db/stats.pyx b/pyslurm/core/db/stats.pyx index 4cebe21c..92bd3b9a 100644 --- a/pyslurm/core/db/stats.pyx +++ b/pyslurm/core/db/stats.pyx @@ -133,3 +133,67 @@ cdef class JobStats: wrap.system_cpu_time = step.ptr.sys_cpu_sec return wrap + + @staticmethod + def _sum_step_stats_for_job(Job job, JobSteps steps): + cdef: + JobStats job_stats = job.stats + JobStats step_stats = None + + for step in steps.values(): + step_stats = step.stats + + job_stats.consumed_energy += step_stats.consumed_energy + job_stats.average_cpu_time += step_stats.average_cpu_time + job_stats.average_cpu_frequency += step_stats.average_cpu_frequency + job_stats.cpu_time += step_stats.cpu_time + job_stats.average_disk_read += step_stats.average_disk_read + job_stats.average_disk_write += step_stats.average_disk_write + 
job_stats.average_pages += step_stats.average_pages + + if step_stats.max_disk_read >= job_stats.max_disk_read: + job_stats.max_disk_read = step_stats.max_disk_read + job_stats.max_disk_read_node = step_stats.max_disk_read_node + job_stats.max_disk_read_task = step_stats.max_disk_read_task + + if step_stats.max_disk_write >= job_stats.max_disk_write: + job_stats.max_disk_write = step_stats.max_disk_write + job_stats.max_disk_write_node = step_stats.max_disk_write_node + job_stats.max_disk_write_task = step_stats.max_disk_write_task + + if step_stats.max_pages >= job_stats.max_pages: + job_stats.max_pages = step_stats.max_pages + job_stats.max_pages_node = step_stats.max_pages_node + job_stats.max_pages_task = step_stats.max_pages_task + + if step_stats.max_rss >= job_stats.max_rss: + job_stats.max_rss = step_stats.max_rss + job_stats.max_rss_node = step_stats.max_rss_node + job_stats.max_rss_task = step_stats.max_rss_task + job_stats.average_rss = job_stats.max_rss + + if step_stats.max_vmsize >= job_stats.max_vmsize: + job_stats.max_vmsize = step_stats.max_vmsize + job_stats.max_vmsize_node = step_stats.max_vmsize_node + job_stats.max_vmsize_task = step_stats.max_vmsize_task + job_stats.average_vmsize = job_stats.max_vmsize + + if step_stats.min_cpu_time >= job_stats.min_cpu_time: + job_stats.min_cpu_time = step_stats.min_cpu_time + job_stats.min_cpu_time_node = step_stats.min_cpu_time_node + job_stats.min_cpu_time_task = step_stats.min_cpu_time_task + + if job.ptr.tot_cpu_sec != slurm.NO_VAL64: + job_stats.total_cpu_time = job.ptr.tot_cpu_sec + + if job.ptr.user_cpu_sec != slurm.NO_VAL64: + job_stats.user_cpu_time = job.ptr.user_cpu_sec + + if job.ptr.sys_cpu_sec != slurm.NO_VAL64: + job_stats.system_cpu_time = job.ptr.sys_cpu_sec + + elapsed = job.elapsed_time if job.elapsed_time else 0 + cpus = job.cpus if job.cpus else 0 + job_stats.cpu_time = elapsed * cpus + job_stats.average_cpu_frequency /= len(steps) + diff --git a/pyslurm/core/db/step.pxd b/pyslurm/core/db/step.pxd index 2bbe7f01..c0cf876d 100644 --- a/pyslurm/core/db/step.pxd +++ b/pyslurm/core/db/step.pxd @@ -45,6 +45,50 @@ cdef class JobSteps(dict): cdef class JobStep: + """A Slurm Database Job-step. 
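A short sketch of how the steps of a database Job are typically reached once it has been loaded (pyslurm.db module paths assumed; step ids are either integers or the special names such as "batch" and "extern"):

    >>> import pyslurm
    >>> search_filter = pyslurm.db.JobSearchFilter(ids=[1234])
    >>> db_job = pyslurm.db.Jobs.load(search_filter)[1234]
    >>> for step_id, step in db_job.steps.items():
    ...     print(step_id, step.state, step.elapsed_time)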
+ + Attributes: + num_nodes (int): + Amount of nodes this Step has allocated + cpus (int): + Amount of CPUs the Step has/had allocated + memory (int): + Amount of memory the Step requested + container (str): + Path to OCI Container bundle + elapsed_time (int): + Amount of seconds elapsed for the Step + end_time (int): + When the Step ended, as a unix timestamp + eligible_time (int): + When the Step became eligible to run, as a unix timestamp + start_time (int): + Time when the Step started, as a unix timestamp + exit_code (int): + Exit code of the step + ntasks (int): + Number of tasks the Step uses + cpu_frequency_min (str): + Minimum CPU-Frequency requested for the Step + cpu_frequency_max (str): + Maximum CPU-Frequency requested for the Step + cpu_frequency_governor (str): + CPU-Frequency Governor requested for the Step + nodelist (str): + Nodes this Step is using + id (Union[str, int]): + ID of the Step + job_id (int): + ID of the Job this Step is a part of + state (str): + State of the Step + cancelled_by (str): + Name of the User who cancelled this Step + submit_command (str): + Full command issued to start the Step + suspended_time (int): + Amount of seconds the Step was suspended + """ cdef slurmdb_step_rec_t *ptr cdef public JobStats stats diff --git a/pyslurm/core/db/step.pyx b/pyslurm/core/db/step.pyx index 990f737e..5a55cc7f 100644 --- a/pyslurm/core/db/step.pyx +++ b/pyslurm/core/db/step.pyx @@ -38,6 +38,9 @@ cdef class JobStep: def __cinit__(self): self.ptr = NULL + def __init__(self): + raise RuntimeError("You can not instantiate this class directly") + def __dealloc__(self): slurmdb_destroy_step_rec(self.ptr) self.ptr = NULL @@ -179,7 +182,6 @@ cdef class JobStep: @property def state(self): - """str: State this Job step is in.""" return cstr.to_unicode(slurm_job_state_string(self.ptr.state)) @property @@ -187,10 +189,9 @@ cdef class JobStep: return uid_to_name(self.ptr.requid) @property - def submit_line(self): + def submit_command(self): return cstr.to_unicode(self.ptr.submit_line) @property def suspended_time(self): - # seconds return _raw_time(self.ptr.elapsed) diff --git a/pyslurm/core/db/tres.pxd b/pyslurm/core/db/tres.pxd index 260f8477..82fb43ee 100644 --- a/pyslurm/core/db/tres.pxd +++ b/pyslurm/core/db/tres.pxd @@ -26,6 +26,7 @@ from pyslurm.core.common cimport cstr from libc.stdint cimport uint64_t from pyslurm.slurm cimport ( slurmdb_tres_rec_t, + slurmdb_destroy_tres_rec, slurmdb_find_tres_count_in_string, try_xmalloc, ) diff --git a/pyslurm/core/db/tres.pyx b/pyslurm/core/db/tres.pyx index a827cddc..785d6dd7 100644 --- a/pyslurm/core/db/tres.pyx +++ b/pyslurm/core/db/tres.pyx @@ -55,7 +55,7 @@ cdef class TrackableResources(dict): cdef uint64_t tmp tmp = slurmdb_find_tres_count_in_string(tres_str, typ) - if tmp == slurm.NO_VAL64: + if tmp == slurm.INFINITE64 or tmp == slurm.NO_VAL64: return 0 else: return tmp @@ -67,15 +67,23 @@ cdef class TrackableResource: self.ptr = NULL def __init__(self, tres_id): - self._alloc() + self._alloc_impl() self.ptr.id = tres_id - def _alloc(self): + def __dealloc__(self): + self._dealloc_impl() + + def _alloc_impl(self): if not self.ptr: - self.ptr = try_xmalloc(sizeof(slurmdb_tres_rec_t)) + self.ptr = try_xmalloc( + sizeof(slurmdb_tres_rec_t)) if not self.ptr: raise MemoryError("xmalloc failed for slurmdb_tres_rec_t") + def _dealloc_impl(self): + slurmdb_destroy_tres_rec(self.ptr) + self.ptr = NULL + @staticmethod cdef TrackableResource from_ptr(slurmdb_tres_rec_t *in_ptr): cdef TrackableResource wrap = 
TrackableResource.__new__(TrackableResource) diff --git a/pyslurm/core/db/util.pyx b/pyslurm/core/db/util.pyx index 7601f2b2..1a30cb82 100644 --- a/pyslurm/core/db/util.pyx +++ b/pyslurm/core/db/util.pyx @@ -1,7 +1,7 @@ ######################################################################### # util.pxd - pyslurm slurmdbd util functions ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/pyslurm/core/node.pyx b/pyslurm/core/node.pyx index 46c19fbe..2c1a51bc 100644 --- a/pyslurm/core/node.pyx +++ b/pyslurm/core/node.pyx @@ -365,7 +365,7 @@ cdef class Node: Implements the slurm_update_node RPC. Args: - node (JobStep): + node (pyslurm.Node): Another Node object which contains all the changes that should be applied to this instance. **kwargs: diff --git a/pyslurm/slurm/extra.pxi b/pyslurm/slurm/extra.pxi index 88b8e89a..9c144ba4 100644 --- a/pyslurm/slurm/extra.pxi +++ b/pyslurm/slurm/extra.pxi @@ -212,6 +212,7 @@ cdef extern void slurm_env_array_free(char **env_array) # cdef extern char *slurm_preempt_mode_string (uint16_t preempt_mode) +cdef extern uint16_t slurm_preempt_mode_num (const char *preempt_mode) cdef extern char *slurm_node_state_string (uint32_t inx) cdef extern char *slurm_step_layout_type_name (task_dist_states_t task_dist) cdef extern char *slurm_reservation_flags_string (reserve_info_t *resv_ptr) From 48d98d5df4ffe7515eeeb7c2ecee7bc3a1c632e5 Mon Sep 17 00:00:00 2001 From: Toni Harzendorf Date: Thu, 27 Apr 2023 18:54:19 +0200 Subject: [PATCH 20/28] wip --- pyslurm/core/common/cstr.pxd | 13 +- pyslurm/core/common/cstr.pyx | 94 +++++---- pyslurm/core/common/uint.pxd | 4 +- pyslurm/core/common/uint.pyx | 4 +- pyslurm/core/db/connection.pxd | 1 + pyslurm/core/db/connection.pyx | 11 + pyslurm/core/db/job.pxd | 4 +- pyslurm/core/db/job.pyx | 29 +-- pyslurm/core/db/qos.pyx | 29 ++- pyslurm/core/db/stats.pxd | 116 ++++++++--- pyslurm/core/db/stats.pyx | 93 ++++----- pyslurm/core/db/step.pxd | 2 + pyslurm/core/db/step.pyx | 35 +--- pyslurm/core/job/job.pxd | 7 +- pyslurm/core/job/job.pyx | 145 +++++-------- pyslurm/core/job/step.pxd | 2 +- pyslurm/core/job/step.pyx | 126 ++++++------ pyslurm/core/job/submission.pyx | 39 ++-- pyslurm/core/job/task_dist.pxd | 39 ++++ pyslurm/core/job/task_dist.pyx | 350 +++++++++++++++++++++++++++++++ pyslurm/core/job/util.pyx | 351 +++++--------------------------- pyslurm/core/node.pxd | 4 +- pyslurm/core/node.pyx | 55 ++--- 23 files changed, 853 insertions(+), 700 deletions(-) create mode 100644 pyslurm/core/job/task_dist.pxd create mode 100644 pyslurm/core/job/task_dist.pyx diff --git a/pyslurm/core/common/cstr.pxd b/pyslurm/core/common/cstr.pxd index 0c6b96c3..ca20d4a9 100644 --- a/pyslurm/core/common/cstr.pxd +++ b/pyslurm/core/common/cstr.pxd @@ -1,7 +1,7 @@ ######################################################################### # common/cstr.pxd - slurm string functions ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,7 +17,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
USA. # -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from pyslurm cimport slurm @@ -29,10 +29,9 @@ cdef to_unicode(char *s, default=*) cdef fmalloc(char **old, val) cdef fmalloc2(char **p1, char **p2, val) cdef free_array(char **arr, count) -cdef list to_list(char *str_list) +cpdef list to_list(char *str_list) cdef from_list(char **old, vals, delim=*) cdef from_list2(char **p1, char **p2, vals, delim=*) -cdef dict to_dict(char *str_dict, str delim1=*, str delim2=*) -cdef dict from_dict(char **old, vals, prepend=*, str delim1=*, str delim2=*) -cdef to_gres_dict(char *gres) -cdef from_gres_dict(vals, typ=*) +cpdef dict to_dict(char *str_dict, str delim1=*, str delim2=*) +cdef from_dict(char **old, vals, prepend=*, str delim1=*, str delim2=*) +cpdef dict to_gres_dict(char *gres) diff --git a/pyslurm/core/common/cstr.pyx b/pyslurm/core/common/cstr.pyx index 8ea08186..7f6fae60 100644 --- a/pyslurm/core/common/cstr.pyx +++ b/pyslurm/core/common/cstr.pyx @@ -1,7 +1,7 @@ ######################################################################### # common/cstr.pyx - pyslurm string functions ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,7 +17,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 import re @@ -40,7 +40,7 @@ cdef char *from_unicode(s): return _s -cdef inline to_unicode(char *_str, default=None): +cdef to_unicode(char *_str, default=None): """Convert a char* to Python3 str (unicode)""" if _str and _str[0] != NULL_BYTE: if _str == NONE_BYTE: @@ -105,7 +105,7 @@ cdef fmalloc(char **old, val): old[0] = NULL -cdef list to_list(char *str_list): +cpdef list to_list(char *str_list): """Convert C-String to a list.""" cdef str ret = to_unicode(str_list) @@ -115,14 +115,18 @@ cdef list to_list(char *str_list): return ret.split(",") -cdef from_list(char **old, vals, delim=","): +def list_to_str(vals, delim=","): """Convert list to a C-String.""" cdef object final = vals if vals and not isinstance(vals, str): final = delim.join(vals) - fmalloc(old, final) + return final + + +cdef from_list(char **old, vals, delim=","): + fmalloc(old, list_to_str(vals, delim)) cdef from_list2(char **p1, char **p2, vals, delim=","): @@ -130,7 +134,7 @@ cdef from_list2(char **p1, char **p2, vals, delim=","): from_list(p2, vals, delim) -cdef dict to_dict(char *str_dict, str delim1=",", str delim2="="): +cpdef dict to_dict(char *str_dict, str delim1=",", str delim2="="): """Convert a char* key=value pair to dict. 
With a char* Slurm represents key-values pairs usually in the form of: @@ -142,7 +146,7 @@ cdef dict to_dict(char *str_dict, str delim1=",", str delim2="="): str key, val dict out = {} - if not _str_dict: + if not _str_dict or delim1 not in _str_dict: return out for kv in _str_dict.split(delim1): @@ -153,7 +157,23 @@ cdef dict to_dict(char *str_dict, str delim1=",", str delim2="="): return out -cdef dict from_dict(char **old, vals, prepend=None, str delim1=",", str delim2="="): +def validate_str_key_value_format(val, delim1=",", delim2="="): + cdef dict out = {} + + for kv in val.split(delim1): + if delim2 in kv: + k, v = kv.split(delim2) + out[k] = v + else: + raise ValueError( + f"Invalid format for key-value pair {kv}. " + f"Expected {delim2} as seperator." + ) + + return out + + +def dict_to_str(vals, prepend=None, delim1=",", delim2="="): """Convert a dict (or str) to Slurm Key-Value pair. Slurm predominantly uses a format of: @@ -165,22 +185,16 @@ cdef dict from_dict(char **old, vals, prepend=None, str delim1=",", str delim2=" format of this string will the be validated. """ cdef: - out = {} if not vals else vals + tmp_dict = {} if not vals else vals list tmp = [] - if vals and isinstance(vals, str): - out = {} - for kv in vals.split(delim1): - if delim2 in kv: - k, v = kv.split(delim2) - out[k] = v - else: - raise ValueError( - f"Invalid format for key-value pair {kv}. " - f"Expected {delim2} as seperator." - ) + if not vals: + return None + + if isinstance(vals, str): + tmp_dict = validate_str_key_value_format(vals, delim1, delim2) - for k, v in out.items(): + for k, v in tmp_dict.items(): if ((delim1 in k or delim2 in k) or delim1 in v or delim2 in v): raise ValueError( @@ -190,12 +204,15 @@ cdef dict from_dict(char **old, vals, prepend=None, str delim1=",", str delim2=" tmp.append(f"{'' if not prepend else prepend}{k}{delim2}{v}") - fmalloc(old, delim1.join(tmp)) + return delim1.join(tmp) - return out + +cdef from_dict(char **old, vals, prepend=None, + str delim1=",", str delim2="="): + fmalloc(old, dict_to_str(vals, prepend, delim1, delim2)) -cdef to_gres_dict(char *gres): +cpdef dict to_gres_dict(char *gres): """Parse a GRES string.""" cdef: dict output = {} @@ -242,33 +259,32 @@ cdef to_gres_dict(char *gres): return output -cdef from_gres_dict(vals, typ=""): +def from_gres_dict(vals, typ=""): final = [] - gres_dict = vals + gres_dict = {} if not vals else vals if not vals: return None if isinstance(vals, str) and not vals.isdigit(): gres_dict = {} - gres_list = vals.replace("gres:", "") for gres_str in gres_list.split(","): gres_and_type, cnt = gres_str.rsplit(":", 1) gres_dict.update({gres_and_type: int(cnt)}) - elif isinstance(vals, dict): - for gres_and_type, cnt in gres_dict.items(): - # Error immediately on specifications that contain more than one - # semicolon, as it is wrong. - if len(gres_and_type.split(":")) > 2: - raise ValueError(f"Invalid specifier: '{gres_and_type}'") + elif not isinstance(vals, dict): + return f"gres:{typ}:{int(vals)}" - if typ not in gres_and_type: - gres_and_type = f"{gres_and_type}:{typ}" + for gres_and_type, cnt in gres_dict.items(): + # Error immediately on specifications that contain more than one + # semicolon, as it is wrong. 
+ if len(gres_and_type.split(":")) > 2: + raise ValueError(f"Invalid specifier: '{gres_and_type}'") - final.append(f"gres:{gres_and_type}:{int(cnt)}") - else: - return f"gres:{typ}:{int(vals)}" + if typ not in gres_and_type: + gres_and_type = f"{gres_and_type}:{typ}" + + final.append(f"gres:{gres_and_type}:{int(cnt)}") return ",".join(final) diff --git a/pyslurm/core/common/uint.pxd b/pyslurm/core/common/uint.pxd index aa9f8dd2..a18b5a72 100644 --- a/pyslurm/core/common/uint.pxd +++ b/pyslurm/core/common/uint.pxd @@ -31,8 +31,8 @@ cpdef u8_parse(uint8_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) cpdef u16_parse(uint16_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) cpdef u32_parse(uint32_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) cpdef u64_parse(uint64_t val, on_inf=*, on_noval=*, noval=*, zero_is_noval=*) -cdef u8_bool(val) -cdef u16_bool(val) +cpdef u8_bool(val) +cpdef u16_bool(val) cdef u8_parse_bool(uint8_t val) cdef u16_parse_bool(uint16_t val) cdef u64_parse_bool_flag(uint64_t flags, flag) diff --git a/pyslurm/core/common/uint.pyx b/pyslurm/core/common/uint.pyx index 8c1146df..b5ff3e1c 100644 --- a/pyslurm/core/common/uint.pyx +++ b/pyslurm/core/common/uint.pyx @@ -113,7 +113,7 @@ cpdef u64_parse(uint64_t val, on_inf="unlimited", on_noval=None, noval=slurm.NO_ return val -cdef u8_bool(val): +cpdef u8_bool(val): if val is None: return slurm.NO_VAL8 elif val: @@ -122,7 +122,7 @@ cdef u8_bool(val): return 0 -cdef u16_bool(val): +cpdef u16_bool(val): if val is None: return slurm.NO_VAL16 elif val: diff --git a/pyslurm/core/db/connection.pxd b/pyslurm/core/db/connection.pxd index 635bdb14..1a42f31f 100644 --- a/pyslurm/core/db/connection.pxd +++ b/pyslurm/core/db/connection.pxd @@ -26,6 +26,7 @@ from libc.stdint cimport uint16_t from pyslurm.slurm cimport ( slurmdb_connection_get, slurmdb_connection_close, + slurmdb_connection_commit, ) diff --git a/pyslurm/core/db/connection.pyx b/pyslurm/core/db/connection.pyx index 70785c00..d600921c 100644 --- a/pyslurm/core/db/connection.pyx +++ b/pyslurm/core/db/connection.pyx @@ -24,6 +24,7 @@ from pyslurm.core.error import RPCError cdef class Connection: + def __cinit__(self): self.ptr = NULL self.flags = 0 @@ -58,6 +59,16 @@ cdef class Connection: slurmdb_connection_close(&self.ptr) self.ptr = NULL + def commit(self): + """Commit recent changes.""" + if slurmdb_connection_commit(self.ptr, 1) == slurm.SLURM_ERROR: + raise RPCError("Failed to commit database changes.") + + def rollback(self): + """Rollback recent changes.""" + if slurmdb_connection_commit(self.ptr, 0) == slurm.SLURM_ERROR: + raise RPCError("Failed to rollback database changes.") + @property def is_open(self): if self.ptr: diff --git a/pyslurm/core/db/job.pxd b/pyslurm/core/db/job.pxd index 74640d81..996f646d 100644 --- a/pyslurm/core/db/job.pxd +++ b/pyslurm/core/db/job.pxd @@ -160,9 +160,9 @@ cdef class Job: Attributes: steps (pyslurm.db.JobSteps): - Steps this Job has. + Steps this Job has stats (pyslurm.db.JobStats): - General Statistics of this Job + Utilization statistics of this Job account (str): Account of the Job. admin_comment (str): diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index bf6583b6..683b61ac 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -286,33 +286,26 @@ cdef class Job: wrap.stats = JobStats() return wrap - def reload(self): - """(Re)load the information for this Database Job. + @staticmethod + def load(job_id): + """Load the information for a specific Job from the Database. 
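The commit() and rollback() helpers added to Connection above presumably matter most when a single explicitly opened connection is reused across calls; a hedged sketch (pyslurm.db paths assumed, with QualitiesOfService.load accepting db_connection as shown in qos.pyx):

    >>> import pyslurm
    >>> conn = pyslurm.db.Connection.open()
    >>> qos = pyslurm.db.QualitiesOfService.load(db_connection=conn)
    >>> conn.commit()   # or conn.rollback() to discard pending changes
    >>> conn.close()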
- Note: - You can call this function repeatedly to refresh the information - of an instance. Using the object returned is optional. + Args: + job_id (int): + ID of the Job to be loaded. Returns: - (pyslurm.db.Job): Returns the current Job-instance itself + (pyslurm.db.Job): Returns a new Job instance Raises: RPCError: If requesting the information for the database Job was not sucessful. """ - cdef Job job - jobs = Jobs.load(ids=[self.id]) - if not jobs or self.id not in jobs: - raise RPCError(msg=f"Job {self.id} does not exist") - - job = jobs[self.id] - self._dealloc_impl() - self.ptr = job.ptr - self.steps = job.steps - self.stats = job.stats - job.ptr = NULL + jobs = Jobs.load(ids=[int(job_id)]) + if not jobs or job_idid not in jobs: + raise RPCError(msg=f"Job {job_id} does not exist") - return self + return jobs[job_id] def _create_steps(self): cdef: diff --git a/pyslurm/core/db/qos.pyx b/pyslurm/core/db/qos.pyx index f7cbc1a8..9d6f7690 100644 --- a/pyslurm/core/db/qos.pyx +++ b/pyslurm/core/db/qos.pyx @@ -141,32 +141,27 @@ cdef class QualityOfService: wrap.ptr = in_ptr return wrap - def reload(self): - """(Re)load the information for this Quality of Service. + @staticmethod + def load(name): + """Load the information for a specific Quality of Service. - Note: - You can call this function repeatedly to refresh the information - of an instance. Using the object returned is optional. + Args: + name (str): + Name of the Quality of Service to be loaded. Returns: - (pyslurm.db.QualityOfService): Returns the current - QualityOfService-instance itself. + (pyslurm.db.QualityOfService): Returns a new QualityOfService + instance. Raises: RPCError: If requesting the information from the database was not sucessful. """ - cdef QualityOfService qos - qos_data = QualitiesOfService.load(names=[self.name]) - if not qos_data or self.name not in qos_data: - raise RPCError(msg=f"QualityOfService {self.name} does not exist") - - qos = qos_data[self.name] - self._dealloc_impl() - self.ptr = qos.ptr - qos.ptr = NULL + qos_data = QualitiesOfService.load(names=[name]) + if not qos_data or name not in qos_data: + raise RPCError(msg=f"QualityOfService {name} does not exist") - return self + return qos_data[name] @property def name(self): diff --git a/pyslurm/core/db/stats.pxd b/pyslurm/core/db/stats.pxd index 11f08342..52d43627 100644 --- a/pyslurm/core/db/stats.pxd +++ b/pyslurm/core/db/stats.pxd @@ -1,7 +1,7 @@ ######################################################################### # stats.pxd - pyslurm slurmdbd job stats ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -33,49 +33,107 @@ from pyslurm.core.common cimport cstr cdef class JobStats: + """Statistics for a Slurm Job or Step. + + Note: + For more information also see the sacct manpage. + + Attributes: + consumed_energy (int): + Total amount of energy consumed, in joules + elapsed_cpu_time (int): + Total amount of time used(Elapsed time * cpu count) in seconds. 
+ This is not the real CPU-Efficiency, but rather the total amount + of cpu-time the CPUs were occupied for + avg_cpu_time (int): + Average CPU-Time (System + User) in seconds of all tasks + avg_cpu_frequency (int): + Average weighted CPU-Frequency of all tasks, in Kilohertz + avg_disk_read (int): + Average number of bytes read by all tasks + avg_disk_write (int): + Average number of bytes written by all tasks + avg_page_faults (int): + Average number of page faults by all tasks + avg_resident_memory (int): + Average Resident Set Size (RSS) in bytes of all tasks + avg_virtual_memory (int): + Average Virtual Memory Size (VSZ) in bytes of all tasks + max_disk_read (int): + Highest peak number of bytes read by all tasks + max_disk_read_node (int): + Name of the Node where max_disk_read occured + max_disk_read_task (int): + ID of the Task where max_disk_read occured + max_disk_write (int): + Lowest peak number of bytes written by all tasks + max_disk_write_node (int): + Name of the Node where max_disk_write occured + max_disk_write_task (int): + ID of the Task where max_disk_write occured + max_page_faults (int): + Highest peak number of page faults by all tasks + max_page_faults_node (int): + Name of the Node where max_page_faults occured + max_page_faults_task (int): + ID of the Task where max_page_faults occured + max_resident_memory (int): + Highest peak Resident Set Size (RSS) in bytes by all tasks + max_resident_memory_node (int): + Name of the Node where max_resident_memory occured + max_resident_memory_task (int): + ID of the Task where max_resident_memory occured + max_virtual_memory (int): + Highest peak Virtual Memory Size (VSZ) in bytes by all tasks + max_virtual_memory_node (int): + Name of the Node where max_virtual_memory occured + max_virtual_memory_task (int): + ID of the Task where max_virtual_memory occured + min_cpu_time (int): + Lowest peak CPU-Time (System + User) in seconds of all tasks + min_cpu_time_node (int): + Name of the Node where min_cpu_time occured + min_cpu_time_task (int): + ID of the Task where min_cpu_time occured + total_cpu_time (int): + Sum of user_cpu_time and system_cpu_time, in seconds + user_cpu_time (int): + Amount of Time spent in user space, in seconds + system_cpu_time (int): + Amount of Time spent in kernel space, in seconds + """ cdef slurmdb_job_rec_t *job cdef public: consumed_energy - average_cpu_time - average_cpu_frequency - # Elapsed * alloc_cpus - # This is the time the Job has been using the allocated CPUs for. - # This is not the actual cpu-usage. 
- cpu_time - average_disk_read - average_disk_write - average_pages - average_rss - average_vmsize + elapsed_cpu_time + avg_cpu_time + avg_cpu_frequency + avg_disk_read + avg_disk_write + avg_page_faults + avg_resident_memory + avg_virtual_memory max_disk_read max_disk_read_node max_disk_read_task max_disk_write max_disk_write_node max_disk_write_task - max_pages - max_pages_node - max_pages_task - max_rss - max_rss_node - max_rss_task - max_vmsize - max_vmsize_node - max_vmsize_task + max_page_faults + max_page_faults_node + max_page_faults_task + max_resident_memory + max_resident_memory_node + max_resident_memory_task + max_virtual_memory + max_virtual_memory_node + max_virtual_memory_task min_cpu_time min_cpu_time_node min_cpu_time_task - # uint32_t tot_cpu_sec - # uint32_t tot_cpu_usec total_cpu_time - # Only available for Jobs from the Database, not sstat - # uint32_t user_cpu_sec - # uint32_t user_cpu_usec user_cpu_time - # Only available for Jobs from the Database, not sstat - # uint32_t sys_cpu_sec - # uint32_t sys_cpu_usec system_cpu_time @staticmethod diff --git a/pyslurm/core/db/stats.pyx b/pyslurm/core/db/stats.pyx index 92bd3b9a..46ceef2a 100644 --- a/pyslurm/core/db/stats.pyx +++ b/pyslurm/core/db/stats.pyx @@ -1,7 +1,7 @@ ######################################################################### # stats.pyx - pyslurm slurmdbd job stats ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -33,12 +33,12 @@ cdef class JobStats: self.max_disk_read_task = None self.max_disk_write_node = None self.max_disk_write_task = None - self.max_pages_node = None - self.max_pages_task = None - self.max_rss_node = None - self.max_rss_task = None - self.max_vmsize_node = None - self.max_vmsize_task = None + self.max_page_faults_node = None + self.max_page_faults_task = None + self.max_resident_memory_node = None + self.max_resident_memory_task = None + self.max_virtual_memory_node = None + self.max_virtual_memory_task = None self.min_cpu_time_node = None self.min_cpu_time_task = None @@ -60,25 +60,27 @@ cdef class JobStats: if ptr.consumed_energy != slurm.NO_VAL64: wrap.consumed_energy = ptr.consumed_energy - wrap.average_cpu_time = TrackableResources.find_count_in_str( + wrap.avg_cpu_time = TrackableResources.find_count_in_str( ptr.tres_usage_in_ave, slurm.TRES_CPU) / cpu_time_adj - # TODO - # wrap.cpu_time = elapsed * step_cpu_tres_rec + + elapsed = step.elapsed_time if step.elapsed_time else 0 + cpus = step.cpus if step.cpus else 0 + wrap.elapsed_cpu_time = elapsed * cpus ave_freq = int(ptr.act_cpufreq) if ave_freq != slurm.NO_VAL: - wrap.average_cpu_frequency = ptr.act_cpufreq + wrap.avg_cpu_frequency = ptr.act_cpufreq # Convert to MiB instead of raw bytes? 
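        # For reference: the tres_usage_* fields are plain Slurm TRES strings
        # of the form "<tres_id>=<count>,..." (for example "1=10000,2=1048576"),
        # and find_count_in_str() extracts the count that belongs to the
        # requested TRES id (slurm.TRES_CPU, slurm.TRES_MEM, and so on).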
- wrap.average_disk_read = TrackableResources.find_count_in_str( + wrap.avg_disk_read = TrackableResources.find_count_in_str( ptr.tres_usage_in_ave, slurm.TRES_FS_DISK) - wrap.average_disk_write = TrackableResources.find_count_in_str( + wrap.avg_disk_write = TrackableResources.find_count_in_str( ptr.tres_usage_out_ave, slurm.TRES_FS_DISK) - wrap.average_pages = TrackableResources.find_count_in_str( + wrap.avg_page_faults = TrackableResources.find_count_in_str( ptr.tres_usage_in_ave, slurm.TRES_PAGES) - wrap.average_rss = TrackableResources.find_count_in_str( + wrap.avg_resident_memory = TrackableResources.find_count_in_str( ptr.tres_usage_in_ave, slurm.TRES_MEM) - wrap.average_vmsize = TrackableResources.find_count_in_str( + wrap.avg_virtual_memory = TrackableResources.find_count_in_str( ptr.tres_usage_in_ave, slurm.TRES_VMEM) wrap.max_disk_read = TrackableResources.find_count_in_str( @@ -95,18 +97,18 @@ cdef class JobStats: wrap.max_disk_write_task = TrackableResources.find_count_in_str( ptr.tres_usage_out_max_taskid, slurm.TRES_FS_DISK) - wrap.max_rss = TrackableResources.find_count_in_str( + wrap.max_resident_memory = TrackableResources.find_count_in_str( ptr.tres_usage_in_max, slurm.TRES_MEM) - max_rss_nodeid = TrackableResources.find_count_in_str( + max_resident_memory_nodeid = TrackableResources.find_count_in_str( ptr.tres_usage_in_max_nodeid, slurm.TRES_MEM) - wrap.max_rss_task = TrackableResources.find_count_in_str( + wrap.max_resident_memory_task = TrackableResources.find_count_in_str( ptr.tres_usage_in_max_taskid, slurm.TRES_MEM) - wrap.max_vmsize = TrackableResources.find_count_in_str( + wrap.max_virtual_memory = TrackableResources.find_count_in_str( ptr.tres_usage_in_max, slurm.TRES_VMEM) - max_vmsize_nodeid = TrackableResources.find_count_in_str( + max_virtual_memory_nodeid = TrackableResources.find_count_in_str( ptr.tres_usage_in_max_nodeid, slurm.TRES_VMEM) - wrap.max_vmsize_task = TrackableResources.find_count_in_str( + wrap.max_virtual_memory_task = TrackableResources.find_count_in_str( ptr.tres_usage_in_max_taskid, slurm.TRES_VMEM) wrap.min_cpu_time = TrackableResources.find_count_in_str( @@ -122,8 +124,8 @@ cdef class JobStats: if nodes: wrap.max_disk_write_node = nodes[max_disk_write_nodeid] wrap.max_disk_read_node = nodes[max_disk_read_nodeid] - wrap.max_rss_node = nodes[max_rss_nodeid] - wrap.max_vmsize_node = nodes[max_vmsize_nodeid] + wrap.max_resident_memory_node = nodes[max_resident_memory_nodeid] + wrap.max_virtual_memory_node = nodes[max_virtual_memory_nodeid] wrap.min_cpu_time_node = nodes[min_cpu_time_nodeid] if step.ptr.user_cpu_sec != slurm.NO_VAL64: @@ -144,12 +146,11 @@ cdef class JobStats: step_stats = step.stats job_stats.consumed_energy += step_stats.consumed_energy - job_stats.average_cpu_time += step_stats.average_cpu_time - job_stats.average_cpu_frequency += step_stats.average_cpu_frequency - job_stats.cpu_time += step_stats.cpu_time - job_stats.average_disk_read += step_stats.average_disk_read - job_stats.average_disk_write += step_stats.average_disk_write - job_stats.average_pages += step_stats.average_pages + job_stats.avg_cpu_time += step_stats.avg_cpu_time + job_stats.avg_cpu_frequency += step_stats.avg_cpu_frequency + job_stats.avg_disk_read += step_stats.avg_disk_read + job_stats.avg_disk_write += step_stats.avg_disk_write + job_stats.avg_page_faults += step_stats.avg_page_faults if step_stats.max_disk_read >= job_stats.max_disk_read: job_stats.max_disk_read = step_stats.max_disk_read @@ -161,22 +162,22 @@ cdef class JobStats: 
job_stats.max_disk_write_node = step_stats.max_disk_write_node job_stats.max_disk_write_task = step_stats.max_disk_write_task - if step_stats.max_pages >= job_stats.max_pages: - job_stats.max_pages = step_stats.max_pages - job_stats.max_pages_node = step_stats.max_pages_node - job_stats.max_pages_task = step_stats.max_pages_task + if step_stats.max_page_faults >= job_stats.max_page_faults: + job_stats.max_page_faults = step_stats.max_page_faults + job_stats.max_page_faults_node = step_stats.max_page_faults_node + job_stats.max_page_faults_task = step_stats.max_page_faults_task - if step_stats.max_rss >= job_stats.max_rss: - job_stats.max_rss = step_stats.max_rss - job_stats.max_rss_node = step_stats.max_rss_node - job_stats.max_rss_task = step_stats.max_rss_task - job_stats.average_rss = job_stats.max_rss + if step_stats.max_resident_memory >= job_stats.max_resident_memory: + job_stats.max_resident_memory = step_stats.max_resident_memory + job_stats.max_resident_memory_node = step_stats.max_resident_memory_node + job_stats.max_resident_memory_task = step_stats.max_resident_memory_task + job_stats.avg_resident_memory = job_stats.max_resident_memory - if step_stats.max_vmsize >= job_stats.max_vmsize: - job_stats.max_vmsize = step_stats.max_vmsize - job_stats.max_vmsize_node = step_stats.max_vmsize_node - job_stats.max_vmsize_task = step_stats.max_vmsize_task - job_stats.average_vmsize = job_stats.max_vmsize + if step_stats.max_virtual_memory >= job_stats.max_virtual_memory: + job_stats.max_virtual_memory = step_stats.max_virtual_memory + job_stats.max_virtual_memory_node = step_stats.max_virtual_memory_node + job_stats.max_virtual_memory_task = step_stats.max_virtual_memory_task + job_stats.avg_virtual_memory = job_stats.max_virtual_memory if step_stats.min_cpu_time >= job_stats.min_cpu_time: job_stats.min_cpu_time = step_stats.min_cpu_time @@ -194,6 +195,6 @@ cdef class JobStats: elapsed = job.elapsed_time if job.elapsed_time else 0 cpus = job.cpus if job.cpus else 0 - job_stats.cpu_time = elapsed * cpus - job_stats.average_cpu_frequency /= len(steps) + job_stats.elapsed_cpu_time = elapsed * cpus + job_stats.avg_cpu_frequency /= len(steps) diff --git a/pyslurm/core/db/step.pxd b/pyslurm/core/db/step.pxd index c0cf876d..97ef9b50 100644 --- a/pyslurm/core/db/step.pxd +++ b/pyslurm/core/db/step.pxd @@ -48,6 +48,8 @@ cdef class JobStep: """A Slurm Database Job-step. 
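With the renamed statistics attributes from stats.pxd, reading the aggregated job level numbers next to the per-step values could look roughly like this (pyslurm.db paths assumed, and Job.load as introduced in job.pyx above):

    >>> import pyslurm
    >>> db_job = pyslurm.db.Job.load(1234)
    >>> print(db_job.stats.elapsed_cpu_time, db_job.stats.max_resident_memory)
    >>> for step in db_job.steps.values():
    ...     print(step.id, step.stats.avg_resident_memory, step.stats.total_cpu_time)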
Attributes: + stats (pyslurm.db.JobStats): + Utilization statistics for this Step num_nodes (int): Amount of nodes this Step has allocated cpus (int): diff --git a/pyslurm/core/db/step.pyx b/pyslurm/core/db/step.pyx index 5a55cc7f..6463ca8e 100644 --- a/pyslurm/core/db/step.pyx +++ b/pyslurm/core/db/step.pyx @@ -30,7 +30,8 @@ from pyslurm.core.common import ( uid_to_name, instance_to_dict, ) -from pyslurm.core.job.util import cpufreq_to_str +from pyslurm.core.job.util import cpu_freq_int_to_str +from pyslurm.core.job.step import humanize_step_id cdef class JobStep: @@ -57,30 +58,6 @@ cdef class JobStep: out["stats"] = self.stats.as_dict() return out - def _xlate_from_id(self, sid): - if sid == slurm.SLURM_BATCH_SCRIPT: - return "batch" - elif sid == slurm.SLURM_EXTERN_CONT: - return "extern" - elif sid == slurm.SLURM_INTERACTIVE_STEP: - return "interactive" - elif sid == slurm.SLURM_PENDING_STEP: - return "pending" - else: - return sid - - def _xlate_to_id(self, sid): - if sid == "batch": - return slurm.SLURM_BATCH_SCRIPT - elif sid == "extern": - return slurm.SLURM_EXTERN_CONT - elif sid == "interactive": - return slurm.SLURM_INTERACTIVE_STEP - elif sid == "pending": - return slurm.SLURM_PENDING_STEP - else: - return int(sid) - @property def num_nodes(self): nnodes = u32_parse(self.ptr.nnodes) @@ -149,15 +126,15 @@ cdef class JobStep: @property def cpu_frequency_min(self): - return cpufreq_to_str(self.ptr.req_cpufreq_min) + return cpu_freq_int_to_str(self.ptr.req_cpufreq_min) @property def cpu_frequency_max(self): - return cpufreq_to_str(self.ptr.req_cpufreq_max) + return cpu_freq_int_to_str(self.ptr.req_cpufreq_max) @property def cpu_frequency_governor(self): - return cpufreq_to_str(self.ptr.req_cpufreq_gov) + return cpu_freq_int_to_str(self.ptr.req_cpufreq_gov) @property def nodelist(self): @@ -165,7 +142,7 @@ cdef class JobStep: @property def id(self): - return self._xlate_from_id(self.ptr.step_id.step_id) + return humanize_step_id(self.ptr.step_id.step_id) @property def job_id(self): diff --git a/pyslurm/core/job/job.pxd b/pyslurm/core/job/job.pxd index 1be664ec..bcb3218f 100644 --- a/pyslurm/core/job/job.pxd +++ b/pyslurm/core/job/job.pxd @@ -1,7 +1,7 @@ ######################################################################### # job.pyx - interface to retrieve slurm job informations ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,7 +17,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from pyslurm.core.common cimport cstr, ctime @@ -69,7 +69,7 @@ cdef class Jobs(dict): """A collection of Job objects. Args: - nodes (Union[list, dict], optional): + jobs (Union[list, dict], optional): Jobs to initialize this collection with. 
freeze (bool, optional): Control whether this collection is "frozen" when reloading Job @@ -375,7 +375,6 @@ cdef class Job: cdef public JobSteps steps - cdef alloc(self) cdef _calc_run_time(self) @staticmethod diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx index 3bd33a65..4c8b1188 100644 --- a/pyslurm/core/job/job.pyx +++ b/pyslurm/core/job/job.pyx @@ -1,7 +1,7 @@ ######################################################################### # job.pyx - interface to retrieve slurm job informations ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -205,36 +205,44 @@ cdef class Job: self.ptr = NULL def __init__(self, job_id): - self.alloc() + self._alloc_impl() self.ptr.job_id = job_id self.passwd = {} self.groups = {} self.steps = JobSteps.__new__(JobSteps) - cdef alloc(self): - self.ptr = try_xmalloc(sizeof(slurm_job_info_t)) + def _alloc_impl(self): if not self.ptr: - raise MemoryError("xmalloc failed for job_info_t") + self.ptr = try_xmalloc(sizeof(slurm_job_info_t)) + if not self.ptr: + raise MemoryError("xmalloc failed for job_info_t") - def __dealloc__(self): + def _dealloc_impl(self): slurm_free_job_info(self.ptr) self.ptr = NULL + def __dealloc__(self): + self._dealloc_impl() + def __eq__(self, other): return isinstance(other, Job) and self.id == other.id - def reload(self): - """(Re)load information for a job. + @staticmethod + def load(job_id): + """Load information for a specific Job. Implements the slurm_load_job RPC. Note: - You can call this function repeatedly to refresh the information - of an instance. Using the Job object returned is optional. + If the Job is not pending, the related Job steps will also be + loaded. + + Args: + job_id (int): + An Integer representing a Job-ID. Returns: - (Job): This function returns the current Job-instance object - itself. + (pyslurm.Job): Returns a new Job instance Raises: RPCError: If requesting the Job information from the slurmctld was @@ -242,45 +250,42 @@ cdef class Job: MemoryError: If malloc failed to allocate memory. Examples: - >>> from pyslurm import Job - >>> job = Job(9999) - >>> job.reload() - >>> - >>> # You can also write this in one-line: - >>> job = Job(9999).reload() + >>> import pyslurm + >>> job = pyslurm.Job.load(9999) """ cdef: job_info_msg_t *info = NULL + Job wrap = Job.__new__(Job) try: - verify_rpc(slurm_load_job(&info, self.id, slurm.SHOW_DETAIL)) + verify_rpc(slurm_load_job(&info, job_id, slurm.SHOW_DETAIL)) if info and info.record_count: - # Cleanup the old info - slurm_free_job_info(self.ptr) - - # Copy new info - self.alloc() - memcpy(self.ptr, &info.job_array[0], sizeof(slurm_job_info_t)) + # Copy info + wrap._alloc_impl() + memcpy(wrap.ptr, &info.job_array[0], sizeof(slurm_job_info_t)) info.record_count = 0 - # Just ignore if the steps couldn't be loaded here. - try: - if not slurm.IS_JOB_PENDING(self.ptr): - self.steps = JobSteps._load(self) - except RPCError: - pass + if not slurm.IS_JOB_PENDING(wrap.ptr): + # Just ignore if the steps couldn't be loaded here. 
+ try: + wrap.steps = JobSteps._load(wrap) + except RPCError: + pass + else: + raise RPCError(msg=f"RPC was successful but got no job data, " + "this should never happen") except Exception as e: raise e finally: slurm_free_job_info_msg(info) - return self + return wrap @staticmethod cdef Job from_ptr(slurm_job_info_t *in_ptr): cdef Job wrap = Job.__new__(Job) - wrap.alloc() + wrap._alloc_impl() wrap.passwd = {} wrap.groups = {} wrap.steps = JobSteps.__new__(JobSteps) @@ -666,42 +671,7 @@ cdef class Job: @property def dependencies(self): - dep = cstr.to_unicode(self.ptr.dependency, default=[]) - if not dep: - return None - - out = { - "after": [], - "afterany": [], - "afterburstbuffer": [], - "aftercorr": [], - "afternotok": [], - "afterok": [], - "singleton": False, - "satisfy": "all", - } - - delim = "," - if "?" in dep: - delim = "?" - out["satisfy"] = "any" - - for item in dep.split(delim): - if item == "singleton": - out["singleton"] = True - - dep_and_job = item.split(":", 1) - if len(dep_and_job) != 2: - continue - - dep_name, jobs = dep_and_job[0], dep_and_job[1].split(":") - if dep_name not in out: - continue - - for job in jobs: - out[dep_name].append(int(job) if job.isdigit() else job) - - return out + return dependency_str_to_dict(cstr.to_unicode(self.ptr.dependency)) @property def time_limit(self): @@ -845,7 +815,7 @@ cdef class Job: @property def cpus(self): - return u32_parse(self.ptr.num_cpus) + return u32_parse(self.ptr.num_cpus, on_noval=1) @property def cpus_per_task(self): @@ -1005,15 +975,15 @@ cdef class Job: @property def cpu_frequency_min(self): - return cpufreq_to_str(self.ptr.cpu_freq_min) + return cpu_freq_int_to_str(self.ptr.cpu_freq_min) @property def cpu_frequency_max(self): - return cpufreq_to_str(self.ptr.cpu_freq_max) + return cpu_freq_int_to_str(self.ptr.cpu_freq_max) @property def cpu_frequency_governor(self): - return cpufreq_to_str(self.ptr.cpu_freq_gov) + return cpu_freq_int_to_str(self.ptr.cpu_freq_gov) # @property # def tres_bindings(self): @@ -1037,7 +1007,7 @@ cdef class Job: @property def mail_types(self): - return get_mail_type(self.ptr.mail_type) + return mail_type_int_to_list(self.ptr.mail_type) @property def heterogeneous_id(self): @@ -1091,8 +1061,8 @@ cdef class Job: cdef time_t rtime cdef time_t etime - if slurm.IS_JOB_PENDING(self.ptr): - return None + if slurm.IS_JOB_PENDING(self.ptr) or not self.ptr.start_time: + return 0 elif slurm.IS_JOB_SUSPENDED(self.ptr): return self.pre_suspension_time else: @@ -1102,17 +1072,16 @@ cdef class Job: etime = self.ptr.end_time if self.ptr.suspend_time: - rtime = ctime.difftime( - etime, - self.ptr.suspend_time + self.ptr.pre_sus_time) + rtime = ctime.difftime(etime, self.ptr.suspend_time) + rtime += self.ptr.pre_sus_time else: rtime = ctime.difftime(etime, self.ptr.start_time) - return u64_parse(rtime) + return u64_parse(rtime, on_noval=0) @property def run_time(self): - return _raw_time(self._calc_run_time()) + return self._calc_run_time() @property def cores_reserved_for_system(self): @@ -1185,7 +1154,7 @@ cdef class Job: @property def profile_types(self): - return get_acctg_profile(self.ptr.profile) + return acctg_profile_int_to_list(self.ptr.profile) @property def gres_binding(self): @@ -1206,7 +1175,7 @@ cdef class Job: @property def power_options(self): - return get_power_type(self.ptr.power_flags) + return power_type_int_to_list(self.ptr.power_flags) @property def is_cronjob(self): @@ -1218,13 +1187,7 @@ cdef class Job: @property def cpu_time(self): - run_time = self.run_time - if run_time: - 
cpus = self.cpus - if cpus is not None: - return cpus * run_time - - return 0 + return self.cpus * self.run_time @property def pending_time(self): @@ -1242,7 +1205,7 @@ cdef class Job: This contains the following information: * cpus (int) * gres (dict) - * memory (str) - Humanized Memory str + * memory (int) Returns: (dict): Resource layout diff --git a/pyslurm/core/job/step.pxd b/pyslurm/core/job/step.pxd index 0482d0b2..ae0101c9 100644 --- a/pyslurm/core/job/step.pxd +++ b/pyslurm/core/job/step.pxd @@ -1,7 +1,7 @@ ######################################################################### # job/step.pxd - interface to retrieve slurm job step informations ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/pyslurm/core/job/step.pyx b/pyslurm/core/job/step.pyx index 834c999b..88d795e9 100644 --- a/pyslurm/core/job/step.pyx +++ b/pyslurm/core/job/step.pyx @@ -1,7 +1,7 @@ ######################################################################### # job/step.pyx - interface to retrieve slurm job step informations ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -32,10 +32,9 @@ from pyslurm.core.common import ( instance_to_dict, uid_to_name, ) -from pyslurm.core.job.util import ( - cpufreq_to_str, - get_task_dist, -) +from pyslurm.core.job.util import cpu_freq_int_to_str +from pyslurm.core.job.task_dist cimport TaskDistribution + from pyslurm.core.common.ctime import ( secs_to_timestr, mins_to_timestr, @@ -59,7 +58,7 @@ cdef class JobSteps(dict): @staticmethod def load(job): cdef Job _job - _job = job.reload() if isinstance(job, Job) else Job(job).reload() + _job = Job.load(job.id) if isinstance(job, Job) else Job.load(job) return JobSteps._load(_job) @staticmethod @@ -132,10 +131,10 @@ cdef class JobStep: self.ptr = NULL self.umsg = NULL - def __init__(self, job=0, step=0, **kwargs): + def __init__(self, job_id=0, step_id=0, **kwargs): self._alloc_impl() - self.job_id = job.id if isinstance(job, Job) else job - self.id = step + self.job_id = job_id.id if isinstance(job_id, Job) else job_id + self.id = step_id # Initialize attributes, if any were provided for k, v in kwargs.items(): @@ -179,58 +178,51 @@ cdef class JobStep: # Call descriptors __set__ directly JobStep.__dict__[name].__set__(self, val) - def reload(self): - """(Re)load information for a specific job step. + @staticmethod + def load(job_id, step_id): + """Load information for a specific job step. Implements the slurm_get_job_steps RPC. - Note: - You can call this function repeatedly to refresh the information - of an instance. Using the JobStep object returned is optional. + Args: + job_id (Union[Job, int]): + ID of the Job the Step belongs to. + step_id (Union[int, str]): + Step-ID for the Step to be loaded. + + Returns: + (pyslurm.JobStep): Returns a new JobStep instance Raises: RPCError: When retrieving Step information from the slurmctld was not successful. MemoryError: If malloc failed to allocate memory. - Returns: - (JobStep): This function returns the current JobStep-instance - object itself. 
- Examples: - >>> from pyslurm import JobStep - >>> jobstep = JobStep(9999, 1) - >>> jobstep.reload() - >>> - >>> # You can also write this in one-line: - >>> jobstep = JobStep(9999, 1).reload() + >>> import pyslurm + >>> jobstep = pyslurm.JobStep.load(9999, 1) """ cdef: job_step_info_response_msg_t *info = NULL - uint32_t save_jid = self.job_id - uint32_t save_sid = self.ptr.step_id.step_id + JobStep wrap = JobStep.__new__(JobStep) - rc = slurm_get_job_steps(0, save_jid, save_sid, + job_id = job_id.id if isinstance(job_id, Job) else job_id + rc = slurm_get_job_steps(0, job_id, dehumanize_step_id(step_id), &info, slurm.SHOW_ALL) verify_rpc(rc) - if info.job_step_count == 1: - # Cleanup the old info. - self._dealloc_impl() - + if info and info.job_step_count == 1: # Copy new info - self._alloc_impl() - memcpy(self.ptr, &info.job_steps[0], sizeof(job_step_info_t)) + wrap._alloc_impl() + memcpy(wrap.ptr, &info.job_steps[0], sizeof(job_step_info_t)) info.job_step_count = 0 slurm_free_job_step_info_response_msg(info) else: slurm_free_job_step_info_response_msg(info) - - sid = self._xlate_from_id(save_sid) - msg = f"Step {sid} of Job {save_jid} not found." + msg = f"Step {step_id} of Job {job_id} not found." raise RPCError(msg=msg) - return self + return wrap @staticmethod cdef JobStep from_ptr(job_step_info_t *in_ptr): @@ -322,29 +314,6 @@ cdef class JobStep: js.umsg.job_id = self.ptr.step_id.job_id verify_rpc(slurm_update_step(js.umsg)) - def _xlate_from_id(self, sid): - if sid == slurm.SLURM_BATCH_SCRIPT: - return "batch" - elif sid == slurm.SLURM_EXTERN_CONT: - return "extern" - elif sid == slurm.SLURM_INTERACTIVE_STEP: - return "interactive" - elif sid == slurm.SLURM_PENDING_STEP: - return "pending" - else: - return sid - - def _xlate_to_id(self, sid): - if sid == "batch": - return slurm.SLURM_BATCH_SCRIPT - elif sid == "extern": - return slurm.SLURM_EXTERN_CONT - elif sid == "interactive": - return slurm.SLURM_INTERACTIVE_STEP - elif sid == "pending": - return slurm.SLURM_PENDING_STEP - else: - return int(sid) def as_dict(self): """JobStep information formatted as a dictionary. 
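A minimal usage sketch of the loader introduced above, assuming a reachable slurmctld and that job 9999 actually has a step with ID 1; since reload() is gone, refreshing an instance simply means loading it again:

    >>> import pyslurm
    >>> step = pyslurm.JobStep.load(9999, 1)
    >>> # ... later, to refresh the information, just load once more
    >>> step = pyslurm.JobStep.load(step.job_id, step.id)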
@@ -356,11 +325,11 @@ cdef class JobStep: @property def id(self): - return self._xlate_from_id(self.ptr.step_id.step_id) + return humanize_step_id(self.ptr.step_id.step_id) @id.setter def id(self, val): - self.ptr.step_id.step_id = self._xlate_to_id(val) + self.ptr.step_id.step_id = dehumanize_step_id(val) @property def job_id(self): @@ -396,15 +365,15 @@ cdef class JobStep: @property def cpu_frequency_min(self): - return cpufreq_to_str(self.ptr.cpu_freq_min) + return cpu_freq_int_to_str(self.ptr.cpu_freq_min) @property def cpu_frequency_max(self): - return cpufreq_to_str(self.ptr.cpu_freq_max) + return cpu_freq_int_to_str(self.ptr.cpu_freq_max) @property def cpu_frequency_governor(self): - return cpufreq_to_str(self.ptr.cpu_freq_gov) + return cpu_freq_int_to_str(self.ptr.cpu_freq_gov) @property def reserved_ports(self): @@ -456,7 +425,7 @@ cdef class JobStep: @property def distribution(self): - return get_task_dist(self.ptr.task_dist) + return TaskDistribution.from_int(self.ptr.task_dist) @property def command(self): @@ -465,3 +434,28 @@ cdef class JobStep: @property def slurm_protocol_version(self): return u32_parse(self.ptr.start_protocol_ver) + + +def humanize_step_id(sid): + if sid == slurm.SLURM_BATCH_SCRIPT: + return "batch" + elif sid == slurm.SLURM_EXTERN_CONT: + return "extern" + elif sid == slurm.SLURM_INTERACTIVE_STEP: + return "interactive" + elif sid == slurm.SLURM_PENDING_STEP: + return "pending" + else: + return sid + +def dehumanize_step_id(sid): + if sid == "batch": + return slurm.SLURM_BATCH_SCRIPT + elif sid == "extern": + return slurm.SLURM_EXTERN_CONT + elif sid == "interactive": + return slurm.SLURM_INTERACTIVE_STEP + elif sid == "pending": + return slurm.SLURM_PENDING_STEP + else: + return int(sid) diff --git a/pyslurm/core/job/submission.pyx b/pyslurm/core/job/submission.pyx index c466dba7..d4a19743 100644 --- a/pyslurm/core/job/submission.pyx +++ b/pyslurm/core/job/submission.pyx @@ -1,7 +1,7 @@ ######################################################################### # submission.pyx - interface for submitting slurm jobs ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,7 +17,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from os import getcwd @@ -27,6 +27,7 @@ import typing import shlex from pathlib import Path from pyslurm.core.common cimport cstr, ctime +from pyslurm.core.common import cstr from pyslurm.core.common.uint cimport * from pyslurm.core.common.uint import * from pyslurm.core.common.ctime cimport time_t @@ -41,6 +42,7 @@ from pyslurm.core.common.ctime import ( timestamp_to_date, date_to_timestamp, ) +from pyslurm.core.job.task_dist cimport TaskDistribution from pyslurm.core.common import ( humanize, @@ -227,10 +229,10 @@ cdef class JobSubmitDescription: ptr.requeue = u16_bool(self.is_requeueable) ptr.wait_all_nodes = u16_bool(self.wait_all_nodes) - ptr.mail_type = parse_mail_type(self.mail_types) - ptr.power_flags = parse_power_type(self.power_options) - ptr.profile = parse_acctg_profile(self.profile_types) - ptr.shared = parse_shared_type(self.resource_sharing) + ptr.mail_type = mail_type_list_to_int(self.mail_types) + ptr.power_flags = power_type_list_to_int(self.power_options) + ptr.profile = acctg_profile_list_to_int(self.profile_types) + ptr.shared = shared_type_str_to_int(self.resource_sharing) self._set_cpu_frequency() self._set_nodes() @@ -330,9 +332,9 @@ cdef class JobSubmitDescription: if freq_len == 3: freq["governor"] = freq_splitted[2] - freq_min = parse_cpufreq(freq.get("min")) - freq_max = parse_cpufreq(freq.get("max")) - freq_gov = parse_cpu_gov(freq.get("governor")) + freq_min = cpu_freq_str_to_int(freq.get("min")) + freq_max = cpu_freq_str_to_int(freq.get("max")) + freq_gov = cpu_gov_str_to_int(freq.get("governor")) if freq_min != u32(None): if freq_max == u32(None): @@ -571,12 +573,25 @@ cdef class JobSubmitDescription: self.ptr.env_size+=1 def _set_distribution(self): - dist, plane = parse_task_dist(self.distribution) + dist=plane = None + + if not self.distribution: + self.ptr.task_dist = slurm.SLURM_DIST_UNKNOWN + return None + + if isinstance(self.distribution, int): + # Assume the user meant to specify the plane size only. + plane = u16(self.distribution) + elif isinstance(self.distribution, str): + # Support sbatch style string input + dist = TaskDistribution.from_str(self.distribution) + plane = dist.plane if isinstance(dist.plane, int) else 0 + if plane: self.ptr.plane_size = plane self.ptr.task_dist = slurm.SLURM_DIST_PLANE - elif self.distribution is not None: - self.ptr.task_dist = dist + elif dist is not None: + self.ptr.task_dist = dist.as_int() def _set_gpu_binding(self): binding = self.gpu_binding diff --git a/pyslurm/core/job/task_dist.pxd b/pyslurm/core/job/task_dist.pxd new file mode 100644 index 00000000..27fa8626 --- /dev/null +++ b/pyslurm/core/job/task_dist.pxd @@ -0,0 +1,39 @@ +######################################################################### +# task_dist.pxd - job task distribution +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + +from pyslurm cimport slurm +from pyslurm.core.common.uint cimport u16 +from pyslurm.slurm cimport ( + task_dist_states_t, +) + + +cdef class TaskDistribution: + + cdef public: + str nodes + str sockets + str cores + plane + pack + + cdef task_dist_states_t state diff --git a/pyslurm/core/job/task_dist.pyx b/pyslurm/core/job/task_dist.pyx new file mode 100644 index 00000000..f1da4ae7 --- /dev/null +++ b/pyslurm/core/job/task_dist.pyx @@ -0,0 +1,350 @@ +######################################################################### +# task_dist.pyx - job task distribution +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# Pyslurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# Pyslurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# cython: c_string_type=unicode, c_string_encoding=default +# cython: language_level=3 + + +cdef class TaskDistribution: + + def __init__(self, nodes="block", sockets="cyclic", + cores=None, pack=None, plane_size=None): + self.nodes = nodes + self.sockets = sockets + self.cores = cores if cores else self.sockets + self.pack = pack + self.plane = plane_size + self.state = self._get_task_dist_state() + + def __eq__(self, other): + if not isinstance(other, TaskDistribution): + return NotImplemented + return self.as_int() == other.as_int() + + @staticmethod + def from_int(dist): + cdef TaskDistribution tdist = None + + if int(dist) <= 0 or dist == slurm.SLURM_DIST_UNKNOWN: + return None + + if (dist & slurm.SLURM_DIST_STATE_BASE) != slurm.SLURM_DIST_UNKNOWN: + tdist = _parse_task_dist_from_int(dist) + + dist_flag = dist & slurm.SLURM_DIST_STATE_FLAGS + tdist = _parse_task_dist_flags_from_int(tdist, dist_flag) + + if tdist: + tdist.state = dist + + return tdist + + def _to_str_no_flags(self): + if self.plane: + return "plane" + + dist_str = "" + nodes = self.nodes + if nodes is not None and nodes != "*": + dist_str = f"{nodes}" + else: + dist_str = "block" + + sockets = self.sockets + if sockets is not None and sockets != "*": + dist_str = f"{dist_str}:{sockets}" + else: + dist_str = f"{dist_str}:cyclic" + + cores = self.cores + if cores is not None and cores != "*": + dist_str = f"{dist_str}:{cores}" + else: + dist_str = f"{dist_str}:{sockets}" + + return dist_str + + def to_str(self): + dist_str = self._to_str_no_flags() + + if self.pack is not None: + dist_str = f"{dist_str},{'Pack' if self.pack else 'NoPack'}" + + return dist_str + + def to_dict(self): + return { + "nodes": self.nodes, + "sockets": self.sockets, + "cores": self.cores, + "plane": self.plane, + "pack": self.pack, + } + + def as_int(self): + return 
self.state + + def _get_task_dist_state(self): + cdef task_dist_states_t dist_state + + dist_str = self._to_str_no_flags() + if dist_str == "plane": + return slurm.SLURM_DIST_PLANE + + dist_state = _parse_str_to_task_dist_int(dist_str) + if dist_state == slurm.SLURM_DIST_UNKNOWN: + raise ValueError(f"Invalid distribution specification: {dist_str}") + + # Check for Pack/NoPack + # Don't do anything if it is None + if self.pack: + dist_state = (dist_state | slurm.SLURM_DIST_PACK_NODES) + elif self.pack is not None and not self.pack: + dist_state = (dist_state | slurm.SLURM_DIST_NO_PACK_NODES) + + return dist_state + + @staticmethod + def from_str(dist_str): + cdef TaskDistribution tdist = TaskDistribution.__new__(TaskDistribution) + + # Plane method - return early because nothing else can be + # specified when this is set. + if "plane" in dist_str: + if "plane=" in dist_str: + plane_size = u16(dist_str.split("=", 1)[1]) + return TaskDistribution(plane_size=plane_size) + else: + return TaskDistribution(plane_size=True) + + # [0] = distribution method for nodes:sockets:cores + # [1] = pack/nopack specification (true or false) + dist_items = dist_str.split(",", 1) + + # Parse the different methods + dist_methods = dist_items[0].split(":") + if len(dist_methods) and dist_methods[0] != "*": + tdist.nodes = dist_methods[0] + + if len(dist_methods) > 2 and dist_methods[1] != "*": + tdist.sockets = dist_methods[1] + + if len(dist_methods) >= 3: + if dist_methods[2] == "*": + tdist.cores = tdist.sockets + else: + tdist.cores = dist_methods[2] + + if len(dist_items) > 1: + if dist_items[1].casefold() == "pack": + tdist.pack = True + elif dist_items[1].casefold() == "nopack": + tdist.pack = False + + tdist.state = tdist._get_task_dist_state() + return tdist + + +# https://github.com/SchedMD/slurm/blob/510ba4f17dfa559b579aa054cb8a415dcc224abc/src/common/proc_args.c#L319 +def _parse_task_dist_from_int(dist): + cdef TaskDistribution out = TaskDistribution.__new__(TaskDistribution) + + state = dist & slurm.SLURM_DIST_STATE_BASE + if state == slurm.SLURM_DIST_BLOCK: + out.nodes = "block" + elif state == slurm.SLURM_DIST_CYCLIC: + out.nodes = "cyclic" + elif state == slurm.SLURM_DIST_PLANE: + out.plane = state + elif state == slurm.SLURM_DIST_ARBITRARY: + out.nodes = "arbitrary" + elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC: + out.nodes = "cyclic" + out.sockets = "cyclic" + elif state == slurm.SLURM_DIST_CYCLIC_BLOCK: + out.nodes = "cyclic" + out.sockets = "block" + elif state == slurm.SLURM_DIST_CYCLIC_CFULL: + out.nodes = "cyclic" + out.sockets = "fcyclic" + elif state == slurm.SLURM_DIST_BLOCK_CYCLIC: + out.nodes = "block" + out.sockets = "cyclic" + elif state == slurm.SLURM_DIST_BLOCK_BLOCK: + out.nodes = "block" + out.sockets = "block" + elif state == slurm.SLURM_DIST_BLOCK_CFULL: + out.nodes = "block" + out.sockets = "fcyclic" + elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC_CYCLIC: + out.nodes = "cyclic" + out.sockets = "cyclic" + out.cores = "cyclic" + elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC_BLOCK: + out.nodes = "cyclic" + out.sockets = "cyclic" + out.cores = "block" + elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC_CFULL: + out.nodes = "cyclic" + out.sockets = "cyclic" + out.cores = "fcyclic" + elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_CYCLIC: + out.nodes = "cyclic" + out.sockets = "block" + out.cores = "cyclic" + elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_CYCLIC: + out.nodes = "cyclic" + out.sockets = "block" + out.cores = "cyclic" + elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_BLOCK: + 
out.nodes = "cyclic" + out.sockets = "block" + out.cores = "block" + elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_CFULL: + out.nodes = "cyclic" + out.sockets = "block" + out.cores = "fcyclic" + elif state == slurm.SLURM_DIST_CYCLIC_CFULL_CYCLIC: + out.nodes = "cyclic" + out.sockets = "fcyclic" + out.cores = "cyclic" + elif state == slurm.SLURM_DIST_CYCLIC_CFULL_BLOCK: + out.nodes = "cyclic" + out.sockets = "fcyclic" + out.cores = "block" + elif state == slurm.SLURM_DIST_CYCLIC_CFULL_CFULL: + out.nodes = "cyclic" + out.sockets = "fcyclic" + out.cores = "fcyclic" + elif state == slurm.SLURM_DIST_BLOCK_CYCLIC_CYCLIC: + out.nodes = "block" + out.sockets = "cyclic" + out.cores = "cyclic" + elif state == slurm.SLURM_DIST_BLOCK_CYCLIC_BLOCK: + out.nodes = "block" + out.sockets = "cyclic" + out.cores = "block" + elif state == slurm.SLURM_DIST_BLOCK_CYCLIC_CFULL: + out.nodes = "block" + out.sockets = "cyclic" + out.cores = "fcyclic" + elif state == slurm.SLURM_DIST_BLOCK_BLOCK_CYCLIC: + out.nodes = "block" + out.sockets = "block" + out.cores = "cyclic" + elif state == slurm.SLURM_DIST_BLOCK_BLOCK_BLOCK: + out.nodes = "block" + out.sockets = "block" + out.cores = "block" + elif state == slurm.SLURM_DIST_BLOCK_BLOCK_CFULL: + out.nodes = "block" + out.sockets = "block" + out.cores = "fcyclic" + elif state == slurm.SLURM_DIST_BLOCK_CFULL_CYCLIC: + out.nodes = "block" + out.sockets = "fcyclic" + out.cores = "cyclic" + elif state == slurm.SLURM_DIST_BLOCK_CFULL_BLOCK: + out.nodes = "block" + out.sockets = "fcyclic" + out.cores = "block" + elif state == slurm.SLURM_DIST_BLOCK_CFULL_CFULL: + out.nodes = "block" + out.sockets = "fcyclic" + out.cores = "fcyclic" + else: + return None + + return out + + +def _parse_task_dist_flags_from_int(TaskDistribution dst, dist_flag): + if not dist_flag: + return dst + + cdef TaskDistribution _dst = dst + if not _dst: + _dst = TaskDistribution.__new__(TaskDistribution) + + if dist_flag == slurm.SLURM_DIST_PACK_NODES: + _dst.pack = True + elif dist_flag == slurm.SLURM_DIST_NO_PACK_NODES: + _dst.pack = False + + return _dst + + +def _parse_str_to_task_dist_int(dist_str): + # Select the correct distribution method according to dist_str. 
+ if dist_str == "cyclic": + return slurm.SLURM_DIST_CYCLIC + elif dist_str == "block": + return slurm.SLURM_DIST_BLOCK + elif dist_str == "arbitrary" or dist_str == "hostfile": + return slurm.SLURM_DIST_ARBITRARY + elif dist_str == "cyclic:cyclic": + return slurm.SLURM_DIST_CYCLIC_CYCLIC + elif dist_str == "cyclic:block": + return slurm.SLURM_DIST_CYCLIC_BLOCK + elif dist_str == "block:block": + return slurm.SLURM_DIST_BLOCK_BLOCK + elif dist_str == "block:cyclic": + return slurm.SLURM_DIST_BLOCK_CYCLIC + elif dist_str == "block:fcyclic": + return slurm.SLURM_DIST_BLOCK_CFULL + elif dist_str == "cyclic:fcyclic": + return slurm.SLURM_DIST_CYCLIC_CFULL + elif dist_str == "cyclic:cyclic:cyclic": + return slurm.SLURM_DIST_CYCLIC_CYCLIC_CYCLIC + elif dist_str == "cyclic:cyclic:block": + return slurm.SLURM_DIST_CYCLIC_CYCLIC_BLOCK + elif dist_str == "cyclic:cyclic:fcyclic": + return slurm.SLURM_DIST_CYCLIC_CYCLIC_CFULL + elif dist_str == "cyclic:block:cyclic": + return slurm.SLURM_DIST_CYCLIC_BLOCK_CYCLIC + elif dist_str == "cyclic:block:block": + return slurm.SLURM_DIST_CYCLIC_BLOCK_BLOCK + elif dist_str == "cyclic:block:fcyclic": + return slurm.SLURM_DIST_CYCLIC_BLOCK_CFULL + elif dist_str == "cyclic:fcyclic:cyclic": + return slurm.SLURM_DIST_CYCLIC_CFULL_CYCLIC + elif dist_str == "cyclic:fcyclic:block": + return slurm.SLURM_DIST_CYCLIC_CFULL_BLOCK + elif dist_str == "cyclic:fcyclic:fcyclic": + return slurm.SLURM_DIST_CYCLIC_CFULL_CFULL + elif dist_str == "block:cyclic:cyclic": + return slurm.SLURM_DIST_BLOCK_CYCLIC_CYCLIC + elif dist_str == "block:cyclic:block": + return slurm.SLURM_DIST_BLOCK_CYCLIC_BLOCK + elif dist_str == "block:cyclic:fcyclic": + return slurm.SLURM_DIST_BLOCK_CYCLIC_CFULL + elif dist_str == "block:block:cyclic": + return slurm.SLURM_DIST_BLOCK_BLOCK_CYCLIC + elif dist_str == "block:block:block": + return slurm.SLURM_DIST_BLOCK_BLOCK_BLOCK + elif dist_str == "block:block:fcyclic": + return slurm.SLURM_DIST_BLOCK_BLOCK_CFULL + elif dist_str == "block:fcyclic:cyclic": + return slurm.SLURM_DIST_BLOCK_CFULL_CYCLIC + elif dist_str == "block:fcyclic:block": + return slurm.SLURM_DIST_BLOCK_CFULL_BLOCK + elif dist_str == "block:fcyclic:fcyclic": + return slurm.SLURM_DIST_BLOCK_CFULL_CFULL + else: + return slurm.SLURM_DIST_UNKNOWN diff --git a/pyslurm/core/job/util.pyx b/pyslurm/core/job/util.pyx index f7a95892..43dc8489 100644 --- a/pyslurm/core/job/util.pyx +++ b/pyslurm/core/job/util.pyx @@ -1,7 +1,7 @@ ######################################################################### -# parse_types.pyx - utility functions used to parse various job flags +# util.pyx - utility functions used to parse various job flags ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,7 +17,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t @@ -25,8 +25,10 @@ from pyslurm cimport slurm from pyslurm.core.common.uint import * from pyslurm.core.common.uint cimport * +# Note: Maybe consider using libslurmfull again to avoid having to reimplement +# some of these functions and keeping track for changes in new releases. -def parse_mail_type(mail_types): +def mail_type_list_to_int(mail_types): """Convert a str or list of mail types to a uint16_t.""" cdef uint16_t flags = 0 types = mail_types @@ -85,7 +87,7 @@ def parse_mail_type(mail_types): return flags -def get_mail_type(uint16_t typ): +def mail_type_int_to_list(uint16_t typ): """Convert uint16_t to a list of mail types.""" types = [] @@ -128,7 +130,7 @@ def get_mail_type(uint16_t typ): return types -def parse_acctg_profile(acctg_profiles): +def acctg_profile_list_to_int(acctg_profiles): """Convert a str or list of accounting gather profiles to uin32_t.""" cdef uint32_t profile = 0 profiles = acctg_profiles @@ -156,17 +158,17 @@ def parse_acctg_profile(acctg_profiles): return profile -def get_acctg_profile(flags): +def acctg_profile_int_to_list(flags): """Convert uin32_t accounting gather profiles to a list of strings.""" profiles = [] if flags == 0 or flags == slurm.NO_VAL: - return ["none"] + return [] if flags == slurm.ACCT_GATHER_PROFILE_ALL: return ["all"] elif flags == slurm.ACCT_GATHER_PROFILE_NONE: - return ["none"] + return [] if flags & slurm.ACCT_GATHER_PROFILE_ENERGY: profiles.append("energy") @@ -183,7 +185,7 @@ def get_acctg_profile(flags): return profiles -def parse_power_type(power_types): +def power_type_list_to_int(power_types): """Convert a str or list of str with power types to uint8_t.""" cdef uint8_t flags = 0 @@ -194,7 +196,7 @@ def parse_power_type(power_types): flags |= slurm.SLURM_POWER_FLAGS_LEVEL -def get_power_type(flags): +def power_type_int_to_list(flags): """Convert uint8_t power type flags to a list of strings.""" types = [] @@ -204,7 +206,7 @@ def get_power_type(flags): return types -def parse_shared_type(typ): +def shared_type_str_to_int(typ): """Convert a job-sharing type str to its numerical representation.""" if not typ: return slurm.NO_VAL16 @@ -222,289 +224,7 @@ def parse_shared_type(typ): raise ValueError(f"Invalid resource_sharing type: {typ}.") -# https://github.com/SchedMD/slurm/blob/510ba4f17dfa559b579aa054cb8a415dcc224abc/src/common/proc_args.c#L319 -def get_task_dist(dist): - """Get the task distribution of a step as a dictionary.""" - out = { - "nodes": None, - "sockets": None, - "cores": None, - "plane": None, - "pack": None, - } - - if int(dist) <= 0 or dist == slurm.SLURM_DIST_UNKNOWN: - return None - - if (dist & slurm.SLURM_DIST_STATE_BASE) != slurm.SLURM_DIST_UNKNOWN: - state = dist & slurm.SLURM_DIST_STATE_BASE - - if state == slurm.SLURM_DIST_BLOCK: - out["nodes"] = "block" - elif state == slurm.SLURM_DIST_CYCLIC: - out["nodes"] = "cyclic" - elif state == slurm.SLURM_DIST_PLANE: - pass - elif state == slurm.SLURM_DIST_ARBITRARY: - out["nodes"] = "arbitrary" - elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC: - out["nodes"] = "cyclic" - out["sockets"] = "cyclic" - elif state == slurm.SLURM_DIST_CYCLIC_BLOCK: - out["nodes"] = "cyclic" - out["sockets"] = "block" - elif state == slurm.SLURM_DIST_CYCLIC_CFULL: - out["nodes"] = "cyclic" - out["sockets"] = "fcyclic" - elif state == slurm.SLURM_DIST_BLOCK_CYCLIC: - out["nodes"] = 
"block" - out["sockets"] = "cyclic" - elif state == slurm.SLURM_DIST_BLOCK_BLOCK: - out["nodes"] = "block" - out["sockets"] = "block" - elif state == slurm.SLURM_DIST_BLOCK_CFULL: - out["nodes"] = "block" - out["sockets"] = "fcyclic" - elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC_CYCLIC: - out["nodes"] = "cyclic" - out["sockets"] = "cyclic" - out["cores"] = "cyclic" - elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC_BLOCK: - out["nodes"] = "cyclic" - out["sockets"] = "cyclic" - out["cores"] = "block" - elif state == slurm.SLURM_DIST_CYCLIC_CYCLIC_CFULL: - out["nodes"] = "cyclic" - out["sockets"] = "cyclic" - out["cores"] = "fcyclic" - elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_CYCLIC: - out["nodes"] = "cyclic" - out["sockets"] = "block" - out["cores"] = "cyclic" - elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_CYCLIC: - out["nodes"] = "cyclic" - out["sockets"] = "block" - out["cores"] = "cyclic" - elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_BLOCK: - out["nodes"] = "cyclic" - out["sockets"] = "block" - out["cores"] = "block" - elif state == slurm.SLURM_DIST_CYCLIC_BLOCK_CFULL: - out["nodes"] = "cyclic" - out["sockets"] = "block" - out["cores"] = "fcyclic" - elif state == slurm.SLURM_DIST_CYCLIC_CFULL_CYCLIC: - out["nodes"] = "cyclic" - out["sockets"] = "fcyclic" - out["cores"] = "cyclic" - elif state == slurm.SLURM_DIST_CYCLIC_CFULL_BLOCK: - out["nodes"] = "cyclic" - out["sockets"] = "fcyclic" - out["cores"] = "block" - elif state == slurm.SLURM_DIST_CYCLIC_CFULL_CFULL: - out["nodes"] = "cyclic" - out["sockets"] = "fcyclic" - out["cores"] = "fcyclic" - elif state == slurm.SLURM_DIST_BLOCK_CYCLIC_CYCLIC: - out["nodes"] = "block" - out["sockets"] = "cyclic" - out["cores"] = "cyclic" - elif state == slurm.SLURM_DIST_BLOCK_CYCLIC_BLOCK: - out["nodes"] = "block" - out["sockets"] = "cyclic" - out["cores"] = "block" - elif state == slurm.SLURM_DIST_BLOCK_CYCLIC_CFULL: - out["nodes"] = "block" - out["sockets"] = "cyclic" - out["cores"] = "fcyclic" - elif state == slurm.SLURM_DIST_BLOCK_BLOCK_CYCLIC: - out["nodes"] = "block" - out["sockets"] = "block" - out["cores"] = "cyclic" - elif state == slurm.SLURM_DIST_BLOCK_BLOCK_BLOCK: - out["nodes"] = "block" - out["sockets"] = "block" - out["cores"] = "block" - elif state == slurm.SLURM_DIST_BLOCK_BLOCK_CFULL: - out["nodes"] = "block" - out["sockets"] = "block" - out["cores"] = "fcyclic" - elif state == slurm.SLURM_DIST_BLOCK_CFULL_CYCLIC: - out["nodes"] = "block" - out["sockets"] = "fcyclic" - out["cores"] = "cyclic" - elif state == slurm.SLURM_DIST_BLOCK_CFULL_BLOCK: - out["nodes"] = "block" - out["sockets"] = "fcyclic" - out["cores"] = "block" - elif state == slurm.SLURM_DIST_BLOCK_CFULL_CFULL: - out["nodes"] = "block" - out["sockets"] = "fcyclic" - out["cores"] = "fcyclic" - else: - out = None - - if out is not None: - dist_flag = dist & slurm.SLURM_DIST_STATE_FLAGS - if dist_flag == slurm.SLURM_DIST_PACK_NODES: - out["pack"] = True - elif dist_flag == slurm.SLURM_DIST_NO_PACK_NODES: - out["pack"] = False - - return out - - -def parse_task_dist(dist): - """Parse a distribution str or dict to its numerical representation.""" - cdef slurm.task_dist_states_t dist_state = slurm.SLURM_DIST_UNKNOWN - - if not dist: - return dist_state, None - - # Assume the user meant to specify the plane size. - if isinstance(dist, int): - return None, u16(dist) - - # Support sbatch-style string input. - # Parse the string and fill in the dist_dict above. 
- if isinstance(dist, str): - dist_str = dist - - # Plane method - return early because nothing else can be - # specified when this is set. - if "plane" in dist_str: - return None, u16(dist_str.split("=", 1)[1]) - - dist = { - "nodes": None, - "sockets": None, - "cores": None, - "plane": None, - "pack": None, - } - - # [0] = distribution method for nodes:sockets:cores - # [1] = pack/nopack specification (true or false) - dist_items = dist_str.split(",", 1) - - # Parse the different methods and fill in the dist_dict. - dist_methods = dist_items[0].split(":") - if len(dist_methods) and dist_methods[0] != "*": - dist["nodes"] = dist_methods[0] - - if len(dist_methods) > 2 and dist_methods[1] != "*": - dist["sockets"] = dist_methods[1] - - if len(dist_methods) >= 3: - if dist_methods[2] == "*": - dist["cores"] = dist_dict["sockets"] - else: - dist["cores"] = dist_methods[2] - - if len(dist_items) > 1: - if dist_items[1].casefold() == "pack": - dist["pack"] = True - elif dist_items[1].casefold() == "nopack": - dist["pack"] = False - - # Plane method - return early because nothing else can be - # specified when this is set. - if dist.get("plane") is not None: - return None, u16(dist['plane']) - - dist_str = "" - sockets_dist = None - - # Join the dist_dict distribution methods into a dist_str - # for easier comparison to check which distribution state - # is needed (see below). - nodes = dist.get("nodes") - if nodes is not None and nodes != "*": - dist_str = f"{nodes}" - else: - dist_str = "block" - - sockets = dist.get("sockets") - if sockets is not None and sockets != "*": - dist_str = f"{dist_str}:{sockets}" - else: - dist_str = f"{dist_str}:cyclic" - - cores = dist.get("cores") - if cores is not None and cores != "*": - dist_str = f"{dist_str}:{cores}" - else: - dist_str = f"{dist_str}:{sockets}" - - # Select the correct distribution method according to dist_str. 
- if dist_str == "cyclic": - dist_state = slurm.SLURM_DIST_CYCLIC - elif dist_str == "block": - dist_state = slurm.SLURM_DIST_BLOCK - elif dist_str == "arbitrary" or dist_str == "hostfile": - dist_state = slurm.SLURM_DIST_ARBITRARY - elif dist_str == "cyclic:cyclic": - dist_state = slurm.SLURM_DIST_CYCLIC_CYCLIC - elif dist_str == "cyclic:block": - dist_state = slurm.SLURM_DIST_CYCLIC_BLOCK - elif dist_str == "block:block": - dist_state = slurm,SLURM_DIST_BLOCK_BLOCK - elif dist_str == "block:cyclic": - dist_state = slurm.SLURM_DIST_BLOCK_CYCLIC - elif dist_str == "block:fcyclic": - dist_state = slurm.SLURM_DIST_BLOCK_CFULL - elif dist_str == "cyclic:fcyclic": - dist_state = slurm.SLURM_DIST_CYCLIC_CFULL - elif dist_str == "cyclic:cyclic:cyclic": - dist_state = slurm.SLURM_DIST_CYCLIC_CYCLIC_CYCLIC - elif dist_str == "cyclic:cyclic:block": - dist_state = slurm.SLURM_DIST_CYCLIC_CYCLIC_BLOCK - elif dist_str == "cyclic:cyclic:fcyclic": - dist_state = slurm.SLURM_DIST_CYCLIC_CYCLIC_CFULL - elif dist_str == "cyclic:block:cyclic": - dist_state = slurm.SLURM_DIST_CYCLIC_BLOCK_CYCLIC - elif dist_str == "cyclic:block:block": - dist_state = slurm.SLURM_DIST_CYCLIC_BLOCK_BLOCK - elif dist_str == "cyclic:block:fcyclic": - dist_state = slurm.SLURM_DIST_CYCLIC_BLOCK_CFULL - elif dist_str == "cyclic:fcyclic:cyclic": - dist_state = slurm.SLURM_DIST_CYCLIC_CFULL_CYCLIC - elif dist_str == "cyclic:fcyclic:block": - dist_state = slurm.SLURM_DIST_CYCLIC_CFULL_BLOCK - elif dist_str == "cyclic:fcyclic:fcyclic": - dist_state = slurm.SLURM_DIST_CYCLIC_CFULL_CFULL - elif dist_str == "block:cyclic:cyclic": - dist_state = slurm.SLURM_DIST_BLOCK_CYCLIC_CYCLIC - elif dist_str == "block:cyclic:block": - dist_state = slurm.SLURM_DIST_BLOCK_CYCLIC_BLOCK - elif dist_str == "block:cyclic:fcyclic": - dist_state = slurm.SLURM_DIST_BLOCK_CYCLIC_CFULL - elif dist_str == "block:block:cyclic": - dist_state = slurm.SLURM_DIST_BLOCK_BLOCK_CYCLIC - elif dist_str == "block:block:block": - dist_state = slurm.SLURM_DIST_BLOCK_BLOCK_BLOCK - elif dist_str == "block:block:fcyclic": - dist_state = slurm.SLURM_DIST_BLOCK_BLOCK_CFULL - elif dist_str == "block:fcyclic:cyclic": - dist_state = slurm.SLURM_DIST_BLOCK_CFULL_CYCLIC - elif dist_str == "block:fcyclic:block": - dist_state = slurm.SLURM_DIST_BLOCK_CFULL_BLOCK - elif dist_str == "block:fcyclic:fcyclic": - dist_state = slurm.SLURM_DIST_BLOCK_CFULL_CFULL - else: - raise ValueError(f"Invalid distribution specification: {dist}") - - # Check for Pack/NoPack - # Don't do anything if dist["pack"] is None - if dist["pack"]: - dist_state = (dist_state | slurm.SLURM_DIST_PACK_NODES) - elif dist["pack"] is not None and not dist["pack"]: - dist_state = (dist_state | slurm.SLURM_DIST_NO_PACK_NODES) - - return dist_state, None - - -def parse_cpu_gov(gov): +def cpu_gov_str_to_int(gov): """Convert a cpu governor str to is numerical representation.""" if not gov: return u32(None) @@ -530,7 +250,7 @@ def parse_cpu_gov(gov): return rc | slurm.CPU_FREQ_RANGE_FLAG -def parse_cpufreq(freq): +def cpu_freq_str_to_int(freq): """Convert a cpu-frequency str to its numerical representation.""" if not freq: return u32(None) @@ -554,7 +274,8 @@ def parse_cpufreq(freq): raise ValueError(f"Invalid cpu freq value: {freq}.") -def cpufreq_to_str(freq): +# https://github.com/SchedMD/slurm/blob/fec3d2648cfdcfa8b4efb1b59e70ebfaac98d9c3/src/common/cpu_frequency.c#L1359 +def cpu_freq_int_to_str(freq): """Convert a numerical cpufreq value to its string representation.""" if freq == slurm.CPU_FREQ_LOW: return "Low" @@ -585,3 
+306,39 @@ def cpufreq_to_str(freq): return freq +def dependency_str_to_dict(dep): + if not dep: + return None + + out = { + "after": [], + "afterany": [], + "afterburstbuffer": [], + "aftercorr": [], + "afternotok": [], + "afterok": [], + "singleton": False, + "satisfy": "all", + } + + delim = "," + if "?" in dep: + delim = "?" + out["satisfy"] = "any" + + for item in dep.split(delim): + if item == "singleton": + out["singleton"] = True + + dep_and_job = item.split(":", 1) + if len(dep_and_job) != 2: + continue + + dep_name, jobs = dep_and_job[0], dep_and_job[1].split(":") + if dep_name not in out: + continue + + for job in jobs: + out[dep_name].append(int(job) if job.isdigit() else job) + + return out diff --git a/pyslurm/core/node.pxd b/pyslurm/core/node.pxd index dc878bf0..04568227 100644 --- a/pyslurm/core/node.pxd +++ b/pyslurm/core/node.pxd @@ -72,7 +72,7 @@ cdef class Nodes(dict): Total amount of effective CPUs in this node collection. current_watts (int): Total amount of Watts consumed in this node collection. - average_watts (int): + avg_watts (int): Amount of average watts consumed in this node collection. Raises: @@ -186,7 +186,7 @@ cdef class Node: Node cap watts. current_watts (int): Current amount of watts consumed on the node. - average_watts (int): + avg_watts (int): Average amount of watts consumed on the node. external_sensors (dict): External Sensor info for the Node. diff --git a/pyslurm/core/node.pyx b/pyslurm/core/node.pyx index 2c1a51bc..d62df45c 100644 --- a/pyslurm/core/node.pyx +++ b/pyslurm/core/node.pyx @@ -1,7 +1,7 @@ ######################################################################### # node.pyx - interface to work with nodes in slurm ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # # Pyslurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,7 +17,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from pyslurm.slurm cimport xfree, try_xmalloc @@ -196,8 +196,8 @@ cdef class Nodes(dict): return _sum_prop(self, Node.current_watts) @property - def average_watts(self): - return _sum_prop(self, Node.average_watts) + def avg_watts(self): + return _sum_prop(self, Node.avg_watts) cdef class Node: @@ -266,18 +266,14 @@ cdef class Node: dst.info = src.info src.info = tmp - def reload(self): - """(Re)load information for a node. + @staticmethod + def load(name): + """Load information for a specific node. Implements the slurm_load_node_single RPC. - Note: - You can call this function repeatedly to refresh the information - of an instance. Using the Node object returned is optional. - Returns: - (Node): This function returns the current Node-instance object - itself. + (pyslurm.Node): Returns a new Node instance. Raises: RPCError: If requesting the Node information from the slurmctld @@ -285,47 +281,34 @@ cdef class Node: MemoryError: If malloc failed to allocate memory. 
Examples: - >>> from pyslurm import Node - >>> node = Node("localhost") - >>> node.reload() - >>> - >>> # You can also write this in one-line: - >>> node = Node("localhost").reload() + >>> import pyslurm + >>> node = pyslurm.Node.load("localhost") """ cdef: node_info_msg_t *node_info = NULL partition_info_msg_t *part_info = NULL - - if not self.name: - raise ValueError("You need to set a node name first") + Node wrap = Node.__new__(Node) try: verify_rpc(slurm_load_node_single(&node_info, - self.name, slurm.SHOW_ALL)) + name, slurm.SHOW_ALL)) verify_rpc(slurm_load_partitions(0, &part_info, slurm.SHOW_ALL)) slurm_populate_node_partitions(node_info, part_info) - save_name = self.name if node_info and node_info.record_count: - # Cleanup the old info. - self._dealloc_impl() - # Copy new info - self._alloc_impl() - memcpy(self.info, &node_info.node_array[0], sizeof(node_info_t)) + # Copy info + wrap._alloc_impl() + memcpy(wrap.info, &node_info.node_array[0], sizeof(node_info_t)) node_info.record_count = 0 - - # Need to do this, because while testing even when specifying - # a node name that doesn't exist, it still returned the - # "localhost" node in my Test-setup. Why? - if self.name != save_name: - raise RPCError(msg=f"Node '{save_name}' does not exist") + else: + raise RPCError(msg=f"Node '{name}' does not exist") except Exception as e: raise e finally: slurm_free_node_info_msg(node_info) slurm_free_partition_info_msg(part_info) - return self + return wrap def create(self, state="future"): """Create a node. @@ -672,7 +655,7 @@ cdef class Node: return u32_parse(self.info.energy.current_watts) @property - def average_watts(self): + def avg_watts(self): if not self.info.energy: return None return u32_parse(self.info.energy.ave_watts) From 3e027042cdbe3cf571a1905da4e124fa8c9c3136 Mon Sep 17 00:00:00 2001 From: Toni Harzendorf Date: Thu, 27 Apr 2023 18:58:56 +0200 Subject: [PATCH 21/28] rework tests directory structure, split into "unit" and "integration" --- tests/integration/conftest.py | 23 ++++ tests/{new_api => integration}/test_job.py | 69 ++++++----- .../test_job_steps.py | 71 ++++------- tests/integration/test_job_submit.py | 21 ++++ tests/{new_api => integration}/test_node.py | 22 ++-- tests/integration/util.py | 39 ++++++ tests/{new_api => unit}/test_common.py | 113 ++++++++++++------ tests/unit/test_job.py | 51 ++++++++ tests/unit/test_job_steps.py | 24 ++++ tests/{new_api => unit}/test_job_submit.py | 22 +--- tests/unit/test_node.py | 24 ++++ tests/unit/test_task_dist.py | 32 +++++ tests/{new_api/conftest.py => unit/util.py} | 19 +-- 13 files changed, 364 insertions(+), 166 deletions(-) create mode 100644 tests/integration/conftest.py rename tests/{new_api => integration}/test_job.py (72%) rename tests/{new_api => integration}/test_job_steps.py (71%) create mode 100644 tests/integration/test_job_submit.py rename tests/{new_api => integration}/test_node.py (62%) create mode 100644 tests/integration/util.py rename tests/{new_api => unit}/test_common.py (71%) create mode 100644 tests/unit/test_job.py create mode 100644 tests/unit/test_job_steps.py rename tests/{new_api => unit}/test_job_submit.py (93%) create mode 100644 tests/unit/test_node.py create mode 100644 tests/unit/test_task_dist.py rename tests/{new_api/conftest.py => unit/util.py} (66%) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 00000000..124c4235 --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,23 @@ +import pytest +from pyslurm import ( + Job, + 
JobSubmitDescription, +) +from util import create_simple_job_desc + + +@pytest.fixture +def submit_job(): + + jobs = [] + def _job(script=None, **kwargs): + job_desc = create_simple_job_desc(script, **kwargs) + job = Job(job_desc.submit()) + + jobs.append(job) + return job + + yield _job + + for j in jobs: + j.cancel() diff --git a/tests/new_api/test_job.py b/tests/integration/test_job.py similarity index 72% rename from tests/new_api/test_job.py rename to tests/integration/test_job.py index b82b763e..b1c9efd8 100644 --- a/tests/new_api/test_job.py +++ b/tests/integration/test_job.py @@ -1,13 +1,10 @@ -"""test_job.py - Test the job api functions.""" +"""test_job.py - Integration test job api functionalities.""" -import sys import time import pytest import pyslurm -import tempfile -import os -from os import environ as pyenviron -from conftest import create_simple_job_desc +import util +from util import create_simple_job_desc from pyslurm import ( Job, Jobs, @@ -16,7 +13,14 @@ ) -def test_reload(submit_job): +def test_parse_all(submit_job): + job = submit_job() + # Use the as_dict() function to test if parsing works for all + # properties on a simple Job without error. + Job.load(job.id).as_dict() + + +def test_load(submit_job): job = submit_job() jid = job.id @@ -27,7 +31,7 @@ def test_reload(submit_job): assert job.time_limit == None # Now load the job info - job.reload() + job = Job.load(jid) assert job.id == jid assert job.ntasks == 2 @@ -35,59 +39,49 @@ def test_reload(submit_job): assert job.time_limit == 1440 with pytest.raises(RPCError): - Job(99999).reload() + Job.load(99999) def test_cancel(submit_job): job = submit_job() - job.cancel() - # make sure the job is actually cancelled - time.sleep(0.5) - assert job.reload().state == "CANCELLED" - - -def test_parse_all(submit_job): - job = submit_job() - - # Use the as_dict() function to test if parsing works for all - # properties on a simple Job without error. - job.reload().as_dict() + time.sleep(util.WAIT_SECS_SLURMCTLD) + assert Job.load(job.id).state == "CANCELLED" def test_send_signal(submit_job): job = submit_job() - time.sleep(1) - assert job.reload().state == "RUNNING" + time.sleep(util.WAIT_SECS_SLURMCTLD) + assert Job.load(job.id).state == "RUNNING" # Send a SIGKILL (basically cancelling the Job) job.send_signal(9) # make sure the job is actually cancelled - time.sleep(1) - assert job.reload().state == "CANCELLED" + time.sleep(util.WAIT_SECS_SLURMCTLD) + assert Job.load(job.id).state == "CANCELLED" def test_suspend_unsuspend(submit_job): job = submit_job() - time.sleep(1) + time.sleep(util.WAIT_SECS_SLURMCTLD) job.suspend() - assert job.reload().state == "SUSPENDED" + assert Job.load(job.id).state == "SUSPENDED" job.unsuspend() # make sure the job is actually running again - time.sleep(1) - assert job.reload().state == "RUNNING" + time.sleep(util.WAIT_SECS_SLURMCTLD) + assert Job.load(job.id).state == "RUNNING" # Don't need to test hold/resume, since it uses just job.modify() to set # priority to 0/INFINITE. 
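The hold/resume path mentioned in the comment above therefore reduces to a plain modify call; a rough sketch only, assuming `priority` is a settable JobSubmitDescription attribute that accepts 0 and "INFINITE" as the comment implies:

    >>> job.modify(JobSubmitDescription(priority=0))           # hold
    >>> job.modify(JobSubmitDescription(priority="INFINITE"))  # resume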
def test_modify(submit_job): job = submit_job(priority=0) - job = job.reload() + job = Job(job.id) changes = JobSubmitDescription( time_limit = "2-00:00:00", @@ -96,7 +90,7 @@ def test_modify(submit_job): ) job.modify(changes) - job.reload() + job = Job.load(job.id) assert job.time_limit == 2880 assert job.ntasks == 5 @@ -105,20 +99,20 @@ def test_modify(submit_job): def test_requeue(submit_job): job = submit_job() - job.reload() + job = Job.load(job.id) assert job.requeue_count == 0 - time.sleep(1.5) + time.sleep(util.WAIT_SECS_SLURMCTLD) job.requeue() - job.reload() + job = Job.load(job.id) assert job.requeue_count == 1 def test_notify(submit_job): job = submit_job() - time.sleep(1) + time.sleep(util.WAIT_SECS_SLURMCTLD) # Could check the logfile, but we just assume for now # that when this function raises no Exception, everything worked. @@ -141,3 +135,8 @@ def test_get_job_queue(submit_job): # Check to see if all the Jobs we submitted exist assert job.id in jobs assert isinstance(jobs[job.id], Job) + + +def test_get_resource_layout_per_node(submit_job): + # TODO + assert True diff --git a/tests/new_api/test_job_steps.py b/tests/integration/test_job_steps.py similarity index 71% rename from tests/new_api/test_job_steps.py rename to tests/integration/test_job_steps.py index f22fe2fe..5a33b386 100644 --- a/tests/new_api/test_job_steps.py +++ b/tests/integration/test_job_steps.py @@ -7,6 +7,7 @@ JobSteps, RPCError, ) +import util def create_job_script_multi_step(steps=None): @@ -31,20 +32,13 @@ def create_job_script_multi_step(steps=None): return job_script -def test_reload(submit_job): +def test_load(submit_job): job = submit_job(script=create_job_script_multi_step()) - step = JobStep(job, "batch") - # Nothing has been loaded at this point, just make sure everything is - # on default values. - assert step.name is None - assert step.ntasks is None - assert step.time_limit is None - - # Now load the step info, waiting one second to make sure the Step + # Load the step info, waiting one second to make sure the Step # actually exists. - time.sleep(1) - step.reload() + time.sleep(util.WAIT_SECS_SLURMCTLD) + step = JobStep.load(job.id, "batch") assert step.id == "batch" assert step.job_id == job.id @@ -53,11 +47,11 @@ def test_reload(submit_job): assert step.ntasks == 1 # Job was submitted with a time-limit of 1 day, but it seems this doesn't # propagate through for the steps if not set explicitly. - assert step.time_limit == None + assert step.time_limit is None # Now try to load the first and second Step started by srun - step_zero = JobStep(job, 0).reload() - step_one = JobStep(job, 1).reload() + step_zero = JobStep.load(job, 0) + step_one = JobStep.load(job, 1) # It is possible that the srun executed as the second command will # become the Step with ID '0' - so we just swap it. 
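As a rough illustration of the collection behaviour exercised in the tests below (assuming the multi-step script above and that the slurmctld has already registered all of its steps):

    >>> steps = JobSteps.load(job)
    >>> len(steps)
    3
    >>> "batch" in steps and 0 in steps and 1 in steps
    True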
@@ -74,7 +68,7 @@ def test_reload(submit_job): assert step.name == "step_zero" assert step.ntasks == 1 assert step.alloc_cpus == 2 - assert step.time_limit == None + assert step.time_limit is None step = step_one assert step.job_id == job.id @@ -87,7 +81,7 @@ def test_reload(submit_job): def test_collection(submit_job): job = submit_job(script=create_job_script_multi_step()) - time.sleep(1) + time.sleep(util.WAIT_SECS_SLURMCTLD) steps = JobSteps.load(job) assert steps != {} @@ -98,23 +92,10 @@ def test_collection(submit_job): 1 in steps) -def test_distribution(submit_job): - job = submit_job(script=create_job_script_multi_step()) - step = JobStep(job, 0) - - assert step.distribution is None - - time.sleep(1) - step.reload() - - assert step.distribution == {"nodes": "block" , "sockets": "cyclic", - "cores": "block", "plane": None ,"pack": True} - - def test_cancel(submit_job): job = submit_job(script=create_job_script_multi_step()) - time.sleep(1) + time.sleep(util.WAIT_SECS_SLURMCTLD) steps = JobSteps.load(job) assert len(steps) == 3 assert ("batch" in steps and @@ -123,7 +104,7 @@ def test_cancel(submit_job): steps[0].cancel() - time.sleep(0.5) + time.sleep(util.WAIT_SECS_SLURMCTLD) steps = JobSteps.load(job) assert len(steps) == 2 assert ("batch" in steps and @@ -134,48 +115,46 @@ def test_modify(submit_job): steps = "srun -t 20 sleep 100" job = submit_job(script=create_job_script_multi_step(steps)) - time.sleep(1) - step = JobStep(job, 0).reload() + time.sleep(util.WAIT_SECS_SLURMCTLD) + step = JobStep.load(job, 0) assert step.time_limit == 20 step.modify(JobStep(time_limit="00:05:00")) - assert step.reload().time_limit == 5 + assert JobStep.load(job, 0).time_limit == 5 step.modify(time_limit="00:15:00") - assert step.reload().time_limit == 15 + assert JobStep.load(job, 0).time_limit == 15 def test_send_signal(submit_job): steps = "srun -t 10 sleep 100" job = submit_job(script=create_job_script_multi_step(steps)) - step = JobStep(job, 0) - time.sleep(1) - assert step.reload().state == "RUNNING" + time.sleep(util.WAIT_SECS_SLURMCTLD) + step = JobStep.load(job, 0) + assert step.state == "RUNNING" # Send a SIGTERM (basically cancelling the Job) step.send_signal(15) # Make sure the job is actually cancelled. # If a RPCError is raised, this means the Step got cancelled. - time.sleep(1) + time.sleep(util.WAIT_SECS_SLURMCTLD) with pytest.raises(RPCError): - step.reload() + step = JobStep.load(job, 0) -def test_reload_with_wrong_step_id(submit_job): +def test_load_with_wrong_step_id(submit_job): job = submit_job() - step = JobStep(job, 3) with pytest.raises(RPCError): - step.reload() + JobStep.load(job, 3) def test_parse_all(submit_job): job = submit_job() - step = JobStep(job, "batch") # Use the as_dict() function to test if parsing works for all # properties on a simple JobStep without error. 
- time.sleep(1) - step.reload().as_dict() + time.sleep(util.WAIT_SECS_SLURMCTLD) + JobStep.load(job, "batch").as_dict() diff --git a/tests/integration/test_job_submit.py b/tests/integration/test_job_submit.py new file mode 100644 index 00000000..9a8d1f9e --- /dev/null +++ b/tests/integration/test_job_submit.py @@ -0,0 +1,21 @@ +"""test_job_submit.py - Test the job submit api functions.""" + +import time +import pytest +import pyslurm +from os import environ as pyenviron +from util import create_simple_job_desc, create_job_script +from pyslurm import ( + Job, + Jobs, + JobSubmitDescription, + RPCError, +) + + +def test_submit_example1(): + assert True + + +def test_submit_example2(): + assert True diff --git a/tests/new_api/test_node.py b/tests/integration/test_node.py similarity index 62% rename from tests/new_api/test_node.py rename to tests/integration/test_node.py index 614460ff..501cc90d 100644 --- a/tests/new_api/test_node.py +++ b/tests/integration/test_node.py @@ -8,22 +8,18 @@ from pyslurm import Node, Nodes, RPCError -def test_reload(): - node = Node(Nodes.load().as_list()[0].name) +def test_load(): + name = Nodes.load().as_list()[0].name - # Nothing has been loaded at this point, just make sure everything is - # on default values. - assert node.weight is None - assert node.slurm_version is None # Now load the node info - node.reload() - assert node.name == "localhost" + node = Node.load(name) + assert node.name == name assert node.weight is not None assert node.slurm_version is not None with pytest.raises(RPCError, match=f"Node 'nonexistent' does not exist"): - Node("nonexistent").reload() + Node.load("nonexistent") def test_create(): @@ -43,14 +39,14 @@ def test_modify(): node = Node(Nodes.load().as_list()[0].name) node.modify(weight=10000) - assert node.reload().weight == 10000 + assert Node.load(node.name).weight == 10000 node.modify(Node(weight=20000)) - assert node.reload().weight == 20000 + assert Node.load(node.name).weight == 20000 node.modify(Node(weight=5000)) - assert node.reload().weight == 5000 + assert Node.load(node.name).weight == 5000 def test_parse_all(): - Node(Nodes.load().as_list()[0].name).reload().as_dict() + Node.load(Nodes.load().as_list()[0].name).as_dict() diff --git a/tests/integration/util.py b/tests/integration/util.py new file mode 100644 index 00000000..370035d1 --- /dev/null +++ b/tests/integration/util.py @@ -0,0 +1,39 @@ +import pytest +from pyslurm import ( + Job, + JobSubmitDescription, +) + +# Horrendous, but works for now, because when testing against a real slurmctld +# we need to wait a bit for state changes (i.e. 
we cancel a job and +# immediately check after if the state is really "CANCELLED", but the state +# hasn't changed yet, so we need to wait a bit) +WAIT_SECS_SLURMCTLD = 3 + + +def create_job_script(): + job_script = """\ +#!/bin/bash + +echo "Got args: $@" + +/usr/bin/env + +sleep 500\ + +""" + return job_script + + +def create_simple_job_desc(script=None, **kwargs): + job = JobSubmitDescription(**kwargs) + + job.name = "test_job" + job.standard_output = "/tmp/slurm-test-%j.out" + job.memory_per_cpu = "1G" + job.ntasks = 2 + job.cpus_per_task = 3 + job.script = create_job_script() if not script else script + job.time_limit = "1-00:00:00" + + return job diff --git a/tests/new_api/test_common.py b/tests/unit/test_common.py similarity index 71% rename from tests/new_api/test_common.py rename to tests/unit/test_common.py index eb4ea227..dddce37c 100644 --- a/tests/new_api/test_common.py +++ b/tests/unit/test_common.py @@ -34,7 +34,9 @@ cpubind_to_num, nodelist_from_range_str, nodelist_to_range_str, + _sum_prop, ) +from pyslurm.core.common import cstr class TestTypes: @@ -46,14 +48,14 @@ def test_strings(self): assert n.name == "Testing fmalloc string routines." n.name = None - assert n.name == None + assert n.name is None # Everything after a \0 will be cut off n.name = "test1\0test2" assert n.name == "test1" n.name = "\0" - assert n.name == None + assert n.name is None def test_lists(self): n = Node() @@ -75,25 +77,49 @@ def test_lists(self): n.available_features = None assert n.available_features == [] - def test_dicts(self): - js = JobSubmitDescription() - input_as_dict = {"key1": "value1", "key2": "value2"} - input_as_str = "key1=value1,key2=value2" + def test_str_to_dict(self): + expected_dict = {"key1": "value1", "key2": "value2"} + input_str = "key1=value1,key2=value2" + assert cstr.to_dict(input_str) == expected_dict + assert cstr.to_dict("") == {} - js.accounting_gather_frequency = input_as_dict - assert js.accounting_gather_frequency == input_as_dict + def test_dict_to_str(self): + input_dict = {"key1": "value1", "key2": "value2"} + expected_str = "key1=value1,key2=value2" + assert cstr.dict_to_str(input_dict) == expected_str - js.accounting_gather_frequency = input_as_str - assert js.accounting_gather_frequency == input_as_dict + input_dict = {"key1": "value1", "key2": "value2"} + expected_str = "key1=value1,key2=value2" + assert cstr.dict_to_str(input_dict) == expected_str - js.accounting_gather_frequency = {} - assert js.accounting_gather_frequency == {} + expected_str = "key1-value1:key2-value2" + assert cstr.dict_to_str(input_dict, delim1=":", delim2="-") == expected_str - js.accounting_gather_frequency = "" - assert js.accounting_gather_frequency == {} + input_dict = {"key1=": "value1", "key2": "value2"} + expected_str = "key1=value1,key2=value2" + with pytest.raises(ValueError, + match=r"Key or Value cannot contain either*"): + assert cstr.dict_to_str(input_dict) == expected_str + + expected_str = "key1=value1,key2=value2" + assert cstr.dict_to_str(expected_str) == expected_str + + assert cstr.dict_to_str({}) == None + assert cstr.dict_to_str("") == None + + def test_dict_to_gres_str(self): + input_dict = {"gpu:tesla": 3} + expected_str = "gres:gpu:tesla:3" + assert cstr.from_gres_dict(input_dict) == expected_str + assert cstr.from_gres_dict(expected_str) == expected_str + + input_dict = {"gpu": 3} + expected_str = "gres:gpu:3" + assert cstr.from_gres_dict(input_dict) == expected_str + assert cstr.from_gres_dict(expected_str) == expected_str - js.accounting_gather_frequency = 
None - assert js.accounting_gather_frequency == {} + def test_str_to_gres_dict(self): + assert True def _uint_impl(self, func_set, func_get, typ): val = func_set(2**typ-2) @@ -139,30 +165,30 @@ def test_u32(self): def test_u64(self): self._uint_impl(u64, u64_parse, 64) - def _uint_bool_impl(self, arg): - js = JobSubmitDescription() +# def _uint_bool_impl(self, arg): +# js = JobSubmitDescription() - setattr(js, arg, True) - assert getattr(js, arg) == True +# setattr(js, arg, True) +# assert getattr(js, arg) == True - setattr(js, arg, False) - assert getattr(js, arg) == False +# setattr(js, arg, False) +# assert getattr(js, arg) == False - # Set to true again to make sure toggling actually works. - setattr(js, arg, True) - assert getattr(js, arg) == True +# # Set to true again to make sure toggling actually works. +# setattr(js, arg, True) +# assert getattr(js, arg) == True - setattr(js, arg, None) - assert getattr(js, arg) == False +# setattr(js, arg, None) +# assert getattr(js, arg) == False - def test_u8_bool(self): - self._uint_bool_impl("overcommit") +# def test_u8_bool(self): +# self._uint_bool_impl("overcommit") - def test_u16_bool(self): - self._uint_bool_impl("requires_contiguous_nodes") +# def test_u16_bool(self): +# self._uint_bool_impl("requires_contiguous_nodes") - def test_u64_bool_flag(self): - self._uint_bool_impl("kill_on_invalid_dependency") +# def test_u64_bool_flag(self): +# self._uint_bool_impl("kill_on_invalid_dependency") class TestTime: @@ -202,10 +228,12 @@ def test_parse_seconds(self): timestr_to_secs("invalid_val") def test_parse_date(self): - timestamp = 1667938097 + timestamp = 1667941697 date = "2022-11-08T21:08:17" datetime_date = datetime.datetime(2022, 11, 8, 21, 8, 17) + # Converting date str to timestamp with the slurm API functions may + # not yield the expected timestamp above due to using local time zone assert date_to_timestamp(date) == timestamp assert date_to_timestamp(timestamp) == timestamp assert date_to_timestamp(datetime_date) == timestamp @@ -328,3 +356,20 @@ def test_nodelist_to_range_str(self): assert "node[001,007-009]" == nodelist_to_range_str(nodelist) assert "node[001,007-009]" == nodelist_to_range_str(nodelist_str) + def test_summarize_property(self): + class TestObject: + @property + def memory(self): + return 10240 + + @property + def cpus(self): + return None + + object_dict = {i: TestObject() for i in range(10)} + + expected = 10240 * 10 + assert _sum_prop(object_dict, TestObject.memory) == expected + + expected = 0 + assert _sum_prop(object_dict, TestObject.cpus) == 0 diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py new file mode 100644 index 00000000..aa2b47d4 --- /dev/null +++ b/tests/unit/test_job.py @@ -0,0 +1,51 @@ +"""test_job.py - Unit test basic job functionalities.""" + +import time +import pytest +import pyslurm +from pyslurm import Job +from pyslurm.core.job.util import * + + +def test_parse_all(): + # Use the as_dict() function to test if parsing works for all + # properties on a simple Job without error. 
+ Job(9999).as_dict() + + +def test_parse_dependencies_to_dict(): + expected = None + assert dependency_str_to_dict("") == expected + + expected = { + "after": [1, 2], + "afterany": [], + "afterburstbuffer": [], + "aftercorr": [], + "afternotok": [], + "afterok": [3], + "singleton": False, + "satisfy": "all", + } + input_str = "after:1:2,afterok:3" + assert dependency_str_to_dict(input_str) == expected + + +def test_mail_types_int_to_list(): + expected = [] + assert mail_type_int_to_list(0) == expected + + +def test_acctg_profile_int_to_list(): + expected = [] + assert acctg_profile_int_to_list(0) == expected + + +def test_power_type_int_to_list(): + expected = [] + assert power_type_int_to_list(0) == expected + + +def test_cpu_freq_int_to_str(): + expected = None + assert cpu_freq_int_to_str(0) == expected diff --git a/tests/unit/test_job_steps.py b/tests/unit/test_job_steps.py new file mode 100644 index 00000000..10e6f9ff --- /dev/null +++ b/tests/unit/test_job_steps.py @@ -0,0 +1,24 @@ +"""test_job_steps.py - Unit test basic job step functionality.""" + +import pytest +from pyslurm import JobStep, Job +from pyslurm.core.job.step import ( + humanize_step_id, + dehumanize_step_id, +) + +def test_create_instance(): + step = JobStep(9999, 1) + assert step.id == 1 + assert step.job_id == 9999 + + job = Job(10000) + step2 = JobStep(job, 2) + assert step2.id == 2 + assert step2.job_id == 10000 + + +def test_parse_all(): + # Use the as_dict() function to test if parsing works for all + # properties on a simple JobStep without error. + JobStep(9999, 1).as_dict() diff --git a/tests/new_api/test_job_submit.py b/tests/unit/test_job_submit.py similarity index 93% rename from tests/new_api/test_job_submit.py rename to tests/unit/test_job_submit.py index c5c5039a..9be67f5f 100644 --- a/tests/new_api/test_job_submit.py +++ b/tests/unit/test_job_submit.py @@ -7,7 +7,7 @@ import tempfile import os from os import environ as pyenviron -from conftest import create_simple_job_desc, create_job_script +from util import create_simple_job_desc, create_job_script from pyslurm import ( Job, Jobs, @@ -230,26 +230,6 @@ def test_signal(): job._create_job_submit_desc() -def test_distribution(): - job = job_desc() - job._create_job_submit_desc() - - job.distribution = "cyclic:cyclic:cyclic" - job._create_job_submit_desc() - - job.distribution = {"nodes": "cyclic", "sockets": "block", "pack": True} - job._create_job_submit_desc() - - job.distribution = "*:*:fcyclic,NoPack" - job._create_job_submit_desc() - - job.distribution = 10 - job._create_job_submit_desc() - - job.distribution = {"plane": 20} - job._create_job_submit_desc() - - def test_setting_attrs_with_env_vars(): pyenviron["PYSLURM_JOBDESC_ACCOUNT"] = "account1" pyenviron["PYSLURM_JOBDESC_NAME"] = "jobname" diff --git a/tests/unit/test_node.py b/tests/unit/test_node.py new file mode 100644 index 00000000..170ff78c --- /dev/null +++ b/tests/unit/test_node.py @@ -0,0 +1,24 @@ +"""test_node.py - Test the node api functions.""" + +import pytest +import pyslurm +from pyslurm import Node, Nodes + + +def test_create_instance(): + node = Node("localhost") + assert node.name == "localhost" + + +def test_parse_all(): + Node("localhost").as_dict() + + +def test_create_nodes_collection(): + # TODO + assert True + + +def test_setting_attributes(): + # TODO + assert True diff --git a/tests/unit/test_task_dist.py b/tests/unit/test_task_dist.py new file mode 100644 index 00000000..4779ba22 --- /dev/null +++ b/tests/unit/test_task_dist.py @@ -0,0 +1,32 @@ +"""test_task_dist.py 
- Test task distribution functions.""" + +import pyslurm +from pyslurm.core.job.task_dist import TaskDistribution + + +def test_from_int(): + expected = None + assert TaskDistribution.from_int(0) == expected + + +def test_from_str(): + + input_str = "cyclic:cyclic:cyclic" + expected = TaskDistribution("cyclic", "cyclic", "cyclic") + parsed = TaskDistribution.from_str(input_str) + assert parsed == expected + assert parsed.to_str() == input_str + + input_str = "*:*:fcyclic,NoPack" + expected = TaskDistribution("*", "*", "fcyclic", False) + parsed = TaskDistribution.from_str(input_str) + assert parsed == expected + assert parsed.to_str() == "block:cyclic:fcyclic,NoPack" + + input_plane_size = 10 + expected = TaskDistribution(plane_size=input_plane_size) + parsed = TaskDistribution.from_str(f"plane={input_plane_size}") + assert parsed == expected + assert parsed.to_str() == "plane" + assert parsed.plane == 10 +# assert parsed.as_int() == pyslurm.SLURM_DIST_PLANE diff --git a/tests/new_api/conftest.py b/tests/unit/util.py similarity index 66% rename from tests/new_api/conftest.py rename to tests/unit/util.py index ad195fb9..f2db880b 100644 --- a/tests/new_api/conftest.py +++ b/tests/unit/util.py @@ -4,6 +4,8 @@ JobSubmitDescription, ) +# TODO: Figure out how to share this properly between the unit and integration +# folders def create_job_script(): job_script = """\ @@ -31,20 +33,3 @@ def create_simple_job_desc(script=None, **kwargs): job.time_limit = "1-00:00:00" return job - - -@pytest.fixture -def submit_job(): - - jobs = [] - def _job(script=None, **kwargs): - job_desc = create_simple_job_desc(script, **kwargs) - job = Job(job_desc.submit()) - - jobs.append(job) - return job - - yield _job - - for j in jobs: - j.cancel() From 292e7c026c8a52b105e0456a1f3d015f3cceafe8 Mon Sep 17 00:00:00 2001 From: Toni Harzendorf Date: Fri, 28 Apr 2023 19:10:27 +0200 Subject: [PATCH 22/28] wip --- pyslurm/core/common/cstr.pyx | 2 +- pyslurm/core/db/job.pxd | 6 +- pyslurm/core/db/job.pyx | 43 ++++++----- pyslurm/core/db/qos.pxd | 6 +- pyslurm/core/db/qos.pyx | 8 +- pyslurm/core/db/util.pxd | 17 ++--- pyslurm/core/db/util.pyx | 123 +++++++++++++++++++++---------- tests/integration/test_db_job.py | 80 ++++++++++++++++++++ tests/integration/util.py | 5 ++ tests/unit/db/test_db_job.py | 32 ++++++++ tests/unit/db/test_slurm_list.py | 90 ++++++++++++++++++++++ tests/unit/test_job.py | 5 +- 12 files changed, 343 insertions(+), 74 deletions(-) create mode 100644 tests/integration/test_db_job.py create mode 100644 tests/unit/db/test_db_job.py create mode 100644 tests/unit/db/test_slurm_list.py diff --git a/pyslurm/core/common/cstr.pyx b/pyslurm/core/common/cstr.pyx index 7f6fae60..bf9d2884 100644 --- a/pyslurm/core/common/cstr.pyx +++ b/pyslurm/core/common/cstr.pyx @@ -151,7 +151,7 @@ cpdef dict to_dict(char *str_dict, str delim1=",", str delim2="="): for kv in _str_dict.split(delim1): if delim2 in kv: - key, val = kv.split(delim2) + key, val = kv.split(delim2, 1) out[key] = val return out diff --git a/pyslurm/core/db/job.pxd b/pyslurm/core/db/job.pxd index 996f646d..a5ba9405 100644 --- a/pyslurm/core/db/job.pxd +++ b/pyslurm/core/db/job.pxd @@ -38,7 +38,11 @@ from pyslurm.slurm cimport ( slurm_job_state_string, slurm_job_reason_string, ) -from pyslurm.core.db.util cimport SlurmList, SlurmListItem +from pyslurm.core.db.util cimport ( + SlurmList, + SlurmListItem, + make_char_list, +) from pyslurm.core.db.step cimport JobStep, JobSteps from pyslurm.core.db.stats cimport JobStats from pyslurm.core.db.connection cimport 
Connection diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index 683b61ac..5554f5a0 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -139,17 +139,17 @@ cdef class JobSearchFilter: ptr.timelimit_min = u32(timestr_to_mins(self.timelimit), on_noval=0) ptr.timelimit_max = u32(timestr_to_mins(self.max_timelimit), on_noval=0) - SlurmList.to_char_list(&ptr.acct_list, self.accounts) - SlurmList.to_char_list(&ptr.associd_list, self.association_ids) - SlurmList.to_char_list(&ptr.cluster_list, self._parse_clusters()) - SlurmList.to_char_list(&ptr.constraint_list, self.constraints) - SlurmList.to_char_list(&ptr.jobname_list, self.names) - SlurmList.to_char_list(&ptr.groupid_list, self._parse_groups()) - SlurmList.to_char_list(&ptr.userid_list, self._parse_users()) - SlurmList.to_char_list(&ptr.wckey_list, self.wckeys) - SlurmList.to_char_list(&ptr.partition_list, self.partitions) - SlurmList.to_char_list(&ptr.qos_list, self._parse_qos()) - SlurmList.to_char_list(&ptr.state_list, self._parse_state()) + make_char_list(&ptr.acct_list, self.accounts) + make_char_list(&ptr.associd_list, self.association_ids) + make_char_list(&ptr.cluster_list, self._parse_clusters()) + make_char_list(&ptr.constraint_list, self.constraints) + make_char_list(&ptr.jobname_list, self.names) + make_char_list(&ptr.groupid_list, self._parse_groups()) + make_char_list(&ptr.userid_list, self._parse_users()) + make_char_list(&ptr.wckey_list, self.wckeys) + make_char_list(&ptr.partition_list, self.partitions) + make_char_list(&ptr.qos_list, self._parse_qos()) + make_char_list(&ptr.state_list, self._parse_state()) if self.nodelist: cstr.fmalloc(&ptr.used_nodes, @@ -228,7 +228,7 @@ cdef class Jobs(dict): jobs.db_conn = Connection.open() jobs.info = SlurmList.wrap(slurmdb_jobs_get(jobs.db_conn.ptr, cond.ptr)) - if jobs.info.is_null(): + if jobs.info.is_null: raise RPCError(msg="Failed to get Jobs from slurmdbd") qos_data = QualitiesOfService.load(name_is_key=False, @@ -287,7 +287,7 @@ cdef class Job: return wrap @staticmethod - def load(job_id): + def load(job_id, with_script=False, with_env=False): """Load the information for a specific Job from the Database. Args: @@ -301,8 +301,10 @@ cdef class Job: RPCError: If requesting the information for the database Job was not sucessful. 
""" - jobs = Jobs.load(ids=[int(job_id)]) - if not jobs or job_idid not in jobs: + jfilter = JobSearchFilter(ids=[int(job_id)], + with_script=with_script, with_env=with_env) + jobs = Jobs.load(jfilter) + if not jobs or job_id not in jobs: raise RPCError(msg=f"Job {job_id} does not exist") return jobs[job_id] @@ -325,12 +327,15 @@ cdef class Job: (dict): Database Job information as dict """ cdef dict out = instance_to_dict(self) - out["stats"] = self.stats.as_dict() - steps = out.pop("steps", {}) + if self.stats: + out["stats"] = self.stats.as_dict() + + steps = out.pop("steps", {}) out["steps"] = {} for step_id, step in steps.items(): out["steps"][step_id] = step.as_dict() + return out @property @@ -521,6 +526,10 @@ cdef class Job: def script(self): return cstr.to_unicode(self.ptr.script) + @property + def environment(self): + return cstr.to_dict(self.ptr.env, delim1="\n", delim2="=") + @property def start_time(self): return _raw_time(self.ptr.start) diff --git a/pyslurm/core/db/qos.pxd b/pyslurm/core/db/qos.pxd index 5ae8b8e3..00293a13 100644 --- a/pyslurm/core/db/qos.pxd +++ b/pyslurm/core/db/qos.pxd @@ -30,7 +30,11 @@ from pyslurm.slurm cimport ( slurm_preempt_mode_num, try_xmalloc, ) -from pyslurm.core.db.util cimport SlurmList, SlurmListItem +from pyslurm.core.db.util cimport ( + SlurmList, + SlurmListItem, + make_char_list, +) from pyslurm.core.db.connection cimport Connection from pyslurm.core.common cimport cstr diff --git a/pyslurm/core/db/qos.pyx b/pyslurm/core/db/qos.pyx index 9d6f7690..a81d1879 100644 --- a/pyslurm/core/db/qos.pyx +++ b/pyslurm/core/db/qos.pyx @@ -46,7 +46,7 @@ cdef class QualitiesOfService(dict): qos_dict.db_conn = Connection.open() if not conn else conn qos_dict.info = SlurmList.wrap(slurmdb_qos_get(qos_dict.db_conn.ptr, cond.ptr)) - if qos_dict.info.is_null(): + if qos_dict.info.is_null: raise RPCError(msg="Failed to get QoS data from slurmdbd") for qos_ptr in SlurmList.iter_and_pop(qos_dict.info): @@ -105,9 +105,9 @@ cdef class QualityOfServiceSearchFilter: self._alloc() cdef slurmdb_qos_cond_t *ptr = self.ptr - SlurmList.to_char_list(&ptr.name_list, self.names) - SlurmList.to_char_list(&ptr.id_list, self.ids) - SlurmList.to_char_list(&ptr.description_list, self.descriptions) + make_char_list(&ptr.name_list, self.names) + make_char_list(&ptr.id_list, self.ids) + make_char_list(&ptr.description_list, self.descriptions) ptr.preempt_mode = self._parse_preempt_modes() ptr.with_deleted = 1 if bool(self.with_deleted) else 0 diff --git a/pyslurm/core/db/util.pxd b/pyslurm/core/db/util.pxd index 42b48a6f..60894058 100644 --- a/pyslurm/core/db/util.pxd +++ b/pyslurm/core/db/util.pxd @@ -37,6 +37,9 @@ from pyslurm.slurm cimport ( slurm_xfree_ptr, ) +cdef slurm_list_to_pylist(List in_list) +cdef make_char_list(List *in_list, vals) + cdef class SlurmListItem: cdef void *data @@ -48,19 +51,15 @@ cdef class SlurmListItem: cdef class SlurmList: cdef: List info - int cnt ListIterator itr - int itr_cnt + + cdef readonly: owned + int itr_cnt + int cnt @staticmethod cdef SlurmList wrap(List, owned=*) @staticmethod - cdef SlurmList create(slurm.ListDelF delf) - - @staticmethod - cdef to_char_list(List *in_list, vals) - - @staticmethod - cdef to_str_pylist(List in_list) + cdef SlurmList create(slurm.ListDelF delf, owned=*) diff --git a/pyslurm/core/db/util.pyx b/pyslurm/core/db/util.pyx index 1a30cb82..a37ee7e5 100644 --- a/pyslurm/core/db/util.pyx +++ b/pyslurm/core/db/util.pyx @@ -21,6 +21,25 @@ # cython: language_level=3 +cdef make_char_list(List *in_list, vals): + if not 
in_list[0]: + return None + + # Make a new SlurmList wrapper with the values + cdef SlurmList slist = SlurmList(vals) + + # Make sure the previous list is deallocated + slurm_list_destroy(in_list[0]) + + # Assign the pointer from slist to in_list, and give up ownership of slist + in_list[0] = slist.info + slist.owned = False + + +cdef slurm_list_to_pylist(List in_list): + return SlurmList.wrap(in_list, owned=False).to_pylist() + + cdef class SlurmListItem: def __cinit__(self): @@ -32,6 +51,13 @@ cdef class SlurmListItem: wrap.data = item return wrap + @property + def has_data(self): + if self.data: + return True + else: + return False + cdef class SlurmList: """Convenience Wrapper around slurms List type""" @@ -42,15 +68,29 @@ cdef class SlurmList: self.cnt = 0 self.owned = True + def __init__(self, vals=None): + self.info = slurm_list_create(slurm_xfree_ptr) + self.append(vals) + def __dealloc__(self): - if self.owned: - if self.itr: - slurm_list_iterator_destroy(self.itr) + self._dealloc_itr() + self._dealloc_list() - if self.info: - slurm_list_destroy(self.info) + def _dealloc_list(self): + if self.info is not NULL and self.owned: + slurm_list_destroy(self.info) + self.cnt = 0 + self.info = NULL + + def _dealloc_itr(self): + if self.itr: + slurm_list_iterator_destroy(self.itr) + self.itr_cnt = 0 + self.itr = NULL def __iter__(self): + self._dealloc_itr() + self.itr = slurm_list_iterator_create(self.info) return self def __next__(self): @@ -58,72 +98,75 @@ cdef class SlurmList: self.itr_cnt += 1 return SlurmListItem.from_ptr(slurm_list_next(self.itr)) - slurm_list_iterator_reset(self.itr) - self.itr_cnt = 0 + self._dealloc_itr() raise StopIteration @staticmethod def iter_and_pop(SlurmList li): - cnt = 0 - while cnt < li.cnt: + while li.cnt > 0: yield SlurmListItem.from_ptr(slurm_list_pop(li.info)) - cnt += 1 + li.cnt -= 1 @staticmethod - cdef SlurmList create(slurm.ListDelF delfunc): + cdef SlurmList create(slurm.ListDelF delfunc, owned=True): cdef SlurmList wrapper = SlurmList.__new__(SlurmList) wrapper.info = slurm_list_create(delfunc) - wrapper.itr = slurm_list_iterator_create(wrapper.info) + wrapper.owned = owned return wrapper @staticmethod cdef SlurmList wrap(List li, owned=True): + cdef SlurmList wrapper = SlurmList.__new__(SlurmList) if not li: - raise ValueError("List is NULL") + return wrapper - cdef SlurmList wrapper = SlurmList.__new__(SlurmList) wrapper.info = li wrapper.cnt = slurm_list_count(li) - wrapper.itr = slurm_list_iterator_create(wrapper.info) wrapper.owned = owned return wrapper - @staticmethod - cdef to_str_pylist(List in_list): + def to_pylist(self): cdef: - ListIterator itr = slurm_list_iterator_create(in_list) - char* entry = NULL + SlurmListItem item list out = [] - for i in range(slurm_list_count(in_list)): - entry = slurm_list_next(itr) - pystr = cstr.to_unicode(entry) + for item in self: + if not item.has_data: + continue + + pystr = cstr.to_unicode(item.data) if pystr: - out.append(pystr) + out.append(int(pystr) if pystr.isdigit() else pystr) - slurm_list_iterator_destroy(itr) return out - @staticmethod - cdef to_char_list(List *in_list, vals): - cdef: - List li = in_list[0] - char *entry = NULL - - if in_list[0]: - slurm_list_destroy(li) - in_list[0] = NULL + def append(self, vals): + cdef char *entry = NULL if not vals: - in_list[0] = NULL + return None + + to_add = vals + if not isinstance(vals, list): + # If it is not a list, then anything that can't be casted to str + # will error below anyways + to_add = [vals] + + for val in to_add: + if val: + 
entry = NULL + cstr.fmalloc(&entry, str(val)) + slurm_list_append(self.info, entry) + self.cnt += 1 + + @property + def is_itr_null(self): + if not self.itr: + return True else: - in_list[0] = slurm_list_create(slurm_xfree_ptr) - for val in vals: - if val: - entry = NULL - cstr.fmalloc(&entry, str(val)) - slurm_list_append(in_list[0], entry) + return False + @property def is_null(self): if not self.info: return True diff --git a/tests/integration/test_db_job.py b/tests/integration/test_db_job.py new file mode 100644 index 00000000..650db25f --- /dev/null +++ b/tests/integration/test_db_job.py @@ -0,0 +1,80 @@ +"""test_db_job.py - Unit test database job api functionalities.""" + +import pytest +import pyslurm +import time +import util + + +# TODO: Instead of submitting new Jobs and waiting to test Database API +# functionality, we could just fill a slurm database with data on a host, then +# dump the slurm_acct_db to a SQL file and import it in the test environment +# before the integration tests are ran. +# Just a few Jobs and other stuff is enough to keep it small, so it could also +# be put in the repository and uploaded to github. + + +def test_load_single(submit_job): + job = submit_job() + util.wait() + db_job = pyslurm.db.Job.load(job.id) + + assert db_job.id == job.id + + with pytest.raises(pyslurm.RPCError): + pyslurm.db.Job.load(1000) + + +def test_parse_all(submit_job): + job = submit_job() + util.wait() + db_job = pyslurm.db.Job.load(job.id) + job_dict = db_job.as_dict() + + assert job_dict["stats"] + assert job_dict["steps"] + + +def test_modify(submit_job): + # TODO + pass + + +def test_if_steps_exist(submit_job): + # TODO + pass + + +def test_load_with_filter_node(submit_job): + # TODO + pass + + +def test_load_with_filter_qos(submit_job): + # TODO + pass + + +def test_load_with_filter_cluster(submit_job): + # TODO + pass + + +def test_load_with_filter_multiple(submit_job): + # TODO + pass + + +def test_load_with_script(submit_job): + script = util.create_job_script() + job = submit_job(script=script) + util.wait(5) + db_job = pyslurm.db.Job.load(job.id, with_script=True) + assert db_job.script == script + + +def test_load_with_env(submit_job): + job = submit_job() + util.wait(5) + db_job = pyslurm.db.Job.load(job.id, with_env=True) + assert db_job.environment diff --git a/tests/integration/util.py b/tests/integration/util.py index 370035d1..391bfc9f 100644 --- a/tests/integration/util.py +++ b/tests/integration/util.py @@ -3,6 +3,7 @@ Job, JobSubmitDescription, ) +import time # Horrendous, but works for now, because when testing against a real slurmctld # we need to wait a bit for state changes (i.e. 
we cancel a job and @@ -11,6 +12,10 @@ WAIT_SECS_SLURMCTLD = 3 +def wait(secs=WAIT_SECS_SLURMCTLD): + time.sleep(secs) + + def create_job_script(): job_script = """\ #!/bin/bash diff --git a/tests/unit/db/test_db_job.py b/tests/unit/db/test_db_job.py new file mode 100644 index 00000000..e259b137 --- /dev/null +++ b/tests/unit/db/test_db_job.py @@ -0,0 +1,32 @@ +"""test_db_job.py - Unit test basic database job functionalities.""" + +import pytest +import pyslurm + + +def test_search_filter(): + job_filter = pyslurm.db.JobSearchFilter() + + job_filter.clusters = ["test1"] + job_filter.partitions = ["partition1", "partition2"] + job_filter._create() + + job_filter.ids = [1000, 1001] + job_filter._create() + + job_filter.with_script = True + job_filter._create() + + job_filter.with_env = True + with pytest.raises(ValueError): + job_filter._create() + + +def test_collection_init(): + # TODO + assert True + + +def test_create_instance(): + job = pyslurm.db.Job(9999) + assert job.id == 9999 diff --git a/tests/unit/db/test_slurm_list.py b/tests/unit/db/test_slurm_list.py new file mode 100644 index 00000000..57a47837 --- /dev/null +++ b/tests/unit/db/test_slurm_list.py @@ -0,0 +1,90 @@ +"""test_slurm_List.py - Unit test basic Slurm list functionalities.""" + +import pytest +import pyslurm +from pyslurm.core.db.util import SlurmList + + +def test_create_and_destroy_list(): + slist = SlurmList() + assert not slist.is_null + + slist2 = SlurmList(["user1", "user2"]) + assert not slist.is_null + assert slist2.cnt == 2 + assert slist2.itr_cnt == 0 + assert slist2.is_itr_null + + slist2._dealloc_itr() + slist2._dealloc_list() + assert slist2.is_null + + +def test_append(): + slist = SlurmList() + input_list = ["user1", "user2", "user3"] + slist.append(input_list) + assert slist.cnt == len(input_list) + + input_str = "user4" + slist.append(input_str) + assert slist.cnt == 4 + + input_int = 10 + slist.append(input_int) + assert slist.cnt == 5 + + input_ignore_none = ["user6", None] + slist.append(input_ignore_none) + assert slist.cnt == 6 + + +def test_convert_to_pylist(): + input_list = ["user1", "user2", "user3"] + slist = SlurmList(input_list) + assert slist.cnt == 3 + assert slist.to_pylist() == input_list + + +def test_iter(): + input_list = ["user1", "user2", "user3"] + slist = SlurmList(input_list) + assert slist.itr_cnt == 0 + assert slist.is_itr_null + assert slist.cnt == 3 + + for idx, slurm_item in enumerate(slist): + assert not slist.is_itr_null + assert slurm_item.has_data + assert slist.itr_cnt == idx+1 + + assert slist.itr_cnt == 0 + assert slist.is_itr_null + + +def test_iter_and_pop(): + input_list = ["user1", "user2", "user3"] + slist = SlurmList(input_list) + assert slist.itr_cnt == 0 + assert slist.is_itr_null + assert slist.cnt == 3 + + for idx, slurm_item in enumerate(SlurmList.iter_and_pop(slist)): + assert slist.is_itr_null + assert slurm_item.has_data + + assert slist.cnt == 0 + assert slist.itr_cnt == 0 + assert slist.is_itr_null + + # Round 2 on existing object + slist.append(["user10", "user11"]) + assert slist.itr_cnt == 0 + assert slist.cnt == 2 + + for idx, slurm_item in enumerate(SlurmList.iter_and_pop(slist)): + assert slurm_item.has_data + + assert slist.cnt == 0 + assert slist.itr_cnt == 0 + assert slist.is_itr_null diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index aa2b47d4..e072ba4d 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -1,11 +1,14 @@ """test_job.py - Unit test basic job functionalities.""" -import time import pytest import 
pyslurm from pyslurm import Job from pyslurm.core.job.util import * +def test_create_instance(): + job = Job(9999) + assert job.id == 9999 + def test_parse_all(): # Use the as_dict() function to test if parsing works for all From 8d6c94d5d4c7ef9d7a8fb778fb766190f1f53a0a Mon Sep 17 00:00:00 2001 From: Toni Harzendorf Date: Sat, 29 Apr 2023 17:37:45 +0200 Subject: [PATCH 23/28] wip tests --- tests/integration/conftest.py | 21 +++++++ tests/integration/test_db_connection.py | 56 +++++++++++++++++++ tests/integration/test_db_job.py | 20 +++++++ tests/integration/test_db_qos.py | 55 ++++++++++++++++++ tests/integration/test_job.py | 20 +++++++ tests/integration/test_job_steps.py | 20 +++++++ tests/integration/test_job_submit.py | 22 ++++++++ tests/integration/test_node.py | 20 +++++++ tests/integration/util.py | 21 +++++++ tests/unit/db/test_db_job.py | 32 ----------- tests/unit/test_common.py | 20 +++++++ tests/unit/test_db_job.py | 52 +++++++++++++++++ tests/unit/test_db_qos.py | 49 ++++++++++++++++ ...st_slurm_list.py => test_db_slurm_list.py} | 48 +++++++++++++++- tests/unit/test_job.py | 20 +++++++ tests/unit/test_job_steps.py | 20 +++++++ tests/unit/test_job_submit.py | 20 +++++++ tests/unit/test_node.py | 22 +++++++- tests/unit/test_task_dist.py | 20 +++++++ tests/unit/util.py | 21 +++++++ 20 files changed, 544 insertions(+), 35 deletions(-) create mode 100644 tests/integration/test_db_connection.py create mode 100644 tests/integration/test_db_qos.py delete mode 100644 tests/unit/db/test_db_job.py create mode 100644 tests/unit/test_db_job.py create mode 100644 tests/unit/test_db_qos.py rename tests/unit/{db/test_slurm_list.py => test_db_slurm_list.py} (55%) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 124c4235..bf70149c 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,3 +1,24 @@ +######################################################################### +# conftest.py - pytest fixtures +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ import pytest from pyslurm import ( Job, diff --git a/tests/integration/test_db_connection.py b/tests/integration/test_db_connection.py new file mode 100644 index 00000000..876ec63d --- /dev/null +++ b/tests/integration/test_db_connection.py @@ -0,0 +1,56 @@ +######################################################################### +# test_db_connection.py - database connection api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_db_connection.py - Test database connectin api functionalities.""" + +import pytest +import pyslurm + + +def test_create_instance(): + with pytest.raises(RuntimeError): + pyslurm.db.Connection() + + +def test_open(): + conn = pyslurm.db.Connection.open() + assert conn.is_open + + +def test_close(): + conn = pyslurm.db.Connection.open() + assert conn.is_open + + conn.close() + assert not conn.is_open + # no-op + conn.close() + + +def test_commit(): + conn = pyslurm.db.Connection.open() + assert conn.is_open + conn.commit() + + +def test_rollback(): + conn = pyslurm.db.Connection.open() + assert conn.is_open + conn.rollback() diff --git a/tests/integration/test_db_job.py b/tests/integration/test_db_job.py index 650db25f..2c84ef4f 100644 --- a/tests/integration/test_db_job.py +++ b/tests/integration/test_db_job.py @@ -1,3 +1,23 @@ +######################################################################### +# test_db_job.py - database job api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
"""test_db_job.py - Unit test database job api functionalities.""" import pytest diff --git a/tests/integration/test_db_qos.py b/tests/integration/test_db_qos.py new file mode 100644 index 00000000..5bbd69e4 --- /dev/null +++ b/tests/integration/test_db_qos.py @@ -0,0 +1,55 @@ +######################################################################### +# test_db_qos.py - database qos api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_db_qos.py - Integration test database qos api functionalities.""" + +import pytest +import pyslurm +import time +import util + + +def test_load_single(): + qos = pyslurm.db.QualityOfService.load("normal") + + assert qos.name == "normal" + assert qos.id == 1 + + with pytest.raises(pyslurm.RPCError): + pyslurm.db.QualityOfService.load("qos_non_existent") + + +def test_parse_all(submit_job): + qos = pyslurm.db.QualityOfService.load("normal") + qos_dict = qos.as_dict() + + assert qos_dict + assert qos_dict["name"] == qos.name + + +def test_load_all(): + qos = pyslurm.db.QualitiesOfService.load() + assert qos + + +def test_load_with_filter_name(): + qfilter = pyslurm.db.QualityOfServiceSearchFilter(names=["non_existent"]) + qos = pyslurm.db.QualitiesOfService.load(qfilter) + assert not qos diff --git a/tests/integration/test_job.py b/tests/integration/test_job.py index b1c9efd8..15c4bdef 100644 --- a/tests/integration/test_job.py +++ b/tests/integration/test_job.py @@ -1,3 +1,23 @@ +######################################################################### +# test_job.py - job api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
"""test_job.py - Integration test job api functionalities.""" import time diff --git a/tests/integration/test_job_steps.py b/tests/integration/test_job_steps.py index 5a33b386..4ad2de39 100644 --- a/tests/integration/test_job_steps.py +++ b/tests/integration/test_job_steps.py @@ -1,3 +1,23 @@ +######################################################################### +# test_job_steps.py - job steps api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """test_job_steps.py - Test the job steps api functions.""" import pytest diff --git a/tests/integration/test_job_submit.py b/tests/integration/test_job_submit.py index 9a8d1f9e..d2f7c98b 100644 --- a/tests/integration/test_job_submit.py +++ b/tests/integration/test_job_submit.py @@ -1,3 +1,23 @@ +######################################################################### +# test_job_submit.py - job submit api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """test_job_submit.py - Test the job submit api functions.""" import time @@ -14,8 +34,10 @@ def test_submit_example1(): + # TODO assert True def test_submit_example2(): + # TODO assert True diff --git a/tests/integration/test_node.py b/tests/integration/test_node.py index 501cc90d..3e1306da 100644 --- a/tests/integration/test_node.py +++ b/tests/integration/test_node.py @@ -1,3 +1,23 @@ +######################################################################### +# test_node.py - node api integration tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """test_node.py - Test the node api functions.""" import sys diff --git a/tests/integration/util.py b/tests/integration/util.py index 391bfc9f..f5032f1a 100644 --- a/tests/integration/util.py +++ b/tests/integration/util.py @@ -1,3 +1,24 @@ +######################################################################### +# util.py - utility functions for tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + import pytest from pyslurm import ( Job, diff --git a/tests/unit/db/test_db_job.py b/tests/unit/db/test_db_job.py deleted file mode 100644 index e259b137..00000000 --- a/tests/unit/db/test_db_job.py +++ /dev/null @@ -1,32 +0,0 @@ -"""test_db_job.py - Unit test basic database job functionalities.""" - -import pytest -import pyslurm - - -def test_search_filter(): - job_filter = pyslurm.db.JobSearchFilter() - - job_filter.clusters = ["test1"] - job_filter.partitions = ["partition1", "partition2"] - job_filter._create() - - job_filter.ids = [1000, 1001] - job_filter._create() - - job_filter.with_script = True - job_filter._create() - - job_filter.with_env = True - with pytest.raises(ValueError): - job_filter._create() - - -def test_collection_init(): - # TODO - assert True - - -def test_create_instance(): - job = pyslurm.db.Job(9999) - assert job.id == 9999 diff --git a/tests/unit/test_common.py b/tests/unit/test_common.py index dddce37c..7875ad4d 100644 --- a/tests/unit/test_common.py +++ b/tests/unit/test_common.py @@ -1,3 +1,23 @@ +######################################################################### +# test_common.py - common utility tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """test_common.py - Test the most commonly used helper functions.""" import pyslurm diff --git a/tests/unit/test_db_job.py b/tests/unit/test_db_job.py new file mode 100644 index 00000000..43ea5227 --- /dev/null +++ b/tests/unit/test_db_job.py @@ -0,0 +1,52 @@ +######################################################################### +# test_db_job.py - database job unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_db_job.py - Unit test basic database job functionalities.""" + +import pytest +import pyslurm + + +def test_search_filter(): + job_filter = pyslurm.db.JobSearchFilter() + + job_filter.clusters = ["test1"] + job_filter.partitions = ["partition1", "partition2"] + job_filter._create() + + job_filter.ids = [1000, 1001] + job_filter._create() + + job_filter.with_script = True + job_filter._create() + + job_filter.with_env = True + with pytest.raises(ValueError): + job_filter._create() + + +def test_collection_init(): + # TODO + assert True + + +def test_create_instance(): + job = pyslurm.db.Job(9999) + assert job.id == 9999 diff --git a/tests/unit/test_db_qos.py b/tests/unit/test_db_qos.py new file mode 100644 index 00000000..acf12fea --- /dev/null +++ b/tests/unit/test_db_qos.py @@ -0,0 +1,49 @@ +######################################################################### +# test_db_qos.py - database qos unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+"""test_db_qos.py - Unit test basic database qos functionalities.""" + +import pytest +import pyslurm + + +def test_search_filter(): + qos_filter = pyslurm.db.QualityOfServiceSearchFilter() + qos_filter._create() + + qos_filter.ids = [1, 2] + qos_filter._create() + + qos_filter.preempt_modes = ["cluster"] + qos_filter._create() + + with pytest.raises(ValueError): + qos_filter.preempt_modes = ["invalid_preempt_mode"] + qos_filter._create() + + +def test_create_collection_instance(): + # TODO + assert True + + +def test_create_instance(): + qos = pyslurm.db.QualityOfService("test") + assert qos.name == "test" diff --git a/tests/unit/db/test_slurm_list.py b/tests/unit/test_db_slurm_list.py similarity index 55% rename from tests/unit/db/test_slurm_list.py rename to tests/unit/test_db_slurm_list.py index 57a47837..41df371c 100644 --- a/tests/unit/db/test_slurm_list.py +++ b/tests/unit/test_db_slurm_list.py @@ -1,4 +1,24 @@ -"""test_slurm_List.py - Unit test basic Slurm list functionalities.""" +######################################################################### +# test_db_slurm_list.py - Slurm list tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+"""test_db_slurm_List.py - Unit test basic Slurm list functionalities.""" import pytest import pyslurm @@ -51,6 +71,7 @@ def test_iter(): slist = SlurmList(input_list) assert slist.itr_cnt == 0 assert slist.is_itr_null + assert not slist.is_null assert slist.cnt == 3 for idx, slurm_item in enumerate(slist): @@ -61,6 +82,14 @@ def test_iter(): assert slist.itr_cnt == 0 assert slist.is_itr_null + slist._dealloc_list() + assert slist.is_null + assert slist.cnt == 0 + + for item in slist: + # Should not be possible to get here + assert False + def test_iter_and_pop(): input_list = ["user1", "user2", "user3"] @@ -82,9 +111,24 @@ def test_iter_and_pop(): assert slist.itr_cnt == 0 assert slist.cnt == 2 - for idx, slurm_item in enumerate(SlurmList.iter_and_pop(slist)): + for slurm_item in SlurmList.iter_and_pop(slist): assert slurm_item.has_data assert slist.cnt == 0 assert slist.itr_cnt == 0 assert slist.is_itr_null + + +def test_iter_and_pop_on_null_list(): + input_list = ["user1", "user2", "user3"] + slist = SlurmList(input_list) + assert not slist.is_null + assert slist.cnt == 3 + + slist._dealloc_list() + assert slist.is_null + assert slist.cnt == 0 + + for slurm_item in SlurmList.iter_and_pop(slist): + # Should not be possible to get here + assert False diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index e072ba4d..edcf65d4 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -1,3 +1,23 @@ +######################################################################### +# test_job.py - job unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """test_job.py - Unit test basic job functionalities.""" import pytest diff --git a/tests/unit/test_job_steps.py b/tests/unit/test_job_steps.py index 10e6f9ff..c222ef34 100644 --- a/tests/unit/test_job_steps.py +++ b/tests/unit/test_job_steps.py @@ -1,3 +1,23 @@ +######################################################################### +# test_job_steps.py - job steps unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """test_job_steps.py - Unit test basic job step functionality.""" import pytest diff --git a/tests/unit/test_job_submit.py b/tests/unit/test_job_submit.py index 9be67f5f..d0daf41b 100644 --- a/tests/unit/test_job_submit.py +++ b/tests/unit/test_job_submit.py @@ -1,3 +1,23 @@ +######################################################################### +# test_job_submit.py - job submission unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """test_job_submit.py - Test the job submit api functions.""" import sys diff --git a/tests/unit/test_node.py b/tests/unit/test_node.py index 170ff78c..2caf8d37 100644 --- a/tests/unit/test_node.py +++ b/tests/unit/test_node.py @@ -1,4 +1,24 @@ -"""test_node.py - Test the node api functions.""" +######################################################################### +# test_node.py - node unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""test_node.py - Unit Test basic functionality of the Node class.""" import pytest import pyslurm diff --git a/tests/unit/test_task_dist.py b/tests/unit/test_task_dist.py index 4779ba22..52a3e07c 100644 --- a/tests/unit/test_task_dist.py +++ b/tests/unit/test_task_dist.py @@ -1,3 +1,23 @@ +######################################################################### +# test_task_dist.py - task distribution unit tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """test_task_dist.py - Test task distribution functions.""" import pyslurm diff --git a/tests/unit/util.py b/tests/unit/util.py index f2db880b..d142a3a4 100644 --- a/tests/unit/util.py +++ b/tests/unit/util.py @@ -1,3 +1,24 @@ +######################################################################### +# util.py - utility functions for tests +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + import pytest from pyslurm import ( Job, From 9c48bbc679697043f3eeea08fa0b81a41f3401dd Mon Sep 17 00:00:00 2001 From: Toni Harzendorf Date: Sat, 29 Apr 2023 17:40:39 +0200 Subject: [PATCH 24/28] wip --- pyslurm/api.pxd | 11 ++++++----- pyslurm/api.pyx | 18 ++++++++---------- pyslurm/core/common/__init__.pxd | 12 +++++++----- pyslurm/core/common/__init__.pyx | 12 +++++++----- pyslurm/core/common/cstr.pxd | 8 +++++--- pyslurm/core/common/cstr.pyx | 8 +++++--- pyslurm/core/common/ctime.pxd | 12 +++++++----- pyslurm/core/common/ctime.pyx | 12 +++++++----- pyslurm/core/common/uint.pxd | 12 +++++++----- pyslurm/core/common/uint.pyx | 12 +++++++----- pyslurm/core/db/__init__.py | 22 ++++++++++++++++++++++ pyslurm/core/db/connection.pxd | 11 ++++++----- pyslurm/core/db/connection.pyx | 10 ++++++---- pyslurm/core/db/job.pxd | 8 +++++--- pyslurm/core/db/job.pyx | 15 +++++---------- pyslurm/core/db/qos.pxd | 8 +++++--- pyslurm/core/db/qos.pyx | 26 ++++++++++++++++++++------ pyslurm/core/db/stats.pxd | 8 +++++--- pyslurm/core/db/stats.pyx | 19 +++++++++++++------ pyslurm/core/db/step.pxd | 10 ++++++---- pyslurm/core/db/step.pyx | 13 ++++++++----- pyslurm/core/db/tres.pxd | 5 ++--- pyslurm/core/db/tres.pyx | 10 ++++++---- pyslurm/core/db/util.pxd | 4 ++-- pyslurm/core/db/util.pyx | 26 +++++++++++++++++++------- pyslurm/core/error.pyx | 8 +++++--- pyslurm/core/job/job.pxd | 8 +++++--- pyslurm/core/job/job.pyx | 8 +++++--- pyslurm/core/job/sbatch_opts.pyx | 12 +++++++----- pyslurm/core/job/step.pxd | 10 ++++++---- pyslurm/core/job/step.pyx | 10 ++++++---- pyslurm/core/job/submission.pxd | 12 +++++++----- pyslurm/core/job/submission.pyx | 8 +++++--- pyslurm/core/job/task_dist.pxd | 8 +++++--- pyslurm/core/job/task_dist.pyx | 8 +++++--- pyslurm/core/job/util.pyx | 8 +++++--- pyslurm/core/node.pxd | 12 +++++++----- pyslurm/core/node.pyx | 8 +++++--- pyslurm/core/slurmctld.pxd | 10 
++++++---- pyslurm/core/slurmctld.pyx | 10 ++++++---- 40 files changed, 281 insertions(+), 171 deletions(-) diff --git a/pyslurm/api.pxd b/pyslurm/api.pxd index 7afe0752..9b19ec9a 100644 --- a/pyslurm/api.pxd +++ b/pyslurm/api.pxd @@ -1,25 +1,26 @@ ######################################################################### # api.pxd - pyslurm core API ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 -# cython: embedsignature=True from pyslurm cimport slurm from pyslurm.core.common cimport cstr diff --git a/pyslurm/api.pyx b/pyslurm/api.pyx index 716943b5..0f34fedb 100644 --- a/pyslurm/api.pyx +++ b/pyslurm/api.pyx @@ -1,30 +1,30 @@ ######################################################################### # api.pyx - pyslurm core API ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 -# cython: embedsignature=True def slurm_init(config_path=None): - """ - Initialize the Slurm API. + """Initialize the Slurm API. This function must be called first before certain RPC functions can be executed. slurm_init is automatically called when the pyslurm module is @@ -39,7 +39,5 @@ def slurm_init(config_path=None): def slurm_fini(): - """ - Clean up data structures previously allocated through slurm_init. 
- """ + """Clean up data structures previously allocated through slurm_init.""" slurm.slurm_fini() diff --git a/pyslurm/core/common/__init__.pxd b/pyslurm/core/common/__init__.pxd index b4ea27fa..7915de2f 100644 --- a/pyslurm/core/common/__init__.pxd +++ b/pyslurm/core/common/__init__.pxd @@ -1,23 +1,25 @@ ######################################################################### # common/__init__.pxd - common/utility functions ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from pyslurm cimport slurm diff --git a/pyslurm/core/common/__init__.pyx b/pyslurm/core/common/__init__.pyx index 0bf4e7d2..e8461d95 100644 --- a/pyslurm/core/common/__init__.pyx +++ b/pyslurm/core/common/__init__.pyx @@ -1,23 +1,25 @@ ######################################################################### # common/__init__.pyx - common/utility functions ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from grp import getgrgid, getgrnam, getgrall diff --git a/pyslurm/core/common/cstr.pxd b/pyslurm/core/common/cstr.pxd index ca20d4a9..b1719bde 100644 --- a/pyslurm/core/common/cstr.pxd +++ b/pyslurm/core/common/cstr.pxd @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/common/cstr.pyx b/pyslurm/core/common/cstr.pyx index bf9d2884..7cd09d7f 100644 --- a/pyslurm/core/common/cstr.pyx +++ b/pyslurm/core/common/cstr.pyx @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/common/ctime.pxd b/pyslurm/core/common/ctime.pxd index ee0600cb..d8abb12d 100644 --- a/pyslurm/core/common/ctime.pxd +++ b/pyslurm/core/common/ctime.pxd @@ -1,23 +1,25 @@ ######################################################################### # ctime.pxd - wrappers around slurm time functions ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from pyslurm cimport slurm diff --git a/pyslurm/core/common/ctime.pyx b/pyslurm/core/common/ctime.pyx index faf5a7a1..fdf68834 100644 --- a/pyslurm/core/common/ctime.pyx +++ b/pyslurm/core/common/ctime.pyx @@ -1,23 +1,25 @@ ######################################################################### # ctime.pyx - wrappers around slurm time functions ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 import datetime diff --git a/pyslurm/core/common/uint.pxd b/pyslurm/core/common/uint.pxd index a18b5a72..0fd38739 100644 --- a/pyslurm/core/common/uint.pxd +++ b/pyslurm/core/common/uint.pxd @@ -1,23 +1,25 @@ ######################################################################### # common/uint.pxd - functions dealing with parsing uint types ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from pyslurm cimport slurm diff --git a/pyslurm/core/common/uint.pyx b/pyslurm/core/common/uint.pyx index b5ff3e1c..7418e109 100644 --- a/pyslurm/core/common/uint.pyx +++ b/pyslurm/core/common/uint.pyx @@ -1,23 +1,25 @@ ######################################################################### # common/uint.pyx - functions dealing with parsing uint types ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 diff --git a/pyslurm/core/db/__init__.py b/pyslurm/core/db/__init__.py index 60db7389..a742f72b 100644 --- a/pyslurm/core/db/__init__.py +++ b/pyslurm/core/db/__init__.py @@ -1,3 +1,25 @@ +######################################################################### +# db/__init__.py - database package __init__ file +######################################################################### +# Copyright (C) 2023 Toni Harzendorf +# +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# PySlurm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with PySlurm; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from pyslurm.core.db.connection import Connection from pyslurm.core.db.step import JobStep from pyslurm.core.db.job import ( Job, diff --git a/pyslurm/core/db/connection.pxd b/pyslurm/core/db/connection.pxd index 1a42f31f..6ac2dfc6 100644 --- a/pyslurm/core/db/connection.pxd +++ b/pyslurm/core/db/connection.pxd @@ -1,14 +1,16 @@ ######################################################################### -# connection.pyx - pyslurm slurmdbd database connection +# connection.pxd - pyslurm slurmdbd database connection ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
@@ -19,7 +21,6 @@ # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 -# cython: embedsignature=True from pyslurm cimport slurm from libc.stdint cimport uint16_t diff --git a/pyslurm/core/db/connection.pyx b/pyslurm/core/db/connection.pyx index d600921c..ff32dd92 100644 --- a/pyslurm/core/db/connection.pyx +++ b/pyslurm/core/db/connection.pyx @@ -1,20 +1,22 @@ ######################################################################### # connection.pyx - pyslurm slurmdbd database connection ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/db/job.pxd b/pyslurm/core/db/job.pxd index a5ba9405..2b220a05 100644 --- a/pyslurm/core/db/job.pxd +++ b/pyslurm/core/db/job.pxd @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index 5554f5a0..e7a53855 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default @@ -165,8 +167,6 @@ cdef class JobSearchFilter: if self.with_script: ptr.flags |= slurm.JOBCOND_FLAG_SCRIPT elif self.with_env: - # TODO: implement a new "envrironment" attribute in the job - # class ptr.flags |= slurm.JOBCOND_FLAG_ENV ptr.step_list = slurm_list_create(slurm_destroy_selected_step) @@ -216,8 +216,6 @@ cdef class Jobs(dict): JobSearchFilter cond SlurmListItem job_ptr QualitiesOfService qos_data - int cpu_tres_rec_count = 0 - int step_cpu_tres_rec_count = 0 if search_filter: cond = search_filter @@ -234,9 +232,6 @@ cdef class Jobs(dict): qos_data = QualitiesOfService.load(name_is_key=False, db_connection=jobs.db_conn) - # tres_alloc_str = cstr.to_unicode() - # cpu_tres_rec_count - # TODO: also get trackable resources with slurmdb_tres_get and store # it in each job instance. tres_alloc_str and tres_req_str only # contain the numeric tres ids, but it probably makes more sense to diff --git a/pyslurm/core/db/qos.pxd b/pyslurm/core/db/qos.pxd index 00293a13..3ba59dc6 100644 --- a/pyslurm/core/db/qos.pxd +++ b/pyslurm/core/db/qos.pxd @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/db/qos.pyx b/pyslurm/core/db/qos.pyx index a81d1879..bd5a35de 100644 --- a/pyslurm/core/db/qos.pyx +++ b/pyslurm/core/db/qos.pyx @@ -3,24 +3,29 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from pyslurm.core.error import RPCError +from pyslurm.core.common import ( + instance_to_dict, +) cdef class QualitiesOfService(dict): @@ -85,8 +90,8 @@ cdef class QualityOfServiceSearchFilter: if not self.preempt_modes: return 0 - if not isinstance(self.preempt_modes, list): - return int(self.preempt_modes) + if isinstance(self.preempt_modes, int): + return self.preempt_modes out = 0 for mode in self.preempt_modes: @@ -141,6 +146,14 @@ cdef class QualityOfService: wrap.ptr = in_ptr return wrap + def as_dict(self): + """Database QualityOfService information formatted as a dictionary. + + Returns: + (dict): Database QualityOfService information as dict + """ + return instance_to_dict(self) + @staticmethod def load(name): """Load the information for a specific Quality of Service. @@ -157,7 +170,8 @@ cdef class QualityOfService: RPCError: If requesting the information from the database was not sucessful. """ - qos_data = QualitiesOfService.load(names=[name]) + qfilter = QualityOfServiceSearchFilter(names=[name]) + qos_data = QualitiesOfService.load(qfilter) if not qos_data or name not in qos_data: raise RPCError(msg=f"QualityOfService {name} does not exist") diff --git a/pyslurm/core/db/stats.pxd b/pyslurm/core/db/stats.pxd index 52d43627..1f321ab2 100644 --- a/pyslurm/core/db/stats.pxd +++ b/pyslurm/core/db/stats.pxd @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/db/stats.pyx b/pyslurm/core/db/stats.pyx index 46ceef2a..bd6606a0 100644 --- a/pyslurm/core/db/stats.pyx +++ b/pyslurm/core/db/stats.pyx @@ -3,24 +3,29 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 -from pyslurm.core.common import nodelist_from_range_str, instance_to_dict +from pyslurm.core.common import ( + nodelist_from_range_str, + instance_to_dict, +) cdef class JobStats: @@ -71,7 +76,6 @@ cdef class JobStats: if ave_freq != slurm.NO_VAL: wrap.avg_cpu_frequency = ptr.act_cpufreq - # Convert to MiB instead of raw bytes? wrap.avg_disk_read = TrackableResources.find_count_in_str( ptr.tres_usage_in_ave, slurm.TRES_FS_DISK) wrap.avg_disk_write = TrackableResources.find_count_in_str( @@ -196,5 +200,8 @@ cdef class JobStats: elapsed = job.elapsed_time if job.elapsed_time else 0 cpus = job.cpus if job.cpus else 0 job_stats.elapsed_cpu_time = elapsed * cpus - job_stats.avg_cpu_frequency /= len(steps) + + step_count = len(steps) + if step_count: + job_stats.avg_cpu_frequency /= step_count diff --git a/pyslurm/core/db/step.pxd b/pyslurm/core/db/step.pxd index 97ef9b50..77d45cd2 100644 --- a/pyslurm/core/db/step.pxd +++ b/pyslurm/core/db/step.pxd @@ -1,20 +1,22 @@ ######################################################################### # step.pxd - pyslurm slurmdbd step api ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
 #
 # You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
+# with PySlurm; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 #
 # cython: c_string_type=unicode, c_string_encoding=default
diff --git a/pyslurm/core/db/step.pyx b/pyslurm/core/db/step.pyx
index 6463ca8e..aa1bd612 100644
--- a/pyslurm/core/db/step.pyx
+++ b/pyslurm/core/db/step.pyx
@@ -1,20 +1,22 @@
 #########################################################################
 # step.pyx - pyslurm slurmdbd step api
 #########################################################################
-# Copyright (C) 2022 Toni Harzendorf
+# Copyright (C) 2023 Toni Harzendorf
 #
-# Pyslurm is free software; you can redistribute it and/or modify
+# This file is part of PySlurm
+#
+# PySlurm is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.

-# Pyslurm is distributed in the hope that it will be useful,
+# PySlurm is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
+# with PySlurm; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 #
 # cython: c_string_type=unicode, c_string_encoding=default
@@ -40,7 +42,8 @@ cdef class JobStep:
         self.ptr = NULL

     def __init__(self):
-        raise RuntimeError("You can not instantiate this class directly")
+        raise RuntimeError("You cannot instantiate this class directly "
+                           "at the moment")

     def __dealloc__(self):
         slurmdb_destroy_step_rec(self.ptr)
diff --git a/pyslurm/core/db/tres.pxd b/pyslurm/core/db/tres.pxd
index 82fb43ee..f08bb3df 100644
--- a/pyslurm/core/db/tres.pxd
+++ b/pyslurm/core/db/tres.pxd
@@ -3,12 +3,12 @@
 #########################################################################
 # Copyright (C) 2023 Toni Harzendorf
 #
-# Pyslurm is free software; you can redistribute it and/or modify
+# PySlurm is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.

-# Pyslurm is distributed in the hope that it will be useful,
+# PySlurm is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
@@ -19,7 +19,6 @@ # # cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 -# cython: embedsignature=True from pyslurm cimport slurm from pyslurm.core.common cimport cstr diff --git a/pyslurm/core/db/tres.pyx b/pyslurm/core/db/tres.pyx index 785d6dd7..1e77994b 100644 --- a/pyslurm/core/db/tres.pyx +++ b/pyslurm/core/db/tres.pyx @@ -1,20 +1,22 @@ ######################################################################### # tres.pyx - pyslurm slurmdbd tres api ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/db/util.pxd b/pyslurm/core/db/util.pxd index 60894058..deb71ed4 100644 --- a/pyslurm/core/db/util.pxd +++ b/pyslurm/core/db/util.pxd @@ -3,12 +3,12 @@ ######################################################################### # Copyright (C) 2022 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. diff --git a/pyslurm/core/db/util.pyx b/pyslurm/core/db/util.pyx index a37ee7e5..70978dbe 100644 --- a/pyslurm/core/db/util.pyx +++ b/pyslurm/core/db/util.pyx @@ -1,20 +1,22 @@ ######################################################################### -# util.pxd - pyslurm slurmdbd util functions +# util.pyx - pyslurm slurmdbd util functions ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
-# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default @@ -22,14 +24,15 @@ cdef make_char_list(List *in_list, vals): - if not in_list[0]: + if not vals: return None # Make a new SlurmList wrapper with the values cdef SlurmList slist = SlurmList(vals) # Make sure the previous list is deallocated - slurm_list_destroy(in_list[0]) + if in_list[0]: + slurm_list_destroy(in_list[0]) # Assign the pointer from slist to in_list, and give up ownership of slist in_list[0] = slist.info @@ -58,6 +61,10 @@ cdef class SlurmListItem: else: return False + def to_str(self): + cdef char* entry = self.data + return cstr.to_unicode(entry) + cdef class SlurmList: """Convenience Wrapper around slurms List type""" @@ -90,10 +97,15 @@ cdef class SlurmList: def __iter__(self): self._dealloc_itr() - self.itr = slurm_list_iterator_create(self.info) + if not self.is_null: + self.itr = slurm_list_iterator_create(self.info) + return self def __next__(self): + if self.is_null or self.is_itr_null: + raise StopIteration + if self.itr_cnt < self.cnt: self.itr_cnt += 1 return SlurmListItem.from_ptr(slurm_list_next(self.itr)) diff --git a/pyslurm/core/error.pyx b/pyslurm/core/error.pyx index 4ba24277..69130abd 100644 --- a/pyslurm/core/error.pyx +++ b/pyslurm/core/error.pyx @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2022 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/job/job.pxd b/pyslurm/core/job/job.pxd index bcb3218f..c41c8ced 100644 --- a/pyslurm/core/job/job.pxd +++ b/pyslurm/core/job/job.pxd @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx index 4c8b1188..83902e7b 100644 --- a/pyslurm/core/job/job.pyx +++ b/pyslurm/core/job/job.pyx @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/job/sbatch_opts.pyx b/pyslurm/core/job/sbatch_opts.pyx index 9f0495cd..9af607be 100644 --- a/pyslurm/core/job/sbatch_opts.pyx +++ b/pyslurm/core/job/sbatch_opts.pyx @@ -1,23 +1,25 @@ ######################################################################### # sbatch_opt.pyx - utilities to parse #SBATCH options ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
-# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 import re diff --git a/pyslurm/core/job/step.pxd b/pyslurm/core/job/step.pxd index ae0101c9..4cdd6c49 100644 --- a/pyslurm/core/job/step.pxd +++ b/pyslurm/core/job/step.pxd @@ -3,21 +3,23 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t diff --git a/pyslurm/core/job/step.pyx b/pyslurm/core/job/step.pyx index 88d795e9..d84330b1 100644 --- a/pyslurm/core/job/step.pyx +++ b/pyslurm/core/job/step.pyx @@ -3,21 +3,23 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from libc.string cimport memcpy, memset diff --git a/pyslurm/core/job/submission.pxd b/pyslurm/core/job/submission.pxd index f10a24d4..bddabb77 100644 --- a/pyslurm/core/job/submission.pxd +++ b/pyslurm/core/job/submission.pxd @@ -1,23 +1,25 @@ ######################################################################### # submission.pxd - interface for submitting slurm jobs ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from pyslurm cimport slurm diff --git a/pyslurm/core/job/submission.pyx b/pyslurm/core/job/submission.pyx index d4a19743..0803e0bf 100644 --- a/pyslurm/core/job/submission.pyx +++ b/pyslurm/core/job/submission.pyx @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/job/task_dist.pxd b/pyslurm/core/job/task_dist.pxd index 27fa8626..5fe76488 100644 --- a/pyslurm/core/job/task_dist.pxd +++ b/pyslurm/core/job/task_dist.pxd @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/job/task_dist.pyx b/pyslurm/core/job/task_dist.pyx index f1da4ae7..0c46cbc8 100644 --- a/pyslurm/core/job/task_dist.pyx +++ b/pyslurm/core/job/task_dist.pyx @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/job/util.pyx b/pyslurm/core/job/util.pyx index 43dc8489..404869a4 100644 --- a/pyslurm/core/job/util.pyx +++ b/pyslurm/core/job/util.pyx @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/node.pxd b/pyslurm/core/node.pxd index 04568227..3f39ece7 100644 --- a/pyslurm/core/node.pxd +++ b/pyslurm/core/node.pxd @@ -1,23 +1,25 @@ ######################################################################### # node.pxd - interface to work with nodes in slurm ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# cython: c_string_type=unicode, c_string_encoding=utf8 +# cython: c_string_type=unicode, c_string_encoding=default # cython: language_level=3 from libc.string cimport memcpy, memset diff --git a/pyslurm/core/node.pyx b/pyslurm/core/node.pyx index d62df45c..9419f0c8 100644 --- a/pyslurm/core/node.pyx +++ b/pyslurm/core/node.pyx @@ -3,18 +3,20 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/slurmctld.pxd b/pyslurm/core/slurmctld.pxd index f93a600d..f65655c8 100644 --- a/pyslurm/core/slurmctld.pxd +++ b/pyslurm/core/slurmctld.pxd @@ -1,20 +1,22 @@ ######################################################################### # slurmctld.pxd - pyslurm slurmctld api ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # cython: c_string_type=unicode, c_string_encoding=default diff --git a/pyslurm/core/slurmctld.pyx b/pyslurm/core/slurmctld.pyx index f6a4559b..2b5367c5 100644 --- a/pyslurm/core/slurmctld.pyx +++ b/pyslurm/core/slurmctld.pyx @@ -1,20 +1,22 @@ ######################################################################### # slurmctld.pyx - pyslurm slurmctld api ######################################################################### -# Copyright (C) 2022 Toni Harzendorf +# Copyright (C) 2023 Toni Harzendorf # -# Pyslurm is free software; you can redistribute it and/or modify +# This file is part of PySlurm +# +# PySlurm is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# Pyslurm is distributed in the hope that it will be useful, +# PySlurm is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., +# with PySlurm; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# # cython: c_string_type=unicode, c_string_encoding=default From cc09454cc4afbbc186645fd289166605232e5749 Mon Sep 17 00:00:00 2001 From: Toni Harzendorf Date: Sun, 30 Apr 2023 16:54:43 +0200 Subject: [PATCH 25/28] wip --- pyslurm/core/db/job.pyx | 6 +- pyslurm/core/job/job.pyx | 53 ++++- pyslurm/core/job/submission.pyx | 7 +- pyslurm/slurm/SLURM_DISCLAIMER | 159 +++++++++++++ pyslurm/slurm/SLURM_LICENSE | 389 ++++++++++++++++++++++++++++++++ 5 files changed, 605 insertions(+), 9 deletions(-) create mode 100644 pyslurm/slurm/SLURM_DISCLAIMER create mode 100644 pyslurm/slurm/SLURM_LICENSE diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index e7a53855..ca0fec33 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -102,9 +102,9 @@ cdef class JobSearchFilter: uid_list = [] for user in self.users: - if isinstance(user, int): - uid_list.append(user) - else: + if not isinstance(user, str): + uid_list.append(int(user)) + elif user: + uid_list.append(user_to_uid(user)) return uid_list diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx index 83902e7b..3521caf6 100644 --- a/pyslurm/core/job/job.pyx +++ b/pyslurm/core/job/job.pyx @@ -3,6 +3,12 @@ ######################################################################### # Copyright (C) 2023 Toni Harzendorf # +# Note: Some functions in this file are annotated with additional copyright +# notices. These functions are: +# +# - get_batch_script +# - get_resource_layout_per_node +# # This file is part of PySlurm # # PySlurm is free software; you can redistribute it and/or modify @@ -552,10 +558,29 @@ cdef class Job: >>> from pyslurm import Job >>> script = Job(9999).get_batch_script() """ - # This reimplements the slurm_job_batch_script API call. Otherwise we - # would have to parse back the FILE* ptr we get from it back into a - # char* which would be a bit silly. - # Source: https://github.com/SchedMD/slurm/blob/7162f15af8deaf02c3bbf940d59e818cdeb5c69d/src/api/job_info.c#L1319 + # The code for this function was taken from here: + # https://github.com/SchedMD/slurm/blob/7162f15af8deaf02c3bbf940d59e818cdeb5c69d/src/api/job_info.c#L1319 + # and therefore reimplements the slurm_job_batch_script API call, with + # slight modifications (e.g. Cython syntax). Otherwise we would have + # to parse the FILE* ptr we get from it back into a char* which + # would be a bit silly. + # + # The copyright notices for the file this function was taken from are + # included below: + # + # Portions Copyright (C) 2010-2017 SchedMD LLC . + # Copyright (C) 2002-2007 The Regents of the University of California. + # Copyright (C) 2008-2010 Lawrence Livermore National Security. + # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + # Written by Morris Jette et. al. + # CODE-OCEC-09-009. All rights reserved. + # + # Slurm is licensed under the GNU General Public License. For the full + # text of Slurm's License, please see here: + # pyslurm/slurm/SLURM_LICENSE + # + # Please, as mentioned above, also have a look at Slurm's DISCLAIMER + # under pyslurm/slurm/SLURM_DISCLAIMER cdef: job_id_msg_t msg slurm_msg_t req @@ -1212,6 +1237,26 @@ cdef class Job: Returns: (dict): Resource layout """ + # The code for this function is a modified reimplementation from here: + # https://github.com/SchedMD/slurm/blob/d525b6872a106d32916b33a8738f12510ec7cf04/src/api/job_info.c#L739 + # + # The copyright notices for the file that contains the original code + # are below: + # + # Portions Copyright (C) 2010-2017 SchedMD LLC .
+ # Copyright (C) 2002-2007 The Regents of the University of California. + # Copyright (C) 2008-2010 Lawrence Livermore National Security. + # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + # Written by Morris Jette et. al. + # CODE-OCEC-09-009. All rights reserved. + # + # Slurm is licensed under the GNU General Public License. For the full + # text of Slurm's License, please see here: + # pyslurm/slurm/SLURM_LICENSE + # + # Please, as mentioned above, also have a look at Slurm's DISCLAIMER + # under pyslurm/slurm/SLURM_DISCLAIMER + # # TODO: Explain the structure of the return value a bit more. cdef: slurm.job_resources *resources = self.ptr.job_resrcs diff --git a/pyslurm/core/job/submission.pyx b/pyslurm/core/job/submission.pyx index 0803e0bf..1f09e898 100644 --- a/pyslurm/core/job/submission.pyx +++ b/pyslurm/core/job/submission.pyx @@ -163,6 +163,11 @@ cdef class JobSubmitDescription: if self.site_factor: ptr.site_factor = slurm.NICE_OFFSET + int(self.site_factor) + if self.uid is not None: + ptr.user_id = user_to_uid(self.uid) + if self.gid is not None: + ptr.group_id = group_to_gid(self.gid) + cstr.fmalloc(&ptr.name, self.name) cstr.fmalloc(&ptr.account, self.account) cstr.fmalloc(&ptr.wckey, self.wckey) @@ -205,8 +210,6 @@ cdef class JobSubmitDescription: ptr.time_limit = timestr_to_mins(self.time_limit) ptr.time_min = timestr_to_mins(self.time_limit_min) - ptr.user_id = user_to_uid(self.uid) - ptr.group_id = group_to_gid(self.gid) ptr.priority = u32(self.priority, zero_is_noval=False) ptr.num_tasks = u32(self.ntasks) ptr.pn_min_tmp_disk = u32(dehumanize(self.temporary_disk_per_node)) diff --git a/pyslurm/slurm/SLURM_DISCLAIMER b/pyslurm/slurm/SLURM_DISCLAIMER new file mode 100644 index 00000000..5fb615d5 --- /dev/null +++ b/pyslurm/slurm/SLURM_DISCLAIMER @@ -0,0 +1,159 @@ +Slurm was produced at Lawrence Livermore National Laboratory in collaboration +with various organizations. + +Copyright (C) 2012-2013 Los Alamos National Security, LLC. +Copyright (C) 2011 Trinity Centre for High Performance Computing +Copyright (C) 2010-2015 SchedMD LLC +Copyright (C) 2009-2013 CEA/DAM/DIF +Copyright (C) 2009-2011 Centro Svizzero di Calcolo Scientifico (CSCS) +Copyright (C) 2008-2011 Lawrence Livermore National Security +Copyright (C) 2008 Vijay Ramasubramanian +Copyright (C) 2007-2008 Red Hat, Inc. +Copyright (C) 2007-2013 National University of Defense Technology, China +Copyright (C) 2007-2015 Bull +Copyright (C) 2005-2008 Hewlett-Packard Development Company, L.P. +Copyright (C) 2004-2009, Marcus Holland-Moritz +Copyright (C) 2002-2007 The Regents of the University of California +Copyright (C) 2002-2003 Linux NetworX +Copyright (C) 2002 University of Chicago +Copyright (C) 2001, Paul Marquess +Copyright (C) 2000 Markus Friedl +Copyright (C) 1999, Kenneth Albanowski +Copyright (C) 1998 Todd C. Miller +Copyright (C) 1996-2003 Maximum Entropy Data Consultants Ltd, +Copyright (C) 1995 Tatu Ylonen , Espoo, Finland +Copyright (C) 1989-1994, 1996-1999, 2001 Free Software Foundation, Inc. +Many other organizations contributed code and/or documentation without +including a copyright notice. 
+ +Written by: +Amjad Majid Ali (Colorado State University) +Par Andersson (National Supercomputer Centre, Sweden) +Don Albert (Bull) +Ernest Artiaga (Barcelona Supercomputer Center, Spain) +Danny Auble (LLNL, SchedMD LLC) +Susanne Balle (HP) +Anton Blanchard (Samba) +Janne Blomqvist (Aalto University, Finland) +David Bremer (LLNL) +Jon Bringhurst (LANL) +Bill Brophy (Bull) +Hongjia Cao (National University of Defense Techonogy, China) +Daniel Christians (HP) +Gilles Civario (Bull) +Chuck Clouston (Bull) +Joseph Donaghy (LLNL) +Chris Dunlap (LLNL) +Joey Ekstrom (LLNL/Bringham Young University) +Josh England (TGS Management Corporation) +Kent Engstrom (National Supercomputer Centre, Sweden) +Jim Garlick (LLNL) +Didier Gazen (Laboratoire d'Aerologie, France) +Raphael Geissert (Debian) +Yiannis Georgiou (Bull) +Andriy Grytsenko (Massive Solutions Limited, Ukraine) +Mark Grondona (LLNL) +Takao Hatazaki (HP, Japan) +Matthieu Hautreux (CEA, France) +Chris Holmes (HP) +David Hoppner +Nathan Huff (North Dakota State University) +David Jackson (Adaptive Computing) +Morris Jette (LLNL, SchedMD LLC) +Klaus Joas (University Karlsruhe, Germany) +Greg Johnson (LANL) +Jason King (LLNL) +Aaron Knister (Environmental Protection Agency) +Nancy Kritkausky (Bull) +Roman Kurakin (Institute of Natural Science and Ecology, Russia) +Eric Lin (Bull) +Don Lipari (LLNL) +Puenlap Lee (Bull) +Dennis Leepow +Bernard Li (Genome Sciences Centre, Canada) +Donald Lipari (LLNL) +Steven McDougall (SiCortex) +Donna Mecozzi (LLNL) +Bjorn-Helge Mevik (University of Oslo, Norway) +Chris Morrone (LLNL) +Pere Munt (Barcelona Supercomputer Center, Spain) +Michal Novotny (Masaryk University, Czech Republic) +Bryan O'Sullivan (Pathscale) +Gennaro Oliva (Institute of High Performance Computing and Networking, Italy) +Alejandro Lucero Palau (Barcelona Supercomputer Center, Spain) +Daniel Palermo (HP) +Dan Phung (LLNL/Columbia University) +Ashley Pittman (Quadrics, UK) +Vijay Ramasubramanian (University of Maryland) +Krishnakumar Ravi[KK] (HP) +Petter Reinholdtsen (University of Oslo, Norway) +Gerrit Renker (Swiss National Computer Centre) +Andy Riebs (HP) +Asier Roa (Barcelona Supercomputer Center, Spain) +Miguel Ros (Barcelona Supercomputer Center, Spain) +Beat Rubischon (DALCO AG, Switzerland) +Dan Rusak (Bull) +Eygene Ryabinkin (Kurchatov Institute, Russia) +Federico Sacerdoti (D.E. Shaw) +Rod Schultz (Bull) +Tyler Strickland (University of Florida) +Jeff Squyres (LAM MPI) +Prashanth Tamraparni (HP, India) +Jimmy Tang (Trinity College, Ireland) +Kevin Tew (LLNL/Bringham Young University) +Adam Todorski (Rensselaer Polytechnic Institute) +Nathan Weeks (Iowa State University) +Tim Wickberg (Rensselaer Polytechnic Institute) +Ramiro Brito Willmersdorf (Universidade Federal de Pemambuco, Brazil) +Jay Windley (Linux NetworX) +Anne-Marie Wunderlin (Bull) + +CODE-OCEC-09-009. All rights reserved. + +This file is part of Slurm, a resource management program. +For details, see . +Please also read the supplied file: DISCLAIMER. + +Slurm is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) +any later version. + +Slurm is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. 
+ +You should have received a copy of the GNU General Public License along +with Slurm; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +OUR NOTICE AND TERMS OF AND CONDITIONS OF THE GNU GENERAL PUBLIC LICENSE + +Our Preamble Notice + +Auspices + +This work performed under the auspices of the U.S. Department of Energy by +Lawrence Livermore National Laboratory under Contract DE-AC52-07NA27344. + +Disclaimer + +This work was sponsored by an agency of the United States government. +Neither the United States Government nor Lawrence Livermore National +Security, LLC, nor any of their employees, makes any warranty, express +or implied, or assumes any liability or responsibility for the accuracy, +completeness, or usefulness of any information, apparatus, product, or +process disclosed, or represents that its use would not infringe privately +owned rights. References herein to any specific commercial products, process, +or services by trade names, trademark, manufacturer or otherwise does not +necessarily constitute or imply its endorsement, recommendation, or +favoring by the United States Government or the Lawrence Livermore National +Security, LLC. The views and opinions of authors expressed herein do not +necessarily state or reflect those of the United States government or +Lawrence Livermore National Security, LLC, and shall not be used for +advertising or product endorsement purposes. + +The precise terms and conditions for copying, distribution and modification +is provided in the file named "COPYING" in this directory. diff --git a/pyslurm/slurm/SLURM_LICENSE b/pyslurm/slurm/SLURM_LICENSE new file mode 100644 index 00000000..0fd4db48 --- /dev/null +++ b/pyslurm/slurm/SLURM_LICENSE @@ -0,0 +1,389 @@ + SLURM LICENSE AGREEMENT + +All Slurm code and documentation is available under the GNU General Public +License. Some tools in the "contribs" directory have other licenses. See +the documentation for individual contributed tools for details. + +In addition, as a special exception, the copyright holders give permission +to link the code of portions of this program with the OpenSSL library under +certain conditions as described in each individual source file, and distribute +linked combinations including the two. You must obey the GNU General Public +License in all respects for all of the code used other than OpenSSL. If you +modify file(s) with this exception, you may extend this exception to your +version of the file(s), but you are not obligated to do so. If you do not +wish to do so, delete this exception statement from your version. If you +delete this exception statement from all source files in the program, then +also delete it here. + +NO WARRANTY: Because the program is licensed free of charge, there is no +warranty for the program. See section 11 below for full details. + +============================================================================= + +OUR NOTICE AND TERMS OF AND CONDITIONS OF THE GNU GENERAL PUBLIC LICENSE + +Auspices + +Portions of this work were performed under the auspices of the U.S. Department +of Energy by Lawrence Livermore National Laboratory under Contract +DE-AC52-07NA27344. + +Disclaimer + +This work was sponsored by an agency of the United States government. 
+Neither the United States Government nor Lawrence Livermore National +Security, LLC, nor any of their employees, makes any warranty, express +or implied, or assumes any liability or responsibility for the accuracy, +completeness, or usefulness of any information, apparatus, product, or +process disclosed, or represents that its use would not infringe privately +owned rights. References herein to any specific commercial products, process, +or services by trade names, trademark, manufacturer or otherwise does not +necessarily constitute or imply its endorsement, recommendation, or +favoring by the United States Government or the Lawrence Livermore National +Security, LLC. The views and opinions of authors expressed herein do not +necessarily state or reflect those of the United States government or +Lawrence Livermore National Security, LLC, and shall not be used for +advertising or product endorsement purposes. + +============================================================================= + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. 
We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. 
+However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. 
Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. From 64958379b80007f4da644acd18c295055e7c5e65 Mon Sep 17 00:00:00 2001 From: Toni Harzendorf Date: Sun, 30 Apr 2023 16:58:17 +0200 Subject: [PATCH 26/28] remove unused file --- pyslurm/slurm/other.pxi | 67 ----------------------------------------- 1 file changed, 67 deletions(-) delete mode 100644 pyslurm/slurm/other.pxi diff --git a/pyslurm/slurm/other.pxi b/pyslurm/slurm/other.pxi deleted file mode 100644 index 79d212f2..00000000 --- a/pyslurm/slurm/other.pxi +++ /dev/null @@ -1,67 +0,0 @@ -# Global Environment -cdef extern char **environ - -# -# Slurm Memory routines -# - -cdef extern void slurm_xfree (void **) -cdef extern void *slurm_xcalloc(size_t, size_t, bool, bool, const char *, int, const char *) - -cdef inline xfree(void **item): - slurm_xfree(item) - -cdef inline void *xmalloc(size_t size): - return slurm_xcalloc(1, size, True, False, __FILE__, __LINE__, __FUNCTION__) - -cdef inline void *try_xmalloc(size_t size): - return slurm_xcalloc(1, size, True, True, __FILE__, __LINE__, __FUNCTION__) - -cdef inline void xfree_ptr(void *ptr): - slurm_xfree(&ptr) - -# -# Slurm xstring functions -# - -cdef extern char *slurm_xstrdup(const char *str) - - -# -# Slurm time functions -# - - -cdef extern void slurm_secs2time_str(time_t time, char *string, int size) -cdef extern void slurm_mins2time_str(time_t time, char *string, int size) -cdef extern int slurm_time_str2mins(const char *string) -cdef extern int slurm_time_str2secs(const char *string) -cdef extern void slurm_make_time_str(time_t *time, char *string, int size) -cdef extern time_t slurm_parse_time(char *time_str, int past) - -# -# Slurm Job functions -# - - -cdef extern void slurm_free_job_desc_msg(job_desc_msg_t *msg) -cdef extern void slurm_free_job_info(job_info_t *job) -cdef extern void slurm_free_job_info_members(job_info_t *job) -cdef extern void 
slurm_free_job_step_info_response_msg(job_step_info_response_msg_t *msg) -cdef extern void slurm_free_job_step_info_members(job_step_info_t *msg) -cdef extern char *slurm_job_state_string(uint16_t inx) -cdef extern char *slurm_job_reason_string(int inx) -cdef extern char *slurm_job_share_string(uint16_t shared) - -# -# Slurm environment functions -# - -cdef extern void slurm_env_array_merge(char ***dest_array, const char **src_array) -cdef extern char **slurm_env_array_create() -cdef extern int slurm_env_array_overwrite(char ***array_ptr, const char *name, const char *value) -cdef extern void slurm_env_array_free(char **env_array) -# cdef extern void slurm_env_array_merge_slurm(char ***dest_array, const char **src_array) - - -cdef extern int slurm_select_fini() From 4dbd620b535348510a93b7f6f317f7bf64411bc1 Mon Sep 17 00:00:00 2001 From: Toni Harzendorf Date: Mon, 1 May 2023 13:00:32 +0200 Subject: [PATCH 27/28] wip --- pyslurm/core/common/__init__.pyx | 2 +- pyslurm/core/common/cstr.pyx | 13 +------------ pyslurm/core/db/job.pyx | 7 ++++--- pyslurm/core/db/util.pyx | 2 ++ pyslurm/core/job/job.pyx | 8 ++++---- pyslurm/core/job/sbatch_opts.pyx | 4 ++-- pyslurm/core/job/submission.pxd | 8 ++++---- pyslurm/core/job/submission.pyx | 8 ++++---- pyslurm/core/job/util.pyx | 1 - pyslurm/core/node.pyx | 20 ++++++++++---------- tests/unit/test_common.py | 2 +- 11 files changed, 33 insertions(+), 42 deletions(-) diff --git a/pyslurm/core/common/__init__.pyx b/pyslurm/core/common/__init__.pyx index e8461d95..6ad5ae47 100644 --- a/pyslurm/core/common/__init__.pyx +++ b/pyslurm/core/common/__init__.pyx @@ -178,7 +178,7 @@ def nodelist_from_range_str(nodelist): hl = slurm.slurm_hostlist_create(nl) if not hl: - return None + return [] hl_unranged = slurm.slurm_hostlist_deranged_string_malloc(hl) out = cstr.to_list(hl_unranged) diff --git a/pyslurm/core/common/cstr.pyx b/pyslurm/core/common/cstr.pyx index 7cd09d7f..8301c994 100644 --- a/pyslurm/core/common/cstr.pyx +++ b/pyslurm/core/common/cstr.pyx @@ -62,18 +62,7 @@ cdef fmalloc2(char **p1, char **p2, val): cdef fmalloc(char **old, val): """Try to free first and then create xmalloc'ed char* from str. - Also see: - https://github.com/SchedMD/slurm/blob/master/src/common/xstring.c#L454 - - This function is essentially like xstrdup from Slurm, but also tries to free - the previous allocation if needed. - - Uses Slurm's try_xmalloc for routine for allocating memory. try_xmalloc will - return NULL if the allocation failed. We can check this and raise a - MemoryError. - - Just using the normal xmalloc would call abort() if allocation failed (for - example when OOM). + Note: Uses Slurm's memory allocator. """ # TODO: Consider doing some size checks on the input by having an extra # argument like "max_size" which is configurable. 
Otherwise infinitely huge diff --git a/pyslurm/core/db/job.pyx b/pyslurm/core/db/job.pyx index ca0fec33..d66f789e 100644 --- a/pyslurm/core/db/job.pyx +++ b/pyslurm/core/db/job.pyx @@ -173,6 +173,8 @@ cdef class JobSearchFilter: already_added = [] for i in self.ids: job_id = u32(i) + if job_id in already_added: + continue selected_step = NULL selected_step = try_xmalloc( @@ -184,9 +186,8 @@ cdef class JobSearchFilter: selected_step.het_job_offset = slurm.NO_VAL selected_step.step_id.step_id = slurm.NO_VAL selected_step.step_id.job_id = job_id - - if not job_id in already_added: - slurm_list_append(ptr.step_list, selected_step) + slurm_list_append(ptr.step_list, selected_step) + already_added.append(job_id) cdef class Jobs(dict): diff --git a/pyslurm/core/db/util.pyx b/pyslurm/core/db/util.pyx index 70978dbe..2560c4b0 100644 --- a/pyslurm/core/db/util.pyx +++ b/pyslurm/core/db/util.pyx @@ -62,6 +62,8 @@ cdef class SlurmListItem: return False def to_str(self): + # Mostly for debugging purposes. Can only be used "safely" if we have + # a char* list cdef char* entry = self.data return cstr.to_unicode(entry) diff --git a/pyslurm/core/job/job.pyx b/pyslurm/core/job/job.pyx index 3521caf6..1e160c80 100644 --- a/pyslurm/core/job/job.pyx +++ b/pyslurm/core/job/job.pyx @@ -1230,7 +1230,7 @@ cdef class Job: """Retrieve the resource layout of this Job on each node. This contains the following information: - * cpus (int) + * cpu_ids (str) * gres (dict) * memory (int) @@ -1329,9 +1329,9 @@ cdef class Job: if nodename: output[nodename] = { - "cpus": cpu_ids, - "gres": cstr.to_gres_dict(gres), - "memory": mem, + "cpu_ids": cpu_ids, + "gres": cstr.to_gres_dict(gres), + "memory": mem, } free(host) diff --git a/pyslurm/core/job/sbatch_opts.pyx b/pyslurm/core/job/sbatch_opts.pyx index 9af607be..91724d29 100644 --- a/pyslurm/core/job/sbatch_opts.pyx +++ b/pyslurm/core/job/sbatch_opts.pyx @@ -72,7 +72,7 @@ SBATCH_OPTIONS = [ _SbatchOpt(None, "export-file", None), _SbatchOpt("B", "extra-node-info", None), _SbatchOpt(None, "get-user-env", "get_user_environment"), - _SbatchOpt(None, "gid", "gid"), + _SbatchOpt(None, "gid", "group_id"), _SbatchOpt(None, "gpu-bind", "gpu_binding"), _SbatchOpt(None, "gpu-freq", None), _SbatchOpt("G", "gpus", "gpus"), @@ -131,7 +131,7 @@ SBATCH_OPTIONS = [ _SbatchOpt("t", "time", "time_limit"), _SbatchOpt(None, "time-min", "time_limit_min"), _SbatchOpt(None, "tmp", "temporary_disk_per_node"), - _SbatchOpt(None, "uid", "uid"), + _SbatchOpt(None, "uid", "user_id"), _SbatchOpt(None, "use-min-nodes", "use_min_nodes", True), _SbatchOpt(None, "wait-all-nodes", "wait_all_nodes", True), _SbatchOpt(None, "wckey", "wckey"), diff --git a/pyslurm/core/job/submission.pxd b/pyslurm/core/job/submission.pxd index bddabb77..ebf0b0c5 100644 --- a/pyslurm/core/job/submission.pxd +++ b/pyslurm/core/job/submission.pxd @@ -48,11 +48,11 @@ cdef class JobSubmitDescription: Name of the Job, same as -J/--job-name from sbatch. account (str): Account of the job, same as -A/--account from sbatch. - uid (Union[str, int]): + user_id (Union[str, int]): Run the job as a different User, same as --uid from sbatch. This requires root privileges. You can both specify the name or numeric uid of the User. - gid (Union[str, int]): + group_id (Union[str, int]): Run the job as a different Group, same as --gid from sbatch. This requires root privileges. You can both specify the name or numeric gid of the User. 
@@ -537,8 +537,8 @@ cdef class JobSubmitDescription: cdef public: name account - uid - gid + user_id + group_id priority site_factor wckey diff --git a/pyslurm/core/job/submission.pyx b/pyslurm/core/job/submission.pyx index 1f09e898..e1f4039d 100644 --- a/pyslurm/core/job/submission.pyx +++ b/pyslurm/core/job/submission.pyx @@ -163,10 +163,10 @@ cdef class JobSubmitDescription: if self.site_factor: ptr.site_factor = slurm.NICE_OFFSET + int(self.site_factor) - if self.uid is not None: - ptr.user_id = user_to_uid(self.uid) - if self.gid is not None: - ptr.group_id = group_to_gid(self.gid) + if self.user_id is not None: + ptr.user_id = user_to_uid(self.user_id) + if self.group_id is not None: + ptr.group_id = group_to_gid(self.group_id) cstr.fmalloc(&ptr.name, self.name) cstr.fmalloc(&ptr.account, self.account) diff --git a/pyslurm/core/job/util.pyx b/pyslurm/core/job/util.pyx index 404869a4..7b463b2c 100644 --- a/pyslurm/core/job/util.pyx +++ b/pyslurm/core/job/util.pyx @@ -276,7 +276,6 @@ def cpu_freq_str_to_int(freq): raise ValueError(f"Invalid cpu freq value: {freq}.") -# https://github.com/SchedMD/slurm/blob/fec3d2648cfdcfa8b4efb1b59e70ebfaac98d9c3/src/common/cpu_frequency.c#L1359 def cpu_freq_int_to_str(freq): """Convert a numerical cpufreq value to its string representation.""" if freq == slurm.CPU_FREQ_LOW: diff --git a/pyslurm/core/node.pyx b/pyslurm/core/node.pyx index 9419f0c8..17429ce1 100644 --- a/pyslurm/core/node.pyx +++ b/pyslurm/core/node.pyx @@ -503,7 +503,7 @@ cdef class Node: slurm.SELECT_NODEDATA_MEM_ALLOC, slurm.NODE_STATE_ALLOCATED, &alloc_memory) - return u64_parse(alloc_memory) + return alloc_memory @property def real_memory(self): @@ -535,11 +535,11 @@ cdef class Node: @property def total_cpus(self): - return u16_parse(self.info.cpus) + return u16_parse(self.info.cpus, on_noval=0) @property def sockets(self): - return u16_parse(self.info.sockets) + return u16_parse(self.info.sockets, on_noval=0) @property def cores_reserved_for_system(self): @@ -647,20 +647,20 @@ cdef class Node: @property def cap_watts(self): if not self.info.power: - return None - return u32_parse(self.info.power.cap_watts) + return 0 + return u32_parse(self.info.power.cap_watts, on_noval=0) @property def current_watts(self): if not self.info.energy: - return None - return u32_parse(self.info.energy.current_watts) + return 0 + return u32_parse(self.info.energy.current_watts, on_noval=0) @property def avg_watts(self): if not self.info.energy: - return None - return u32_parse(self.info.energy.ave_watts) + return 0 + return u32_parse(self.info.energy.ave_watts, on_noval=0) @property def external_sensors(self): @@ -698,7 +698,7 @@ cdef class Node: @property def cpu_load(self): load = u32_parse(self.info.cpu_load) - return load / 100.0 if load is not None else None + return load / 100.0 if load is not None else 0.0 @property def slurmd_port(self): diff --git a/tests/unit/test_common.py b/tests/unit/test_common.py index 7875ad4d..ca3f1cfd 100644 --- a/tests/unit/test_common.py +++ b/tests/unit/test_common.py @@ -368,7 +368,7 @@ def test_nodelist_from_range_str(self): nodelist = ["node001", "node007", "node008", "node009"] nodelist_str = ",".join(nodelist) assert nodelist == nodelist_from_range_str("node[001,007-009]") - assert nodelist_from_range_str("node[001,007:009]") is None + assert nodelist_from_range_str("node[001,007:009]") == [] def test_nodelist_to_range_str(self): nodelist = ["node001", "node007", "node008", "node009"] From 28fee7f658ef6f847d8fc7f6ff7d59fedf3fa262 Mon Sep 17 00:00:00 2001 
From: Toni Harzendorf Date: Mon, 1 May 2023 13:46:40 +0200 Subject: [PATCH 28/28] wip --- pyslurm/slurm/extra.pxi | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/pyslurm/slurm/extra.pxi b/pyslurm/slurm/extra.pxi index 9c144ba4..0ccb0708 100644 --- a/pyslurm/slurm/extra.pxi +++ b/pyslurm/slurm/extra.pxi @@ -1,3 +1,11 @@ +# +# Structs that are not in the Slurm headers, but need to be redefined +# in order to implement certain features. +# +# For example: to communicate with the slurmctld directly in order +# to retrieve the actual batch-script as a string. +# +# https://github.com/SchedMD/slurm/blob/26abe9188ea8712ba1eab4a8eb6322851f06a108/src/common/slurm_persist_conn.h#L51 ctypedef enum persist_conn_type_t: PERSIST_TYPE_NONE = 0 PERSIST_TYPE_DBD @@ -6,7 +14,7 @@ ctypedef enum persist_conn_type_t: PERSIST_TYPE_HA_DBD PERSIST_TYPE_ACCT_UPDATE -# https://github.com/SchedMD/slurm/blob/master/src/common/slurm_persist_conn.h +# https://github.com/SchedMD/slurm/blob/26abe9188ea8712ba1eab4a8eb6322851f06a108/src/common/slurm_persist_conn.h#L59 ctypedef struct persist_msg_t: void *conn void *data @@ -17,6 +25,7 @@ ctypedef int (*_slurm_persist_conn_t_callback_proc) (void *arg, persist_msg_t *m ctypedef void (*_slurm_persist_conn_t_callback_fini)(void *arg) +# https://github.com/SchedMD/slurm/blob/26abe9188ea8712ba1eab4a8eb6322851f06a108/src/common/slurm_persist_conn.h#L66 ctypedef struct slurm_persist_conn_t: void *auth_cred _slurm_persist_conn_t_callback_proc callback_proc @@ -37,7 +46,7 @@ ctypedef struct slurm_persist_conn_t: slurm_trigger_callbacks_t trigger_callbacks; uint16_t version -# https://github.com/SchedMD/slurm/blob/master/src/common/pack.h#L68 +# https://github.com/SchedMD/slurm/blob/20e2b354168aeb0f76d67f80122d80925c2ef32b/src/common/pack.h#L68 ctypedef struct buf_t: uint32_t magic char *head @@ -45,20 +54,24 @@ ctypedef struct buf_t: uint32_t processed bool mmaped -# https://github.com/SchedMD/slurm/blob/master/src/common/slurm_protocol_defs.h +# https://github.com/SchedMD/slurm/blob/20e2b354168aeb0f76d67f80122d80925c2ef32b/src/common/slurm_protocol_defs.h ctypedef struct return_code_msg_t: uint32_t return_code +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L650 ctypedef struct job_id_msg_t: uint32_t job_id uint16_t show_flags +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L216 +# Only partially defined - not everything needed at the moment.
ctypedef enum slurm_msg_type_t: REQUEST_SHARE_INFO = 2022 REQUEST_BATCH_SCRIPT = 2051 RESPONSE_BATCH_SCRIPT = 2052 RESPONSE_SLURM_RC = 8001 +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L469 ctypedef struct forward_t: uint16_t cnt uint16_t init @@ -66,6 +79,7 @@ ctypedef struct forward_t: uint32_t timeout uint16_t tree_width +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L491 ctypedef struct forward_struct_t: char *buf int buf_len @@ -75,6 +89,7 @@ ctypedef struct forward_struct_t: List ret_list uint32_t timeout +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.h#L514 ctypedef struct slurm_msg_t: slurm_addr_t address void *auth_cred @@ -98,12 +113,15 @@ ctypedef struct slurm_msg_t: slurm_addr_t orig_addr List ret_list -# Slurm Protocol stuff +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.c#L865 cdef extern void slurm_free_return_code_msg(return_code_msg_t *msg) + +# https://github.com/SchedMD/slurm/blob/2d2e83674b59410a7ed8ab6fc8d8acfcfa8beaf9/src/common/slurm_protocol_api.c#L2401 cdef extern int slurm_send_recv_controller_msg(slurm_msg_t *request_msg, slurm_msg_t *response_msg, slurmdb_cluster_rec_t *working_cluster_rec) +# https://github.com/SchedMD/slurm/blob/fe82218def7b57f5ecda9222e80662ebbb6415f8/src/common/slurm_protocol_defs.c#L168 cdef extern void slurm_msg_t_init(slurm_msg_t *msg) # https://github.com/SchedMD/slurm/blob/master/src/common/job_resources.h