Skip to content

Commit 4ecdfa0

Browse files
authored
Nodes: Some Bugfixes and new attributes (#318)
* Nodes: Some Bugfixes and new attributes
  - add idle_memory and allocated_tres properties
  - correctly return GRES in allocated_gres property
  - trailing whitespace fixes
  - add some new tests for GRES parsing
1 parent b435d68 commit 4ecdfa0

File tree

6 files changed: +131 additions, -67 deletions

pyslurm/core/node.pxd

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,12 @@ cdef class Nodes(MultiClusterMap):
6868
Attributes:
6969
free_memory (int):
7070
Amount of free memory in this node collection. (in Mebibytes)
71+
Note that this means actual free memory as returned by the `free`
72+
command
7173
real_memory (int):
7274
Amount of real memory in this node collection. (in Mebibytes)
75+
idle_memory (int):
76+
Amount of idle memory in this node collection. (in Mebibytes)
7377
allocated_memory (int):
7478
Amount of alloc Memory in this node collection. (in Mebibytes)
7579
total_cpus (int):
@@ -100,7 +104,7 @@ cdef class Node:
100104
101105
Other Parameters:
102106
configured_gres (dict):
103-
Configured GRES for the node
107+
Configured GRES for the node
104108
address (str):
105109
Address of the node
106110
hostname (str):
@@ -160,6 +164,10 @@ cdef class Node:
160164
Real Memory in Mebibytes configured for this node.
161165
free_memory (int):
162166
Free Memory in Mebibytes on the node.
167+
Note that this means actual free memory as returned by the `free`
168+
command
169+
idle_memory (int):
170+
Idle Memory in Mebibytes on the node.
163171
memory_reserved_for_system (int):
164172
Memory in Mebibytes reserved for the System not usable by Jobs.
165173
temporary_disk (int):
@@ -194,6 +202,8 @@ cdef class Node:
194202
Time this node was last busy, as unix timestamp.
195203
reason_time (int):
196204
Time the reason was set for the node, as unix timestamp.
205+
allocated_tres (dict):
206+
Currently allocated Trackable Resources
197207
allocated_cpus (int):
198208
Number of allocated CPUs on the node.
199209
idle_cpus (int):
@@ -235,4 +245,4 @@ cdef class Node:
235245

236246
@staticmethod
237247
cdef Node from_ptr(node_info_t *in_ptr)
238-
248+

pyslurm/core/node.pyx

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@ from pyslurm import xcollections
3333
from pyslurm.utils.helpers import (
3434
uid_to_name,
3535
gid_to_name,
36-
humanize,
36+
humanize,
3737
_getgrall_to_dict,
3838
_getpwall_to_dict,
3939
cpubind_to_num,
4040
instance_to_dict,
4141
nodelist_from_range_str,
4242
nodelist_to_range_str,
43+
gres_from_tres_dict,
4344
)
4445

4546

@@ -65,7 +66,7 @@ cdef class Nodes(MultiClusterMap):
6566
"""Load all nodes in the system.
6667
6768
Args:
68-
preload_passwd_info (bool):
69+
preload_passwd_info (bool):
6970
Decides whether to query passwd and groups information from
7071
the system.
7172
Could potentially speed up access to attributes of the Node
@@ -83,7 +84,7 @@ cdef class Nodes(MultiClusterMap):
8384
dict passwd = {}
8485
dict groups = {}
8586
Nodes nodes = Nodes()
86-
int flags = slurm.SHOW_ALL
87+
int flags = slurm.SHOW_ALL | slurm.SHOW_DETAIL
8788
Node node
8889

8990
verify_rpc(slurm_load_node(0, &nodes.info, flags))
@@ -107,14 +108,20 @@ cdef class Nodes(MultiClusterMap):
107108
# is raised by replacing it with a zeroed-out node_info_t.
108109
nodes.info.node_array[cnt] = nodes.tmp_info
109110

111+
name = node.name
112+
if not name:
113+
# Could be possible if there are nodes configured in
114+
# slurm.conf that cannot be reached anymore.
115+
continue
116+
110117
if preload_passwd_info:
111118
node.passwd = passwd
112119
node.groups = groups
113120

114121
cluster = node.cluster
115122
if cluster not in nodes.data:
116123
nodes.data[cluster] = {}
117-
nodes.data[cluster][node.name] = node
124+
nodes.data[cluster][name] = node
118125

119126
# We have extracted all pointers
120127
nodes.info.record_count = 0
@@ -162,7 +169,7 @@ cdef class Nodes(MultiClusterMap):
162169
n._alloc_umsg()
163170
cstr.fmalloc(&n.umsg.node_names, node_str)
164171
verify_rpc(slurm_update_node(n.umsg))
165-
172+
166173
@property
167174
def free_memory(self):
168175
return xcollections.sum_property(self, Node.free_memory)
@@ -171,6 +178,10 @@ cdef class Nodes(MultiClusterMap):
171178
def real_memory(self):
172179
return xcollections.sum_property(self, Node.real_memory)
173180

181+
@property
182+
def idle_memory(self):
183+
return xcollections.sum_property(self, Node.idle_memory)
184+
174185
@property
175186
def allocated_memory(self):
176187
return xcollections.sum_property(self, Node.allocated_memory)
@@ -186,7 +197,7 @@ cdef class Nodes(MultiClusterMap):
186197
@property
187198
def allocated_cpus(self):
188199
return xcollections.sum_property(self, Node.allocated_cpus)
189-
200+
190201
@property
191202
def effective_cpus(self):
192203
return xcollections.sum_property(self, Node.effective_cpus)
@@ -237,7 +248,7 @@ cdef class Node:
237248
xfree(self.info)
238249

239250
def __dealloc__(self):
240-
self._dealloc_impl()
251+
self._dealloc_impl()
241252

242253
def __setattr__(self, name, val):
243254
# When a user wants to set attributes on a Node instance that was
@@ -264,7 +275,7 @@ cdef class Node:
264275
cdef _swap_data(Node dst, Node src):
265276
cdef node_info_t *tmp = NULL
266277
if dst.info and src.info:
267-
tmp = dst.info
278+
tmp = dst.info
268279
dst.info = src.info
269280
src.info = tmp
270281

@@ -319,7 +330,7 @@ cdef class Node:
319330
Implements the slurm_create_node RPC.
320331
321332
Args:
322-
state (str, optional):
333+
state (str, optional):
323334
An optional state the created Node should have. Allowed values
324335
are `future` and `cloud`. `future` is the default.
325336
@@ -421,7 +432,7 @@ cdef class Node:
421432

422433
@configured_gres.setter
423434
def configured_gres(self, val):
424-
cstr.fmalloc2(&self.info.gres, &self.umsg.gres,
435+
cstr.fmalloc2(&self.info.gres, &self.umsg.gres,
425436
cstr.from_gres_dict(val))
426437

427438
@property
@@ -451,7 +462,7 @@ cdef class Node:
451462
@extra.setter
452463
def extra(self, val):
453464
cstr.fmalloc2(&self.info.extra, &self.umsg.extra, val)
454-
465+
455466
@property
456467
def reason(self):
457468
return cstr.to_unicode(self.info.reason)
@@ -486,7 +497,7 @@ cdef class Node:
486497

487498
@property
488499
def allocated_gres(self):
489-
return cstr.to_gres_dict(self.info.gres_used)
500+
return gres_from_tres_dict(self.allocated_tres)
490501

491502
@property
492503
def mcs_label(self):
@@ -511,6 +522,11 @@ cdef class Node:
511522
def free_memory(self):
512523
return u64_parse(self.info.free_mem)
513524

525+
@property
526+
def idle_memory(self):
527+
real = self.real_memory
528+
return 0 if not real else real - self.allocated_memory
529+
514530
@property
515531
def memory_reserved_for_system(self):
516532
return u64_parse(self.info.mem_spec_limit)
@@ -596,17 +612,17 @@ cdef class Node:
596612
# """dict: TRES that are configured on the node."""
597613
# return cstr.to_dict(self.info.tres_fmt_str)
598614

599-
# @property
600-
# def tres_alloc(self):
601-
# cdef char *alloc_tres = NULL
602-
# if self.info.select_nodeinfo:
603-
# slurm_get_select_nodeinfo(
604-
# self.info.select_nodeinfo,
605-
# slurm.SELECT_NODEDATA_TRES_ALLOC_FMT_STR,
606-
# slurm.NODE_STATE_ALLOCATED,
607-
# &alloc_tres
608-
# )
609-
# return cstr.to_gres_dict(alloc_tres)
615+
@property
616+
def allocated_tres(self):
617+
cdef char *alloc_tres = NULL
618+
if self.info.select_nodeinfo:
619+
slurm_get_select_nodeinfo(
620+
self.info.select_nodeinfo,
621+
slurm.SELECT_NODEDATA_TRES_ALLOC_FMT_STR,
622+
slurm.NODE_STATE_ALLOCATED,
623+
&alloc_tres
624+
)
625+
return cstr.to_dict(alloc_tres)
610626

611627
@property
612628
def allocated_cpus(self):
@@ -671,10 +687,22 @@ cdef class Node:
671687
"temperature": u32_parse(self.info.ext_sensors.temperature)
672688
}
673689

690+
@property
691+
def _node_state(self):
692+
idle_cpus = self.idle_cpus
693+
state = self.info.node_state
694+
695+
if idle_cpus and idle_cpus != self.effective_cpus:
696+
# If we aren't idle but also not allocated, then set state to
697+
# MIXED.
698+
state &= slurm.NODE_STATE_FLAGS
699+
state |= slurm.NODE_STATE_MIXED
700+
701+
return state
702+
674703
@property
675704
def state(self):
676-
cdef char* state = slurm_node_state_string_complete(
677-
self.info.node_state)
705+
cdef char* state = slurm_node_state_string_complete(self._node_state)
678706
state_str = cstr.to_unicode(state)
679707
xfree(state)
680708
return state_str
@@ -685,9 +713,10 @@ cdef class Node:
685713

686714
@property
687715
def next_state(self):
716+
state = self._node_state
688717
if ((self.info.next_state != slurm.NO_VAL)
689-
and (self.info.node_state & slurm.NODE_STATE_REBOOT_REQUESTED
690-
or self.info.node_state & slurm.NODE_STATE_REBOOT_ISSUED)):
718+
and (state & slurm.NODE_STATE_REBOOT_REQUESTED
719+
or state & slurm.NODE_STATE_REBOOT_ISSUED)):
691720
return cstr.to_unicode(
692721
slurm_node_state_string(self.info.next_state))
693722
else:

pyslurm/utils/cstr.pyx

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ cpdef dict to_dict(char *str_dict, str delim1=",", str delim2="="):
133133
which can easily be converted to a dict.
134134
"""
135135
cdef:
136-
str _str_dict = to_unicode(str_dict)
136+
str _str_dict = to_unicode(str_dict)
137137
str key, val
138138
dict out = {}
139139

@@ -143,7 +143,7 @@ cpdef dict to_dict(char *str_dict, str delim1=",", str delim2="="):
143143
for kv in _str_dict.split(delim1):
144144
if delim2 in kv:
145145
key, val = kv.split(delim2, 1)
146-
out[key] = val
146+
out[key] = int(val) if val.isdigit() else val
147147

148148
return out
149149

@@ -184,10 +184,10 @@ def dict_to_str(vals, prepend=None, delim1=",", delim2="="):
184184

185185
if isinstance(vals, str):
186186
tmp_dict = validate_str_key_value_format(vals, delim1, delim2)
187-
187+
188188
for k, v in tmp_dict.items():
189189
if ((delim1 in str(k) or delim2 in str(k)) or
190-
delim1 in str(v) or delim2 in str(v)):
190+
delim1 in str(v) or delim2 in str(v)):
191191
raise ValueError(
192192
f"Key or Value cannot contain either {delim1} or {delim2}. "
193193
f"Got Key: {k} and Value: {v}."
@@ -208,22 +208,23 @@ cpdef dict to_gres_dict(char *gres):
208208
cdef:
209209
dict output = {}
210210
str gres_str = to_unicode(gres)
211+
str gres_delim = "gres:"
211212

212213
if not gres_str or gres_str == "(null)":
213214
return {}
214215

215216
for item in re.split(",(?=[^,]+?:)", gres_str):
216217

217218
# Remove the additional "gres" specifier if it exists
218-
if "gres:" in item:
219-
item = item.replace("gres:", "")
219+
if gres_delim in item:
220+
item = item.replace(gres_delim, "")
220221

221222
gres_splitted = re.split(
222-
":(?=[^:]+?)",
223+
":(?=[^:]+?)",
223224
item.replace("(", ":", 1).replace(")", "")
224225
)
225226

226-
name, typ, cnt = gres_splitted[0], gres_splitted[1], 0
227+
name, typ, cnt = gres_splitted[0], gres_splitted[1], 0
227228

228229
# Check if we have a gres type.
229230
if typ.isdigit():
@@ -243,10 +244,10 @@ cpdef dict to_gres_dict(char *gres):
243244
# Cover cases with IDX
244245
idx = gres_splitted[3] if not typ else gres_splitted[4]
245246
output[name_and_typ] = {
246-
"count": cnt,
247+
"count": int(cnt),
247248
"indexes": idx,
248249
}
249-
250+
250251
return output
251252

252253

pyslurm/utils/helpers.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,4 @@ from libc.stdlib cimport free
3030

3131
cpdef uid_to_name(uint32_t uid, err_on_invalid=*, dict lookup=*)
3232
cpdef gid_to_name(uint32_t gid, err_on_invalid=*, dict lookup=*)
33+
cpdef gres_from_tres_dict(dict tres_dict)

pyslurm/utils/helpers.pyx

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def user_to_uid(user, err_on_invalid=True):
9797
try:
9898
if isinstance(user, str) and not user.isdigit():
9999
return getpwnam(user).pw_uid
100-
100+
101101
return getpwuid(int(user)).pw_uid
102102
except KeyError as e:
103103
if err_on_invalid:
@@ -208,7 +208,7 @@ def nodelist_to_range_str(nodelist):
208208
char *nl = nodelist
209209
slurm.hostlist_t hl
210210
char *hl_ranged = NULL
211-
211+
212212
hl = slurm.slurm_hostlist_create(nl)
213213
if not hl:
214214
return None
@@ -219,7 +219,7 @@ def nodelist_to_range_str(nodelist):
219219
free(hl_ranged)
220220
slurm.slurm_hostlist_destroy(hl)
221221

222-
return out
222+
return out
223223

224224

225225
def humanize(num, decimals=1):
@@ -378,3 +378,12 @@ def dehumanize_step_id(sid):
378378
return slurm.SLURM_PENDING_STEP
379379
else:
380380
return int(sid)
381+
382+
383+
cpdef gres_from_tres_dict(dict tres_dict):
384+
gres_prefix = "gres/"
385+
return {
386+
k.replace(gres_prefix, ""):v
387+
for k, v in tres_dict.items()
388+
if gres_prefix in k
389+
}

0 commit comments

Comments
 (0)