@@ -33,13 +33,14 @@ from pyslurm import xcollections
33
33
from pyslurm.utils.helpers import (
34
34
uid_to_name,
35
35
gid_to_name,
36
- humanize,
36
+ humanize,
37
37
_getgrall_to_dict,
38
38
_getpwall_to_dict,
39
39
cpubind_to_num,
40
40
instance_to_dict,
41
41
nodelist_from_range_str,
42
42
nodelist_to_range_str,
43
+ gres_from_tres_dict,
43
44
)
44
45
45
46
@@ -65,7 +66,7 @@ cdef class Nodes(MultiClusterMap):
65
66
""" Load all nodes in the system.
66
67
67
68
Args:
68
- preload_passwd_info (bool):
69
+ preload_passwd_info (bool):
69
70
Decides whether to query passwd and groups information from
70
71
the system.
71
72
Could potentially speed up access to attributes of the Node
@@ -83,7 +84,7 @@ cdef class Nodes(MultiClusterMap):
83
84
dict passwd = {}
84
85
dict groups = {}
85
86
Nodes nodes = Nodes()
86
- int flags = slurm.SHOW_ALL
87
+ int flags = slurm.SHOW_ALL | slurm.SHOW_DETAIL
87
88
Node node
88
89
89
90
verify_rpc(slurm_load_node(0 , & nodes.info, flags))
@@ -107,14 +108,20 @@ cdef class Nodes(MultiClusterMap):
107
108
# is raised by replacing it with a zeroed-out node_info_t.
108
109
nodes.info.node_array[cnt] = nodes.tmp_info
109
110
111
+ name = node.name
112
+ if not name:
113
+ # Could be possible if there are nodes configured in
114
+ # slurm.conf that cannot be reached anymore.
115
+ continue
116
+
110
117
if preload_passwd_info:
111
118
node.passwd = passwd
112
119
node.groups = groups
113
120
114
121
cluster = node.cluster
115
122
if cluster not in nodes.data:
116
123
nodes.data[cluster] = {}
117
- nodes.data[cluster][node. name] = node
124
+ nodes.data[cluster][name] = node
118
125
119
126
# We have extracted all pointers
120
127
nodes.info.record_count = 0
@@ -162,7 +169,7 @@ cdef class Nodes(MultiClusterMap):
162
169
n._alloc_umsg()
163
170
cstr.fmalloc(& n.umsg.node_names, node_str)
164
171
verify_rpc(slurm_update_node(n.umsg))
165
-
172
+
166
173
@property
def free_memory(self):
    """Aggregate of `Node.free_memory` over all nodes in this collection.

    The aggregation itself is delegated to `xcollections.sum_property`.
    """
    total = xcollections.sum_property(self, Node.free_memory)
    return total
@@ -171,6 +178,10 @@ cdef class Nodes(MultiClusterMap):
171
178
def real_memory (self ):
172
179
return xcollections.sum_property(self , Node.real_memory)
173
180
181
@property
def idle_memory(self):
    """Aggregate of `Node.idle_memory` over all nodes in this collection.

    The aggregation itself is delegated to `xcollections.sum_property`.
    """
    total = xcollections.sum_property(self, Node.idle_memory)
    return total
184
+
174
185
@property
def allocated_memory(self):
    """Aggregate of `Node.allocated_memory` over all nodes in this collection.

    The aggregation itself is delegated to `xcollections.sum_property`.
    """
    total = xcollections.sum_property(self, Node.allocated_memory)
    return total
@@ -186,7 +197,7 @@ cdef class Nodes(MultiClusterMap):
186
197
@property
def allocated_cpus(self):
    """Aggregate of `Node.allocated_cpus` over all nodes in this collection.

    The aggregation itself is delegated to `xcollections.sum_property`.
    """
    total = xcollections.sum_property(self, Node.allocated_cpus)
    return total
189
-
200
+
190
201
@property
def effective_cpus(self):
    """Aggregate of `Node.effective_cpus` over all nodes in this collection.

    The aggregation itself is delegated to `xcollections.sum_property`.
    """
    total = xcollections.sum_property(self, Node.effective_cpus)
    return total
@@ -237,7 +248,7 @@ cdef class Node:
237
248
xfree(self .info)
238
249
239
250
def __dealloc__(self):
    # Deallocation hook: all cleanup lives in `_dealloc_impl`, so this
    # method only forwards to it.
    self._dealloc_impl()
241
252
242
253
def __setattr__ (self , name , val ):
243
254
# When a user wants to set attributes on a Node instance that was
@@ -264,7 +275,7 @@ cdef class Node:
264
275
cdef _swap_data(Node dst, Node src):
265
276
cdef node_info_t * tmp = NULL
266
277
if dst.info and src.info:
267
- tmp = dst.info
278
+ tmp = dst.info
268
279
dst.info = src.info
269
280
src.info = tmp
270
281
@@ -319,7 +330,7 @@ cdef class Node:
319
330
Implements the slurm_create_node RPC.
320
331
321
332
Args:
322
- state (str, optional):
333
+ state (str, optional):
323
334
An optional state the created Node should have. Allowed values
324
335
are `future` and `cloud`. `future` is the default.
325
336
@@ -421,7 +432,7 @@ cdef class Node:
421
432
422
433
@configured_gres.setter
def configured_gres(self, val):
    # Convert the given value with `cstr.from_gres_dict` first, then store
    # the result in both the info struct and the update message.
    gres_str = cstr.from_gres_dict(val)
    cstr.fmalloc2(&self.info.gres, &self.umsg.gres, gres_str)
426
437
427
438
@property
@@ -451,7 +462,7 @@ cdef class Node:
451
462
@extra.setter
def extra(self, val):
    # The value is written to both the info struct and the update message
    # through a single `cstr.fmalloc2` call.
    cstr.fmalloc2(
        &self.info.extra,
        &self.umsg.extra,
        val,
    )
454
-
465
+
455
466
@property
def reason(self):
    """The `reason` field of the node info, converted via `cstr.to_unicode`."""
    reason_text = cstr.to_unicode(self.info.reason)
    return reason_text
@@ -486,7 +497,7 @@ cdef class Node:
486
497
487
498
@property
def allocated_gres(self):
    """GRES derived from `allocated_tres` through `gres_from_tres_dict`."""
    tres = self.allocated_tres
    return gres_from_tres_dict(tres)
490
501
491
502
@property
492
503
def mcs_label (self ):
@@ -511,6 +522,11 @@ cdef class Node:
511
522
def free_memory (self ):
512
523
return u64_parse(self .info.free_mem)
513
524
525
@property
def idle_memory(self):
    """Memory that is not allocated: `real_memory - allocated_memory`.

    Yields 0 whenever `real_memory` is falsy (for example 0 or None), in
    which case `allocated_memory` is not consulted at all.
    """
    total = self.real_memory
    if not total:
        return 0
    return total - self.allocated_memory
529
+
514
530
@property
def memory_reserved_for_system(self):
    """The `mem_spec_limit` field of the node info, run through `u64_parse`."""
    reserved = u64_parse(self.info.mem_spec_limit)
    return reserved
@@ -596,17 +612,17 @@ cdef class Node:
596
612
# """dict: TRES that are configured on the node."""
597
613
# return cstr.to_dict(self.info.tres_fmt_str)
598
614
599
- # @property
600
- # def tres_alloc (self):
601
- # cdef char *alloc_tres = NULL
602
- # if self.info.select_nodeinfo:
603
- # slurm_get_select_nodeinfo(
604
- # self.info.select_nodeinfo,
605
- # slurm.SELECT_NODEDATA_TRES_ALLOC_FMT_STR,
606
- # slurm.NODE_STATE_ALLOCATED,
607
- # &alloc_tres
608
- # )
609
- # return cstr.to_gres_dict (alloc_tres)
615
@property
def allocated_tres(self):
    """Allocated TRES of the node, as parsed by `cstr.to_dict`.

    The TRES string is requested from the node's `select_nodeinfo` via
    `slurm_get_select_nodeinfo`. If the node carries no `select_nodeinfo`,
    a NULL string is handed to `cstr.to_dict`.
    """
    cdef char *alloc_tres = NULL
    if self.info.select_nodeinfo:
        slurm_get_select_nodeinfo(
            self.info.select_nodeinfo,
            slurm.SELECT_NODEDATA_TRES_ALLOC_FMT_STR,
            slurm.NODE_STATE_ALLOCATED,
            &alloc_tres
        )

    try:
        return cstr.to_dict(alloc_tres)
    finally:
        # NOTE(review): Slurm's select plugins hand out a copy of the
        # formatted TRES string for this data type, so it has to be
        # released here or every access leaks it. `xfree` on a pointer
        # that is still NULL is a no-op.
        xfree(alloc_tres)
610
626
611
627
@property
612
628
def allocated_cpus (self ):
@@ -671,10 +687,22 @@ cdef class Node:
671
687
" temperature" : u32_parse(self .info.ext_sensors.temperature)
672
688
}
673
689
690
@property
def _node_state(self):
    """Raw node state, adjusted to MIXED for partially used nodes.

    Returns `info.node_state` unchanged when the node has no idle CPUs or
    when all of its effective CPUs are idle. Otherwise only the bits in
    `slurm.NODE_STATE_FLAGS` are kept and `slurm.NODE_STATE_MIXED` is
    OR-ed in.
    """
    idle = self.idle_cpus
    raw_state = self.info.node_state

    if not idle or idle == self.effective_cpus:
        return raw_state

    # Neither fully idle nor fully allocated: report the node as MIXED
    # while preserving the flag bits.
    return (raw_state & slurm.NODE_STATE_FLAGS) | slurm.NODE_STATE_MIXED
702
+
674
703
@property
def state(self):
    """State string built from `_node_state`.

    The C string comes from `slurm_node_state_string_complete`, is
    converted with `cstr.to_unicode` and then released with `xfree`.
    """
    cdef char *raw = slurm_node_state_string_complete(self._node_state)
    text = cstr.to_unicode(raw)
    # The C string was only needed for the conversion above.
    xfree(raw)
    return text
@@ -685,9 +713,10 @@ cdef class Node:
685
713
686
714
@property
687
715
def next_state (self ):
716
+ state = self ._node_state
688
717
if ((self .info.next_state != slurm.NO_VAL)
689
- and (self .info.node_state & slurm.NODE_STATE_REBOOT_REQUESTED
690
- or self .info.node_state & slurm.NODE_STATE_REBOOT_ISSUED)):
718
+ and (state & slurm.NODE_STATE_REBOOT_REQUESTED
719
+ or state & slurm.NODE_STATE_REBOOT_ISSUED)):
691
720
return cstr.to_unicode(
692
721
slurm_node_state_string(self .info.next_state))
693
722
else :
0 commit comments