Commit 3abca9b

demartinofra authored and tilne committed
Optimized the retrieval of nodes info from Slurm scheduler
Retrieving all nodes with scontrol show nodes is much faster than having Slurm filter out those from inactive partitions.
Signed-off-by: Francesco De Martino <[email protected]>
1 parent 8f959ec commit 3abca9b
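
Note on the approach (a minimal sketch, not code from this commit; the nodelist values are made up, only the scontrol subcommand comes from the diff below): instead of issuing one "scontrol show nodes <nodelist>" call per partition and letting Slurm expand and filter each partition's nodes, clustermgtd now issues a single call for the whole cluster and filters client-side using the Partitions field of each node record.

# Illustrative only: the before/after shape of the scheduler queries.
SCONTROL = "/opt/slurm/bin/scontrol"  # path taken from the sample output further down
partition_nodelists = ["queue1-dy-c5xlarge-[1-10]", "queue2-st-t2micro-[1-5]"]  # hypothetical nodelists

# Before: one scheduler round trip per partition.
old_commands = [f"{SCONTROL} show nodes {nodelist}" for nodelist in partition_nodelists]

# After: a single round trip for every node; partition membership is read from the
# "Partitions=" field of each record and matched against partition state locally.
new_command = f"{SCONTROL} show nodes"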

File tree

6 files changed: +252 -156 lines changed


CHANGELOG.md
Lines changed: 1 addition & 0 deletions

@@ -8,6 +8,7 @@ This file is used to list changes made in each version of the aws-parallelcluster-node
 -----
 
 **CHANGES**
+- Optimized the retrieval of nodes info from Slurm scheduler.
 - Increase default timeout for Slurm commands submitted by clustermgtd and computemgtd from 10 to 30 seconds.
 
 **BUG FIXES**

src/common/schedulers/slurm_commands.py
Lines changed: 41 additions & 5 deletions

@@ -73,13 +73,14 @@ class SlurmNode:
     SLURM_SCONTROL_POWERING_DOWN_STATE = "POWERING_DOWN"
     SLURM_SCONTROL_POWER_STATE = "IDLE+CLOUD+POWER"
 
-    def __init__(self, name, nodeaddr, nodehostname, state):
+    def __init__(self, name, nodeaddr, nodehostname, state, partitions=None):
         """Initialize slurm node with attributes."""
         self.name = name
         self.is_static = is_static_node(name)
         self.nodeaddr = nodeaddr
         self.nodehostname = nodehostname
         self.state = state
+        self.partitions = partitions.strip().split(",") if partitions else None
 
     def is_nodeaddr_set(self):
         """Check if nodeaddr(private ip) for the node is set."""
@@ -328,15 +329,18 @@ def set_nodes_down_and_power_save(node_list, reason):
     set_nodes_power_down(node_list, reason=reason)
 
 
-def get_nodes_info(nodes, command_timeout=DEFAULT_GET_INFO_COMMAND_TIMEOUT):
+def get_nodes_info(nodes="", command_timeout=DEFAULT_GET_INFO_COMMAND_TIMEOUT):
     """
     Retrieve SlurmNode list from slurm nodelist notation.
 
     Sample slurm nodelist notation: queue1-dy-c5_xlarge-[1-3],queue2-st-t2_micro-5.
     """
+    # awk is used to replace the \n\n record separator with '---\n'
+    # Note: In case the node does not belong to any partition the Partitions field is missing from Slurm output
     show_node_info_command = (
-        f'{SCONTROL} show nodes {nodes} | grep -oP "^NodeName=\\K(\\S+)| '
-        'NodeAddr=\\K(\\S+)| NodeHostName=\\K(\\S+)| State=\\K(\\S+)"'
+        f'{SCONTROL} show nodes {nodes} | awk \'BEGIN{{RS="\\n\\n" ; ORS="---\\n";}} {{print}}\' | '
+        'grep -oP "^NodeName=\\K(\\S+)| NodeAddr=\\K(\\S+)| NodeHostName=\\K(\\S+)| State=\\K(\\S+)|'
+        ' Partitions=\\K(\\S+)|(---)"'
    )
     nodeinfo_str = check_command_output(show_node_info_command, timeout=command_timeout, shell=True)
 
@@ -393,4 +397,36 @@ def _get_partition_nodes(partition_name, command_timeout=DEFAULT_GET_INFO_COMMAND_TIMEOUT):
 
 def _parse_nodes_info(slurm_node_info):
     """Parse slurm node info into SlurmNode objects."""
-    return [SlurmNode(*node) for node in grouper(slurm_node_info.splitlines(), 4)]
+    # [ec2-user@ip-10-0-0-58 ~]$ /opt/slurm/bin/scontrol show nodes compute-dy-c5xlarge-[1-3],compute-dy-c5xlarge-50001\
+    # | awk 'BEGIN{RS="\n\n" ; ORS="---\n";} {print}' | grep -oP "^NodeName=\K(\S+)| NodeAddr=\K(\S+)|\
+    # NodeHostName=\K(\S+)| State=\K(\S+)| Partitions=\K(\S+)|(---)"
+    # compute-dy-c5xlarge-1
+    # 1.2.3.4
+    # compute-dy-c5xlarge-1
+    # IDLE+CLOUD+POWER
+    # compute,compute2
+    # ---
+    # compute-dy-c5xlarge-2
+    # 1.2.3.4
+    # compute-dy-c5xlarge-2
+    # IDLE+CLOUD+POWER
+    # compute,compute2
+    # ---
+    # compute-dy-c5xlarge-3
+    # 1.2.3.4
+    # compute-dy-c5xlarge-3
+    # IDLE+CLOUD+POWER
+    # compute,compute2
+    # ---
+    # compute-dy-c5xlarge-50001
+    # 1.2.3.4
+    # compute-dy-c5xlarge-50001
+    # IDLE+CLOUD+POWER
+    # ---
+    node_info = slurm_node_info.split("---")
+    slurm_nodes = []
+    for node in node_info:
+        lines = node.strip().splitlines()
+        if lines:
+            slurm_nodes.append(SlurmNode(*lines))
+    return slurm_nodes
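
A self-contained sketch of the parsing technique added above, assuming only what the diff shows: awk rewrites the blank-line record separator of scontrol output into "---", and _parse_nodes_info splits on that marker, building one node per record and tolerating a missing Partitions line. SlurmNodeStub below is a hypothetical stand-in for the real SlurmNode class.

# Standalone illustration of the "---" record parsing; SlurmNodeStub is a stand-in,
# not the real SlurmNode class, and the sample text mirrors the comment in the diff.
from collections import namedtuple

SlurmNodeStub = namedtuple(
    "SlurmNodeStub", "name nodeaddr nodehostname state partitions", defaults=(None,)
)

sample_output = (
    "compute-dy-c5xlarge-1\n1.2.3.4\ncompute-dy-c5xlarge-1\nIDLE+CLOUD+POWER\ncompute,compute2\n---\n"
    "compute-dy-c5xlarge-50001\n1.2.3.4\ncompute-dy-c5xlarge-50001\nIDLE+CLOUD+POWER\n---"
)

nodes = []
for record in sample_output.split("---"):  # "---" is the separator injected by awk
    lines = record.strip().splitlines()
    if lines:  # skip empty trailing records
        nodes.append(SlurmNodeStub(*lines))

print(nodes[0].partitions)  # compute,compute2 (the real class splits this on commas)
print(nodes[1].partitions)  # None, because the Partitions field was missing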

src/slurm_plugin/clustermgtd.py
Lines changed: 15 additions & 11 deletions

@@ -439,7 +439,7 @@ def _write_timestamp_to_file(self):
 
     @staticmethod
     @retry(stop_max_attempt_number=2, wait_fixed=1000)
-    def _get_node_info_with_retry(nodes):
+    def _get_node_info_with_retry(nodes=""):
         return get_nodes_info(nodes)
 
     @staticmethod
@@ -457,18 +457,22 @@ def _get_node_info_from_partition():
         try:
             inactive_nodes = []
             active_nodes = []
-            partitions = ClusterManager._get_partition_info_with_retry()
+            ignored_nodes = []
+            partitions = {partition.name: partition for partition in ClusterManager._get_partition_info_with_retry()}
             log.debug("Partitions: %s", partitions)
-            for part in partitions:
-                # Get node info if partition has relevant node
-                # See SlurmNode.SLURM_RELEVANT_SINFO_STATES
-                if part.nodes:
-                    nodes = ClusterManager._get_node_info_with_retry(part.nodes)
-                    if "INACTIVE" in part.state:
-                        inactive_nodes.extend(nodes)
-                    else:
-                        active_nodes.extend(nodes)
+            nodes = ClusterManager._get_node_info_with_retry()
+            log.debug("Nodes: %s", nodes)
+            for node in nodes:
+                if not node.partitions or any(p not in partitions for p in node.partitions):
+                    # ignore nodes not belonging to any partition
+                    ignored_nodes.append(node)
+                elif any("INACTIVE" not in partitions[p].state for p in node.partitions):
+                    active_nodes.append(node)
+                else:
+                    inactive_nodes.append(node)
 
+            if ignored_nodes:
+                log.warning("Ignoring following nodes because they do not belong to any partition: %s", ignored_nodes)
             return active_nodes, inactive_nodes
         except Exception as e:
             log.error("Failed when getting partition/node states from scheduler with exception %s", e)

tests/common/schedulers/test_slurm_commands.py
Lines changed: 71 additions & 32 deletions

@@ -87,29 +87,62 @@ def test_is_static_node(nodename, expected_is_static):
                 "172.31.10.155\n"
                 "172-31-10-155\n"
                 "MIXED+CLOUD\n"
+                "multiple\n"
+                "---\n"
                 "multiple-dy-c5xlarge-2\n"
                 "172.31.7.218\n"
                 "172-31-7-218\n"
                 "IDLE+CLOUD+POWER\n"
+                "multiple\n"
+                "---\n"
                 "multiple-dy-c5xlarge-3\n"
                 "multiple-dy-c5xlarge-3\n"
                 "multiple-dy-c5xlarge-3\n"
-                "IDLE+CLOUD+POWER"
+                "IDLE+CLOUD+POWER\n"
+                "multiple\n"
+                "---\n"
+                "multiple-dy-c5xlarge-4\n"
+                "multiple-dy-c5xlarge-4\n"
+                "multiple-dy-c5xlarge-4\n"
+                "IDLE+CLOUD+POWER\n"
+                "multiple,multiple2\n"
+                "---\n"
+                "multiple-dy-c5xlarge-5\n"
+                "multiple-dy-c5xlarge-5\n"
+                "multiple-dy-c5xlarge-5\n"
+                "IDLE+CLOUD+POWER\n"
+                # missing partitions
+                "---"
             ),
             [
-                SlurmNode("multiple-dy-c5xlarge-1", "172.31.10.155", "172-31-10-155", "MIXED+CLOUD"),
-                SlurmNode("multiple-dy-c5xlarge-2", "172.31.7.218", "172-31-7-218", "IDLE+CLOUD+POWER"),
+                SlurmNode("multiple-dy-c5xlarge-1", "172.31.10.155", "172-31-10-155", "MIXED+CLOUD", "multiple"),
+                SlurmNode("multiple-dy-c5xlarge-2", "172.31.7.218", "172-31-7-218", "IDLE+CLOUD+POWER", "multiple"),
                 SlurmNode(
                     "multiple-dy-c5xlarge-3",
                     "multiple-dy-c5xlarge-3",
                     "multiple-dy-c5xlarge-3",
                     "IDLE+CLOUD+POWER",
+                    "multiple",
+                ),
+                SlurmNode(
+                    "multiple-dy-c5xlarge-4",
+                    "multiple-dy-c5xlarge-4",
+                    "multiple-dy-c5xlarge-4",
+                    "IDLE+CLOUD+POWER",
+                    "multiple,multiple2",
+                ),
+                SlurmNode(
+                    "multiple-dy-c5xlarge-5",
+                    "multiple-dy-c5xlarge-5",
+                    "multiple-dy-c5xlarge-5",
+                    "IDLE+CLOUD+POWER",
+                    None,
                 ),
             ],
         )
     ],
 )
-def test_parse_nodes_info(node_info, expected_parsed_nodes_output, mocker):
+def test_parse_nodes_info(node_info, expected_parsed_nodes_output):
     assert_that(_parse_nodes_info(node_info)).is_equal_to(expected_parsed_nodes_output)
 
 
@@ -426,10 +459,13 @@ def test_update_nodes(batch_node_info, state, reason, raise_on_error, run_comman
 @pytest.mark.parametrize(
     "node, expected_output",
     [
-        (SlurmNode("queue-name-st-t2micro-1", "nodeip", "nodehostname", "somestate"), True),
-        (SlurmNode("queue-name-st-dy-t2micro-1", "nodeip", "nodehostname", "somestate"), False),
-        (SlurmNode("queuename-dy-t2micro-1", "nodeip", "nodehostname", "somestate"), False),
-        (SlurmNode("queuename-dy-dy-dy-st-t2micro-1", "nodeip", "nodehostname", "somestate"), True),
+        (SlurmNode("queue-name-st-t2micro-1", "nodeip", "nodehostname", "somestate", "queue-name"), True),
+        (SlurmNode("queue-name-st-dy-t2micro-1", "nodeip", "nodehostname", "somestate", "queue-name-st"), False),
+        (SlurmNode("queuename-dy-t2micro-1", "nodeip", "nodehostname", "somestate", "queuename"), False),
+        (
+            SlurmNode("queuename-dy-dy-dy-st-t2micro-1", "nodeip", "nodehostname", "somestate", "queuename-dy-dy-dy"),
+            True,
+        ),
     ],
 )
 def test_slurm_node_is_static(node, expected_output):
@@ -439,8 +475,11 @@ def test_slurm_node_is_static(node, expected_output):
 @pytest.mark.parametrize(
     "node, expected_output",
     [
-        (SlurmNode("queue-name-st-t2micro-1", "nodeip", "nodehostname", "somestate"), True),
-        (SlurmNode("queuename-dy-t2micro-1", "queuename-dy-t2micro-1", "nodehostname", "somestate"), False),
+        (SlurmNode("queue-name-st-t2micro-1", "nodeip", "nodehostname", "somestate", "queue-name"), True),
+        (
+            SlurmNode("queuename-dy-t2micro-1", "queuename-dy-t2micro-1", "nodehostname", "somestate", "queuename"),
+            False,
+        ),
     ],
 )
 def test_slurm_node_is_nodeaddr_set(node, expected_output):
@@ -450,12 +489,12 @@ def test_slurm_node_is_nodeaddr_set(node, expected_output):
 @pytest.mark.parametrize(
     "node, expected_output",
     [
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "somestate"), False),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "MIXED#+CLOUD+DRAIN"), True),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "ALLOCATED*+CLOUD+DRAIN"), True),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE+CLOUD"), False),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "DOWN+CLOUD"), False),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "COMPLETING+DRAIN"), True),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "somestate", "queue1"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "MIXED#+CLOUD+DRAIN", "queue1"), True),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "ALLOCATED*+CLOUD+DRAIN", "queue1"), True),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE+CLOUD", "queue1"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "DOWN+CLOUD", "queue1"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "COMPLETING+DRAIN", "queue1"), True),
     ],
 )
 def test_slurm_node_has_job(node, expected_output):
@@ -465,11 +504,11 @@ def test_slurm_node_has_job(node, expected_output):
 @pytest.mark.parametrize(
     "node, expected_output",
     [
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "somestate"), False),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "MIXED#+CLOUD+DRAIN"), False),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "ALLOCATED*+CLOUD+DRAIN"), False),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE*+CLOUD+DRAIN"), True),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "DOWN+CLOUD+DRAIN"), True),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "somestate", "queue1"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "MIXED#+CLOUD+DRAIN", "queue1"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "ALLOCATED*+CLOUD+DRAIN", "queue1"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE*+CLOUD+DRAIN", "queue1"), True),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "DOWN+CLOUD+DRAIN", "queue1"), True),
     ],
 )
 def test_slurm_node_is_drained(node, expected_output):
@@ -479,12 +518,12 @@ def test_slurm_node_is_drained(node, expected_output):
 @pytest.mark.parametrize(
     "node, expected_output",
     [
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "somestate"), False),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "MIXED#+CLOUD+DOWN"), True),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "ALLOCATED*+CLOUD+DRAIN"), False),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "DOWN*+CLOUD"), True),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "DOWN+CLOUD+POWER"), True),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE~+CLOUD+POWERING_DOWN"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "somestate", "queue1"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "MIXED#+CLOUD+DOWN", "queue1"), True),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "ALLOCATED*+CLOUD+DRAIN", "queue1"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "DOWN*+CLOUD", "queue1"), True),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "DOWN+CLOUD+POWER", "queue1"), True),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE~+CLOUD+POWERING_DOWN", "queue1"), False),
     ],
 )
 def test_slurm_node_is_down(node, expected_output):
@@ -494,11 +533,11 @@ def test_slurm_node_is_down(node, expected_output):
 @pytest.mark.parametrize(
     "node, expected_output",
     [
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE+CLOUD+POWER"), True),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "MIXED#+CLOUD+DRAIN"), False),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "ALLOCATED*+CLOUD+DOWN"), False),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE+CLOUD+POWERING_DOWN"), False),
-        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE#+CLOUD"), True),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE+CLOUD+POWER", "queue1"), True),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "MIXED#+CLOUD+DRAIN", "queue1"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "ALLOCATED*+CLOUD+DOWN", "queue1"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE+CLOUD+POWERING_DOWN", "queue1"), False),
+        (SlurmNode("queue1-st-c5xlarge-1", "nodeip", "nodehostname", "IDLE#+CLOUD", "queue1"), True),
     ],
 )
 def test_slurm_node_is_up(node, expected_output):
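
For reference, a short usage sketch of the new optional constructor argument these tests exercise (it assumes slurm_commands is importable the same way the test module imports it; the node values are illustrative):

from common.schedulers.slurm_commands import SlurmNode

node = SlurmNode(
    "multiple-dy-c5xlarge-4", "1.2.3.4", "multiple-dy-c5xlarge-4", "IDLE+CLOUD+POWER", "multiple,multiple2"
)
assert node.partitions == ["multiple", "multiple2"]  # comma-separated string is split in __init__

orphan = SlurmNode("multiple-dy-c5xlarge-5", "1.2.3.4", "multiple-dy-c5xlarge-5", "IDLE+CLOUD+POWER")
assert orphan.partitions is None  # nodes without a Partitions field keep partitions=None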
