Skip to content

Commit 6adb559

Browse files
committed
Fix DNS name retrieval on multi-NIC instances
Fix the DNS name retrieval on instances with multiple NICs, where the PrivateDnsName is the DNS name of the network interface with NetworkCardIndex 0 and DeviceIndex 0. The problem showed up when `SlurmSettings/Dns/UseEc2Hostnames` was set to `True`. Signed-off-by: Luca Carrogu <[email protected]>
1 parent 0ad03a1 commit 6adb559

File tree

5 files changed

+153
-13
lines changed

5 files changed

+153
-13
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ This file is used to list changes made in each version of the aws-parallelcluste
1313
- Make `aws-parallelcluster-node` daemons handle only ParallelCluster-managed Slurm partitions.
1414

1515
**BUG FIXES**
16+
- Fix an issue that caused compute node DNS names to be misaligned on instances with multiple network interfaces,
17+
when `SlurmSettings/Dns/UseEc2Hostnames` is set to `True`.
1618

1719
3.6.1
1820
------

src/common/ec2_utils.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,21 @@
1111
# See the License for the specific language governing permissions and limitations under the License.
1212

1313

14-
def get_private_ip_address(instance_info):
14+
def get_private_ip_address_and_dns_name(instance_info):
1515
"""
16-
Return the PrivateIpAddress of the EC2 instance.
16+
Return the PrivateIpAddress and PrivateDnsName of the EC2 instance.
1717
18-
The PrivateIpAddress is considered to be the one for the
18+
The PrivateIpAddress and PrivateDnsName are considered to be the ones for the
1919
network interface with DeviceIndex = NetworkCardIndex = 0.
2020
:param instance_info: the dictionary returned by a EC2:DescribeInstances call.
21-
:return: the PrivateIpAddress of the instance.
21+
:return: the PrivateIpAddress and PrivateDnsName of the instance.
2222
"""
2323
private_ip = instance_info["PrivateIpAddress"]
24+
private_dns_name = instance_info["PrivateDnsName"]
2425
for network_interface in instance_info["NetworkInterfaces"]:
2526
attachment = network_interface["Attachment"]
26-
if attachment["DeviceIndex"] == 0 and attachment["NetworkCardIndex"] == 0:
27-
private_ip = network_interface["PrivateIpAddress"]
27+
if attachment.get("DeviceIndex", -1) == 0 and attachment.get("NetworkCardIndex", -1) == 0:
28+
private_ip = network_interface.get("PrivateIpAddress", private_ip)
29+
private_dns_name = network_interface.get("PrivateDnsName", private_dns_name)
2830
break
29-
return private_ip
31+
return private_ip, private_dns_name

src/slurm_plugin/fleet_manager.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import boto3
1818
from botocore.exceptions import ClientError
19-
from common.ec2_utils import get_private_ip_address
19+
from common.ec2_utils import get_private_ip_address_and_dns_name
2020
from common.utils import setup_logging_filter
2121

2222
logger = logging.getLogger(__name__)
@@ -50,10 +50,11 @@ def __hash__(self):
5050
@staticmethod
5151
def from_describe_instance_data(instance_info):
5252
try:
53+
private_ip, private_dns_name = get_private_ip_address_and_dns_name(instance_info)
5354
return EC2Instance(
5455
instance_info["InstanceId"],
55-
get_private_ip_address(instance_info),
56-
instance_info["PrivateDnsName"].split(".")[0],
56+
private_ip,
57+
private_dns_name.split(".")[0],
5758
instance_info["LaunchTime"],
5859
)
5960
except KeyError as e:

src/slurm_plugin/instance_manager.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import boto3
2424
from botocore.config import Config
2525
from botocore.exceptions import ClientError
26-
from common.ec2_utils import get_private_ip_address
26+
from common.ec2_utils import get_private_ip_address_and_dns_name
2727
from common.schedulers.slurm_commands import get_nodes_info, update_nodes
2828
from common.utils import grouper, setup_logging_filter
2929
from slurm_plugin.common import ComputeInstanceDescriptor, log_exception, print_with_count
@@ -436,11 +436,12 @@ def get_cluster_instances(self, include_head_node=False, alive_states_only=True)
436436
instances = []
437437
for instance_info in filtered_iterator:
438438
try:
439+
private_ip, private_dns_name = get_private_ip_address_and_dns_name(instance_info)
439440
instances.append(
440441
EC2Instance(
441442
instance_info["InstanceId"],
442-
get_private_ip_address(instance_info),
443-
instance_info["PrivateDnsName"].split(".")[0],
443+
private_ip,
444+
private_dns_name.split(".")[0],
444445
instance_info["LaunchTime"],
445446
)
446447
)

tests/common/test_ec2_utils.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License").
4+
# You may not use this file except in compliance with the License.
5+
# A copy of the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "LICENSE.txt" file accompanying this file.
10+
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
11+
# See the License for the specific language governing permissions and limitations under the License.
12+
import pytest
13+
from assertpy import assert_that
14+
from common.ec2_utils import get_private_ip_address_and_dns_name
15+
16+
17+
@pytest.mark.parametrize(
18+
"instance_info, expected_private_ip, expected_private_dns_name",
19+
[
20+
(
21+
{
22+
"InstanceId": "i-12345",
23+
"InstanceType": "c5.xlarge",
24+
"PrivateIpAddress": "ip.1.0.0.1",
25+
"PrivateDnsName": "ip-1-0-0-1",
26+
"NetworkInterfaces": [
27+
{
28+
"Attachment": {
29+
"DeviceIndex": 0,
30+
"NetworkCardIndex": 0,
31+
},
32+
"PrivateIpAddress": "ip.1.0.0.1",
33+
"PrivateDnsName": "ip-1-0-0-1",
34+
},
35+
],
36+
},
37+
"ip.1.0.0.1",
38+
"ip-1-0-0-1",
39+
),
40+
(
41+
{
42+
"InstanceId": "i-12345",
43+
"InstanceType": "c5.xlarge",
44+
"PrivateIpAddress": "ip.1.0.0.1",
45+
"PrivateDnsName": "ip-1-0-0-1",
46+
"NetworkInterfaces": [
47+
{
48+
"Attachment": {
49+
"DeviceIndex": 0,
50+
"NetworkCardIndex": 0,
51+
},
52+
},
53+
],
54+
},
55+
"ip.1.0.0.1",
56+
"ip-1-0-0-1",
57+
),
58+
(
59+
{
60+
"InstanceId": "i-12345",
61+
"InstanceType": "c5.xlarge",
62+
"PrivateIpAddress": "ip.1.0.0.1",
63+
"PrivateDnsName": "ip-1-0-0-1",
64+
"NetworkInterfaces": [
65+
{
66+
"Attachment": {},
67+
},
68+
],
69+
},
70+
"ip.1.0.0.1",
71+
"ip-1-0-0-1",
72+
),
73+
(
74+
{
75+
"InstanceId": "i-12345",
76+
"InstanceType": "c5.xlarge",
77+
"PrivateIpAddress": "ip.1.0.0.1",
78+
"PrivateDnsName": "ip-1-0-0-1",
79+
"NetworkInterfaces": [
80+
{
81+
"Attachment": {
82+
"DeviceIndex": 0,
83+
"NetworkCardIndex": 1,
84+
},
85+
"PrivateIpAddress": "ip.1.0.0.1",
86+
"PrivateDnsName": "ip-1-0-0-1",
87+
},
88+
{
89+
"Attachment": {
90+
"DeviceIndex": 0,
91+
"NetworkCardIndex": 0,
92+
},
93+
"PrivateIpAddress": "ip.1.0.0.2",
94+
"PrivateDnsName": "ip-1-0-0-2",
95+
},
96+
],
97+
},
98+
"ip.1.0.0.2",
99+
"ip-1-0-0-2",
100+
),
101+
(
102+
{
103+
"InstanceId": "i-12345",
104+
"InstanceType": "c5.xlarge",
105+
"PrivateIpAddress": "ip.1.0.0.1",
106+
"PrivateDnsName": "ip-1-0-0-1",
107+
"NetworkInterfaces": [
108+
{
109+
"Attachment": {
110+
"DeviceIndex": 0,
111+
"NetworkCardIndex": 0,
112+
},
113+
"PrivateIpAddress": "ip.1.0.0.1",
114+
"PrivateDnsName": "ip-1-0-0-1",
115+
},
116+
{
117+
"Attachment": {
118+
"DeviceIndex": 0,
119+
"NetworkCardIndex": 1,
120+
},
121+
"PrivateIpAddress": "ip.1.0.0.2",
122+
"PrivateDnsName": "ip-1-0-0-2",
123+
},
124+
],
125+
},
126+
"ip.1.0.0.1",
127+
"ip-1-0-0-1",
128+
),
129+
],
130+
)
131+
def test_get_private_ip_address_and_dns_name(mocker, instance_info, expected_private_ip, expected_private_dns_name):
132+
actual_private_ip, actual_private_dns_name = get_private_ip_address_and_dns_name(instance_info)
133+
assert_that(actual_private_ip).is_equal_to(expected_private_ip)
134+
assert_that(actual_private_dns_name).is_equal_to(expected_private_dns_name)

0 commit comments

Comments
 (0)