Skip to content

Commit 89b3490

Browse files
committed
Add support for custom log file configuration in CW agent
Signed-off-by: chenwany <[email protected]>
1 parent 9d2c1d1 commit 89b3490

File tree

3 files changed

+63
-6
lines changed

3 files changed

+63
-6
lines changed

cookbooks/aws-parallelcluster-config/files/default/cloudwatch_agent/write_cloudwatch_agent_json.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import os
1212
import socket
1313

14+
import yaml
1415

1516
AWS_CLOUDWATCH_CFG_PATH = '/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json'
1617

@@ -39,6 +40,10 @@ def parse_args():
3940
required=True,
4041
choices=['slurm', 'awsbatch', 'plugin'],
4142
help='Scheduler')
43+
parser.add_argument('--cluster-config-path',
44+
required=False,
45+
help='Cluster configuration path',
46+
)
4247
return parser.parse_args()
4348

4449

@@ -132,6 +137,44 @@ def select_logs(configs, args):
132137
return selected_configs
133138

134139

140+
def get_node_roles(scheudler_plugin_node_roles):
    """
    Translate a scheduler plugin node type into the list of node roles it covers.

    "ALL" maps to both the compute fleet and the head node, "HEAD" to the head node only
    and "COMPUTE" to the compute fleet only. Any other value yields None.
    """
    head_role, compute_role = "HeadNode", "ComputeFleet"
    roles_by_node_type = {
        "ALL": [compute_role, head_role],
        "HEAD": [head_role],
        "COMPUTE": [compute_role],
    }
    return roles_by_node_type.get(scheudler_plugin_node_roles)
143+
144+
145+
def load_config(cluster_config_path):
    """Parse the YAML file at cluster_config_path with the safe loader and return the parsed content."""
    with open(cluster_config_path) as config_stream:
        # safe_load is PyYAML's shorthand for load(..., Loader=SafeLoader).
        parsed_config = yaml.safe_load(config_stream)
    return parsed_config
148+
149+
150+
def add_scheduler_plugin_log(config_data, cluster_config_path):
    """
    Extend config_data with the log files declared by a scheduler plugin.

    The cluster configuration is loaded from cluster_config_path. When the scheduler is
    "plugin" and the scheduler definition lists monitoring log files, one log config entry
    is appended to config_data["log_configs"] per file, and the file's timestamp format is
    registered in config_data["timestamp_formats"] under the file's log stream name.
    config_data is modified in place and also returned.
    """
    files_attribute = "Scheduling.SchedulerSettings.SchedulerDefinition.Monitoring.Logs.Files"
    cluster_config = load_config(cluster_config_path)
    plugin_log_files = get_dict_value(cluster_config, files_attribute)

    # Nothing to add unless log files are declared and the scheduler is a plugin.
    if not plugin_log_files or get_dict_value(cluster_config, "Scheduling.Scheduler") != "plugin":
        return config_data

    for plugin_log_file in plugin_log_files:
        stream_name = plugin_log_file.get("LogStreamName")

        # The log stream name doubles as the key of the timestamp format registered below.
        config_data["log_configs"].append(
            {
                "timestamp_format_key": stream_name,
                "file_path": plugin_log_file.get("FilePath"),
                "log_stream_name": stream_name,
                "schedulers": ["plugin"],
                "platforms": ["centos", "ubuntu", "amazon"],
                "node_roles": get_node_roles(plugin_log_file.get("NodeType")),
                "feature_conditions": [],
            }
        )

        config_data["timestamp_formats"][stream_name] = plugin_log_file.get("TimestampFormat")

    return config_data
176+
177+
135178
def add_timestamps(configs, timestamps_dict):
136179
"""For each config, set its timestamp_format field based on its timestamp_format_key field."""
137180
for config in configs:
@@ -159,10 +202,21 @@ def create_config(log_configs):
159202
}
160203

161204

205+
def get_dict_value(value, attributes, default=None):
    """
    Look up a nested value by following a dot-separated path of keys.

    :param value: root dictionary (or dict-like object exposing ``get``) to search
    :param attributes: dot-separated key path, e.g. "Scheduling.Scheduler"
    :param default: value returned when the path cannot be resolved
    :return: the value found at the end of the path, or ``default`` when a key is missing,
             a value along the path is None, or a value along the path cannot be
             traversed (for example a string or a list where a dictionary is expected)
    """
    for key in attributes.split("."):
        # A missing/empty root or a scalar in the middle of the path has no ``get``;
        # treat it as "path not found" instead of raising AttributeError.
        if not hasattr(value, "get"):
            return default
        value = value.get(key, None)
        if value is None:
            return default
    return value
212+
213+
162214
def main():
163215
"""Create cloudwatch agent config file."""
164216
args = parse_args()
165217
config_data = read_data(args.config)
218+
if args.cluster_config_path:
219+
config_data = add_scheduler_plugin_log(config_data, args.cluster_config_path)
166220
log_configs = select_logs(config_data['log_configs'], args)
167221
log_configs = add_timestamps(log_configs, config_data['timestamp_formats'])
168222
log_configs = add_log_group_name_params(args.log_group, log_configs)

cookbooks/aws-parallelcluster-config/recipes/cloudwatch_agent.rb

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,12 @@
7373
'CONFIG_DATA_PATH' => config_data_path
7474
)
7575
not_if { ::File.exist?('/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json') }
76+
77+
cluster_config_path = node['cluster']['scheduler'] == 'plugin' ? "--cluster-config-path #{node['cluster']['cluster_config_path']}" : ""
78+
7679
command "#{node.default['cluster']['cookbook_virtualenv_path']}/bin/python #{config_script_path} "\
77-
"--platform #{node['platform']} --config $CONFIG_DATA_PATH --log-group $LOG_GROUP_NAME "\
78-
"--scheduler $SCHEDULER --node-role $NODE_ROLE"
80+
"--platform #{node['platform']} --config $CONFIG_DATA_PATH --log-group $LOG_GROUP_NAME "\
81+
"--scheduler $SCHEDULER --node-role $NODE_ROLE #{cluster_config_path}"
7982
end
8083

8184
execute "cloudwatch-agent-start" do

cookbooks/aws-parallelcluster-config/recipes/init.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,16 @@
5252
mode "0755"
5353
end
5454

55+
include_recipe "aws-parallelcluster-config::mount_shared" if node['cluster']['node_type'] == "ComputeFleet"
56+
57+
fetch_config 'Fetch and load cluster configs' unless node['cluster']['scheduler'] == 'awsbatch'
58+
5559
# Install cloudwatch, write configuration and start it.
5660
include_recipe "aws-parallelcluster-config::cloudwatch_agent"
5761

5862
# Configure additional Networking Interfaces (if present)
5963
include_recipe "aws-parallelcluster-config::network_interfaces" unless virtualized?
6064

61-
include_recipe "aws-parallelcluster-config::mount_shared" if node['cluster']['node_type'] == "ComputeFleet"
62-
63-
fetch_config 'Fetch and load cluster configs' unless node['cluster']['scheduler'] == 'awsbatch'
64-
6565
include_recipe "aws-parallelcluster-slurm::init" if node['cluster']['scheduler'] == 'slurm'
6666
include_recipe "aws-parallelcluster-scheduler-plugin::init" if node['cluster']['scheduler'] == 'plugin'
6767

0 commit comments

Comments
 (0)