Skip to content

Commit a144e85

Browse files
authored
Merge pull request #167 from theburningmonk/feature/cw_alarms
add support for specifying CloudWatch Alarms
2 parents 4911f70 + 7bf46b1 commit a144e85

File tree

5 files changed

+424
-1
lines changed

5 files changed

+424
-1
lines changed

README.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ stepFunctions:
4848
Resource: arn:aws:lambda:#{AWS::Region}:#{AWS::AccountId}:function:${self:service}-${opt:stage}-hello
4949
End: true
5050
dependsOn: CustomIamRole
51+
alarms:
52+
topics:
53+
ok: arn:aws:sns:us-east-1:1234567890:NotifyMe
54+
alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe
55+
insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe
56+
metrics:
57+
- executionsTimeOut
58+
- executionsFailed
59+
- executionsAborted
60+
- executionThrottled
5161
hellostepfunc2:
5262
definition:
5363
StartAt: HelloWorld2
@@ -60,6 +70,16 @@ stepFunctions:
6070
- DynamoDBTable
6171
- KinesisStream
6272
- CustomIamRole
73+
alarms:
74+
topics:
75+
ok: arn:aws:sns:us-east-1:1234567890:NotifyMe
76+
alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe
77+
insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe
78+
metrics:
79+
- executionsTimeOut
80+
- executionsFailed
81+
- executionsAborted
82+
- executionThrottled
6383
activities:
6484
- myTask
6585
- yourTask
@@ -132,6 +152,45 @@ stepFunctions:
132152
- myStream
133153
```
134154

155+
#### CloudWatch Alarms
156+
It's common practice to want to monitor the health of your state machines and be alerted when something goes wrong. You can either:
157+
158+
* do this using the [serverless-plugin-aws-alerts](https://github.com/ACloudGuru/serverless-plugin-aws-alerts), which lets you configure custom CloudWatch Alarms against the various metrics that Step Functions publishes.
159+
* or, you can use the built-in `alarms` configuration from this plugin, which gives you an opinionated set of default alarms (see below)
160+
161+
```yaml
162+
stepFunctions:
163+
stateMachines:
164+
myStateMachine:
165+
alarms:
166+
topics:
167+
ok: arn:aws:sns:us-east-1:1234567890:NotifyMe
168+
alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe
169+
insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe
170+
metrics:
171+
- executionsTimeOut
172+
- executionsFailed
173+
- executionsAborted
174+
- executionThrottled
175+
```
176+
177+
Both `topics` and `metrics` are required properties. There are 4 supported metrics, each of which maps to a CloudWatch metric that Step Functions publishes for your executions.
178+
179+
The generated CloudWatch alarms will have the following configuration:
180+
```yaml
181+
namespace: 'AWS/States'
182+
metric: <ExecutionsTimeOut | ExecutionsFailed | ExecutionsAborted | ExecutionThrottled>
183+
threshold: 1
184+
period: 60
185+
evaluationPeriods: 1
186+
ComparisonOperator: GreaterThanOrEqualToThreshold
187+
Statistic: Sum
188+
treatMissingData: missing
189+
Dimensions:
190+
- Name: StateMachineArn
191+
Value: <ArnOfTheStateMachine>
192+
```
193+
135194
#### Current Gotcha
136195
Please keep this gotcha in mind if you want to reference the `name` from the `resources` section. To generate Logical ID for CloudFormation, the plugin transforms the specified name in serverless.yml based on the following scheme.
137196

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
'use strict';
2+
const _ = require('lodash');
3+
const BbPromise = require('bluebird');
4+
5+
// Maps each metric key accepted under `alarms.metrics` to the CloudWatch
// MetricName used for the generated alarm.
const cloudWatchMetricNames = {
  executionsTimeOut: 'ExecutionsTimeOut',
  executionsFailed: 'ExecutionsFailed',
  executionsAborted: 'ExecutionsAborted',
  executionThrottled: 'ExecutionThrottled',
};

// Human-readable suffix for each alarm's AlarmDescription, keyed by the same
// metric keys as `cloudWatchMetricNames`.
const alarmDescriptions = {
  executionsTimeOut: 'executions timed out',
  executionsFailed: 'executions failed',
  executionsAborted: 'executions were aborted',
  executionThrottled: 'executions were throttled',
};
18+
19+
/**
 * Builds the CloudWatch alarm resources for one state machine.
 *
 * Metric keys listed in `alarmsObj.metrics` are de-duplicated; keys that are
 * not present in `cloudWatchMetricNames` are reported through
 * `serverless.cli.consoleLog` and skipped.
 *
 * @param {object} serverless - object exposing `cli.consoleLog(message)`.
 * @param {string} region - interpolated into each alarm description.
 * @param {string} stage - interpolated into each alarm description.
 * @param {string} stateMachineName - used in log output and alarm descriptions.
 * @param {string} stateMachineLogicalId - logical ID referenced (`Ref`) by the
 *   alarm dimension and used as the prefix of each alarm's logical ID.
 * @param {object} alarmsObj - the `alarms` config (`topics` and `metrics`).
 * @returns {Array<{logicalId: string, alarm: object}>} one entry per valid metric.
 */
function getCloudWatchAlarms(
  serverless, region, stage, stateMachineName, stateMachineLogicalId, alarmsObj) {
  // A configured topic becomes a single-element action list; a missing one
  // becomes an empty list.
  const actionsFor = (topicKey) => {
    const topicArn = _.get(alarmsObj, `topics.${topicKey}`);
    return topicArn ? [topicArn] : [];
  };
  const okActions = actionsFor('ok');
  const alarmActions = actionsFor('alarm');
  const insufficientDataActions = actionsFor('insufficientData');

  const metrics = _.uniq(_.get(alarmsObj, 'metrics', []));
  const [valid, invalid] = _.partition(metrics, m => _.has(cloudWatchMetricNames, m));

  if (!_.isEmpty(invalid)) {
    // Build ONE message string: any extra argument to consoleLog would not be
    // part of the logged message.
    serverless.cli.consoleLog(
      `state machine [${stateMachineName}] : alarms.metrics has invalid metrics ` +
      `[${invalid.join(',')}]. ` +
      'No CloudWatch Alarms would be created for these. ' +
      'Please see https://github.com/horike37/serverless-step-functions for supported metrics');
  }

  return valid.map((metric) => {
    const MetricName = cloudWatchMetricNames[metric];
    const AlarmDescription =
      `${stateMachineName}[${stage}][${region}]: ${alarmDescriptions[metric]}`;
    const logicalId = `${stateMachineLogicalId}${MetricName}Alarm`;

    return {
      logicalId,
      alarm: {
        Type: 'AWS::CloudWatch::Alarm',
        Properties: {
          Namespace: 'AWS/States',
          MetricName,
          AlarmDescription,
          Threshold: 1,
          Period: 60,
          EvaluationPeriods: 1,
          ComparisonOperator: 'GreaterThanOrEqualToThreshold',
          Statistic: 'Sum',
          OKActions: okActions,
          AlarmActions: alarmActions,
          InsufficientDataActions: insufficientDataActions,
          TreatMissingData: 'missing',
          Dimensions: [
            {
              Name: 'StateMachineArn',
              Value: {
                Ref: stateMachineLogicalId,
              },
            },
          ],
        },
      },
    };
  });
}
75+
76+
/**
 * Checks that a state machine's `alarms` config has the expected shape.
 *
 * The config is accepted when it is an object whose `topics` is an object
 * with at least one of the keys `ok`, `alarm` or `insufficientData`, and
 * whose `metrics` is an array of strings. On rejection a message is written
 * through `serverless.cli.consoleLog`.
 *
 * @param {object} serverless - object exposing `cli.consoleLog(message)`.
 * @param {string} stateMachineName - used only in log output.
 * @param {*} alarmsObj - the `alarms` value to validate.
 * @returns {boolean} true when the config is usable, false otherwise.
 */
function validateConfig(serverless, stateMachineName, alarmsObj) {
  const hasExpectedShape =
    _.isObject(alarmsObj) &&
    _.isObject(alarmsObj.topics) &&
    _.isArray(alarmsObj.metrics) &&
    _.every(alarmsObj.metrics, _.isString);

  if (!hasExpectedShape) {
    serverless.cli.consoleLog(
      `state machine [${stateMachineName}] : alarms config is malformed. ` +
      'Please see https://github.com/horike37/serverless-step-functions for examples');
    return false;
  }

  const hasAnyTopic =
    _.has(alarmsObj.topics, 'ok') ||
    _.has(alarmsObj.topics, 'alarm') ||
    _.has(alarmsObj.topics, 'insufficientData');

  if (!hasAnyTopic) {
    // The message names the keys exactly as they are checked above.
    serverless.cli.consoleLog(
      `state machine [${stateMachineName}] : alarms config is malformed. ` +
      "alarms.topics must specify 'ok', 'alarm' or 'insufficientData'"
    );
    return false;
  }

  return true;
}
99+
100+
module.exports = {
101+
compileAlarms() {
102+
const cloudWatchAlarms = _.flatMap(this.getAllStateMachines(), (name) => {
103+
const stateMachineObj = this.getStateMachine(name);
104+
const stateMachineLogicalId = this.getStateMachineLogicalId(name, stateMachineObj);
105+
const stateMachineName = stateMachineObj.name || name;
106+
const alarmsObj = stateMachineObj.alarms;
107+
108+
if (!validateConfig(this.serverless, stateMachineName, alarmsObj)) {
109+
return [];
110+
}
111+
112+
return getCloudWatchAlarms(
113+
this.serverless,
114+
this.region,
115+
this.stage,
116+
stateMachineName,
117+
stateMachineLogicalId,
118+
alarmsObj);
119+
});
120+
121+
const newResources = _.mapValues(_.keyBy(cloudWatchAlarms, 'logicalId'), 'alarm');
122+
123+
_.merge(
124+
this.serverless.service.provider.compiledCloudFormationTemplate.Resources,
125+
newResources);
126+
return BbPromise.resolve();
127+
},
128+
};

0 commit comments

Comments
 (0)