diff --git a/content/en/database_monitoring/guide/rds_auto_install.md b/content/en/database_monitoring/guide/rds_auto_install.md
new file mode 100644
index 0000000000000..1892692dfe9ff
--- /dev/null
+++ b/content/en/database_monitoring/guide/rds_auto_install.md
@@ -0,0 +1,5 @@
+---
+title: Automatically Monitoring RDS Instances With DBM
+
+---
+
diff --git a/static/resources/yaml/dbm/rds-auto-install/cloudformation.yaml b/static/resources/yaml/dbm/rds-auto-install/cloudformation.yaml
new file mode 100644
index 0000000000000..f4b35fb3477b0
--- /dev/null
+++ b/static/resources/yaml/dbm/rds-auto-install/cloudformation.yaml
@@ -0,0 +1,237 @@
+# Automatically set up RDS instances to be ready for the Datadog integration
+# Before deploying, run deploy.sh in the lambda subdirectory to build the Lambda function
+#
+# To deploy run deploy.sh in the same directory as this file
+#
+
+
+Parameters:
+  VPC:
+    Type: "AWS::EC2::VPC::Id"
+    Description: "The VPC to deploy the resources in"
+  SubnetId:
+    Type: "AWS::EC2::Subnet::Id"
+    Description: "The subnet to deploy the Lambda function in"
+  LambdaBucket:
+    Type: String
+    Description: "The S3 bucket containing the Lambda function code"
+  LambdaKey:
+    Type: String
+    Description: "The S3 key for the Lambda function code"
+
+Resources:
+  # The SNS topic is used as the target for RDS instance events
+  SNS:
+    Type: AWS::SNS::Topic
+    Properties:
+      TopicName: rds-db-instance-events
+
+  # This subscription automatically routes messages to the SQS queue
+  # to be processed by the Lambda function
+  SNSSubscription:
+    Type: AWS::SNS::Subscription
+    Properties:
+      TopicArn: !Ref SNS
+      Protocol: sqs
+      RawMessageDelivery: true
+      Endpoint: !GetAtt SQS.Arn
+
+  # The SQS queue holds messages about RDS instance events
+  # There can be a delay between event creation and all permissions
+  # being applied, so we use a queue to retry until the permissions are set
+  SQS:
+    Type: AWS::SQS::Queue
+    Properties:
+      QueueName: rds-db-instance-events
+      VisibilityTimeout: 60  # 6x the Lambda timeout, as AWS advises for SQS event sources
+      MessageRetentionPeriod: 3600  # 1 hour
+
+  # This policy allows the SNS topic to send messages to the SQS queue
+  SQSPolicy:
+    Type: AWS::SQS::QueuePolicy
+    Properties:
+      Queues:
+        - !Ref SQS
+      PolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service: sns.amazonaws.com
+            Action: sqs:SendMessage
+            Resource: !GetAtt SQS.Arn
+            Condition:
+              ArnEquals:
+                "aws:SourceArn": !Ref SNS
+
+  # Map SQS queue directly to the Lambda function
+  SQSLambdaMapping:
+    Type: AWS::Lambda::EventSourceMapping
+    Properties:
+      BatchSize: 1
+      EventSourceArn: !GetAtt SQS.Arn
+      FunctionName: !GetAtt LambdaFunction.Arn
+      Enabled: true
+      MaximumBatchingWindowInSeconds: 0
+
+  # Subscribe to all RDS instance modification events
+  # availability and maintenance events may include version changes
+  # configuration change events may include changes that enable new Datadog features
+  RDSEventSubscription:
+    Type: "AWS::RDS::EventSubscription"
+    Properties:
+      SnsTopicArn: !GetAtt SNS.TopicArn
+      EventCategories:
+        - "availability"
+        - "configuration change"
+        - "creation"
+        - "maintenance"
+      SourceType: "db-instance"
+      Enabled: true
+
+  # Allow any RDS instance in this account to publish to the SNS topic
+  RDSPermission:
+    Type: AWS::SNS::TopicPolicy
+    Properties:
+      PolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service: events.rds.amazonaws.com
+            Action: sns:Publish
+            Resource: !Ref SNS
+            Condition:
+              StringEquals:
+                "aws:SourceAccount": !Ref AWS::AccountId
+      Topics:
+        - !GetAtt SNS.TopicArn
+
+  # Permissions for the Lambda function that processes the SQS messages
+  LambdaExecutionRole:
+    Type: AWS::IAM::Role
+    Properties:
+      AssumeRolePolicyDocument:
+        Version: "2012-10-17"
+        Statement:
+          - Effect: Allow
+            Principal:
+              Service: lambda.amazonaws.com
+            Action: sts:AssumeRole
+      Policies:
+        - PolicyName: "AllowLogs"
+          PolicyDocument:
+            Version: "2012-10-17"
+            Statement:
+              - Effect: Allow
+                Action:
+                  - logs:CreateLogGroup
+                  - logs:CreateLogStream
+                  - logs:PutLogEvents
+                Resource: "*"
+        # The execution role for Lambdas within a VPC need permissions to manage network interfaces
+        - PolicyName: "AllowEC2"
+          PolicyDocument:
+            Version: "2012-10-17"
+            Statement:
+              - Effect: Allow
+                Action:
+                  - ec2:CreateNetworkInterface
+                  - ec2:DescribeNetworkInterfaces
+                  - ec2:DescribeSubnets
+                  - ec2:DeleteNetworkInterface
+                  - ec2:AssignPrivateIpAddresses
+                  - ec2:UnassignPrivateIpAddresses
+                Resource: "*"
+        # Allow the Lambda to read from the SQS queue
+        - PolicyName: "AllowSQS"
+          PolicyDocument:
+            Version: "2012-10-17"
+            Statement:
+              - Effect: Allow
+                Action:
+                  - sqs:ReceiveMessage
+                  - sqs:DeleteMessage
+                  - sqs:GetQueueAttributes
+                Resource: !GetAtt SQS.Arn
+        # Allow the Lambda to look up the RDS instances named in the events
+        - PolicyName: "AllowRDS"
+          PolicyDocument:
+            Version: "2012-10-17"
+            Statement:
+              - Effect: Allow
+                Action:
+                  - rds:DescribeDBInstances
+                Resource: "*"
+                # Condition:
+                #   StringEquals:
+                #     "aws:ResourceTag/datadoghq.com/install-dbm": "true"
+        # Allow reading only the secrets that RDS owns (the master user secrets)
+        - PolicyName: "AllowSecrets"
+          PolicyDocument:
+            Version: "2012-10-17"
+            Statement:
+              - Effect: Allow
+                Action:
+                  - secretsmanager:GetSecretValue
+                  - secretsmanager:DescribeSecret
+                Resource: "*"
+                Condition:
+                  StringEquals:
+                    "aws:ResourceTag/aws:secretsmanager:owningService": "rds"
+
+  LambdaSecurityGroup:
+    Type: "AWS::EC2::SecurityGroup"
+    Properties:
+      GroupDescription: "Lambda traffic"
+      VpcId: !Ref VPC
+      SecurityGroupEgress:
+        - IpProtocol: "-1"
+          CidrIp: "0.0.0.0/0"
+
+  LambdaFunction:
+    Type: AWS::Lambda::Function
+    Properties:
+      Handler: index.handler
+      Role: !GetAtt LambdaExecutionRole.Arn
+      Code:
+        S3Bucket: !Ref LambdaBucket
+        S3Key: !Ref LambdaKey
+      # NOTE(review): lambda/index.js reads both variables below and fails without them.
+      # The referenced parameters, and read access to those two secrets, are not defined yet.
+      # Environment:
+      #   Variables:
+      #     ADMIN_USER_SECRET_ARN: !Ref AdminUserSecretArn
+      #     DATADOG_USER_SECRET_ARN: !Ref DatadogUserSecretArn
+      Runtime: nodejs22.x
+      Timeout: 10
+      MemorySize: 128
+      VpcConfig:
+        SecurityGroupIds:
+          - !Ref LambdaSecurityGroup
+        SubnetIds:
+          - !Ref SubnetId
+
+  # The Lambda itself inherits the EC2 permissions so we need to deny them to the Lambda function
+  # This is a separate policy to avoid a circular dependency
+  LambdaFunctionDenyPolicy:
+    Type: AWS::IAM::Policy
+    Properties:
+      PolicyName: "LambdaFunctionDenyPolicy"
+      Roles:
+        - !Ref LambdaExecutionRole
+      PolicyDocument:
+        Version: "2012-10-17"
+        Statement:
+          - Effect: Deny
+            Action:
+              - ec2:CreateNetworkInterface
+              - ec2:DescribeNetworkInterfaces
+              - ec2:DescribeSubnets
+              - ec2:DeleteNetworkInterface
+              - ec2:AssignPrivateIpAddresses
+              - ec2:UnassignPrivateIpAddresses
+            Resource: "*"
+            Condition:
+              ArnEquals:
+                lambda:SourceFunctionArn: !GetAtt LambdaFunction.Arn
diff --git a/static/resources/yaml/dbm/rds-auto-install/deploy.sh b/static/resources/yaml/dbm/rds-auto-install/deploy.sh
new file mode 100755
index 0000000000000..43610874755df
--- /dev/null
+++ b/static/resources/yaml/dbm/rds-auto-install/deploy.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Deploy cloudformation.yaml as the rds-auto-install stack.
+# Run lambda/deploy.sh first so the Lambda zip is already in the bucket.
+set -euo pipefail
+
+# Resolve relative paths from this script's directory
+cd "$(dirname "$0")"
+
+# Print the hex MD5 of stdin (md5 on macOS, md5sum elsewhere)
+md5_hex() {
+  if command -v md5 >/dev/null 2>&1; then
+    md5 -q
+  else
+    md5sum | cut -d ' ' -f 1
+  fi
+}
+
+# Print one output value of the rds-auto-install-demo stack (see setup.yaml)
+demo_output() {
+  aws cloudformation describe-stacks --stack-name "rds-auto-install-demo" \
+    --query "Stacks[0].Outputs[?OutputKey=='$1'].OutputValue" --output text
+}
+
+vpc_id="$(demo_output VPCId)"
+subnet_id="$(demo_output SubnetAId)"
+# Must match the object key uploaded by lambda/deploy.sh
+lambda_key="$(tar c -C lambda index.js package.json package-lock.json | md5_hex).zip"
+
+aws cloudformation deploy \
+  --template-file cloudformation.yaml \
+  --stack-name rds-auto-install \
+  --parameter-overrides \
+  "VPC=$vpc_id" \
+  "SubnetId=$subnet_id" \
+  "LambdaBucket=rds-auto-install-demo" \
+  "LambdaKey=$lambda_key" \
+  --capabilities CAPABILITY_IAM
diff --git a/static/resources/yaml/dbm/rds-auto-install/lambda/.gitignore b/static/resources/yaml/dbm/rds-auto-install/lambda/.gitignore
new file mode 100644
index 0000000000000..0e1dbdbdd541c
--- /dev/null
+++ b/static/resources/yaml/dbm/rds-auto-install/lambda/.gitignore
@@ -0,0 +1,2 @@
+node_modules
+zips
diff --git a/static/resources/yaml/dbm/rds-auto-install/lambda/deploy.sh b/static/resources/yaml/dbm/rds-auto-install/lambda/deploy.sh
new file mode 100755
index 0000000000000..34b4900567128
--- /dev/null
+++ b/static/resources/yaml/dbm/rds-auto-install/lambda/deploy.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Build the Lambda zip file and upload it to the demo bucket
+set -euo pipefail
+
+# Run from this script's directory so the zip has no parent directories
+cd "$(dirname "$0")"
+
+# Print the hex MD5 of stdin (md5 on macOS, md5sum elsewhere)
+md5_hex() {
+  if command -v md5 >/dev/null 2>&1; then
+    md5 -q
+  else
+    md5sum | cut -d ' ' -f 1
+  fi
+}
+
+echo "Installing npm dependencies"
+npm ci
+hashFile="$(tar c index.js package.json package-lock.json | md5_hex).zip"
+echo "Zipping lambda function"
+mkdir -p zips
+# Start from a fresh archive and list the inputs: ./* would also pack the zips directory
+rm -f "zips/$hashFile"
+zip -r "zips/$hashFile" index.js package.json package-lock.json node_modules >/dev/null
+aws s3 cp "zips/$hashFile" "s3://rds-auto-install-demo/$hashFile"
diff --git a/static/resources/yaml/dbm/rds-auto-install/lambda/index.js b/static/resources/yaml/dbm/rds-auto-install/lambda/index.js
new file mode 100644
index 0000000000000..7f432e998f5b1
--- /dev/null
+++ b/static/resources/yaml/dbm/rds-auto-install/lambda/index.js
@@ -0,0 +1,195 @@
+const { Client } = require("pg");
+const {
+  RDSClient,
+  DescribeDBInstancesCommand,
+} = require("@aws-sdk/client-rds");
+const {
+  SecretsManagerClient,
+  GetSecretValueCommand,
+} = require("@aws-sdk/client-secrets-manager");
+
+// Environment variables naming the secrets that hold the role passwords
+const REQUIRED_ENV = ["ADMIN_USER_SECRET_ARN", "DATADOG_USER_SECRET_ARN"];
+
+/**
+ * SQS-triggered handler that prepares the RDS instance named in an RDS event
+ * notification for Datadog Database Monitoring.
+ *
+ * Only the first record of the batch is read. Errors are rethrown so the
+ * message stays on the queue and is retried.
+ */
+exports.handler = async (event, context) => {
+  let client;
+  try {
+    const body = event?.Records?.[0]?.body;
+    if (!body) {
+      throw new Error("No SQS record body found in the event");
+    }
+    const message = JSON.parse(body);
+    if (message.RequestType) {
+      // CloudFormation event
+      return { Status: "SUCCESS" };
+    }
+    // Check for source id in RDS notification
+    const dbId = message["Source ID"];
+    if (!dbId) {
+      throw new Error("No DB Id found in the message");
+    }
+    // Fail before touching the database if the Lambda is misconfigured
+    const missingEnv = REQUIRED_ENV.filter((name) => !process.env[name]);
+    if (missingEnv.length > 0) {
+      throw new Error(
+        `Missing environment variables: ${missingEnv.join(", ")}`
+      );
+    }
+
+    const rdsClient = new RDSClient();
+    const rdsResponse = await rdsClient.send(
+      new DescribeDBInstancesCommand({ DBInstanceIdentifier: dbId })
+    );
+    const instance = rdsResponse.DBInstances?.[0];
+    const secretArn = instance?.MasterUserSecret?.SecretArn;
+    if (!secretArn) {
+      throw new Error(`No managed master user secret found for ${dbId}`);
+    }
+    // Throwing leaves the message queued, so this is retried later
+    if (!instance.Endpoint?.Address) {
+      throw new Error(`No endpoint available yet for ${dbId}`);
+    }
+
+    const dbSecret = await getSecret(secretArn);
+    const adminPassword = secretPassword(
+      await getSecret(process.env.ADMIN_USER_SECRET_ARN)
+    );
+    const datadogPassword = secretPassword(
+      await getSecret(process.env.DATADOG_USER_SECRET_ARN)
+    );
+
+    client = new Client({
+      user: dbSecret.username,
+      host: instance.Endpoint.Address,
+      database: "postgres",
+      password: dbSecret.password,
+      port: instance.Endpoint.Port,
+      ssl: {
+        // Allow for the RDS self-signed SSL
+        rejectUnauthorized: false,
+      },
+    });
+
+    await client.connect();
+
+    // CREATE/ALTER ROLE cannot take the password as a bound parameter, so
+    // quote it as a SQL literal instead of interpolating it raw
+    const adminLiteral = client.escapeLiteral(adminPassword);
+    const datadogLiteral = client.escapeLiteral(datadogPassword);
+
+    // Create admin user for orders app
+    await executeSql(
+      client,
+      `CREATE ROLE admin WITH LOGIN PASSWORD ${adminLiteral};`
+    );
+    await executeSql(
+      client,
+      `ALTER ROLE admin WITH LOGIN PASSWORD ${adminLiteral};`
+    );
+    await executeSql(
+      client,
+      `GRANT ALL PRIVILEGES ON DATABASE postgres TO admin;`
+    );
+
+    await executeSql(client, `ALTER USER admin CREATEROLE;`);
+    await executeSql(client, `ALTER USER admin CREATEDB;`);
+
+    await executeSql(
+      client,
+      `CREATE USER datadog WITH password ${datadogLiteral};`
+    );
+    await executeSql(
+      client,
+      `ALTER ROLE datadog WITH password ${datadogLiteral};`
+    );
+    await executeSql(client, `ALTER ROLE datadog INHERIT;`);
+    await executeSql(client, `CREATE SCHEMA IF NOT EXISTS datadog;`);
+    await executeSql(client, `GRANT USAGE ON SCHEMA datadog TO datadog;`);
+    await executeSql(client, `GRANT USAGE ON SCHEMA public TO datadog;`);
+    await executeSql(client, `GRANT pg_monitor TO datadog;`);
+    await executeSql(
+      client,
+      `CREATE EXTENSION IF NOT EXISTS pg_stat_statements;`
+    );
+    await executeSql(
+      client,
+      `CREATE OR REPLACE FUNCTION datadog.explain_statement(
+   l_query TEXT,
+   OUT explain JSON
+)
+RETURNS SETOF JSON AS
+$$
+DECLARE
+curs REFCURSOR;
+plan JSON;
+
+BEGIN
+   OPEN curs FOR EXECUTE pg_catalog.concat('EXPLAIN (FORMAT JSON) ', l_query);
+   FETCH curs INTO plan;
+   CLOSE curs;
+   RETURN QUERY SELECT plan;
+END;
+$$
+LANGUAGE 'plpgsql'
+RETURNS NULL ON NULL INPUT
+SECURITY DEFINER;
+`
+    );
+  } catch (err) {
+    console.error(err);
+    throw err;
+  } finally {
+    if (client) {
+      await client.end();
+    }
+  }
+};
+
+// Runs one statement best-effort: failures are logged and swallowed so that
+// re-running against an already configured instance (e.g. CREATE ROLE on an
+// existing role) does not abort the remaining statements
+const executeSql = async (client, sql) => {
+  try {
+    const res = await client.query(sql);
+    return res;
+  } catch (err) {
+    console.error("Error executing SQL", err);
+  }
+};
+
+// Fetches a secret value; returns the parsed value when the secret string is
+// valid JSON, otherwise the raw string
+const getSecret = async (secretArn) => {
+  const secretsClient = new SecretsManagerClient();
+  const secretsInput = {
+    SecretId: secretArn,
+  };
+  const secretsCommand = new GetSecretValueCommand(secretsInput);
+  const secretsResponse = await secretsClient.send(secretsCommand);
+  try {
+    const value = JSON.parse(secretsResponse.SecretString);
+    return value;
+  } catch (err) {
+    return secretsResponse.SecretString;
+  }
+};
+
+// Extracts the password from a value returned by getSecret.
+// NOTE(review): assumes JSON secrets keep it under `password`, as the
+// master secret read by the handler does - confirm for the other two secrets
+const secretPassword = (secret) => {
+  if (secret !== null && typeof secret === "object") {
+    if (typeof secret.password !== "string") {
+      throw new Error("Secret JSON does not contain a password string");
+    }
+    return secret.password;
+  }
+  return String(secret);
+};
diff --git a/static/resources/yaml/dbm/rds-auto-install/lambda/package.json b/static/resources/yaml/dbm/rds-auto-install/lambda/package.json
new file mode 100644
index 0000000000000..e65f148f13afa
--- /dev/null
+++ b/static/resources/yaml/dbm/rds-auto-install/lambda/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "postgres",
+  "version": "1.0.0",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "description": "",
+  "dependencies": {
+    "@aws-sdk/client-rds": "3.814.0",
+    "@aws-sdk/client-secrets-manager": "3.812.0",
+    "cfn-response": "1.0.1",
+    "pg": "8.16.0"
+  }
+}
diff --git a/static/resources/yaml/dbm/rds-auto-install/setup.yaml b/static/resources/yaml/dbm/rds-auto-install/setup.yaml
new file mode 100644
index 0000000000000..d56bd666c923c
--- /dev/null
+++ b/static/resources/yaml/dbm/rds-auto-install/setup.yaml
@@ -0,0 +1,220 @@
+# These resources set up a testing environment for the RDS Auto Install demonstration
+# Run `aws cloudformation deploy --template-file setup.yaml --stack-name rds-auto-install-demo` to deploy the stack
+Resources:
+# Networking
+  VPC:
+    Type: "AWS::EC2::VPC"
+    Properties:
+      CidrBlock: "10.0.0.0/16"
+      EnableDnsSupport: true
+      EnableDnsHostnames: true
+
+  InternetGateway:
+    Type: "AWS::EC2::InternetGateway"
+
+  VpcGatewayAttachment:
+    Type: AWS::EC2::VPCGatewayAttachment
+    Properties:
+      InternetGatewayId: !Ref InternetGateway
+      VpcId: !Ref VPC
+
+  RouteTable:
+    Type: "AWS::EC2::RouteTable"
+    Properties:
+      VpcId: !Ref "VPC"
+
+  InternetRoute:
+    DependsOn: VpcGatewayAttachment
+    Type: AWS::EC2::Route
+    Properties:
+      RouteTableId: !Ref RouteTable
+      GatewayId: !Ref InternetGateway
+      DestinationCidrBlock: 0.0.0.0/0
+
+  SubnetA:
+    Type: "AWS::EC2::Subnet"
+    Properties:
+      AvailabilityZone: !Sub "${AWS::Region}a"
+      VpcId: !Ref "VPC"
+      CidrBlock: "10.0.0.0/17"
+
+  SubnetARouteTableAssociation:
+    Type: "AWS::EC2::SubnetRouteTableAssociation"
+    Properties:
+      SubnetId: !Ref "SubnetA"
+      RouteTableId: !Ref "RouteTable"
+
+  # Some regions don't have -b
+  SubnetC:
+    Type: "AWS::EC2::Subnet"
+    Properties:
+      AvailabilityZone: !Sub "${AWS::Region}c"
+      VpcId: !Ref "VPC"
+      CidrBlock: "10.0.128.0/17"
+
+  SubnetCRouteTableAssociation:
+    Type: "AWS::EC2::SubnetRouteTableAssociation"
+    Properties:
+      SubnetId: !Ref "SubnetC"
+      RouteTableId: !Ref "RouteTable"
+
+  SubnetGroup:
+    Type: "AWS::RDS::DBSubnetGroup"
+    Properties:
+      DBSubnetGroupDescription: "Subnet Group"
+      SubnetIds:
+        - !Ref "SubnetA"
+        - !Ref "SubnetC"
+  VPCEndpointSecurityGroup:
+    Type: "AWS::EC2::SecurityGroup"
+    Properties:
+      GroupDescription: "VPC endpoints traffic"
+      VpcId: !Ref VPC
+      SecurityGroupEgress:
+        - IpProtocol: "-1"
+          CidrIp: "0.0.0.0/0"
+      SecurityGroupIngress:
+        - IpProtocol: "-1"
+          FromPort: 0
+          ToPort: 65535
+          Description: "VPC"
+          CidrIp: !GetAtt VPC.CidrBlock
+  # Secrets manager endpoint
+  SecretsManagerEndpoint:
+    Type: "AWS::EC2::VPCEndpoint"
+    Properties:
+      VpcId: !Ref VPC
+      VpcEndpointType: "Interface"
+      ServiceName: !Sub "com.amazonaws.${AWS::Region}.secretsmanager"
+      PrivateDnsEnabled: true
+      SecurityGroupIds:
+        - !Ref "VPCEndpointSecurityGroup"
+      SubnetIds:
+        - !Ref SubnetA
+        - !Ref SubnetC
+  # RDS endpoint
+  RDSEndpoint:
+    Type: "AWS::EC2::VPCEndpoint"
+    Properties:
+      VpcId: !Ref VPC
+      VpcEndpointType: "Interface"
+      ServiceName: !Sub "com.amazonaws.${AWS::Region}.rds"
+      PrivateDnsEnabled: true
+      SecurityGroupIds:
+        - !Ref "VPCEndpointSecurityGroup"
+      SubnetIds:
+        - !Ref SubnetA
+        - !Ref SubnetC
+
+  # ECR endpoints
+  ECRDKREndpoint:
+    Type: "AWS::EC2::VPCEndpoint"
+    Properties:
+      VpcId: !Ref VPC
+      VpcEndpointType: "Interface"
+      ServiceName: !Sub "com.amazonaws.${AWS::Region}.ecr.dkr"
+      PrivateDnsEnabled: true
+      SecurityGroupIds:
+        - !Ref "VPCEndpointSecurityGroup"
+      SubnetIds:
+        - !Ref SubnetA
+        - !Ref SubnetC
+  ECRAPIEndpoint:
+    Type: "AWS::EC2::VPCEndpoint"
+    Properties:
+      VpcId: !Ref VPC
+      VpcEndpointType: "Interface"
+      ServiceName: !Sub "com.amazonaws.${AWS::Region}.ecr.api"
+      PrivateDnsEnabled: true
+      SecurityGroupIds:
+        - !Ref "VPCEndpointSecurityGroup"
+      SubnetIds:
+        - !Ref SubnetA
+        - !Ref SubnetC
+  # ECR requires S3 access for image layers
+  S3Endpoint:
+    Type: "AWS::EC2::VPCEndpoint"
+    Properties:
+      VpcId: !Ref VPC
+      VpcEndpointType: "Gateway"
+      ServiceName: !Sub "com.amazonaws.${AWS::Region}.s3"
+      PolicyDocument:
+        Version: "2012-10-17"
+        Statement:
+          - Effect: Allow
+            Principal: "*"
+            Action:
+              - s3:GetObject
+            Resource:
+              - "*"
+      RouteTableIds:
+        - !Ref RouteTable
+  # Cloudwatch endpoint for awslogs
+  CloudwatchEndpoint:
+    Type: "AWS::EC2::VPCEndpoint"
+    Properties:
+      VpcId: !Ref VPC
+      VpcEndpointType: "Interface"
+      ServiceName: !Sub "com.amazonaws.${AWS::Region}.logs"
+      PrivateDnsEnabled: true
+      SecurityGroupIds:
+        - !Ref "VPCEndpointSecurityGroup"
+      SubnetIds:
+        - !Ref SubnetA
+        - !Ref SubnetC
+
+  # RDS
+  RDSAutoInstallDBInstance:
+    Type: AWS::RDS::DBInstance
+    DeletionPolicy: Delete
+    UpdateReplacePolicy: Delete
+    Properties:
+      DBInstanceIdentifier: !Sub "${AWS::StackName}-db-instance"
+      DBInstanceClass: db.t3.medium
+      Engine: postgres
+      EngineVersion: "17.2"
+      MasterUsername: root
+      ManageMasterUserPassword: true
+      EnableIAMDatabaseAuthentication: true
+      AllocatedStorage: "20"
+      DBSubnetGroupName: !Ref SubnetGroup
+      VPCSecurityGroups:
+        - !GetAtt RDSSecurityGroup.GroupId
+  RDSSecurityGroup:
+    Type: AWS::EC2::SecurityGroup
+    DeletionPolicy: Delete
+    UpdateReplacePolicy: Delete
+    Properties:
+      GroupDescription: Security group for RDS Auto Install demo
+      VpcId: !Ref VPC
+      SecurityGroupIngress:
+        - IpProtocol: tcp
+          FromPort: 5432
+          ToPort: 5432
+          CidrIp: !GetAtt VPC.CidrBlock
+
+  # S3 Bucket for Lambda code upload
+  S3Bucket:
+    DeletionPolicy: Delete
+    UpdateReplacePolicy: Delete
+    Type: AWS::S3::Bucket
+    Properties:
+      BucketName: !Ref AWS::StackName
+
+
+Outputs:
+  VPCId:
+    Description: "VPC ID"
+    Value: !Ref VPC
+  SubnetAId:
+    Description: "Subnet A ID"
+    Value: !Ref SubnetA
+  # SubnetCId:
+  #   Description: "Subnet C ID"
+  #   Value: !Ref SubnetC
+  # RDSInstanceEndpoint:
+  #   Description: "RDS Instance Endpoint"
+  #   Value: !GetAtt RDSAutoInstallDBInstance.Endpoint.Address
+  # RDSSecurityGroupId:
+  #   Description: "RDS Security Group ID"
+  #   Value: !GetAtt RDSSecurityGroup.GroupId