From 93c3ec437849db5922599efd8a5aeffce4a0eb5c Mon Sep 17 00:00:00 2001
From: Ankur Naik
Date: Mon, 13 Jan 2020 11:18:11 -0800
Subject: [PATCH 01/37] Temporarily skip syncing of the full signatures table.

---
 config_table.tf | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/config_table.tf b/config_table.tf
index ae45c1f..7c8f985 100644
--- a/config_table.tf
+++ b/config_table.tf
@@ -13,7 +13,8 @@ resource "aws_dynamodb_table" "loader_config" {
 }
 
 resource "aws_dynamodb_table_item" "load_config_full_items" {
-  for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"]])
+  /*for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"]])*/
+  for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"] if table["table"]["name"] != "signatures"])
 
   table_name = aws_dynamodb_table.loader_config.name
   hash_key = aws_dynamodb_table.loader_config.hash_key
@@ -34,7 +35,8 @@ resource "aws_dynamodb_table_item" "load_config_full_items" {
 }
 
 data "template_file" "loader_config_full_item" {
-  for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"]])
+  /*for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"]])*/
+  for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"] if table["table"]["name"] != "signatures"])
 
   template = "${file("${path.module}/config_item.json")}"
   vars = {

From 330392a7cbbe8a1eac67a4a3f59177e3ac39cbe3 Mon Sep 17 00:00:00 2001
From: Ankur Naik
Date: Wed, 15 Jan 2020 14:29:14 -0800
Subject: [PATCH 02/37] Implement an intermediate Lambda that is called by the
 API Gateway.

This Lambda invokes the actual receiver Lambda asynchronously, so that
the API Gateway can return immediately, and the receiver Lambda can
continue to execute beyond the 29-second API Gateway-Lambda integration
timeout hard limit.
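The fire-and-forget behavior described above comes down to a single
parameter on the invoke call. A minimal sketch of the pattern, assuming
the aws-sdk v2 client that the Lambdas in this series already use (the
invokeAsync wrapper name is illustrative, not from the patch):

    const aws = require('aws-sdk');
    const lambda = new aws.Lambda();

    // InvocationType 'Event' queues the invocation and returns immediately.
    // The default, 'RequestResponse', would hold the caller until the
    // receiver finished, reintroducing the 29-second timeout problem.
    function invokeAsync(payload) {
      return lambda.invoke({
        FunctionName: 'controlshift-webhook-handler',
        InvocationType: 'Event',
        Payload: JSON.stringify({ body: payload })
      }).promise();
    }

The invoker.js added below wraps the same call in an explicit Promise and
returns an API Gateway-shaped response.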
---
 .gitignore           |  1 +
 iam.tf               | 32 +++++++++++++++++++++++++++++
 loader.tf            |  7 ++++---
 logs.tf              |  8 ++++++++
 receiver.tf          | 29 +++++++++++++++++++++++---
 receiver/invoker.js  | 48 ++++++++++++++++++++++++++++++++++++++++++++
 receiver/receiver.js |  7 ++++---
 7 files changed, 123 insertions(+), 9 deletions(-)
 create mode 100644 receiver/invoker.js

diff --git a/.gitignore b/.gitignore
index 219d324..7091de6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,5 +7,6 @@
 
 # .tfvars files
 *.tfvars
+receiver/invoker.zip
 receiver/receiver.zip
 .idea/
diff --git a/iam.tf b/iam.tf
index 06fc563..e1b7436 100644
--- a/iam.tf
+++ b/iam.tf
@@ -9,12 +9,38 @@ data "aws_iam_policy_document" "lambda_assume_role" {
   }
 }
 
+resource "aws_iam_role" "invoker_lambda_role" {
+  name = "ControlShiftInvokerLambdaRole"
+  description = "Used by the controlshift-webhook-handler-invoker Lambda for asynchronously invoking the receiver lambda with the db replication data from ControlShift"
+  assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json
+}
+
 resource "aws_iam_role" "receiver_lambda_role" {
   name = "ReceiverLambdaRole"
   description = "Used by the controlshift-webhook-handler Lambda for receiving db replication data from ControlShift"
   assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json
 }
 
+data "aws_iam_policy_document" "invoker_execution_policy" {
+  # allow the lambda to write cloudwatch logs
+  statement {
+    effect = "Allow"
+    actions = [
+      "logs:CreateLogGroup",
+      "logs:CreateLogStream",
+      "logs:PutLogEvents"
+    ]
+    resources = ["arn:aws:logs:*:*:*"]
+  }
+
+  # allow the lambda to invoke the receiver lambda
+  statement {
+    effect = "Allow"
+    actions = ["lambda:InvokeFunction"]
+    resources = ["*"]
+  }
+}
+
 data "aws_iam_policy_document" "receiver_execution_policy" {
   # allow the lambda to write cloudwatch logs
   statement {
@@ -35,6 +61,12 @@ data "aws_iam_policy_document" "receiver_execution_policy" {
   }
 }
 
+resource "aws_iam_role_policy" "lambda_invokes_lambda" {
+  name = "AllowsInvokerExecution"
+  role = aws_iam_role.invoker_lambda_role.id
+  policy = data.aws_iam_policy_document.invoker_execution_policy.json
+}
+
 resource "aws_iam_role_policy" "lambda_accesses_code_bucket" {
   name = "AllowsReceiverExecution"
   role = aws_iam_role.receiver_lambda_role.id
diff --git a/loader.tf b/loader.tf
index c8352de..b0d98bf 100644
--- a/loader.tf
+++ b/loader.tf
@@ -5,7 +5,8 @@ resource "aws_lambda_function" "loader" {
   role = aws_iam_role.loader_lambda_role.arn
   handler = "index.handler"
   runtime = "nodejs8.10"
-  timeout = 300
+  memory_size = 512
+  timeout = 900
   environment {
     variables = {
       "DEBUG" = "true"
@@ -38,14 +39,14 @@ resource "aws_s3_bucket_notification" "notifications" {
 }
 
 resource "aws_sns_topic" "success_sns_topic" {
-  depends_on = ["aws_lambda_function.loader"]
+  depends_on = [aws_lambda_function.loader]
 
   name = var.success_topic_name
   policy = data.aws_iam_policy_document.success_sns_notification_policy.json
 }
 
 resource "aws_sns_topic" "failure_sns_topic" {
-  depends_on = ["aws_lambda_function.loader"]
+  depends_on = [aws_lambda_function.loader]
 
   name = var.failure_topic_name
   policy = data.aws_iam_policy_document.failure_sns_notification_policy.json
diff --git a/logs.tf b/logs.tf
index a062468..f31870b 100644
--- a/logs.tf
+++ b/logs.tf
@@ -14,3 +14,11 @@ resource "aws_cloudwatch_log_group" "webhook" {
     Application = "controlshift-redshift"
   }
 }
+
+resource "aws_cloudwatch_log_group" "invoker" {
+  name = "/aws/lambda/controlshift-webhook-handler-invoker"
+  retention_in_days = 5
+  tags = {
+    Application = "controlshift-redshift"
+  }
+}
diff --git a/receiver.tf b/receiver.tf
index 41b8e02..dc40c56 100644
--- a/receiver.tf
+++ b/receiver.tf
@@ -1,15 +1,38 @@
+data "archive_file" "invoker_zip" {
+  type = "zip"
+  source_file = "${path.module}/receiver/invoker.js"
+  output_path = "${path.module}/receiver/invoker.zip"
+}
+
 data "archive_file" "receiver_zip" {
   type = "zip"
   source_file = "${path.module}/receiver/receiver.js"
   output_path = "${path.module}/receiver/receiver.zip"
 }
 
+resource "aws_lambda_function" "invoker_lambda" {
+  filename = data.archive_file.invoker_zip.output_path
+  function_name = "controlshift-webhook-handler-invoker"
+  role = aws_iam_role.invoker_lambda_role.arn
+  handler = "invoker.handler"
+  runtime = "nodejs10.x"
+  timeout = 30
+  source_code_hash = filebase64sha256(data.archive_file.invoker_zip.output_path)
+
+  environment {
+    variables = {
+      S3_BUCKET = aws_s3_bucket.receiver.bucket
+    }
+  }
+}
+
 resource "aws_lambda_function" "receiver_lambda" {
   filename = data.archive_file.receiver_zip.output_path
   function_name = "controlshift-webhook-handler"
   role = aws_iam_role.receiver_lambda_role.arn
   handler = "receiver.handler"
   runtime = "nodejs10.x"
+  memory_size = 256
   timeout = var.receiver_timeout
   source_code_hash = filebase64sha256(data.archive_file.receiver_zip.output_path)
 
@@ -69,14 +92,14 @@ resource "aws_api_gateway_integration" "request_method_integration" {
   resource_id = aws_api_gateway_resource.webhook.id
   http_method = "POST"
   type = "AWS_PROXY"
-  uri = "arn:aws:apigateway:${var.aws_region}:lambda:path/2015-03-31/functions/${aws_lambda_function.receiver_lambda.arn}/invocations"
+  uri = "arn:aws:apigateway:${var.aws_region}:lambda:path/2015-03-31/functions/${aws_lambda_function.invoker_lambda.arn}/invocations"
 
   # AWS lambdas can only be invoked with the POST method
   integration_http_method = "POST"
 }
 
 # The * part allows invocation from any stage within API Gateway REST API.
 resource "aws_lambda_permission" "allow_api_gateway" {
-  function_name = aws_lambda_function.receiver_lambda.function_name
+  function_name = aws_lambda_function.invoker_lambda.function_name
   statement_id = "AllowExecutionFromApiGateway"
   action = "lambda:InvokeFunction"
   principal = "apigateway.amazonaws.com"
@@ -85,7 +108,7 @@ resource "aws_lambda_permission" "allow_api_gateway" {
 
 # for now, there is only one deployment
 resource "aws_api_gateway_deployment" "deployment" {
-  depends_on = ["aws_api_gateway_integration.request_method_integration"]
+  depends_on = [aws_api_gateway_integration.request_method_integration]
 
   rest_api_id = aws_api_gateway_rest_api.receiver.id
   stage_name = "production"
diff --git a/receiver/invoker.js b/receiver/invoker.js
new file mode 100644
index 0000000..dee8fb2
--- /dev/null
+++ b/receiver/invoker.js
@@ -0,0 +1,48 @@
+'use strict';
+
+const aws = require('aws-sdk');
+const lambda = new aws.Lambda({
+  // region: 'us-west-2'
+});
+
+function sendResponse(body) {
+  let response = {
+    isBase64Encoded: false,
+    statusCode: 200,
+    headers: {'Content-Type': 'application/json', 'x-controlshift-processed': '1'},
+    body: JSON.stringify(body)
+  };
+  console.log("response: " + JSON.stringify(response));
+  return response;
+}
+
+function invokeLambda(payload) {
+  const params = {
+    FunctionName: 'controlshift-webhook-handler',
+    Payload: JSON.stringify(payload),
+    InvocationType: 'Event'
+  };
+
+  return new Promise((resolve, reject) => {
+    lambda.invoke(params, (err, data) => {
+      if (err) {
+        console.log('Error invoking receiver lambda:', err);
+        reject(err);
+      }
+      else {
+        console.log('Successfully invoked receiver with data:', data);
+        resolve(data);
+      }
+    });
+  });
+}
+
+// Lambda event Handler
+exports.handler = async (event) => {
+  let receivedJSON = JSON.parse(event.body);
+  console.log('Received event:', receivedJSON);
+
+  await invokeLambda({ body: receivedJSON });
+
+  return Promise.resolve(sendResponse({"status": "processed", "payload": receivedJSON}));
+};
diff --git a/receiver/receiver.js b/receiver/receiver.js
index 79b361e..3e44e66 100644
--- a/receiver/receiver.js
+++ b/receiver/receiver.js
@@ -13,10 +13,10 @@ async function processCsv(downloadUrl, table, kind) {
     const today = new Date();
     const key = `${kind}/${table}/${today.getFullYear()}/${today.getMonth()}/${today.getDate()}/${today.getHours()}-${today.getMinutes()}-${today.getSeconds()}/table.csv`;
     await copyToS3(downloadUrl, key);
-    console.log("Successfully copied")
+    console.log(`Successfully copied ${downloadUrl} to ${key}`)
   }
   catch(err){
-    console.log(`Failed: ${err.message}`)
+    console.log(`Failed: ${err.message} (${downloadUrl})`)
   }
   finally{
     return sendResponse({"status": "processed"})
@@ -55,7 +55,8 @@ function sendResponse(body) {
 // Lambda event Handler
 exports.handler = async (event) => {
-  let receivedJSON = JSON.parse(event.body);
+  // let receivedJSON = JSON.parse(event.body);
+  let receivedJSON = event.body;
   console.log('Received event:', receivedJSON);
 
   if(receivedJSON.type === 'data.full_table_exported'){
     return processCsv(receivedJSON.data.url, receivedJSON.data.table, 'full');

From 5fbf607aae95bc6aea2ada5d59d8874d73ae0d8f Mon Sep 17 00:00:00 2001
From: Sophie Waldman <62553142+sjwmoveon@users.noreply.github.com>
Date: Fri, 5 Jun 2020 16:54:43 -0400
Subject: [PATCH 03/37] Allow for multiple instances of loader_lambda_role

The loader lambda role references specific DynamoDB resources, so unlike
the other two lambda roles created in iam.tf, you can't just reuse the
existing role for a new instance of the sync. Allowing for multiple
instances by switching to name_prefix makes it possible to sync to
separate Redshift databases in separate regions (though still only one
sync can run per region).
---
 iam.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/iam.tf b/iam.tf
index e1b7436..a6ccd90 100644
--- a/iam.tf
+++ b/iam.tf
@@ -97,7 +97,7 @@ resource "aws_iam_role_policy_attachment" "gateway_cloudwatch_logging" {
 }
 
 resource "aws_iam_role" "loader_lambda_role" {
-  name = "LoaderLambdaRole"
+  name_prefix = "LoaderLambdaRole"
   description = "Used by the controlshift-redshift-loader Lambda for processing db replication data from ControlShift into Redshift"
   assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json
 }

From b8247b26770696d1749df7b7660986d3f2b2929c Mon Sep 17 00:00:00 2001
From: Sophie Waldman <62553142+sjwmoveon@users.noreply.github.com>
Date: Mon, 8 Jun 2020 17:50:15 -0400
Subject: [PATCH 04/37] Truncate strings that are too large to fit

Fix the error 'String length exceeds DDL length' by truncating anything
that's too long.

TESTED: Manual update of blast_emails config in DynamoDB.
---
 config_item.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config_item.json b/config_item.json
index b2413be..4083417 100644
--- a/config_item.json
+++ b/config_item.json
@@ -12,7 +12,7 @@
     "truncateTarget": {"BOOL": ${truncate_target}},
     "useSSL": {"BOOL": false}
   }}]},
-  "copyOptions": {"S": "EMPTYASNULL"},
+  "copyOptions": {"S": "EMPTYASNULL TRUNCATECOLUMNS"},
   "dataFormat": {"S": "CSV"},
   "csvDelimiter": {"S": ","},
   "ignoreCsvHeader": {"BOOL": true},

From 7641d249403e90dbe67c4f90369046f37aeeeca4 Mon Sep 17 00:00:00 2001
From: Sophie Waldman <62553142+sjwmoveon@users.noreply.github.com>
Date: Tue, 16 Jun 2020 13:55:10 -0400
Subject: [PATCH 05/37] Node 8.10 environment has been disabled (deprecated)

---
 loader.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loader.tf b/loader.tf
index b0d98bf..f4b68c4 100644
--- a/loader.tf
+++ b/loader.tf
@@ -4,7 +4,7 @@ resource "aws_lambda_function" "loader" {
   function_name = "controlshift-redshift-loader"
   role = aws_iam_role.loader_lambda_role.arn
   handler = "index.handler"
-  runtime = "nodejs8.10"
+  runtime = "nodejs10.x"
   memory_size = 512
   timeout = 900
   environment {

From d09cc0ab46f0869322c6ed11edfd72f216c53df2 Mon Sep 17 00:00:00 2001
From: Sophie Waldman <62553142+sjwmoveon@users.noreply.github.com>
Date: Mon, 16 Nov 2020 17:51:09 -0500
Subject: [PATCH 06/37] Send email clicks and opens to (optional) Kinesis
 streams

---
 lambdas/receiver.js | 25 ++++++++++++++++++++++---
 receiver.tf         |  2 ++
 variables.tf        | 12 ++++++++++++
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/lambdas/receiver.js b/lambdas/receiver.js
index 2b0b86b..5ae3ad4 100644
--- a/lambdas/receiver.js
+++ b/lambdas/receiver.js
@@ -1,13 +1,27 @@
 'use strict';
 
 const AWS = require('aws-sdk');
-
 // Set the region
 AWS.config.update({region: process.env.AWS_REGION});
 
 // Create an SQS service object
 const sqs = new AWS.SQS();
 
+// Create a Kinesis service object
+const kinesis = new AWS.Kinesis();
+
+function sendKinesis(data, key, stream) {
+  let params = {
+    Data: data,
+    PartitionKey: key,
+    StreamName: stream
+  };
+  kinesis.putRecord(params, function(err, data) {
+    if (err) console.log(err, err.stack);
+    else console.log('Record added:',data);
+  });
+}
+
 function enqueueTask(receivedData, kind) {
   console.log("Processing: " + receivedData.url);
 
@@ -21,7 +35,7 @@ function enqueueTask(receivedData, kind) {
 
   messageBody['kind'] = kind;
 
-  const jsonMessageBody = JSON.stringify(messageBody)
+  const jsonMessageBody = JSON.stringify(messageBody);
 
   const loaderQueueParams = {
     MessageBody: jsonMessageBody,
@@ -46,7 +60,6 @@ function enqueueTask(receivedData, kind) {
       console.log("Error", error);
     }
   );
-  return resp
 }
 
 function sendResponse(body) {
@@ -70,6 +83,12 @@ exports.handler = async (event) => {
   } else if(receivedJSON.type === 'data.incremental_table_exported'){
     await enqueueTask(receivedJSON.data, 'incremental');
     return sendResponse({"status": "processed"});
+  } else if(receivedJSON.type === 'email.open' && process.env.EMAIL_OPEN_KINESIS_STREAM !== null && process.env.EMAIL_OPEN_KINESIS_STREAM !== ''){
+    await sendKinesis(JSON.stringify(receivedJSON.data), receivedJSON.jid, process.env.EMAIL_OPEN_KINESIS_STREAM);
+    return sendResponse({"status": "processed"});
+  } else if(receivedJSON.type === 'email.click' && process.env.EMAIL_CLICK_KINESIS_STREAM !== null && process.env.EMAIL_CLICK_KINESIS_STREAM !== ''){
+    await sendKinesis(JSON.stringify(receivedJSON.data), receivedJSON.jid, process.env.EMAIL_CLICK_KINESIS_STREAM);
+    return sendResponse({"status": "processed"});
   } else {
     return Promise.resolve(sendResponse({"status": "skipped", "payload": receivedJSON}));
   }
diff --git a/receiver.tf b/receiver.tf
index a7fe0ff..b013599 100644
--- a/receiver.tf
+++ b/receiver.tf
@@ -17,6 +17,8 @@ resource "aws_lambda_function" "receiver_lambda" {
     variables = {
       SQS_QUEUE_URL = aws_sqs_queue.receiver_queue.id
       GLUE_SQS_QUEUE_URL = aws_sqs_queue.receiver_queue_glue.id
+      EMAIL_OPEN_KINESIS_STREAM = var.email_open_kinesis_stream
+      EMAIL_CLICK_KINESIS_STREAM = var.email_click_kinesis_stream
     }
   }
 }
diff --git a/variables.tf b/variables.tf
index af2f0f5..29ee230 100644
--- a/variables.tf
+++ b/variables.tf
@@ -77,6 +77,18 @@ variable "controlshift_hostname" {
   description = "The hostname of your ControlShift instance. Likely to be something like action.myorganization.org"
 }
 
+variable "email_open_kinesis_stream" {
+  type = string
+  description = "The name of a Kinesis stream that will receive email open events."
+  default = ""
+}
+
+variable "email_click_kinesis_stream" {
+  type = string
+  description = "The name of a Kinesis stream that will receive email click events."
+  default = ""
+}
+
 variable "receiver_timeout" {
   default = 60
   type = number
resource "aws_lambda_permission" "allow_api_gateway" { - function_name = aws_lambda_function.invoker_lambda.function_name + function_name = aws_lambda_function.receiver_lambda.function_name statement_id = "AllowExecutionFromApiGateway" action = "lambda:InvokeFunction" principal = "apigateway.amazonaws.com" From 3fdee05970b97f2d7aa4968a82ed64426dc8b321 Mon Sep 17 00:00:00 2001 From: Sophie Waldman <62553142+sjwmoveon@users.noreply.github.com> Date: Tue, 17 Nov 2020 10:20:02 -0500 Subject: [PATCH 08/37] Manually fix all remaining discrepancies with latest CSL code --- config_table.tf | 6 ++---- iam.tf | 26 ------------------------ logs.tf | 8 -------- receiver/invoker.js | 48 --------------------------------------------- 4 files changed, 2 insertions(+), 86 deletions(-) delete mode 100644 receiver/invoker.js diff --git a/config_table.tf b/config_table.tf index b059193..3caf6de 100644 --- a/config_table.tf +++ b/config_table.tf @@ -13,8 +13,7 @@ resource "aws_dynamodb_table" "loader_config" { } resource "aws_dynamodb_table_item" "load_config_full_items" { - /*for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"]])*/ - for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"] if table["table"]["name"] != "signatures"]) + for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"]]) table_name = aws_dynamodb_table.loader_config.name hash_key = aws_dynamodb_table.loader_config.hash_key @@ -35,8 +34,7 @@ resource "aws_dynamodb_table_item" "load_config_full_items" { } data "template_file" "loader_config_full_item" { - /*for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"]])*/ - for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"] if table["table"]["name"] != "signatures"]) + for_each = toset([for table in jsondecode(data.http.bulk_data_schemas.body)["tables"] : table["table"]["name"]]) template = "${file("${path.module}/config_item.json")}" vars = { diff --git a/iam.tf b/iam.tf index c5dbd92..d2720fa 100644 --- a/iam.tf +++ b/iam.tf @@ -9,38 +9,12 @@ data "aws_iam_policy_document" "lambda_assume_role" { } } -resource "aws_iam_role" "invoker_lambda_role" { - name = "ControlShiftInvokerLambdaRole" - description = "Used by the controlshift-webhook-handler-invoker Lambda for asynchronously invoking the receiver lambda with the db replication data from ControlShift" - assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json -} - resource "aws_iam_role" "receiver_lambda_role" { name = "ReceiverLambdaRole" description = "Used by the controlshift-webhook-handler Lambda for receiving db replication data from ControlShift" assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json } -data "aws_iam_policy_document" "invoker_execution_policy" { - # allow the lambda to write cloudwatch logs - statement { - effect = "Allow" - actions = [ - "logs:CreateLogGroup", - "logs:CreateLogStream", - "logs:PutLogEvents" - ] - resources = ["arn:aws:logs:*:*:*"] - } - - # allow the lambda to invoke the receiver lambda - statement { - effect = "Allow" - actions = ["lambda:InvokeFunction"] - resources = ["*"] - } -} - data "aws_iam_policy_document" "receiver_execution_policy" { # allow the lambda to write cloudwatch logs statement { diff --git a/logs.tf b/logs.tf index f31870b..a062468 100644 --- a/logs.tf +++ b/logs.tf @@ 
-14,11 +14,3 @@ resource "aws_cloudwatch_log_group" "webhook" { Application = "controlshift-redshift" } } - -resource "aws_cloudwatch_log_group" "invoker" { - name = "/aws/lambda/controlshift-webhook-handler-invoker" - retention_in_days = 5 - tags = { - Application = "controlshift-redshift" - } -} diff --git a/receiver/invoker.js b/receiver/invoker.js deleted file mode 100644 index dee8fb2..0000000 --- a/receiver/invoker.js +++ /dev/null @@ -1,48 +0,0 @@ -'use strict'; - -const aws = require('aws-sdk'); -const lambda = new aws.Lambda({ - // region: 'us-west-2' -}); - -function sendResponse(body) { - let response = { - isBase64Encoded: false, - statusCode: 200, - headers: {'Content-Type': 'application/json', 'x-controlshift-processed': '1'}, - body: JSON.stringify(body) - }; - console.log("response: " + JSON.stringify(response)); - return response; -} - -function invokeLambda(payload) { - const params = { - FunctionName: 'controlshift-webhook-handler', - Payload: JSON.stringify(payload), - InvocationType: 'Event' - }; - - return new Promise((resolve, reject) => { - lambda.invoke(params, (err, data) => { - if (err) { - console.log('Error invoking receiver lambda:', err); - reject(err); - } - else { - console.log('Successfully invoked receiver with data:', data); - resolve(data); - } - }); - }); -} - -// Lambda event Handler -exports.handler = async (event) => { - let receivedJSON = JSON.parse(event.body); - console.log('Received event:', receivedJSON); - - await invokeLambda({ body: receivedJSON }); - - return Promise.resolve(sendResponse({"status": "processed", "payload": receivedJSON})); -}; From 97475d50585c71412fd63b9eacb1331bf0da9d63 Mon Sep 17 00:00:00 2001 From: Sophie Waldman <62553142+sjwmoveon@users.noreply.github.com> Date: Tue, 17 Nov 2020 12:47:55 -0500 Subject: [PATCH 09/37] Use Firehose stream (which is different from base Kinesis stream) --- lambdas/receiver.js | 23 ++++++++++++----------- receiver.tf | 4 ++-- variables.tf | 8 ++++---- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/lambdas/receiver.js b/lambdas/receiver.js index 5ae3ad4..0529565 100644 --- a/lambdas/receiver.js +++ b/lambdas/receiver.js @@ -7,16 +7,17 @@ AWS.config.update({region: process.env.AWS_REGION}); // Create an SQS service object const sqs = new AWS.SQS(); -// Create a Kinesis service object -const kinesis = new AWS.Kinesis(); +// Create a Firehose service object +const firehose = new AWS.Firehose(); -function sendKinesis(data, key, stream) { +function putFirehose(data, stream) { let params = { - Data: data, - PartitionKey: key, - StreamName: stream + DeliveryStreamName: stream, + Record:{ + Data: data + } }; - kinesis.putRecord(params, function(err, data) { + firehose.putRecord(params, function(err, data) { if (err) console.log(err, err.stack); else console.log('Record added:',data); }); @@ -83,11 +84,11 @@ exports.handler = async (event) => { } else if(receivedJSON.type === 'data.incremental_table_exported'){ await enqueueTask(receivedJSON.data, 'incremental'); return sendResponse({"status": "processed"}); - } else if(receivedJSON.type === 'email.open' && process.env.EMAIL_OPEN_KINESIS_STREAM !== null && process.env.EMAIL_OPEN_KINESIS_STREAM !== ''){ - await sendKinesis(JSON.stringify(receivedJSON.data), receivedJSON.jid, process.env.EMAIL_OPEN_KINESIS_STREAM); + } else if(receivedJSON.type === 'email.open' && process.env.EMAIL_OPEN_FIREHOSE_STREAM !== null && process.env.EMAIL_OPEN_FIREHOSE_STREAM !== ''){ + await putFirehose(JSON.stringify(receivedJSON.data), receivedJSON.jid, 
process.env.EMAIL_OPEN_FIREHOSE_STREAM); return sendResponse({"status": "processed"}); - } else if(receivedJSON.type === 'email.click' && process.env.EMAIL_CLICK_KINESIS_STREAM !== null && process.env.EMAIL_CLICK_KINESIS_STREAM !== ''){ - await sendKinesis(JSON.stringify(receivedJSON.data), receivedJSON.jid, process.env.EMAIL_CLICK_KINESIS_STREAM); + } else if(receivedJSON.type === 'email.click' && process.env.EMAIL_CLICK_FIREHOSE_STREAM !== null && process.env.EMAIL_CLICK_FIREHOSE_STREAM !== ''){ + await putFirehose(JSON.stringify(receivedJSON.data), receivedJSON.jid, process.env.EMAIL_CLICK_FIREHOSE_STREAM); return sendResponse({"status": "processed"}); } else { return Promise.resolve(sendResponse({"status": "skipped", "payload": receivedJSON})); diff --git a/receiver.tf b/receiver.tf index b013599..dc9f97f 100644 --- a/receiver.tf +++ b/receiver.tf @@ -17,8 +17,8 @@ resource "aws_lambda_function" "receiver_lambda" { variables = { SQS_QUEUE_URL = aws_sqs_queue.receiver_queue.id GLUE_SQS_QUEUE_URL = aws_sqs_queue.receiver_queue_glue.id - EMAIL_OPEN_KINESIS_STREAM = var.email_open_kinesis_stream - EMAIL_CLICK_KINESIS_STREAM = var.email_click_kinesis_stream + EMAIL_OPEN_FIREHOSE_STREAM = var.email_open_firehose_stream + EMAIL_CLICK_FIREHOSE_STREAM = var.email_click_firehose_stream } } } diff --git a/variables.tf b/variables.tf index 29ee230..3568641 100644 --- a/variables.tf +++ b/variables.tf @@ -77,15 +77,15 @@ variable "controlshift_hostname" { description = "The hostname of your ControlShift instance. Likely to be something like action.myorganization.org" } -variable "email_open_kinesis_stream" { +variable "email_open_firehose_stream" { type = string - description = "The name of a Kinesis stream that will receive email open events." + description = "The name of a Firehose stream that will receive email open events." default = "" } -variable "email_click_kinesis_stream" { +variable "email_click_firehose_stream" { type = string - description = "The name of a Kinesis stream that will receive email click events." + description = "The name of a Firehose stream that will receive email click events." 
default = "" } From b51c15ca6ec49ffe19f29f97c3781164c7e196c3 Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Tue, 23 Feb 2021 11:27:38 -0500 Subject: [PATCH 10/37] Load gzipped files --- config_item.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config_item.json b/config_item.json index 4083417..e4ca0b2 100644 --- a/config_item.json +++ b/config_item.json @@ -12,7 +12,7 @@ "truncateTarget": {"BOOL": ${truncate_target}}, "useSSL": {"BOOL": false} }}]}, - "copyOptions": {"S": "EMPTYASNULL TRUNCATECOLUMNS"}, + "copyOptions": {"S": "EMPTYASNULL TRUNCATECOLUMNS GZIP"}, "dataFormat": {"S": "CSV"}, "csvDelimiter": {"S": ","}, "ignoreCsvHeader": {"BOOL": true}, From fbbfeda6dc6b6c381215fd64ac75991de027341d Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Mon, 1 Mar 2021 15:48:40 -0500 Subject: [PATCH 11/37] Undo gzip for now --- config_item.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config_item.json b/config_item.json index 4083417..e4ca0b2 100644 --- a/config_item.json +++ b/config_item.json @@ -12,7 +12,7 @@ "truncateTarget": {"BOOL": ${truncate_target}}, "useSSL": {"BOOL": false} }}]}, - "copyOptions": {"S": "EMPTYASNULL TRUNCATECOLUMNS"}, + "copyOptions": {"S": "EMPTYASNULL TRUNCATECOLUMNS GZIP"}, "dataFormat": {"S": "CSV"}, "csvDelimiter": {"S": ","}, "ignoreCsvHeader": {"BOOL": true}, From cb1a933420f260886040f7b309b30a44c9bf5f33 Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Mon, 1 Mar 2021 15:58:07 -0500 Subject: [PATCH 12/37] Last commit was actually backwards This reverts commit fbbfeda6dc6b6c381215fd64ac75991de027341d. --- config_item.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config_item.json b/config_item.json index e4ca0b2..4083417 100644 --- a/config_item.json +++ b/config_item.json @@ -12,7 +12,7 @@ "truncateTarget": {"BOOL": ${truncate_target}}, "useSSL": {"BOOL": false} }}]}, - "copyOptions": {"S": "EMPTYASNULL TRUNCATECOLUMNS GZIP"}, + "copyOptions": {"S": "EMPTYASNULL TRUNCATECOLUMNS"}, "dataFormat": {"S": "CSV"}, "csvDelimiter": {"S": ","}, "ignoreCsvHeader": {"BOOL": true}, From e1ce7e734803f229e5babb64493b952d3cb466ca Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Wed, 7 Apr 2021 10:29:03 -0400 Subject: [PATCH 13/37] Add external_ids field to sync --- templates/signatures_job.py.tpl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/templates/signatures_job.py.tpl b/templates/signatures_job.py.tpl index ca5e464..d9ed97e 100644 --- a/templates/signatures_job.py.tpl +++ b/templates/signatures_job.py.tpl @@ -38,7 +38,7 @@ datasource1 = glueContext.create_dynamic_frame.from_catalog( # Step 3: Map the columns in the data catalog / S3 bucket to the columns we want in Redshift ## @type: ApplyMapping -## @args: [mapping = [("id", "bigint", "id", "long"), ("petition_id", "bigint", "petition_id", "long"), ("email", "string", "email", "string"), ("first_name", "string", "first_name", "string"), ("last_name", "string", "last_name", "string"), ("phone_number", "string", "phone_number", "string"), ("postcode", "string", "postcode", "string"), ("created_at", "string", "created_at", "timestamp"), ("join_organisation", "string", "join_organisation", "boolean"), ("deleted_at", "string", "deleted_at", "timestamp"), ("unsubscribe_at", "string", "unsubscribe_at", "timestamp"), ("external_constituent_id", "bigint", "external_constituent_id", "long"), ("member_id", "bigint", "member_id", "long"), ("additional_fields", "string", "additional_fields", "string"), ("cached_organisation_slug", "string", 
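The distinction in the subject line matters because the two putRecord APIs
are easy to conflate. A minimal side-by-side sketch, assuming the aws-sdk v2
clients constructed in receiver.js; the stream names and the payload variable
are hypothetical stand-ins:

    // Kinesis Data Streams: records go to shards, so a PartitionKey is required.
    kinesis.putRecord({
      StreamName: 'my-stream',
      PartitionKey: 'some-key',
      Data: JSON.stringify(payload)
    }).promise();

    // Kinesis Data Firehose: a delivery stream has no shards or partition keys,
    // so the record is simply handed to the delivery stream by name.
    firehose.putRecord({
      DeliveryStreamName: 'my-delivery-stream',
      Record: { Data: JSON.stringify(payload) }
    }).promise();

Note that the handler above still passes three arguments to the new
two-parameter putFirehose(data, stream). JavaScript silently ignores the
extra argument, so receivedJSON.jid lands in the stream parameter and the
real stream name is dropped; patch 16 removes the leftover jid argument.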
"cached_organisation_slug", "string"), ("source", "string", "source", "string"), ("join_group", "string", "join_group", "boolean"), ("external_id", "bigint", "external_id", "long"), ("new_member", "string", "new_member", "boolean"), ("external_action_id", "string", "external_action_id", "string"), ("locale", "string", "locale", "string"), ("bucket", "string", "bucket", "string"), ("country", "string", "country", "string"), ("updated_at", "string", "updated_at", "timestamp"), ("user_ip", "string", "user_ip", "string"), ("confirmation_token", "string", "confirmation_token", "string"), ("confirmed_at", "string", "confirmed_at", "timestamp"), ("confirmation_sent_at", "string", "confirmation_sent_at", "timestamp"), ("last_signed_at", "string", "last_signed_at", "timestamp"), ("join_list_suppressed", "string", "join_list_suppressed", "boolean"), ("old_daisy_chain_used", "string", "old_daisy_chain_used", "string"), ("from_embed", "string", "from_embed", "boolean"), ("user_agent", "string", "user_agent", "string"), ("confirmed_reason", "string", "confirmed_reason", "string"), ("synced_to_crm_at", "string", "synced_to_crm_at", "timestamp"), ("daisy_chain_experiment_slug", "string", "daisy_chain_experiment_slug", "string"), ("eu_data_processing_consent", "string", "eu_data_processing_consent", "boolean"), ("from_one_click", "string", "from_one_click", "boolean"), ("consent_content_version_id", "string", "consent_content_version_id", "string"), ("daisy_chain_id_used", "string", "daisy_chain_id_used", "string"), ("email_opt_in_type_id", "bigint", "email_opt_in_type_id", "long"), ("facebook_id", "string", "facebook_id", "string"), ("utm_params", "string", "utm_params", "string"), ("postcode_id", "bigint", "postcode_id", "long"), ("referring_share_click_id", "bigint", "referring_share_click_id", "int"), ("opt_in_sms", "string", "opt_in_sms", "boolean")], transformation_ctx = "applymapping1"] +## @args: [mapping = [("id", "bigint", "id", "long"), ("petition_id", "bigint", "petition_id", "long"), ("email", "string", "email", "string"), ("first_name", "string", "first_name", "string"), ("last_name", "string", "last_name", "string"), ("phone_number", "string", "phone_number", "string"), ("postcode", "string", "postcode", "string"), ("created_at", "string", "created_at", "timestamp"), ("join_organisation", "string", "join_organisation", "boolean"), ("deleted_at", "string", "deleted_at", "timestamp"), ("unsubscribe_at", "string", "unsubscribe_at", "timestamp"), ("external_constituent_id", "bigint", "external_constituent_id", "long"), ("member_id", "bigint", "member_id", "long"), ("additional_fields", "string", "additional_fields", "string"), ("cached_organisation_slug", "string", "cached_organisation_slug", "string"), ("source", "string", "source", "string"), ("join_group", "string", "join_group", "boolean"), ("external_id", "bigint", "external_id", "long"), ("new_member", "string", "new_member", "boolean"), ("external_action_id", "string", "external_action_id", "string"), ("locale", "string", "locale", "string"), ("bucket", "string", "bucket", "string"), ("country", "string", "country", "string"), ("updated_at", "string", "updated_at", "timestamp"), ("user_ip", "string", "user_ip", "string"), ("confirmation_token", "string", "confirmation_token", "string"), ("confirmed_at", "string", "confirmed_at", "timestamp"), ("confirmation_sent_at", "string", "confirmation_sent_at", "timestamp"), ("last_signed_at", "string", "last_signed_at", "timestamp"), ("join_list_suppressed", "string", "join_list_suppressed", 
"boolean"), ("old_daisy_chain_used", "string", "old_daisy_chain_used", "string"), ("from_embed", "string", "from_embed", "boolean"), ("user_agent", "string", "user_agent", "string"), ("confirmed_reason", "string", "confirmed_reason", "string"), ("synced_to_crm_at", "string", "synced_to_crm_at", "timestamp"), ("daisy_chain_experiment_slug", "string", "daisy_chain_experiment_slug", "string"), ("eu_data_processing_consent", "string", "eu_data_processing_consent", "boolean"), ("from_one_click", "string", "from_one_click", "boolean"), ("consent_content_version_id", "string", "consent_content_version_id", "string"), ("daisy_chain_id_used", "string", "daisy_chain_id_used", "string"), ("email_opt_in_type_id", "bigint", "email_opt_in_type_id", "long"), ("facebook_id", "string", "facebook_id", "string"), ("utm_params", "string", "utm_params", "string"), ("postcode_id", "bigint", "postcode_id", "long"), ("referring_share_click_id", "bigint", "referring_share_click_id", "int"), ("opt_in_sms", "string", "opt_in_sms", "boolean"), ("external_ids", "string", "external_ids", "string")], transformation_ctx = "applymapping1"] ## @return: applymapping1 ## @inputs: [frame = datasource1] applymapping1 = ApplyMapping.apply( @@ -92,7 +92,8 @@ applymapping1 = ApplyMapping.apply( ("opt_in_sms", "string", "opt_in_sms", "boolean"), ("sms_opt_in_type_id", "string", "sms_opt_in_type_id", "bigint"), ("recaptcha_score", "string", "recaptcha_score", "decimal(3,2)"), - ("new_mobile_subscriber", "string", "new_mobile_subscriber", "boolean") + ("new_mobile_subscriber", "string", "new_mobile_subscriber", "boolean"), + ("external_ids", "string", "external_ids", "string") ], transformation_ctx = "applymapping1") From 5520fbf70de646621126ecb4c942636c6689f9d9 Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Thu, 15 Apr 2021 14:29:19 -0400 Subject: [PATCH 14/37] Add Kinesis Firehose permissions to receiver role --- iam.tf | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/iam.tf b/iam.tf index d2720fa..cdfb045 100644 --- a/iam.tf +++ b/iam.tf @@ -34,6 +34,22 @@ data "aws_iam_policy_document" "receiver_execution_policy" { resources = ["arn:aws:sqs:${var.aws_region}:*:${aws_sqs_queue.receiver_queue.name}", "arn:aws:sqs:${var.aws_region}:*:${aws_sqs_queue.receiver_queue_glue.name}"] } + + # allow the receiver lambda to send messages to Firehose streams + statement { + effect = "Allow", + actions = [ + "firehose:DeleteDeliveryStream", + "firehose:PutRecord", + "firehose:StartDeliveryStreamEncryption", + "firehose:CreateDeliveryStream", + "firehose:PutRecordBatch", + "firehose:StopDeliveryStreamEncryption", + "firehose:UpdateDestination" + ], + resources = ["arn:aws:firehose:${var.aws_region}:*:deliverystream/${var.email_open_firehose_stream}", + "arn:aws:firehose:${var.aws_region}:*:deliverystream/${var.email_click_firehose_stream}"] + } } resource "aws_iam_role_policy" "lambda_receiver" { From c2f2cec61a52415eb4c2ac22186c4d7312eff9af Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Thu, 15 Apr 2021 14:35:46 -0400 Subject: [PATCH 15/37] Typo fix --- iam.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iam.tf b/iam.tf index cdfb045..e75bd74 100644 --- a/iam.tf +++ b/iam.tf @@ -37,7 +37,7 @@ data "aws_iam_policy_document" "receiver_execution_policy" { # allow the receiver lambda to send messages to Firehose streams statement { - effect = "Allow", + effect = "Allow" actions = [ "firehose:DeleteDeliveryStream", "firehose:PutRecord", @@ -46,7 +46,7 @@ data "aws_iam_policy_document" 
"receiver_execution_policy" { "firehose:PutRecordBatch", "firehose:StopDeliveryStreamEncryption", "firehose:UpdateDestination" - ], + ] resources = ["arn:aws:firehose:${var.aws_region}:*:deliverystream/${var.email_open_firehose_stream}", "arn:aws:firehose:${var.aws_region}:*:deliverystream/${var.email_click_firehose_stream}"] } From 1c5670107b4db10cee3ffe824b37316256b7d8cb Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Fri, 16 Apr 2021 12:31:53 -0400 Subject: [PATCH 16/37] Don't need jid --- lambdas/receiver.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lambdas/receiver.js b/lambdas/receiver.js index 0529565..1dd4409 100644 --- a/lambdas/receiver.js +++ b/lambdas/receiver.js @@ -85,10 +85,10 @@ exports.handler = async (event) => { await enqueueTask(receivedJSON.data, 'incremental'); return sendResponse({"status": "processed"}); } else if(receivedJSON.type === 'email.open' && process.env.EMAIL_OPEN_FIREHOSE_STREAM !== null && process.env.EMAIL_OPEN_FIREHOSE_STREAM !== ''){ - await putFirehose(JSON.stringify(receivedJSON.data), receivedJSON.jid, process.env.EMAIL_OPEN_FIREHOSE_STREAM); + await putFirehose(JSON.stringify(receivedJSON.data), process.env.EMAIL_OPEN_FIREHOSE_STREAM); return sendResponse({"status": "processed"}); } else if(receivedJSON.type === 'email.click' && process.env.EMAIL_CLICK_FIREHOSE_STREAM !== null && process.env.EMAIL_CLICK_FIREHOSE_STREAM !== ''){ - await putFirehose(JSON.stringify(receivedJSON.data), receivedJSON.jid, process.env.EMAIL_CLICK_FIREHOSE_STREAM); + await putFirehose(JSON.stringify(receivedJSON.data), process.env.EMAIL_CLICK_FIREHOSE_STREAM); return sendResponse({"status": "processed"}); } else { return Promise.resolve(sendResponse({"status": "skipped", "payload": receivedJSON})); From f686d45a3e28917d715b5b7fe3b46b4fc40fc335 Mon Sep 17 00:00:00 2001 From: Ilona Brand Date: Thu, 10 Feb 2022 12:12:26 -0600 Subject: [PATCH 17/37] Changing config to match CSL dev noticing discrepancy --- config_item.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config_item.json b/config_item.json index 542c5de..d6c4a86 100644 --- a/config_item.json +++ b/config_item.json @@ -23,5 +23,5 @@ "failureTopicARN": {"S": "${failure_topic_arn}"}, "batchSize": {"N": "1"}, "currentBatch": {"S": "${current_batch}"}, - "compress": {"S": "${compress}"} + "compression": {"S": "${compress}"} } From 882b5a48920cd6bc13a04faac58de36fe81c5fd0 Mon Sep 17 00:00:00 2001 From: Ilona Brand Date: Fri, 11 Feb 2022 09:23:45 -0600 Subject: [PATCH 18/37] Reverting to see if this will speed the terraform builds up again. The plan is to eventually reintroduce this change. Revert "Changing config to match CSL dev noticing discrepancy" This reverts commit f686d45a3e28917d715b5b7fe3b46b4fc40fc335. --- config_item.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config_item.json b/config_item.json index d6c4a86..542c5de 100644 --- a/config_item.json +++ b/config_item.json @@ -23,5 +23,5 @@ "failureTopicARN": {"S": "${failure_topic_arn}"}, "batchSize": {"N": "1"}, "currentBatch": {"S": "${current_batch}"}, - "compression": {"S": "${compress}"} + "compress": {"S": "${compress}"} } From ba960752f13a10c68c97c96b0bd56212b32faf98 Mon Sep 17 00:00:00 2001 From: Ilona Brand Date: Mon, 21 Feb 2022 16:56:20 -0600 Subject: [PATCH 19/37] This is a revert of a revert. We are going to try the compression change again. Revert "Reverting to see if this will speed the terraform builds up again. The plan is to eventually reintroduce this change." 
From f686d45a3e28917d715b5b7fe3b46b4fc40fc335 Mon Sep 17 00:00:00 2001
From: Ilona Brand
Date: Thu, 10 Feb 2022 12:12:26 -0600
Subject: [PATCH 17/37] Changing config to match CSL dev noticing discrepancy

---
 config_item.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config_item.json b/config_item.json
index 542c5de..d6c4a86 100644
--- a/config_item.json
+++ b/config_item.json
@@ -23,5 +23,5 @@
   "failureTopicARN": {"S": "${failure_topic_arn}"},
   "batchSize": {"N": "1"},
   "currentBatch": {"S": "${current_batch}"},
-  "compress": {"S": "${compress}"}
+  "compression": {"S": "${compress}"}
 }

From 882b5a48920cd6bc13a04faac58de36fe81c5fd0 Mon Sep 17 00:00:00 2001
From: Ilona Brand
Date: Fri, 11 Feb 2022 09:23:45 -0600
Subject: [PATCH 18/37] Reverting to see if this will speed the terraform
 builds up again. The plan is to eventually reintroduce this change.

Revert "Changing config to match CSL dev noticing discrepancy"

This reverts commit f686d45a3e28917d715b5b7fe3b46b4fc40fc335.
---
 config_item.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config_item.json b/config_item.json
index d6c4a86..542c5de 100644
--- a/config_item.json
+++ b/config_item.json
@@ -23,5 +23,5 @@
   "failureTopicARN": {"S": "${failure_topic_arn}"},
   "batchSize": {"N": "1"},
   "currentBatch": {"S": "${current_batch}"},
-  "compression": {"S": "${compress}"}
+  "compress": {"S": "${compress}"}
 }

From ba960752f13a10c68c97c96b0bd56212b32faf98 Mon Sep 17 00:00:00 2001
From: Ilona Brand
Date: Mon, 21 Feb 2022 16:56:20 -0600
Subject: [PATCH 19/37] This is a revert of a revert. We are going to try the
 compression change again.

Revert "Reverting to see if this will speed the terraform builds up again.
The plan is to eventually reintroduce this change."

This reverts commit 882b5a48920cd6bc13a04faac58de36fe81c5fd0.
---
 config_item.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config_item.json b/config_item.json
index 542c5de..d6c4a86 100644
--- a/config_item.json
+++ b/config_item.json
@@ -23,5 +23,5 @@
   "failureTopicARN": {"S": "${failure_topic_arn}"},
   "batchSize": {"N": "1"},
   "currentBatch": {"S": "${current_batch}"},
-  "compress": {"S": "${compress}"}
+  "compression": {"S": "${compress}"}
 }

From 5d2625cfc35b39101f2732a894885d287852fab6 Mon Sep 17 00:00:00 2001
From: Ilona Brand
Date: Tue, 24 May 2022 10:57:42 -0500
Subject: [PATCH 20/37] Change the daisy_chain_id_used field from a string to
 bigint because the string version was breaking the sync

---
 templates/signatures_job.py.tpl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/templates/signatures_job.py.tpl b/templates/signatures_job.py.tpl
index d9ed97e..e4692bf 100644
--- a/templates/signatures_job.py.tpl
+++ b/templates/signatures_job.py.tpl
@@ -38,7 +38,7 @@ datasource1 = glueContext.create_dynamic_frame.from_catalog(
 
 # Step 3: Map the columns in the data catalog / S3 bucket to the columns we want in Redshift
 ## @type: ApplyMapping
-## @args: [mapping = [("id", "bigint", "id", "long"), ("petition_id", "bigint", "petition_id", "long"), ("email", "string", "email", "string"), ("first_name", "string", "first_name", "string"), ("last_name", "string", "last_name", "string"), ("phone_number", "string", "phone_number", "string"), ("postcode", "string", "postcode", "string"), ("created_at", "string", "created_at", "timestamp"), ("join_organisation", "string", "join_organisation", "boolean"), ("deleted_at", "string", "deleted_at", "timestamp"), ("unsubscribe_at", "string", "unsubscribe_at", "timestamp"), ("external_constituent_id", "bigint", "external_constituent_id", "long"), ("member_id", "bigint", "member_id", "long"), ("additional_fields", "string", "additional_fields", "string"), ("cached_organisation_slug", "string", "cached_organisation_slug", "string"), ("source", "string", "source", "string"), ("join_group", "string", "join_group", "boolean"), ("external_id", "bigint", "external_id", "long"), ("new_member", "string", "new_member", "boolean"), ("external_action_id", "string", "external_action_id", "string"), ("locale", "string", "locale", "string"), ("bucket", "string", "bucket", "string"), ("country", "string", "country", "string"), ("updated_at", "string", "updated_at", "timestamp"), ("user_ip", "string", "user_ip", "string"), ("confirmation_token", "string", "confirmation_token", "string"), ("confirmed_at", "string", "confirmed_at", "timestamp"), ("confirmation_sent_at", "string", "confirmation_sent_at", "timestamp"), ("last_signed_at", "string", "last_signed_at", "timestamp"), ("join_list_suppressed", "string", "join_list_suppressed", "boolean"), ("old_daisy_chain_used", "string", "old_daisy_chain_used", "string"), ("from_embed", "string", "from_embed", "boolean"), ("user_agent", "string", "user_agent", "string"), ("confirmed_reason", "string", "confirmed_reason", "string"), ("synced_to_crm_at", "string", "synced_to_crm_at", "timestamp"), ("daisy_chain_experiment_slug", "string", "daisy_chain_experiment_slug", "string"), ("eu_data_processing_consent", "string", "eu_data_processing_consent", "boolean"), ("from_one_click", "string", "from_one_click", "boolean"), ("consent_content_version_id", "string", "consent_content_version_id", "string"), ("daisy_chain_id_used", "string", "daisy_chain_id_used", "string"), ("email_opt_in_type_id", "bigint", "email_opt_in_type_id", "long"), ("facebook_id", "string", "facebook_id", "string"), ("utm_params", "string", "utm_params", "string"), ("postcode_id", "bigint", "postcode_id", "long"), ("referring_share_click_id", "bigint", "referring_share_click_id", "int"), ("opt_in_sms", "string", "opt_in_sms", "boolean"), ("external_ids", "string", "external_ids", "string")], transformation_ctx = "applymapping1"]
+## @args: [mapping = [("id", "bigint", "id", "long"), ("petition_id", "bigint", "petition_id", "long"), ("email", "string", "email", "string"), ("first_name", "string", "first_name", "string"), ("last_name", "string", "last_name", "string"), ("phone_number", "string", "phone_number", "string"), ("postcode", "string", "postcode", "string"), ("created_at", "string", "created_at", "timestamp"), ("join_organisation", "string", "join_organisation", "boolean"), ("deleted_at", "string", "deleted_at", "timestamp"), ("unsubscribe_at", "string", "unsubscribe_at", "timestamp"), ("external_constituent_id", "bigint", "external_constituent_id", "long"), ("member_id", "bigint", "member_id", "long"), ("additional_fields", "string", "additional_fields", "string"), ("cached_organisation_slug", "string", "cached_organisation_slug", "string"), ("source", "string", "source", "string"), ("join_group", "string", "join_group", "boolean"), ("external_id", "bigint", "external_id", "long"), ("new_member", "string", "new_member", "boolean"), ("external_action_id", "string", "external_action_id", "string"), ("locale", "string", "locale", "string"), ("bucket", "string", "bucket", "string"), ("country", "string", "country", "string"), ("updated_at", "string", "updated_at", "timestamp"), ("user_ip", "string", "user_ip", "string"), ("confirmation_token", "string", "confirmation_token", "string"), ("confirmed_at", "string", "confirmed_at", "timestamp"), ("confirmation_sent_at", "string", "confirmation_sent_at", "timestamp"), ("last_signed_at", "string", "last_signed_at", "timestamp"), ("join_list_suppressed", "string", "join_list_suppressed", "boolean"), ("old_daisy_chain_used", "string", "old_daisy_chain_used", "string"), ("from_embed", "string", "from_embed", "boolean"), ("user_agent", "string", "user_agent", "string"), ("confirmed_reason", "string", "confirmed_reason", "string"), ("synced_to_crm_at", "string", "synced_to_crm_at", "timestamp"), ("daisy_chain_experiment_slug", "string", "daisy_chain_experiment_slug", "string"), ("eu_data_processing_consent", "string", "eu_data_processing_consent", "boolean"), ("from_one_click", "string", "from_one_click", "boolean"), ("consent_content_version_id", "string", "consent_content_version_id", "string"), ("daisy_chain_id_used", "string", "daisy_chain_id_used", "bigint"), ("email_opt_in_type_id", "bigint", "email_opt_in_type_id", "long"), ("facebook_id", "string", "facebook_id", "string"), ("utm_params", "string", "utm_params", "string"), ("postcode_id", "bigint", "postcode_id", "long"), ("referring_share_click_id", "bigint", "referring_share_click_id", "int"), ("opt_in_sms", "string", "opt_in_sms", "boolean"), ("external_ids", "string", "external_ids", "string")], transformation_ctx = "applymapping1"]
 ## @return: applymapping1
 ## @inputs: [frame = datasource1]
 applymapping1 = ApplyMapping.apply(
@@ -83,7 +83,7 @@ applymapping1 = ApplyMapping.apply(
     ("eu_data_processing_consent", "string", "eu_data_processing_consent", "boolean"),
     ("from_one_click", "string", "from_one_click", "boolean"),
     ("consent_content_version_id", "string", "consent_content_version_id", "bigint"),
-    ("daisy_chain_id_used", "string", "daisy_chain_id_used", "bigint"),
+    ("daisy_chain_id_used", "bigint", "daisy_chain_id_used", "bigint"),
     ("email_opt_in_type_id", "bigint", "email_opt_in_type_id", "bigint"),
     ("facebook_id", "string", "facebook_id", "string"),
     ("utm_params", "string", "utm_params", "string"),

From e3731c2464ba4990c04d9f56a2108ce2fa00a290 Mon Sep 17 00:00:00 2001
From: Ilona Brand
Date: Tue, 24 May 2022 11:37:25 -0500
Subject: [PATCH 21/37] fix comment

---
 templates/signatures_job.py.tpl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/signatures_job.py.tpl b/templates/signatures_job.py.tpl
index e4692bf..598b24f 100644
--- a/templates/signatures_job.py.tpl
+++ b/templates/signatures_job.py.tpl
@@ -38,7 +38,7 @@ datasource1 = glueContext.create_dynamic_frame.from_catalog(
 
 # Step 3: Map the columns in the data catalog / S3 bucket to the columns we want in Redshift
 ## @type: ApplyMapping
-## @args: [mapping = [("id", "bigint", "id", "long"), ("petition_id", "bigint", "petition_id", "long"), ("email", "string", "email", "string"), ("first_name", "string", "first_name", "string"), ("last_name", "string", "last_name", "string"), ("phone_number", "string", "phone_number", "string"), ("postcode", "string", "postcode", "string"), ("created_at", "string", "created_at", "timestamp"), ("join_organisation", "string", "join_organisation", "boolean"), ("deleted_at", "string", "deleted_at", "timestamp"), ("unsubscribe_at", "string", "unsubscribe_at", "timestamp"), ("external_constituent_id", "bigint", "external_constituent_id", "long"), ("member_id", "bigint", "member_id", "long"), ("additional_fields", "string", "additional_fields", "string"), ("cached_organisation_slug", "string", "cached_organisation_slug", "string"), ("source", "string", "source", "string"), ("join_group", "string", "join_group", "boolean"), ("external_id", "bigint", "external_id", "long"), ("new_member", "string", "new_member", "boolean"), ("external_action_id", "string", "external_action_id", "string"), ("locale", "string", "locale", "string"), ("bucket", "string", "bucket", "string"), ("country", "string", "country", "string"), ("updated_at", "string", "updated_at", "timestamp"), ("user_ip", "string", "user_ip", "string"), ("confirmation_token", "string", "confirmation_token", "string"), ("confirmed_at", "string", "confirmed_at", "timestamp"), ("confirmation_sent_at", "string", "confirmation_sent_at", "timestamp"), ("last_signed_at", "string", "last_signed_at", "timestamp"), ("join_list_suppressed", "string", "join_list_suppressed", "boolean"), ("old_daisy_chain_used", "string", "old_daisy_chain_used", "string"), ("from_embed", "string", "from_embed", "boolean"), ("user_agent", "string", "user_agent", "string"), ("confirmed_reason", "string", "confirmed_reason", "string"), ("synced_to_crm_at", "string", "synced_to_crm_at", "timestamp"), ("daisy_chain_experiment_slug", "string", "daisy_chain_experiment_slug", "string"), ("eu_data_processing_consent", "string", "eu_data_processing_consent", "boolean"), ("from_one_click", "string", "from_one_click", "boolean"), ("consent_content_version_id", "string", "consent_content_version_id", "string"), ("daisy_chain_id_used", "string", "daisy_chain_id_used", "bigint"), ("email_opt_in_type_id", "bigint", "email_opt_in_type_id", "long"), ("facebook_id", "string", "facebook_id", "string"), ("utm_params", "string", "utm_params", "string"), ("postcode_id", "bigint", "postcode_id", "long"), ("referring_share_click_id", "bigint", "referring_share_click_id", "int"), ("opt_in_sms", "string", "opt_in_sms", "boolean"), ("external_ids", "string", "external_ids", "string")], transformation_ctx = "applymapping1"]
+## @args: [mapping = [("id", "bigint", "id", "long"), ("petition_id", "bigint", "petition_id", "long"), ("email", "string", "email", "string"), ("first_name", "string", "first_name", "string"), ("last_name", "string", "last_name", "string"), ("phone_number", "string", "phone_number", "string"), ("postcode", "string", "postcode", "string"), ("created_at", "string", "created_at", "timestamp"), ("join_organisation", "string", "join_organisation", "boolean"), ("deleted_at", "string", "deleted_at", "timestamp"), ("unsubscribe_at", "string", "unsubscribe_at", "timestamp"), ("external_constituent_id", "bigint", "external_constituent_id", "long"), ("member_id", "bigint", "member_id", "long"), ("additional_fields", "string", "additional_fields", "string"), ("cached_organisation_slug", "string", "cached_organisation_slug", "string"), ("source", "string", "source", "string"), ("join_group", "string", "join_group", "boolean"), ("external_id", "bigint", "external_id", "long"), ("new_member", "string", "new_member", "boolean"), ("external_action_id", "string", "external_action_id", "string"), ("locale", "string", "locale", "string"), ("bucket", "string", "bucket", "string"), ("country", "string", "country", "string"), ("updated_at", "string", "updated_at", "timestamp"), ("user_ip", "string", "user_ip", "string"), ("confirmation_token", "string", "confirmation_token", "string"), ("confirmed_at", "string", "confirmed_at", "timestamp"), ("confirmation_sent_at", "string", "confirmation_sent_at", "timestamp"), ("last_signed_at", "string", "last_signed_at", "timestamp"), ("join_list_suppressed", "string", "join_list_suppressed", "boolean"), ("old_daisy_chain_used", "string", "old_daisy_chain_used", "string"), ("from_embed", "string", "from_embed", "boolean"), ("user_agent", "string", "user_agent", "string"), ("confirmed_reason", "string", "confirmed_reason", "string"), ("synced_to_crm_at", "string", "synced_to_crm_at", "timestamp"), ("daisy_chain_experiment_slug", "string", "daisy_chain_experiment_slug", "string"), ("eu_data_processing_consent", "string", "eu_data_processing_consent", "boolean"), ("from_one_click", "string", "from_one_click", "boolean"), ("consent_content_version_id", "string", "consent_content_version_id", "string"), ("daisy_chain_id_used", "bigint", "daisy_chain_id_used", "bigint"), ("email_opt_in_type_id", "bigint", "email_opt_in_type_id", "long"), ("facebook_id", "string", "facebook_id", "string"), ("utm_params", "string", "utm_params", "string"), ("postcode_id", "bigint", "postcode_id", "long"), ("referring_share_click_id", "bigint", "referring_share_click_id", "int"), ("opt_in_sms", "string", "opt_in_sms", "boolean"), ("external_ids", "string", "external_ids", "string")], transformation_ctx = "applymapping1"]
 ## @return: applymapping1
 ## @inputs: [frame = datasource1]
 applymapping1 = ApplyMapping.apply(
to save disk space while providing enough # time to analyze things that might have gone wrong. lifecycle_rule { - id = "expire-manifests" + id = "Remove temp files over a week old" + abort_incomplete_multipart_upload_days = 0 enabled = true + prefix = "production/temp/" expiration { - days = 5 + days = 7 + expired_object_delete_marker = false } } } - - From 7fa1de1bf76bd0739438572e8cfcea5c1b6b4113 Mon Sep 17 00:00:00 2001 From: Ilona Brand Date: Mon, 20 Feb 2023 16:30:51 -0600 Subject: [PATCH 23/37] put that code in the wrong s3 section --- glue_job.tf | 12 ++++++++++++ s3.tf | 9 ++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/glue_job.tf b/glue_job.tf index d6e7c64..4dafa00 100644 --- a/glue_job.tf +++ b/glue_job.tf @@ -28,6 +28,18 @@ resource "aws_s3_bucket" "glue_resources" { } } } + + lifecycle_rule { + id = "Remove temp files over a week old" + abort_incomplete_multipart_upload_days = 0 + enabled = true + prefix = "production/temp/" + + expiration { + days = 7 + expired_object_delete_marker = false + } + } } data "template_file" "signatures_script" { diff --git a/s3.tf b/s3.tf index a368562..eab1f70 100644 --- a/s3.tf +++ b/s3.tf @@ -24,14 +24,13 @@ resource "aws_s3_bucket" "manifest" { # expire the ingested manifests after 5 days after they have been processed to save disk space while providing enough # time to analyze things that might have gone wrong. lifecycle_rule { - id = "Remove temp files over a week old" - abort_incomplete_multipart_upload_days = 0 + id = "expire-manifests" enabled = true - prefix = "production/temp/" expiration { - days = 7 - expired_object_delete_marker = false + days = 5 } } } + + From 4a2e1c32a1036e892a3e6fb647c2b7d51fd34b9c Mon Sep 17 00:00:00 2001 From: Ilona Brand Date: Mon, 20 Feb 2023 16:41:36 -0600 Subject: [PATCH 24/37] add in other fields that are turning up as conflicting when we try to terraform apply which means they have been manually changed in the aws console --- glue_job.tf | 10 ++++++++++ receiver.tf | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/glue_job.tf b/glue_job.tf index 4dafa00..92f29d4 100644 --- a/glue_job.tf +++ b/glue_job.tf @@ -10,6 +10,14 @@ resource "aws_glue_crawler" "signatures_crawler" { database_name = aws_glue_catalog_database.catalog_db.name name = "${var.controlshift_environment}_full_signatures" role = aws_iam_role.glue_service_role.arn + configuration = jsonencode( + { + Grouping = { + TableGroupingPolicy = "CombineCompatibleSchemas" + } + Version = 1 + } + ) s3_target { path = local.signatures_s3_path @@ -146,6 +154,8 @@ resource "aws_glue_job" "signatures_full" { name = "cs-${var.controlshift_environment}-signatures-full" connections = [ aws_glue_connection.redshift_connection.name ] glue_version = "3.0" + number_of_workers = 9 + worker_type = "G.1X" default_arguments = { "--TempDir": "s3://${aws_s3_bucket.glue_resources.bucket}/${var.controlshift_environment}/temp", "--job-bookmark-option": "job-bookmark-disable", diff --git a/receiver.tf b/receiver.tf index 3070590..7586f3f 100644 --- a/receiver.tf +++ b/receiver.tf @@ -21,6 +21,11 @@ resource "aws_lambda_function" "receiver_lambda" { EMAIL_CLICK_FIREHOSE_STREAM = var.email_click_firehose_stream } } + + // This prevents noisy logs from cluttering up datadog + tags = { + datadog = "exclude" + } } resource "aws_api_gateway_rest_api" "receiver" { From f0d1ad44d7cee167d7bf290132043b8e47ed5bd6 Mon Sep 17 00:00:00 2001 From: Kathy Nguyen Date: Wed, 31 May 2023 09:33:13 -0700 Subject: [PATCH 25/37] Update AWS version --- versions.tf | 
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/versions.tf b/versions.tf
index 4f8f56d..6106285 100644
--- a/versions.tf
+++ b/versions.tf
@@ -6,7 +6,7 @@ terraform {
     }
     aws = {
       source = "hashicorp/aws"
-      version = "~> 2.0"
+      version = "~> 3.0"
     }
     http = {
       source = "hashicorp/http"

From 86f80de6e81f82d73c173ee18d8ccb95472f0062 Mon Sep 17 00:00:00 2001
From: Kathy Nguyen
Date: Wed, 31 May 2023 09:35:51 -0700
Subject: [PATCH 26/37] Update AWS provider to version 4

---
 versions.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/versions.tf b/versions.tf
index 6106285..5e463c5 100644
--- a/versions.tf
+++ b/versions.tf
@@ -6,7 +6,7 @@ terraform {
     }
     aws = {
       source = "hashicorp/aws"
-      version = "~> 3.0"
+      version = "~> 4.0"
    }
     http = {
       source = "hashicorp/http"

From 2769394d5165a2b3331d14cd69ead985ef2d4132 Mon Sep 17 00:00:00 2001
From: Kathy Nguyen
Date: Wed, 31 May 2023 09:38:52 -0700
Subject: [PATCH 27/37] Remove region attribute from S3 bucket resources

---
 glue_job.tf | 1 -
 s3.tf       | 1 -
 2 files changed, 2 deletions(-)

diff --git a/glue_job.tf b/glue_job.tf
index 92f29d4..15bea02 100644
--- a/glue_job.tf
+++ b/glue_job.tf
@@ -26,7 +26,6 @@ resource "aws_glue_crawler" "signatures_crawler" {
 
 resource "aws_s3_bucket" "glue_resources" {
   bucket = var.glue_scripts_bucket_name
-  region = var.aws_region
   acl    = "private"
 
   server_side_encryption_configuration {
diff --git a/s3.tf b/s3.tf
index eab1f70..359fa29 100644
--- a/s3.tf
+++ b/s3.tf
@@ -8,7 +8,6 @@ resource "aws_s3_bucket" "manifest" {
   provider = aws.controlshift
   bucket   = var.manifest_bucket_name
   acl      = "private"
-  region   = var.controlshift_aws_region
 
   server_side_encryption_configuration {
     rule {

From dc96685cd06b7dd112c5bab357de39443a484e58 Mon Sep 17 00:00:00 2001
From: Ilona Brand
Date: Tue, 14 Nov 2023 17:10:15 -0600
Subject: [PATCH 28/37] Schema changes from 11-7: drop removed columns, add
 partnership_opt_ins and after_action

---
 templates/signatures_job.py.tpl | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/templates/signatures_job.py.tpl b/templates/signatures_job.py.tpl
index 598b24f..3bebf0c 100644
--- a/templates/signatures_job.py.tpl
+++ b/templates/signatures_job.py.tpl
@@ -55,13 +55,10 @@ applymapping1 = ApplyMapping.apply(
     ("join_organisation", "string", "join_organisation", "boolean"),
     ("deleted_at", "string", "deleted_at", "timestamp"),
     ("unsubscribe_at", "string", "unsubscribe_at", "timestamp"),
-    ("external_constituent_id", "bigint", "external_constituent_id", "string"),
     ("member_id", "bigint", "member_id", "int"),
     ("additional_fields", "string", "additional_fields", "string"),
     ("cached_organisation_slug", "string", "cached_organisation_slug", "string"),
     ("source", "string", "source", "string"),
-    ("join_partnership", "string", "join_partnership", "boolean"),
-    ("external_id", "bigint", "external_id", "string"),
     ("new_member", "string", "new_member", "boolean"),
     ("external_action_id", "string", "external_action_id", "string"),
     ("locale", "string", "locale", "string"),
@@ -93,7 +90,9 @@ applymapping1 = ApplyMapping.apply(
     ("sms_opt_in_type_id", "string", "sms_opt_in_type_id", "bigint"),
     ("recaptcha_score", "string", "recaptcha_score", "decimal(3,2)"),
     ("new_mobile_subscriber", "string", "new_mobile_subscriber", "boolean"),
-    ("external_ids", "string", "external_ids", "string")
+    ("external_ids", "string", "external_ids", "string"),
+    ("partnership_opt_ins", "string", "partnership_opt_ins", "string"),
+    ("after_action", "string", "after_action", "string")
     ],
     transformation_ctx = "applymapping1")

From 513f862c3c3b6c9ccf181fd1d281580f059aae1a Mon Sep 17 00:00:00 2001
From: Sophie Waldman <62553142+sjwmoveon@users.noreply.github.com> Date: Thu, 11 Jan 2024 13:00:14 -0500 Subject: [PATCH 29/37] Update signatures mappings Copy signature mappings from live job after all the troubleshooting ~1 month ago. --- templates/signatures_job.py.tpl | 592 +++++++++++++++++++++++++++++--- 1 file changed, 542 insertions(+), 50 deletions(-) diff --git a/templates/signatures_job.py.tpl b/templates/signatures_job.py.tpl index 3bebf0c..2c2ec40 100644 --- a/templates/signatures_job.py.tpl +++ b/templates/signatures_job.py.tpl @@ -44,58 +44,550 @@ datasource1 = glueContext.create_dynamic_frame.from_catalog( applymapping1 = ApplyMapping.apply( frame = datasource1, mappings = [ - ("id", "bigint", "id", "int"), - ("petition_id", "bigint", "petition_id", "int"), - ("email", "string", "email", "string"), - ("first_name", "string", "first_name", "string"), - ("last_name", "string", "last_name", "string"), - ("phone_number", "string", "phone_number", "string"), - ("postcode", "string", "postcode", "string"), - ("created_at", "string", "created_at", "timestamp"), - ("join_organisation", "string", "join_organisation", "boolean"), - ("deleted_at", "string", "deleted_at", "timestamp"), - ("unsubscribe_at", "string", "unsubscribe_at", "timestamp"), - ("member_id", "bigint", "member_id", "int"), - ("additional_fields", "string", "additional_fields", "string"), - ("cached_organisation_slug", "string", "cached_organisation_slug", "string"), - ("source", "string", "source", "string"), - ("new_member", "string", "new_member", "boolean"), - ("external_action_id", "string", "external_action_id", "string"), - ("locale", "string", "locale", "string"), - ("bucket", "string", "bucket", "string"), - ("country", "string", "country", "string"), - ("updated_at", "string", "updated_at", "timestamp"), - ("user_ip", "string", "user_ip", "string"), - ("confirmation_token", "string", "confirmation_token", "string"), - ("confirmed_at", "string", "confirmed_at", "timestamp"), - ("confirmation_sent_at", "string", "confirmation_sent_at", "timestamp"), - ("last_signed_at", "string", "last_signed_at", "timestamp"), - ("join_list_suppressed", "string", "join_list_suppressed", "boolean"), - ("old_daisy_chain_used", "string", "old_daisy_chain_used", "string"), - ("from_embed", "string", "from_embed", "boolean"), - ("user_agent", "string", "user_agent", "string"), - ("confirmed_reason", "string", "confirmed_reason", "string"), - ("synced_to_crm_at", "string", "synced_to_crm_at", "timestamp"), - ("daisy_chain_experiment_slug", "string", "daisy_chain_experiment_slug", "string"), - ("eu_data_processing_consent", "string", "eu_data_processing_consent", "boolean"), - ("from_one_click", "string", "from_one_click", "boolean"), - ("consent_content_version_id", "string", "consent_content_version_id", "bigint"), - ("daisy_chain_id_used", "bigint", "daisy_chain_id_used", "bigint"), - ("email_opt_in_type_id", "bigint", "email_opt_in_type_id", "bigint"), - ("facebook_id", "string", "facebook_id", "string"), - ("utm_params", "string", "utm_params", "string"), - ("postcode_id", "bigint", "postcode_id", "bigint"), - ("referring_share_click_id", "bigint", "referring_share_click_id", "int"), - ("opt_in_sms", "string", "opt_in_sms", "boolean"), - ("sms_opt_in_type_id", "string", "sms_opt_in_type_id", "bigint"), - ("recaptcha_score", "string", "recaptcha_score", "decimal(3,2)"), - ("new_mobile_subscriber", "string", "new_mobile_subscriber", "boolean"), - ("external_ids", "string", "external_ids", "string"), - ("partnership_opt_ins", 
"string", "partnership_opt_ins", "string"), - ("after_action", "string", "after_action", "string") - ], + + ( + "additional_fields", + + "string", + + "additional_fields", + + "character varying", + + ), + + ( + "after_action", + + "string", + + "after_action", + + "character varying" + + ), + + ( + "bucket", + + "string", + + "bucket", + + "character varying(255)" + + ), + + ( + "cached_organisation_slug", + + "string", + + "cached_organisation_slug", + + "character varying(255)" + + ), + + ( + "confirmation_sent_at", + + "string", + + "confirmation_sent_at", + + "timestamp", + + ), + + ( + "confirmation_token", + + "string", + + "confirmation_token", + + "character varying(255)" + + ), + + ( + "confirmed_at", + + "string", + + "confirmed_at", + + "timestamp", + + ), + + ( + "confirmed_reason", + + "string", + + "confirmed_reason", + + "character varying" + + ), + + ( + "consent_content_version_id", + + "string", + + "consent_content_version_id", + + "bigint" + + ), + + ( + "country", + + "string", + + "country", + + "character varying(255)" + + ), + + ( + "created_at", + + "string", + + "created_at", + + "timestamp", + + ), + + ( + "daisy_chain_experiment_slug", + + "string", + + "daisy_chain_experiment_slug", + + "character varying" + + ), + + ( + "daisy_chain_id_used", + + "bigint", + + "daisy_chain_id_used", + + "bigint" + + ), + + ( + "deleted_at", + + "string", + + "deleted_at", + + "timestamp", + + ), + + ( + "email", + + "string", + + "email", + + "character varying(255)" + + ), + + ( + "email_opt_in_type_id", + + "bigint", + + "email_opt_in_type_id", + + "bigint" + + ), + + ( + "eu_data_processing_consent", + + "string", + + "eu_data_processing_consent", + + "boolean" + + ), + + ( + "external_action_id", + + "string", + + "external_action_id", + + "character varying(255)" + + ), + + ( + "external_ids", + + "string", + + "external_ids", + + "character varying", + + ), + + ( + "facebook_id", + + "string", + + "facebook_id", + + "character varying" + + ), + + ( + "first_name", + + "string", + + "first_name", + + "character varying(255)" + + ), + + ( + "from_embed", + + "string", + + "from_embed", + + "boolean" + + ), + + ( + "from_one_click", + + "string", + + "from_one_click", + + "boolean" + + ), + + ( + "id", + + "bigint", + + "id", + + "bigint" + + ), + + ( + "join_list_suppressed", + + "string", + + "join_list_suppressed", + + "boolean" + + ), + + ( + "join_organisation", + + "string", + + "join_organisation", + + "boolean" + + ), + + ( + "last_name", + + "string", + + "last_name", + + "character varying(255)" + + ), + + ( + "last_signed_at", + + "string", + + "last_signed_at", + + "timestamp", + + ), + + ( + "locale", + + "string", + + "locale", + + "character varying(5)" + + ), + + ( + "member_id", + + "bigint", + + "member_id", + + "integer" + + ), + + ( + "new_member", + + "string", + + "new_member", + + "boolean" + + ), + + ( + "new_mobile_subscriber", + + "string", + + "new_mobile_subscriber", + + "boolean" + + ), + + ( + "old_daisy_chain_used", + + "string", + + "old_daisy_chain_used", + + "character varying(50)" + + ), + + ( + "opt_in_sms", + + "string", + + "opt_in_sms", + + "boolean" + + ), + + ( + "partnership_opt_ins", + + "string", + + "partnership_opt_ins", + + "character varying", + + ), + + ( + "petition_id", + + "bigint", + + "petition_id", + + "integer" + + ), + + ( + "phone_number", + + "string", + + "phone_number", + + "character varying(255)" + + ), + + ( + "postcode", + + "string", + + "postcode", + + "character varying(255)" + + ), + + ( + "postcode_id", 
+ + "bigint", + + "postcode_id", + + "bigint" + + ), + + ( + "recaptcha_score", + + "string", + + "recaptcha_score", + + "decimal(3,2)", + + ), + + ( + "referring_share_click_id", + + "bigint", + + "referring_share_click_id", + + "integer" + + ), + + ( + "sms_opt_in_type_id", + + "string", + + "sms_opt_in_type_id", + + "bigint" + + ), + + ( + "source", + + "string", + + "source", + + "character varying(255)" + + ), + + ( + "synced_to_crm_at", + + "string", + + "synced_to_crm_at", + + "timestamp", + + ), + + ( + "unsubscribe_at", + + "string", + + "unsubscribe_at", + + "timestamp", + + ), + + ( + "updated_at", + + "string", + + "updated_at", + + "timestamp", + + ), + + ( + "user_agent", + + "string", + + "user_agent", + + "character varying" + + ), + + ( + "user_ip", + + "string", + + "user_ip", + + "character varying" + + ), + + ( + "utm_params", + + "string", + + "utm_params", + + "character varying", + + ), + + ], transformation_ctx = "applymapping1") + # Step 4: Deal with column types that aren't consistent ## @type: ResolveChoice ## @args: [choice = "make_cols", transformation_ctx = "resolvechoice2"] From 37fef9633fa778989e1996627e5eb32e3ba3d96a Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Wed, 31 Jan 2024 15:45:44 -0500 Subject: [PATCH 30/37] Update Node version to 20 Node 12 is extremely deprecated; AWS is reaching end of life for Node 16. Update to 20 so we'll be up to date. --- loader.tf | 2 +- receiver.tf | 6 ++++-- run_glue_crawler.tf | 2 +- run_glue_job.tf | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/loader.tf b/loader.tf index c4f8b08..a3713bd 100644 --- a/loader.tf +++ b/loader.tf @@ -4,7 +4,7 @@ resource "aws_lambda_function" "loader" { function_name = "controlshift-redshift-loader" role = aws_iam_role.loader_lambda_role.arn handler = "index.handler" - runtime = "nodejs12.x" + runtime = "nodejs20.x" timeout = 900 vpc_config { diff --git a/receiver.tf b/receiver.tf index 7586f3f..24ad69f 100644 --- a/receiver.tf +++ b/receiver.tf @@ -1,6 +1,6 @@ data "archive_file" "receiver_zip" { type = "zip" - source_file = "${path.module}/lambdas/receiver.js" + source_dir = "${path.module}/lambdas/receiver/" output_path = "${path.module}/lambdas/receiver.zip" } @@ -11,7 +11,7 @@ resource "aws_lambda_function" "receiver_lambda" { handler = "receiver.handler" runtime = "nodejs12.x" timeout = var.receiver_timeout - source_code_hash = data.archive_file.receiver_zip.output_base64sha256 + source_code_hash = filebase64sha256(data.archive_file.receiver_zip.output_path) environment { variables = { @@ -19,6 +19,8 @@ resource "aws_lambda_function" "receiver_lambda" { GLUE_SQS_QUEUE_URL = aws_sqs_queue.receiver_queue_glue.id EMAIL_OPEN_FIREHOSE_STREAM = var.email_open_firehose_stream EMAIL_CLICK_FIREHOSE_STREAM = var.email_click_firehose_stream + CSL_CLIENT_ID = var.csl_client_id + CSL_CLIENT_SECRET = var.csl_client_secret } } diff --git a/run_glue_crawler.tf b/run_glue_crawler.tf index 9eb6b3f..a28243b 100644 --- a/run_glue_crawler.tf +++ b/run_glue_crawler.tf @@ -10,7 +10,7 @@ resource "aws_lambda_function" "glue_crawler_lambda" { function_name = "controlshift-run-glue-crawler" role = aws_iam_role.run_glue_crawler_lambda_role.arn handler = "run-glue-crawler.handler" - runtime = "nodejs12.x" + runtime = "nodejs20.x" timeout = 60 source_code_hash = data.archive_file.run_glue_crawler_zip.output_base64sha256 diff --git a/run_glue_job.tf b/run_glue_job.tf index e0af21c..69dfaf4 100644 --- a/run_glue_job.tf +++ b/run_glue_job.tf @@ -10,7 +10,7 @@ resource "aws_lambda_function" 
"glue_job_lambda" { function_name = "controlshift-run-glue-job" role = aws_iam_role.run_glue_job_lambda_role.arn handler = "run-glue-job.handler" - runtime = "nodejs12.x" + runtime = "nodejs20.x" timeout = 60 source_code_hash = data.archive_file.run_glue_job_zip.output_base64sha256 From 4f27f6167c989b95f2aa73b6bd54a57dd930cf88 Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Fri, 2 Feb 2024 11:04:14 -0500 Subject: [PATCH 31/37] Back out receiver lambda changes These changes were meant for an updated receiver.js that would also auto-cancel some signatures; that project is on hold at the moment so the changes are unnecessary. --- receiver.tf | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/receiver.tf b/receiver.tf index 24ad69f..32731ec 100644 --- a/receiver.tf +++ b/receiver.tf @@ -1,6 +1,6 @@ data "archive_file" "receiver_zip" { type = "zip" - source_dir = "${path.module}/lambdas/receiver/" + source_file = "${path.module}/lambdas/receiver.js" output_path = "${path.module}/lambdas/receiver.zip" } @@ -9,9 +9,9 @@ resource "aws_lambda_function" "receiver_lambda" { function_name = "controlshift-webhook-handler" role = aws_iam_role.receiver_lambda_role.arn handler = "receiver.handler" - runtime = "nodejs12.x" + runtime = "nodejs20.x" timeout = var.receiver_timeout - source_code_hash = filebase64sha256(data.archive_file.receiver_zip.output_path) + source_code_hash = data.archive_file.receiver_zip.output_base64sha256 environment { variables = { @@ -19,8 +19,6 @@ resource "aws_lambda_function" "receiver_lambda" { GLUE_SQS_QUEUE_URL = aws_sqs_queue.receiver_queue_glue.id EMAIL_OPEN_FIREHOSE_STREAM = var.email_open_firehose_stream EMAIL_CLICK_FIREHOSE_STREAM = var.email_click_firehose_stream - CSL_CLIENT_ID = var.csl_client_id - CSL_CLIENT_SECRET = var.csl_client_secret } } From 622e1c66d9c69252cc0e30129909ae2f9d8c2995 Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Mon, 5 Feb 2024 12:52:14 -0500 Subject: [PATCH 32/37] Update AWS version to one with support for nodejs20.x --- versions.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.tf b/versions.tf index 5e463c5..5c6fd54 100644 --- a/versions.tf +++ b/versions.tf @@ -6,7 +6,7 @@ terraform { } aws = { source = "hashicorp/aws" - version = "~> 4.0" + version = "~> 5.26.0" } http = { source = "hashicorp/http" From 0af0560eefff8a4b8fb93176dcd73cee15745e63 Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Tue, 13 Feb 2024 15:03:14 -0500 Subject: [PATCH 33/37] Downgrade to Nodejs 16.x AWS in their infinite wisdom removed their old SDK from the node 18 and 20 environments, requiring an upgrade to their new SDK. This meant that the Lambda would crash when it tried to load the old SDK. Revert back to nodejs 16 to avoid this issue. 
--- loader.tf | 2 +- receiver.tf | 2 +- run_glue_crawler.tf | 2 +- run_glue_job.tf | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/loader.tf b/loader.tf index a3713bd..cacc317 100644 --- a/loader.tf +++ b/loader.tf @@ -4,7 +4,7 @@ resource "aws_lambda_function" "loader" { function_name = "controlshift-redshift-loader" role = aws_iam_role.loader_lambda_role.arn handler = "index.handler" - runtime = "nodejs20.x" + runtime = "nodejs16.x" timeout = 900 vpc_config { diff --git a/receiver.tf b/receiver.tf index 32731ec..97f7b65 100644 --- a/receiver.tf +++ b/receiver.tf @@ -9,7 +9,7 @@ resource "aws_lambda_function" "receiver_lambda" { function_name = "controlshift-webhook-handler" role = aws_iam_role.receiver_lambda_role.arn handler = "receiver.handler" - runtime = "nodejs20.x" + runtime = "nodejs16.x" timeout = var.receiver_timeout source_code_hash = data.archive_file.receiver_zip.output_base64sha256 diff --git a/run_glue_crawler.tf b/run_glue_crawler.tf index a28243b..fcacd8f 100644 --- a/run_glue_crawler.tf +++ b/run_glue_crawler.tf @@ -10,7 +10,7 @@ resource "aws_lambda_function" "glue_crawler_lambda" { function_name = "controlshift-run-glue-crawler" role = aws_iam_role.run_glue_crawler_lambda_role.arn handler = "run-glue-crawler.handler" - runtime = "nodejs20.x" + runtime = "nodejs16.x" timeout = 60 source_code_hash = data.archive_file.run_glue_crawler_zip.output_base64sha256 diff --git a/run_glue_job.tf b/run_glue_job.tf index 69dfaf4..150c434 100644 --- a/run_glue_job.tf +++ b/run_glue_job.tf @@ -10,7 +10,7 @@ resource "aws_lambda_function" "glue_job_lambda" { function_name = "controlshift-run-glue-job" role = aws_iam_role.run_glue_job_lambda_role.arn handler = "run-glue-job.handler" - runtime = "nodejs20.x" + runtime = "nodejs16.x" timeout = 60 source_code_hash = data.archive_file.run_glue_job_zip.output_base64sha256 From 6241db0556b739d8db258160cd8e126d1684fd7b Mon Sep 17 00:00:00 2001 From: sjwmoveon Date: Thu, 15 Feb 2024 13:00:08 -0500 Subject: [PATCH 34/37] Update Redshift loader version --- loader.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loader.tf b/loader.tf index cacc317..953e840 100644 --- a/loader.tf +++ b/loader.tf @@ -1,6 +1,6 @@ resource "aws_lambda_function" "loader" { s3_bucket = local.lambda_buckets[var.aws_region] - s3_key = "LambdaRedshiftLoader/AWSLambdaRedshiftLoader-2.7.8.zip" + s3_key = "LambdaRedshiftLoader/AWSLambdaRedshiftLoader-2.8.3.zip" function_name = "controlshift-redshift-loader" role = aws_iam_role.loader_lambda_role.arn handler = "index.handler" From 4f44b2506c0f2ccdcb092bc15738e5a9db17d34b Mon Sep 17 00:00:00 2001 From: Ankur Naik Date: Tue, 6 Jan 2026 15:27:11 -0800 Subject: [PATCH 35/37] Update minimum required Terraform version and resolve various deprecation notices, including use of the old template_file provider. 
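
The core pattern here is replacing the archived template_file data source with
the built-in templatefile() function (available since Terraform 0.12). A
minimal before/after sketch of the migration, with the single "kind" variable
shown for illustration only:

# Before: requires the archived hashicorp/template provider
data "template_file" "item" {
  template = file("${path.module}/config_item.json")
  vars = {
    kind = "full"
  }
}
# consumed elsewhere as data.template_file.item.rendered

# After: built into Terraform, no extra provider needed
locals {
  item = templatefile("${path.module}/config_item.json", {
    kind = "full"
  })
}
# consumed elsewhere as local.item

The same swap moves the per-table for_each off the data sources and onto plain
locals, which is why the template_file blocks disappear from config_table.tf
below.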
--- config_table.tf | 108 ++++++++++++++++++++++++------------------------ glue_job.tf | 58 ++++++++++++++++++-------- s3.tf | 56 ++++++++++++++++++------- versions.tf | 5 +-- 4 files changed, 135 insertions(+), 92 deletions(-) diff --git a/config_table.tf b/config_table.tf index ce5c858..3be4aae 100644 --- a/config_table.tf +++ b/config_table.tf @@ -13,12 +13,12 @@ resource "aws_dynamodb_table" "loader_config" { } resource "aws_dynamodb_table_item" "load_config_full_items" { - for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]]) + for_each = toset(local.table_names) table_name = aws_dynamodb_table.loader_config.name hash_key = aws_dynamodb_table.loader_config.hash_key - item = data.template_file.loader_config_full_item[each.key].rendered + item = local.loader_config_full_items[each.key] lifecycle { ignore_changes = [ @@ -33,39 +33,13 @@ resource "aws_dynamodb_table_item" "load_config_full_items" { } } -data "template_file" "loader_config_full_item" { - for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]]) - - template = "${file("${path.module}/config_item.json")}" - vars = { - kind = "full" - bulk_data_table = each.key - redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint - redshift_database_name: var.redshift_database_name - redshift_port = data.aws_redshift_cluster.sync_data_target.port - redshift_username = var.redshift_username - redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob - schema = var.redshift_schema - s3_bucket = "agra-data-exports-${var.controlshift_environment}" - manifest_bucket = aws_s3_bucket.manifest.bucket - manifest_prefix = var.manifest_prefix - failed_manifest_prefix = var.failed_manifest_prefix - success_topic_arn = aws_sns_topic.success_sns_topic.arn - failure_topic_arn = aws_sns_topic.failure_sns_topic.arn - current_batch = random_id.current_batch.b64_url - column_list = data.http.column_list[each.key].body - truncate_target = true - compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "") - } -} - resource "aws_dynamodb_table_item" "load_config_incremental_items" { - for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]]) + for_each = toset(local.table_names) table_name = aws_dynamodb_table.loader_config.name hash_key = aws_dynamodb_table.loader_config.hash_key - item = data.template_file.loader_config_incremental_item[each.key].rendered + item = local.loader_config_incremental_items[each.key] lifecycle { ignore_changes = [ @@ -80,29 +54,53 @@ resource "aws_dynamodb_table_item" "load_config_incremental_items" { } } -data "template_file" "loader_config_incremental_item" { - for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]]) - - template = "${file("${path.module}/config_item.json")}" - vars = { - kind = "incremental" - bulk_data_table = each.key - redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint - redshift_database_name: var.redshift_database_name - redshift_port = data.aws_redshift_cluster.sync_data_target.port - redshift_username = var.redshift_username - redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob - schema = var.redshift_schema - s3_bucket = "agra-data-exports-${var.controlshift_environment}" - manifest_bucket = aws_s3_bucket.manifest.bucket - manifest_prefix = var.manifest_prefix - failed_manifest_prefix = var.failed_manifest_prefix - success_topic_arn = 
aws_sns_topic.success_sns_topic.arn - failure_topic_arn = aws_sns_topic.failure_sns_topic.arn - current_batch = random_id.current_batch.b64_url - column_list = data.http.column_list[each.key].body - truncate_target = false - compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "") +locals { + table_names = [for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]] + + loader_config_full_items = { + for name in local.table_names : name => templatefile("${path.module}/config_item.json", { + kind = "full" + bulk_data_table = name + redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint + redshift_database_name: var.redshift_database_name + redshift_port = data.aws_redshift_cluster.sync_data_target.port + redshift_username = var.redshift_username + redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob + schema = var.redshift_schema + s3_bucket = "agra-data-exports-${var.controlshift_environment}" + manifest_bucket = aws_s3_bucket.manifest.bucket + manifest_prefix = var.manifest_prefix + failed_manifest_prefix = var.failed_manifest_prefix + success_topic_arn = aws_sns_topic.success_sns_topic.arn + failure_topic_arn = aws_sns_topic.failure_sns_topic.arn + current_batch = random_id.current_batch.b64_url + column_list = data.http.column_list[name].body + truncate_target = true + compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "") + }) + } + + loader_config_incremental_items = { + for name in local.table_names : name => templatefile("${path.module}/config_item.json", { + kind = "incremental" + bulk_data_table = name + redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint + redshift_database_name: var.redshift_database_name + redshift_port = data.aws_redshift_cluster.sync_data_target.port + redshift_username = var.redshift_username + redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob + schema = var.redshift_schema + s3_bucket = "agra-data-exports-${var.controlshift_environment}" + manifest_bucket = aws_s3_bucket.manifest.bucket + manifest_prefix = var.manifest_prefix + failed_manifest_prefix = var.failed_manifest_prefix + success_topic_arn = aws_sns_topic.success_sns_topic.arn + failure_topic_arn = aws_sns_topic.failure_sns_topic.arn + current_batch = random_id.current_batch.b64_url + column_list = data.http.column_list[name].body + truncate_target = false + compress = try(local.parsed_bulk_data_schemas["settings"]["compression_format"], "") + }) } } @@ -134,11 +132,11 @@ data "http" "bulk_data_schemas" { } locals { - parsed_bulk_data_schemas = jsondecode(data.http.bulk_data_schemas.body) + parsed_bulk_data_schemas = jsondecode(data.http.bulk_data_schemas.response_body) } data "http" "column_list" { - for_each = toset([for table in local.parsed_bulk_data_schemas["tables"] : table["table"]["name"]]) + for_each = toset(local.table_names) url = "https://${var.controlshift_hostname}/api/bulk_data/schema/columns?table=${each.key}" } diff --git a/glue_job.tf b/glue_job.tf index 15bea02..e258101 100644 --- a/glue_job.tf +++ b/glue_job.tf @@ -26,45 +26,69 @@ resource "aws_glue_crawler" "signatures_crawler" { resource "aws_s3_bucket" "glue_resources" { bucket = var.glue_scripts_bucket_name +} + +# Ownership controls block is required to support ACLs. 
+resource "aws_s3_bucket_ownership_controls" "glue_resources" { + bucket = aws_s3_bucket.glue_resources.id + rule { + object_ownership = "ObjectWriter" + } +} + +resource "aws_s3_bucket_acl" "glue_resources" { + depends_on = [aws_s3_bucket_ownership_controls.glue_resources] + bucket = aws_s3_bucket.glue_resources.id acl = "private" - server_side_encryption_configuration { - rule { - apply_server_side_encryption_by_default { - sse_algorithm = "AES256" - } +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "glue_resources" { + bucket = aws_s3_bucket.glue_resources.id + + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" } } +} + +resource "aws_s3_bucket_lifecycle_configuration" "glue_resources" { + bucket = aws_s3_bucket.glue_resources.id + + rule { + id = "Remove temp files over a week old" + status = "Enabled" - lifecycle_rule { - id = "Remove temp files over a week old" - abort_incomplete_multipart_upload_days = 0 - enabled = true - prefix = "production/temp/" + filter { + prefix = "production/temp/" + } expiration { days = 7 - expired_object_delete_marker = false + } + + abort_incomplete_multipart_upload { + days_after_initiation = 7 # Note: must be greater than 0 } } } -data "template_file" "signatures_script" { - template = file("${path.module}/templates/signatures_job.py.tpl") - vars = { +locals { + signatures_script = templatefile("${path.module}/templates/signatures_job.py.tpl", { catalog_database_name = aws_glue_catalog_database.catalog_db.name redshift_database_name = var.redshift_database_name redshift_schema = var.redshift_schema redshift_connection_name = aws_glue_connection.redshift_connection.name - } + }) } -resource "aws_s3_bucket_object" "signatures_script" { +resource "aws_s3_object" "signatures_script" { bucket = aws_s3_bucket.glue_resources.id key = "${var.controlshift_environment}/signatures_job.py" acl = "private" - content = data.template_file.signatures_script.rendered + content = local.signatures_script } resource "aws_iam_role" "glue_service_role" { diff --git a/s3.tf b/s3.tf index 359fa29..00bae87 100644 --- a/s3.tf +++ b/s3.tf @@ -6,30 +6,54 @@ provider "aws" { resource "aws_s3_bucket" "manifest" { provider = aws.controlshift - bucket = var.manifest_bucket_name - acl = "private" + bucket = var.manifest_bucket_name - server_side_encryption_configuration { - rule { - apply_server_side_encryption_by_default { - sse_algorithm = "AES256" - } - } - } tags = { Name = "ControlShift puts import manifests here" } +} + +# Ownership controls block is required to support ACLs. +resource "aws_s3_bucket_ownership_controls" "manifest" { + provider = aws.controlshift + bucket = aws_s3_bucket.manifest.id + rule { + object_ownership = "ObjectWriter" + } +} - # expire the ingested manifests after 5 days after they have been processed to save disk space while providing enough - # time to analyze things that might have gone wrong. 
- lifecycle_rule { - id = "expire-manifests" - enabled = true +resource "aws_s3_bucket_acl" "manifest" { + provider = aws.controlshift + depends_on = [aws_s3_bucket_ownership_controls.manifest] - expiration { - days = 5 + bucket = aws_s3_bucket.manifest.id + acl = "private" +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "manifest" { + provider = aws.controlshift + bucket = aws_s3_bucket.manifest.id + + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" } } } +resource "aws_s3_bucket_lifecycle_configuration" "manifest" { + provider = aws.controlshift + bucket = aws_s3_bucket.manifest.id + + rule { + id = "expire-manifests" + status = "Enabled" + expiration { + days = 5 + } + + # Best practice: filter is now required inside the rule block + filter {} + } +} diff --git a/versions.tf b/versions.tf index 5c6fd54..9fa1eee 100644 --- a/versions.tf +++ b/versions.tf @@ -1,5 +1,5 @@ terraform { - required_version = ">= 0.13" + required_version = ">= 1.4.5" required_providers { archive = { source = "hashicorp/archive" @@ -14,8 +14,5 @@ terraform { random = { source = "hashicorp/random" } - template = { - source = "hashicorp/template" - } } } From 96401a9a246b4f2f9cb048559f98eab012f902d8 Mon Sep 17 00:00:00 2001 From: Ankur Naik Date: Tue, 13 Jan 2026 17:09:07 -0800 Subject: [PATCH 36/37] Reorder sections to more closely match Controlshift's version. --- s3.tf | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/s3.tf b/s3.tf index 00bae87..b982978 100644 --- a/s3.tf +++ b/s3.tf @@ -6,7 +6,7 @@ provider "aws" { resource "aws_s3_bucket" "manifest" { provider = aws.controlshift - bucket = var.manifest_bucket_name + bucket = var.manifest_bucket_name tags = { Name = "ControlShift puts import manifests here" @@ -16,14 +16,33 @@ resource "aws_s3_bucket" "manifest" { # Ownership controls block is required to support ACLs. resource "aws_s3_bucket_ownership_controls" "manifest" { provider = aws.controlshift - bucket = aws_s3_bucket.manifest.id + bucket = aws_s3_bucket.manifest.id rule { object_ownership = "ObjectWriter" } } +resource "aws_s3_bucket_lifecycle_configuration" "manifest" { + provider = aws.controlshift + bucket = aws_s3_bucket.manifest.id + + # expire the ingested manifests after 5 days after they have been processed to save disk space while providing enough + # time to analyze things that might have gone wrong. 
+ rule { + id = "expire-manifests" + status = "Enabled" + + expiration { + days = 5 + } + + # Best practice: filter is now required inside the rule block + filter {} + } +} + resource "aws_s3_bucket_acl" "manifest" { - provider = aws.controlshift + provider = aws.controlshift depends_on = [aws_s3_bucket_ownership_controls.manifest] bucket = aws_s3_bucket.manifest.id @@ -32,7 +51,7 @@ resource "aws_s3_bucket_acl" "manifest" { resource "aws_s3_bucket_server_side_encryption_configuration" "manifest" { provider = aws.controlshift - bucket = aws_s3_bucket.manifest.id + bucket = aws_s3_bucket.manifest.id rule { apply_server_side_encryption_by_default { @@ -40,20 +59,3 @@ resource "aws_s3_bucket_server_side_encryption_configuration" "manifest" { } } } - -resource "aws_s3_bucket_lifecycle_configuration" "manifest" { - provider = aws.controlshift - bucket = aws_s3_bucket.manifest.id - - rule { - id = "expire-manifests" - status = "Enabled" - - expiration { - days = 5 - } - - # Best practice: filter is now required inside the rule block - filter {} - } -} From c43d3c92c622674912ae150e0d3a70edccc00145 Mon Sep 17 00:00:00 2001 From: Ankur Naik Date: Wed, 14 Jan 2026 11:43:15 -0800 Subject: [PATCH 37/37] Fix typo --- config_table.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config_table.tf b/config_table.tf index 3be4aae..9f0ee65 100644 --- a/config_table.tf +++ b/config_table.tf @@ -62,7 +62,7 @@ locals { kind = "full" bulk_data_table = name redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint - redshift_database_name: var.redshift_database_name + redshift_database_name = var.redshift_database_name redshift_port = data.aws_redshift_cluster.sync_data_target.port redshift_username = var.redshift_username redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob @@ -85,7 +85,7 @@ locals { kind = "incremental" bulk_data_table = name redshift_endpoint = data.aws_redshift_cluster.sync_data_target.endpoint - redshift_database_name: var.redshift_database_name + redshift_database_name = var.redshift_database_name redshift_port = data.aws_redshift_cluster.sync_data_target.port redshift_username = var.redshift_username redshift_password = aws_kms_ciphertext.redshift_password.ciphertext_blob