diff --git a/numerai/examples/crypto-python3/Dockerfile b/numerai/examples/crypto-python3/Dockerfile index 8ec60d7..23d1a09 100644 --- a/numerai/examples/crypto-python3/Dockerfile +++ b/numerai/examples/crypto-python3/Dockerfile @@ -1,5 +1,5 @@ # Provides us a working Python 3 environment. -FROM python:3.9 +FROM python:3.13 # These are docker arguments that `numerai node deploy/test` will always pass into docker. # They are then set in your environment so that numerapi can access them when uploading submissions. diff --git a/numerai/examples/signals-python3/Dockerfile b/numerai/examples/signals-python3/Dockerfile index 8ec60d7..23d1a09 100644 --- a/numerai/examples/signals-python3/Dockerfile +++ b/numerai/examples/signals-python3/Dockerfile @@ -1,5 +1,5 @@ # Provides us a working Python 3 environment. -FROM python:3.9 +FROM python:3.13 # These are docker arguments that `numerai node deploy/test` will always pass into docker. # They are then set in your environment so that numerapi can access them when uploading submissions. diff --git a/numerai/examples/tournament-python3/Dockerfile b/numerai/examples/tournament-python3/Dockerfile index 8ec60d7..23d1a09 100644 --- a/numerai/examples/tournament-python3/Dockerfile +++ b/numerai/examples/tournament-python3/Dockerfile @@ -1,5 +1,5 @@ # Provides us a working Python 3 environment. -FROM python:3.9 +FROM python:3.13 # These are docker arguments that `numerai node deploy/test` will always pass into docker. # They are then set in your environment so that numerapi can access them when uploading submissions. 
diff --git a/numerai/terraform/aws/aws/cluster.tf b/numerai/terraform/aws/aws/cluster.tf index 088513b..47d7f28 100644 --- a/numerai/terraform/aws/aws/cluster.tf +++ b/numerai/terraform/aws/aws/cluster.tf @@ -179,21 +179,20 @@ resource "aws_batch_job_definition" "node" { retry_strategy { attempts = 2 evaluate_on_exit { - on_reason = "CannotInspectContainerError:*" - action = "RETRY" + # Should catch load- and throttling-related issues such as: + # - CannotPullContainerError + # - CannotStartContainerError + # - CannotInspectContainerError + # - CannotCreateContainerError + # - ThrottlingException + on_status_reason = "Task failed to start" + action = "RETRY" } evaluate_on_exit { - on_reason = "CannotPullContainerError:*" - action = "RETRY" - } - evaluate_on_exit { - action = "RETRY" - on_reason = "CannotStartContainerError:*" - } - evaluate_on_exit { - action = "RETRY" - on_reason = "Task failed to start" + on_status_reason = "DockerTimeoutError*" + action = "RETRY" } + evaluate_on_exit { action = "EXIT" on_reason = "*"