From 67e00652a80c9b861697809c91f1a23015266bea Mon Sep 17 00:00:00 2001 From: Reda Noureddine Date: Mon, 22 Apr 2024 10:16:28 +0200 Subject: [PATCH 01/11] feat(containers): inference with hugging face models --- containers/hugging-face-inference/Dockerfile | 19 ++++++++++ containers/hugging-face-inference/README.md | 8 +++++ containers/hugging-face-inference/main.py | 36 +++++++++++++++++++ containers/hugging-face-inference/prompt.py | 4 +++ .../hugging-face-inference/requirements.txt | 2 ++ .../terraform/container.tf | 28 +++++++++++++++ .../terraform/images.tf | 20 +++++++++++ .../terraform/providers.tf | 16 +++++++++ .../hugging-face-inference/terraform/utils.tf | 5 +++ .../terraform/variables.tf | 36 +++++++++++++++++++ .../terraform/versions.tf | 13 +++++++ 11 files changed, 187 insertions(+) create mode 100644 containers/hugging-face-inference/Dockerfile create mode 100644 containers/hugging-face-inference/README.md create mode 100644 containers/hugging-face-inference/main.py create mode 100644 containers/hugging-face-inference/prompt.py create mode 100644 containers/hugging-face-inference/requirements.txt create mode 100644 containers/hugging-face-inference/terraform/container.tf create mode 100644 containers/hugging-face-inference/terraform/images.tf create mode 100644 containers/hugging-face-inference/terraform/providers.tf create mode 100644 containers/hugging-face-inference/terraform/utils.tf create mode 100644 containers/hugging-face-inference/terraform/variables.tf create mode 100644 containers/hugging-face-inference/terraform/versions.tf diff --git a/containers/hugging-face-inference/Dockerfile b/containers/hugging-face-inference/Dockerfile new file mode 100644 index 0000000..61de72f --- /dev/null +++ b/containers/hugging-face-inference/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.12-slim-bookworm + +ARG MODEL_DOWNLOAD_SOURCE + +RUN apt-get update && apt-get install -y wget + +WORKDIR /app + +RUN pip install --upgrade pip +COPY requirements.txt . +RUN pip install -r requirements.txt + +RUN pip install llama-cpp-python==0.2.62 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu + +RUN wget $MODEL_DOWNLOAD_SOURCE + +COPY . . 
+ +CMD ["uvicorn", "main:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "80"] diff --git a/containers/hugging-face-inference/README.md b/containers/hugging-face-inference/README.md new file mode 100644 index 0000000..53a897d --- /dev/null +++ b/containers/hugging-face-inference/README.md @@ -0,0 +1,8 @@ +## Deploy Hugging Face Models in Serverless Containers + +### Example of public Hugging Face Models to test + +- [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) +- [llama-2-7b.Q4_K_M.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_K_M.gguf) +- [phi-2.Q8_0.gguf](https://huggingface.co/TheBloke/phi-2-GGUF/blob/main/phi-2.Q8_0.gguf) + diff --git a/containers/hugging-face-inference/main.py b/containers/hugging-face-inference/main.py new file mode 100644 index 0000000..753308e --- /dev/null +++ b/containers/hugging-face-inference/main.py @@ -0,0 +1,36 @@ +from fastapi import FastAPI +from llama_cpp import Llama +import os +import prompt + +MODEL_FILE_NAME=os.environ["MODEL_FILE_NAME"] + +app = FastAPI() + +print("loading model from memory starts", flush=True) + +llm = Llama(model_path=MODEL_FILE_NAME) + +print("loading model from memory successfully ends", flush=True) + +@app.get("/") +def hello(): + """Get Inference Server Info""" + + return { + "message": "Hello, this is the inference server! Serving model {model_name}" + .format(model_name=MODEL_FILE_NAME) + } + +@app.post("/") +def infer(prompt: prompt.Prompt): + + print("inference endpoint is called", flush=True) + + output = llm(prompt=prompt.message, max_tokens=200) + + print("output is successfully inferred", flush=True) + + print(output, flush=True) + + return output diff --git a/containers/hugging-face-inference/prompt.py b/containers/hugging-face-inference/prompt.py new file mode 100644 index 0000000..5dc7363 --- /dev/null +++ b/containers/hugging-face-inference/prompt.py @@ -0,0 +1,4 @@ +from pydantic import BaseModel + +class Prompt(BaseModel): + message: str \ No newline at end of file diff --git a/containers/hugging-face-inference/requirements.txt b/containers/hugging-face-inference/requirements.txt new file mode 100644 index 0000000..3b33077 --- /dev/null +++ b/containers/hugging-face-inference/requirements.txt @@ -0,0 +1,2 @@ +fastapi==0.104.1 +uvicorn==0.24.0.post1 \ No newline at end of file diff --git a/containers/hugging-face-inference/terraform/container.tf b/containers/hugging-face-inference/terraform/container.tf new file mode 100644 index 0000000..3b98f07 --- /dev/null +++ b/containers/hugging-face-inference/terraform/container.tf @@ -0,0 +1,28 @@ +resource "scaleway_container_namespace" "main" { + name = "ifr-${lower(replace(var.hf_model_file_name, "/[.]|[_]/", "-"))}-${random_string.random_suffix.result}" + description = "Inference using Hugging Face models" +} + +resource "scaleway_container" "inference-hugging-face" { + name = "inference" + description = "Inference serving API using a Hugging Face model" + namespace_id = scaleway_container_namespace.main.id + registry_image = docker_image.inference.name + environment_variables = { + "MODEL_FILE_NAME" = var.hf_model_file_name + } + port = 80 + cpu_limit = 2240 + memory_limit = 4096 + min_scale = 1 + max_scale = 1 + deploy = true +} + +resource scaleway_container_cron "inference_cron" { + container_id = scaleway_container.inference-hugging-face.id + schedule = var.inference_cron_schedule + args = jsonencode({ + "message" : "Hello! It's sunny today. How are you doing?" 
+ }) +} \ No newline at end of file diff --git a/containers/hugging-face-inference/terraform/images.tf b/containers/hugging-face-inference/terraform/images.tf new file mode 100644 index 0000000..6c857e6 --- /dev/null +++ b/containers/hugging-face-inference/terraform/images.tf @@ -0,0 +1,20 @@ +resource "scaleway_registry_namespace" "main" { + name = "ifr-${lower(replace(var.hf_model_file_name, "/[.]|[_]/", "-"))}-${random_string.random_suffix.result}" + region = var.region + project_id = var.project_id +} + +resource "docker_image" "inference" { + name = "${scaleway_registry_namespace.main.endpoint}/inference-with-huggingface:${var.image_version}" + build { + context = "${path.cwd}/../" + no_cache = true + build_args = { + MODEL_DOWNLOAD_SOURCE : var.hf_model_download_source + } + } + + provisioner "local-exec" { + command = "docker push ${docker_image.inference.name}" + } +} diff --git a/containers/hugging-face-inference/terraform/providers.tf b/containers/hugging-face-inference/terraform/providers.tf new file mode 100644 index 0000000..439df4d --- /dev/null +++ b/containers/hugging-face-inference/terraform/providers.tf @@ -0,0 +1,16 @@ +provider "scaleway" { + region = var.region + access_key = var.access_key + secret_key = var.secret_key + project_id = var.project_id +} + +provider "docker" { + host = "unix:///var/run/docker.sock" + + registry_auth { + address = scaleway_registry_namespace.main.endpoint + username = "nologin" + password = var.secret_key + } +} diff --git a/containers/hugging-face-inference/terraform/utils.tf b/containers/hugging-face-inference/terraform/utils.tf new file mode 100644 index 0000000..15d52ab --- /dev/null +++ b/containers/hugging-face-inference/terraform/utils.tf @@ -0,0 +1,5 @@ +resource "random_string" "random_suffix" { + length = 3 + upper = false + special = false +} diff --git a/containers/hugging-face-inference/terraform/variables.tf b/containers/hugging-face-inference/terraform/variables.tf new file mode 100644 index 0000000..a9b0e68 --- /dev/null +++ b/containers/hugging-face-inference/terraform/variables.tf @@ -0,0 +1,36 @@ +variable "access_key" { + type = string +} + +variable "secret_key" { + type = string +} + +variable "project_id" { + type = string +} + +variable "image_version" { + type = string + default = "0.0.3" +} + +variable "region" { + type = string + default = "fr-par" +} + +variable "inference_cron_schedule" { + type = string + default = "*/15 * * * *" +} + +variable "hf_model_file_name" { + type = string + default = "llama-2-7b.Q4_0.gguf" +} + +variable "hf_model_download_source" { + type = string + default = "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf" +} diff --git a/containers/hugging-face-inference/terraform/versions.tf b/containers/hugging-face-inference/terraform/versions.tf new file mode 100644 index 0000000..b186193 --- /dev/null +++ b/containers/hugging-face-inference/terraform/versions.tf @@ -0,0 +1,13 @@ +terraform { + required_providers { + scaleway = { + source = "scaleway/scaleway" + version = ">= 2.39" + } + docker = { + source = "kreuzwerker/docker" + version = "3.0.2" + } + } + required_version = ">= 0.13" +} From 273615d3f8ed54a0c6e8a5b5d673241d064f38ee Mon Sep 17 00:00:00 2001 From: Reda Noureddine Date: Mon, 22 Apr 2024 18:31:34 +0200 Subject: [PATCH 02/11] feat: deploy multiple models using terraform workpaces --- containers/hugging-face-inference/README.md | 13 +++++++--- .../terraform/deploy-models.sh | 26 +++++++++++++++++++ .../terraform/variables.tf | 2 -- 3 files 
changed, 35 insertions(+), 6 deletions(-) create mode 100644 containers/hugging-face-inference/terraform/deploy-models.sh diff --git a/containers/hugging-face-inference/README.md b/containers/hugging-face-inference/README.md index 53a897d..3e55d13 100644 --- a/containers/hugging-face-inference/README.md +++ b/containers/hugging-face-inference/README.md @@ -1,8 +1,13 @@ ## Deploy Hugging Face Models in Serverless Containers -### Example of public Hugging Face Models to test +- Export these variables: -- [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) -- [llama-2-7b.Q4_K_M.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_K_M.gguf) -- [phi-2.Q8_0.gguf](https://huggingface.co/TheBloke/phi-2-GGUF/blob/main/phi-2.Q8_0.gguf) +```bash +export SCW_ACCESS_KEY="access-key" SCW_SECRET_KEY="secret-key" SCW_PROJECT_ID="project-id" +``` +- Run script to deploy multiple hugging face models using terraform workspaces: + +```bash +bash ./deploy-models.sh +``` \ No newline at end of file diff --git a/containers/hugging-face-inference/terraform/deploy-models.sh b/containers/hugging-face-inference/terraform/deploy-models.sh new file mode 100644 index 0000000..6655dc5 --- /dev/null +++ b/containers/hugging-face-inference/terraform/deploy-models.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +set -e + +export SCW_ACCESS_KEY=${SCW_ACCESS_KEY} \ + SCW_SECRET_KEY=${SCW_SECRET_KEY} \ + SCW_PROJECT_ID=${SCW_PROJECT_ID} + +declare -A hf_models + +hf_models["llama-2-7b.Q2_K.gguf"]="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q8_0.gguf" +hf_models["mistral-7b-instruct-v0.2.Q2_K.gguf"]="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q8_0.gguf" + +terraform init + +for model_file_name in "${!hf_models[@]}"; +do + terraform workspace new $model_file_name + export TF_VAR_hf_model_file_name=$model_file_name \ + TF_VAR_hf_model_download_source=${hf_models[$model_file_name]} \ + TF_VAR_access_key=$SCW_ACCESS_KEY \ + TF_VAR_secret_key=$SCW_SECRET_KEY \ + TF_VAR_project_id=$SCW_PROJECT_ID + terraform plan -var-file=testing.tfvars + terraform apply -var-file=testing.tfvars -auto-approve +done diff --git a/containers/hugging-face-inference/terraform/variables.tf b/containers/hugging-face-inference/terraform/variables.tf index a9b0e68..623c911 100644 --- a/containers/hugging-face-inference/terraform/variables.tf +++ b/containers/hugging-face-inference/terraform/variables.tf @@ -27,10 +27,8 @@ variable "inference_cron_schedule" { variable "hf_model_file_name" { type = string - default = "llama-2-7b.Q4_0.gguf" } variable "hf_model_download_source" { type = string - default = "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf" } From ff810088df0158c6d2099f58e2b1792a890ed8f8 Mon Sep 17 00:00:00 2001 From: Reda Noureddine Date: Tue, 23 Apr 2024 18:19:19 +0200 Subject: [PATCH 03/11] feat: deploy models using json file info --- containers/hugging-face-inference/Dockerfile | 4 +- .../terraform/deploy-models.sh | 23 ++++++----- .../terraform/hf-models.json | 40 +++++++++++++++++++ 3 files changed, 55 insertions(+), 12 deletions(-) create mode 100644 containers/hugging-face-inference/terraform/hf-models.json diff --git a/containers/hugging-face-inference/Dockerfile b/containers/hugging-face-inference/Dockerfile index 61de72f..5f3088a 100644 --- a/containers/hugging-face-inference/Dockerfile +++ 
b/containers/hugging-face-inference/Dockerfile @@ -10,7 +10,9 @@ RUN pip install --upgrade pip COPY requirements.txt . RUN pip install -r requirements.txt -RUN pip install llama-cpp-python==0.2.62 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu +RUN pip install llama-cpp-python==0.2.62 \ + --no-cache-dir \ + --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu RUN wget $MODEL_DOWNLOAD_SOURCE diff --git a/containers/hugging-face-inference/terraform/deploy-models.sh b/containers/hugging-face-inference/terraform/deploy-models.sh index 6655dc5..b92fbc3 100644 --- a/containers/hugging-face-inference/terraform/deploy-models.sh +++ b/containers/hugging-face-inference/terraform/deploy-models.sh @@ -1,15 +1,19 @@ #!/bin/bash +# Setup + set -e -export SCW_ACCESS_KEY=${SCW_ACCESS_KEY} \ - SCW_SECRET_KEY=${SCW_SECRET_KEY} \ - SCW_PROJECT_ID=${SCW_PROJECT_ID} +export TF_VAR_access_key=${SCW_ACCESS_KEY} \ + TF_VAR_secret_key=${SCW_SECRET_KEY} \ + TF_VAR_project_id=${SCW_PROJECT_ID} + +# Associative list of models to deploy -declare -A hf_models +declare -A hf_models +eval "$(jq -r '.[]|.[]|"hf_models[\(.file)]=\(.source)"' hf-models.json)" -hf_models["llama-2-7b.Q2_K.gguf"]="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q8_0.gguf" -hf_models["mistral-7b-instruct-v0.2.Q2_K.gguf"]="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q8_0.gguf" +# Initialize, plan, and deploy each model in a Terraform workspace terraform init @@ -18,9 +22,6 @@ do terraform workspace new $model_file_name export TF_VAR_hf_model_file_name=$model_file_name \ TF_VAR_hf_model_download_source=${hf_models[$model_file_name]} \ - TF_VAR_access_key=$SCW_ACCESS_KEY \ - TF_VAR_secret_key=$SCW_SECRET_KEY \ - TF_VAR_project_id=$SCW_PROJECT_ID - terraform plan -var-file=testing.tfvars - terraform apply -var-file=testing.tfvars -auto-approve + terraform plan + terraform apply -auto-approve done diff --git a/containers/hugging-face-inference/terraform/hf-models.json b/containers/hugging-face-inference/terraform/hf-models.json new file mode 100644 index 0000000..1c5da8b --- /dev/null +++ b/containers/hugging-face-inference/terraform/hf-models.json @@ -0,0 +1,40 @@ +{ + "llama" : [ + { + "file": "llama-2-7b.Q2_K.gguf", + "source" : "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/llama-2-7b.Q2_K.gguf", + "size_gb": "2.83" + }, + { + "file": "llama-2-7b.Q3_K_L.gguf", + "source" : "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q3_K_L.gguf", + "size_gb": "3.6" + } + ], + + "mistral" : [ + { + "file": "mistral-7b-instruct-v0.2.Q2_K.gguf", + "source" : "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q2_K.gguf", + "size_gb": "3.08" + }, + { + "file": "mistral-7b-instruct-v0.2.Q3_K_L.gguf", + "source" : "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q3_K_L.gguf", + "size_gb": "3.82" + } + ], + + "phi" : [ + { + "file": "phi-2.Q2_K.gguf", + "source" : "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf", + "size_gb": "1.17" + }, + { + "file": "phi-2.Q5_K_M.gguf", + "source" : "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q5_K_M.gguf", + "size_gb": "2.07" + } + ] +} \ No newline at end of file From 1c3fce3708078ab161dd1b3fe75703d6eb9967f7 Mon Sep 17 00:00:00 2001 From: Reda Noureddine Date: Tue, 23 Apr 2024 18:38:29 +0200 Subject: [PATCH 04/11] fix: model sources --- 
containers/hugging-face-inference/terraform/hf-models.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/containers/hugging-face-inference/terraform/hf-models.json b/containers/hugging-face-inference/terraform/hf-models.json index 1c5da8b..5b7e94a 100644 --- a/containers/hugging-face-inference/terraform/hf-models.json +++ b/containers/hugging-face-inference/terraform/hf-models.json @@ -2,7 +2,7 @@ "llama" : [ { "file": "llama-2-7b.Q2_K.gguf", - "source" : "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/llama-2-7b.Q2_K.gguf", + "source" : "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q2_K.gguf", "size_gb": "2.83" }, { @@ -15,12 +15,12 @@ "mistral" : [ { "file": "mistral-7b-instruct-v0.2.Q2_K.gguf", - "source" : "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q2_K.gguf", + "source" : "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q2_K.gguf", "size_gb": "3.08" }, { "file": "mistral-7b-instruct-v0.2.Q3_K_L.gguf", - "source" : "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q3_K_L.gguf", + "source" : "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q3_K_L.gguf", "size_gb": "3.82" } ], From 7c8a7429c285d09fbaa71b3ee2ce53b44469cbd6 Mon Sep 17 00:00:00 2001 From: Reda Noureddine Date: Wed, 24 Apr 2024 10:56:31 +0200 Subject: [PATCH 05/11] feat: docker login + terraform select with create flag --- containers/hugging-face-inference/README.md | 2 +- .../hugging-face-inference/terraform/deploy-models.sh | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/containers/hugging-face-inference/README.md b/containers/hugging-face-inference/README.md index 3e55d13..e3fa747 100644 --- a/containers/hugging-face-inference/README.md +++ b/containers/hugging-face-inference/README.md @@ -3,7 +3,7 @@ - Export these variables: ```bash -export SCW_ACCESS_KEY="access-key" SCW_SECRET_KEY="secret-key" SCW_PROJECT_ID="project-id" +export SCW_ACCESS_KEY="access-key" SCW_SECRET_KEY="secret-key" SCW_PROJECT_ID="project-id" REGION="fr-par" ``` - Run script to deploy multiple hugging face models using terraform workspaces: diff --git a/containers/hugging-face-inference/terraform/deploy-models.sh b/containers/hugging-face-inference/terraform/deploy-models.sh index b92fbc3..cda787e 100644 --- a/containers/hugging-face-inference/terraform/deploy-models.sh +++ b/containers/hugging-face-inference/terraform/deploy-models.sh @@ -13,13 +13,17 @@ export TF_VAR_access_key=${SCW_ACCESS_KEY} \ declare -A hf_models eval "$(jq -r '.[]|.[]|"hf_models[\(.file)]=\(.source)"' hf-models.json)" +# Login to docker Scaleway's registry on fr-par + +docker login "rg.$REGION.scw.cloud" -u nologin --password-stdin <<< "$SCW_SECRET_KEY" + # Initialize, plan, and deploy each model in a Terraform workspace terraform init for model_file_name in "${!hf_models[@]}"; do - terraform workspace new $model_file_name + terraform workspace select -or-create $model_file_name export TF_VAR_hf_model_file_name=$model_file_name \ TF_VAR_hf_model_download_source=${hf_models[$model_file_name]} \ terraform plan From 975f3890682ee2d385a4832d18a0cc5abc0cbcdc Mon Sep 17 00:00:00 2001 From: Reda Noureddine Date: Tue, 7 May 2024 11:51:05 +0200 Subject: [PATCH 06/11] feat: benchmark script --- .../terraform/benchmark-models.py | 77 +++++++++++++++++++ .../terraform/hf-models.json | 18 +++-- 2 files changed, 89 insertions(+), 6 
deletions(-) create mode 100644 containers/hugging-face-inference/terraform/benchmark-models.py diff --git a/containers/hugging-face-inference/terraform/benchmark-models.py b/containers/hugging-face-inference/terraform/benchmark-models.py new file mode 100644 index 0000000..7ffe2af --- /dev/null +++ b/containers/hugging-face-inference/terraform/benchmark-models.py @@ -0,0 +1,77 @@ +import json, requests, csv, pandas +import matplotlib.pyplot as plt + +class Benchmark: + _model_families = ["llama", "mistral", "phi"] + _endpoints = {} + + def __init__(self, models_file: str, benchmark_file: str, results_figure: str, message: str) -> None: + self.models_file = models_file + self.benchmark_file = benchmark_file + self.message = message + self.results_figure = results_figure + + def get_container_endpoints_from_json_file(self)-> None: + if self.models_file == "": + raise Exception("file name is empty") + + with open(self.models_file, 'r') as models_file: + json_data = json.load(models_file) + + for family in self._model_families: + self._endpoints[family] = [] + for model in json_data[family]: + self._endpoints[family].append({"model": model["file"], "endpoint": model["ctn_endpoint"]}) + + def analyze_results(self) -> None: + benchmark_results = pandas.read_csv(self.benchmark_file) + benchmark_results.boxplot(column="Total Response Time", by="Family").plot() + plt.ylabel("Total Response Time in seconds") + plt.savefig(self.results_figure) + + def benchmark_models(self, num_samples: int) -> None: + self.get_container_endpoints_from_json_file() + + fields = ['Model', 'Family', 'Total Response Time', 'Response Message'] + benchmark_data = [] + + for family in self._model_families: + for endpoint in self._endpoints[family]: + if endpoint["endpoint"] == "": + raise Exception("model endpoint is empty") + + for _ in range(num_samples): + try: + print("Calling model {model} on endpoint {endpoint} with message {message}" + .format(model=endpoint["model"], endpoint=endpoint["endpoint"], message=self.message) + ) + + rsp = requests.post(endpoint["endpoint"], json={"message": self.message}) + + response_text = rsp.json()["choices"][0]["text"] + + print("The model {model} responded with: {response_text}" + .format(model=endpoint["model"], response_text=response_text) + ) + + benchmark_data.append([endpoint["model"], family, rsp.elapsed.total_seconds(), response_text]) + except: + pass + + with open(self.benchmark_file, 'w') as results_file: + wrt = csv.writer(results_file) + wrt.writerow(fields) + wrt.writerows(benchmark_data) + + self.analyze_results() + +if __name__ == "__main__": + + benchmark = Benchmark( + models_file="hf-models.json", + benchmark_file="benchmark-results.csv", + results_figure="results-plot.png", + message="What the difference between an elephant and an ant?", + ) + + benchmark.benchmark_models(num_samples=50) diff --git a/containers/hugging-face-inference/terraform/hf-models.json b/containers/hugging-face-inference/terraform/hf-models.json index 5b7e94a..a4f44f5 100644 --- a/containers/hugging-face-inference/terraform/hf-models.json +++ b/containers/hugging-face-inference/terraform/hf-models.json @@ -3,12 +3,14 @@ { "file": "llama-2-7b.Q2_K.gguf", "source" : "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q2_K.gguf", - "size_gb": "2.83" + "size_gb": "2.83", + "ctn_endpoint": "paste container endpoint here" }, { "file": "llama-2-7b.Q3_K_L.gguf", "source" : "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q3_K_L.gguf", - "size_gb": "3.6" 
+ "size_gb": "3.6", + "ctn_endpoint": "paste container endpoint here" } ], @@ -16,12 +18,14 @@ { "file": "mistral-7b-instruct-v0.2.Q2_K.gguf", "source" : "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q2_K.gguf", - "size_gb": "3.08" + "size_gb": "3.08", + "ctn_endpoint": "paste container endpoint here" }, { "file": "mistral-7b-instruct-v0.2.Q3_K_L.gguf", "source" : "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q3_K_L.gguf", - "size_gb": "3.82" + "size_gb": "3.82", + "ctn_endpoint": "paste container endpoint here" } ], @@ -29,12 +33,14 @@ { "file": "phi-2.Q2_K.gguf", "source" : "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf", - "size_gb": "1.17" + "size_gb": "1.17", + "ctn_endpoint": "paste container endpoint here" }, { "file": "phi-2.Q5_K_M.gguf", "source" : "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q5_K_M.gguf", - "size_gb": "2.07" + "size_gb": "2.07", + "ctn_endpoint": "paste container endpoint here" } ] } \ No newline at end of file From 162a433bd082d8da4eea3c9fd223392c146677de Mon Sep 17 00:00:00 2001 From: Reda Noureddine Date: Tue, 7 May 2024 11:53:53 +0200 Subject: [PATCH 07/11] feat: rename script + add flags --- .../terraform/deploy-models.sh | 31 ---------- .../terraform/terraform.sh | 56 +++++++++++++++++++ 2 files changed, 56 insertions(+), 31 deletions(-) delete mode 100644 containers/hugging-face-inference/terraform/deploy-models.sh create mode 100755 containers/hugging-face-inference/terraform/terraform.sh diff --git a/containers/hugging-face-inference/terraform/deploy-models.sh b/containers/hugging-face-inference/terraform/deploy-models.sh deleted file mode 100644 index cda787e..0000000 --- a/containers/hugging-face-inference/terraform/deploy-models.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -# Setup - -set -e - -export TF_VAR_access_key=${SCW_ACCESS_KEY} \ - TF_VAR_secret_key=${SCW_SECRET_KEY} \ - TF_VAR_project_id=${SCW_PROJECT_ID} - -# Associative list of models to deploy - -declare -A hf_models -eval "$(jq -r '.[]|.[]|"hf_models[\(.file)]=\(.source)"' hf-models.json)" - -# Login to docker Scaleway's registry on fr-par - -docker login "rg.$REGION.scw.cloud" -u nologin --password-stdin <<< "$SCW_SECRET_KEY" - -# Initialize, plan, and deploy each model in a Terraform workspace - -terraform init - -for model_file_name in "${!hf_models[@]}"; -do - terraform workspace select -or-create $model_file_name - export TF_VAR_hf_model_file_name=$model_file_name \ - TF_VAR_hf_model_download_source=${hf_models[$model_file_name]} \ - terraform plan - terraform apply -auto-approve -done diff --git a/containers/hugging-face-inference/terraform/terraform.sh b/containers/hugging-face-inference/terraform/terraform.sh new file mode 100755 index 0000000..1bf68af --- /dev/null +++ b/containers/hugging-face-inference/terraform/terraform.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +set -e + +# Common environment variables +export TF_VAR_access_key=${SCW_ACCESS_KEY} \ + TF_VAR_secret_key=${SCW_SECRET_KEY} \ + TF_VAR_project_id=${SCW_PROJECT_ID} + +# Associative list of models to deploy using json data +declare -A hf_models +eval "$(jq -r '.[]|.[]|"hf_models[\(.file)]=\(.source)"' hf-models.json)" + +# Login to docker Scaleway's registry +docker login "rg.$REGION.scw.cloud" -u nologin --password-stdin <<< "$SCW_SECRET_KEY" + +# Initialize, plan, and deploy each model in a Terraform workspace +apply() { + terraform init + for model_file_name in 
"${!hf_models[@]}"; + do + terraform workspace select -or-create $model_file_name + export TF_VAR_hf_model_file_name=$model_file_name \ + TF_VAR_hf_model_download_source=${hf_models[$model_file_name]} + terraform plan + terraform apply -auto-approve + done +} + +# Destroy resources of each Terraform workspace +destroy(){ + for model_file_name in "${!hf_models[@]}"; + do + terraform workspace select $model_file_name + export TF_VAR_hf_model_file_name=$model_file_name \ + TF_VAR_hf_model_download_source=${hf_models[$model_file_name]} + terraform destroy -auto-approve + done +} + +# Script actions per flag +while getopts "ad" option; do + case $option in + a) + echo "deploying models" + apply + ;; + d) + echo "destroying models" + destroy + ;; + *) + echo "flag is not provided" + exit 1 + esac +done \ No newline at end of file From 3640c3d8030a2595ba429edc23c3418062922604 Mon Sep 17 00:00:00 2001 From: Reda Noureddine Date: Tue, 7 May 2024 11:55:01 +0200 Subject: [PATCH 08/11] refactor: fastapi app --- containers/hugging-face-inference/Dockerfile | 4 +++- containers/hugging-face-inference/main.py | 11 +++++++---- containers/hugging-face-inference/prompt.py | 4 ---- 3 files changed, 10 insertions(+), 9 deletions(-) delete mode 100644 containers/hugging-face-inference/prompt.py diff --git a/containers/hugging-face-inference/Dockerfile b/containers/hugging-face-inference/Dockerfile index 5f3088a..f3ce032 100644 --- a/containers/hugging-face-inference/Dockerfile +++ b/containers/hugging-face-inference/Dockerfile @@ -7,7 +7,9 @@ RUN apt-get update && apt-get install -y wget WORKDIR /app RUN pip install --upgrade pip + COPY requirements.txt . + RUN pip install -r requirements.txt RUN pip install llama-cpp-python==0.2.62 \ @@ -16,6 +18,6 @@ RUN pip install llama-cpp-python==0.2.62 \ RUN wget $MODEL_DOWNLOAD_SOURCE -COPY . . +COPY main.py . 
CMD ["uvicorn", "main:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "80"] diff --git a/containers/hugging-face-inference/main.py b/containers/hugging-face-inference/main.py index 753308e..aa1be39 100644 --- a/containers/hugging-face-inference/main.py +++ b/containers/hugging-face-inference/main.py @@ -1,7 +1,10 @@ from fastapi import FastAPI from llama_cpp import Llama +from pydantic import BaseModel import os -import prompt + +class Message(BaseModel): + content: str MODEL_FILE_NAME=os.environ["MODEL_FILE_NAME"] @@ -23,11 +26,11 @@ def hello(): } @app.post("/") -def infer(prompt: prompt.Prompt): - +def infer(message: Message): + """Post a message and receive a response""" print("inference endpoint is called", flush=True) - output = llm(prompt=prompt.message, max_tokens=200) + output = llm(prompt=message.content, max_tokens=200) print("output is successfully inferred", flush=True) diff --git a/containers/hugging-face-inference/prompt.py b/containers/hugging-face-inference/prompt.py deleted file mode 100644 index 5dc7363..0000000 --- a/containers/hugging-face-inference/prompt.py +++ /dev/null @@ -1,4 +0,0 @@ -from pydantic import BaseModel - -class Prompt(BaseModel): - message: str \ No newline at end of file From 1a01f21f60d29831a933b05d584b3b994ba06064 Mon Sep 17 00:00:00 2001 From: Reda Noureddine Date: Tue, 7 May 2024 12:02:44 +0200 Subject: [PATCH 09/11] feat: remove cron schedule as used for observability purposes only --- containers/hugging-face-inference/terraform/container.tf | 8 -------- containers/hugging-face-inference/terraform/variables.tf | 5 ----- 2 files changed, 13 deletions(-) diff --git a/containers/hugging-face-inference/terraform/container.tf b/containers/hugging-face-inference/terraform/container.tf index 3b98f07..3502223 100644 --- a/containers/hugging-face-inference/terraform/container.tf +++ b/containers/hugging-face-inference/terraform/container.tf @@ -18,11 +18,3 @@ resource "scaleway_container" "inference-hugging-face" { max_scale = 1 deploy = true } - -resource scaleway_container_cron "inference_cron" { - container_id = scaleway_container.inference-hugging-face.id - schedule = var.inference_cron_schedule - args = jsonencode({ - "message" : "Hello! It's sunny today. How are you doing?" - }) -} \ No newline at end of file diff --git a/containers/hugging-face-inference/terraform/variables.tf b/containers/hugging-face-inference/terraform/variables.tf index 623c911..afc799c 100644 --- a/containers/hugging-face-inference/terraform/variables.tf +++ b/containers/hugging-face-inference/terraform/variables.tf @@ -20,11 +20,6 @@ variable "region" { default = "fr-par" } -variable "inference_cron_schedule" { - type = string - default = "*/15 * * * *" -} - variable "hf_model_file_name" { type = string } From 6990297c235570f256228d2807bfd75076717f71 Mon Sep 17 00:00:00 2001 From: Reda Noureddine Date: Tue, 7 May 2024 12:07:50 +0200 Subject: [PATCH 10/11] docs: readme --- README.md | 1 + containers/hugging-face-inference/README.md | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 05757c7..248d251 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,7 @@ Table of Contents: | **[Python S3 upload](containers/python-s3-upload/README.md)**
A Python + Flask HTTP server that receives file uploads and writes them to S3. | Python | [Terraform] |
 | **[Terraform NGINX hello world](containers/terraform-nginx-hello-world/README.md)**<br/>A minimal example running the base NGINX image in a serverless container deployed with Terraform. | N/A | [Terraform] |
 | **[Triggers with Terraform](containers/terraform-triggers/README.md)**<br/>Configuring two SQS triggers, used to trigger two containers, one public, one private. | N/A | [Terraform] |
+| **[Inference with Hugging Face Models](containers/hugging-face-inference/README.md)**<br/>
Experimentation to deploy and benchmark some lightweight Hugging Face Models in Serverless Containers. | N/A | [Terraform] | ### ⚙️ Jobs diff --git a/containers/hugging-face-inference/README.md b/containers/hugging-face-inference/README.md index e3fa747..de80b2d 100644 --- a/containers/hugging-face-inference/README.md +++ b/containers/hugging-face-inference/README.md @@ -1,4 +1,6 @@ -## Deploy Hugging Face Models in Serverless Containers +# Hugging Face Models + +### Deploy models in Serverless Containers - Export these variables: @@ -9,5 +11,19 @@ export SCW_ACCESS_KEY="access-key" SCW_SECRET_KEY="secret-key" SCW_PROJECT_ID="p - Run script to deploy multiple hugging face models using terraform workspaces: ```bash -bash ./deploy-models.sh +cd terraform && bash terraform.sh -a +``` + +### Benchmark models + +Check your models were deployed on the console and copy your container endpoints to the `terraform/hf-models.json` file, then perform the following command: + +```bash +python benchmark-models.py +``` + +### Destroy terraform resources for all models + +```bash +bash terraform.sh -d ``` \ No newline at end of file From 72d4289b16342fa2df11c51a56d05c0bcd091eb2 Mon Sep 17 00:00:00 2001 From: Reda Noureddine Date: Tue, 7 May 2024 12:50:56 +0200 Subject: [PATCH 11/11] refactor: style and rewording --- containers/hugging-face-inference/README.md | 4 ++ containers/hugging-face-inference/main.py | 23 +++++--- .../terraform/benchmark-models.py | 57 +++++++++++++------ .../terraform/terraform.sh | 4 +- 4 files changed, 62 insertions(+), 26 deletions(-) diff --git a/containers/hugging-face-inference/README.md b/containers/hugging-face-inference/README.md index de80b2d..2f0a040 100644 --- a/containers/hugging-face-inference/README.md +++ b/containers/hugging-face-inference/README.md @@ -8,6 +8,8 @@ export SCW_ACCESS_KEY="access-key" SCW_SECRET_KEY="secret-key" SCW_PROJECT_ID="project-id" REGION="fr-par" ``` +- Add/remove Hugging Face models (with `.gguf` extension) in `terraform/hf-models.json` file. + - Run script to deploy multiple hugging face models using terraform workspaces: ```bash @@ -22,6 +24,8 @@ Check your models were deployed on the console and copy your container endpoints python benchmark-models.py ``` +This will generate a box plot to analyze response time per model family, and a `csv` file containing textual responses per each model. + ### Destroy terraform resources for all models ```bash diff --git a/containers/hugging-face-inference/main.py b/containers/hugging-face-inference/main.py index aa1be39..a473554 100644 --- a/containers/hugging-face-inference/main.py +++ b/containers/hugging-face-inference/main.py @@ -1,33 +1,40 @@ +import os + from fastapi import FastAPI from llama_cpp import Llama from pydantic import BaseModel -import os + class Message(BaseModel): content: str -MODEL_FILE_NAME=os.environ["MODEL_FILE_NAME"] + +MODEL_FILE_NAME = os.environ["MODEL_FILE_NAME"] app = FastAPI() -print("loading model from memory starts", flush=True) +print("loading model starts", flush=True) llm = Llama(model_path=MODEL_FILE_NAME) -print("loading model from memory successfully ends", flush=True) +print("loading model successfully ends", flush=True) + @app.get("/") def hello(): - """Get Inference Server Info""" + """Get info of inference server""" return { - "message": "Hello, this is the inference server! Serving model {model_name}" - .format(model_name=MODEL_FILE_NAME) + "message": "Hello, this is the inference server! 
Serving model {model_name}".format( + model_name=MODEL_FILE_NAME + ) } + @app.post("/") def infer(message: Message): - """Post a message and receive a response""" + """Post a message and receive a response from inference server""" + print("inference endpoint is called", flush=True) output = llm(prompt=message.content, max_tokens=200) diff --git a/containers/hugging-face-inference/terraform/benchmark-models.py b/containers/hugging-face-inference/terraform/benchmark-models.py index 7ffe2af..17fddaa 100644 --- a/containers/hugging-face-inference/terraform/benchmark-models.py +++ b/containers/hugging-face-inference/terraform/benchmark-models.py @@ -1,27 +1,36 @@ -import json, requests, csv, pandas +import csv +import json + import matplotlib.pyplot as plt +import pandas +import requests + class Benchmark: _model_families = ["llama", "mistral", "phi"] _endpoints = {} - def __init__(self, models_file: str, benchmark_file: str, results_figure: str, message: str) -> None: + def __init__( + self, models_file: str, benchmark_file: str, results_figure: str, message: str + ) -> None: self.models_file = models_file self.benchmark_file = benchmark_file self.message = message self.results_figure = results_figure - def get_container_endpoints_from_json_file(self)-> None: + def get_container_endpoints_from_json_file(self) -> None: if self.models_file == "": raise Exception("file name is empty") - with open(self.models_file, 'r') as models_file: + with open(self.models_file, "r") as models_file: json_data = json.load(models_file) for family in self._model_families: self._endpoints[family] = [] for model in json_data[family]: - self._endpoints[family].append({"model": model["file"], "endpoint": model["ctn_endpoint"]}) + self._endpoints[family].append( + {"model": model["file"], "endpoint": model["ctn_endpoint"]} + ) def analyze_results(self) -> None: benchmark_results = pandas.read_csv(self.benchmark_file) @@ -32,7 +41,7 @@ def analyze_results(self) -> None: def benchmark_models(self, num_samples: int) -> None: self.get_container_endpoints_from_json_file() - fields = ['Model', 'Family', 'Total Response Time', 'Response Message'] + fields = ["Model", "Family", "Total Response Time", "Response Message"] benchmark_data = [] for family in self._model_families: @@ -42,35 +51,51 @@ def benchmark_models(self, num_samples: int) -> None: for _ in range(num_samples): try: - print("Calling model {model} on endpoint {endpoint} with message {message}" - .format(model=endpoint["model"], endpoint=endpoint["endpoint"], message=self.message) + print( + "Calling model {model} on endpoint {endpoint} with message {message}".format( + model=endpoint["model"], + endpoint=endpoint["endpoint"], + message=self.message, + ) ) - rsp = requests.post(endpoint["endpoint"], json={"message": self.message}) + rsp = requests.post( + endpoint["endpoint"], json={"message": self.message} + ) response_text = rsp.json()["choices"][0]["text"] - print("The model {model} responded with: {response_text}" - .format(model=endpoint["model"], response_text=response_text) + print( + "The model {model} responded with: {response_text}".format( + model=endpoint["model"], response_text=response_text + ) ) - benchmark_data.append([endpoint["model"], family, rsp.elapsed.total_seconds(), response_text]) + benchmark_data.append( + [ + endpoint["model"], + family, + rsp.elapsed.total_seconds(), + response_text, + ] + ) except: pass - with open(self.benchmark_file, 'w') as results_file: + with open(self.benchmark_file, "w") as results_file: wrt = 
csv.writer(results_file) wrt.writerow(fields) wrt.writerows(benchmark_data) self.analyze_results() + if __name__ == "__main__": benchmark = Benchmark( - models_file="hf-models.json", - benchmark_file="benchmark-results.csv", - results_figure="results-plot.png", + models_file="hf-models.json", + benchmark_file="benchmark-results.csv", + results_figure="results-plot.png", message="What the difference between an elephant and an ant?", ) diff --git a/containers/hugging-face-inference/terraform/terraform.sh b/containers/hugging-face-inference/terraform/terraform.sh index 1bf68af..5bbd07a 100755 --- a/containers/hugging-face-inference/terraform/terraform.sh +++ b/containers/hugging-face-inference/terraform/terraform.sh @@ -28,7 +28,7 @@ apply() { } # Destroy resources of each Terraform workspace -destroy(){ +destroy() { for model_file_name in "${!hf_models[@]}"; do terraform workspace select $model_file_name @@ -38,7 +38,7 @@ destroy(){ done } -# Script actions per flag +# Script actions while getopts "ad" option; do case $option in a)