From 86a6d570e4178466eec9e21fc59b78d3e00a4e99 Mon Sep 17 00:00:00 2001
From: Dmitrii Cherkasov <dmitrii.cherkasov@oracle.com>
Date: Fri, 29 Aug 2025 16:33:21 -0700
Subject: [PATCH 1/3] Updated the GPU shapes index file to include additional
 shapes and corresponding CPU parameters.

---
 ads/aqua/common/entities.py              | 114 +++++++++++++++--------
 ads/aqua/resources/gpu_shapes_index.json | 107 +++++++++++++++++++++
 2 files changed, 183 insertions(+), 38 deletions(-)

diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py
index f3251ebab..5973dd035 100644
--- a/ads/aqua/common/entities.py
+++ b/ads/aqua/common/entities.py
@@ -46,37 +46,77 @@ class Config:
         arbitrary_types_allowed = True
         protected_namespaces = ()
 
+
 class ComputeRank(Serializable):
     """
-    Represents the cost and performance ranking for a compute shape.
+    Represents the cost and performance rankings for a specific compute shape.
+    These rankings help compare different shapes based on their relative pricing
+    and computational capabilities.
     """
-    cost: int = Field(
-    None, description="The relative rank of the cost of the shape. Range is [10 (cost-effective), 100 (most-expensive)]"
+
+    cost: Optional[int] = Field(
+        None,
+        description=(
+            "Relative cost ranking of the compute shape. "
+            "Value ranges from 10 (most cost-effective) to 100 (most expensive). "
+            "Lower values indicate cheaper compute options."
+        ),
     )
 
-    performance: int = Field(
-    None, description="The relative rank of the performance of the shape. Range is [10 (lower performance), 110 (highest performance)]"
+    performance: Optional[int] = Field(
+        None,
+        description=(
+            "Relative performance ranking of the compute shape. "
+            "Value ranges from 10 (lowest performance) to 110 (highest performance). "
+            "Higher values indicate better compute performance."
+        ),
     )
 
+
 class GPUSpecs(Serializable):
     """
-    Represents the GPU specifications for a compute instance.
+    Represents the specifications and capabilities of a GPU-enabled compute shape.
+    Includes details about GPU and CPU resources, supported quantization formats, and
+    relative rankings for cost and performance.
     """
 
-    gpu_memory_in_gbs: Optional[int] = Field(
-        default=None, description="The amount of GPU memory available (in GB)."
-    )
     gpu_count: Optional[int] = Field(
-        default=None, description="The number of GPUs available."
+        default=None,
+        description="Number of physical GPUs available on the compute shape.",
     )
+
+    gpu_memory_in_gbs: Optional[int] = Field(
+        default=None, description="Total GPU memory available in gigabytes (GB)."
+    )
+
     gpu_type: Optional[str] = Field(
-        default=None, description="The type of GPU (e.g., 'V100, A100, H100')."
+        default=None,
+        description="Type of GPU and architecture. Example: 'H100', 'GB200'.",
     )
+
     quantization: Optional[List[str]] = Field(
-        default_factory=list, description="The quantization format supported by shape. (ex.  bitsandbytes, fp8, etc.)"
+        default_factory=list,
+        description=(
+            "List of supported quantization formats for the GPU. "
+            "Examples: 'fp16', 'int8', 'bitsandbytes', 'bf16', 'fp4', etc."
+        ),
+    )
+
+    cpu_count: Optional[int] = Field(
+        default=None, description="Number of CPU cores available on the shape."
     )
+
+    cpu_memory_in_gbs: Optional[int] = Field(
+        default=None, description="Total CPU memory available in gigabytes (GB)."
+    )
+
     ranking: Optional[ComputeRank] = Field(
-        None, description="The relative rank of the cost and performance of the shape."
+        default=None,
+        description=(
+            "Relative cost and performance rankings of this shape. "
+            "Cost is ranked from 10 (least expensive) to 100+ (most expensive), "
+            "and performance from 10 (lowest) to 100+ (highest)."
+        ),
     )
 
 
@@ -97,50 +137,49 @@ class GPUShapesIndex(Serializable):
 
 class ComputeShapeSummary(Serializable):
     """
-    Represents the specifications of a compute instance shape,
-    including CPU, memory, and optional GPU characteristics.
+    Represents a compute shape's specification including CPU, memory, and (if applicable) GPU configuration.
     """
 
     available: Optional[bool] = Field(
-        default = False,
-        description="True if shape is available on user tenancy, "
+        default=False,
+        description="True if the shape is available in the user's tenancy/region.",
     )
+
     core_count: Optional[int] = Field(
-        default=None,
-        description="Total number of CPU cores available for the compute shape.",
+        default=None, description="Number of vCPUs available for the compute shape."
     )
+
     memory_in_gbs: Optional[int] = Field(
-        default=None,
-        description="Amount of memory (in GB) available for the compute shape.",
+        default=None, description="Total CPU memory available for the shape (in GB)."
     )
+
     name: Optional[str] = Field(
-        default=None,
-        description="Full name of the compute shape, e.g., 'VM.GPU.A10.2'.",
+        default=None, description="Name of the compute shape, e.g., 'VM.GPU.A10.2'."
     )
+
     shape_series: Optional[str] = Field(
         default=None,
-        description="Shape family or series, e.g., 'GPU', 'Standard', etc.",
+        description="Series or family of the shape, e.g., 'GPU', 'Standard'.",
     )
+
     gpu_specs: Optional[GPUSpecs] = Field(
-        default=None,
-        description="Optional GPU specifications associated with the shape.",
+        default=None, description="GPU configuration for the shape, if applicable."
     )
 
     @model_validator(mode="after")
     @classmethod
-    def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
+    def populate_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
         """
-        Validates and populates GPU specifications if the shape_series indicates a GPU-based shape.
-
-        - If the shape_series contains "GPU", the validator first checks if the shape name exists
-          in the GPU_SPECS dictionary. If found, it creates a GPUSpecs instance with the corresponding data.
-        - If the shape is not found in the GPU_SPECS, it attempts to extract the GPU count from the shape name
-          using a regex pattern (looking for a number following a dot at the end of the name).
+        Attempts to populate GPU specs if the shape is GPU-based and no GPU specs are explicitly set.
 
-        The information about shapes is taken from: https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm
+        Logic:
+        - If `shape_series` includes 'GPU', `name` is set, and `gpu_specs` is not already set:
+          - Extracts the GPU count from the numeric suffix of the shape name
+            (e.g., 'VM.GPU.A10.2' → gpu_count=2); names without such a suffix are left unchanged.
+        - If extraction raises, the error is logged at debug level and not re-raised.
 
         Returns:
-            ComputeShapeSummary: The updated instance with gpu_specs populated if applicable.
+            ComputeShapeSummary: The updated model instance.
         """
         try:
             if (
@@ -149,16 +188,15 @@ def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
                 and model.name
                 and not model.gpu_specs
             ):
-                # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2)
                 match = re.search(r"\.(\d+)$", model.name)
                 if match:
                     gpu_count = int(match.group(1))
                     model.gpu_specs = GPUSpecs(gpu_count=gpu_count)
         except Exception as err:
             logger.debug(
-                f"Error occurred in attempt to extract GPU specification for the f{model.name}. "
-                f"Details: {err}"
+                f"[populate_gpu_specs] Failed to auto-populate GPU specs for shape '{model.name}': {err}"
             )
+
         return model
 
 
diff --git a/ads/aqua/resources/gpu_shapes_index.json b/ads/aqua/resources/gpu_shapes_index.json
index ca470138e..6b103f0f2 100644
--- a/ads/aqua/resources/gpu_shapes_index.json
+++ b/ads/aqua/resources/gpu_shapes_index.json
@@ -1,6 +1,85 @@
 {
+  "BM.GPU.B200.8": {
+    "cpu_count": 128,
+    "cpu_memory_in_gbs": 4096,
+    "gpu_count": 8,
+    "gpu_memory_in_gbs": 1440,
+    "gpu_type": "B200",
+    "quantization": [
+      "fp4",
+      "fp8",
+      "fp16",
+      "bf16",
+      "tf32",
+      "int8",
+      "fp64"
+    ],
+    "ranking": {
+      "cost": 120,
+      "performance": 130
+    }
+  },
+  "BM.GPU.GB200.4": {
+    "cpu_count": 144,
+    "cpu_memory_in_gbs": 1024,
+    "gpu_count": 4,
+    "gpu_memory_in_gbs": 768,
+    "gpu_type": "GB200",
+    "quantization": [
+      "fp4",
+      "fp8",
+      "fp6",
+      "int8",
+      "fp16",
+      "bf16",
+      "tf32",
+      "fp64"
+    ],
+    "ranking": {
+      "cost": 110,
+      "performance": 120
+    }
+  },
+  "BM.GPU4.8": {
+    "cpu_count": 64,
+    "cpu_memory_in_gbs": 2048,
+    "gpu_count": 8,
+    "gpu_memory_in_gbs": 320,
+    "gpu_type": "A100",
+    "quantization": [
+      "int8",
+      "fp16",
+      "bf16",
+      "tf32"
+    ],
+    "ranking": {
+      "cost": 57,
+      "performance": 65
+    }
+  },
+  "VM.GPU3.8": {
+    "cpu_count": 24,
+    "cpu_memory_in_gbs": 768,
+    "gpu_count": 8,
+    "gpu_memory_in_gbs": 128,
+    "gpu_type": "V100",
+    "quantization": [
+      "gptq",
+      "bitblas",
+      "aqlm",
+      "bitsandbytes",
+      "deepspeedfp",
+      "gguf"
+    ],
+    "ranking": {
+      "cost": 56,
+      "performance": 46
+    }
+  },
   "shapes": {
     "BM.GPU.A10.4": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 96,
       "gpu_type": "A10",
@@ -21,6 +100,8 @@
       }
     },
     "BM.GPU.A100-V2.8": {
+      "cpu_count": 128,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
       "gpu_type": "A100",
@@ -41,6 +122,8 @@
       }
     },
     "BM.GPU.B4.8": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 320,
       "gpu_type": "A100",
@@ -61,6 +144,8 @@
       }
     },
     "BM.GPU.H100.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
       "gpu_type": "H100",
@@ -82,6 +167,8 @@
       }
     },
     "BM.GPU.H200.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 3072,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 1128,
       "gpu_type": "H200",
@@ -103,6 +190,8 @@
       }
     },
     "BM.GPU.L40S-NC.4": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
       "gpu_type": "L40S",
@@ -124,6 +213,8 @@
       }
     },
     "BM.GPU.L40S.4": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 1024,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
       "gpu_type": "L40S",
@@ -145,6 +236,8 @@
       }
     },
     "BM.GPU.MI300X.8": {
+      "cpu_count": 112,
+      "cpu_memory_in_gbs": 2048,
       "gpu_count": 8,
       "gpu_memory_in_gbs": 1536,
       "gpu_type": "MI300X",
@@ -158,6 +251,8 @@
       }
     },
     "BM.GPU2.2": {
+      "cpu_count": 28,
+      "cpu_memory_in_gbs": 192,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 32,
       "gpu_type": "P100",
@@ -170,6 +265,8 @@
       }
     },
     "VM.GPU.A10.1": {
+      "cpu_count": 15,
+      "cpu_memory_in_gbs": 240,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 24,
       "gpu_type": "A10",
@@ -190,6 +287,8 @@
       }
     },
     "VM.GPU.A10.2": {
+      "cpu_count": 30,
+      "cpu_memory_in_gbs": 480,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 48,
       "gpu_type": "A10",
@@ -210,6 +309,8 @@
       }
     },
     "VM.GPU2.1": {
+      "cpu_count": 12,
+      "cpu_memory_in_gbs": 72,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
       "gpu_type": "P100",
@@ -222,6 +323,8 @@
       }
     },
     "VM.GPU3.1": {
+      "cpu_count": 6,
+      "cpu_memory_in_gbs": 90,
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
       "gpu_type": "V100",
@@ -239,6 +342,8 @@
       }
     },
     "VM.GPU3.2": {
+      "cpu_count": 12,
+      "cpu_memory_in_gbs": 180,
       "gpu_count": 2,
       "gpu_memory_in_gbs": 32,
       "gpu_type": "V100",
@@ -256,6 +361,8 @@
       }
     },
     "VM.GPU3.4": {
+      "cpu_count": 24,
+      "cpu_memory_in_gbs": 360,
       "gpu_count": 4,
       "gpu_memory_in_gbs": 64,
       "gpu_type": "V100",

From b5b5434ad25167ff1bc0cb618ecd80384e72e49c Mon Sep 17 00:00:00 2001
From: Dmitrii Cherkasov <dmitrii.cherkasov@oracle.com>
Date: Fri, 29 Aug 2025 16:44:45 -0700
Subject: [PATCH 2/3] Fixes the structure of the gpu_shapes_index.json

---
 ads/aqua/resources/gpu_shapes_index.json | 154 +++++++++++------------
 1 file changed, 77 insertions(+), 77 deletions(-)

diff --git a/ads/aqua/resources/gpu_shapes_index.json b/ads/aqua/resources/gpu_shapes_index.json
index 6b103f0f2..f4765ede6 100644
--- a/ads/aqua/resources/gpu_shapes_index.json
+++ b/ads/aqua/resources/gpu_shapes_index.json
@@ -1,81 +1,4 @@
 {
-  "BM.GPU.B200.8": {
-    "cpu_count": 128,
-    "cpu_memory_in_gbs": 4096,
-    "gpu_count": 8,
-    "gpu_memory_in_gbs": 1440,
-    "gpu_type": "B200",
-    "quantization": [
-      "fp4",
-      "fp8",
-      "fp16",
-      "bf16",
-      "tf32",
-      "int8",
-      "fp64"
-    ],
-    "ranking": {
-      "cost": 120,
-      "performance": 130
-    }
-  },
-  "BM.GPU.GB200.4": {
-    "cpu_count": 144,
-    "cpu_memory_in_gbs": 1024,
-    "gpu_count": 4,
-    "gpu_memory_in_gbs": 768,
-    "gpu_type": "GB200",
-    "quantization": [
-      "fp4",
-      "fp8",
-      "fp6",
-      "int8",
-      "fp16",
-      "bf16",
-      "tf32",
-      "fp64"
-    ],
-    "ranking": {
-      "cost": 110,
-      "performance": 120
-    }
-  },
-  "BM.GPU4.8": {
-    "cpu_count": 64,
-    "cpu_memory_in_gbs": 2048,
-    "gpu_count": 8,
-    "gpu_memory_in_gbs": 320,
-    "gpu_type": "A100",
-    "quantization": [
-      "int8",
-      "fp16",
-      "bf16",
-      "tf32"
-    ],
-    "ranking": {
-      "cost": 57,
-      "performance": 65
-    }
-  },
-  "VM.GPU3.8": {
-    "cpu_count": 24,
-    "cpu_memory_in_gbs": 768,
-    "gpu_count": 8,
-    "gpu_memory_in_gbs": 128,
-    "gpu_type": "V100",
-    "quantization": [
-      "gptq",
-      "bitblas",
-      "aqlm",
-      "bitsandbytes",
-      "deepspeedfp",
-      "gguf"
-    ],
-    "ranking": {
-      "cost": 56,
-      "performance": 46
-    }
-  },
   "shapes": {
     "BM.GPU.A10.4": {
       "cpu_count": 64,
@@ -121,6 +44,26 @@
         "performance": 70
       }
     },
+    "BM.GPU.B200.8": {
+      "cpu_count": 128,
+      "cpu_memory_in_gbs": 4096,
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 1440,
+      "gpu_type": "B200",
+      "quantization": [
+        "fp4",
+        "fp8",
+        "fp16",
+        "bf16",
+        "tf32",
+        "int8",
+        "fp64"
+      ],
+      "ranking": {
+        "cost": 120,
+        "performance": 130
+      }
+    },
     "BM.GPU.B4.8": {
       "cpu_count": 64,
       "cpu_memory_in_gbs": 2048,
@@ -143,6 +86,27 @@
         "performance": 60
       }
     },
+    "BM.GPU.GB200.4": {
+      "cpu_count": 144,
+      "cpu_memory_in_gbs": 1024,
+      "gpu_count": 4,
+      "gpu_memory_in_gbs": 768,
+      "gpu_type": "GB200",
+      "quantization": [
+        "fp4",
+        "fp8",
+        "fp6",
+        "int8",
+        "fp16",
+        "bf16",
+        "tf32",
+        "fp64"
+      ],
+      "ranking": {
+        "cost": 110,
+        "performance": 120
+      }
+    },
     "BM.GPU.H100.8": {
       "cpu_count": 112,
       "cpu_memory_in_gbs": 2048,
@@ -264,6 +228,23 @@
         "performance": 20
       }
     },
+    "BM.GPU4.8": {
+      "cpu_count": 64,
+      "cpu_memory_in_gbs": 2048,
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 320,
+      "gpu_type": "A100",
+      "quantization": [
+        "int8",
+        "fp16",
+        "bf16",
+        "tf32"
+      ],
+      "ranking": {
+        "cost": 57,
+        "performance": 65
+      }
+    },
     "VM.GPU.A10.1": {
       "cpu_count": 15,
       "cpu_memory_in_gbs": 240,
@@ -378,6 +359,25 @@
         "cost": 55,
         "performance": 45
       }
+    },
+    "VM.GPU3.8": {
+      "cpu_count": 24,
+      "cpu_memory_in_gbs": 768,
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 128,
+      "gpu_type": "V100",
+      "quantization": [
+        "gptq",
+        "bitblas",
+        "aqlm",
+        "bitsandbytes",
+        "deepspeedfp",
+        "gguf"
+      ],
+      "ranking": {
+        "cost": 56,
+        "performance": 46
+      }
     }
   }
 }

From 9223346f6d69e35627b3b88a52f14b58e6eff6a0 Mon Sep 17 00:00:00 2001
From: Dmitrii Cherkasov <dmitrii.cherkasov@oracle.com>
Date: Tue, 2 Sep 2025 20:08:35 -0700
Subject: [PATCH 3/3] Fixes unit tests

---
 .../result-Devstral-Small-2507-GQA.json       | 144 ++++++++++
 .../result-Kimi-K2-Instruct-MOE.json          | 126 ++++++---
 ...ult-Qwen3-235B-A22B-Instruct-2507-FP8.json | 259 ++++++++++++------
 .../with_extras/aqua/test_recommend.py        |  12 +-
 4 files changed, 415 insertions(+), 126 deletions(-)

diff --git a/tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json b/tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json
index 36301b780..87fe896c9 100644
--- a/tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json
+++ b/tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json
@@ -21,6 +21,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 64,
+          "cpu_memory_in_gbs": 1024,
           "gpu_count": 4,
           "gpu_memory_in_gbs": 96,
           "gpu_type": "A10",
@@ -45,6 +47,95 @@
         "shape_series": "GPU"
       }
     },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 131072,
+            "params": "",
+            "quantization": "bfloat16"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 21.47,
+            "model_size_gb": 47.98,
+            "total_model_gb": 69.46
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1440.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 128,
+          "cpu_memory_in_gbs": 4096,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1440,
+          "gpu_type": "B200",
+          "quantization": [
+            "fp4",
+            "fp8",
+            "fp16",
+            "bf16",
+            "tf32",
+            "int8",
+            "fp64"
+          ],
+          "ranking": {
+            "cost": 120,
+            "performance": 130
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.B200.8",
+        "shape_series": "GPU"
+      }
+    },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 131072,
+            "params": "",
+            "quantization": "bfloat16"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 21.47,
+            "model_size_gb": 47.98,
+            "total_model_gb": 69.46
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 768.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 144,
+          "cpu_memory_in_gbs": 1024,
+          "gpu_count": 4,
+          "gpu_memory_in_gbs": 768,
+          "gpu_type": "GB200",
+          "quantization": [
+            "fp4",
+            "fp8",
+            "fp6",
+            "int8",
+            "fp16",
+            "bf16",
+            "tf32",
+            "fp64"
+          ],
+          "ranking": {
+            "cost": 110,
+            "performance": 120
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.GB200.4",
+        "shape_series": "GPU"
+      }
+    },
     {
       "configurations": [
         {
@@ -65,6 +156,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 3072,
           "gpu_count": 8,
           "gpu_memory_in_gbs": 1128,
           "gpu_type": "H200",
@@ -110,6 +203,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 1024,
           "gpu_count": 4,
           "gpu_memory_in_gbs": 192,
           "gpu_type": "L40S",
@@ -155,6 +250,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 1024,
           "gpu_count": 4,
           "gpu_memory_in_gbs": 192,
           "gpu_type": "L40S",
@@ -200,6 +297,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 2048,
           "gpu_count": 8,
           "gpu_memory_in_gbs": 1536,
           "gpu_type": "MI300X",
@@ -217,6 +316,47 @@
         "shape_series": "GPU"
       }
     },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 131072,
+            "params": "",
+            "quantization": "bfloat16"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 21.47,
+            "model_size_gb": 47.98,
+            "total_model_gb": 69.46
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 320.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 64,
+          "cpu_memory_in_gbs": 2048,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 320,
+          "gpu_type": "A100",
+          "quantization": [
+            "int8",
+            "fp16",
+            "bf16",
+            "tf32"
+          ],
+          "ranking": {
+            "cost": 57,
+            "performance": 65
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU4.8",
+        "shape_series": "GPU"
+      }
+    },
     {
       "configurations": [
         {
@@ -237,6 +377,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 15,
+          "cpu_memory_in_gbs": 240,
           "gpu_count": 1,
           "gpu_memory_in_gbs": 24,
           "gpu_type": "A10",
@@ -281,6 +423,8 @@
         "available": false,
         "core_count": null,
         "gpu_specs": {
+          "cpu_count": 30,
+          "cpu_memory_in_gbs": 480,
           "gpu_count": 2,
           "gpu_memory_in_gbs": 48,
           "gpu_type": "A10",
diff --git a/tests/unitary/with_extras/aqua/test_data/recommend/result-Kimi-K2-Instruct-MOE.json b/tests/unitary/with_extras/aqua/test_data/recommend/result-Kimi-K2-Instruct-MOE.json
index 7a4966574..c5b7c5a46 100644
--- a/tests/unitary/with_extras/aqua/test_data/recommend/result-Kimi-K2-Instruct-MOE.json
+++ b/tests/unitary/with_extras/aqua/test_data/recommend/result-Kimi-K2-Instruct-MOE.json
@@ -1,43 +1,89 @@
 {
-    "display_name": "Kimi-K2-Instruct-MOE",
-    "recommendations": [
+  "display_name": "Kimi-K2-Instruct-MOE",
+  "recommendations": [
+    {
+      "configurations": [
         {
-            "shape_details": {
-                "available": false,
-                "core_count": null,
-                "memory_in_gbs": null,
-                "name": "BM.GPU.MI300X.8",
-                "shape_series": "GPU",
-                "gpu_specs": {
-                    "gpu_memory_in_gbs": 1536,
-                    "gpu_count": 8,
-                    "gpu_type": "MI300X",
-                    "quantization": [
-                        "fp8",
-                        "gguf"
-                    ],
-                    "ranking": {
-                        "cost": 90,
-                        "performance": 90
-                    }
-                }
-            },
-            "configurations": [
-                {
-                    "model_details": {
-                        "model_size_gb": 1046.48,
-                        "kv_cache_size_gb": 3.58,
-                        "total_model_gb": 1050.06
-                    },
-                    "deployment_params": {
-                        "quantization": "fp8",
-                        "max_model_len": 2048,
-                        "params": "--max-model-len 2048"
-                    },
-                    "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1536.0GB allowed)."
-                }
-            ]
+          "deployment_params": {
+            "max_model_len": 2048,
+            "params": "--max-model-len 2048",
+            "quantization": "fp8"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 3.58,
+            "model_size_gb": 1046.48,
+            "total_model_gb": 1050.06
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1440.0GB allowed)."
         }
-    ],
-    "troubleshoot": ""
-}
\ No newline at end of file
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 128,
+          "cpu_memory_in_gbs": 4096,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1440,
+          "gpu_type": "B200",
+          "quantization": [
+            "fp4",
+            "fp8",
+            "fp16",
+            "bf16",
+            "tf32",
+            "int8",
+            "fp64"
+          ],
+          "ranking": {
+            "cost": 120,
+            "performance": 130
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.B200.8",
+        "shape_series": "GPU"
+      }
+    },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 2048,
+            "params": "--max-model-len 2048",
+            "quantization": "fp8"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 3.58,
+            "model_size_gb": 1046.48,
+            "total_model_gb": 1050.06
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1536.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 2048,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1536,
+          "gpu_type": "MI300X",
+          "quantization": [
+            "fp8",
+            "gguf"
+          ],
+          "ranking": {
+            "cost": 90,
+            "performance": 90
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.MI300X.8",
+        "shape_series": "GPU"
+      }
+    }
+  ],
+  "troubleshoot": ""
+}
diff --git a/tests/unitary/with_extras/aqua/test_data/recommend/result-Qwen3-235B-A22B-Instruct-2507-FP8.json b/tests/unitary/with_extras/aqua/test_data/recommend/result-Qwen3-235B-A22B-Instruct-2507-FP8.json
index b75fb09cc..dfb7ec7c2 100644
--- a/tests/unitary/with_extras/aqua/test_data/recommend/result-Qwen3-235B-A22B-Instruct-2507-FP8.json
+++ b/tests/unitary/with_extras/aqua/test_data/recommend/result-Qwen3-235B-A22B-Instruct-2507-FP8.json
@@ -1,88 +1,181 @@
 {
-    "display_name": "Qwen3-235B-A22B-Instruct-2507-FP8",
-    "recommendations": [
+  "display_name": "Qwen3-235B-A22B-Instruct-2507-FP8",
+  "recommendations": [
+    {
+      "configurations": [
         {
-            "shape_details": {
-                "available": false,
-                "core_count": null,
-                "memory_in_gbs": null,
-                "name": "BM.GPU.H200.8",
-                "shape_series": "GPU",
-                "gpu_specs": {
-                    "gpu_memory_in_gbs": 1128,
-                    "gpu_count": 8,
-                    "gpu_type": "H200",
-                    "quantization": [
-                        "awq",
-                        "gptq",
-                        "marlin",
-                        "fp8",
-                        "int8",
-                        "bitblas",
-                        "aqlm",
-                        "bitsandbytes",
-                        "deepspeedfp",
-                        "gguf"
-                    ],
-                    "ranking": {
-                        "cost": 100,
-                        "performance": 110
-                    }
-                }
-            },
-            "configurations": [
-                {
-                    "model_details": {
-                        "model_size_gb": 231.89,
-                        "kv_cache_size_gb": 0.39,
-                        "total_model_gb": 232.28
-                    },
-                    "deployment_params": {
-                        "quantization": "fp8",
-                        "max_model_len": 2048,
-                        "params": "--max-model-len 2048"
-                    },
-                    "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 1128.0GB allowed)."
-                }
-            ]
+          "deployment_params": {
+            "max_model_len": 2048,
+            "params": "--max-model-len 2048",
+            "quantization": "fp8"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 0.39,
+            "model_size_gb": 231.89,
+            "total_model_gb": 232.28
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 1440.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 128,
+          "cpu_memory_in_gbs": 4096,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1440,
+          "gpu_type": "B200",
+          "quantization": [
+            "fp4",
+            "fp8",
+            "fp16",
+            "bf16",
+            "tf32",
+            "int8",
+            "fp64"
+          ],
+          "ranking": {
+            "cost": 120,
+            "performance": 130
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.B200.8",
+        "shape_series": "GPU"
+      }
+    },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 2048,
+            "params": "--max-model-len 2048",
+            "quantization": "fp8"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 0.39,
+            "model_size_gb": 231.89,
+            "total_model_gb": 232.28
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 768.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 144,
+          "cpu_memory_in_gbs": 1024,
+          "gpu_count": 4,
+          "gpu_memory_in_gbs": 768,
+          "gpu_type": "GB200",
+          "quantization": [
+            "fp4",
+            "fp8",
+            "fp6",
+            "int8",
+            "fp16",
+            "bf16",
+            "tf32",
+            "fp64"
+          ],
+          "ranking": {
+            "cost": 110,
+            "performance": 120
+          }
         },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.GB200.4",
+        "shape_series": "GPU"
+      }
+    },
+    {
+      "configurations": [
         {
-            "shape_details": {
-                "available": false,
-                "core_count": null,
-                "memory_in_gbs": null,
-                "name": "BM.GPU.MI300X.8",
-                "shape_series": "GPU",
-                "gpu_specs": {
-                    "gpu_memory_in_gbs": 1536,
-                    "gpu_count": 8,
-                    "gpu_type": "MI300X",
-                    "quantization": [
-                        "fp8",
-                        "gguf"
-                    ],
-                    "ranking": {
-                        "cost": 90,
-                        "performance": 90
-                    }
-                }
-            },
-            "configurations": [
-                {
-                    "model_details": {
-                        "model_size_gb": 231.89,
-                        "kv_cache_size_gb": 0.39,
-                        "total_model_gb": 232.28
-                    },
-                    "deployment_params": {
-                        "quantization": "fp8",
-                        "max_model_len": 2048,
-                        "params": "--max-model-len 2048"
-                    },
-                    "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 1536.0GB allowed)."
-                }
-            ]
+          "deployment_params": {
+            "max_model_len": 2048,
+            "params": "--max-model-len 2048",
+            "quantization": "fp8"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 0.39,
+            "model_size_gb": 231.89,
+            "total_model_gb": 232.28
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 1128.0GB allowed)."
         }
-    ],
-    "troubleshoot": ""
-}
\ No newline at end of file
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 3072,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1128,
+          "gpu_type": "H200",
+          "quantization": [
+            "awq",
+            "gptq",
+            "marlin",
+            "fp8",
+            "int8",
+            "bitblas",
+            "aqlm",
+            "bitsandbytes",
+            "deepspeedfp",
+            "gguf"
+          ],
+          "ranking": {
+            "cost": 100,
+            "performance": 110
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.H200.8",
+        "shape_series": "GPU"
+      }
+    },
+    {
+      "configurations": [
+        {
+          "deployment_params": {
+            "max_model_len": 2048,
+            "params": "--max-model-len 2048",
+            "quantization": "fp8"
+          },
+          "model_details": {
+            "kv_cache_size_gb": 0.39,
+            "model_size_gb": 231.89,
+            "total_model_gb": 232.28
+          },
+          "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 1536.0GB allowed)."
+        }
+      ],
+      "shape_details": {
+        "available": false,
+        "core_count": null,
+        "gpu_specs": {
+          "cpu_count": 112,
+          "cpu_memory_in_gbs": 2048,
+          "gpu_count": 8,
+          "gpu_memory_in_gbs": 1536,
+          "gpu_type": "MI300X",
+          "quantization": [
+            "fp8",
+            "gguf"
+          ],
+          "ranking": {
+            "cost": 90,
+            "performance": 90
+          }
+        },
+        "memory_in_gbs": null,
+        "name": "BM.GPU.MI300X.8",
+        "shape_series": "GPU"
+      }
+    }
+  ],
+  "troubleshoot": ""
+}
diff --git a/tests/unitary/with_extras/aqua/test_recommend.py b/tests/unitary/with_extras/aqua/test_recommend.py
index bfe1b4a54..cb61dae86 100644
--- a/tests/unitary/with_extras/aqua/test_recommend.py
+++ b/tests/unitary/with_extras/aqua/test_recommend.py
@@ -31,7 +31,9 @@
 )
 from ads.model.model_metadata import ModelCustomMetadata, ModelProvenanceMetadata
 
-CONFIG_ROOT = os.path.join(os.path.dirname(__file__), "test_data/recommend/")
+CONFIG_ROOT = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "test_data/recommend/"
+)
 
 
 def load_config(filename):
@@ -232,8 +234,12 @@ def test_suggested_quantizations_from_file(
 # --- Tests for recommend.py ---
 class GPUShapesIndexMock:
     def __init__(self):
-        # local_path = os.path.join(os.path.dirname(__file__), "../../resources", "gpu_shapes_index.json")
-        local_path = "ads/aqua/resources/gpu_shapes_index.json"
+        local_path = os.path.join(
+            os.path.dirname(os.path.abspath(__file__)),
+            "../../../../ads/aqua/resources",
+            "gpu_shapes_index.json",
+        )
+        # Anchored to this file's directory so the lookup does not depend on the CWD.
         with open(local_path) as f:
             local_data = json.load(f)