
Commit 18a92c4

initial code for GPU Shape Recommender

1 parent 9c1095e · commit 18a92c4

File tree

11 files changed (+1282, -5 lines)

ads/aqua/cli.py
Lines changed: 9 additions & 5 deletions

@@ -15,6 +15,7 @@
 from ads.aqua.model import AquaModelApp
 from ads.aqua.modeldeployment import AquaDeploymentApp
 from ads.aqua.verify_policies import AquaVerifyPoliciesApp
+from ads.aqua.shaperecommend.recommend import AquaRecommendApp
 from ads.common.utils import LOG_LEVELS


@@ -31,6 +32,7 @@ class AquaCommand:
     deployment = AquaDeploymentApp
     evaluation = AquaEvaluationApp
     verify_policies = AquaVerifyPoliciesApp
+    recommend = AquaRecommendApp

     def __init__(
         self,
@@ -96,18 +98,20 @@ def _validate_value(flag, value):
                 "If you intend to chain a function call to the result, please separate the "
                 "flag and the subsequent function call with separator `-`."
             )
-
+
     @staticmethod
     def install():
         """Install ADS Aqua Extension from wheel file. Set environment variable `AQUA_EXTENSTION_PATH` to change the wheel file path.

-        Return
+        Return
         ------
         int:
             Installation status.
         """
         import subprocess

-        wheel_file_path = os.environ.get("AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl")
-        status = subprocess.run(f"pip install {wheel_file_path}",shell=True)
-        return status.check_returncode
+        wheel_file_path = os.environ.get(
+            "AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl"
+        )
+        status = subprocess.run(f"pip install {wheel_file_path}", shell=True)
+        return status.check_returncode

ads/aqua/common/entities.py
Lines changed: 17 additions & 0 deletions

@@ -46,6 +46,17 @@ class Config:
         arbitrary_types_allowed = True
         protected_namespaces = ()

+class ComputeRank(Serializable):
+    """
+    Represents the cost and performance ranking for a compute shape.
+    """
+    cost: int = Field(
+        None, description="The relative rank of the cost of the shape. Range is [10 (cost-effective), 100 (most expensive)]."
+    )
+
+    performance: int = Field(
+        None, description="The relative rank of the performance of the shape. Range is [10 (lowest performance), 110 (highest performance)]."
+    )

 class GPUSpecs(Serializable):
     """
@@ -61,6 +72,12 @@ class GPUSpecs(Serializable):
     gpu_type: Optional[str] = Field(
         default=None, description="The type of GPU (e.g., 'V100, A100, H100')."
    )
+    quantization: Optional[List[str]] = Field(
+        default_factory=list, description="The quantization formats supported by the shape (e.g., bitsandbytes, fp8)."
+    )
+    ranking: Optional[ComputeRank] = Field(
+        None, description="The relative rank of the cost and performance of the shape."
+    )


 class GPUShapesIndex(Serializable):
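
A minimal sketch of how the new entities compose, with values mirroring the BM.GPU.L40S.4 entry in ads/aqua/resources/shapes.json below; it assumes GPUSpecs also carries the pre-existing gpu_count and gpu_memory_in_gbs fields, which sit outside this hunk:

    from ads.aqua.common.entities import ComputeRank, GPUSpecs

    # Values mirror the BM.GPU.L40S.4 entry in shapes.json.
    l40s = GPUSpecs(
        gpu_count=4,            # assumed pre-existing field
        gpu_memory_in_gbs=192,  # assumed pre-existing field
        gpu_type="L40S",
        quantization=["awq", "gptq", "fp8", "gguf"],
        ranking=ComputeRank(cost=60, performance=80),
    )
    print(l40s.ranking.performance)  # 80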

ads/aqua/extension/__init__.py
Lines changed: 2 additions & 0 deletions

@@ -12,6 +12,7 @@
 )
 from ads.aqua.extension.evaluation_handler import __handlers__ as __eval_handlers__
 from ads.aqua.extension.finetune_handler import __handlers__ as __finetune_handlers__
+from ads.aqua.extension.gpu_recommend_handler import __handlers__ as __gpu_handlers__
 from ads.aqua.extension.model_handler import __handlers__ as __model_handlers__
 from ads.aqua.extension.ui_handler import __handlers__ as __ui_handlers__
 from ads.aqua.extension.ui_websocket_handler import __handlers__ as __ws_handlers__
@@ -24,6 +25,7 @@
     + __ui_handlers__
     + __eval_handlers__
     + __ws_handlers__
+    + __gpu_handlers__
 )


ads/aqua/extension/gpu_recommend_handler.py
Lines changed: 50 additions & 0 deletions

@@ -0,0 +1,50 @@
+
+from tornado.web import HTTPError
+
+from ads.aqua.common.decorator import handle_exceptions
+from ads.aqua.extension.base_handler import AquaAPIhandler
+from ads.aqua.extension.errors import Errors
+from ads.aqua.shaperecommend.recommend import AquaRecommendApp
+from ads.config import COMPARTMENT_OCID
+
+
+class AquaRecommendHandler(AquaAPIhandler):
+    """
+    Handler for Aqua GPU Recommendation REST APIs.
+
+    Methods
+    -------
+    get(self, id: Union[str, List[str]])
+        Retrieves a list of AQUA deployments or model info or logs by ID.
+    post(self, *args, **kwargs)
+        Obtains the eligible compute shapes that would fit the specified model, context length, model weights, and quantization level.
+
+    Raises
+    ------
+    HTTPError: For various failure scenarios such as invalid input format, missing data, etc.
+    """
+
+    @handle_exceptions
+    def post(self, *args, **kwargs):  # noqa: ARG002
+        """
+        Lists the eligible GPU compute shapes for the specified model.
+
+        Returns
+        -------
+        List[ComputeShapeSummary]:
+            The list of the model deployment shapes.
+        """
+        try:
+            input_data = self.get_json_body()
+            # input_data["compartment_id"] = self.get_argument("compartment_id", default=COMPARTMENT_OCID)
+        except Exception as ex:
+            raise HTTPError(400, Errors.INVALID_INPUT_DATA_FORMAT) from ex
+
+        if not input_data:
+            raise HTTPError(400, Errors.NO_INPUT_DATA)
+
+        self.finish(AquaRecommendApp().which_gpu(**input_data))
+
+__handlers__ = [
+    ("gpu-shape-recommendation/?([^/]*)", AquaRecommendHandler),
+]
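
For reference, a hedged sketch of exercising this endpoint once the extension is mounted in Jupyter; the host/route prefix and the request keys are assumptions, since the JSON body is forwarded verbatim via which_gpu(**input_data) and the signature of which_gpu lives in ads/aqua/shaperecommend/recommend.py, which is not shown in this diff view:

    import requests

    # Hypothetical body: the key names must match the (not-shown) which_gpu signature.
    payload = {"model_id": "ocid1.datasciencemodel.oc1..<unique_id>"}

    # "gpu-shape-recommendation" is the registered route; the full URL depends on
    # where the Aqua extension is served (a local JupyterLab is assumed here).
    resp = requests.post(
        "http://localhost:8888/aqua/gpu-shape-recommendation",
        json=payload,
        timeout=30,
    )
    resp.raise_for_status()
    print(resp.json())  # eligible compute shapes for the model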

ads/aqua/resources/shapes.json
Lines changed: 124 additions & 0 deletions

@@ -0,0 +1,124 @@
+{
+  "shapes": {
+    "BM.GPU.H200.8": {
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 1128,
+      "gpu_type": "H200",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 100,
+        "performance": 110
+      }
+    },
+    "BM.GPU.H100.8": {
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 640,
+      "gpu_type": "H100",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 100,
+        "performance": 100
+      }
+    },
+    "BM.GPU.MI300X.8": {
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 1536,
+      "gpu_type": "MI300X",
+      "quantization": ["fp8", "gguf"],
+      "ranking": {
+        "cost": 90,
+        "performance": 90
+      }
+    },
+    "BM.GPU.A100-V2.8": {
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 640,
+      "gpu_type": "A100",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 80,
+        "performance": 70
+      }
+    },
+    "BM.GPU.B4.8": {
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 320,
+      "gpu_type": "A100",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 70,
+        "performance": 60
+      }
+    },
+    "BM.GPU.L40S-NC.4": {
+      "gpu_count": 4,
+      "gpu_memory_in_gbs": 192,
+      "gpu_type": "L40S",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
+    },
+    "BM.GPU.L40S.4": {
+      "gpu_count": 4,
+      "gpu_memory_in_gbs": 192,
+      "gpu_type": "L40S",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
+    },
+    "VM.GPU.A10.1": {
+      "gpu_count": 1,
+      "gpu_memory_in_gbs": 24,
+      "gpu_type": "A10",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 20,
+        "performance": 30
+      }
+    },
+    "VM.GPU.A10.2": {
+      "gpu_count": 2,
+      "gpu_memory_in_gbs": 48,
+      "gpu_type": "A10",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 40,
+        "performance": 40
+      }
+    },
+    "BM.GPU.A10.4": {
+      "gpu_count": 4,
+      "gpu_memory_in_gbs": 96,
+      "gpu_type": "A10",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 50,
+        "performance": 50
+      }
+    },
+    "BM.GPU2.2": {
+      "gpu_count": 2,
+      "gpu_memory_in_gbs": 32,
+      "gpu_type": "P100",
+      "quantization": ["fp16"],
+      "ranking": {
+        "cost": 30,
+        "performance": 20
+      }
+    },
+    "VM.GPU2.1": {
+      "gpu_count": 1,
+      "gpu_memory_in_gbs": 16,
+      "gpu_type": "P100",
+      "quantization": ["fp16"],
+      "ranking": {
+        "cost": 10,
+        "performance": 10
+      }
+    }
+  }
+}
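
A minimal sketch of how this metadata could drive a recommendation: filter shapes by aggregate GPU memory and supported quantization, then sort by the cost rank. This is illustrative only and is not the algorithm implemented in AquaRecommendApp:

    import json

    # Load the shape index shipped with the package (path relative to the repo root).
    with open("ads/aqua/resources/shapes.json") as f:
        shapes = json.load(f)["shapes"]

    def eligible_shapes(required_memory_gb, quant_format=None):
        """Shapes with enough total GPU memory (and quantization support), cheapest first."""
        matches = [
            (name, spec)
            for name, spec in shapes.items()
            if spec["gpu_memory_in_gbs"] >= required_memory_gb
            and (quant_format is None or quant_format in spec["quantization"])
        ]
        return sorted(matches, key=lambda kv: kv[1]["ranking"]["cost"])

    # Example: roughly 140 GB of weights served with fp8 quantization.
    for name, spec in eligible_shapes(140, "fp8"):
        print(name, spec["gpu_memory_in_gbs"], spec["ranking"])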

ads/aqua/shaperecommend/__init__.py
Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+from ads.aqua.shaperecommend.recommend import AquaRecommendApp
+
+__all__ = ["AquaRecommendApp"]

ads/aqua/shaperecommend/constants.py
Lines changed: 55 additions & 0 deletions

@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+"""
+aqua.shaperecommend.constants
+~~~~~~~~~~~~~~
+
+This module contains constants used in Aqua GPU Recommendation for Models.
+
+LLAMA_REQUIRED_FIELDS refer to fields necessary for calculating model memory for GQA Architecture Models
+
+MOE_REQUIRED_FIELDS refer to fields necessary for Mixture of Experts (MoE) Architecture Models
+
+NEXT_QUANT suggests the next quantization level based on the current quantization (if applied) or the model weights (if no quantization yet)
+"""
+LLAMA_REQUIRED_FIELDS = [
+    "num_hidden_layers", "hidden_size", "num_attention_heads",
+    "num_key_value_heads", "head_dim", "intermediate_size", "vocab_size"
+]
+
+MOE_REQUIRED_FIELDS = LLAMA_REQUIRED_FIELDS + [
+    "num_local_experts", "intermediate_size"
+]
+
+NEXT_QUANT = {
+    "float32": ["4bit", "8bit"],  # bits and bytes does not support bfloat16, pytorch responsibility
+    "bfloat16": ["4bit", "8bit"],
+    "float16": ["4bit", "8bit"],
+    "int8": ["4bit"],
+    "fp8": ["4bit", "8bit"],
+    "8bit": ["4bit"],
+    "int4": ["No smaller quantization available"],
+    "4bit": ["No smaller quantization available"]
+}
+
+# TODO:
+SHAPES_METADATA = "/Users/elizjo/tmp/accelerated-data-science/ads/aqua/resources/shapes.json"
+
+TEXT_MODEL = "text-generation"
+
+QUANT_MAPPING = {
+    "float32": 4,
+    "bfloat16": 2,
+    "float16": 2,
+    "fp16": 2,
+    "half": 2,
+    "int8": 1,
+    "fp8": 1,
+    "8bit": 1,
+    "4bit": 0.5,
+    "int4": 0.5,
+}
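
QUANT_MAPPING reads as bytes per parameter, so a rough weight-memory estimate follows directly from parameter count times bytes per parameter; a sketch of that arithmetic (KV-cache and activation overhead, which the recommender also needs to account for, are deliberately omitted):

    from ads.aqua.shaperecommend.constants import QUANT_MAPPING

    def estimate_weight_memory_gb(num_params, dtype="bfloat16"):
        """Approximate GPU memory needed for the model weights alone, in GB."""
        return num_params * QUANT_MAPPING[dtype] / 1e9

    # Example: a 7B-parameter model.
    print(estimate_weight_memory_gb(7e9, "bfloat16"))  # ~14 GB
    print(estimate_weight_memory_gb(7e9, "4bit"))      # ~3.5 GB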
