From 86a6d570e4178466eec9e21fc59b78d3e00a4e99 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 29 Aug 2025 16:33:21 -0700 Subject: [PATCH 1/3] Updated the GPU shapes index file to include additional shapes and corresponding CPU parameters. --- ads/aqua/common/entities.py | 114 +++++++++++++++-------- ads/aqua/resources/gpu_shapes_index.json | 107 +++++++++++++++++++++ 2 files changed, 183 insertions(+), 38 deletions(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index f3251ebab..5973dd035 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -46,37 +46,77 @@ class Config: arbitrary_types_allowed = True protected_namespaces = () + class ComputeRank(Serializable): """ - Represents the cost and performance ranking for a compute shape. + Represents the cost and performance rankings for a specific compute shape. + These rankings help compare different shapes based on their relative pricing + and computational capabilities. """ - cost: int = Field( - None, description="The relative rank of the cost of the shape. Range is [10 (cost-effective), 100 (most-expensive)]" + + cost: Optional[int] = Field( + None, + description=( + "Relative cost ranking of the compute shape. " + "Value ranges from 10 (most cost-effective) to 100 (most expensive). " + "Lower values indicate cheaper compute options." + ), ) - performance: int = Field( - None, description="The relative rank of the performance of the shape. Range is [10 (lower performance), 110 (highest performance)]" + performance: Optional[int] = Field( + None, + description=( + "Relative performance ranking of the compute shape. " + "Value ranges from 10 (lowest performance) to 110 (highest performance). " + "Higher values indicate better compute performance." + ), ) + class GPUSpecs(Serializable): """ - Represents the GPU specifications for a compute instance. + Represents the specifications and capabilities of a GPU-enabled compute shape. + Includes details about GPU and CPU resources, supported quantization formats, and + relative rankings for cost and performance. """ - gpu_memory_in_gbs: Optional[int] = Field( - default=None, description="The amount of GPU memory available (in GB)." - ) gpu_count: Optional[int] = Field( - default=None, description="The number of GPUs available." + default=None, + description="Number of physical GPUs available on the compute shape.", ) + + gpu_memory_in_gbs: Optional[int] = Field( + default=None, description="Total GPU memory available in gigabytes (GB)." + ) + gpu_type: Optional[str] = Field( - default=None, description="The type of GPU (e.g., 'V100, A100, H100')." + default=None, + description="Type of GPU and architecture. Example: 'H100', 'GB200'.", ) + quantization: Optional[List[str]] = Field( - default_factory=list, description="The quantization format supported by shape. (ex. bitsandbytes, fp8, etc.)" + default_factory=list, + description=( + "List of supported quantization formats for the GPU. " + "Examples: 'fp16', 'int8', 'bitsandbytes', 'bf16', 'fp4', etc." + ), + ) + + cpu_count: Optional[int] = Field( + default=None, description="Number of CPU cores available on the shape." ) + + cpu_memory_in_gbs: Optional[int] = Field( + default=None, description="Total CPU memory available in gigabytes (GB)." + ) + ranking: Optional[ComputeRank] = Field( - None, description="The relative rank of the cost and performance of the shape." + default=None, + description=( + "Relative cost and performance rankings of this shape. " + "Cost is ranked from 10 (least expensive) to 100+ (most expensive), " + "and performance from 10 (lowest) to 100+ (highest)." + ), ) @@ -97,50 +137,49 @@ class GPUShapesIndex(Serializable): class ComputeShapeSummary(Serializable): """ - Represents the specifications of a compute instance shape, - including CPU, memory, and optional GPU characteristics. + Represents a compute shape's specification including CPU, memory, and (if applicable) GPU configuration. """ available: Optional[bool] = Field( - default = False, - description="True if shape is available on user tenancy, " + default=False, + description="True if the shape is available in the user's tenancy/region.", ) + core_count: Optional[int] = Field( - default=None, - description="Total number of CPU cores available for the compute shape.", + default=None, description="Number of vCPUs available for the compute shape." ) + memory_in_gbs: Optional[int] = Field( - default=None, - description="Amount of memory (in GB) available for the compute shape.", + default=None, description="Total CPU memory available for the shape (in GB)." ) + name: Optional[str] = Field( - default=None, - description="Full name of the compute shape, e.g., 'VM.GPU.A10.2'.", + default=None, description="Name of the compute shape, e.g., 'VM.GPU.A10.2'." ) + shape_series: Optional[str] = Field( default=None, - description="Shape family or series, e.g., 'GPU', 'Standard', etc.", + description="Series or family of the shape, e.g., 'GPU', 'Standard'.", ) + gpu_specs: Optional[GPUSpecs] = Field( - default=None, - description="Optional GPU specifications associated with the shape.", + default=None, description="GPU configuration for the shape, if applicable." ) @model_validator(mode="after") @classmethod - def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary": + def populate_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary": """ - Validates and populates GPU specifications if the shape_series indicates a GPU-based shape. - - - If the shape_series contains "GPU", the validator first checks if the shape name exists - in the GPU_SPECS dictionary. If found, it creates a GPUSpecs instance with the corresponding data. - - If the shape is not found in the GPU_SPECS, it attempts to extract the GPU count from the shape name - using a regex pattern (looking for a number following a dot at the end of the name). + Attempts to populate GPU specs if the shape is GPU-based and no GPU specs are explicitly set. - The information about shapes is taken from: https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm + Logic: + - If `shape_series` includes 'GPU' and `gpu_specs` is None: + - Tries to parse the shape name to extract GPU count (e.g., from 'VM.GPU.A10.2'). + - Fallback is based on suffix numeric group (e.g., '.2' → gpu_count=2). + - If extraction fails, logs debug-level error but does not raise. Returns: - ComputeShapeSummary: The updated instance with gpu_specs populated if applicable. + ComputeShapeSummary: The updated model instance. """ try: if ( @@ -149,16 +188,15 @@ def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary": and model.name and not model.gpu_specs ): - # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2) match = re.search(r"\.(\d+)$", model.name) if match: gpu_count = int(match.group(1)) model.gpu_specs = GPUSpecs(gpu_count=gpu_count) except Exception as err: logger.debug( - f"Error occurred in attempt to extract GPU specification for the f{model.name}. " - f"Details: {err}" + f"[populate_gpu_specs] Failed to auto-populate GPU specs for shape '{model.name}': {err}" ) + return model diff --git a/ads/aqua/resources/gpu_shapes_index.json b/ads/aqua/resources/gpu_shapes_index.json index ca470138e..6b103f0f2 100644 --- a/ads/aqua/resources/gpu_shapes_index.json +++ b/ads/aqua/resources/gpu_shapes_index.json @@ -1,6 +1,85 @@ { + "BM.GPU.B200.8": { + "cpu_count": 128, + "cpu_memory_in_gbs": 4096, + "gpu_count": 8, + "gpu_memory_in_gbs": 1440, + "gpu_type": "B200", + "quantization": [ + "fp4", + "fp8", + "fp16", + "bf16", + "tf32", + "int8", + "fp64" + ], + "ranking": { + "cost": 120, + "performance": 130 + } + }, + "BM.GPU.GB200.4": { + "cpu_count": 144, + "cpu_memory_in_gbs": 1024, + "gpu_count": 4, + "gpu_memory_in_gbs": 768, + "gpu_type": "GB200", + "quantization": [ + "fp4", + "fp8", + "fp6", + "int8", + "fp16", + "bf16", + "tf32", + "fp64" + ], + "ranking": { + "cost": 110, + "performance": 120 + } + }, + "BM.GPU4.8": { + "cpu_count": 64, + "cpu_memory_in_gbs": 2048, + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + "gpu_type": "A100", + "quantization": [ + "int8", + "fp16", + "bf16", + "tf32" + ], + "ranking": { + "cost": 57, + "performance": 65 + } + }, + "VM.GPU3.8": { + "cpu_count": 24, + "cpu_memory_in_gbs": 768, + "gpu_count": 8, + "gpu_memory_in_gbs": 128, + "gpu_type": "V100", + "quantization": [ + "gptq", + "bitblas", + "aqlm", + "bitsandbytes", + "deepspeedfp", + "gguf" + ], + "ranking": { + "cost": 56, + "performance": 46 + } + }, "shapes": { "BM.GPU.A10.4": { + "cpu_count": 64, + "cpu_memory_in_gbs": 1024, "gpu_count": 4, "gpu_memory_in_gbs": 96, "gpu_type": "A10", @@ -21,6 +100,8 @@ } }, "BM.GPU.A100-V2.8": { + "cpu_count": 128, + "cpu_memory_in_gbs": 2048, "gpu_count": 8, "gpu_memory_in_gbs": 640, "gpu_type": "A100", @@ -41,6 +122,8 @@ } }, "BM.GPU.B4.8": { + "cpu_count": 64, + "cpu_memory_in_gbs": 2048, "gpu_count": 8, "gpu_memory_in_gbs": 320, "gpu_type": "A100", @@ -61,6 +144,8 @@ } }, "BM.GPU.H100.8": { + "cpu_count": 112, + "cpu_memory_in_gbs": 2048, "gpu_count": 8, "gpu_memory_in_gbs": 640, "gpu_type": "H100", @@ -82,6 +167,8 @@ } }, "BM.GPU.H200.8": { + "cpu_count": 112, + "cpu_memory_in_gbs": 3072, "gpu_count": 8, "gpu_memory_in_gbs": 1128, "gpu_type": "H200", @@ -103,6 +190,8 @@ } }, "BM.GPU.L40S-NC.4": { + "cpu_count": 112, + "cpu_memory_in_gbs": 1024, "gpu_count": 4, "gpu_memory_in_gbs": 192, "gpu_type": "L40S", @@ -124,6 +213,8 @@ } }, "BM.GPU.L40S.4": { + "cpu_count": 112, + "cpu_memory_in_gbs": 1024, "gpu_count": 4, "gpu_memory_in_gbs": 192, "gpu_type": "L40S", @@ -145,6 +236,8 @@ } }, "BM.GPU.MI300X.8": { + "cpu_count": 112, + "cpu_memory_in_gbs": 2048, "gpu_count": 8, "gpu_memory_in_gbs": 1536, "gpu_type": "MI300X", @@ -158,6 +251,8 @@ } }, "BM.GPU2.2": { + "cpu_count": 28, + "cpu_memory_in_gbs": 192, "gpu_count": 2, "gpu_memory_in_gbs": 32, "gpu_type": "P100", @@ -170,6 +265,8 @@ } }, "VM.GPU.A10.1": { + "cpu_count": 15, + "cpu_memory_in_gbs": 240, "gpu_count": 1, "gpu_memory_in_gbs": 24, "gpu_type": "A10", @@ -190,6 +287,8 @@ } }, "VM.GPU.A10.2": { + "cpu_count": 30, + "cpu_memory_in_gbs": 480, "gpu_count": 2, "gpu_memory_in_gbs": 48, "gpu_type": "A10", @@ -210,6 +309,8 @@ } }, "VM.GPU2.1": { + "cpu_count": 12, + "cpu_memory_in_gbs": 72, "gpu_count": 1, "gpu_memory_in_gbs": 16, "gpu_type": "P100", @@ -222,6 +323,8 @@ } }, "VM.GPU3.1": { + "cpu_count": 6, + "cpu_memory_in_gbs": 90, "gpu_count": 1, "gpu_memory_in_gbs": 16, "gpu_type": "V100", @@ -239,6 +342,8 @@ } }, "VM.GPU3.2": { + "cpu_count": 12, + "cpu_memory_in_gbs": 180, "gpu_count": 2, "gpu_memory_in_gbs": 32, "gpu_type": "V100", @@ -256,6 +361,8 @@ } }, "VM.GPU3.4": { + "cpu_count": 24, + "cpu_memory_in_gbs": 360, "gpu_count": 4, "gpu_memory_in_gbs": 64, "gpu_type": "V100", From b5b5434ad25167ff1bc0cb618ecd80384e72e49c Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 29 Aug 2025 16:44:45 -0700 Subject: [PATCH 2/3] Fixes the structure of the gpu_shapes_index.json --- ads/aqua/resources/gpu_shapes_index.json | 154 +++++++++++------------ 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/ads/aqua/resources/gpu_shapes_index.json b/ads/aqua/resources/gpu_shapes_index.json index 6b103f0f2..f4765ede6 100644 --- a/ads/aqua/resources/gpu_shapes_index.json +++ b/ads/aqua/resources/gpu_shapes_index.json @@ -1,81 +1,4 @@ { - "BM.GPU.B200.8": { - "cpu_count": 128, - "cpu_memory_in_gbs": 4096, - "gpu_count": 8, - "gpu_memory_in_gbs": 1440, - "gpu_type": "B200", - "quantization": [ - "fp4", - "fp8", - "fp16", - "bf16", - "tf32", - "int8", - "fp64" - ], - "ranking": { - "cost": 120, - "performance": 130 - } - }, - "BM.GPU.GB200.4": { - "cpu_count": 144, - "cpu_memory_in_gbs": 1024, - "gpu_count": 4, - "gpu_memory_in_gbs": 768, - "gpu_type": "GB200", - "quantization": [ - "fp4", - "fp8", - "fp6", - "int8", - "fp16", - "bf16", - "tf32", - "fp64" - ], - "ranking": { - "cost": 110, - "performance": 120 - } - }, - "BM.GPU4.8": { - "cpu_count": 64, - "cpu_memory_in_gbs": 2048, - "gpu_count": 8, - "gpu_memory_in_gbs": 320, - "gpu_type": "A100", - "quantization": [ - "int8", - "fp16", - "bf16", - "tf32" - ], - "ranking": { - "cost": 57, - "performance": 65 - } - }, - "VM.GPU3.8": { - "cpu_count": 24, - "cpu_memory_in_gbs": 768, - "gpu_count": 8, - "gpu_memory_in_gbs": 128, - "gpu_type": "V100", - "quantization": [ - "gptq", - "bitblas", - "aqlm", - "bitsandbytes", - "deepspeedfp", - "gguf" - ], - "ranking": { - "cost": 56, - "performance": 46 - } - }, "shapes": { "BM.GPU.A10.4": { "cpu_count": 64, @@ -121,6 +44,26 @@ "performance": 70 } }, + "BM.GPU.B200.8": { + "cpu_count": 128, + "cpu_memory_in_gbs": 4096, + "gpu_count": 8, + "gpu_memory_in_gbs": 1440, + "gpu_type": "B200", + "quantization": [ + "fp4", + "fp8", + "fp16", + "bf16", + "tf32", + "int8", + "fp64" + ], + "ranking": { + "cost": 120, + "performance": 130 + } + }, "BM.GPU.B4.8": { "cpu_count": 64, "cpu_memory_in_gbs": 2048, @@ -143,6 +86,27 @@ "performance": 60 } }, + "BM.GPU.GB200.4": { + "cpu_count": 144, + "cpu_memory_in_gbs": 1024, + "gpu_count": 4, + "gpu_memory_in_gbs": 768, + "gpu_type": "GB200", + "quantization": [ + "fp4", + "fp8", + "fp6", + "int8", + "fp16", + "bf16", + "tf32", + "fp64" + ], + "ranking": { + "cost": 110, + "performance": 120 + } + }, "BM.GPU.H100.8": { "cpu_count": 112, "cpu_memory_in_gbs": 2048, @@ -264,6 +228,23 @@ "performance": 20 } }, + "BM.GPU4.8": { + "cpu_count": 64, + "cpu_memory_in_gbs": 2048, + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + "gpu_type": "A100", + "quantization": [ + "int8", + "fp16", + "bf16", + "tf32" + ], + "ranking": { + "cost": 57, + "performance": 65 + } + }, "VM.GPU.A10.1": { "cpu_count": 15, "cpu_memory_in_gbs": 240, @@ -378,6 +359,25 @@ "cost": 55, "performance": 45 } + }, + "VM.GPU3.8": { + "cpu_count": 24, + "cpu_memory_in_gbs": 768, + "gpu_count": 8, + "gpu_memory_in_gbs": 128, + "gpu_type": "V100", + "quantization": [ + "gptq", + "bitblas", + "aqlm", + "bitsandbytes", + "deepspeedfp", + "gguf" + ], + "ranking": { + "cost": 56, + "performance": 46 + } } } } From 9223346f6d69e35627b3b88a52f14b58e6eff6a0 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Tue, 2 Sep 2025 20:08:35 -0700 Subject: [PATCH 3/3] Fixes unit tests --- .../result-Devstral-Small-2507-GQA.json | 144 ++++++++++ .../result-Kimi-K2-Instruct-MOE.json | 126 ++++++--- ...ult-Qwen3-235B-A22B-Instruct-2507-FP8.json | 259 ++++++++++++------ .../with_extras/aqua/test_recommend.py | 12 +- 4 files changed, 415 insertions(+), 126 deletions(-) diff --git a/tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json b/tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json index 36301b780..87fe896c9 100644 --- a/tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json +++ b/tests/unitary/with_extras/aqua/test_data/recommend/result-Devstral-Small-2507-GQA.json @@ -21,6 +21,8 @@ "available": false, "core_count": null, "gpu_specs": { + "cpu_count": 64, + "cpu_memory_in_gbs": 1024, "gpu_count": 4, "gpu_memory_in_gbs": 96, "gpu_type": "A10", @@ -45,6 +47,95 @@ "shape_series": "GPU" } }, + { + "configurations": [ + { + "deployment_params": { + "max_model_len": 131072, + "params": "", + "quantization": "bfloat16" + }, + "model_details": { + "kv_cache_size_gb": 21.47, + "model_size_gb": 47.98, + "total_model_gb": 69.46 + }, + "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 1440.0GB allowed)." + } + ], + "shape_details": { + "available": false, + "core_count": null, + "gpu_specs": { + "cpu_count": 128, + "cpu_memory_in_gbs": 4096, + "gpu_count": 8, + "gpu_memory_in_gbs": 1440, + "gpu_type": "B200", + "quantization": [ + "fp4", + "fp8", + "fp16", + "bf16", + "tf32", + "int8", + "fp64" + ], + "ranking": { + "cost": 120, + "performance": 130 + } + }, + "memory_in_gbs": null, + "name": "BM.GPU.B200.8", + "shape_series": "GPU" + } + }, + { + "configurations": [ + { + "deployment_params": { + "max_model_len": 131072, + "params": "", + "quantization": "bfloat16" + }, + "model_details": { + "kv_cache_size_gb": 21.47, + "model_size_gb": 47.98, + "total_model_gb": 69.46 + }, + "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 768.0GB allowed)." + } + ], + "shape_details": { + "available": false, + "core_count": null, + "gpu_specs": { + "cpu_count": 144, + "cpu_memory_in_gbs": 1024, + "gpu_count": 4, + "gpu_memory_in_gbs": 768, + "gpu_type": "GB200", + "quantization": [ + "fp4", + "fp8", + "fp6", + "int8", + "fp16", + "bf16", + "tf32", + "fp64" + ], + "ranking": { + "cost": 110, + "performance": 120 + } + }, + "memory_in_gbs": null, + "name": "BM.GPU.GB200.4", + "shape_series": "GPU" + } + }, { "configurations": [ { @@ -65,6 +156,8 @@ "available": false, "core_count": null, "gpu_specs": { + "cpu_count": 112, + "cpu_memory_in_gbs": 3072, "gpu_count": 8, "gpu_memory_in_gbs": 1128, "gpu_type": "H200", @@ -110,6 +203,8 @@ "available": false, "core_count": null, "gpu_specs": { + "cpu_count": 112, + "cpu_memory_in_gbs": 1024, "gpu_count": 4, "gpu_memory_in_gbs": 192, "gpu_type": "L40S", @@ -155,6 +250,8 @@ "available": false, "core_count": null, "gpu_specs": { + "cpu_count": 112, + "cpu_memory_in_gbs": 1024, "gpu_count": 4, "gpu_memory_in_gbs": 192, "gpu_type": "L40S", @@ -200,6 +297,8 @@ "available": false, "core_count": null, "gpu_specs": { + "cpu_count": 112, + "cpu_memory_in_gbs": 2048, "gpu_count": 8, "gpu_memory_in_gbs": 1536, "gpu_type": "MI300X", @@ -217,6 +316,47 @@ "shape_series": "GPU" } }, + { + "configurations": [ + { + "deployment_params": { + "max_model_len": 131072, + "params": "", + "quantization": "bfloat16" + }, + "model_details": { + "kv_cache_size_gb": 21.47, + "model_size_gb": 47.98, + "total_model_gb": 69.46 + }, + "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (69.5GB used / 320.0GB allowed)." + } + ], + "shape_details": { + "available": false, + "core_count": null, + "gpu_specs": { + "cpu_count": 64, + "cpu_memory_in_gbs": 2048, + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + "gpu_type": "A100", + "quantization": [ + "int8", + "fp16", + "bf16", + "tf32" + ], + "ranking": { + "cost": 57, + "performance": 65 + } + }, + "memory_in_gbs": null, + "name": "BM.GPU4.8", + "shape_series": "GPU" + } + }, { "configurations": [ { @@ -237,6 +377,8 @@ "available": false, "core_count": null, "gpu_specs": { + "cpu_count": 15, + "cpu_memory_in_gbs": 240, "gpu_count": 1, "gpu_memory_in_gbs": 24, "gpu_type": "A10", @@ -281,6 +423,8 @@ "available": false, "core_count": null, "gpu_specs": { + "cpu_count": 30, + "cpu_memory_in_gbs": 480, "gpu_count": 2, "gpu_memory_in_gbs": 48, "gpu_type": "A10", diff --git a/tests/unitary/with_extras/aqua/test_data/recommend/result-Kimi-K2-Instruct-MOE.json b/tests/unitary/with_extras/aqua/test_data/recommend/result-Kimi-K2-Instruct-MOE.json index 7a4966574..c5b7c5a46 100644 --- a/tests/unitary/with_extras/aqua/test_data/recommend/result-Kimi-K2-Instruct-MOE.json +++ b/tests/unitary/with_extras/aqua/test_data/recommend/result-Kimi-K2-Instruct-MOE.json @@ -1,43 +1,89 @@ { - "display_name": "Kimi-K2-Instruct-MOE", - "recommendations": [ + "display_name": "Kimi-K2-Instruct-MOE", + "recommendations": [ + { + "configurations": [ { - "shape_details": { - "available": false, - "core_count": null, - "memory_in_gbs": null, - "name": "BM.GPU.MI300X.8", - "shape_series": "GPU", - "gpu_specs": { - "gpu_memory_in_gbs": 1536, - "gpu_count": 8, - "gpu_type": "MI300X", - "quantization": [ - "fp8", - "gguf" - ], - "ranking": { - "cost": 90, - "performance": 90 - } - } - }, - "configurations": [ - { - "model_details": { - "model_size_gb": 1046.48, - "kv_cache_size_gb": 3.58, - "total_model_gb": 1050.06 - }, - "deployment_params": { - "quantization": "fp8", - "max_model_len": 2048, - "params": "--max-model-len 2048" - }, - "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1536.0GB allowed)." - } - ] + "deployment_params": { + "max_model_len": 2048, + "params": "--max-model-len 2048", + "quantization": "fp8" + }, + "model_details": { + "kv_cache_size_gb": 3.58, + "model_size_gb": 1046.48, + "total_model_gb": 1050.06 + }, + "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1440.0GB allowed)." } - ], - "troubleshoot": "" -} \ No newline at end of file + ], + "shape_details": { + "available": false, + "core_count": null, + "gpu_specs": { + "cpu_count": 128, + "cpu_memory_in_gbs": 4096, + "gpu_count": 8, + "gpu_memory_in_gbs": 1440, + "gpu_type": "B200", + "quantization": [ + "fp4", + "fp8", + "fp16", + "bf16", + "tf32", + "int8", + "fp64" + ], + "ranking": { + "cost": 120, + "performance": 130 + } + }, + "memory_in_gbs": null, + "name": "BM.GPU.B200.8", + "shape_series": "GPU" + } + }, + { + "configurations": [ + { + "deployment_params": { + "max_model_len": 2048, + "params": "--max-model-len 2048", + "quantization": "fp8" + }, + "model_details": { + "kv_cache_size_gb": 3.58, + "model_size_gb": 1046.48, + "total_model_gb": 1050.06 + }, + "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (1050.1GB used / 1536.0GB allowed)." + } + ], + "shape_details": { + "available": false, + "core_count": null, + "gpu_specs": { + "cpu_count": 112, + "cpu_memory_in_gbs": 2048, + "gpu_count": 8, + "gpu_memory_in_gbs": 1536, + "gpu_type": "MI300X", + "quantization": [ + "fp8", + "gguf" + ], + "ranking": { + "cost": 90, + "performance": 90 + } + }, + "memory_in_gbs": null, + "name": "BM.GPU.MI300X.8", + "shape_series": "GPU" + } + } + ], + "troubleshoot": "" +} diff --git a/tests/unitary/with_extras/aqua/test_data/recommend/result-Qwen3-235B-A22B-Instruct-2507-FP8.json b/tests/unitary/with_extras/aqua/test_data/recommend/result-Qwen3-235B-A22B-Instruct-2507-FP8.json index b75fb09cc..dfb7ec7c2 100644 --- a/tests/unitary/with_extras/aqua/test_data/recommend/result-Qwen3-235B-A22B-Instruct-2507-FP8.json +++ b/tests/unitary/with_extras/aqua/test_data/recommend/result-Qwen3-235B-A22B-Instruct-2507-FP8.json @@ -1,88 +1,181 @@ { - "display_name": "Qwen3-235B-A22B-Instruct-2507-FP8", - "recommendations": [ + "display_name": "Qwen3-235B-A22B-Instruct-2507-FP8", + "recommendations": [ + { + "configurations": [ { - "shape_details": { - "available": false, - "core_count": null, - "memory_in_gbs": null, - "name": "BM.GPU.H200.8", - "shape_series": "GPU", - "gpu_specs": { - "gpu_memory_in_gbs": 1128, - "gpu_count": 8, - "gpu_type": "H200", - "quantization": [ - "awq", - "gptq", - "marlin", - "fp8", - "int8", - "bitblas", - "aqlm", - "bitsandbytes", - "deepspeedfp", - "gguf" - ], - "ranking": { - "cost": 100, - "performance": 110 - } - } - }, - "configurations": [ - { - "model_details": { - "model_size_gb": 231.89, - "kv_cache_size_gb": 0.39, - "total_model_gb": 232.28 - }, - "deployment_params": { - "quantization": "fp8", - "max_model_len": 2048, - "params": "--max-model-len 2048" - }, - "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 1128.0GB allowed)." - } - ] + "deployment_params": { + "max_model_len": 2048, + "params": "--max-model-len 2048", + "quantization": "fp8" + }, + "model_details": { + "kv_cache_size_gb": 0.39, + "model_size_gb": 231.89, + "total_model_gb": 232.28 + }, + "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 1440.0GB allowed)." + } + ], + "shape_details": { + "available": false, + "core_count": null, + "gpu_specs": { + "cpu_count": 128, + "cpu_memory_in_gbs": 4096, + "gpu_count": 8, + "gpu_memory_in_gbs": 1440, + "gpu_type": "B200", + "quantization": [ + "fp4", + "fp8", + "fp16", + "bf16", + "tf32", + "int8", + "fp64" + ], + "ranking": { + "cost": 120, + "performance": 130 + } + }, + "memory_in_gbs": null, + "name": "BM.GPU.B200.8", + "shape_series": "GPU" + } + }, + { + "configurations": [ + { + "deployment_params": { + "max_model_len": 2048, + "params": "--max-model-len 2048", + "quantization": "fp8" + }, + "model_details": { + "kv_cache_size_gb": 0.39, + "model_size_gb": 231.89, + "total_model_gb": 232.28 + }, + "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 768.0GB allowed)." + } + ], + "shape_details": { + "available": false, + "core_count": null, + "gpu_specs": { + "cpu_count": 144, + "cpu_memory_in_gbs": 1024, + "gpu_count": 4, + "gpu_memory_in_gbs": 768, + "gpu_type": "GB200", + "quantization": [ + "fp4", + "fp8", + "fp6", + "int8", + "fp16", + "bf16", + "tf32", + "fp64" + ], + "ranking": { + "cost": 110, + "performance": 120 + } }, + "memory_in_gbs": null, + "name": "BM.GPU.GB200.4", + "shape_series": "GPU" + } + }, + { + "configurations": [ { - "shape_details": { - "available": false, - "core_count": null, - "memory_in_gbs": null, - "name": "BM.GPU.MI300X.8", - "shape_series": "GPU", - "gpu_specs": { - "gpu_memory_in_gbs": 1536, - "gpu_count": 8, - "gpu_type": "MI300X", - "quantization": [ - "fp8", - "gguf" - ], - "ranking": { - "cost": 90, - "performance": 90 - } - } - }, - "configurations": [ - { - "model_details": { - "model_size_gb": 231.89, - "kv_cache_size_gb": 0.39, - "total_model_gb": 232.28 - }, - "deployment_params": { - "quantization": "fp8", - "max_model_len": 2048, - "params": "--max-model-len 2048" - }, - "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 1536.0GB allowed)." - } - ] + "deployment_params": { + "max_model_len": 2048, + "params": "--max-model-len 2048", + "quantization": "fp8" + }, + "model_details": { + "kv_cache_size_gb": 0.39, + "model_size_gb": 231.89, + "total_model_gb": 232.28 + }, + "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 1128.0GB allowed)." } - ], - "troubleshoot": "" -} \ No newline at end of file + ], + "shape_details": { + "available": false, + "core_count": null, + "gpu_specs": { + "cpu_count": 112, + "cpu_memory_in_gbs": 3072, + "gpu_count": 8, + "gpu_memory_in_gbs": 1128, + "gpu_type": "H200", + "quantization": [ + "awq", + "gptq", + "marlin", + "fp8", + "int8", + "bitblas", + "aqlm", + "bitsandbytes", + "deepspeedfp", + "gguf" + ], + "ranking": { + "cost": 100, + "performance": 110 + } + }, + "memory_in_gbs": null, + "name": "BM.GPU.H200.8", + "shape_series": "GPU" + } + }, + { + "configurations": [ + { + "deployment_params": { + "max_model_len": 2048, + "params": "--max-model-len 2048", + "quantization": "fp8" + }, + "model_details": { + "kv_cache_size_gb": 0.39, + "model_size_gb": 231.89, + "total_model_gb": 232.28 + }, + "recommendation": "No override PARAMS needed. \n\nModel fits well within the allowed compute shape (232.3GB used / 1536.0GB allowed)." + } + ], + "shape_details": { + "available": false, + "core_count": null, + "gpu_specs": { + "cpu_count": 112, + "cpu_memory_in_gbs": 2048, + "gpu_count": 8, + "gpu_memory_in_gbs": 1536, + "gpu_type": "MI300X", + "quantization": [ + "fp8", + "gguf" + ], + "ranking": { + "cost": 90, + "performance": 90 + } + }, + "memory_in_gbs": null, + "name": "BM.GPU.MI300X.8", + "shape_series": "GPU" + } + } + ], + "troubleshoot": "" +} diff --git a/tests/unitary/with_extras/aqua/test_recommend.py b/tests/unitary/with_extras/aqua/test_recommend.py index bfe1b4a54..cb61dae86 100644 --- a/tests/unitary/with_extras/aqua/test_recommend.py +++ b/tests/unitary/with_extras/aqua/test_recommend.py @@ -31,7 +31,9 @@ ) from ads.model.model_metadata import ModelCustomMetadata, ModelProvenanceMetadata -CONFIG_ROOT = os.path.join(os.path.dirname(__file__), "test_data/recommend/") +CONFIG_ROOT = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "test_data/recommend/" +) def load_config(filename): @@ -232,8 +234,12 @@ def test_suggested_quantizations_from_file( # --- Tests for recommend.py --- class GPUShapesIndexMock: def __init__(self): - # local_path = os.path.join(os.path.dirname(__file__), "../../resources", "gpu_shapes_index.json") - local_path = "ads/aqua/resources/gpu_shapes_index.json" + local_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "../../../../ads/aqua/resources", + "gpu_shapes_index.json", + ) + # local_path = "ads/aqua/resources/gpu_shapes_index.json" with open(local_path) as f: local_data = json.load(f)