
[fp8] Only assert when CUDA is available. #2590

Draft · wants to merge 3 commits into main
28 changes: 0 additions & 28 deletions torchao/float8/inference.py
@@ -17,10 +17,6 @@
     PerRow,
     PerTensor,
 )
-from torchao.utils import (
-    is_MI300,
-    is_sm_at_least_89,
-)
 
 Tensor = torch.Tensor
 
@@ -147,27 +143,3 @@ def _normalize_granularity(
             f"Invalid granularity specification: {granularity}, only PerTensor or PerRow are supported."
         )
     return processed_granularity
-
-
-def _check_hardware_support(
-    granularities: Tuple[FP8Granularity, FP8Granularity],
-) -> None:
-    """
-    Validate that the hardware supports the requested granularities.
-
-    Args:
-        granularities: Tuple of (activation_granularity, weight_granularity)
-
-    Raises:
-        AssertionError: If hardware doesn't support the requested granularity
-        ValueError: If invalid granularity type is provided
-    """
-    for _granularity in granularities:
-        if not isinstance(_granularity, (PerTensor, PerRow)):
-            raise ValueError(
-                f"Invalid granularity type: {_granularity}, only PerTensor or PerRow are supported."
-            )
-
-    assert is_sm_at_least_89() or is_MI300(), (
-        "Float8 dynamic quantization requires CUDA compute capability ≥8.9 or MI300+."
-    )
Contributor:

This change will impact other devices, such as CPU or any unknown device. Suggest adding a common utility function to judge fp8 capability and applying it to all fp8-related changes. For now, this function should only pass when the CUDA compute capability is ≥8.9, the device is MI300+, or an XPU device is available.
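
A minimal sketch of the common utility the reviewer suggests, assuming a hypothetical name _is_fp8_supported and the capability rules described in the comment (this helper is not part of the PR):

import torch

from torchao.utils import is_MI300, is_sm_at_least_89


def _is_fp8_supported() -> bool:
    # Hypothetical helper sketching the reviewer's suggestion; not part of this PR.
    # CUDA: require SM 8.9+ (Ada/Hopper) or AMD MI300-class hardware.
    if torch.cuda.is_available():
        return is_sm_at_least_89() or is_MI300()
    # XPU: float8 is supported by simulation, so it always qualifies.
    if torch.xpu.is_available():
        return True
    # CPU and unknown devices are treated as unsupported.
    return False

Centralizing the check this way would let every fp8 config path share one capability rule instead of repeating bare asserts.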

54 changes: 45 additions & 9 deletions torchao/quantization/quant_api.py
@@ -60,7 +60,6 @@
 from torchao.float8.inference import (
     Float8MMConfig,
     FP8Granularity,
-    _check_hardware_support,
     _normalize_granularity,
 )
 from torchao.quantization.linear_activation_weight_observed_tensor import (
@@ -270,6 +269,46 @@ def change_linear_weights_to_int4_woqtensors(
 ########
 
 
+def _check_hardware_support(
+    config: Union[
+        "Float8DynamicActivationFloat8WeightConfig",
+        "Float8StaticActivationFloat8WeightConfig",
+    ],
+) -> None:
+    """
+    Validate that the hardware supports the given float8 quantization configuration.
+
+    Args:
+        config: Must be one of Float8DynamicActivationFloat8WeightConfig or Float8StaticActivationFloat8WeightConfig.
+
+    Raises:
+        AssertionError: If the hardware doesn't support the float8 feature.
+        ValueError: If an invalid granularity type is provided.
+        TypeError: If config is not of the correct type.
+    """
+    if not isinstance(
+        config,
+        (
+            Float8DynamicActivationFloat8WeightConfig,
+            Float8StaticActivationFloat8WeightConfig,
+        ),
+    ):
+        raise TypeError(
+            f"config must be one of Float8DynamicActivationFloat8WeightConfig or Float8StaticActivationFloat8WeightConfig, got {type(config)}"
+        )
+    # XPU by default supports float8 by simulation, thus always passes.
+    if not torch.xpu.is_available():
+        assert is_sm_at_least_89() or is_MI300(), (
+            f"{config.__class__.__name__} requires CUDA compute capability ≥8.9 or MI300+."
+        )
+    if isinstance(config, Float8DynamicActivationFloat8WeightConfig):
+        for _granularity in config.granularity:
+            if not isinstance(_granularity, (PerTensor, PerRow)):
+                raise ValueError(
+                    f"Invalid granularity type: {_granularity}, only PerTensor or PerRow are supported."
+                )
+
+
 def _replace_with_custom_fn_if_matches_filter(
     model,
     replacement_fn,
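
As a usage illustration (not taken from the PR), the new check is driven entirely by the config object; the granularity choice below is an arbitrary example, and the private import is shown only for demonstration:

from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, PerRow
from torchao.quantization.quant_api import _check_hardware_support

# PerRow granularity here is an example choice, assumed to be normalized
# to an (activation, weight) tuple by the config.
config = Float8DynamicActivationFloat8WeightConfig(granularity=PerRow())

# Raises TypeError for non-float8 configs, ValueError for unsupported
# granularities, and AssertionError when no capable accelerator is present.
_check_hardware_support(config)
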
@@ -1633,7 +1672,7 @@ def _float8_dynamic_activation_float8_weight_quantize_tensor(weight, config):
     mm_config = config.mm_config
 
     # Ensure works on device
-    _check_hardware_support(granularity)
+    _check_hardware_support(config)
     activation_granularity, weight_granularity = granularity
 
     if not _fp8_mm_compat(weight):
@@ -1672,9 +1711,7 @@ def _float8_dynamic_activation_float8_weight_quantize_tensor(weight, config):
 def _float8_dynamic_activation_float8_weight_transform(
     module: torch.nn.Module, config: Float8DynamicActivationFloat8WeightConfig
 ):
-    assert is_sm_at_least_89() or is_MI300(), (
-        "Float8 dynamic activation quantization is only supported on CUDA>=8.9 and MI300+"
-    )
+    _check_hardware_support(config)
     if config.set_inductor_config:
         torchao.quantization.utils.recommended_inductor_config_setter()
 
@@ -1710,7 +1747,8 @@ class Float8DynamicActivationFloat8SemiSparseWeightConfig(AOBaseConfig):
 def _float8_dynamic_activation_float8_semi_sparse_weight_transform(
     module: torch.nn.Module, config: Float8DynamicActivationFloat8SemiSparseWeightConfig
 ):
-    assert is_sm_at_least_90(), "Float8 quantization is only supported on CUDA>=9.0"
+    if torch.cuda.is_available():
+        assert is_sm_at_least_90(), "Float8 quantization is only supported on CUDA>=9.0"
 
     weight = module.weight
     weight_dtype = config.weight_dtype
@@ -1769,9 +1807,7 @@ def __post_init__(self):
 def _float8_static_activation_float8_weight_transform(
     module: torch.nn.Module, config: Float8StaticActivationFloat8WeightConfig
 ):
-    assert is_sm_at_least_89() or is_MI300(), (
-        "Float8 static activation quantization is only supported on CUDA 8.9 and above"
-    )
+    _check_hardware_support(config)
 
     scale = config.scale
     activation_dtype = config.activation_dtype
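
For context, a hedged end-to-end sketch of how these transforms are reached; the toy model and granularity choice are assumptions, not part of the PR:

import torch
from torchao.quantization import (
    Float8DynamicActivationFloat8WeightConfig,
    PerRow,
    quantize_,
)

# Toy model purely for illustration.
model = torch.nn.Sequential(torch.nn.Linear(1024, 1024, dtype=torch.bfloat16))
if torch.cuda.is_available():
    model = model.cuda()

# quantize_ dispatches to _float8_dynamic_activation_float8_weight_transform,
# which after this PR validates hardware via _check_hardware_support(config)
# rather than asserting unconditionally on non-CUDA devices.
quantize_(model, Float8DynamicActivationFloat8WeightConfig(granularity=PerRow()))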