From 247e4c7472b89a4771b6a0c6ce657e20cff9b8ec Mon Sep 17 00:00:00 2001
From: Hrishith Thadicherla <hthadicherla@nvidia.com>
Date: Fri, 24 Oct 2025 15:12:58 +0530
Subject: [PATCH] Skip dtype-mismatch check for INT4/UINT4 constants stored as int8/uint8

Signed-off-by: Hrishith Thadicherla <hthadicherla@nvidia.com>
---
 modelopt/onnx/quantization/gs_patching.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/modelopt/onnx/quantization/gs_patching.py b/modelopt/onnx/quantization/gs_patching.py
index 8097d8a98..a0eea8495 100644
--- a/modelopt/onnx/quantization/gs_patching.py
+++ b/modelopt/onnx/quantization/gs_patching.py
@@ -39,7 +39,14 @@ def _make_constant(
     converted_dtype = (
         dtype if isinstance(values, LazyValues) else onnx.helper.tensor_dtype_to_np_dtype(dtype)
     )
-    if values.dtype != converted_dtype:
+
+    # Allow int8/uint8 as intermediate representation for INT4/UINT4
+    # INT4/UINT4 values are stored as int8/uint8 in numpy arrays and packed during export
+    is_valid_int4_intermediate = (dtype == onnx.TensorProto.INT4 and values.dtype == np.int8) or (
+        dtype == onnx.TensorProto.UINT4 and values.dtype == np.uint8
+    )
+
+    if not is_valid_int4_intermediate and values.dtype != converted_dtype:
         logger.error(
             f"Trying to create tensor with incompatible types: `{values.dtype}`, `{dtype}`"
         )