
Commit 8113df6

ONNX save fix
Signed-off-by: Riyad Islam <[email protected]>
1 parent 72f23dc commit 8113df6

File tree: 5 files changed, +44 -32 lines


examples/chained_optimizations/bert_prune_distill_quantize.py

Lines changed: 3 additions & 2 deletions
@@ -71,7 +71,7 @@
 import modelopt.torch.opt as mto
 import modelopt.torch.prune as mtp
 import modelopt.torch.quantization as mtq
-from modelopt.torch._deploy.utils import get_onnx_bytes
+from modelopt.torch._deploy.utils import get_onnx_bytes_and_metadata

 # Enable automatic save/load of modelopt_state with huggingface checkpointing
 mto.enable_huggingface_checkpointing()
@@ -1222,7 +1222,8 @@ def forward_loop(model):
     dummy_input = dummy_input.to(accelerator.device)

     with open(args.onnx_export_path, "wb") as f:
-        f.write(get_onnx_bytes(model, dummy_input, onnx_opset=14))
+        onnx_bytes, _ = get_onnx_bytes_and_metadata(model, dummy_input, onnx_opset=14)
+        f.write(onnx_bytes)

     logger.info("Done!")
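The example now writes the first return value of get_onnx_bytes_and_metadata(), which the other files in this commit deserialize with OnnxBytes.from_bytes(). A minimal sketch of unpacking such a file back into a plain ONNX protobuf, assuming that layout (the paths below are illustrative, not part of this commit):

# Sketch: recover the main .onnx graph from the bytes written above.
# Assumes the file holds the serialized OnnxBytes payload returned by
# get_onnx_bytes_and_metadata()[0]; "bert.onnx_export" / "bert.onnx" are made-up paths.
from modelopt.torch._deploy.utils import OnnxBytes

with open("bert.onnx_export", "rb") as f:
    onnx_bytes_obj = OnnxBytes.from_bytes(f.read())

with open("bert.onnx", "wb") as f:
    f.write(onnx_bytes_obj.get_onnx_model_file_bytes())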

examples/onnx_ptq/download_example_onnx.py

Lines changed: 14 additions & 4 deletions
@@ -20,7 +20,7 @@
 import timm
 import torch

-from modelopt.torch._deploy.utils import get_onnx_bytes
+from modelopt.torch._deploy.utils import OnnxBytes, get_onnx_bytes_and_metadata


 def export_to_onnx(model, input_shape, onnx_save_path, device, weights_dtype="fp32"):
@@ -29,15 +29,25 @@ def export_to_onnx(model, input_shape, onnx_save_path, device, weights_dtype="fp32"):
     input_dtype = model.parameters().__next__().dtype
     input_tensor = torch.randn(input_shape, dtype=input_dtype).to(device)

-    onnx_model_bytes = get_onnx_bytes(
+    onnx_bytes, _ = get_onnx_bytes_and_metadata(
         model=model,
         dummy_input=(input_tensor,),
         weights_dtype=weights_dtype,
     )
+    onnx_model = OnnxBytes.from_bytes(onnx_bytes)

     # Write ONNX model to disk
-    with open(onnx_save_path, "wb") as f:
-        f.write(onnx_model_bytes)
+    save_dir = os.path.dirname(os.path.abspath(onnx_save_path))
+    os.makedirs(save_dir, exist_ok=True)
+
+    for filename, file_bytes in onnx_model.onnx_model.items():
+        if filename.endswith(".onnx"):
+            file_path = onnx_save_path
+        else:
+            file_path = os.path.join(save_dir, filename)
+        with open(file_path, "wb") as f:
+            f.write(file_bytes)
+        print(f"✅ {file_path}")


 if __name__ == "__main__":
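For reference, a hedged usage sketch of the updated helper; the timm model name, input shape, and output path are illustrative assumptions, not taken from this script's argument parsing:

# Sketch: calling the updated export_to_onnx() from this example script.
# Model name, shape, dtype, and path are placeholders chosen for illustration.
import timm
import torch

model = timm.create_model("resnet50", pretrained=False).eval()
export_to_onnx(
    model=model,
    input_shape=(1, 3, 224, 224),
    onnx_save_path="onnx_models/resnet50.onnx",  # any external-data files are written beside it
    device="cpu",
    weights_dtype="fp32",
)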

modelopt/torch/_deploy/utils/torch_onnx.py

Lines changed: 5 additions & 11 deletions
@@ -129,7 +129,11 @@ def to_bytes(self) -> bytes:
         return json.dumps(data).encode("utf-8")

     def get_onnx_model_file_bytes(self) -> bytes:
-        """Returns the bytes of the onnx model file."""
+        """Returns the bytes of the onnx model file.
+
+        Note: Even if the model has external data, this function will return the bytes of the main onnx model file.
+        To get the bytes of the external data, use the get_external_data_bytes() method.
+        """
         return self.onnx_model[self.model_name + ".onnx"]

     @classmethod
@@ -563,13 +567,3 @@ def create_model_metadata(
         "is_bytes_pickled": onnx_graph.ByteSize() > TWO_GB,
         "config": model.config if hasattr(model, "config") else None,
     }
-
-
-def get_onnx_bytes(*args, **kwargs) -> bytes:
-    """Return onnx bytes only.
-
-    See ``get_onnx_bytes_and_metadata()`` for more info.
-    """
-    onnx_bytes = get_onnx_bytes_and_metadata(*args, **kwargs)[0]
-    onnx_bytes_obj = OnnxBytes.from_bytes(onnx_bytes)
-    return onnx_bytes_obj.get_onnx_model_file_bytes()
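With the get_onnx_bytes() wrapper removed, callers that only need the main model file can compose the remaining public pieces themselves. A minimal sketch mirroring the pattern used in the updated tests below (the helper name is hypothetical, not part of this commit):

# Sketch: a local stand-in for the removed get_onnx_bytes() convenience wrapper.
from modelopt.torch._deploy.utils import OnnxBytes, get_onnx_bytes_and_metadata


def get_main_onnx_file_bytes(model, dummy_input, **kwargs) -> bytes:
    """Return only the main .onnx file bytes; external data is accessed separately."""
    onnx_bytes, _ = get_onnx_bytes_and_metadata(model, dummy_input, **kwargs)
    return OnnxBytes.from_bytes(onnx_bytes).get_onnx_model_file_bytes()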

tests/unit/onnx/test_onnx_utils.py

Lines changed: 19 additions & 13 deletions
@@ -38,7 +38,7 @@
     save_onnx_bytes_to_dir,
     validate_onnx,
 )
-from modelopt.torch._deploy.utils import get_onnx_bytes
+from modelopt.torch._deploy.utils import OnnxBytes, get_onnx_bytes_and_metadata


 @pytest.mark.parametrize(
@@ -103,20 +103,24 @@ def test_random_onnx_weights():
     model, args, kwargs = get_tiny_resnet_and_input()
     assert not kwargs

-    onnx_bytes = get_onnx_bytes(model, args)
-    original_avg_var_dict = _get_avg_var_of_weights(onnx.load_from_string(onnx_bytes))
-    original_model_size = len(onnx_bytes)
+    onnx_bytes, _ = get_onnx_bytes_and_metadata(model, args)
+    onnx_model = OnnxBytes.from_bytes(onnx_bytes)
+    model_bytes = onnx_model.get_onnx_model_file_bytes()
+    model = onnx.load_from_string(model_bytes)

-    onnx_bytes = remove_weights_data(onnx_bytes)
+    original_avg_var_dict = _get_avg_var_of_weights(model)
+    original_model_size = len(model_bytes)
+
+    onnx_model_wo_weights = remove_weights_data(model_bytes)
     # Removed model weights should be greater than 18 MB
-    assert original_model_size - len(onnx_bytes) > 18e6
+    assert original_model_size - len(onnx_model_wo_weights) > 18e6

     # After assigning random weights, model size should be slightly greater than the the original
     # size due to some extra metadata
-    onnx_bytes = randomize_weights_onnx_bytes(onnx_bytes)
-    assert len(onnx_bytes) > original_model_size
+    onnx_model_randomized = randomize_weights_onnx_bytes(onnx_model_wo_weights)
+    assert len(onnx_model_randomized) > original_model_size

-    randomized_avg_var_dict = _get_avg_var_of_weights(onnx.load_from_string(onnx_bytes))
+    randomized_avg_var_dict = _get_avg_var_of_weights(onnx.load_from_string(onnx_model_randomized))
     for key, value in original_avg_var_dict.items():
         assert abs(value - randomized_avg_var_dict[key]) < 0.1

@@ -125,12 +129,14 @@ def test_reproducible_random_weights():
     model, args, kwargs = get_tiny_resnet_and_input()
     assert not kwargs

-    original_onnx_bytes = get_onnx_bytes(model, args)
-    onnx_bytes_wo_weights = remove_weights_data(original_onnx_bytes)
+    onnx_bytes, _ = get_onnx_bytes_and_metadata(model, args)
+    onnx_model = OnnxBytes.from_bytes(onnx_bytes)
+    model_bytes = onnx_model.get_onnx_model_file_bytes()
+    model = onnx.load_from_string(model_bytes)

     # Check if the randomization produces the same weights
-    onnx_bytes_1 = randomize_weights_onnx_bytes(onnx_bytes_wo_weights)
-    onnx_bytes_2 = randomize_weights_onnx_bytes(onnx_bytes_wo_weights)
+    onnx_bytes_1 = randomize_weights_onnx_bytes(model_bytes)
+    onnx_bytes_2 = randomize_weights_onnx_bytes(model_bytes)
     assert onnx_bytes_1 == onnx_bytes_2


tests/unit/torch/deploy/utils/test_torch_onnx_utils.py

Lines changed: 3 additions & 2 deletions
@@ -29,7 +29,6 @@
     OnnxBytes,
     flatten_tree,
     generate_onnx_input,
-    get_onnx_bytes,
     get_onnx_bytes_and_metadata,
 )
 from modelopt.torch._deploy.utils.torch_onnx import _to_expected_onnx_type
@@ -160,7 +159,9 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
     )
 def test_get_and_validate_batch_size(model, n_args, batch_size):
     inputs = (torch.randn([batch_size, 3, 32, 32]),) * n_args
-    onnx_bytes = get_onnx_bytes(model, inputs)
+    onnx_bytes, _ = get_onnx_bytes_and_metadata(model, inputs)
+    onnx_bytes_obj = OnnxBytes.from_bytes(onnx_bytes)
+    onnx_bytes = onnx_bytes_obj.onnx_model[f"{onnx_bytes_obj.model_name}.onnx"]

     assert validate_batch_size(onnx_bytes, batch_size)
     assert validate_batch_size(onnx_bytes, 3) is False
