
Commit a86c7af

Merge branch 'main' into jennifchen/nmh-moe-export

2 parents: d48514d + 4476f21

15 files changed: +130 −45 lines changed

.github/workflows/gpu_tests.yml

Lines changed: 1 addition & 2 deletions
@@ -73,8 +73,7 @@ jobs:
       - uses: nv-gha-runners/setup-proxy-cache@main
       - name: Setup environment variables
         run: |
-          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib" >> $GITHUB_ENV
-          echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> $GITHUB_ENV
+          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu" >> $GITHUB_ENV
       - name: Run gpu tests
         run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env
   gpu-tests-non-pr:

.gitlab/tests.yml

Lines changed: 1 addition & 3 deletions
@@ -35,9 +35,7 @@ unit:
   tags: [docker, linux, 2-gpu]
   before_script:
     # Add libcudnn*.so and libnv*.so to path
-    - export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
-    # Add trtexec to path
-    - export PATH="${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin"
+    - export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu"
     # Install git-lfs for Daring-Anteater dataset
     - apt-get update && apt-get install -y git-lfs
     - git lfs install --system

CHANGELOG.rst

Lines changed: 2 additions & 4 deletions
@@ -1,17 +1,15 @@
 Model Optimizer Changelog (Linux)
 =================================

-0.39 (2025-11-xx)
+0.39 (2025-11-07)
 ^^^^^^^^^^^^^^^^^

-**Deprecations**
-
 **New Features**

 - Add flag ``op_types_to_exclude_fp16`` in ONNX quantization to exclude ops from being converted to FP16/BF16. Alternatively, for custom TensorRT ops, this can also be done by indicating ``'fp32'`` precision in ``trt_plugins_precision``.
 - Add LoRA mode support for MCore in a new peft submodule: ``modelopt.torch.peft.update_model(model, LORA_CFG)``.
 - Support PTQ and fakequant in vLLM for fast evaluation of arbitrary quantization formats. See ``examples/vllm_serve`` for more details.
-- Add support for ``nemotron-post-training-dataset-v2`` and ``nemotron-post-training-dataset-v1`` in ``examples/llm_ptq``. Default to a mix of ``cnn_dailymail`` and ``nemotron-post-training-dataset-v2`` if no dataset is specified.
+- Add support for ``nemotron-post-training-dataset-v2`` and ``nemotron-post-training-dataset-v1`` in ``examples/llm_ptq``. Default to a mix of ``cnn_dailymail`` and ``nemotron-post-training-dataset-v2`` (gated dataset accessed using ``HF_TOKEN`` environment variable) if no dataset is specified.
 - Allow specifying ``calib_seq`` in ``examples/llm_ptq`` to set the maximum sequence length for calibration.

 **Documentation**
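As a quick illustration of the first new feature above, a hedged usage sketch follows. The ``quantize`` entry point and the ``quantize_mode`` values appear in the ``modelopt/onnx/quantization/quantize.py`` diff below, but the remaining argument names here are assumptions for illustration, not a verified signature.

    # Hypothetical usage of the new op_types_to_exclude_fp16 flag; every
    # argument name except the flag itself is an assumption for illustration.
    from modelopt.onnx.quantization.quantize import quantize

    quantize(
        onnx_path="model.onnx",  # assumed parameter name
        quantize_mode="int8",    # "fp8"/"int8" are checked in quantize.py below
        op_types_to_exclude_fp16=["Softmax", "LayerNormalization"],  # keep in FP32
    )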

docs/source/getting_started/_installation_for_Linux.rst

Lines changed: 1 addition & 2 deletions
@@ -41,8 +41,7 @@ Environment setup
 .. code-block:: shell

     export PIP_CONSTRAINT=""
-    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
-    export PATH="${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin"
+    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu"


 You may need to install additional dependencies from the respective examples's `requirements.txt` file.

examples/diffusers/quantization/requirements.txt

Lines changed: 3 additions & 0 deletions
@@ -4,3 +4,6 @@ nvtx
 onnx_graphsurgeon
 opencv-python>=4.8.1.78,<4.12.0.88
 sentencepiece
+# TODO: Fix for torch 2.9
+torch<2.9
+torchvision<0.24.0

examples/llm_sparsity/launch_finetune.sh

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ CMD="accelerate launch --multi_gpu --mixed_precision bf16 finetune.py \
     --warmup_ratio 0.0 \
     --lr_scheduler_type cosine \
     --logging_steps 1 \
-    --fsdp full_shard auto_wrap \
+    --fsdp 'full_shard auto_wrap' \
     --fsdp_transformer_layer_cls_to_wrap LlamaDecoderLayer \
     --tf32 True \
     --modelopt_restore_path $MODELOPT_RESTORE_PATH \
Lines changed: 0 additions & 1 deletion

@@ -1,4 +1,3 @@
 flash-attn
 sentencepiece>=0.2.0
 tensorboardX
-transformers>=4.57.0

modelopt/onnx/quantization/graph_utils.py

Lines changed: 13 additions & 11 deletions
@@ -27,7 +27,6 @@
 from onnx_graphsurgeon.ir.node import Node
 from onnx_graphsurgeon.ir.tensor import Constant, Tensor, Variable
 from onnxruntime.quantization.calibrate import CalibrationDataReader
-from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference

 from modelopt.onnx.logging_config import logger
 from modelopt.onnx.op_types import is_copy_op, is_linear_op
@@ -36,6 +35,7 @@
     find_lowest_common_ancestor,
     get_child_nodes,
     get_parent_nodes,
+    infer_shapes,
     parse_shapes_spec,
     save_onnx,
 )
@@ -966,7 +966,7 @@ def find_nodes_from_matmul_to_exclude(
     logger.debug(f"Found {len(matmul_nodes)} MatMul nodes to analyze")

     if calibration_shapes:
-        nodes_to_exclude = _exclude_matmuls_by_symbolic_inference(
+        nodes_to_exclude = _exclude_matmuls_by_shape_inference(
             model, matmul_nodes, calibration_shapes
         )
     else:
@@ -1058,10 +1058,10 @@ def find_nodes_from_convs_to_exclude(graph: Graph, quantize_mode: str = "int8"):
     return unsupported_conv_nodes


-def _exclude_matmuls_by_symbolic_inference(
+def _exclude_matmuls_by_shape_inference(
     model: onnx.ModelProto, matmul_nodes: list, calibration_shapes: str | dict | None = None
 ) -> list[str]:
-    """Use symbolic shape inference to find MatMuls with dimension 1."""
+    """Use shape inference to find MatMuls with dimension 1."""
     # Prepare model for symbolic inference
     for graph_input in model.graph.input:
         for dim in graph_input.type.tensor_type.shape.dim:
@@ -1070,11 +1070,13 @@ def _exclude_matmuls_by_symbolic_inference(
             dim.dim_value = 1

     # Apply calibration shapes if provided
-    input_shapes = (
-        parse_shapes_spec(calibration_shapes)
-        if (calibration_shapes and isinstance(calibration_shapes, str))
-        else {}
-    )
+    input_shapes = {}
+    if calibration_shapes:
+        input_shapes = (
+            parse_shapes_spec(calibration_shapes)
+            if isinstance(calibration_shapes, str)
+            else calibration_shapes
+        )
     for graph_input in model.graph.input:
         if graph_input.name in input_shapes:
             input_shape = input_shapes[graph_input.name]
@@ -1087,9 +1089,9 @@ def _exclude_matmuls_by_symbolic_inference(
             for dim, new_dim_value in zip(tensor_shape, input_shape):
                 dim.dim_value = new_dim_value

-    model.graph.ClearField("value_info")
-    model = SymbolicShapeInference.infer_shapes(model)
+    model = infer_shapes(model)
     value_info_map = {vi.name: vi for vi in model.graph.value_info}
+    value_info_map.update({vi.name: vi for vi in model.graph.output})

     nodes_to_exclude = []
     for matmul_node in matmul_nodes:
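The rewritten ``input_shapes`` block above now accepts ``calibration_shapes`` either as a spec string (parsed via ``parse_shapes_spec``) or as an already-built dict. A self-contained sketch of that normalization, assuming a "name:AxBxC,..." spec format (the real format handled by ``parse_shapes_spec`` may differ):

    # Standalone sketch of the str-or-dict normalization; the "name:AxBxC"
    # spec format is an assumption, not necessarily what parse_shapes_spec uses.
    def normalize_calibration_shapes(
        calibration_shapes: str | dict | None,
    ) -> dict[str, list[int]]:
        input_shapes: dict[str, list[int]] = {}
        if calibration_shapes:
            if isinstance(calibration_shapes, str):
                # Parse "input_a:1x16x16,input_b:4" into {"input_a": [1, 16, 16], ...}
                for spec in calibration_shapes.split(","):
                    name, _, dims = spec.rpartition(":")
                    input_shapes[name] = [int(d) for d in dims.split("x")]
            else:
                input_shapes = dict(calibration_shapes)
        return input_shapes

    assert normalize_calibration_shapes("x:1x3x224x224") == {"x": [1, 3, 224, 224]}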

modelopt/onnx/quantization/quantize.py

Lines changed: 1 addition & 1 deletion
@@ -470,7 +470,7 @@ def quantize(
         calibration_eps,
     )

-    if not calibration_shapes:
+    if calibrate_per_node and not calibration_shapes:
         calibration_shapes = get_input_shapes(onnx_path)

     if quantize_mode in ["fp8", "int8"]:
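The new guard above only auto-detects input shapes when per-node calibration is requested and the caller supplied no ``calibration_shapes``. ``get_input_shapes`` itself is not shown in this diff; a minimal illustrative stand-in using only the public ``onnx`` API could look like this:

    import onnx

    def read_graph_input_shapes(onnx_path: str) -> dict[str, list[int]]:
        # Illustrative stand-in for get_input_shapes: collect each graph
        # input's shape, reporting dynamic (symbolic) dimensions as -1.
        model = onnx.load(onnx_path)
        return {
            inp.name: [
                d.dim_value if d.HasField("dim_value") else -1
                for d in inp.type.tensor_type.shape.dim
            ]
            for inp in model.graph.input
        }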

modelopt/onnx/trt_utils.py

Lines changed: 21 additions & 2 deletions
@@ -36,6 +36,8 @@
 except ImportError:
     TRT_PYTHON_AVAILABLE = False

+MAX_IR_VERSION = 10
+

 def get_custom_layers(
     onnx_path: str | onnx.ModelProto,
@@ -296,7 +298,8 @@ def load_onnx_model(

     static_shaped_onnx_path = onnx_path.replace(".onnx", "_static.onnx")
     save_onnx(onnx_model, static_shaped_onnx_path, use_external_data_format)
-    intermediate_generated_files.append(static_shaped_onnx_path)  # type: ignore[union-attr]
+    if intermediate_generated_files is not None:
+        intermediate_generated_files.append(static_shaped_onnx_path)

     if TRT_PYTHON_AVAILABLE and platform.system() != "Windows":
         # Check if there's a custom TensorRT op in the ONNX model. If so, make it ORT compatible by adding
@@ -318,11 +321,27 @@ def load_onnx_model(
     # Infer types and shapes in the graph for ORT compatibility
     onnx_model = infer_types_shapes_tensorrt(onnx_model, trt_plugins or [], all_tensor_info)

+    # Enforce IR version = 10
+    ir_version_onnx_path = None
+    if onnx_model.ir_version > MAX_IR_VERSION:
+        onnx_model.ir_version = MAX_IR_VERSION
+        ir_version_onnx_path = (
+            static_shaped_onnx_path.replace(".onnx", f"_ir{MAX_IR_VERSION}.onnx")
+            if static_shaped_onnx_path
+            else onnx_path.replace(".onnx", f"_ir{MAX_IR_VERSION}.onnx")
+        )
+        save_onnx(onnx_model, ir_version_onnx_path, use_external_data_format)
+        if intermediate_generated_files is not None:
+            intermediate_generated_files.append(ir_version_onnx_path)
+
+    # Check that the model is valid
+    onnx.checker.check_model(onnx_model)
+
     return (
         onnx_model,
         has_custom_op,
         custom_ops,
-        static_shaped_onnx_path or onnx_path,
+        ir_version_onnx_path or static_shaped_onnx_path or onnx_path,
         use_external_data_format,
     )
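Taken in isolation, the IR-version enforcement added above reduces to the following minimal sketch, using only the public ``onnx`` API and omitting the save and bookkeeping steps that ``load_onnx_model`` performs:

    import onnx

    MAX_IR_VERSION = 10  # mirrors the constant introduced in this diff

    def clamp_ir_version(model: onnx.ModelProto) -> onnx.ModelProto:
        # Downgrade the model's IR version to MAX_IR_VERSION when it is newer,
        # then validate; onnx.checker raises if the result is malformed.
        if model.ir_version > MAX_IR_VERSION:
            model.ir_version = MAX_IR_VERSION
        onnx.checker.check_model(model)
        return model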
