From ddfbfd2f348a01cfd411bf9f79e8476bb50f440c Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 14 Apr 2026 14:04:20 +0800 Subject: [PATCH 1/7] add LpNormalization --- .../normalization_input_generator.py | 48 +++++++++++-------- tests/unit/analyze/core/test_qdq.py | 1 + 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/winml/modelkit/pattern/op_input_gen/normalization_input_generator.py b/src/winml/modelkit/pattern/op_input_gen/normalization_input_generator.py index a14d1107e..72b3c4382 100644 --- a/src/winml/modelkit/pattern/op_input_gen/normalization_input_generator.py +++ b/src/winml/modelkit/pattern/op_input_gen/normalization_input_generator.py @@ -421,27 +421,33 @@ def get_qdq_config(self) -> dict[str, QDQParameterConfig] | None: # ============================================================================ # LpNormalization - NOT IMPLEMENTED in ONNXRuntime # ============================================================================ -# -# NOTE: LpNormalization(22) exists in the ONNX spec but is NOT IMPLEMENTED -# in ONNXRuntime as of the current version. The validation fails with: -# "NOT_IMPLEMENTED: Could not find an implementation for LpNormalization(22)" -# -# Uncomment and use the implementation below when runtime support is added: -# -# @register_runtime_checker_op -# class LpNormalizationInputGenerator(NormalizationInputGenerator): -# """Input generator for LpNormalization operator.""" -# op_name = "LpNormalization" -# def get_finite_attribute_sets(self) -> dict[str, list]: -# return {"p": [1, 2]} -# def get_input_and_infinite_attribute_combinations(self) -> list[dict[str, InputConstraint]]: -# combinations = [] -# for shape in self.get_common_data_shapes(): -# if len(shape) < 3: -# continue -# # TODO: add axis -# combinations.append({"input": InputShapeConstraint(shape)}) -# return combinations + + +@register_runtime_checker_op +class LpNormalizationInputGenerator(NormalizationInputGenerator): + """Input generator for LpNormalization operator.""" + + op_name = "LpNormalization" + + def get_finite_attribute_sets(self) -> dict[str, list]: + """Return finite attribute values for LpNormalization.""" + return {"p": [1, 2]} + + def get_input_and_infinite_attribute_combinations(self) -> list[dict[str, InputConstraint]]: + """Return input combinations for LpNormalization.""" + combinations = [] + for shape in self.get_common_data_shapes(): + if len(shape) < 3: + continue + # TODO: add axis + combinations.append({"input": InputShapeConstraint(shape)}) + return combinations + + def get_qdq_config(self) -> dict[str, QDQParameterConfig] | None: + """Return QDQ configuration for LpNormalization operator inputs.""" + return { + self.op_input_names[0]: QDQParameterConfig(support_activation=True), + } # ============================================================================ diff --git a/tests/unit/analyze/core/test_qdq.py b/tests/unit/analyze/core/test_qdq.py index 14f9e4152..6e53e4275 100644 --- a/tests/unit/analyze/core/test_qdq.py +++ b/tests/unit/analyze/core/test_qdq.py @@ -1106,6 +1106,7 @@ class TestIterQDQCombinations: ("GlobalAveragePool", 3 * 4), # 12 ("InstanceNormalization", 3 * 16), # 48 ("LayerNormalization", 5 * 2 * 2 * 16), # 320 + ("LpNormalization", 3 * 2 * 4), # 24: 3 shapes (>=3D) x 2 p-values x 4 act types ("MatMul", 36 * (16 * 2 - 4 + 4)), # 1152: +4/shape for B=INT4 ( "MaxPool", From 9da9c43608aa8487c013d41d320ffbe6d388d0a7 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 14 Apr 2026 14:30:09 +0800 Subject: [PATCH 2/7] add axis candidates --- .../pattern/op_input_gen/normalization_input_generator.py | 5 +++-- tests/unit/analyze/core/test_qdq.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/winml/modelkit/pattern/op_input_gen/normalization_input_generator.py b/src/winml/modelkit/pattern/op_input_gen/normalization_input_generator.py index 72b3c4382..53c0440da 100644 --- a/src/winml/modelkit/pattern/op_input_gen/normalization_input_generator.py +++ b/src/winml/modelkit/pattern/op_input_gen/normalization_input_generator.py @@ -439,8 +439,9 @@ def get_input_and_infinite_attribute_combinations(self) -> list[dict[str, InputC for shape in self.get_common_data_shapes(): if len(shape) < 3: continue - # TODO: add axis - combinations.append({"input": InputShapeConstraint(shape)}) + combinations.extend( + {"input": InputShapeConstraint(shape), "axis": axis} for axis in [0, 1, -1, 2] + ) return combinations def get_qdq_config(self) -> dict[str, QDQParameterConfig] | None: diff --git a/tests/unit/analyze/core/test_qdq.py b/tests/unit/analyze/core/test_qdq.py index 6e53e4275..fe68f2b19 100644 --- a/tests/unit/analyze/core/test_qdq.py +++ b/tests/unit/analyze/core/test_qdq.py @@ -1106,7 +1106,7 @@ class TestIterQDQCombinations: ("GlobalAveragePool", 3 * 4), # 12 ("InstanceNormalization", 3 * 16), # 48 ("LayerNormalization", 5 * 2 * 2 * 16), # 320 - ("LpNormalization", 3 * 2 * 4), # 24: 3 shapes (>=3D) x 2 p-values x 4 act types + ("LpNormalization", 3 * 2 * 4 * 4), # 96: 3 shapes (>=3D) x 2 p x 4 axis x 4 act types ("MatMul", 36 * (16 * 2 - 4 + 4)), # 1152: +4/shape for B=INT4 ( "MaxPool", From 9c263cda92d3df2431e7be8152b338379d6dc905 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 14 Apr 2026 16:18:01 +0800 Subject: [PATCH 3/7] add GatherBlockQuantized --- .../modelkit/pattern/op_input_gen/__init__.py | 1 + .../op_input_gen/indexing_input_generator.py | 155 ++++++++++++++++++ tests/unit/analyze/core/test_qdq.py | 27 +++ 3 files changed, 183 insertions(+) diff --git a/src/winml/modelkit/pattern/op_input_gen/__init__.py b/src/winml/modelkit/pattern/op_input_gen/__init__.py index 0ae982453..7de347d8d 100644 --- a/src/winml/modelkit/pattern/op_input_gen/__init__.py +++ b/src/winml/modelkit/pattern/op_input_gen/__init__.py @@ -10,6 +10,7 @@ from .flatten_input_generator import FlattenInputGenerator from .global_pooling_input_generator import * from .indexing_input_generator import ( + GatherBlockQuantizedInputGenerator, GatherInputGenerator, ScatterNDInputGenerator, SplitInputGenerator, diff --git a/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py b/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py index 045cf678d..f37fb268d 100644 --- a/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py +++ b/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py @@ -7,15 +7,18 @@ This module contains input generators for operators that perform indexing and shape manipulation operations: - Gather: Gathers entries along an axis using indices +- GatherBlockQuantized: Fused gather + block-wise dequantize (com.microsoft) - ScatterND: Scatters updates into a copy of data at specified indices - Unsqueeze: Inserts single-dimensional entries to shape - Split: Splits a tensor into multiple outputs """ +import math from typing import Any import numpy as np +from ...onnx import SupportedONNXType from .op_input_gen import ( InputConstraint, InputShapeConstraint, @@ -858,3 +861,155 @@ def get_qdq_config(self): "input": QDQParameterConfig(support_activation=True), "split": QDQParameterConfig(support_non_qdq=True), } + + +@register_runtime_checker_op +class GatherBlockQuantizedInputGenerator(OpInputGenerator): + """Input generator for com.microsoft::GatherBlockQuantized operator. + + GatherBlockQuantized is a fused gather + block-wise dequantize operator. + It gathers rows from a quantized weight tensor and dequantizes them on the fly. + + Inputs: + - data (T1): Block-wise quantized weight (INT4/UINT4/UINT8), always a constant initializer + - indices (Tind): Gather indices (INT32/INT64), the runtime input + - scales (T2): Dequantization scales (FLOAT/FLOAT16), always a constant initializer + - zero_points (T1, optional): Dequantization zero points, always a constant initializer + + Attributes: + - bits: 4 for INT4/UINT4 data, 8 for UINT8 data + - block_size: Quantization block size (power of 2, >= 16) + - gather_axis: Axis to gather on (UINT8 requires gather_axis=0) + - quantize_axis: Axis along which data was quantized (must differ from gather_axis) + + Output (T2): Dequantized gathered tensor. + + Since this op is already a fused dequantize+gather, it does not use external + QDQ wrapping (no get_qdq_config). The type combinations (T1/T2/Tind) and the + coupling between bits and T1 are enumerated explicitly in iter(). + + Coverage: + - T1: INT4 (bits=4), UINT4 (bits=4), UINT8 (bits=8) + - T2: FLOAT, FLOAT16 + - Tind: INT32, INT64 + - block_size: 16, 32 + - gather_axis: 0, 1 for INT4/UINT4; 0 only for UINT8 (spec constraint) + - zero_points: present / absent (doubles the count) + + Count: 2 INT4 gather_axes x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 32 + + 2 UINT4 gather_axes x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 32 + + 1 UINT8 gather_axis x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 16 + = 80 + """ + + op_name = "GatherBlockQuantized" + expand_optionals = False # zero_points presence is enumerated explicitly in iter() + + def get_finite_attribute_sets(self) -> dict[str, list]: + """Not used: attribute enumeration is handled in iter() to couple bits with T1 type.""" + return {} + + def get_input_and_infinite_attribute_combinations(self) -> list[dict]: + """Not used: combinations are enumerated directly in iter().""" + return [] + + def _iter_constant_combinations(self, kwargs: dict) -> Any: + """Yield one constant map: data/scales/zero_points are weights; indices is runtime.""" + is_constant_map = { + k: k != "indices" for k, v in kwargs.items() if self._is_input_key(k) and v is not None + } + yield is_constant_map + + def iter(self) -> Any: + """Enumerate all valid (T1, bits, T2, Tind, shape, axis, block_size, zp) combos.""" + import ml_dtypes + + # One representative 2-D embedding-style data shape + data_shape = (32, 64) + + block_sizes = [16, 32] + t2_types = [ + (SupportedONNXType.FLOAT.np_type, SupportedONNXType.FLOAT.annotation), + (SupportedONNXType.FLOAT16.np_type, SupportedONNXType.FLOAT16.annotation), + ] + tind_types = [ + (np.int32, SupportedONNXType.INT32.annotation), + (np.int64, SupportedONNXType.INT64.annotation), + ] + # (np_dtype, annotation, bits, valid_gather_axes) + t1_configs = [ + (np.dtype(ml_dtypes.int4), SupportedONNXType.INT4.annotation, 4, [0, 1]), + (np.dtype(ml_dtypes.uint4), SupportedONNXType.UINT4.annotation, 4, [0, 1]), + (np.dtype(np.uint8), SupportedONNXType.UINT8.annotation, 8, [0]), + ] + rng = np.random.default_rng(42) + indices_shape = (2, 4) + + for t1_dtype, t1_annotation, bits, gather_axes in t1_configs: + for gather_axis in gather_axes: + quantize_axis = 1 - gather_axis # 2-D: the other axis + for block_size in block_sizes: + sc_dims: list[int] = list(data_shape) + sc_dims[quantize_axis] = math.ceil(sc_dims[quantize_axis] / block_size) + sc_shape = tuple(sc_dims) + axis_size = data_shape[gather_axis] + + for t2_dtype, t2_annotation in t2_types: + for tind_dtype, tind_annotation in tind_types: + for zero_points_present in [False, True]: + data_val = rng.integers( + 0, 7, size=data_shape, dtype=np.int8 + ).astype(t1_dtype) + scales_val = rng.random(sc_shape).astype(t2_dtype) + indices_val = rng.integers( + 0, axis_size, size=indices_shape, dtype=tind_dtype + ) + + kwargs: dict[str, Any] = { + "data": data_val, + "indices": indices_val, + "scales": scales_val, + "bits": bits, + "block_size": block_size, + "gather_axis": gather_axis, + "quantize_axis": quantize_axis, + } + if zero_points_present: + kwargs["zero_points"] = np.zeros(sc_shape, dtype=t1_dtype) + + type_vars = { + f"T1_{self.op_name}": t1_annotation, + f"Tind_{self.op_name}": tind_annotation, + f"T2_{self.op_name}": t2_annotation, + } + attrs = { + k: v for k, v in kwargs.items() if k in self.op_attribute_names + } + input_constraints = { + k: {"type": "shape", "shape": list(v.shape)} + for k, v in kwargs.items() + if self._is_input_key(k) and v is not None + } + tags = { + self.type_vars_key: type_vars, + "input_constraints": input_constraints, + "attrs": attrs, + } + yield self.filter_kwargs_by_opset(kwargs), tags + + def derive_properties(self, properties: dict) -> dict: + """Derive filter properties from node inputs and attributes.""" + item = properties.copy() + item["data_dim"] = len(item.get("data_shape", ())) + item["indices_dim"] = len(item.get("indices_shape", ())) + return item + + def get_infinite_property_names(self) -> list[str]: + """Return names of properties with infinite possible values.""" + return [ + "data_shape", + "indices_shape", + "attr_gather_axis", + "attr_quantize_axis", + "attr_block_size", + ] diff --git a/tests/unit/analyze/core/test_qdq.py b/tests/unit/analyze/core/test_qdq.py index fe68f2b19..eeae70266 100644 --- a/tests/unit/analyze/core/test_qdq.py +++ b/tests/unit/analyze/core/test_qdq.py @@ -1170,3 +1170,30 @@ def test_qdq_total_count(self, op_name: str, expected_count: int) -> None: # For rerun, could track in https://github.com/gim-home/ModelKit/issues/278 assert count == expected_count, "If changes, either bug or need to rerun" + + +class TestIterCOMSOpsModels: + """Tests for com.microsoft domain ops that use explicit type enumeration (no QDQ wrapping).""" + + @pytest.mark.parametrize( + "op_name,expected_count", + [ + # 2 INT4 gather_axes x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 32 + # 2 UINT4 gather_axes x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 32 + # 1 UINT8 gather_axis x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 16 + ("GatherBlockQuantized", 80), + ], + ) + def test_com_microsoft_op_model_count(self, op_name: str, expected_count: int) -> None: + """Test total model count for com.microsoft ops (no QDQ wrapping).""" + from winml.modelkit.pattern.op_input_gen import get_runtime_checker_op + + schema = ONNXDomain.COM_MICROSOFT.get_op_schema(op_name, 1) + generator = get_runtime_checker_op(op_name)(schema) # no qdq_generator + + count = 0 + for kwargs, tags in generator.iter(): + for _model, _final_tags in generator.iter_const_and_dynamic_models(kwargs, tags): + count += 1 + + assert count == expected_count, "If count changes, update both code and this comment" From a1de477a4c8b6b32018e724539de012b9ea269ea Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 14 Apr 2026 16:34:12 +0800 Subject: [PATCH 4/7] skip --- .../pattern/op_input_gen/indexing_input_generator.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py b/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py index f37fb268d..6ed05f92d 100644 --- a/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py +++ b/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py @@ -997,6 +997,17 @@ def iter(self) -> Any: } yield self.filter_kwargs_by_opset(kwargs), tags + def _run_op_on_cpu(self, kwargs: dict, tags: dict) -> Any: + """Skip CPU validation for GatherBlockQuantized. + + This op is a com.microsoft fused op not supported by the CPU EP. + The quantized data inputs (INT4/UINT4/UINT8) are constant initializers + and cannot be fed as runtime inputs; the base class builds an all-dynamic + model for CPU validation, which would fail on sub-byte dtypes. + Our combinations are valid by construction, so CPU pre-validation is not needed. + """ + return [] + def derive_properties(self, properties: dict) -> dict: """Derive filter properties from node inputs and attributes.""" item = properties.copy() From 6c05e0e5391c79cc672c0fa38056b40c5de61ee5 Mon Sep 17 00:00:00 2001 From: hualxie Date: Wed, 15 Apr 2026 10:33:24 +0800 Subject: [PATCH 5/7] add qdq --- .../runtime_checker/result_processor.py | 3 +- .../op_input_gen/indexing_input_generator.py | 34 ++++++++++++++----- tests/unit/analyze/core/test_qdq.py | 19 ++++++----- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/src/winml/modelkit/analyze/runtime_checker/result_processor.py b/src/winml/modelkit/analyze/runtime_checker/result_processor.py index 07e7f10a5..d6eab9645 100644 --- a/src/winml/modelkit/analyze/runtime_checker/result_processor.py +++ b/src/winml/modelkit/analyze/runtime_checker/result_processor.py @@ -611,7 +611,7 @@ def get_opset_version_range(op_name: str, start_opset_version: int, op_domain: s target_domain = "" if args.opset_domain == "ai.onnx" else args.opset_domain domain_str_for_filename = args.opset_domain # Keep original for filename matching - json_files = list(input_dir.rglob("*.json")) + json_files = list(input_dir.glob("*.json")) if not json_files: print(f"No JSON files found in {input_dir}") @@ -694,7 +694,6 @@ def get_opset_version_range(op_name: str, start_opset_version: int, op_domain: s f"_opset{since_version}{qdq_suffix}.json" ) json_file = input_dir / expected_filename - print(f"Processing {expected_filename}...", end=" ") if not json_file.exists(): diff --git a/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py b/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py index 6ed05f92d..89335e62a 100644 --- a/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py +++ b/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py @@ -884,11 +884,12 @@ class GatherBlockQuantizedInputGenerator(OpInputGenerator): Output (T2): Dequantized gathered tensor. - Since this op is already a fused dequantize+gather, it does not use external - QDQ wrapping (no get_qdq_config). The type combinations (T1/T2/Tind) and the - coupling between bits and T1 are enumerated explicitly in iter(). + The op's inputs (INT4/UINT4/UINT8 data, indices, scales, optional zero_points) are + not wrapped by external DQ nodes — they are already quantized. The float output can + be followed by a QuantizeLinear node, so get_qdq_config() marks the output as + support_activation=True and all inputs as support_non_qdq (pass-through). - Coverage: + Coverage (base models, no QDQ): - T1: INT4 (bits=4), UINT4 (bits=4), UINT8 (bits=8) - T2: FLOAT, FLOAT16 - Tind: INT32, INT64 @@ -896,10 +897,12 @@ class GatherBlockQuantizedInputGenerator(OpInputGenerator): - gather_axis: 0, 1 for INT4/UINT4; 0 only for UINT8 (spec constraint) - zero_points: present / absent (doubles the count) - Count: 2 INT4 gather_axes x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 32 - + 2 UINT4 gather_axes x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 32 - + 1 UINT8 gather_axis x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 16 - = 80 + Base count: 2 INT4 gather_axes x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 32 + + 2 UINT4 gather_axes x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 32 + + 1 UINT8 gather_axis x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 16 + = 80 + + QDQ models (output wrapped by Q): 80 base x 4 activation types = 320 """ op_name = "GatherBlockQuantized" @@ -913,6 +916,21 @@ def get_input_and_infinite_attribute_combinations(self) -> list[dict]: """Not used: combinations are enumerated directly in iter().""" return [] + def get_qdq_config(self) -> dict[str, QDQParameterConfig]: + """Return QDQ config: output wrappable by Q; all inputs are pass-through. + + GatherBlockQuantized inputs are already quantized (INT4/UINT4/UINT8) and + must not be wrapped by DQ nodes. Only the float output can be followed by + a QuantizeLinear node (support_activation). + """ + return { + "data": QDQParameterConfig(support_non_qdq=True), + "indices": QDQParameterConfig(support_non_qdq=True), + "scales": QDQParameterConfig(support_non_qdq=True), + "zero_points": QDQParameterConfig(support_non_qdq=True), + "output": QDQParameterConfig(support_activation=True), + } + def _iter_constant_combinations(self, kwargs: dict) -> Any: """Yield one constant map: data/scales/zero_points are weights; indices is runtime.""" is_constant_map = { diff --git a/tests/unit/analyze/core/test_qdq.py b/tests/unit/analyze/core/test_qdq.py index eeae70266..cc958d852 100644 --- a/tests/unit/analyze/core/test_qdq.py +++ b/tests/unit/analyze/core/test_qdq.py @@ -1172,24 +1172,25 @@ def test_qdq_total_count(self, op_name: str, expected_count: int) -> None: assert count == expected_count, "If changes, either bug or need to rerun" -class TestIterCOMSOpsModels: - """Tests for com.microsoft domain ops that use explicit type enumeration (no QDQ wrapping).""" +class TestIterMSQDQCombinations: + """Tests for com.microsoft domain ops.""" @pytest.mark.parametrize( "op_name,expected_count", [ - # 2 INT4 gather_axes x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 32 - # 2 UINT4 gather_axes x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 32 - # 1 UINT8 gather_axis x 2 block_sizes x 2 T2 x 2 Tind x 2 zp = 16 - ("GatherBlockQuantized", 80), + # Only T2=FLOAT combos produce QDQ output models (T2=FLOAT16 fails Q input type check). + # 40 FLOAT base combos x 4 activation output types (INT8/UINT8/INT16/UINT16) = 160 + ("GatherBlockQuantized", 160), ], ) - def test_com_microsoft_op_model_count(self, op_name: str, expected_count: int) -> None: - """Test total model count for com.microsoft ops (no QDQ wrapping).""" + def test_com_microsoft_op_qdq_model_count(self, op_name: str, expected_count: int) -> None: + """Test QDQ model count for com.microsoft ops.""" from winml.modelkit.pattern.op_input_gen import get_runtime_checker_op + from winml.modelkit.pattern.op_input_gen.qdq_gen import QDQGenerator schema = ONNXDomain.COM_MICROSOFT.get_op_schema(op_name, 1) - generator = get_runtime_checker_op(op_name)(schema) # no qdq_generator + qdq_gen = QDQGenerator(opset_version=1, domain=ONNXDomain.COM_MICROSOFT) + generator = get_runtime_checker_op(op_name)(schema, qdq_generator=qdq_gen) count = 0 for kwargs, tags in generator.iter(): From 2f5b5bfd45696c8d191b2fbb2fdd070fbabcc7e8 Mon Sep 17 00:00:00 2001 From: hualxie Date: Wed, 15 Apr 2026 10:34:19 +0800 Subject: [PATCH 6/7] update comment --- tests/unit/analyze/core/test_qdq.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/unit/analyze/core/test_qdq.py b/tests/unit/analyze/core/test_qdq.py index cc958d852..ccc332886 100644 --- a/tests/unit/analyze/core/test_qdq.py +++ b/tests/unit/analyze/core/test_qdq.py @@ -1179,7 +1179,12 @@ class TestIterMSQDQCombinations: "op_name,expected_count", [ # Only T2=FLOAT combos produce QDQ output models (T2=FLOAT16 fails Q input type check). - # 40 FLOAT base combos x 4 activation output types (INT8/UINT8/INT16/UINT16) = 160 + # FLOAT base combos: + # 2 INT4 gather_axes x 2 block_sizes x 2 Tind x 2 zp = 16 + # 2 UINT4 gather_axes x 2 block_sizes x 2 Tind x 2 zp = 16 + # 1 UINT8 gather_axis x 2 block_sizes x 2 Tind x 2 zp = 8 + # total = 40 + # x 4 activation output types (INT8/UINT8/INT16/UINT16) = 160 ("GatherBlockQuantized", 160), ], ) From 6033edc64a3458d36e56a38121a23f604f61219c Mon Sep 17 00:00:00 2001 From: hualxie Date: Wed, 15 Apr 2026 11:03:08 +0800 Subject: [PATCH 7/7] remove bits --- .../pattern/op_input_gen/indexing_input_generator.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py b/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py index 89335e62a..ba87ea522 100644 --- a/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py +++ b/src/winml/modelkit/pattern/op_input_gen/indexing_input_generator.py @@ -1041,4 +1041,9 @@ def get_infinite_property_names(self) -> list[str]: "attr_gather_axis", "attr_quantize_axis", "attr_block_size", + # attr_bits is redundant with T1 type (INT4/UINT4 → 4, UINT8 → 8); + # some models omit the bits attribute entirely (attr_bits_is_none=True), + # so exclude both from table matching to avoid false gaps. + "attr_bits", + "attr_bits_is_none", ]