From ffa80c1f13a1b25a9819bffae3fcc62de09f9573 Mon Sep 17 00:00:00 2001 From: hualxie Date: Wed, 1 Apr 2026 14:32:48 +0800 Subject: [PATCH 1/2] bug fix --- src/winml/modelkit/commands/compile.py | 12 ++-- src/winml/modelkit/commands/perf.py | 16 +++-- src/winml/modelkit/eval/evaluate.py | 11 ++++ src/winml/modelkit/export/htp/exporter.py | 12 ++-- .../commands/test_compile_quantize_flags.py | 34 ++++++++++ tests/unit/commands/test_perf_cli.py | 23 +++++++ tests/unit/eval/test_eval.py | 32 +++++++++ tests/unit/export/test_htp_exporter_stats.py | 66 +++++++++++++++++++ 8 files changed, 185 insertions(+), 21 deletions(-) create mode 100644 tests/unit/export/test_htp_exporter_stats.py diff --git a/src/winml/modelkit/commands/compile.py b/src/winml/modelkit/commands/compile.py index a21b9ed77..d7ba4788b 100644 --- a/src/winml/modelkit/commands/compile.py +++ b/src/winml/modelkit/commands/compile.py @@ -25,7 +25,7 @@ import click from rich.console import Console -from ..config.precision import _DEVICE_TO_PROVIDER, VALID_EPS +from ..config.precision import _DEVICE_TO_PROVIDER, _EP_TO_DEVICE, VALID_EPS from ..utils.logging import configure_logging @@ -184,7 +184,7 @@ def compile( # Show info console.print(f"[bold blue]Input:[/bold blue] {model}") - console.print(f"[bold blue]Device:[/bold blue] {device}") + console.print(f"[bold blue]Device:[/bold blue] {_EP_TO_DEVICE.get(provider, device)}") if ep: console.print(f"[bold blue]EP:[/bold blue] {ep}") console.print(f"[bold blue]Provider:[/bold blue] {provider}") @@ -203,13 +203,9 @@ def compile( if result.output_path: console.print(f"[dim]Output: {result.output_path}[/dim]") if result.compile_time: - console.print( - f"[dim]Compile time: {result.compile_time:.2f}s[/dim]" - ) + console.print(f"[dim]Compile time: {result.compile_time:.2f}s[/dim]") if result.total_time: - console.print( - f"[dim]Total time: {result.total_time:.2f}s[/dim]" - ) + console.print(f"[dim]Total time: {result.total_time:.2f}s[/dim]") else: console.print("\n[bold red]Compilation failed:[/bold red]") for error in result.errors: diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py index e1b0ecadb..068f4e9bf 100644 --- a/src/winml/modelkit/commands/perf.py +++ b/src/winml/modelkit/commands/perf.py @@ -1121,6 +1121,15 @@ def perf( console.print(f"[dim]Precision: {precision} (applied during model build)[/dim]") console.print(f"[dim]Loading model:[/dim] {hf_model}") + # Op-tracing pre-flight: fail fast before running any benchmark iterations + if op_tracing: + from ..optracing import is_qnn_profiling_available + + if not is_qnn_profiling_available(): + console.print("[red]Error:[/red] Op-tracing requires onnxruntime-qnn") + console.print("Install with: [bold]pip install onnxruntime-qnn[/bold]") + raise SystemExit(1) + benchmark = PerfBenchmark(config) result = benchmark.run() @@ -1135,13 +1144,6 @@ def perf( # Op-tracing (additive to existing benchmark) # ================================================================= if op_tracing: - from ..optracing import is_qnn_profiling_available - - if not is_qnn_profiling_available(): - console.print("[red]Error:[/red] Op-tracing requires onnxruntime-qnn") - console.print("Install with: [bold]pip install onnxruntime-qnn[/bold]") - raise SystemExit(1) - from ..optracing.registry import get_tracer from ..optracing.report import ( display_op_trace_report, diff --git a/src/winml/modelkit/eval/evaluate.py b/src/winml/modelkit/eval/evaluate.py index a46d21766..a0bfbb391 100644 --- a/src/winml/modelkit/eval/evaluate.py +++ b/src/winml/modelkit/eval/evaluate.py @@ -152,7 +152,18 @@ def evaluate(config: WinMLEvaluationConfig) -> EvalResult: raise ValueError( f"No dataset provided and no default for task '{config.task}'. Use --dataset." ) + user_dataset = config.dataset config.dataset = deepcopy(default) + # Preserve user-specified values that differ from DatasetConfig class defaults + _cls_default = DatasetConfig() + if user_dataset.samples != _cls_default.samples: + config.dataset.samples = user_dataset.samples + if user_dataset.split != _cls_default.split: + config.dataset.split = user_dataset.split + if user_dataset.shuffle != _cls_default.shuffle: + config.dataset.shuffle = user_dataset.shuffle + if user_dataset.seed != _cls_default.seed: + config.dataset.seed = user_dataset.seed logger.info( "Using default dataset for %s: %s", config.task, diff --git a/src/winml/modelkit/export/htp/exporter.py b/src/winml/modelkit/export/htp/exporter.py index f6466be07..f56371772 100644 --- a/src/winml/modelkit/export/htp/exporter.py +++ b/src/winml/modelkit/export/htp/exporter.py @@ -296,7 +296,8 @@ def export( self._export_stats["export_time"] = export_time self._export_stats["hierarchy_modules"] = len(self._hierarchy_data) self._export_stats["onnx_nodes"] = len(onnx_model.graph.node) - self._export_stats["tagged_nodes"] = len(self._tagged_nodes) + embedded_count = len(self._tagged_nodes) if self.embed_hierarchy_attributes else 0 + self._export_stats["tagged_nodes"] = embedded_count # Calculate empty tags (should be 0 with our implementation) empty_tag_count = sum( @@ -306,8 +307,7 @@ def export( # Calculate coverage percentage total_nodes = len(onnx_model.graph.node) - tagged_nodes = len(self._tagged_nodes) - coverage = (tagged_nodes / total_nodes * 100.0) if total_nodes > 0 else 0.0 + coverage = (embedded_count / total_nodes * 100.0) if total_nodes > 0 else 0.0 self._export_stats["coverage_percentage"] = coverage # Update monitor with actual export time @@ -511,7 +511,8 @@ def _apply_hierarchy_tags(self, onnx_model: onnx.ModelProto) -> None: # Update export stats self._export_stats["onnx_nodes"] = len(onnx_model.graph.node) - self._export_stats["tagged_nodes"] = len(self._tagged_nodes) + embedded_count = len(self._tagged_nodes) if self.embed_hierarchy_attributes else 0 + self._export_stats["tagged_nodes"] = embedded_count # Calculate empty tags (should be 0 with our implementation) empty_tag_count = sum( @@ -521,8 +522,7 @@ def _apply_hierarchy_tags(self, onnx_model: onnx.ModelProto) -> None: # Calculate coverage percentage total_nodes = len(onnx_model.graph.node) - tagged_nodes = len(self._tagged_nodes) - coverage = (tagged_nodes / total_nodes * 100.0) if total_nodes > 0 else 0.0 + coverage = (embedded_count / total_nodes * 100.0) if total_nodes > 0 else 0.0 self._export_stats["coverage_percentage"] = coverage def _embed_graph_metadata( diff --git a/tests/unit/commands/test_compile_quantize_flags.py b/tests/unit/commands/test_compile_quantize_flags.py index 2c8b43ce5..7cba77d87 100644 --- a/tests/unit/commands/test_compile_quantize_flags.py +++ b/tests/unit/commands/test_compile_quantize_flags.py @@ -6,6 +6,8 @@ from __future__ import annotations +from unittest.mock import MagicMock, patch + import pytest from winml.modelkit.commands.compile import _resolve_compile_provider @@ -119,3 +121,35 @@ def test_unknown_precision_uses_defaults(self): w, a = _resolve_quant_types("fp16", None, None) assert w == "uint8" assert a == "uint8" + + +# ============================================================================= +# BUG C: compile summary shows wrong Device when --ep overrides device +# ============================================================================= + + +class TestCompileDeviceDisplayLabel: + """Bug C: Device label in compile summary must reflect the resolved EP, not the CLI default.""" + + def test_dml_ep_shows_gpu_device(self, tmp_path): + from click.testing import CliRunner + + from winml.modelkit.commands.compile import compile + + model_file = tmp_path / "model.onnx" + model_file.write_bytes(b"fake") + + mock_result = MagicMock() + mock_result.success = True + mock_result.output_path = None + mock_result.compile_time = None + mock_result.total_time = None + + with ( + patch("winml.modelkit.compiler.compile_onnx", return_value=mock_result), + patch("winml.modelkit.compiler.WinMLCompileConfig"), + ): + result = CliRunner().invoke(compile, ["-m", str(model_file), "--ep", "dml"]) + + assert "Device: gpu" in result.output + assert "Device: npu" not in result.output diff --git a/tests/unit/commands/test_perf_cli.py b/tests/unit/commands/test_perf_cli.py index 7cc2d4335..a61a91f21 100644 --- a/tests/unit/commands/test_perf_cli.py +++ b/tests/unit/commands/test_perf_cli.py @@ -329,3 +329,26 @@ def test_onnx_load_model_passes_ep(self, tmp_path: Path) -> None: benchmark._load_model() assert mock_from_onnx.call_args.kwargs["ep"] == "qnn" + + +# ============================================================================= +# BUG A: op-tracing preflight check +# ============================================================================= + + +class TestOpTracingPreflight: + """Bug A: is_qnn_profiling_available must be checked before the benchmark runs.""" + + def test_benchmark_does_not_run_when_qnn_unavailable(self, runner: CliRunner) -> None: + with ( + patch("winml.modelkit.optracing.is_qnn_profiling_available", return_value=False), + patch("winml.modelkit.commands.perf.PerfBenchmark") as mock_cls, + ): + result = runner.invoke( + perf, + ["-m", "microsoft/resnet-50", "--op-tracing", "basic"], + obj={}, + ) + + assert result.exit_code != 0 + mock_cls.return_value.run.assert_not_called() diff --git a/tests/unit/eval/test_eval.py b/tests/unit/eval/test_eval.py index c69a79777..5e208c1d2 100644 --- a/tests/unit/eval/test_eval.py +++ b/tests/unit/eval/test_eval.py @@ -854,3 +854,35 @@ def test_load_model_from_onnx(self): mock_auto.from_onnx.assert_called_once() assert result.config is mock_hf_config + + +# ============================================================================= +# BUG B: --samples silently ignored when no --dataset +# ============================================================================= + + +class TestEvaluateSamplesPreserved: + """Bug B: user --samples must survive the default-dataset merge.""" + + def test_user_samples_preserved_when_no_dataset_path(self) -> None: + import importlib + import sys + + eval_mod = sys.modules.get("winml.modelkit.eval.evaluate") or importlib.import_module( + "winml.modelkit.eval.evaluate" + ) + + config = WinMLEvaluationConfig( + model_id="test/model", + task="image-classification", + dataset=DatasetConfig(samples=20), + ) + + with ( + patch.object(eval_mod, "_load_model"), + patch.object(eval_mod, "WinMLEvaluator") as mock_cls, + ): + mock_cls.return_value.compute.return_value = {} + result = eval_mod.evaluate(config) + + assert result.config.dataset.samples == 20 diff --git a/tests/unit/export/test_htp_exporter_stats.py b/tests/unit/export/test_htp_exporter_stats.py new file mode 100644 index 000000000..827d6ab01 --- /dev/null +++ b/tests/unit/export/test_htp_exporter_stats.py @@ -0,0 +1,66 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Tests for HTPExporter export statistics correctness.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +from winml.modelkit.export.htp.exporter import HTPExporter + + +class TestHTPExporterTaggedNodesStats: + """Bug D: tagged_nodes and coverage must be 0 when embed_hierarchy_attributes=False.""" + + def test_tagged_nodes_zero_when_hierarchy_disabled(self) -> None: + exporter = HTPExporter(embed_hierarchy_attributes=False) + exporter._node_tagger = MagicMock() + exporter._node_tagger.tag_all_nodes.return_value = { + "node1": "/Model/Layer1", + "node2": "/Model/Layer2", + "node3": "/Model/Layer3", + } + exporter._node_tagger.get_tagging_statistics.return_value = {} + + mock_model = MagicMock() + mock_model.graph.node = [MagicMock() for _ in range(5)] + + exporter._apply_hierarchy_tags(mock_model) + + assert exporter._export_stats["tagged_nodes"] == 0 + + def test_coverage_zero_when_hierarchy_disabled(self) -> None: + exporter = HTPExporter(embed_hierarchy_attributes=False) + exporter._node_tagger = MagicMock() + exporter._node_tagger.tag_all_nodes.return_value = { + "n1": "/t1", + "n2": "/t2", + } + exporter._node_tagger.get_tagging_statistics.return_value = {} + + mock_model = MagicMock() + mock_model.graph.node = [MagicMock() for _ in range(4)] + + exporter._apply_hierarchy_tags(mock_model) + + assert exporter._export_stats["coverage_percentage"] == 0.0 + + def test_tagged_nodes_nonzero_when_hierarchy_enabled(self) -> None: + """Control: stats are populated normally when embedding is enabled.""" + exporter = HTPExporter(embed_hierarchy_attributes=True) + exporter._node_tagger = MagicMock() + exporter._node_tagger.tag_all_nodes.return_value = { + "n1": "/t1", + "n2": "/t2", + } + exporter._node_tagger.get_tagging_statistics.return_value = {} + + mock_model = MagicMock() + mock_model.graph.node = [MagicMock() for _ in range(4)] + + exporter._apply_hierarchy_tags(mock_model) + + assert exporter._export_stats["tagged_nodes"] == 2 + assert exporter._export_stats["coverage_percentage"] == 50.0 From 847bccedbe3dbdd73099bea494aaa99cb6947516 Mon Sep 17 00:00:00 2001 From: hualxie Date: Wed, 1 Apr 2026 15:24:27 +0800 Subject: [PATCH 2/2] fix: address PR review comments on bugs A-D and eval/dataset refactor - Replace fragile class-default comparison in evaluate.py with explicit_fields sentinel tracking; CLI --samples/--split/--shuffle now use None defaults - Add DatasetConfig.explicit_fields frozenset to track caller-set fields - Deduplicate tagged_nodes/coverage stats via _update_tag_stats() helper in HTPExporter; export() stats block now calls the shared helper - Fix test imports: HTPExporter from package level, DatasetConfig with explicit_fields in Bug B regression test --- src/winml/modelkit/commands/eval.py | 36 ++++++++++++-------- src/winml/modelkit/datasets/config.py | 3 ++ src/winml/modelkit/eval/evaluate.py | 14 +++----- src/winml/modelkit/export/htp/exporter.py | 33 ++++++++++-------- tests/unit/eval/test_eval.py | 2 +- tests/unit/export/test_htp_exporter_stats.py | 2 +- 6 files changed, 48 insertions(+), 42 deletions(-) diff --git a/src/winml/modelkit/commands/eval.py b/src/winml/modelkit/commands/eval.py index 9b9278f2d..e1791356a 100644 --- a/src/winml/modelkit/commands/eval.py +++ b/src/winml/modelkit/commands/eval.py @@ -61,22 +61,19 @@ @click.option( "--samples", type=int, - default=100, - show_default=True, - help="Number of dataset samples.", + default=None, + help="Number of dataset samples (default: from dataset config).", ) @click.option( "--split", type=str, - default="validation", - show_default=True, - help="Dataset split.", + default=None, + help="Dataset split (default: from dataset config).", ) @click.option( "--shuffle/--no-shuffle", - default=True, - show_default=True, - help="Shuffle dataset before sampling.", + default=None, + help="Shuffle dataset before sampling (default: from dataset config).", ) @click.option( "--streaming", @@ -125,9 +122,9 @@ def eval( dataset_name: str | None, task: str | None, device: str, - samples: int, - split: str, - shuffle: bool, + samples: int | None, + split: str | None, + shuffle: bool | None, streaming: bool, column: tuple[str, ...], label_mapping: Path | None, @@ -213,15 +210,24 @@ def eval( from ..datasets.config import DatasetConfig from ..eval import WinMLEvaluationConfig, evaluate + explicit: set[str] = set() + if samples is not None: + explicit.add("samples") + if split is not None: + explicit.add("split") + if shuffle is not None: + explicit.add("shuffle") + ds_config = DatasetConfig( path=dataset_path, name=dataset_name, - split=split, - samples=samples, - shuffle=shuffle, + split=split if split is not None else "validation", + samples=samples if samples is not None else 100, + shuffle=shuffle if shuffle is not None else True, columns_mapping=columns_mapping, label_mapping=parsed_label_mapping, streaming=streaming, + explicit_fields=frozenset(explicit), ) config = WinMLEvaluationConfig( diff --git a/src/winml/modelkit/datasets/config.py b/src/winml/modelkit/datasets/config.py index a431eef82..f69f5ff60 100644 --- a/src/winml/modelkit/datasets/config.py +++ b/src/winml/modelkit/datasets/config.py @@ -36,6 +36,9 @@ class DatasetConfig: columns_mapping: dict[str, str] = field(default_factory=dict) label_mapping: dict[str, int] | None = None streaming: bool = False + # Tracks which fields were explicitly set by the caller (e.g. CLI). + # Not serialized; used by evaluate.py to merge user overrides onto defaults. + explicit_fields: frozenset[str] = field(default_factory=frozenset, repr=False, compare=False) def to_dict(self) -> dict[str, Any]: """Convert to dictionary for serialization.""" diff --git a/src/winml/modelkit/eval/evaluate.py b/src/winml/modelkit/eval/evaluate.py index a0bfbb391..ae3351cd6 100644 --- a/src/winml/modelkit/eval/evaluate.py +++ b/src/winml/modelkit/eval/evaluate.py @@ -154,16 +154,10 @@ def evaluate(config: WinMLEvaluationConfig) -> EvalResult: ) user_dataset = config.dataset config.dataset = deepcopy(default) - # Preserve user-specified values that differ from DatasetConfig class defaults - _cls_default = DatasetConfig() - if user_dataset.samples != _cls_default.samples: - config.dataset.samples = user_dataset.samples - if user_dataset.split != _cls_default.split: - config.dataset.split = user_dataset.split - if user_dataset.shuffle != _cls_default.shuffle: - config.dataset.shuffle = user_dataset.shuffle - if user_dataset.seed != _cls_default.seed: - config.dataset.seed = user_dataset.seed + # Apply fields the caller explicitly set (tracked via explicit_fields sentinel). + for f in ("samples", "split", "shuffle", "seed"): + if f in user_dataset.explicit_fields: + setattr(config.dataset, f, getattr(user_dataset, f)) logger.info( "Using default dataset for %s: %s", config.task, diff --git a/src/winml/modelkit/export/htp/exporter.py b/src/winml/modelkit/export/htp/exporter.py index f56371772..055781f8d 100644 --- a/src/winml/modelkit/export/htp/exporter.py +++ b/src/winml/modelkit/export/htp/exporter.py @@ -295,9 +295,8 @@ def export( export_time = time.time() - start_time self._export_stats["export_time"] = export_time self._export_stats["hierarchy_modules"] = len(self._hierarchy_data) - self._export_stats["onnx_nodes"] = len(onnx_model.graph.node) - embedded_count = len(self._tagged_nodes) if self.embed_hierarchy_attributes else 0 - self._export_stats["tagged_nodes"] = embedded_count + total_nodes = len(onnx_model.graph.node) + self._export_stats["onnx_nodes"] = total_nodes # Calculate empty tags (should be 0 with our implementation) empty_tag_count = sum( @@ -305,10 +304,7 @@ def export( ) self._export_stats["empty_tags"] = empty_tag_count - # Calculate coverage percentage - total_nodes = len(onnx_model.graph.node) - coverage = (embedded_count / total_nodes * 100.0) if total_nodes > 0 else 0.0 - self._export_stats["coverage_percentage"] = coverage + self._update_tag_stats(total_nodes) # Update monitor with actual export time monitor.data.export_time = export_time @@ -493,6 +489,18 @@ def _get_optimum_patcher(model: nn.Module, task: str | None) -> Any: ) return contextlib.nullcontext() + def _update_tag_stats(self, total_nodes: int) -> None: + """Update tagged_nodes and coverage_percentage in export stats. + + Centralises the embed-aware calculation so _apply_hierarchy_tags and + the final stats block in export() always stay in sync. + """ + embedded_count = len(self._tagged_nodes) if self.embed_hierarchy_attributes else 0 + self._export_stats["tagged_nodes"] = embedded_count + self._export_stats["coverage_percentage"] = ( + embedded_count / total_nodes * 100.0 if total_nodes > 0 else 0.0 + ) + def _initialize_node_tagger(self, enable_operation_fallback: bool) -> None: """Create node tagger internally.""" self._node_tagger = create_node_tagger_from_hierarchy( @@ -510,9 +518,9 @@ def _apply_hierarchy_tags(self, onnx_model: onnx.ModelProto) -> None: self._tagging_stats = stats # Update export stats - self._export_stats["onnx_nodes"] = len(onnx_model.graph.node) - embedded_count = len(self._tagged_nodes) if self.embed_hierarchy_attributes else 0 - self._export_stats["tagged_nodes"] = embedded_count + total_nodes = len(onnx_model.graph.node) + self._export_stats["onnx_nodes"] = total_nodes + self._update_tag_stats(total_nodes) # Calculate empty tags (should be 0 with our implementation) empty_tag_count = sum( @@ -520,11 +528,6 @@ def _apply_hierarchy_tags(self, onnx_model: onnx.ModelProto) -> None: ) self._export_stats["empty_tags"] = empty_tag_count - # Calculate coverage percentage - total_nodes = len(onnx_model.graph.node) - coverage = (embedded_count / total_nodes * 100.0) if total_nodes > 0 else 0.0 - self._export_stats["coverage_percentage"] = coverage - def _embed_graph_metadata( self, onnx_model: onnx.ModelProto, export_config: WinMLExportConfig ) -> None: diff --git a/tests/unit/eval/test_eval.py b/tests/unit/eval/test_eval.py index 5e208c1d2..677f6a43a 100644 --- a/tests/unit/eval/test_eval.py +++ b/tests/unit/eval/test_eval.py @@ -875,7 +875,7 @@ def test_user_samples_preserved_when_no_dataset_path(self) -> None: config = WinMLEvaluationConfig( model_id="test/model", task="image-classification", - dataset=DatasetConfig(samples=20), + dataset=DatasetConfig(samples=20, explicit_fields=frozenset({"samples"})), ) with ( diff --git a/tests/unit/export/test_htp_exporter_stats.py b/tests/unit/export/test_htp_exporter_stats.py index 827d6ab01..d5a283cb2 100644 --- a/tests/unit/export/test_htp_exporter_stats.py +++ b/tests/unit/export/test_htp_exporter_stats.py @@ -8,7 +8,7 @@ from unittest.mock import MagicMock -from winml.modelkit.export.htp.exporter import HTPExporter +from winml.modelkit.export.htp import HTPExporter class TestHTPExporterTaggedNodesStats: