From 0d34b721bc3c0e53f8f1e0c6562a104c6c6616ef Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Mon, 27 Apr 2026 22:13:09 +0800 Subject: [PATCH 1/4] =?UTF-8?q?fix:=20DML/GPU=20build=20crash=20=E2=80=94?= =?UTF-8?q?=20compiled=5Fpath=20set=20without=20checking=20file=20exists?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compile runs with enable_ep_context=false (DML), no EPContext file is produced. The build pipeline unconditionally set current_path to the non-existent compiled_path, causing FileNotFoundError downstream. Now checks compiled_path.exists() before updating current_path, so the pipeline falls through to the previous stage's output (e.g. quantized.onnx). Fixes #396 --- src/winml/modelkit/commands/build.py | 30 ++++---- tests/unit/commands/test_build.py | 101 +++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 14 deletions(-) diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py index d35ea0882..80b17135c 100644 --- a/src/winml/modelkit/commands/build.py +++ b/src/winml/modelkit/commands/build.py @@ -952,25 +952,27 @@ def _run_compile_stage( and Path(compile_result.output_path).resolve() != compiled_path.resolve() ): copy_onnx_model(compile_result.output_path, compiled_path) - current_path = compiled_path + if compiled_path.exists(): + current_path = compiled_path _compile_elapsed = time.monotonic() - t0 sl.set_done(_compile_elapsed) # Graph summary - try: - summary = get_onnx_graph_summary(compiled_path) - op_parts = ", ".join( - f"[cyan]{op}[/cyan] ({count})" - for op, count in list(summary["op_counts"].items())[:8] - ) - sl.detail(f"[bold]Graph:[/bold] {op_parts}") - except Exception: - logger.debug("Could not load graph summary", exc_info=True) + if compiled_path.exists(): + try: + summary = get_onnx_graph_summary(compiled_path) + op_parts = ", ".join( + f"[cyan]{op}[/cyan] ({count})" + for op, count in list(summary["op_counts"].items())[:8] + ) + sl.detail(f"[bold]Graph:[/bold] {op_parts}") + except Exception: + logger.debug("Could not load graph summary", exc_info=True) - sl.artifact( - str(compiled_path), - _safe_size(compiled_path), - ) + sl.artifact( + str(compiled_path), + _safe_size(compiled_path), + ) stage_timings.append(("Compile", _compile_elapsed)) return current_path diff --git a/tests/unit/commands/test_build.py b/tests/unit/commands/test_build.py index 12fdcaf77..e64edb0ee 100644 --- a/tests/unit/commands/test_build.py +++ b/tests/unit/commands/test_build.py @@ -789,3 +789,104 @@ def test_no_optimize_default_not_present( extra = mock_build_api.call_args.kwargs["extra_kwargs"] assert "skip_optimize" not in extra + + +# ============================================================================= +# _run_compile_stage UNIT TESTS +# ============================================================================= + + +class TestRunCompileStageNoOutput: + """Test _run_compile_stage when compile produces no output file. + + Reproduces the DML/GPU bug (#396): compile succeeds with + enable_ep_context=False, producing no EPContext file. Before the + fix, current_path was set to the non-existent compiled_path, + causing FileNotFoundError downstream. + """ + + @patch("winml.modelkit.utils.console.get_onnx_graph_summary") + @patch("winml.modelkit.utils.console.StageLive") + @patch("winml.modelkit.compiler.compile_onnx") + def test_returns_current_path_when_compiled_missing( + self, + mock_compile: MagicMock, + mock_stage_live: MagicMock, + mock_graph_summary: MagicMock, + tmp_path: Path, + ) -> None: + """When compile produces no output file, current_path must stay unchanged.""" + from winml.modelkit.commands.build import _run_compile_stage + from winml.modelkit.compiler.configs import WinMLCompileConfig + from winml.modelkit.compiler.result import CompileResult + from winml.modelkit.config import WinMLBuildConfig + + # Setup: compile "succeeds" but output_path is None (DML scenario) + mock_compile.return_value = CompileResult( + success=True, + output_path=None, # No EPContext produced + ) + mock_stage_live.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_stage_live.return_value.__exit__ = MagicMock(return_value=False) + + input_path = tmp_path / "quantized.onnx" + input_path.write_bytes(b"dummy") + compiled_path = tmp_path / "compiled.onnx" # Does NOT exist + + config = WinMLBuildConfig(compile=WinMLCompileConfig.for_dml()) + timings: list[tuple[str, float | None]] = [] + + result = _run_compile_stage( + config=config, + current_path=input_path, + compiled_path=compiled_path, + stage_timings=timings, + ) + + # current_path must stay at input_path, NOT compiled_path + assert result == input_path + assert not compiled_path.exists() + + @patch("winml.modelkit.utils.console.get_onnx_graph_summary") + @patch("winml.modelkit.utils.console.StageLive") + @patch("winml.modelkit.compiler.compile_onnx") + @patch("winml.modelkit.onnx.external_data.copy_onnx_model") + def test_returns_compiled_path_when_file_exists( + self, + mock_copy: MagicMock, + mock_compile: MagicMock, + mock_stage_live: MagicMock, + mock_graph_summary: MagicMock, + tmp_path: Path, + ) -> None: + """When compile produces an output file, current_path should update.""" + from winml.modelkit.commands.build import _run_compile_stage + from winml.modelkit.compiler.configs import WinMLCompileConfig + from winml.modelkit.compiler.result import CompileResult + from winml.modelkit.config import WinMLBuildConfig + + input_path = tmp_path / "quantized.onnx" + input_path.write_bytes(b"dummy") + compiled_path = tmp_path / "compiled.onnx" + compiled_path.write_bytes(b"compiled_model") # File EXISTS + + mock_compile.return_value = CompileResult( + success=True, + output_path=str(compiled_path), + ) + mock_stage_live.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_stage_live.return_value.__exit__ = MagicMock(return_value=False) + mock_graph_summary.return_value = {"op_counts": {"EPContext": 1}} + + config = WinMLBuildConfig(compile=WinMLCompileConfig.for_qnn()) + timings: list[tuple[str, float | None]] = [] + + result = _run_compile_stage( + config=config, + current_path=input_path, + compiled_path=compiled_path, + stage_timings=timings, + ) + + # current_path should be updated to compiled_path + assert result == compiled_path From 7dcb34d1906c83924eb8b60e0799d7866662cb98 Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Wed, 29 Apr 2026 12:15:52 +0800 Subject: [PATCH 2/4] refactor: skip compile stage for EPs with enable_ep_context=False DML and CPU don't produce EPContext output, so running compile_onnx for them is pure overhead. Skip the stage early when enable_ep_context=False rather than running compile and silently falling back on missing output. Also replace the compiled_path.exists() silent fallback with an explicit RuntimeError for EPs that do expect EPContext output (e.g. QNN), so silent failures are no longer swallowed. --- src/winml/modelkit/commands/build.py | 42 +++++++++++-------- tests/unit/commands/test_build.py | 63 +++++++++++++++++----------- 2 files changed, 63 insertions(+), 42 deletions(-) diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py index 0f17a20a1..5dd27720b 100644 --- a/src/winml/modelkit/commands/build.py +++ b/src/winml/modelkit/commands/build.py @@ -941,10 +941,16 @@ def _run_compile_stage( if config.compile is None: return current_path + # EPs that don't produce EPContext (e.g. DML, CPU) have nothing to compile + # offline — skip the stage so the pipeline carries forward the previous output. + ep_cfg = getattr(config.compile, "ep_config", None) + if ep_cfg is not None and not ep_cfg.enable_ep_context: + return current_path + with StageLive("compile", console) as sl: _cp = "" - if hasattr(config.compile, "ep_config") and config.compile.ep_config: - _cp = f" for {config.compile.ep_config.provider.upper()}" + if ep_cfg is not None: + _cp = f" for {ep_cfg.provider.upper()}" sl.set_status(f"Compiling{_cp}...") t0 = time.monotonic() compile_result = compile_onnx( @@ -961,27 +967,27 @@ def _run_compile_stage( and Path(compile_result.output_path).resolve() != compiled_path.resolve() ): copy_onnx_model(compile_result.output_path, compiled_path) - if compiled_path.exists(): - current_path = compiled_path + if not compiled_path.exists(): + raise RuntimeError(f"Compile reported success but output not found: {compiled_path}") + current_path = compiled_path _compile_elapsed = time.monotonic() - t0 sl.set_done(_compile_elapsed) # Graph summary - if compiled_path.exists(): - try: - summary = get_onnx_graph_summary(compiled_path) - op_parts = ", ".join( - f"[cyan]{op}[/cyan] ({count})" - for op, count in list(summary["op_counts"].items())[:8] - ) - sl.detail(f"[bold]Graph:[/bold] {op_parts}") - except Exception: - logger.debug("Could not load graph summary", exc_info=True) - - sl.artifact( - str(compiled_path), - _safe_size(compiled_path), + try: + summary = get_onnx_graph_summary(compiled_path) + op_parts = ", ".join( + f"[cyan]{op}[/cyan] ({count})" + for op, count in list(summary["op_counts"].items())[:8] ) + sl.detail(f"[bold]Graph:[/bold] {op_parts}") + except Exception: + logger.debug("Could not load graph summary", exc_info=True) + + sl.artifact( + str(compiled_path), + _safe_size(compiled_path), + ) stage_timings.append(("Compile", _compile_elapsed)) return current_path diff --git a/tests/unit/commands/test_build.py b/tests/unit/commands/test_build.py index e64edb0ee..4025779e8 100644 --- a/tests/unit/commands/test_build.py +++ b/tests/unit/commands/test_build.py @@ -797,35 +797,53 @@ def test_no_optimize_default_not_present( class TestRunCompileStageNoOutput: - """Test _run_compile_stage when compile produces no output file. + """Test _run_compile_stage EP-context skipping and output validation.""" - Reproduces the DML/GPU bug (#396): compile succeeds with - enable_ep_context=False, producing no EPContext file. Before the - fix, current_path was set to the non-existent compiled_path, - causing FileNotFoundError downstream. - """ + @patch("winml.modelkit.compiler.compile_onnx") + def test_dml_skips_compile_entirely( + self, + mock_compile: MagicMock, + tmp_path: Path, + ) -> None: + """EPs with enable_ep_context=False (DML, CPU) skip compile_onnx entirely.""" + from winml.modelkit.commands.build import _run_compile_stage + from winml.modelkit.compiler.configs import WinMLCompileConfig + from winml.modelkit.config import WinMLBuildConfig + + input_path = tmp_path / "quantized.onnx" + input_path.write_bytes(b"dummy") + compiled_path = tmp_path / "compiled.onnx" + + config = WinMLBuildConfig(compile=WinMLCompileConfig.for_dml()) + timings: list[tuple[str, float | None]] = [] + + result = _run_compile_stage( + config=config, + current_path=input_path, + compiled_path=compiled_path, + stage_timings=timings, + ) + + mock_compile.assert_not_called() + assert result == input_path @patch("winml.modelkit.utils.console.get_onnx_graph_summary") @patch("winml.modelkit.utils.console.StageLive") @patch("winml.modelkit.compiler.compile_onnx") - def test_returns_current_path_when_compiled_missing( + def test_raises_when_ep_context_expected_but_missing( self, mock_compile: MagicMock, mock_stage_live: MagicMock, mock_graph_summary: MagicMock, tmp_path: Path, ) -> None: - """When compile produces no output file, current_path must stay unchanged.""" + """When enable_ep_context=True and compile succeeds but file is absent, raise.""" from winml.modelkit.commands.build import _run_compile_stage from winml.modelkit.compiler.configs import WinMLCompileConfig from winml.modelkit.compiler.result import CompileResult from winml.modelkit.config import WinMLBuildConfig - # Setup: compile "succeeds" but output_path is None (DML scenario) - mock_compile.return_value = CompileResult( - success=True, - output_path=None, # No EPContext produced - ) + mock_compile.return_value = CompileResult(success=True, output_path=None) mock_stage_live.return_value.__enter__ = MagicMock(return_value=MagicMock()) mock_stage_live.return_value.__exit__ = MagicMock(return_value=False) @@ -833,19 +851,16 @@ def test_returns_current_path_when_compiled_missing( input_path.write_bytes(b"dummy") compiled_path = tmp_path / "compiled.onnx" # Does NOT exist - config = WinMLBuildConfig(compile=WinMLCompileConfig.for_dml()) + config = WinMLBuildConfig(compile=WinMLCompileConfig.for_qnn()) timings: list[tuple[str, float | None]] = [] - result = _run_compile_stage( - config=config, - current_path=input_path, - compiled_path=compiled_path, - stage_timings=timings, - ) - - # current_path must stay at input_path, NOT compiled_path - assert result == input_path - assert not compiled_path.exists() + with pytest.raises(RuntimeError, match="output not found"): + _run_compile_stage( + config=config, + current_path=input_path, + compiled_path=compiled_path, + stage_timings=timings, + ) @patch("winml.modelkit.utils.console.get_onnx_graph_summary") @patch("winml.modelkit.utils.console.StageLive") From 45992de92f4c89ec0fb27cc107bba1c6be82927f Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Wed, 29 Apr 2026 14:50:03 +0800 Subject: [PATCH 3/4] feat: disable compile stage by default in winml build --no-compile/--compile flag pair replaces the previous --no-compile is_flag, with default=True (no-compile). Compilation is now opt-in: users pass --compile to enable it, or keep the default to skip. --- src/winml/modelkit/commands/build.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py index 5dd27720b..b3513beae 100644 --- a/src/winml/modelkit/commands/build.py +++ b/src/winml/modelkit/commands/build.py @@ -260,10 +260,10 @@ def _build_modules( help="Skip quantization (overrides config)", ) @click.option( - "--no-compile", - is_flag=True, - default=False, - help="Skip compilation (overrides config)", + "--no-compile/--compile", + "no_compile", + default=True, + help="Skip compilation (overrides config). Default: skip.", ) @click.option( "--ep", From c7c1d01c05b2d626ce6589abeb14fdc45d322948 Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Wed, 29 Apr 2026 15:28:26 +0800 Subject: [PATCH 4/4] fix: skip compile stage for EPs with enable_ep_context=False WinMLCompileConfig.for_provider() now checks enable_ep_context on the factory result and returns None for EPs that don't produce EPContext (dml, cpu, cuda, nv_tensorrt_rtx, vitisai, migraphx). This fixes the DML build crash (#396) where compiled_path was set without checking the file exists, by preventing DML from entering the compile stage at all. Also changes --no-compile CLI default to True (compile disabled by default) and adds a RuntimeError when compile reports success but the output file is missing, replacing the previous silent fallback. --- src/winml/modelkit/commands/build.py | 10 ++---- src/winml/modelkit/compiler/configs.py | 6 +++- tests/unit/commands/test_build.py | 9 +++--- tests/unit/compiler/test_compiler_configs.py | 29 +++++++++++++---- tests/unit/config/test_build.py | 33 +++++++++---------- tests/unit/config/test_build_onnx.py | 34 ++++++++++++-------- 6 files changed, 69 insertions(+), 52 deletions(-) diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py index b3513beae..3c1f14c23 100644 --- a/src/winml/modelkit/commands/build.py +++ b/src/winml/modelkit/commands/build.py @@ -941,16 +941,10 @@ def _run_compile_stage( if config.compile is None: return current_path - # EPs that don't produce EPContext (e.g. DML, CPU) have nothing to compile - # offline — skip the stage so the pipeline carries forward the previous output. - ep_cfg = getattr(config.compile, "ep_config", None) - if ep_cfg is not None and not ep_cfg.enable_ep_context: - return current_path - with StageLive("compile", console) as sl: _cp = "" - if ep_cfg is not None: - _cp = f" for {ep_cfg.provider.upper()}" + if hasattr(config.compile, "ep_config") and config.compile.ep_config: + _cp = f" for {config.compile.ep_config.provider.upper()}" sl.set_status(f"Compiling{_cp}...") t0 = time.monotonic() compile_result = compile_onnx( diff --git a/src/winml/modelkit/compiler/configs.py b/src/winml/modelkit/compiler/configs.py index 5ba44b980..e0c015a93 100644 --- a/src/winml/modelkit/compiler/configs.py +++ b/src/winml/modelkit/compiler/configs.py @@ -111,7 +111,11 @@ def for_provider(cls, provider: str | None) -> WinMLCompileConfig | None: } factory = factories.get(provider) if factory: - return factory() + config = factory() + # EPs that don't produce EPContext have no offline compile step + if not config.ep_config.enable_ep_context: + return None + return config # Generic fallback for unknown/custom providers return cls(ep_config=EPConfig(provider=provider, enable_ep_context=False)) diff --git a/tests/unit/commands/test_build.py b/tests/unit/commands/test_build.py index 4025779e8..f05ec4010 100644 --- a/tests/unit/commands/test_build.py +++ b/tests/unit/commands/test_build.py @@ -797,24 +797,23 @@ def test_no_optimize_default_not_present( class TestRunCompileStageNoOutput: - """Test _run_compile_stage EP-context skipping and output validation.""" + """Test _run_compile_stage output validation.""" @patch("winml.modelkit.compiler.compile_onnx") - def test_dml_skips_compile_entirely( + def test_none_compile_config_skips_stage( self, mock_compile: MagicMock, tmp_path: Path, ) -> None: - """EPs with enable_ep_context=False (DML, CPU) skip compile_onnx entirely.""" + """compile=None skips compile_onnx entirely and returns current_path unchanged.""" from winml.modelkit.commands.build import _run_compile_stage - from winml.modelkit.compiler.configs import WinMLCompileConfig from winml.modelkit.config import WinMLBuildConfig input_path = tmp_path / "quantized.onnx" input_path.write_bytes(b"dummy") compiled_path = tmp_path / "compiled.onnx" - config = WinMLBuildConfig(compile=WinMLCompileConfig.for_dml()) + config = WinMLBuildConfig(compile=None) timings: list[tuple[str, float | None]] = [] result = _run_compile_stage( diff --git a/tests/unit/compiler/test_compiler_configs.py b/tests/unit/compiler/test_compiler_configs.py index 21f7b232e..4af0043d2 100644 --- a/tests/unit/compiler/test_compiler_configs.py +++ b/tests/unit/compiler/test_compiler_configs.py @@ -299,15 +299,19 @@ class TestForProvider: "provider,expect_provider", [ (None, None), + # EPs that produce EPContext → compile config returned ("qnn", "qnn"), - ("dml", "dml"), - ("cuda", "cuda"), - ("nv_tensorrt_rtx", "nv_tensorrt_rtx"), ("openvino", "openvino"), - ("vitisai", "vitisai"), - ("migraphx", "migraphx"), - ("cpu", "cpu"), - ("custom_ep", "custom_ep"), # generic fallback + # EPs with enable_ep_context=False → no offline compile step → None + ("dml", None), + ("cpu", None), + ("cuda", None), + ("nv_tensorrt_rtx", None), + ("vitisai", None), + ("migraphx", None), + # Unknown/custom EPs use the generic fallback (enable_ep_context=False + # in the fallback does NOT apply the None rule — only known factories do) + ("custom_ep", "custom_ep"), ], ) def test_for_provider( @@ -323,6 +327,17 @@ def test_for_provider( assert result is not None assert result.ep_config.provider == expect_provider + @pytest.mark.parametrize( + "factory_name", + ["for_dml", "for_cpu", "for_cuda", "for_vitisai", "for_migraphx", "for_nv_tensorrt_rtx"], + ) + def test_direct_factory_still_works(self, factory_name: str) -> None: + """Low-level for_* factories are still callable directly even though + for_provider() returns None for these EPs.""" + config = getattr(WinMLCompileConfig, factory_name)() + assert config is not None + assert config.ep_config.enable_ep_context is False + def test_for_provider_custom_ep_no_context(self): """Custom EP fallback disables EP context.""" result = WinMLCompileConfig.for_provider("custom_ep") diff --git a/tests/unit/config/test_build.py b/tests/unit/config/test_build.py index 1a3a760d5..f43f105e5 100644 --- a/tests/unit/config/test_build.py +++ b/tests/unit/config/test_build.py @@ -1785,9 +1785,9 @@ def _mock_deps( ("npu", "auto", True, "uint8", "uint16", "qnn"), ("npu", "fp16", False, None, None, "qnn"), ("npu", "int8", True, "uint8", "uint8", "qnn"), - ("gpu", "auto", False, None, None, "dml"), - ("gpu", "int8", True, "uint8", "uint8", "dml"), - ("gpu", "fp16", False, None, None, "dml"), + ("gpu", "auto", False, None, None, None), + ("gpu", "int8", True, "uint8", "uint8", None), + ("gpu", "fp16", False, None, None, None), ("cpu", "auto", False, None, None, None), ("cpu", "int8", True, "uint8", "uint8", None), ("cpu", "int16", True, "int16", "uint16", None), @@ -2035,8 +2035,7 @@ def test_device_gpu_precision_fp16(self, tmp_path) -> None: assert result.exit_code == 0, f"CLI failed: {result.output}" data = json.loads(output_file.read_text()) assert data["quant"] is None - assert data["compile"] is not None - assert data["compile"]["execution_provider"] == "dml" + assert data["compile"] is None def test_device_cpu_precision_fp32(self, tmp_path) -> None: """--device cpu --precision fp32 → no quant, no compile.""" @@ -2607,10 +2606,9 @@ def test_raw_onnx_with_gpu(self, tmp_path) -> None: ): config = generate_onnx_build_config(str(onnx_file), device="gpu") - # GPU auto-precision is fp16 -> no quantization, compile=dml + # GPU auto-precision is fp16 -> no quantization, no compile (DML has no offline step) assert config.quant is None - assert config.compile is not None - assert config.compile.ep_config.provider == "dml" + assert config.compile is None def test_ep_override_forwarded(self, tmp_path) -> None: """Explicit ep parameter is forwarded to resolve_quant_compile_config.""" @@ -2631,8 +2629,8 @@ def test_ep_override_forwarded(self, tmp_path) -> None: ep="migraphx", ) - assert config.compile is not None - assert config.compile.ep_config.provider == "migraphx" + # migraphx has enable_ep_context=False → no offline compile step + assert config.compile is None # ============================================================================= @@ -2672,8 +2670,8 @@ def test_npu_returns_quant_and_compile(self) -> None: assert isinstance(compile_cfg, WinMLCompileConfig) assert compile_cfg.ep_config.provider == "qnn" - def test_gpu_returns_none_quant_and_dml_compile(self) -> None: - """device=gpu returns (None, WinMLCompileConfig(dml)).""" + def test_gpu_returns_none_quant_and_none_compile(self) -> None: + """device=gpu returns (None, None) — DML has no offline compile step.""" with patch( "winml.modelkit.sysinfo.resolve_device", return_value=("gpu", ["gpu", "cpu"]), @@ -2681,8 +2679,7 @@ def test_gpu_returns_none_quant_and_dml_compile(self) -> None: quant, compile_cfg = resolve_quant_compile_config(device="gpu") assert quant is None - assert isinstance(compile_cfg, WinMLCompileConfig) - assert compile_cfg.ep_config.provider == "dml" + assert compile_cfg is None def test_cpu_returns_none_none(self) -> None: """device=cpu returns (None, None) since CPU has no compile provider.""" @@ -2696,7 +2693,10 @@ def test_cpu_returns_none_none(self) -> None: assert compile_cfg is None def test_ep_override_changes_provider(self) -> None: - """Explicit ep overrides the default device-to-provider mapping.""" + """Explicit ep overrides the default device-to-provider mapping. + + nv_tensorrt_rtx has enable_ep_context=False so for_provider returns None. + """ with patch( "winml.modelkit.sysinfo.resolve_device", return_value=("gpu", ["gpu", "cpu"]), @@ -2706,8 +2706,7 @@ def test_ep_override_changes_provider(self) -> None: ep="nv_tensorrt_rtx", ) - assert compile_cfg is not None - assert compile_cfg.ep_config.provider == "nv_tensorrt_rtx" + assert compile_cfg is None def test_task_forwarded_to_resolve_precision(self) -> None: """task parameter is forwarded to resolve_precision. diff --git a/tests/unit/config/test_build_onnx.py b/tests/unit/config/test_build_onnx.py index 51112f17a..d945245d3 100644 --- a/tests/unit/config/test_build_onnx.py +++ b/tests/unit/config/test_build_onnx.py @@ -597,7 +597,11 @@ def test_compiled_does_not_call_resolve_quant_compile(self, tmp_path) -> None: mock_resolve.assert_not_called() def test_raw_onnx_with_gpu(self, tmp_path) -> None: - """Raw ONNX + device=gpu resolves quant=None, compile=dml.""" + """Raw ONNX + device=gpu resolves quant=None, compile=None. + + DML has enable_ep_context=False so for_provider("dml") returns None — + no offline compile step is needed. + """ onnx_file = tmp_path / "model.onnx" onnx_file.write_bytes(b"fake") @@ -611,13 +615,15 @@ def test_raw_onnx_with_gpu(self, tmp_path) -> None: ): config = generate_onnx_build_config(str(onnx_file), device="gpu") - # GPU auto-precision is fp16 -> no quantization, compile=dml + # GPU auto-precision is fp16 -> no quantization; DML has no EPContext step assert config.quant is None - assert config.compile is not None - assert config.compile.ep_config.provider == "dml" + assert config.compile is None def test_ep_override_forwarded(self, tmp_path) -> None: - """Explicit ep parameter is forwarded to resolve_quant_compile_config.""" + """Explicit ep parameter is forwarded to resolve_quant_compile_config. + + migraphx has enable_ep_context=False so for_provider("migraphx") returns None. + """ onnx_file = tmp_path / "model.onnx" onnx_file.write_bytes(b"fake") @@ -635,8 +641,7 @@ def test_ep_override_forwarded(self, tmp_path) -> None: ep="migraphx", ) - assert config.compile is not None - assert config.compile.ep_config.provider == "migraphx" + assert config.compile is None # ============================================================================= @@ -676,8 +681,8 @@ def test_npu_returns_quant_and_compile(self) -> None: assert isinstance(compile_cfg, WinMLCompileConfig) assert compile_cfg.ep_config.provider == "qnn" - def test_gpu_returns_none_quant_and_dml_compile(self) -> None: - """device=gpu returns (None, WinMLCompileConfig(dml)).""" + def test_gpu_returns_none_quant_and_none_compile(self) -> None: + """device=gpu returns (None, None) — DML has no EPContext step.""" with patch( "winml.modelkit.sysinfo.resolve_device", return_value=("gpu", ["gpu", "cpu"]), @@ -685,8 +690,7 @@ def test_gpu_returns_none_quant_and_dml_compile(self) -> None: quant, compile_cfg = resolve_quant_compile_config(device="gpu") assert quant is None - assert isinstance(compile_cfg, WinMLCompileConfig) - assert compile_cfg.ep_config.provider == "dml" + assert compile_cfg is None def test_cpu_returns_none_none(self) -> None: """device=cpu returns (None, None) since CPU has no compile provider.""" @@ -700,7 +704,10 @@ def test_cpu_returns_none_none(self) -> None: assert compile_cfg is None def test_ep_override_changes_provider(self) -> None: - """Explicit ep overrides the default device-to-provider mapping.""" + """Explicit ep overrides the default device-to-provider mapping. + + nv_tensorrt_rtx has enable_ep_context=False so for_provider returns None. + """ with patch( "winml.modelkit.sysinfo.resolve_device", return_value=("gpu", ["gpu", "cpu"]), @@ -710,8 +717,7 @@ def test_ep_override_changes_provider(self) -> None: ep="nv_tensorrt_rtx", ) - assert compile_cfg is not None - assert compile_cfg.ep_config.provider == "nv_tensorrt_rtx" + assert compile_cfg is None def test_task_forwarded_to_resolve_precision(self) -> None: """task parameter is forwarded to resolve_precision.