From 0d34b721bc3c0e53f8f1e0c6562a104c6c6616ef Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Mon, 27 Apr 2026 22:13:09 +0800
Subject: [PATCH 1/4] =?UTF-8?q?fix:=20DML/GPU=20build=20crash=20=E2=80=94?=
 =?UTF-8?q?=20compiled=5Fpath=20set=20without=20checking=20file=20exists?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When compile runs with enable_ep_context=false (DML), no EPContext file
is produced. The build pipeline unconditionally set current_path to the
non-existent compiled_path, causing FileNotFoundError downstream.

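The compile stage now checks compiled_path.exists() before updating
current_path, so the pipeline falls through to the previous stage's
output (e.g. quantized.onnx). The graph summary and artifact line are
likewise emitted only when the compiled file exists.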

Fixes #396
---
 src/winml/modelkit/commands/build.py |  30 ++++----
 tests/unit/commands/test_build.py    | 101 +++++++++++++++++++++++++++
 2 files changed, 117 insertions(+), 14 deletions(-)

diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py
index d35ea0882..80b17135c 100644
--- a/src/winml/modelkit/commands/build.py
+++ b/src/winml/modelkit/commands/build.py
@@ -952,25 +952,27 @@ def _run_compile_stage(
             and Path(compile_result.output_path).resolve() != compiled_path.resolve()
         ):
             copy_onnx_model(compile_result.output_path, compiled_path)
-        current_path = compiled_path
+        if compiled_path.exists():
+            current_path = compiled_path
         _compile_elapsed = time.monotonic() - t0
         sl.set_done(_compile_elapsed)
 
         # Graph summary
-        try:
-            summary = get_onnx_graph_summary(compiled_path)
-            op_parts = ", ".join(
-                f"[cyan]{op}[/cyan] ({count})"
-                for op, count in list(summary["op_counts"].items())[:8]
-            )
-            sl.detail(f"[bold]Graph:[/bold]  {op_parts}")
-        except Exception:
-            logger.debug("Could not load graph summary", exc_info=True)
+        if compiled_path.exists():
+            try:
+                summary = get_onnx_graph_summary(compiled_path)
+                op_parts = ", ".join(
+                    f"[cyan]{op}[/cyan] ({count})"
+                    for op, count in list(summary["op_counts"].items())[:8]
+                )
+                sl.detail(f"[bold]Graph:[/bold]  {op_parts}")
+            except Exception:
+                logger.debug("Could not load graph summary", exc_info=True)
 
-        sl.artifact(
-            str(compiled_path),
-            _safe_size(compiled_path),
-        )
+            sl.artifact(
+                str(compiled_path),
+                _safe_size(compiled_path),
+            )
     stage_timings.append(("Compile", _compile_elapsed))
     return current_path
 
diff --git a/tests/unit/commands/test_build.py b/tests/unit/commands/test_build.py
index 12fdcaf77..e64edb0ee 100644
--- a/tests/unit/commands/test_build.py
+++ b/tests/unit/commands/test_build.py
@@ -789,3 +789,104 @@ def test_no_optimize_default_not_present(
 
         extra = mock_build_api.call_args.kwargs["extra_kwargs"]
         assert "skip_optimize" not in extra
+
+
+# =============================================================================
+# _run_compile_stage UNIT TESTS
+# =============================================================================
+
+
+class TestRunCompileStageNoOutput:
+    """Test _run_compile_stage when compile produces no output file.
+
+    Reproduces the DML/GPU bug (#396): compile succeeds with
+    enable_ep_context=False, producing no EPContext file.  Before the
+    fix, current_path was set to the non-existent compiled_path,
+    causing FileNotFoundError downstream.
+    """
+
+    @patch("winml.modelkit.utils.console.get_onnx_graph_summary")
+    @patch("winml.modelkit.utils.console.StageLive")
+    @patch("winml.modelkit.compiler.compile_onnx")
+    def test_returns_current_path_when_compiled_missing(
+        self,
+        mock_compile: MagicMock,
+        mock_stage_live: MagicMock,
+        mock_graph_summary: MagicMock,
+        tmp_path: Path,
+    ) -> None:
+        """When compile produces no output file, current_path must stay unchanged."""
+        from winml.modelkit.commands.build import _run_compile_stage
+        from winml.modelkit.compiler.configs import WinMLCompileConfig
+        from winml.modelkit.compiler.result import CompileResult
+        from winml.modelkit.config import WinMLBuildConfig
+
+        # Setup: compile "succeeds" but output_path is None (DML scenario)
+        mock_compile.return_value = CompileResult(
+            success=True,
+            output_path=None,  # No EPContext produced
+        )
+        mock_stage_live.return_value.__enter__ = MagicMock(return_value=MagicMock())
+        mock_stage_live.return_value.__exit__ = MagicMock(return_value=False)
+
+        input_path = tmp_path / "quantized.onnx"
+        input_path.write_bytes(b"dummy")
+        compiled_path = tmp_path / "compiled.onnx"  # Does NOT exist
+
+        config = WinMLBuildConfig(compile=WinMLCompileConfig.for_dml())
+        timings: list[tuple[str, float | None]] = []
+
+        result = _run_compile_stage(
+            config=config,
+            current_path=input_path,
+            compiled_path=compiled_path,
+            stage_timings=timings,
+        )
+
+        # current_path must stay at input_path, NOT compiled_path
+        assert result == input_path
+        assert not compiled_path.exists()
+
+    @patch("winml.modelkit.utils.console.get_onnx_graph_summary")
+    @patch("winml.modelkit.utils.console.StageLive")
+    @patch("winml.modelkit.compiler.compile_onnx")
+    @patch("winml.modelkit.onnx.external_data.copy_onnx_model")
+    def test_returns_compiled_path_when_file_exists(
+        self,
+        mock_copy: MagicMock,
+        mock_compile: MagicMock,
+        mock_stage_live: MagicMock,
+        mock_graph_summary: MagicMock,
+        tmp_path: Path,
+    ) -> None:
+        """When compile produces an output file, current_path should update."""
+        from winml.modelkit.commands.build import _run_compile_stage
+        from winml.modelkit.compiler.configs import WinMLCompileConfig
+        from winml.modelkit.compiler.result import CompileResult
+        from winml.modelkit.config import WinMLBuildConfig
+
+        input_path = tmp_path / "quantized.onnx"
+        input_path.write_bytes(b"dummy")
+        compiled_path = tmp_path / "compiled.onnx"
+        compiled_path.write_bytes(b"compiled_model")  # File EXISTS
+
+        mock_compile.return_value = CompileResult(
+            success=True,
+            output_path=str(compiled_path),
+        )
+        mock_stage_live.return_value.__enter__ = MagicMock(return_value=MagicMock())
+        mock_stage_live.return_value.__exit__ = MagicMock(return_value=False)
+        mock_graph_summary.return_value = {"op_counts": {"EPContext": 1}}
+
+        config = WinMLBuildConfig(compile=WinMLCompileConfig.for_qnn())
+        timings: list[tuple[str, float | None]] = []
+
+        result = _run_compile_stage(
+            config=config,
+            current_path=input_path,
+            compiled_path=compiled_path,
+            stage_timings=timings,
+        )
+
+        # current_path should be updated to compiled_path
+        assert result == compiled_path

From 7dcb34d1906c83924eb8b60e0799d7866662cb98 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Wed, 29 Apr 2026 12:15:52 +0800
Subject: [PATCH 2/4] refactor: skip compile stage for EPs with
 enable_ep_context=False

DML and CPU don't produce EPContext output, so running compile_onnx for
them is pure overhead. Skip the stage early when enable_ep_context=False
rather than running compile and silently falling back on missing output.

Also replace the compiled_path.exists() silent fallback with an explicit
RuntimeError for EPs that do expect EPContext output (e.g. QNN), so
silent failures are no longer swallowed.
---
 src/winml/modelkit/commands/build.py | 42 +++++++++++--------
 tests/unit/commands/test_build.py    | 63 +++++++++++++++++-----------
 2 files changed, 63 insertions(+), 42 deletions(-)

diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py
index 0f17a20a1..5dd27720b 100644
--- a/src/winml/modelkit/commands/build.py
+++ b/src/winml/modelkit/commands/build.py
@@ -941,10 +941,16 @@ def _run_compile_stage(
     if config.compile is None:
         return current_path
 
+    # EPs that don't produce EPContext (e.g. DML, CPU) have nothing to compile
+    # offline — skip the stage so the pipeline carries forward the previous output.
+    ep_cfg = getattr(config.compile, "ep_config", None)
+    if ep_cfg is not None and not ep_cfg.enable_ep_context:
+        return current_path
+
     with StageLive("compile", console) as sl:
         _cp = ""
-        if hasattr(config.compile, "ep_config") and config.compile.ep_config:
-            _cp = f" for {config.compile.ep_config.provider.upper()}"
+        if ep_cfg is not None:
+            _cp = f" for {ep_cfg.provider.upper()}"
         sl.set_status(f"Compiling{_cp}...")
         t0 = time.monotonic()
         compile_result = compile_onnx(
@@ -961,27 +967,27 @@ def _run_compile_stage(
             and Path(compile_result.output_path).resolve() != compiled_path.resolve()
         ):
             copy_onnx_model(compile_result.output_path, compiled_path)
-        if compiled_path.exists():
-            current_path = compiled_path
+        if not compiled_path.exists():
+            raise RuntimeError(f"Compile reported success but output not found: {compiled_path}")
+        current_path = compiled_path
         _compile_elapsed = time.monotonic() - t0
         sl.set_done(_compile_elapsed)
 
         # Graph summary
-        if compiled_path.exists():
-            try:
-                summary = get_onnx_graph_summary(compiled_path)
-                op_parts = ", ".join(
-                    f"[cyan]{op}[/cyan] ({count})"
-                    for op, count in list(summary["op_counts"].items())[:8]
-                )
-                sl.detail(f"[bold]Graph:[/bold]  {op_parts}")
-            except Exception:
-                logger.debug("Could not load graph summary", exc_info=True)
-
-            sl.artifact(
-                str(compiled_path),
-                _safe_size(compiled_path),
+        try:
+            summary = get_onnx_graph_summary(compiled_path)
+            op_parts = ", ".join(
+                f"[cyan]{op}[/cyan] ({count})"
+                for op, count in list(summary["op_counts"].items())[:8]
             )
+            sl.detail(f"[bold]Graph:[/bold]  {op_parts}")
+        except Exception:
+            logger.debug("Could not load graph summary", exc_info=True)
+
+        sl.artifact(
+            str(compiled_path),
+            _safe_size(compiled_path),
+        )
     stage_timings.append(("Compile", _compile_elapsed))
     return current_path
 
diff --git a/tests/unit/commands/test_build.py b/tests/unit/commands/test_build.py
index e64edb0ee..4025779e8 100644
--- a/tests/unit/commands/test_build.py
+++ b/tests/unit/commands/test_build.py
@@ -797,35 +797,53 @@ def test_no_optimize_default_not_present(
 
 
 class TestRunCompileStageNoOutput:
-    """Test _run_compile_stage when compile produces no output file.
+    """Test _run_compile_stage EP-context skipping and output validation."""
 
-    Reproduces the DML/GPU bug (#396): compile succeeds with
-    enable_ep_context=False, producing no EPContext file.  Before the
-    fix, current_path was set to the non-existent compiled_path,
-    causing FileNotFoundError downstream.
-    """
+    @patch("winml.modelkit.compiler.compile_onnx")
+    def test_dml_skips_compile_entirely(
+        self,
+        mock_compile: MagicMock,
+        tmp_path: Path,
+    ) -> None:
+        """EPs with enable_ep_context=False (DML, CPU) skip compile_onnx entirely."""
+        from winml.modelkit.commands.build import _run_compile_stage
+        from winml.modelkit.compiler.configs import WinMLCompileConfig
+        from winml.modelkit.config import WinMLBuildConfig
+
+        input_path = tmp_path / "quantized.onnx"
+        input_path.write_bytes(b"dummy")
+        compiled_path = tmp_path / "compiled.onnx"
+
+        config = WinMLBuildConfig(compile=WinMLCompileConfig.for_dml())
+        timings: list[tuple[str, float | None]] = []
+
+        result = _run_compile_stage(
+            config=config,
+            current_path=input_path,
+            compiled_path=compiled_path,
+            stage_timings=timings,
+        )
+
+        mock_compile.assert_not_called()
+        assert result == input_path
 
     @patch("winml.modelkit.utils.console.get_onnx_graph_summary")
     @patch("winml.modelkit.utils.console.StageLive")
     @patch("winml.modelkit.compiler.compile_onnx")
-    def test_returns_current_path_when_compiled_missing(
+    def test_raises_when_ep_context_expected_but_missing(
         self,
         mock_compile: MagicMock,
         mock_stage_live: MagicMock,
         mock_graph_summary: MagicMock,
         tmp_path: Path,
     ) -> None:
-        """When compile produces no output file, current_path must stay unchanged."""
+        """When enable_ep_context=True and compile succeeds but file is absent, raise."""
         from winml.modelkit.commands.build import _run_compile_stage
         from winml.modelkit.compiler.configs import WinMLCompileConfig
         from winml.modelkit.compiler.result import CompileResult
         from winml.modelkit.config import WinMLBuildConfig
 
-        # Setup: compile "succeeds" but output_path is None (DML scenario)
-        mock_compile.return_value = CompileResult(
-            success=True,
-            output_path=None,  # No EPContext produced
-        )
+        mock_compile.return_value = CompileResult(success=True, output_path=None)
         mock_stage_live.return_value.__enter__ = MagicMock(return_value=MagicMock())
         mock_stage_live.return_value.__exit__ = MagicMock(return_value=False)
 
@@ -833,19 +851,16 @@ def test_returns_current_path_when_compiled_missing(
         input_path.write_bytes(b"dummy")
         compiled_path = tmp_path / "compiled.onnx"  # Does NOT exist
 
-        config = WinMLBuildConfig(compile=WinMLCompileConfig.for_dml())
+        config = WinMLBuildConfig(compile=WinMLCompileConfig.for_qnn())
         timings: list[tuple[str, float | None]] = []
 
-        result = _run_compile_stage(
-            config=config,
-            current_path=input_path,
-            compiled_path=compiled_path,
-            stage_timings=timings,
-        )
-
-        # current_path must stay at input_path, NOT compiled_path
-        assert result == input_path
-        assert not compiled_path.exists()
+        with pytest.raises(RuntimeError, match="output not found"):
+            _run_compile_stage(
+                config=config,
+                current_path=input_path,
+                compiled_path=compiled_path,
+                stage_timings=timings,
+            )
 
     @patch("winml.modelkit.utils.console.get_onnx_graph_summary")
     @patch("winml.modelkit.utils.console.StageLive")

From 45992de92f4c89ec0fb27cc107bba1c6be82927f Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Wed, 29 Apr 2026 14:50:03 +0800
Subject: [PATCH 3/4] feat: disable compile stage by default in winml build

Replace the previous --no-compile is_flag option with a
--no-compile/--compile flag pair bound to no_compile, with default=True
(no-compile). Compilation is now opt-in: pass --compile to enable it;
omitting both flags keeps the default and skips the compile stage.
---
 src/winml/modelkit/commands/build.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py
index 5dd27720b..b3513beae 100644
--- a/src/winml/modelkit/commands/build.py
+++ b/src/winml/modelkit/commands/build.py
@@ -260,10 +260,10 @@ def _build_modules(
     help="Skip quantization (overrides config)",
 )
 @click.option(
-    "--no-compile",
-    is_flag=True,
-    default=False,
-    help="Skip compilation (overrides config)",
+    "--no-compile/--compile",
+    "no_compile",
+    default=True,
+    help="Skip compilation (overrides config). Default: skip.",
 )
 @click.option(
     "--ep",

From c7c1d01c05b2d626ce6589abeb14fdc45d322948 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Wed, 29 Apr 2026 15:28:26 +0800
Subject: [PATCH 4/4] fix: skip compile stage for EPs with
 enable_ep_context=False

WinMLCompileConfig.for_provider() now checks enable_ep_context on the
factory result and returns None for EPs that don't produce EPContext
(dml, cpu, cuda, nv_tensorrt_rtx, vitisai, migraphx). This fixes the
DML build crash (#396) where compiled_path was set without checking the
file exists, by preventing DML from entering the compile stage at all.

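This supersedes the early enable_ep_context check that patch 2/4 added
to _run_compile_stage, which is removed here. Unknown/custom providers
still receive the generic fallback config from for_provider().

The --no-compile CLI default of True (compile disabled by default,
patch 3/4) and the RuntimeError raised when compile reports success but
the output file is missing (patch 2/4, replacing the previous silent
fallback) come from the earlier patches in this series and are
unchanged by this commit.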
---
 src/winml/modelkit/commands/build.py         | 10 ++----
 src/winml/modelkit/compiler/configs.py       |  6 +++-
 tests/unit/commands/test_build.py            |  9 +++---
 tests/unit/compiler/test_compiler_configs.py | 29 +++++++++++++----
 tests/unit/config/test_build.py              | 33 +++++++++----------
 tests/unit/config/test_build_onnx.py         | 34 ++++++++++++--------
 6 files changed, 69 insertions(+), 52 deletions(-)

diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py
index b3513beae..3c1f14c23 100644
--- a/src/winml/modelkit/commands/build.py
+++ b/src/winml/modelkit/commands/build.py
@@ -941,16 +941,10 @@ def _run_compile_stage(
     if config.compile is None:
         return current_path
 
-    # EPs that don't produce EPContext (e.g. DML, CPU) have nothing to compile
-    # offline — skip the stage so the pipeline carries forward the previous output.
-    ep_cfg = getattr(config.compile, "ep_config", None)
-    if ep_cfg is not None and not ep_cfg.enable_ep_context:
-        return current_path
-
     with StageLive("compile", console) as sl:
         _cp = ""
-        if ep_cfg is not None:
-            _cp = f" for {ep_cfg.provider.upper()}"
+        if hasattr(config.compile, "ep_config") and config.compile.ep_config:
+            _cp = f" for {config.compile.ep_config.provider.upper()}"
         sl.set_status(f"Compiling{_cp}...")
         t0 = time.monotonic()
         compile_result = compile_onnx(
diff --git a/src/winml/modelkit/compiler/configs.py b/src/winml/modelkit/compiler/configs.py
index 5ba44b980..e0c015a93 100644
--- a/src/winml/modelkit/compiler/configs.py
+++ b/src/winml/modelkit/compiler/configs.py
@@ -111,7 +111,11 @@ def for_provider(cls, provider: str | None) -> WinMLCompileConfig | None:
         }
         factory = factories.get(provider)
         if factory:
-            return factory()
+            config = factory()
+            # EPs that don't produce EPContext have no offline compile step
+            if not config.ep_config.enable_ep_context:
+                return None
+            return config
         # Generic fallback for unknown/custom providers
         return cls(ep_config=EPConfig(provider=provider, enable_ep_context=False))
 
diff --git a/tests/unit/commands/test_build.py b/tests/unit/commands/test_build.py
index 4025779e8..f05ec4010 100644
--- a/tests/unit/commands/test_build.py
+++ b/tests/unit/commands/test_build.py
@@ -797,24 +797,23 @@ def test_no_optimize_default_not_present(
 
 
 class TestRunCompileStageNoOutput:
-    """Test _run_compile_stage EP-context skipping and output validation."""
+    """Test _run_compile_stage output validation."""
 
     @patch("winml.modelkit.compiler.compile_onnx")
-    def test_dml_skips_compile_entirely(
+    def test_none_compile_config_skips_stage(
         self,
         mock_compile: MagicMock,
         tmp_path: Path,
     ) -> None:
-        """EPs with enable_ep_context=False (DML, CPU) skip compile_onnx entirely."""
+        """compile=None skips compile_onnx entirely and returns current_path unchanged."""
         from winml.modelkit.commands.build import _run_compile_stage
-        from winml.modelkit.compiler.configs import WinMLCompileConfig
         from winml.modelkit.config import WinMLBuildConfig
 
         input_path = tmp_path / "quantized.onnx"
         input_path.write_bytes(b"dummy")
         compiled_path = tmp_path / "compiled.onnx"
 
-        config = WinMLBuildConfig(compile=WinMLCompileConfig.for_dml())
+        config = WinMLBuildConfig(compile=None)
         timings: list[tuple[str, float | None]] = []
 
         result = _run_compile_stage(
diff --git a/tests/unit/compiler/test_compiler_configs.py b/tests/unit/compiler/test_compiler_configs.py
index 21f7b232e..4af0043d2 100644
--- a/tests/unit/compiler/test_compiler_configs.py
+++ b/tests/unit/compiler/test_compiler_configs.py
@@ -299,15 +299,19 @@ class TestForProvider:
         "provider,expect_provider",
         [
             (None, None),
+            # EPs that produce EPContext → compile config returned
             ("qnn", "qnn"),
-            ("dml", "dml"),
-            ("cuda", "cuda"),
-            ("nv_tensorrt_rtx", "nv_tensorrt_rtx"),
             ("openvino", "openvino"),
-            ("vitisai", "vitisai"),
-            ("migraphx", "migraphx"),
-            ("cpu", "cpu"),
-            ("custom_ep", "custom_ep"),  # generic fallback
+            # EPs with enable_ep_context=False → no offline compile step → None
+            ("dml", None),
+            ("cpu", None),
+            ("cuda", None),
+            ("nv_tensorrt_rtx", None),
+            ("vitisai", None),
+            ("migraphx", None),
+            # Unknown/custom EPs use the generic fallback (enable_ep_context=False
+            # in the fallback does NOT apply the None rule — only known factories do)
+            ("custom_ep", "custom_ep"),
         ],
     )
     def test_for_provider(
@@ -323,6 +327,17 @@ def test_for_provider(
             assert result is not None
             assert result.ep_config.provider == expect_provider
 
+    @pytest.mark.parametrize(
+        "factory_name",
+        ["for_dml", "for_cpu", "for_cuda", "for_vitisai", "for_migraphx", "for_nv_tensorrt_rtx"],
+    )
+    def test_direct_factory_still_works(self, factory_name: str) -> None:
+        """Low-level for_* factories are still callable directly even though
+        for_provider() returns None for these EPs."""
+        config = getattr(WinMLCompileConfig, factory_name)()
+        assert config is not None
+        assert config.ep_config.enable_ep_context is False
+
     def test_for_provider_custom_ep_no_context(self):
         """Custom EP fallback disables EP context."""
         result = WinMLCompileConfig.for_provider("custom_ep")
diff --git a/tests/unit/config/test_build.py b/tests/unit/config/test_build.py
index 1a3a760d5..f43f105e5 100644
--- a/tests/unit/config/test_build.py
+++ b/tests/unit/config/test_build.py
@@ -1785,9 +1785,9 @@ def _mock_deps(
             ("npu", "auto", True, "uint8", "uint16", "qnn"),
             ("npu", "fp16", False, None, None, "qnn"),
             ("npu", "int8", True, "uint8", "uint8", "qnn"),
-            ("gpu", "auto", False, None, None, "dml"),
-            ("gpu", "int8", True, "uint8", "uint8", "dml"),
-            ("gpu", "fp16", False, None, None, "dml"),
+            ("gpu", "auto", False, None, None, None),
+            ("gpu", "int8", True, "uint8", "uint8", None),
+            ("gpu", "fp16", False, None, None, None),
             ("cpu", "auto", False, None, None, None),
             ("cpu", "int8", True, "uint8", "uint8", None),
             ("cpu", "int16", True, "int16", "uint16", None),
@@ -2035,8 +2035,7 @@ def test_device_gpu_precision_fp16(self, tmp_path) -> None:
         assert result.exit_code == 0, f"CLI failed: {result.output}"
         data = json.loads(output_file.read_text())
         assert data["quant"] is None
-        assert data["compile"] is not None
-        assert data["compile"]["execution_provider"] == "dml"
+        assert data["compile"] is None
 
     def test_device_cpu_precision_fp32(self, tmp_path) -> None:
         """--device cpu --precision fp32 → no quant, no compile."""
@@ -2607,10 +2606,9 @@ def test_raw_onnx_with_gpu(self, tmp_path) -> None:
         ):
             config = generate_onnx_build_config(str(onnx_file), device="gpu")
 
-        # GPU auto-precision is fp16 -> no quantization, compile=dml
+        # GPU auto-precision is fp16 -> no quantization, no compile (DML has no offline step)
         assert config.quant is None
-        assert config.compile is not None
-        assert config.compile.ep_config.provider == "dml"
+        assert config.compile is None
 
     def test_ep_override_forwarded(self, tmp_path) -> None:
         """Explicit ep parameter is forwarded to resolve_quant_compile_config."""
@@ -2631,8 +2629,8 @@ def test_ep_override_forwarded(self, tmp_path) -> None:
                 ep="migraphx",
             )
 
-        assert config.compile is not None
-        assert config.compile.ep_config.provider == "migraphx"
+        # migraphx has enable_ep_context=False → no offline compile step
+        assert config.compile is None
 
 
 # =============================================================================
@@ -2672,8 +2670,8 @@ def test_npu_returns_quant_and_compile(self) -> None:
         assert isinstance(compile_cfg, WinMLCompileConfig)
         assert compile_cfg.ep_config.provider == "qnn"
 
-    def test_gpu_returns_none_quant_and_dml_compile(self) -> None:
-        """device=gpu returns (None, WinMLCompileConfig(dml))."""
+    def test_gpu_returns_none_quant_and_none_compile(self) -> None:
+        """device=gpu returns (None, None) — DML has no offline compile step."""
         with patch(
             "winml.modelkit.sysinfo.resolve_device",
             return_value=("gpu", ["gpu", "cpu"]),
@@ -2681,8 +2679,7 @@ def test_gpu_returns_none_quant_and_dml_compile(self) -> None:
             quant, compile_cfg = resolve_quant_compile_config(device="gpu")
 
         assert quant is None
-        assert isinstance(compile_cfg, WinMLCompileConfig)
-        assert compile_cfg.ep_config.provider == "dml"
+        assert compile_cfg is None
 
     def test_cpu_returns_none_none(self) -> None:
         """device=cpu returns (None, None) since CPU has no compile provider."""
@@ -2696,7 +2693,10 @@ def test_cpu_returns_none_none(self) -> None:
         assert compile_cfg is None
 
     def test_ep_override_changes_provider(self) -> None:
-        """Explicit ep overrides the default device-to-provider mapping."""
+        """Explicit ep overrides the default device-to-provider mapping.
+
+        nv_tensorrt_rtx has enable_ep_context=False so for_provider returns None.
+        """
         with patch(
             "winml.modelkit.sysinfo.resolve_device",
             return_value=("gpu", ["gpu", "cpu"]),
@@ -2706,8 +2706,7 @@ def test_ep_override_changes_provider(self) -> None:
                 ep="nv_tensorrt_rtx",
             )
 
-        assert compile_cfg is not None
-        assert compile_cfg.ep_config.provider == "nv_tensorrt_rtx"
+        assert compile_cfg is None
 
     def test_task_forwarded_to_resolve_precision(self) -> None:
         """task parameter is forwarded to resolve_precision.
diff --git a/tests/unit/config/test_build_onnx.py b/tests/unit/config/test_build_onnx.py
index 51112f17a..d945245d3 100644
--- a/tests/unit/config/test_build_onnx.py
+++ b/tests/unit/config/test_build_onnx.py
@@ -597,7 +597,11 @@ def test_compiled_does_not_call_resolve_quant_compile(self, tmp_path) -> None:
         mock_resolve.assert_not_called()
 
     def test_raw_onnx_with_gpu(self, tmp_path) -> None:
-        """Raw ONNX + device=gpu resolves quant=None, compile=dml."""
+        """Raw ONNX + device=gpu resolves quant=None, compile=None.
+
+        DML has enable_ep_context=False so for_provider("dml") returns None —
+        no offline compile step is needed.
+        """
         onnx_file = tmp_path / "model.onnx"
         onnx_file.write_bytes(b"fake")
 
@@ -611,13 +615,15 @@ def test_raw_onnx_with_gpu(self, tmp_path) -> None:
         ):
             config = generate_onnx_build_config(str(onnx_file), device="gpu")
 
-        # GPU auto-precision is fp16 -> no quantization, compile=dml
+        # GPU auto-precision is fp16 -> no quantization; DML has no EPContext step
         assert config.quant is None
-        assert config.compile is not None
-        assert config.compile.ep_config.provider == "dml"
+        assert config.compile is None
 
     def test_ep_override_forwarded(self, tmp_path) -> None:
-        """Explicit ep parameter is forwarded to resolve_quant_compile_config."""
+        """Explicit ep parameter is forwarded to resolve_quant_compile_config.
+
+        migraphx has enable_ep_context=False so for_provider("migraphx") returns None.
+        """
         onnx_file = tmp_path / "model.onnx"
         onnx_file.write_bytes(b"fake")
 
@@ -635,8 +641,7 @@ def test_ep_override_forwarded(self, tmp_path) -> None:
                 ep="migraphx",
             )
 
-        assert config.compile is not None
-        assert config.compile.ep_config.provider == "migraphx"
+        assert config.compile is None
 
 
 # =============================================================================
@@ -676,8 +681,8 @@ def test_npu_returns_quant_and_compile(self) -> None:
         assert isinstance(compile_cfg, WinMLCompileConfig)
         assert compile_cfg.ep_config.provider == "qnn"
 
-    def test_gpu_returns_none_quant_and_dml_compile(self) -> None:
-        """device=gpu returns (None, WinMLCompileConfig(dml))."""
+    def test_gpu_returns_none_quant_and_none_compile(self) -> None:
+        """device=gpu returns (None, None) — DML has no EPContext step."""
         with patch(
             "winml.modelkit.sysinfo.resolve_device",
             return_value=("gpu", ["gpu", "cpu"]),
@@ -685,8 +690,7 @@ def test_gpu_returns_none_quant_and_dml_compile(self) -> None:
             quant, compile_cfg = resolve_quant_compile_config(device="gpu")
 
         assert quant is None
-        assert isinstance(compile_cfg, WinMLCompileConfig)
-        assert compile_cfg.ep_config.provider == "dml"
+        assert compile_cfg is None
 
     def test_cpu_returns_none_none(self) -> None:
         """device=cpu returns (None, None) since CPU has no compile provider."""
@@ -700,7 +704,10 @@ def test_cpu_returns_none_none(self) -> None:
         assert compile_cfg is None
 
     def test_ep_override_changes_provider(self) -> None:
-        """Explicit ep overrides the default device-to-provider mapping."""
+        """Explicit ep overrides the default device-to-provider mapping.
+
+        nv_tensorrt_rtx has enable_ep_context=False so for_provider returns None.
+        """
         with patch(
             "winml.modelkit.sysinfo.resolve_device",
             return_value=("gpu", ["gpu", "cpu"]),
@@ -710,8 +717,7 @@ def test_ep_override_changes_provider(self) -> None:
                 ep="nv_tensorrt_rtx",
             )
 
-        assert compile_cfg is not None
-        assert compile_cfg.ep_config.provider == "nv_tensorrt_rtx"
+        assert compile_cfg is None
 
     def test_task_forwarded_to_resolve_precision(self) -> None:
         """task parameter is forwarded to resolve_precision.