From 78bb9d1b1e4ccf5f6a4119b9fd37ccf96ccb7f4c Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Mon, 27 Apr 2026 18:13:39 +0800
Subject: [PATCH 01/10] fix: thread `ep` parameter through to WinMLSession

The `--ep` flag was silently dropped in the model construction path.
`WinMLAutoModel.from_pretrained()` and `from_onnx()` received the `ep`
value but never forwarded it to `WinMLPreTrainedModel`, which in turn
never passed it to `WinMLSession`. This caused `ModelCompiler` to fall
back to policy-based EP selection, which it does not support, resulting
in an empty provider type string and a runtime crash.

Fixes #402
---
 src/winml/modelkit/models/auto.py       | 3 +++
 src/winml/modelkit/models/winml/base.py | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/src/winml/modelkit/models/auto.py b/src/winml/modelkit/models/auto.py
index f0f55a4db..2864ddab8 100644
--- a/src/winml/modelkit/models/auto.py
+++ b/src/winml/modelkit/models/auto.py
@@ -192,6 +192,7 @@ def from_onnx(
                 config=None,
                 device=device,
                 session_options=session_options,
+                ep=ep,
             )
 
         # Resolve output directory
@@ -228,6 +229,7 @@ def from_onnx(
             config=None,  # No HF PretrainedConfig for bare ONNX builds
             device=device,
             session_options=session_options,
+            ep=ep,
         )
 
     @classmethod
@@ -425,6 +427,7 @@ def from_pretrained(
             onnx_path=onnx_path,
             config=hf_config,  # HF PretrainedConfig for pipeline compatibility
             device=device,  # pass user's original device string; WinMLSession handles "auto"
+            ep=resolved_ep,
         )
         model._build_config = config  # resolved build config (task, quant, compile)
         return model
diff --git a/src/winml/modelkit/models/winml/base.py b/src/winml/modelkit/models/winml/base.py
index be6e2e075..94c4e464d 100644
--- a/src/winml/modelkit/models/winml/base.py
+++ b/src/winml/modelkit/models/winml/base.py
@@ -65,6 +65,7 @@ def __init__(
         config: PretrainedConfig | None = None,
         device: str = "auto",
         session_options: Any | None = None,
+        ep: str | None = None,
     ) -> None:
         """Initialize inference model.
 
@@ -73,6 +74,7 @@ def __init__(
             config: HuggingFace PretrainedConfig (num_labels, id2label, etc.)
             device: Target device ("auto", "npu", "gpu", "cpu")
             session_options: ORT SessionOptions (e.g., for graph_optimization_level)
+            ep: Explicit EP short name (e.g., "dml", "qnn"). Forwarded to WinMLSession.
         """
         self._onnx_path = Path(onnx_path)
         self.config = config
@@ -86,6 +88,7 @@ def __init__(
             onnx_path=self._onnx_path,
             device=device,
             session_options=session_options,
+            ep=ep,
         )
 
     @property

From 35033d48cb760434c6f89879a2b3d754f32ce734 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Mon, 27 Apr 2026 20:44:26 +0800
Subject: [PATCH 02/10] fix: resolve device to explicit EP in
 _build_session_options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Policy-based EP selection (set_provider_selection_policy) does not work
for InferenceSession on the current ORT build — nodes end up with an
empty provider type string. When no explicit ep is set, fall back to
_DEVICE_TO_EP to resolve device ("gpu"→"dml", "npu"→"qnn") and use
add_provider_for_devices instead.

Fixes #402
---
 src/winml/modelkit/session/session.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py
index e6f819da1..9e4dc318f 100644
--- a/src/winml/modelkit/session/session.py
+++ b/src/winml/modelkit/session/session.py
@@ -72,6 +72,12 @@ class SessionState(Enum):
     "auto": ort.OrtExecutionProviderDevicePolicy.PREFER_NPU,  # Default to NPU
 }
 
+# Device to EP short name fallback (used when ep is not explicitly provided)
+_DEVICE_TO_EP: dict[str, str] = {
+    "npu": "qnn",
+    "gpu": "dml",
+}
+
 
 class WinMLSessionError(Exception):
     """Base exception for WinMLSession."""
@@ -423,8 +429,9 @@ def _build_session_options(self, device: str) -> ort.SessionOptions:
         avoid "already registered" errors from repeated calls.
         """
         # Explicit EP targeting: create fresh opts to avoid double-registration
-        if self._ep and self._ep != "cpu":
-            target_name = self._EP_NAME_MAP.get(self._ep)
+        ep = self._ep or _DEVICE_TO_EP.get(device.lower())
+        if ep and ep != "cpu":
+            target_name = self._EP_NAME_MAP.get(ep)
             if target_name:
                 matched = self._find_ep_device(target_name)
                 if matched:
@@ -432,13 +439,13 @@ def _build_session_options(self, device: str) -> ort.SessionOptions:
                     opts.add_provider_for_devices([matched], self._provider_options)
                     logger.info(
                         "Explicit EP: %s (%s)",
-                        self._ep,
+                        ep,
                         target_name,
                     )
                     return opts
                 logger.warning(
                     "EP '%s' (%s) not found in available devices; falling back to policy",
-                    self._ep,
+                    ep,
                     target_name,
                 )
 

From cbc47dcadcf01db4a3e37e595b555c8c02c3c4ab Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Mon, 27 Apr 2026 20:57:27 +0800
Subject: [PATCH 03/10] fix: use explicit providers param for InferenceSession

ort.get_ep_devices() may not list DML, in which case _find_ep_device
returns None and session creation falls back to the broken policy-based
path. Instead, resolve the providers list directly from the EP name map
and pass it via the InferenceSession(providers=...) parameter, which
does not depend on get_ep_devices().

Fixes #402
---
 src/winml/modelkit/session/session.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py
index 9e4dc318f..95f674e8f 100644
--- a/src/winml/modelkit/session/session.py
+++ b/src/winml/modelkit/session/session.py
@@ -293,10 +293,15 @@ def compile(self) -> None:
                     logger.warning("ModelCompiler failed, using original: %s", e)
 
         try:
-            # Create InferenceSession
-            sess_options = self._build_session_options(target_device)
+            # Create InferenceSession with explicit providers to avoid
+            # broken policy-based selection (empty provider type in ORT).
+            providers = self._resolve_providers(target_device)
             with _suppress_native_output(compile_log):
-                session = ort.InferenceSession(str(model_path), sess_options=sess_options)
+                session = ort.InferenceSession(
+                    str(model_path),
+                    sess_options=self._session_options,
+                    providers=providers,
+                )
 
             # Log which providers were selected by ORT (based on policy)
             actual_providers = session.get_providers()
@@ -470,6 +475,19 @@ def _find_ep_device(ep_name: str) -> Any:
                 return ep_dev
         return None
 
+    def _resolve_providers(self, device: str) -> list[str] | None:
+        """Resolve explicit provider list for InferenceSession.
+
+        Uses self._ep if set, otherwise infers from device via _DEVICE_TO_EP.
+        Returns None for CPU (let ORT use default CPU provider).
+        """
+        ep = self._ep or _DEVICE_TO_EP.get(device.lower())
+        if ep and ep != "cpu":
+            target_name = self._EP_NAME_MAP.get(ep)
+            if target_name:
+                return [target_name, "CPUExecutionProvider"]
+        return None
+
     def _validate_inputs(self, inputs: dict[str, Any]) -> None:
         """Validate inputs against model expectations.
 

From bc02314984e87ac84c40543d632a083a955457b7 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Mon, 27 Apr 2026 20:59:33 +0800
Subject: [PATCH 04/10] fix: match EP device by both ep_name and hardware
 device type

_find_ep_device previously matched only on ep_name and returned the
first hit. When multiple devices share the same EP (e.g., integrated
and discrete GPUs both using DmlExecutionProvider), this could select
the wrong physical device. It now also matches on OrtHardwareDeviceType,
consistent with the pattern used in runtime_checker_query and winml.py.

Fixes #402
---
 src/winml/modelkit/session/session.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py
index 95f674e8f..980a51416 100644
--- a/src/winml/modelkit/session/session.py
+++ b/src/winml/modelkit/session/session.py
@@ -438,7 +438,7 @@ def _build_session_options(self, device: str) -> ort.SessionOptions:
         if ep and ep != "cpu":
             target_name = self._EP_NAME_MAP.get(ep)
             if target_name:
-                matched = self._find_ep_device(target_name)
+                matched = self._find_ep_device(target_name, device)
                 if matched:
                     opts = ort.SessionOptions()
                     opts.add_provider_for_devices([matched], self._provider_options)
@@ -464,15 +464,27 @@ def _build_session_options(self, device: str) -> ort.SessionOptions:
         return opts
 
     @staticmethod
-    def _find_ep_device(ep_name: str) -> Any:
-        """Find an OrtEpDevice matching the given EP name.
+    def _find_ep_device(ep_name: str, device: str | None = None) -> Any:
+        """Find an OrtEpDevice matching EP name and hardware device type.
+
+        Args:
+            ep_name: Full EP name (e.g., "DmlExecutionProvider").
+            device: Target device string ("gpu", "npu", "cpu"). When provided,
+                also matches on OrtHardwareDeviceType so the correct physical
+                device is selected (e.g., discrete GPU vs integrated).
 
         Returns:
-            The first matching OrtEpDevice, or None if not found.
+            The matching OrtEpDevice, or None if not found.
         """
+        from ..utils.constants import DEVICE_TO_DEVICE_TYPE
+
+        device_type = DEVICE_TO_DEVICE_TYPE.get(device.upper()) if device else None
         for ep_dev in ort.get_ep_devices():
-            if ep_dev.ep_name == ep_name:
-                return ep_dev
+            if ep_dev.ep_name != ep_name:
+                continue
+            if device_type is not None and ep_dev.device.type != device_type:
+                continue
+            return ep_dev
         return None
 
     def _resolve_providers(self, device: str) -> list[str] | None:

From 759b54783d0e6e96da4591a0a6cee4db740bee23 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Mon, 27 Apr 2026 21:08:02 +0800
Subject: [PATCH 05/10] fix: discover EP from runtime instead of hardcoded
 device-to-EP map
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the static _DEVICE_TO_EP mapping (gpu→dml, npu→qnn) with
runtime discovery via ort.get_ep_devices() filtered by device type.
This correctly handles machines with non-default EPs (e.g., CUDA or
MIGraphX on GPU instead of DML).

Fixes #402
---
 src/winml/modelkit/session/session.py | 73 ++++++++++++++++++---------
 1 file changed, 49 insertions(+), 24 deletions(-)

diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py
index 980a51416..6e076d2cf 100644
--- a/src/winml/modelkit/session/session.py
+++ b/src/winml/modelkit/session/session.py
@@ -72,12 +72,6 @@ class SessionState(Enum):
     "auto": ort.OrtExecutionProviderDevicePolicy.PREFER_NPU,  # Default to NPU
 }
 
-# Device to EP short name fallback (used when ep is not explicitly provided)
-_DEVICE_TO_EP: dict[str, str] = {
-    "npu": "qnn",
-    "gpu": "dml",
-}
-
 
 class WinMLSessionError(Exception):
     """Base exception for WinMLSession."""
@@ -427,34 +421,40 @@ def _build_session_options(self, device: str) -> ort.SessionOptions:
         """Build ORT SessionOptions from instance session_options and device.
 
         When ``self._ep`` is set, uses ``add_provider_for_devices`` to
-        explicitly bind a specific EP (e.g., MIGraphX, NvTensorRTRTX). Otherwise
-        falls back to policy-based selection via DEVICE_POLICY_MAP.
+        explicitly bind a specific EP (e.g., MIGraphX, NvTensorRTRTX).
+        When not set, queries ``get_ep_devices()`` to discover available
+        EPs for the target device type. Falls back to policy-based
+        selection only as a last resort.
 
         Note: Returns a **fresh** SessionOptions when using explicit EP to
         avoid "already registered" errors from repeated calls.
         """
         # Explicit EP targeting: create fresh opts to avoid double-registration
-        ep = self._ep or _DEVICE_TO_EP.get(device.lower())
-        if ep and ep != "cpu":
-            target_name = self._EP_NAME_MAP.get(ep)
+        if self._ep and self._ep != "cpu":
+            target_name = self._EP_NAME_MAP.get(self._ep)
             if target_name:
                 matched = self._find_ep_device(target_name, device)
                 if matched:
                     opts = ort.SessionOptions()
                     opts.add_provider_for_devices([matched], self._provider_options)
-                    logger.info(
-                        "Explicit EP: %s (%s)",
-                        ep,
-                        target_name,
-                    )
+                    logger.info("Explicit EP: %s (%s)", self._ep, target_name)
                     return opts
                 logger.warning(
-                    "EP '%s' (%s) not found in available devices; falling back to policy",
-                    ep,
+                    "EP '%s' (%s) not found in available devices",
+                    self._ep,
                     target_name,
                 )
 
-        # Policy-based selection (default path)
+        # No explicit EP — discover available EP for this device type
+        if not self._ep and device.lower() != "cpu":
+            matched = self._find_ep_for_device(device)
+            if matched:
+                opts = ort.SessionOptions()
+                opts.add_provider_for_devices([matched], self._provider_options)
+                logger.info("Discovered EP for %s: %s", device, matched.ep_name)
+                return opts
+
+        # Policy-based selection (last resort)
         opts = self._session_options
         policy = DEVICE_POLICY_MAP.get(
             device.lower(), ort.OrtExecutionProviderDevicePolicy.PREFER_NPU
@@ -487,17 +487,42 @@ def _find_ep_device(ep_name: str, device: str | None = None) -> Any:
             return ep_dev
         return None
 
+    @staticmethod
+    def _find_ep_for_device(device: str) -> Any:
+        """Find the first available OrtEpDevice for the given device type.
+
+        Queries ``ort.get_ep_devices()`` and returns the first EP whose
+        hardware device type matches (e.g., device="gpu" matches GPU EPs).
+
+        Returns:
+            The matching OrtEpDevice, or None if not found.
+        """
+        from ..utils.constants import DEVICE_TO_DEVICE_TYPE
+
+        device_type = DEVICE_TO_DEVICE_TYPE.get(device.upper())
+        if device_type is None:
+            return None
+        for ep_dev in ort.get_ep_devices():
+            if ep_dev.device.type == device_type:
+                return ep_dev
+        return None
+
     def _resolve_providers(self, device: str) -> list[str] | None:
         """Resolve explicit provider list for InferenceSession.
 
-        Uses self._ep if set, otherwise infers from device via _DEVICE_TO_EP.
-        Returns None for CPU (let ORT use default CPU provider).
+        Uses self._ep if set, otherwise queries ``get_ep_devices()`` for the
+        target device type. Returns None for CPU (let ORT use default).
         """
-        ep = self._ep or _DEVICE_TO_EP.get(device.lower())
-        if ep and ep != "cpu":
-            target_name = self._EP_NAME_MAP.get(ep)
+        if self._ep and self._ep != "cpu":
+            target_name = self._EP_NAME_MAP.get(self._ep)
             if target_name:
                 return [target_name, "CPUExecutionProvider"]
+
+        if device.lower() != "cpu":
+            matched = self._find_ep_for_device(device)
+            if matched:
+                return [matched.ep_name, "CPUExecutionProvider"]
+
         return None
 
     def _validate_inputs(self, inputs: dict[str, Any]) -> None:

From 9e028b2c5612897f78dbc82b606f76d3ff12ba04 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Mon, 27 Apr 2026 21:18:36 +0800
Subject: [PATCH 06/10] fix: pass ep to WinMLSession in _run_onnx_benchmark
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When e2e_eval builds a model then benchmarks the resulting .onnx file,
it calls _run_onnx_benchmark which created WinMLSession with only
device but not ep. This was the actual failing path — config.ep was
available but never forwarded to the session.

Fixes #402
---
 src/winml/modelkit/commands/perf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py
index b05dae669..841080fe1 100644
--- a/src/winml/modelkit/commands/perf.py
+++ b/src/winml/modelkit/commands/perf.py
@@ -920,7 +920,7 @@ def _run_onnx_benchmark(
     """
     from ..session import WinMLSession
 
-    session = WinMLSession(onnx_path=onnx_path, device=device)
+    session = WinMLSession(onnx_path=onnx_path, device=device, ep=config.ep)
 
     # Generate random inputs from session's I/O config
     io_cfg = session.io_config

From 66a01480ad7075efcf8da25840e4578e1270c447 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Mon, 27 Apr 2026 21:58:56 +0800
Subject: [PATCH 07/10] fix: map QNN EP to both NPU and GPU

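QNN supports GPU via the Qualcomm Adreno backend, but _EP_DEVICE_MAP
hardcoded it to NPU only. Change its entry to "npu/gpu" and update
_DEVICE_EP_MAP generation to split multi-device strings so QNN appears
in both the NPU and GPU device lists.

Note that this also lifts the previous exclusion of multi-device EPs:
OpenVINO is now listed under each of its devices (npu, gpu, cpu) in
_DEVICE_EP_MAP, and the unit tests are updated accordingly.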

Fixes #402
---
 src/winml/modelkit/sysinfo/device.py | 10 +++++-----
 tests/unit/sysinfo/test_device.py    | 13 ++++++++-----
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/winml/modelkit/sysinfo/device.py b/src/winml/modelkit/sysinfo/device.py
index 28e26fa1f..ebbeb6bdc 100644
--- a/src/winml/modelkit/sysinfo/device.py
+++ b/src/winml/modelkit/sysinfo/device.py
@@ -41,8 +41,8 @@
     # AMD
     "MIGraphXExecutionProvider": "gpu",
     "VitisAIExecutionProvider": "npu",
-    # Qualcomm
-    "QNNExecutionProvider": "npu",
+    # Qualcomm (QNN supports both NPU and GPU via Adreno backend)
+    "QNNExecutionProvider": "npu/gpu",
     # Microsoft
     "DmlExecutionProvider": "gpu",
     # Intel
@@ -51,11 +51,11 @@
     "CPUExecutionProvider": "cpu",
 }
 
-# Derived inverse mapping (excludes multi-device EPs like OpenVINO)
+# Derived inverse mapping (multi-device EPs are included in each device)
 _DEVICE_EP_MAP: dict[str, list[str]] = {}
 for _ep, _device in _EP_DEVICE_MAP.items():
-    if "/" not in _device:
-        _DEVICE_EP_MAP.setdefault(_device, []).append(_ep)
+    for _d in _device.split("/"):
+        _DEVICE_EP_MAP.setdefault(_d, []).append(_ep)
 
 # Valid explicit device values
 _VALID_DEVICES = frozenset({"npu", "gpu", "cpu"})
diff --git a/tests/unit/sysinfo/test_device.py b/tests/unit/sysinfo/test_device.py
index fbe0acb49..ecc766fde 100644
--- a/tests/unit/sysinfo/test_device.py
+++ b/tests/unit/sysinfo/test_device.py
@@ -136,10 +136,13 @@ def test_ep_device_map_values_are_lowercase(self) -> None:
         for ep, device in _EP_DEVICE_MAP.items():
             assert device == device.lower(), f"{ep} maps to non-lowercase '{device}'"
 
-    def test_device_ep_map_excludes_openvino(self) -> None:
-        """_DEVICE_EP_MAP should not contain OpenVINO entries."""
-        all_eps = [ep for eps in _DEVICE_EP_MAP.values() for ep in eps]
-        assert "OpenVINOExecutionProvider" not in all_eps
+    def test_device_ep_map_includes_multi_device_eps(self) -> None:
+        """Multi-device EPs (QNN, OpenVINO) should appear in each device."""
+        assert "QNNExecutionProvider" in _DEVICE_EP_MAP["npu"]
+        assert "QNNExecutionProvider" in _DEVICE_EP_MAP["gpu"]
+        assert "OpenVINOExecutionProvider" in _DEVICE_EP_MAP["npu"]
+        assert "OpenVINOExecutionProvider" in _DEVICE_EP_MAP["gpu"]
+        assert "OpenVINOExecutionProvider" in _DEVICE_EP_MAP["cpu"]
 
     def test_device_ep_map_derived_from_ep_device_map(self) -> None:
         """_DEVICE_EP_MAP should be consistent with _EP_DEVICE_MAP."""
@@ -148,7 +151,7 @@ def test_device_ep_map_derived_from_ep_device_map(self) -> None:
                 assert ep in _EP_DEVICE_MAP, (
                     f"EP '{ep}' in _DEVICE_EP_MAP but not in _EP_DEVICE_MAP"
                 )
-                assert _EP_DEVICE_MAP[ep] == device
+                assert device in _EP_DEVICE_MAP[ep].split("/")
 
     def test_nv_tensorrt_rtx_is_gpu_ep(self) -> None:
         """NvTensorRTRTXExecutionProvider should map to gpu."""

From 0d8e872edfe12894442e54fe3381e62ad7d4cf43 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Mon, 27 Apr 2026 22:10:42 +0800
Subject: [PATCH 08/10] fix: use add_provider_for_devices for InferenceSession
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two issues fixed:

1. Explicit EP (--ep qnn) no longer filters by device type in
   _find_ep_device. QNN reports as NPU in get_ep_devices() but can
   target GPU — trust the user's choice.

2. InferenceSession now uses _build_session_options() (which calls
   add_provider_for_devices, working with WinML EP registry) instead
   of the providers= string parameter (which tries standard DLL
   loading and fails for WinML-registered EPs like QNN). Falls back
   to providers= only when _build_session_options returns policy-based
   options.

Fixes #402
---
 src/winml/modelkit/session/session.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py
index 6e076d2cf..519fff011 100644
--- a/src/winml/modelkit/session/session.py
+++ b/src/winml/modelkit/session/session.py
@@ -287,13 +287,22 @@ def compile(self) -> None:
                     logger.warning("ModelCompiler failed, using original: %s", e)
 
         try:
-            # Create InferenceSession with explicit providers to avoid
-            # broken policy-based selection (empty provider type in ORT).
-            providers = self._resolve_providers(target_device)
+            # Create InferenceSession.
+            # Prefer _build_session_options (uses add_provider_for_devices which
+            # works with WinML EP registry for non-built-in EPs like QNN).
+            # Only fall back to providers= strings when _build_session_options
+            # returned policy-based options (identity check).
+            sess_options = self._build_session_options(target_device)
+            if sess_options is self._session_options:
+                # Policy fallback — use providers= for built-in EPs (e.g., DML)
+                providers = self._resolve_providers(target_device)
+            else:
+                # EP configured via add_provider_for_devices — don't override
+                providers = None
             with _suppress_native_output(compile_log):
                 session = ort.InferenceSession(
                     str(model_path),
-                    sess_options=self._session_options,
+                    sess_options=sess_options,
                     providers=providers,
                 )
 
@@ -430,10 +439,12 @@ def _build_session_options(self, device: str) -> ort.SessionOptions:
         avoid "already registered" errors from repeated calls.
         """
         # Explicit EP targeting: create fresh opts to avoid double-registration
+        # Don't filter by device type — trust the user's --ep choice
+        # (e.g., QNN reports as NPU in get_ep_devices but can target GPU)
         if self._ep and self._ep != "cpu":
             target_name = self._EP_NAME_MAP.get(self._ep)
             if target_name:
-                matched = self._find_ep_device(target_name, device)
+                matched = self._find_ep_device(target_name)
                 if matched:
                     opts = ort.SessionOptions()
                     opts.add_provider_for_devices([matched], self._provider_options)

From cfad4842b4d7bfea547c0f292cba36d7db21e754 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Wed, 29 Apr 2026 14:09:39 +0800
Subject: [PATCH 09/10] fix: remove providers= fallback, always use
 add_provider_for_devices

WinML-registered EPs (e.g. QNN) do not support the providers= parameter
in InferenceSession. Remove _resolve_providers and the conditional
providers= path entirely. EP is now configured exclusively via
add_provider_for_devices in _build_session_options, or left to ORT
device policy in the fallback case.
---
 src/winml/modelkit/session/session.py | 32 +++------------------------
 1 file changed, 3 insertions(+), 29 deletions(-)

diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py
index 519fff011..5ae7b77b0 100644
--- a/src/winml/modelkit/session/session.py
+++ b/src/winml/modelkit/session/session.py
@@ -288,22 +288,14 @@ def compile(self) -> None:
 
         try:
             # Create InferenceSession.
-            # Prefer _build_session_options (uses add_provider_for_devices which
-            # works with WinML EP registry for non-built-in EPs like QNN).
-            # Only fall back to providers= strings when _build_session_options
-            # returned policy-based options (identity check).
+            # EP is either configured via add_provider_for_devices (WinML EP
+            # registry, e.g. QNN) or left to ORT's device policy (fallback).
+            # Never pass providers= — WinML-registered EPs don't support it.
             sess_options = self._build_session_options(target_device)
-            if sess_options is self._session_options:
-                # Policy fallback — use providers= for built-in EPs (e.g., DML)
-                providers = self._resolve_providers(target_device)
-            else:
-                # EP configured via add_provider_for_devices — don't override
-                providers = None
             with _suppress_native_output(compile_log):
                 session = ort.InferenceSession(
                     str(model_path),
                     sess_options=sess_options,
-                    providers=providers,
                 )
 
             # Log which providers were selected by ORT (based on policy)
@@ -518,24 +510,6 @@ def _find_ep_for_device(device: str) -> Any:
                 return ep_dev
         return None
 
-    def _resolve_providers(self, device: str) -> list[str] | None:
-        """Resolve explicit provider list for InferenceSession.
-
-        Uses self._ep if set, otherwise queries ``get_ep_devices()`` for the
-        target device type. Returns None for CPU (let ORT use default).
-        """
-        if self._ep and self._ep != "cpu":
-            target_name = self._EP_NAME_MAP.get(self._ep)
-            if target_name:
-                return [target_name, "CPUExecutionProvider"]
-
-        if device.lower() != "cpu":
-            matched = self._find_ep_for_device(device)
-            if matched:
-                return [matched.ep_name, "CPUExecutionProvider"]
-
-        return None
-
     def _validate_inputs(self, inputs: dict[str, Any]) -> None:
         """Validate inputs against model expectations.
 

From cba9189a8da19d865f3d56d5520dd90e5e2f6325 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Wed, 29 Apr 2026 14:38:22 +0800
Subject: [PATCH 10/10] refactor: clean up _find_ep_device and improve
 docstrings

- Remove unused `device` parameter from `_find_ep_device`; the caller
  intentionally skips device-type filtering (QNN reports as NPU but can
  target GPU), so the parameter was dead code that contradicted the
  comment at the call site
- Update `_build_session_options` docstring to document that `"cpu"` is
  excluded from the add_provider_for_devices path and falls through to
  policy-based selection
- Document registry-order dependency in `_find_ep_for_device`: when
  multiple EPs match the same device type the first one wins; callers
  that need a specific EP should set `self._ep` to bypass discovery
---
 src/winml/modelkit/session/session.py | 35 +++++++++++++--------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py
index 5ae7b77b0..6349cac62 100644
--- a/src/winml/modelkit/session/session.py
+++ b/src/winml/modelkit/session/session.py
@@ -421,11 +421,13 @@ def _is_verbose(self) -> bool:
     def _build_session_options(self, device: str) -> ort.SessionOptions:
         """Build ORT SessionOptions from instance session_options and device.
 
-        When ``self._ep`` is set, uses ``add_provider_for_devices`` to
-        explicitly bind a specific EP (e.g., MIGraphX, NvTensorRTRTX).
-        When not set, queries ``get_ep_devices()`` to discover available
-        EPs for the target device type. Falls back to policy-based
-        selection only as a last resort.
+        When ``self._ep`` is set (and not ``"cpu"``), uses
+        ``add_provider_for_devices`` to explicitly bind that EP.
+        ``"cpu"`` falls through to policy-based selection so ORT handles
+        CPU-only inference without any EP registration.
+        When ``self._ep`` is not set, queries ``get_ep_devices()`` to
+        discover an available EP for the target device type. Falls back to
+        policy-based selection only as a last resort.
 
         Note: Returns a **fresh** SessionOptions when using explicit EP to
         avoid "already registered" errors from repeated calls.
@@ -467,27 +469,18 @@ def _build_session_options(self, device: str) -> ort.SessionOptions:
         return opts
 
     @staticmethod
-    def _find_ep_device(ep_name: str, device: str | None = None) -> Any:
-        """Find an OrtEpDevice matching EP name and hardware device type.
+    def _find_ep_device(ep_name: str) -> Any:
+        """Find the first OrtEpDevice matching the given EP name.
 
         Args:
             ep_name: Full EP name (e.g., "DmlExecutionProvider").
-            device: Target device string ("gpu", "npu", "cpu"). When provided,
-                also matches on OrtHardwareDeviceType so the correct physical
-                device is selected (e.g., discrete GPU vs integrated).
 
         Returns:
             The matching OrtEpDevice, or None if not found.
         """
-        from ..utils.constants import DEVICE_TO_DEVICE_TYPE
-
-        device_type = DEVICE_TO_DEVICE_TYPE.get(device.upper()) if device else None
         for ep_dev in ort.get_ep_devices():
-            if ep_dev.ep_name != ep_name:
-                continue
-            if device_type is not None and ep_dev.device.type != device_type:
-                continue
-            return ep_dev
+            if ep_dev.ep_name == ep_name:
+                return ep_dev
         return None
 
     @staticmethod
@@ -497,6 +490,12 @@ def _find_ep_for_device(device: str) -> Any:
         Queries ``ort.get_ep_devices()`` and returns the first EP whose
         hardware device type matches (e.g., device="gpu" matches GPU EPs).
 
+        Note: Selection order is determined by the ORT EP registry, which is
+        not part of any documented contract. On systems where multiple EPs
+        match the same device type (e.g., QNN and DML both appear as GPU),
+        the result is registry-order dependent. When a specific EP is
+        required, use ``self._ep`` to bypass this discovery path entirely.
+
         Returns:
             The matching OrtEpDevice, or None if not found.
         """