From 78bb9d1b1e4ccf5f6a4119b9fd37ccf96ccb7f4c Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Mon, 27 Apr 2026 18:13:39 +0800 Subject: [PATCH 01/10] fix: thread `ep` parameter through to WinMLSession The `--ep` flag was silently dropped in the model construction path. `WinMLAutoModel.from_pretrained()` and `from_onnx()` received the `ep` value but never forwarded it to `WinMLPreTrainedModel`, which in turn never passed it to `WinMLSession`. This caused `ModelCompiler` to fall back to policy-based EP selection, which it does not support, resulting in an empty provider type string and a runtime crash. Fixes #402 --- src/winml/modelkit/models/auto.py | 3 +++ src/winml/modelkit/models/winml/base.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/winml/modelkit/models/auto.py b/src/winml/modelkit/models/auto.py index f0f55a4db..2864ddab8 100644 --- a/src/winml/modelkit/models/auto.py +++ b/src/winml/modelkit/models/auto.py @@ -192,6 +192,7 @@ def from_onnx( config=None, device=device, session_options=session_options, + ep=ep, ) # Resolve output directory @@ -228,6 +229,7 @@ def from_onnx( config=None, # No HF PretrainedConfig for bare ONNX builds device=device, session_options=session_options, + ep=ep, ) @classmethod @@ -425,6 +427,7 @@ def from_pretrained( onnx_path=onnx_path, config=hf_config, # HF PretrainedConfig for pipeline compatibility device=device, # pass user's original device string; WinMLSession handles "auto" + ep=resolved_ep, ) model._build_config = config # resolved build config (task, quant, compile) return model diff --git a/src/winml/modelkit/models/winml/base.py b/src/winml/modelkit/models/winml/base.py index be6e2e075..94c4e464d 100644 --- a/src/winml/modelkit/models/winml/base.py +++ b/src/winml/modelkit/models/winml/base.py @@ -65,6 +65,7 @@ def __init__( config: PretrainedConfig | None = None, device: str = "auto", session_options: Any | None = None, + ep: str | None = None, ) -> None: """Initialize inference model. @@ -73,6 +74,7 @@ def __init__( config: HuggingFace PretrainedConfig (num_labels, id2label, etc.) device: Target device ("auto", "npu", "gpu", "cpu") session_options: ORT SessionOptions (e.g., for graph_optimization_level) + ep: Explicit EP short name (e.g., "dml", "qnn"). Forwarded to WinMLSession. """ self._onnx_path = Path(onnx_path) self.config = config @@ -86,6 +88,7 @@ def __init__( onnx_path=self._onnx_path, device=device, session_options=session_options, + ep=ep, ) @property From 35033d48cb760434c6f89879a2b3d754f32ce734 Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Mon, 27 Apr 2026 20:44:26 +0800 Subject: [PATCH 02/10] fix: resolve device to explicit EP in _build_session_options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Policy-based EP selection (set_provider_selection_policy) does not work for InferenceSession on the current ORT build — nodes end up with an empty provider type string. When no explicit ep is set, fall back to _DEVICE_TO_EP to resolve device ("gpu"→"dml", "npu"→"qnn") and use add_provider_for_devices instead. Fixes #402 --- src/winml/modelkit/session/session.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py index e6f819da1..9e4dc318f 100644 --- a/src/winml/modelkit/session/session.py +++ b/src/winml/modelkit/session/session.py @@ -72,6 +72,12 @@ class SessionState(Enum): "auto": ort.OrtExecutionProviderDevicePolicy.PREFER_NPU, # Default to NPU } +# Device to EP short name fallback (used when ep is not explicitly provided) +_DEVICE_TO_EP: dict[str, str] = { + "npu": "qnn", + "gpu": "dml", +} + class WinMLSessionError(Exception): """Base exception for WinMLSession.""" @@ -423,8 +429,9 @@ def _build_session_options(self, device: str) -> ort.SessionOptions: avoid "already registered" errors from repeated calls. """ # Explicit EP targeting: create fresh opts to avoid double-registration - if self._ep and self._ep != "cpu": - target_name = self._EP_NAME_MAP.get(self._ep) + ep = self._ep or _DEVICE_TO_EP.get(device.lower()) + if ep and ep != "cpu": + target_name = self._EP_NAME_MAP.get(ep) if target_name: matched = self._find_ep_device(target_name) if matched: @@ -432,13 +439,13 @@ def _build_session_options(self, device: str) -> ort.SessionOptions: opts.add_provider_for_devices([matched], self._provider_options) logger.info( "Explicit EP: %s (%s)", - self._ep, + ep, target_name, ) return opts logger.warning( "EP '%s' (%s) not found in available devices; falling back to policy", - self._ep, + ep, target_name, ) From cbc47dcadcf01db4a3e37e595b555c8c02c3c4ab Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Mon, 27 Apr 2026 20:57:27 +0800 Subject: [PATCH 03/10] fix: use explicit providers param for InferenceSession ort.get_ep_devices() may not list DML, causing _find_ep_device to return None and falling back to the broken policy-based path. Instead, resolve the providers list directly from EP name map and pass it via the InferenceSession(providers=...) parameter, which does not depend on get_ep_devices(). Fixes #402 --- src/winml/modelkit/session/session.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py index 9e4dc318f..95f674e8f 100644 --- a/src/winml/modelkit/session/session.py +++ b/src/winml/modelkit/session/session.py @@ -293,10 +293,15 @@ def compile(self) -> None: logger.warning("ModelCompiler failed, using original: %s", e) try: - # Create InferenceSession - sess_options = self._build_session_options(target_device) + # Create InferenceSession with explicit providers to avoid + # broken policy-based selection (empty provider type in ORT). + providers = self._resolve_providers(target_device) with _suppress_native_output(compile_log): - session = ort.InferenceSession(str(model_path), sess_options=sess_options) + session = ort.InferenceSession( + str(model_path), + sess_options=self._session_options, + providers=providers, + ) # Log which providers were selected by ORT (based on policy) actual_providers = session.get_providers() @@ -470,6 +475,19 @@ def _find_ep_device(ep_name: str) -> Any: return ep_dev return None + def _resolve_providers(self, device: str) -> list[str] | None: + """Resolve explicit provider list for InferenceSession. + + Uses self._ep if set, otherwise infers from device via _DEVICE_TO_EP. + Returns None for CPU (let ORT use default CPU provider). + """ + ep = self._ep or _DEVICE_TO_EP.get(device.lower()) + if ep and ep != "cpu": + target_name = self._EP_NAME_MAP.get(ep) + if target_name: + return [target_name, "CPUExecutionProvider"] + return None + def _validate_inputs(self, inputs: dict[str, Any]) -> None: """Validate inputs against model expectations. From bc02314984e87ac84c40543d632a083a955457b7 Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Mon, 27 Apr 2026 20:59:33 +0800 Subject: [PATCH 04/10] fix: match EP device by both ep_name and hardware device type _find_ep_device previously matched only on ep_name and returned the first hit. When multiple devices share the same EP (e.g., integrated + discrete GPU both using DmlExecutionProvider), this could select the wrong physical device. Now also matches on OrtHardwareDeviceType, consistent with the pattern used in runtime_checker_query and winml.py. Fixes #402 --- src/winml/modelkit/session/session.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py index 95f674e8f..980a51416 100644 --- a/src/winml/modelkit/session/session.py +++ b/src/winml/modelkit/session/session.py @@ -438,7 +438,7 @@ def _build_session_options(self, device: str) -> ort.SessionOptions: if ep and ep != "cpu": target_name = self._EP_NAME_MAP.get(ep) if target_name: - matched = self._find_ep_device(target_name) + matched = self._find_ep_device(target_name, device) if matched: opts = ort.SessionOptions() opts.add_provider_for_devices([matched], self._provider_options) @@ -464,15 +464,27 @@ def _build_session_options(self, device: str) -> ort.SessionOptions: return opts @staticmethod - def _find_ep_device(ep_name: str) -> Any: - """Find an OrtEpDevice matching the given EP name. + def _find_ep_device(ep_name: str, device: str | None = None) -> Any: + """Find an OrtEpDevice matching EP name and hardware device type. + + Args: + ep_name: Full EP name (e.g., "DmlExecutionProvider"). + device: Target device string ("gpu", "npu", "cpu"). When provided, + also matches on OrtHardwareDeviceType so the correct physical + device is selected (e.g., discrete GPU vs integrated). Returns: - The first matching OrtEpDevice, or None if not found. + The matching OrtEpDevice, or None if not found. """ + from ..utils.constants import DEVICE_TO_DEVICE_TYPE + + device_type = DEVICE_TO_DEVICE_TYPE.get(device.upper()) if device else None for ep_dev in ort.get_ep_devices(): - if ep_dev.ep_name == ep_name: - return ep_dev + if ep_dev.ep_name != ep_name: + continue + if device_type is not None and ep_dev.device.type != device_type: + continue + return ep_dev return None def _resolve_providers(self, device: str) -> list[str] | None: From 759b54783d0e6e96da4591a0a6cee4db740bee23 Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Mon, 27 Apr 2026 21:08:02 +0800 Subject: [PATCH 05/10] fix: discover EP from runtime instead of hardcoded device-to-EP map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the static _DEVICE_TO_EP mapping (gpu→dml, npu→qnn) with runtime discovery via ort.get_ep_devices() filtered by device type. This correctly handles machines with non-default EPs (e.g., CUDA or MIGraphX on GPU instead of DML). Fixes #402 --- src/winml/modelkit/session/session.py | 73 ++++++++++++++++++--------- 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py index 980a51416..6e076d2cf 100644 --- a/src/winml/modelkit/session/session.py +++ b/src/winml/modelkit/session/session.py @@ -72,12 +72,6 @@ class SessionState(Enum): "auto": ort.OrtExecutionProviderDevicePolicy.PREFER_NPU, # Default to NPU } -# Device to EP short name fallback (used when ep is not explicitly provided) -_DEVICE_TO_EP: dict[str, str] = { - "npu": "qnn", - "gpu": "dml", -} - class WinMLSessionError(Exception): """Base exception for WinMLSession.""" @@ -427,34 +421,40 @@ def _build_session_options(self, device: str) -> ort.SessionOptions: """Build ORT SessionOptions from instance session_options and device. When ``self._ep`` is set, uses ``add_provider_for_devices`` to - explicitly bind a specific EP (e.g., MIGraphX, NvTensorRTRTX). Otherwise - falls back to policy-based selection via DEVICE_POLICY_MAP. + explicitly bind a specific EP (e.g., MIGraphX, NvTensorRTRTX). + When not set, queries ``get_ep_devices()`` to discover available + EPs for the target device type. Falls back to policy-based + selection only as a last resort. Note: Returns a **fresh** SessionOptions when using explicit EP to avoid "already registered" errors from repeated calls. """ # Explicit EP targeting: create fresh opts to avoid double-registration - ep = self._ep or _DEVICE_TO_EP.get(device.lower()) - if ep and ep != "cpu": - target_name = self._EP_NAME_MAP.get(ep) + if self._ep and self._ep != "cpu": + target_name = self._EP_NAME_MAP.get(self._ep) if target_name: matched = self._find_ep_device(target_name, device) if matched: opts = ort.SessionOptions() opts.add_provider_for_devices([matched], self._provider_options) - logger.info( - "Explicit EP: %s (%s)", - ep, - target_name, - ) + logger.info("Explicit EP: %s (%s)", self._ep, target_name) return opts logger.warning( - "EP '%s' (%s) not found in available devices; falling back to policy", - ep, + "EP '%s' (%s) not found in available devices", + self._ep, target_name, ) - # Policy-based selection (default path) + # No explicit EP — discover available EP for this device type + if not self._ep and device.lower() != "cpu": + matched = self._find_ep_for_device(device) + if matched: + opts = ort.SessionOptions() + opts.add_provider_for_devices([matched], self._provider_options) + logger.info("Discovered EP for %s: %s", device, matched.ep_name) + return opts + + # Policy-based selection (last resort) opts = self._session_options policy = DEVICE_POLICY_MAP.get( device.lower(), ort.OrtExecutionProviderDevicePolicy.PREFER_NPU @@ -487,17 +487,42 @@ def _find_ep_device(ep_name: str, device: str | None = None) -> Any: return ep_dev return None + @staticmethod + def _find_ep_for_device(device: str) -> Any: + """Find the first available OrtEpDevice for the given device type. + + Queries ``ort.get_ep_devices()`` and returns the first EP whose + hardware device type matches (e.g., device="gpu" matches GPU EPs). + + Returns: + The matching OrtEpDevice, or None if not found. + """ + from ..utils.constants import DEVICE_TO_DEVICE_TYPE + + device_type = DEVICE_TO_DEVICE_TYPE.get(device.upper()) + if device_type is None: + return None + for ep_dev in ort.get_ep_devices(): + if ep_dev.device.type == device_type: + return ep_dev + return None + def _resolve_providers(self, device: str) -> list[str] | None: """Resolve explicit provider list for InferenceSession. - Uses self._ep if set, otherwise infers from device via _DEVICE_TO_EP. - Returns None for CPU (let ORT use default CPU provider). + Uses self._ep if set, otherwise queries ``get_ep_devices()`` for the + target device type. Returns None for CPU (let ORT use default). """ - ep = self._ep or _DEVICE_TO_EP.get(device.lower()) - if ep and ep != "cpu": - target_name = self._EP_NAME_MAP.get(ep) + if self._ep and self._ep != "cpu": + target_name = self._EP_NAME_MAP.get(self._ep) if target_name: return [target_name, "CPUExecutionProvider"] + + if device.lower() != "cpu": + matched = self._find_ep_for_device(device) + if matched: + return [matched.ep_name, "CPUExecutionProvider"] + return None def _validate_inputs(self, inputs: dict[str, Any]) -> None: From 9e028b2c5612897f78dbc82b606f76d3ff12ba04 Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Mon, 27 Apr 2026 21:18:36 +0800 Subject: [PATCH 06/10] fix: pass ep to WinMLSession in _run_onnx_benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When e2e_eval builds a model then benchmarks the resulting .onnx file, it calls _run_onnx_benchmark which created WinMLSession with only device but not ep. This was the actual failing path — config.ep was available but never forwarded to the session. Fixes #402 --- src/winml/modelkit/commands/perf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py index b05dae669..841080fe1 100644 --- a/src/winml/modelkit/commands/perf.py +++ b/src/winml/modelkit/commands/perf.py @@ -920,7 +920,7 @@ def _run_onnx_benchmark( """ from ..session import WinMLSession - session = WinMLSession(onnx_path=onnx_path, device=device) + session = WinMLSession(onnx_path=onnx_path, device=device, ep=config.ep) # Generate random inputs from session's I/O config io_cfg = session.io_config From 66a01480ad7075efcf8da25840e4578e1270c447 Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Mon, 27 Apr 2026 21:58:56 +0800 Subject: [PATCH 07/10] fix: map QNN EP to both NPU and GPU QNN supports GPU via Qualcomm Adreno backend, but _EP_DEVICE_MAP hardcoded it to NPU only. Change to "npu/gpu" and update _DEVICE_EP_MAP generation to split multi-device strings so QNN appears in both the NPU and GPU device lists. Fixes #402 --- src/winml/modelkit/sysinfo/device.py | 10 +++++----- tests/unit/sysinfo/test_device.py | 13 ++++++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/winml/modelkit/sysinfo/device.py b/src/winml/modelkit/sysinfo/device.py index 28e26fa1f..ebbeb6bdc 100644 --- a/src/winml/modelkit/sysinfo/device.py +++ b/src/winml/modelkit/sysinfo/device.py @@ -41,8 +41,8 @@ # AMD "MIGraphXExecutionProvider": "gpu", "VitisAIExecutionProvider": "npu", - # Qualcomm - "QNNExecutionProvider": "npu", + # Qualcomm (QNN supports both NPU and GPU via Adreno backend) + "QNNExecutionProvider": "npu/gpu", # Microsoft "DmlExecutionProvider": "gpu", # Intel @@ -51,11 +51,11 @@ "CPUExecutionProvider": "cpu", } -# Derived inverse mapping (excludes multi-device EPs like OpenVINO) +# Derived inverse mapping (multi-device EPs are included in each device) _DEVICE_EP_MAP: dict[str, list[str]] = {} for _ep, _device in _EP_DEVICE_MAP.items(): - if "/" not in _device: - _DEVICE_EP_MAP.setdefault(_device, []).append(_ep) + for _d in _device.split("/"): + _DEVICE_EP_MAP.setdefault(_d, []).append(_ep) # Valid explicit device values _VALID_DEVICES = frozenset({"npu", "gpu", "cpu"}) diff --git a/tests/unit/sysinfo/test_device.py b/tests/unit/sysinfo/test_device.py index fbe0acb49..ecc766fde 100644 --- a/tests/unit/sysinfo/test_device.py +++ b/tests/unit/sysinfo/test_device.py @@ -136,10 +136,13 @@ def test_ep_device_map_values_are_lowercase(self) -> None: for ep, device in _EP_DEVICE_MAP.items(): assert device == device.lower(), f"{ep} maps to non-lowercase '{device}'" - def test_device_ep_map_excludes_openvino(self) -> None: - """_DEVICE_EP_MAP should not contain OpenVINO entries.""" - all_eps = [ep for eps in _DEVICE_EP_MAP.values() for ep in eps] - assert "OpenVINOExecutionProvider" not in all_eps + def test_device_ep_map_includes_multi_device_eps(self) -> None: + """Multi-device EPs (QNN, OpenVINO) should appear in each device.""" + assert "QNNExecutionProvider" in _DEVICE_EP_MAP["npu"] + assert "QNNExecutionProvider" in _DEVICE_EP_MAP["gpu"] + assert "OpenVINOExecutionProvider" in _DEVICE_EP_MAP["npu"] + assert "OpenVINOExecutionProvider" in _DEVICE_EP_MAP["gpu"] + assert "OpenVINOExecutionProvider" in _DEVICE_EP_MAP["cpu"] def test_device_ep_map_derived_from_ep_device_map(self) -> None: """_DEVICE_EP_MAP should be consistent with _EP_DEVICE_MAP.""" @@ -148,7 +151,7 @@ def test_device_ep_map_derived_from_ep_device_map(self) -> None: assert ep in _EP_DEVICE_MAP, ( f"EP '{ep}' in _DEVICE_EP_MAP but not in _EP_DEVICE_MAP" ) - assert _EP_DEVICE_MAP[ep] == device + assert device in _EP_DEVICE_MAP[ep].split("/") def test_nv_tensorrt_rtx_is_gpu_ep(self) -> None: """NvTensorRTRTXExecutionProvider should map to gpu.""" From 0d8e872edfe12894442e54fe3381e62ad7d4cf43 Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Mon, 27 Apr 2026 22:10:42 +0800 Subject: [PATCH 08/10] fix: use add_provider_for_devices for InferenceSession MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues fixed: 1. Explicit EP (--ep qnn) no longer filters by device type in _find_ep_device. QNN reports as NPU in get_ep_devices() but can target GPU — trust the user's choice. 2. InferenceSession now uses _build_session_options() (which calls add_provider_for_devices, working with WinML EP registry) instead of the providers= string parameter (which tries standard DLL loading and fails for WinML-registered EPs like QNN). Falls back to providers= only when _build_session_options returns policy-based options. Fixes #402 --- src/winml/modelkit/session/session.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py index 6e076d2cf..519fff011 100644 --- a/src/winml/modelkit/session/session.py +++ b/src/winml/modelkit/session/session.py @@ -287,13 +287,22 @@ def compile(self) -> None: logger.warning("ModelCompiler failed, using original: %s", e) try: - # Create InferenceSession with explicit providers to avoid - # broken policy-based selection (empty provider type in ORT). - providers = self._resolve_providers(target_device) + # Create InferenceSession. + # Prefer _build_session_options (uses add_provider_for_devices which + # works with WinML EP registry for non-built-in EPs like QNN). + # Only fall back to providers= strings when _build_session_options + # returned policy-based options (identity check). + sess_options = self._build_session_options(target_device) + if sess_options is self._session_options: + # Policy fallback — use providers= for built-in EPs (e.g., DML) + providers = self._resolve_providers(target_device) + else: + # EP configured via add_provider_for_devices — don't override + providers = None with _suppress_native_output(compile_log): session = ort.InferenceSession( str(model_path), - sess_options=self._session_options, + sess_options=sess_options, providers=providers, ) @@ -430,10 +439,12 @@ def _build_session_options(self, device: str) -> ort.SessionOptions: avoid "already registered" errors from repeated calls. """ # Explicit EP targeting: create fresh opts to avoid double-registration + # Don't filter by device type — trust the user's --ep choice + # (e.g., QNN reports as NPU in get_ep_devices but can target GPU) if self._ep and self._ep != "cpu": target_name = self._EP_NAME_MAP.get(self._ep) if target_name: - matched = self._find_ep_device(target_name, device) + matched = self._find_ep_device(target_name) if matched: opts = ort.SessionOptions() opts.add_provider_for_devices([matched], self._provider_options) From cfad4842b4d7bfea547c0f292cba36d7db21e754 Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Wed, 29 Apr 2026 14:09:39 +0800 Subject: [PATCH 09/10] fix: remove providers= fallback, always use add_provider_for_devices WinML-registered EPs (e.g. QNN) do not support the providers= parameter in InferenceSession. Remove _resolve_providers and the conditional providers= path entirely. EP is now configured exclusively via add_provider_for_devices in _build_session_options, or left to ORT device policy in the fallback case. --- src/winml/modelkit/session/session.py | 32 +++------------------------ 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py index 519fff011..5ae7b77b0 100644 --- a/src/winml/modelkit/session/session.py +++ b/src/winml/modelkit/session/session.py @@ -288,22 +288,14 @@ def compile(self) -> None: try: # Create InferenceSession. - # Prefer _build_session_options (uses add_provider_for_devices which - # works with WinML EP registry for non-built-in EPs like QNN). - # Only fall back to providers= strings when _build_session_options - # returned policy-based options (identity check). + # EP is either configured via add_provider_for_devices (WinML EP + # registry, e.g. QNN) or left to ORT's device policy (fallback). + # Never pass providers= — WinML-registered EPs don't support it. sess_options = self._build_session_options(target_device) - if sess_options is self._session_options: - # Policy fallback — use providers= for built-in EPs (e.g., DML) - providers = self._resolve_providers(target_device) - else: - # EP configured via add_provider_for_devices — don't override - providers = None with _suppress_native_output(compile_log): session = ort.InferenceSession( str(model_path), sess_options=sess_options, - providers=providers, ) # Log which providers were selected by ORT (based on policy) @@ -518,24 +510,6 @@ def _find_ep_for_device(device: str) -> Any: return ep_dev return None - def _resolve_providers(self, device: str) -> list[str] | None: - """Resolve explicit provider list for InferenceSession. - - Uses self._ep if set, otherwise queries ``get_ep_devices()`` for the - target device type. Returns None for CPU (let ORT use default). - """ - if self._ep and self._ep != "cpu": - target_name = self._EP_NAME_MAP.get(self._ep) - if target_name: - return [target_name, "CPUExecutionProvider"] - - if device.lower() != "cpu": - matched = self._find_ep_for_device(device) - if matched: - return [matched.ep_name, "CPUExecutionProvider"] - - return None - def _validate_inputs(self, inputs: dict[str, Any]) -> None: """Validate inputs against model expectations. From cba9189a8da19d865f3d56d5520dd90e5e2f6325 Mon Sep 17 00:00:00 2001 From: Qiong Wu Date: Wed, 29 Apr 2026 14:38:22 +0800 Subject: [PATCH 10/10] refactor: clean up _find_ep_device and improve docstrings - Remove unused `device` parameter from `_find_ep_device`; the caller intentionally skips device-type filtering (QNN reports as NPU but can target GPU), so the parameter was dead code that contradicted the comment at the call site - Update `_build_session_options` docstring to document that `"cpu"` is excluded from the add_provider_for_devices path and falls through to policy-based selection - Document registry-order dependency in `_find_ep_for_device`: when multiple EPs match the same device type the first one wins; callers that need a specific EP should set `self._ep` to bypass discovery --- src/winml/modelkit/session/session.py | 35 +++++++++++++-------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/winml/modelkit/session/session.py b/src/winml/modelkit/session/session.py index 5ae7b77b0..6349cac62 100644 --- a/src/winml/modelkit/session/session.py +++ b/src/winml/modelkit/session/session.py @@ -421,11 +421,13 @@ def _is_verbose(self) -> bool: def _build_session_options(self, device: str) -> ort.SessionOptions: """Build ORT SessionOptions from instance session_options and device. - When ``self._ep`` is set, uses ``add_provider_for_devices`` to - explicitly bind a specific EP (e.g., MIGraphX, NvTensorRTRTX). - When not set, queries ``get_ep_devices()`` to discover available - EPs for the target device type. Falls back to policy-based - selection only as a last resort. + When ``self._ep`` is set (and not ``"cpu"``), uses + ``add_provider_for_devices`` to explicitly bind that EP. + ``"cpu"`` falls through to policy-based selection so ORT handles + CPU-only inference without any EP registration. + When ``self._ep`` is not set, queries ``get_ep_devices()`` to + discover an available EP for the target device type. Falls back to + policy-based selection only as a last resort. Note: Returns a **fresh** SessionOptions when using explicit EP to avoid "already registered" errors from repeated calls. @@ -467,27 +469,18 @@ def _build_session_options(self, device: str) -> ort.SessionOptions: return opts @staticmethod - def _find_ep_device(ep_name: str, device: str | None = None) -> Any: - """Find an OrtEpDevice matching EP name and hardware device type. + def _find_ep_device(ep_name: str) -> Any: + """Find the first OrtEpDevice matching the given EP name. Args: ep_name: Full EP name (e.g., "DmlExecutionProvider"). - device: Target device string ("gpu", "npu", "cpu"). When provided, - also matches on OrtHardwareDeviceType so the correct physical - device is selected (e.g., discrete GPU vs integrated). Returns: The matching OrtEpDevice, or None if not found. """ - from ..utils.constants import DEVICE_TO_DEVICE_TYPE - - device_type = DEVICE_TO_DEVICE_TYPE.get(device.upper()) if device else None for ep_dev in ort.get_ep_devices(): - if ep_dev.ep_name != ep_name: - continue - if device_type is not None and ep_dev.device.type != device_type: - continue - return ep_dev + if ep_dev.ep_name == ep_name: + return ep_dev return None @staticmethod @@ -497,6 +490,12 @@ def _find_ep_for_device(device: str) -> Any: Queries ``ort.get_ep_devices()`` and returns the first EP whose hardware device type matches (e.g., device="gpu" matches GPU EPs). + Note: Selection order is determined by the ORT EP registry, which is + not part of any documented contract. On systems where multiple EPs + match the same device type (e.g., QNN and DML both appear as GPU), + the result is registry-order dependent. When a specific EP is + required, use ``self._ep`` to bypass this discovery path entirely. + Returns: The matching OrtEpDevice, or None if not found. """