diff --git a/src/winml/modelkit/cli.py b/src/winml/modelkit/cli.py index 03e14b221..4504856b7 100644 --- a/src/winml/modelkit/cli.py +++ b/src/winml/modelkit/cli.py @@ -264,7 +264,7 @@ def format_commands(self, ctx: click.Context, formatter: click.HelpFormatter) -> def main(ctx: click.Context, verbose: int, quiet: bool, debug: bool) -> None: """WinML CLI - Accelerate Model Deployment on WinML. - Universal ONNX export with QNN and OpenVINO backend support. + Universal ONNX export with various WinML execution providers support. """ # --debug is a backward-compat alias for -vv if debug: diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py index 4ad292d77..7fb4337ea 100644 --- a/src/winml/modelkit/commands/perf.py +++ b/src/winml/modelkit/commands/perf.py @@ -300,7 +300,8 @@ def run(self) -> BenchmarkResult: _print_model_info( self._model.io_config, task=self._model.task or self.config.task, - device=self._model.device, + req_device=self.config.device, + act_device=self._model.device, ep_name=self._model.ep_name, ) @@ -755,6 +756,12 @@ def _perf_modules( # Report Generation # ============================================================================= +def _device_string(req_device: str, act_device: str, ep_name: EPName | None) -> str: + device_str = f"{req_device} ({act_device})" if req_device != act_device else act_device + if ep_name: + device_str = f"{device_str} / {ep_name}" + return device_str + def display_console_report(result: BenchmarkResult, console: Console) -> None: """Display benchmark results in formatted console output.""" @@ -763,9 +770,7 @@ def display_console_report(result: BenchmarkResult, console: Console) -> None: req_device = result.config.device act_device = result.actual_device - device_str = f"{req_device} ({act_device})" if req_device != act_device else act_device - if result.actual_ep: - device_str = f"{device_str} / {result.actual_ep}" + device_str = _device_string(req_device, act_device, result.actual_ep) console.print(f"[dim]Device:[/dim] {device_str}") # TODO: show resolved precision once WinMLPreTrainedModel.precision @@ -885,13 +890,14 @@ def _print_model_info( io_config: dict, *, task: str | None = None, - device: str = "auto", + req_device: str = "auto", + act_device: str = "auto", ep_name: EPName | None = None, ) -> None: """Print model I/O metadata before the benchmark starts.""" console = Console(stderr=True) console.print() - device_line = f"{device} / {ep_name}" if ep_name else device + device_line = _device_string(req_device, act_device, ep_name) console.print(f"[dim]Device:[/dim] {device_line}") if task: console.print(f"[dim]Task:[/dim] {task}") @@ -1011,7 +1017,7 @@ def _run_onnx_benchmark( session.compile() # Print model info before benchmark starts - _print_model_info(io_cfg, device=session.device, ep_name=session.ep_name) + _print_model_info(io_cfg, req_device=device, act_device=session.device, ep_name=session.ep_name) # Run benchmark total_iterations = warmup + iterations @@ -1044,7 +1050,7 @@ def _run_onnx_benchmark( total_iterations=total_iterations, warmup=warmup, model_id=str(onnx_path.name), - device=device, + device=session.device or device, ) hw_metrics = hw.to_dict() else: diff --git a/src/winml/modelkit/session/monitor/_pdh.py b/src/winml/modelkit/session/monitor/_pdh.py index a9ceccd8a..0e0364dc4 100644 --- a/src/winml/modelkit/session/monitor/_pdh.py +++ b/src/winml/modelkit/session/monitor/_pdh.py @@ -329,8 +329,8 @@ def build_npu_query(npu_luid: str, pid: int | None = None) -> PdhQuery: Returns: An opened PdhQuery configured for NPU monitoring. """ - # Neural: OpenVINO NPU - return build_adapter_query(npu_luid, engine_types=("Compute", "Neural"), pid=pid) + # Neural / 3D: OpenVINO NPU + return build_adapter_query(npu_luid, engine_types=("Compute", "Neural", "3D"), pid=pid) def build_gpu_query(gpu_luid: str, pid: int | None = None) -> PdhQuery: diff --git a/tests/e2e/test_perf_e2e.py b/tests/e2e/test_perf_e2e.py index 013433167..8952b5c7d 100644 --- a/tests/e2e/test_perf_e2e.py +++ b/tests/e2e/test_perf_e2e.py @@ -484,6 +484,7 @@ def test_benchmark_ep_cpu(self, ep: str, tmp_path: Path, model_arg: str): def test_benchmark_ep_gpu(self, ep: str, tmp_path: Path, model_arg: str): """Benchmark with --ep .""" require_ep(ep) + _require_gpu() output_file = tmp_path / f"perf_hf_{ep}_gpu.json" @@ -507,6 +508,7 @@ def test_benchmark_ep_gpu(self, ep: str, tmp_path: Path, model_arg: str): def test_benchmark_ep_npu(self, ep: str, tmp_path: Path, model_arg: str): """Benchmark with --ep .""" require_ep(ep) + _require_npu() output_file = tmp_path / f"perf_hf_{ep}_npu.json"