From c0eb192c6e517dae9e6fb7239b6956a968793f8e Mon Sep 17 00:00:00 2001 From: "Qiong Wu (from Dev Box)" Date: Wed, 8 Apr 2026 11:06:34 +0800 Subject: [PATCH 1/2] fix: guard optimize/quantize/compile against EPContext models (#256 #257) Running winml optimize or winml quantize on a compiled QNN EPContext model crashes deep in ORT with a NOT_IMPLEMENTED error. Add an is_compiled_onnx() guard before processing starts in all three commands so users get a clear, actionable error message instead of an ORT traceback. Also guards winml compile against re-compiling an already-compiled model, which would silently produce incorrect output. --- src/winml/modelkit/commands/compile.py | 7 +++++++ src/winml/modelkit/commands/optimize.py | 8 +++++++- src/winml/modelkit/commands/quantize.py | 7 +++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/winml/modelkit/commands/compile.py b/src/winml/modelkit/commands/compile.py index eb849d044..818a99771 100644 --- a/src/winml/modelkit/commands/compile.py +++ b/src/winml/modelkit/commands/compile.py @@ -26,6 +26,7 @@ from rich.console import Console from ..config.precision import _DEVICE_TO_PROVIDER, VALID_EPS +from ..onnx import is_compiled_onnx from ..utils.logging import configure_logging @@ -159,6 +160,12 @@ def compile( if model is None: raise click.UsageError("Missing option '--model' / '-m'.") + if is_compiled_onnx(model): + raise click.ClickException( + f"{model} is already a compiled EPContext model and cannot be re-compiled. " + "Run 'winml compile' on the original ONNX model." + ) + # Import compiler (late import to speed up CLI) from ..compiler import WinMLCompileConfig, compile_onnx diff --git a/src/winml/modelkit/commands/optimize.py b/src/winml/modelkit/commands/optimize.py index c1f704da7..e28534e67 100644 --- a/src/winml/modelkit/commands/optimize.py +++ b/src/winml/modelkit/commands/optimize.py @@ -30,7 +30,7 @@ import click from rich.console import Console -from ..onnx import load_onnx, save_onnx +from ..onnx import is_compiled_onnx, load_onnx, save_onnx if TYPE_CHECKING: @@ -454,6 +454,12 @@ def optimize( if verbose: optimizer_kwargs["verbose"] = True + if is_compiled_onnx(model): + raise click.ClickException( + f"{model} is a compiled EPContext model and cannot be optimized. " + "Run 'winml optimize' on the original ONNX model before compilation." + ) + try: console.print("\n[bold]Loading model...[/bold]") onnx_model = load_onnx(model) diff --git a/src/winml/modelkit/commands/quantize.py b/src/winml/modelkit/commands/quantize.py index 5437837eb..4852844c7 100644 --- a/src/winml/modelkit/commands/quantize.py +++ b/src/winml/modelkit/commands/quantize.py @@ -25,6 +25,7 @@ import click from rich.console import Console +from ..onnx import is_compiled_onnx from ..utils.logging import configure_logging @@ -173,6 +174,12 @@ def quantize( symmetric=symmetric, ) + if is_compiled_onnx(model): + raise click.ClickException( + f"{model} is a compiled EPContext model and cannot be quantized. " + "Run 'winml quantize' on the original ONNX model before compilation." + ) + try: console.print("\n[bold]Running quantization...[/bold]") result = quantize_onnx(model, output_path=output, config=config) From 90a850367626c4a6cc066f4f3db47a20ce010768 Mon Sep 17 00:00:00 2001 From: "Qiong Wu (from Dev Box)" Date: Wed, 8 Apr 2026 19:06:09 +0800 Subject: [PATCH 2/2] fix: move EPContext guard to fail-fast position per review feedback --- src/winml/modelkit/commands/optimize.py | 12 ++++++------ src/winml/modelkit/commands/quantize.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/winml/modelkit/commands/optimize.py b/src/winml/modelkit/commands/optimize.py index e28534e67..7e410760a 100644 --- a/src/winml/modelkit/commands/optimize.py +++ b/src/winml/modelkit/commands/optimize.py @@ -382,6 +382,12 @@ def optimize( if model is None: raise click.UsageError("Missing option '--model' / '-m'.") + if is_compiled_onnx(model): + raise click.ClickException( + f"{model} is a compiled EPContext model and cannot be optimized. " + "Run 'winml optimize' on the original ONNX model before compilation." + ) + # Inherit debug mode from parent if ctx.obj and ctx.obj.get("debug"): verbose = True @@ -454,12 +460,6 @@ def optimize( if verbose: optimizer_kwargs["verbose"] = True - if is_compiled_onnx(model): - raise click.ClickException( - f"{model} is a compiled EPContext model and cannot be optimized. " - "Run 'winml optimize' on the original ONNX model before compilation." - ) - try: console.print("\n[bold]Loading model...[/bold]") onnx_model = load_onnx(model) diff --git a/src/winml/modelkit/commands/quantize.py b/src/winml/modelkit/commands/quantize.py index 4852844c7..5dae75b4b 100644 --- a/src/winml/modelkit/commands/quantize.py +++ b/src/winml/modelkit/commands/quantize.py @@ -142,6 +142,12 @@ def quantize( configure_logging(verbose=verbose) + if is_compiled_onnx(model): + raise click.ClickException( + f"{model} is a compiled EPContext model and cannot be quantized. " + "Run 'winml quantize' on the original ONNX model before compilation." + ) + # Import quantizer (late import to speed up CLI) from ..quant import WinMLQuantizationConfig, quantize_onnx @@ -174,12 +180,6 @@ def quantize( symmetric=symmetric, ) - if is_compiled_onnx(model): - raise click.ClickException( - f"{model} is a compiled EPContext model and cannot be quantized. " - "Run 'winml quantize' on the original ONNX model before compilation." - ) - try: console.print("\n[bold]Running quantization...[/bold]") result = quantize_onnx(model, output_path=output, config=config)