From 18a841b0ce55dc980cc0e2622a90bb208722fc22 Mon Sep 17 00:00:00 2001 From: Zhipeng Wang Date: Wed, 1 Apr 2026 12:29:41 +0800 Subject: [PATCH 1/2] rename: CLI command from wmk to winml (#203) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the CLI entry point and all references from `wmk` to `winml`: - pyproject.toml entry point - cli.py prog_name and docstrings - All 12 command files (export, build, config, analyze, quantize, optimize, compile, inspect, sys, eval, perf, hub) - Source code: error messages, validator hints, cache docstring, temp dir prefix, venv name - Tests: 13 test files - Docs: README.md, e2e_eval scripts and JSON config (wmk_metric_key → winml_metric_key) - Deleted stale egg-info --- README.md | 8 +- pyproject.toml | 2 +- scripts/e2e_eval/README.md | 4 +- scripts/e2e_eval/datasets/build_ai4privacy.py | 18 +- .../e2e_eval/datasets/build_indonlu_posp.py | 8 +- .../datasets/build_pubtables1m_detection.py | 69 +++--- .../datasets/build_pubtables1m_structure.py | 69 +++--- scripts/e2e_eval/run_eval.py | 205 +++++++++++------- scripts/e2e_eval/run_pytorch_baseline.py | 4 +- .../e2e_eval/testsets/models_with_acc.json | 44 ++-- scripts/e2e_eval/utils/accuracy.py | 25 ++- scripts/e2e_eval/utils/dataset_config.py | 9 +- .../constant_folding_validator.py | 2 +- .../pattern_matching_validator.py | 4 +- .../qdq_validation_validator.py | 2 +- .../shape_inference_validator.py | 2 +- src/winml/modelkit/build/hf.py | 4 +- src/winml/modelkit/cache/__init__.py | 2 +- src/winml/modelkit/cli.py | 10 +- src/winml/modelkit/commands/analyze.py | 24 +- src/winml/modelkit/commands/build.py | 32 +-- src/winml/modelkit/commands/compile.py | 32 ++- src/winml/modelkit/commands/config.py | 54 ++--- src/winml/modelkit/commands/eval.py | 12 +- src/winml/modelkit/commands/export.py | 28 +-- src/winml/modelkit/commands/hub.py | 36 ++- src/winml/modelkit/commands/inspect.py | 18 +- src/winml/modelkit/commands/optimize.py | 24 +- src/winml/modelkit/commands/perf.py | 18 +- src/winml/modelkit/commands/quantize.py | 30 ++- src/winml/modelkit/commands/sys.py | 39 ++-- src/winml/modelkit/models/auto.py | 4 +- .../session/qairt/compile_qairt_bin.py | 2 +- .../modelkit/session/qairt/qairt_session.py | 6 +- tests/e2e/test_build_e2e.py | 35 +-- tests/integration/test_module_build.py | 2 +- tests/integration/test_quantization.py | 8 +- .../core/model_validators/test_validators.py | 4 +- tests/unit/cache/test_model.py | 2 +- tests/unit/commands/test_build_module.py | 2 +- tests/unit/commands/test_cli.py | 4 +- tests/unit/commands/test_hub.py | 39 ++-- tests/unit/commands/test_perf_module.py | 6 +- tests/unit/config/test_build.py | 6 +- tests/unit/config/test_build_onnx.py | 2 +- .../unit/optracing/test_perf_optracing_cli.py | 2 +- 46 files changed, 513 insertions(+), 449 deletions(-) diff --git a/README.md b/README.md index d86dc8699..ee64a0de2 100644 --- a/README.md +++ b/README.md @@ -32,17 +32,17 @@ uv sync ### Usage -ModelKit provides a CLI tool `wmk`: +ModelKit provides a CLI tool `winml`: ```bash # Export a Hugging Face model to ONNX -uv run wmk export --model microsoft/resnet-50 --output ./output +uv run winml export --model microsoft/resnet-50 --output ./output # Analyze an ONNX model -uv run wmk analyze --model ./output/model.onnx +uv run winml analyze --model ./output/model.onnx # Quantize an ONNX model -uv run wmk quantize --model ./output/model.onnx +uv run winml quantize --model ./output/model.onnx ``` ## Contributions and Feedback diff --git a/pyproject.toml b/pyproject.toml index 77b82bd6d..4f5d301e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ urls.Repository = "https://github.com/microsoft/ModelKit.git" # but package-dir expects namespace names (winml.modelkit). These don't auto-connect. # For flat layout (modelkit/) with namespace imports (winml.modelkit), explicit listing # ensures the namespace prefix is correctly applied to all subpackages. -scripts.wmk = "winml.modelkit.cli:main" +scripts.winml = "winml.modelkit.cli:main" [dependency-groups] dev = [ diff --git a/scripts/e2e_eval/README.md b/scripts/e2e_eval/README.md index efc475844..911d5320b 100644 --- a/scripts/e2e_eval/README.md +++ b/scripts/e2e_eval/README.md @@ -1,6 +1,6 @@ # E2E Evaluation Scripts -Batch-evaluate ModelKit's `wmk perf` pipeline against a curated set of HuggingFace models. +Batch-evaluate ModelKit's `winml perf` pipeline against a curated set of HuggingFace models. Captures pass/fail, failure classification, and generates interactive reports. ## Quick Start @@ -48,7 +48,7 @@ uv run python scripts/e2e_eval/build_registry.py --dry-run ### `run_eval.py` — Run Evaluation -Executes `wmk perf` for each model in a subprocess, classifies failures, and +Executes `winml perf` for each model in a subprocess, classifies failures, and generates reports (JSON, Markdown, HTML). ```bash diff --git a/scripts/e2e_eval/datasets/build_ai4privacy.py b/scripts/e2e_eval/datasets/build_ai4privacy.py index 54b22305f..daffd5a72 100644 --- a/scripts/e2e_eval/datasets/build_ai4privacy.py +++ b/scripts/e2e_eval/datasets/build_ai4privacy.py @@ -1,3 +1,8 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + """Build a local HF-compatible dataset for Isotonic/distilbert_finetuned_ai4privacy_v2. The ``ai4privacy/pii-masking-200k`` dataset uses string BIO labels @@ -13,6 +18,7 @@ import argparse from pathlib import Path + _NUM_SAMPLES = 10000 @@ -37,10 +43,12 @@ def build_dataset(output_dir: Path) -> None: tokens_list = [s["mbert_text_tokens"] for s in samples] tags_list = [[label2id[lbl] for lbl in s["mbert_bio_labels"]] for s in samples] - features = Features({ - "tokens": Sequence(Value("string")), - "ner_tags": Sequence(ClassLabel(names=all_labels)), - }) + features = Features( + { + "tokens": Sequence(Value("string")), + "ner_tags": Sequence(ClassLabel(names=all_labels)), + } + ) dataset = Dataset.from_dict( {"tokens": tokens_list, "ner_tags": tags_list}, features=features, @@ -51,7 +59,7 @@ def build_dataset(output_dir: Path) -> None: print("Done.") -def main() -> None: +def main() -> None: # noqa: D103 parser = argparse.ArgumentParser(description="Build ai4privacy PII dataset") parser.add_argument("--output", type=Path, required=True, help="Output directory") args = parser.parse_args() diff --git a/scripts/e2e_eval/datasets/build_indonlu_posp.py b/scripts/e2e_eval/datasets/build_indonlu_posp.py index 31efacfe7..8896bbb86 100644 --- a/scripts/e2e_eval/datasets/build_indonlu_posp.py +++ b/scripts/e2e_eval/datasets/build_indonlu_posp.py @@ -1,3 +1,8 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + """Build a local HF-compatible dataset for indonlp/indonlu (posp subset). The upstream ``indonlp/indonlu`` dataset uses a legacy loading script @@ -13,6 +18,7 @@ import argparse from pathlib import Path + _PARQUET_REVISION = "refs/convert/parquet" _PARQUET_PATH = "posp/validation/0000.parquet" @@ -38,7 +44,7 @@ def build_dataset(output_dir: Path) -> None: print("Done.") -def main() -> None: +def main() -> None: # noqa: D103 parser = argparse.ArgumentParser(description="Build indonlu posp dataset") parser.add_argument("--output", type=Path, required=True, help="Output directory") args = parser.parse_args() diff --git a/scripts/e2e_eval/datasets/build_pubtables1m_detection.py b/scripts/e2e_eval/datasets/build_pubtables1m_detection.py index 3b2b41140..0fad24478 100644 --- a/scripts/e2e_eval/datasets/build_pubtables1m_detection.py +++ b/scripts/e2e_eval/datasets/build_pubtables1m_detection.py @@ -1,3 +1,8 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + """Build a local HF-compatible object-detection dataset from PubTables-1M. Downloads the validation split annotations (5 MB) and images (7 GB) from @@ -18,9 +23,10 @@ import argparse import random import tarfile -import xml.etree.ElementTree as ET +import xml.etree.ElementTree as ET # noqa: N817 from pathlib import Path + # Labels matching microsoft/table-transformer-detection config LABEL_NAMES = ["table", "table rotated"] @@ -30,7 +36,7 @@ def _parse_voc_xml(xml_bytes: bytes) -> dict | None: """Parse a PASCAL VOC XML annotation and return structured data.""" - root = ET.fromstring(xml_bytes) + root = ET.fromstring(xml_bytes) # noqa: S314 filename = root.findtext("filename") size_el = root.find("size") if size_el is None or filename is None: @@ -147,38 +153,43 @@ def build_dataset(output_dir: Path) -> None: print(f" Extracted {len(images_by_name)} images") # Step 6: Build dataset rows (skip any missing images) - from datasets import ClassLabel, Dataset, Features, Image as HFImage, Sequence, Value + from datasets import ClassLabel, Dataset, Features, Sequence, Value + from datasets import Image as HFImage rows: list[dict] = [] for idx, ann in enumerate(sampled): img = images_by_name.get(ann["filename"]) if img is None: continue - rows.append({ - "image_id": idx, - "image": img, - "width": ann["width"], - "height": ann["height"], + rows.append( + { + "image_id": idx, + "image": img, + "width": ann["width"], + "height": ann["height"], + "objects": { + "bbox_id": ann["bbox_id"], + "category": ann["category"], + "bbox": ann["bbox"], + "area": ann["area"], + }, + } + ) + + features = Features( + { + "image_id": Value("int64"), + "image": HFImage(), + "width": Value("int64"), + "height": Value("int64"), "objects": { - "bbox_id": ann["bbox_id"], - "category": ann["category"], - "bbox": ann["bbox"], - "area": ann["area"], + "bbox_id": Sequence(Value("int64")), + "category": Sequence(ClassLabel(names=LABEL_NAMES)), + "bbox": Sequence(Sequence(Value("float64"), length=4)), + "area": Sequence(Value("float64")), }, - }) - - features = Features({ - "image_id": Value("int64"), - "image": HFImage(), - "width": Value("int64"), - "height": Value("int64"), - "objects": { - "bbox_id": Sequence(Value("int64")), - "category": Sequence(ClassLabel(names=LABEL_NAMES)), - "bbox": Sequence(Sequence(Value("float64"), length=4)), - "area": Sequence(Value("float64")), - }, - }) + } + ) dataset = Dataset.from_list(rows, features=features) print(f"Saving {len(dataset)} samples to {output_dir} ...") @@ -187,10 +198,8 @@ def build_dataset(output_dir: Path) -> None: print("Done.") -def main() -> None: - parser = argparse.ArgumentParser( - description="Build PubTables-1M detection dataset" - ) +def main() -> None: # noqa: D103 + parser = argparse.ArgumentParser(description="Build PubTables-1M detection dataset") parser.add_argument("--output", type=Path, required=True, help="Output directory") args = parser.parse_args() build_dataset(args.output) diff --git a/scripts/e2e_eval/datasets/build_pubtables1m_structure.py b/scripts/e2e_eval/datasets/build_pubtables1m_structure.py index f967b45e7..e59618267 100644 --- a/scripts/e2e_eval/datasets/build_pubtables1m_structure.py +++ b/scripts/e2e_eval/datasets/build_pubtables1m_structure.py @@ -1,3 +1,8 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + """Build a local HF-compatible object-detection dataset from PubTables-1M (Structure). Downloads the validation split annotations (29 MB) and images (2.6 GB) from @@ -21,9 +26,10 @@ import argparse import random import tarfile -import xml.etree.ElementTree as ET +import xml.etree.ElementTree as ET # noqa: N817 from pathlib import Path + # Labels matching microsoft/table-transformer-structure-recognition config LABEL_NAMES = [ "table", @@ -40,7 +46,7 @@ def _parse_voc_xml(xml_bytes: bytes, label2id: dict[str, int]) -> dict | None: """Parse a PASCAL VOC XML annotation and return structured data.""" - root = ET.fromstring(xml_bytes) + root = ET.fromstring(xml_bytes) # noqa: S314 filename = root.findtext("filename") size_el = root.find("size") if size_el is None or filename is None: @@ -155,38 +161,43 @@ def build_dataset(output_dir: Path) -> None: print(f" Extracted {len(images_by_name)} images") # Step 6: Build dataset rows (skip any missing images) - from datasets import ClassLabel, Dataset, Features, Image as HFImage, Sequence, Value + from datasets import ClassLabel, Dataset, Features, Sequence, Value + from datasets import Image as HFImage rows: list[dict] = [] for idx, ann in enumerate(sampled): img = images_by_name.get(ann["filename"]) if img is None: continue - rows.append({ - "image_id": idx, - "image": img, - "width": ann["width"], - "height": ann["height"], + rows.append( + { + "image_id": idx, + "image": img, + "width": ann["width"], + "height": ann["height"], + "objects": { + "bbox_id": ann["bbox_id"], + "category": ann["category"], + "bbox": ann["bbox"], + "area": ann["area"], + }, + } + ) + + features = Features( + { + "image_id": Value("int64"), + "image": HFImage(), + "width": Value("int64"), + "height": Value("int64"), "objects": { - "bbox_id": ann["bbox_id"], - "category": ann["category"], - "bbox": ann["bbox"], - "area": ann["area"], + "bbox_id": Sequence(Value("int64")), + "category": Sequence(ClassLabel(names=LABEL_NAMES)), + "bbox": Sequence(Sequence(Value("float64"), length=4)), + "area": Sequence(Value("float64")), }, - }) - - features = Features({ - "image_id": Value("int64"), - "image": HFImage(), - "width": Value("int64"), - "height": Value("int64"), - "objects": { - "bbox_id": Sequence(Value("int64")), - "category": Sequence(ClassLabel(names=LABEL_NAMES)), - "bbox": Sequence(Sequence(Value("float64"), length=4)), - "area": Sequence(Value("float64")), - }, - }) + } + ) dataset = Dataset.from_list(rows, features=features) print(f"Saving {len(dataset)} samples to {output_dir} ...") @@ -195,10 +206,8 @@ def build_dataset(output_dir: Path) -> None: print("Done.") -def main() -> None: - parser = argparse.ArgumentParser( - description="Build PubTables-1M structure recognition dataset" - ) +def main() -> None: # noqa: D103 + parser = argparse.ArgumentParser(description="Build PubTables-1M structure recognition dataset") parser.add_argument("--output", type=Path, required=True, help="Output directory") args = parser.parse_args() build_dataset(args.output) diff --git a/scripts/e2e_eval/run_eval.py b/scripts/e2e_eval/run_eval.py index 1aa6b682f..fa532e393 100644 --- a/scripts/e2e_eval/run_eval.py +++ b/scripts/e2e_eval/run_eval.py @@ -1,11 +1,16 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + """E2E evaluation runner — unified perf + accuracy. -Batch-runs wmk perf (and optionally wmk eval + pytorch baseline) for models +Batch-runs winml perf (and optionally winml eval + pytorch baseline) for models in a JSON registry, writes unified eval_result.json per model, and generates combined reports. -Strategy B cache sharing: wmk perf runs first (build + benchmark, populates -model cache). wmk eval then reuses the cache — no redundant build step. +Strategy B cache sharing: winml perf runs first (build + benchmark, populates +model cache). winml eval then reuses the cache — no redundant build step. Usage: # Perf only (default) @@ -14,7 +19,7 @@ # Both perf and accuracy in one batch python scripts/e2e_eval/run_eval.py --eval-type both --priority P0 - # Accuracy only (wmk perf is skipped; wmk eval will build the model if cache is missing) + # Accuracy only (winml perf is skipped; winml eval will build the model if cache is missing) python scripts/e2e_eval/run_eval.py --eval-type accuracy --hf-model microsoft/resnet-50 # Single model @@ -102,10 +107,10 @@ def _get_timeout_skip_reason(hf_id: str, task: str) -> str: # Patterns that indicate the disk is full (cross-platform). _NO_SPACE_PATTERNS = ( - "no space left on device", # Linux/macOS OSError - "oserror: [errno 28]", # Python errno string + "no space left on device", # Linux/macOS OSError + "oserror: [errno 28]", # Python errno string "there is not enough space on the disk", # Windows - "winerror 112", # Windows disk-full error code + "winerror 112", # Windows disk-full error code "disk full", ) @@ -167,7 +172,8 @@ def _kill_process_tree(pid: int) -> None: # Fallback: taskkill on Windows, killpg on Unix if platform.system() == "Windows": subprocess.run( # noqa: S603 - ["taskkill", "/F", "/T", "/PID", str(pid)], capture_output=True + ["taskkill", "/F", "/T", "/PID", str(pid)], # noqa: S607 + capture_output=True, ) else: import signal @@ -309,22 +315,31 @@ def _watchdog() -> None: def _run_build( - entry: ModelEntry, device: str, precision: str, timeout: int, model_dir: Path, + entry: ModelEntry, + device: str, + precision: str, + timeout: int, + model_dir: Path, ) -> dict: - """Run wmk config + wmk build for one model. Returns build result dict. + """Run winml config + winml build for one model. Returns build result dict. - Flow: wmk config → config.json → wmk build --use-cache → ONNX path. + Flow: winml config → config.json → winml build --use-cache → ONNX path. """ config_path = model_dir / "build_config.json" model_dir.mkdir(parents=True, exist_ok=True) - # Step 1: wmk config + # Step 1: winml config config_args = [ - *WMK, "config", - "-m", entry.hf_id, - "--device", device, - "--precision", precision, - "-o", str(config_path), + *WMK, + "config", + "-m", + entry.hf_id, + "--device", + device, + "--precision", + precision, + "-o", + str(config_path), ] if entry.task: config_args += ["--task", entry.task] @@ -338,11 +353,14 @@ def _run_build( "proc": config_proc, } - # Step 2: wmk build --use-cache + # Step 2: winml build --use-cache build_args = [ - *WMK, "build", - "-c", str(config_path), - "-m", entry.hf_id, + *WMK, + "build", + "-c", + str(config_path), + "-m", + entry.hf_id, "--use-cache", ] @@ -356,7 +374,7 @@ def _run_build( } # Extract ONNX path from build output - # wmk build prints "Final artifact: " in stderr + # winml build prints "Final artifact: " in stderr onnx_path = None for line in build_proc["stderr"].splitlines(): if "Final artifact:" in line: @@ -404,9 +422,12 @@ def _find_cached_model(hf_id: str, build_proc: dict) -> str | None: def run_model( - entry: ModelEntry, device: str, timeout: int, onnx_path: str | None = None, + entry: ModelEntry, + device: str, + timeout: int, + onnx_path: str | None = None, ) -> dict: - """Execute wmk perf for one model. Returns raw subprocess result dict. + """Execute winml perf for one model. Returns raw subprocess result dict. When onnx_path is provided, benchmarks the pre-built ONNX directly (skips internal build). Otherwise falls back to HF model ID. @@ -415,8 +436,14 @@ def run_model( args = [*WMK, "perf", "-m", onnx_path, "--device", device] else: args = [ - *WMK, "perf", "-m", entry.hf_id, - "--device", device, "--precision", _DEFAULT_PRECISION, + *WMK, + "perf", + "-m", + entry.hf_id, + "--device", + device, + "--precision", + _DEFAULT_PRECISION, ] if entry.task: args += ["--task", entry.task] @@ -458,10 +485,10 @@ def _parse_metric_from_stdout(stdout: str) -> dict | None: return None -def _parse_metric_from_wmk_output( +def _parse_metric_from_winml_output( output_path: Path, metric_name: str, num_samples: int ) -> dict | None: - """Parse wmk eval --output JSON file into the canonical metric dict.""" + """Parse winml eval --output JSON file into the canonical metric dict.""" try: data = json.loads(output_path.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError): @@ -500,7 +527,7 @@ def _build_dataset(ds_config: dict, timeout: int) -> None: safe_print(f" {line}") -def _run_wmk_eval( +def _run_winml_eval( entry: ModelEntry, device: str, timeout: int, @@ -508,27 +535,36 @@ def _run_wmk_eval( model_dir: Path, onnx_path: str | None = None, ) -> dict: - """Invoke wmk eval for one model. Returns process result + parsed metric.""" - output_path = model_dir / "wmk_eval_output.json" + """Invoke winml eval for one model. Returns process result + parsed metric.""" + output_path = model_dir / "winml_eval_output.json" model_dir.mkdir(parents=True, exist_ok=True) - # wmk eval requires explicit device ('cpu'/'gpu'/'npu'); 'auto' is not accepted + # winml eval requires explicit device ('cpu'/'gpu'/'npu'); 'auto' is not accepted eval_device = "npu" if device == "auto" else device if onnx_path: args = [ - *WMK, "eval", "-m", onnx_path, - "--model-id", entry.hf_id, - "--device", eval_device, + *WMK, + "eval", + "-m", + onnx_path, + "--model-id", + entry.hf_id, + "--device", + eval_device, ] else: args = [ - *WMK, "eval", "-m", entry.hf_id, - "--device", eval_device, + *WMK, + "eval", + "-m", + entry.hf_id, + "--device", + eval_device, ] if entry.task: args += ["--task", entry.task] # When ds_config is provided, pass explicit dataset args; - # otherwise wmk eval uses its built-in task defaults. + # otherwise winml eval uses its built-in task defaults. if ds_config.get("dataset"): args += ["--dataset", ds_config["dataset"]] if ds_config.get("split"): @@ -550,14 +586,9 @@ def _run_wmk_eval( metric = None if proc["exit_code"] == 0 and output_path.exists(): - wmk_key = ( - ds_config.get("wmk_metric_key") - or ds_config.get("metric", "accuracy") - ) + winml_key = ds_config.get("winml_metric_key") or ds_config.get("metric", "accuracy") num_samples = ds_config.get("num_samples", _DEFAULT_SAMPLES) - metric = _parse_metric_from_wmk_output( - output_path, wmk_key, num_samples - ) + metric = _parse_metric_from_winml_output(output_path, winml_key, num_samples) status = "PASS" if (proc["exit_code"] == 0 and metric is not None) else "FAIL" return { @@ -606,9 +637,7 @@ def _save_baseline_cache(cache: dict) -> None: ) -def _lookup_baseline_cache( - hf_id: str, task: str, ds_config: dict -) -> dict | None: +def _lookup_baseline_cache(hf_id: str, task: str, ds_config: dict) -> dict | None: """Return cached baseline result dict, or None if not cached.""" cache = _load_baseline_cache() key = _baseline_cache_key(hf_id, task, ds_config) @@ -631,9 +660,7 @@ def _shorten_command(cmd: str) -> str: return " ".join(shortened) -def _store_baseline_cache( - hf_id: str, task: str, ds_config: dict, result: dict -) -> None: +def _store_baseline_cache(hf_id: str, task: str, ds_config: dict, result: dict) -> None: """Store a successful baseline result in cache.""" if result.get("status") != "PASS": return @@ -695,13 +722,13 @@ def _run_accuracy_phase( model_dir: Path, onnx_path: str | None = None, ) -> dict: - """Run wmk eval + pytorch baseline for one model. Returns accuracy sub-section dict.""" + """Run winml eval + pytorch baseline for one model. Returns accuracy sub-section dict.""" ds_config = get_dataset_config(entry.hf_id, entry.task) or {} # Build local dataset if a build_script is configured _build_dataset(ds_config, timeout) - wmk = _run_wmk_eval(entry, device, timeout, ds_config, model_dir, onnx_path) + winml = _run_winml_eval(entry, device, timeout, ds_config, model_dir, onnx_path) # Check baseline cache before running the expensive PyTorch baseline cached = _lookup_baseline_cache(entry.hf_id, entry.task, ds_config) @@ -712,17 +739,17 @@ def _run_accuracy_phase( baseline = _run_pytorch_baseline(entry, device, timeout) _store_baseline_cache(entry.hf_id, entry.task, ds_config, baseline) - delta_abs, delta_rel = compute_delta(wmk["metric"], baseline["metric"]) + delta_abs, delta_rel = compute_delta(winml["metric"], baseline["metric"]) return { "skipped": False, "skip_reason": None, - "wmk_eval_status": wmk["status"], - "wmk_metric": wmk["metric"], - "wmk_eval_exit_code": wmk.get("exit_code"), - "wmk_eval_stdout": wmk.get("stdout", ""), - "wmk_eval_stderr": wmk.get("stderr", ""), - "elapsed_wmk": wmk["elapsed"], + "winml_eval_status": winml["status"], + "winml_metric": winml["metric"], + "winml_eval_exit_code": winml.get("exit_code"), + "winml_eval_stdout": winml.get("stdout", ""), + "winml_eval_stderr": winml.get("stderr", ""), + "elapsed_winml": winml["elapsed"], "pytorch_baseline_status": baseline["status"], "pytorch_baseline_metric": baseline["metric"], "pytorch_baseline_exit_code": baseline.get("exit_code"), @@ -731,7 +758,7 @@ def _run_accuracy_phase( "delta_absolute": delta_abs, "delta_relative": delta_rel, "dataset_config": {k: v for k, v in ds_config.items() if k != "hf_token_required"}, - "wmk_eval_command": wmk["command"], + "winml_eval_command": winml["command"], "pytorch_baseline_command": baseline["command"], } @@ -758,8 +785,10 @@ def save_environment_info(path: Path) -> None: # Git HEAD commit info try: result = subprocess.run( - ["git", "log", "-1", "--format=%H%n%s%n%ai"], # noqa: S603, S607 - capture_output=True, text=True, timeout=5, + ["git", "log", "-1", "--format=%H%n%s%n%ai"], # noqa: S607 + capture_output=True, + text=True, + timeout=5, ) if result.returncode == 0: lines = result.stdout.strip().splitlines() @@ -807,6 +836,7 @@ def model_result_dir(output_dir: Path, hf_id: str, task: str = "") -> Path: def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" parser = argparse.ArgumentParser(description="E2E evaluation runner — unified perf + accuracy") parser.add_argument( "--registry", @@ -822,8 +852,8 @@ def parse_args() -> argparse.Namespace: default="perf", help=( "Evaluation signals to run (default: perf). " - "accuracy/both: wmk perf runs first to populate cache, " - "then wmk eval + pytorch baseline." + "accuracy/both: winml perf runs first to populate cache, " + "then winml eval + pytorch baseline." ), ) parser.add_argument("--task", help="Filter by HF task") @@ -878,6 +908,7 @@ def parse_args() -> argparse.Namespace: def main() -> None: + """Run E2E evaluation pipeline.""" args = parse_args() # 1. Load registry @@ -928,13 +959,7 @@ def main() -> None: safe_print(f"Registry: {len(entries)} models (eval-type: {args.eval_type})") for e in entries: ds = get_dataset_config(e.hf_id, e.task) - skip_acc = ( - "" - if args.eval_type == "perf" - else " [task_default]" - if ds is None - else "" - ) + skip_acc = "" if args.eval_type == "perf" else " [task_default]" if ds is None else "" safe_print( f" [{e.priority}] {e.hf_id} / {e.task} ({e.model_type}, {e.group}){skip_acc}" ) @@ -963,9 +988,9 @@ def main() -> None: save_environment_info(output_dir / "environment.json") # eval_types_run reflects what actually runs for each model: - # "perf" → wmk perf only - # "accuracy" → wmk eval + pytorch baseline only (perf skipped) - # "both" → Strategy B: wmk perf first (populates cache), then wmk eval + baseline + # "perf" → winml perf only + # "accuracy" → winml eval + pytorch baseline only (perf skipped) + # "both" → Strategy B: winml perf first (populates cache), then winml eval + baseline eval_types_run = ( ["accuracy"] if args.eval_type == "accuracy" @@ -1010,9 +1035,7 @@ def main() -> None: # Timeout skip list: skip known-timeout models and write a TIMEOUT result if (entry.hf_id, entry.task or "") in timeout_skip_set: reason = _get_timeout_skip_reason(entry.hf_id, entry.task or "") - safe_print( - f"\n[{i}/{len(entries)}] {label} (SKIP - TIMEOUT: {reason})" - ) + safe_print(f"\n[{i}/{len(entries)}] {label} (SKIP - TIMEOUT: {reason})") model_dir.mkdir(parents=True, exist_ok=True) timeout_result = build_eval_result( entry=entry, @@ -1085,12 +1108,16 @@ def main() -> None: perf_proc: dict | None = None accuracy_result: dict | None = None - # Build phase: wmk config + wmk build → ONNX path + # Build phase: winml config + winml build → ONNX path # Build is shared by perf and eval, avoiding redundant builds. onnx_path: str | None = None if args.eval_type in ("perf", "both"): build_result = _run_build( - entry, args.device, _DEFAULT_PRECISION, args.timeout, model_dir, + entry, + args.device, + _DEFAULT_PRECISION, + args.timeout, + model_dir, ) if build_result["success"]: onnx_path = build_result["onnx_path"] @@ -1098,12 +1125,20 @@ def main() -> None: if args.eval_type == "accuracy": # Accuracy-only: build + eval (no perf) build_result = _run_build( - entry, args.device, _DEFAULT_PRECISION, args.timeout, model_dir, + entry, + args.device, + _DEFAULT_PRECISION, + args.timeout, + model_dir, ) if build_result["success"]: onnx_path = build_result["onnx_path"] accuracy_result = _run_accuracy_phase( - entry, args.device, args.timeout, model_dir, onnx_path, + entry, + args.device, + args.timeout, + model_dir, + onnx_path, ) else: accuracy_result = {"skipped": True, "skip_reason": "build_failed"} @@ -1124,7 +1159,11 @@ def main() -> None: accuracy_result = {"skipped": True, "skip_reason": "perf_failed"} else: accuracy_result = _run_accuracy_phase( - entry, args.device, args.timeout, model_dir, onnx_path, + entry, + args.device, + args.timeout, + model_dir, + onnx_path, ) else: # Build failed diff --git a/scripts/e2e_eval/run_pytorch_baseline.py b/scripts/e2e_eval/run_pytorch_baseline.py index 68b2a10c6..e39bc44b1 100644 --- a/scripts/e2e_eval/run_pytorch_baseline.py +++ b/scripts/e2e_eval/run_pytorch_baseline.py @@ -6,11 +6,11 @@ """PyTorch baseline inference for accuracy evaluation (Signal 2). Performs native PyTorch inference on a HuggingFace model using the same -dataset configuration as ``wmk eval``, so both sides are always evaluated on +dataset configuration as ``winml eval``, so both sides are always evaluated on identical inputs. Dataset config is read from ``utils/dataset_config.py`` — the authoritative -source shared with run_eval.py. When ``wmk eval`` is implemented inside +source shared with run_eval.py. When ``winml eval`` is implemented inside ModelKit, it should import from the same location. Output: prints a single JSON object as the last line on stdout: diff --git a/scripts/e2e_eval/testsets/models_with_acc.json b/scripts/e2e_eval/testsets/models_with_acc.json index b1dae87d1..109c6f578 100644 --- a/scripts/e2e_eval/testsets/models_with_acc.json +++ b/scripts/e2e_eval/testsets/models_with_acc.json @@ -155,7 +155,7 @@ "path": "timm/mini-imagenet", "split": "test", "metric": "top1_accuracy", - "wmk_metric_key": "accuracy" + "winml_metric_key": "accuracy" } }, { @@ -168,7 +168,7 @@ "path": "timm/mini-imagenet", "split": "test", "metric": "top1_accuracy", - "wmk_metric_key": "accuracy" + "winml_metric_key": "accuracy" } }, { @@ -181,7 +181,7 @@ "build_script": "scripts/e2e_eval/datasets/build_fairface.py", "path": "~/.cache/winml/eval_datasets/build_fairface", "metric": "top1_accuracy", - "wmk_metric_key": "accuracy", + "winml_metric_key": "accuracy", "columns_mapping": { "label_column": "gender" } @@ -197,7 +197,7 @@ "path": "timm/mini-imagenet", "split": "test", "metric": "top1_accuracy", - "wmk_metric_key": "accuracy" + "winml_metric_key": "accuracy" } }, { @@ -210,7 +210,7 @@ "path": "timm/mini-imagenet", "split": "test", "metric": "top1_accuracy", - "wmk_metric_key": "accuracy" + "winml_metric_key": "accuracy" } }, { @@ -239,7 +239,7 @@ "path": "timm/mini-imagenet", "split": "test", "metric": "top1_accuracy", - "wmk_metric_key": "accuracy" + "winml_metric_key": "accuracy" } }, { @@ -252,7 +252,7 @@ "build_script": "scripts/e2e_eval/datasets/build_fairface.py", "path": "~/.cache/winml/eval_datasets/build_fairface", "metric": "top1_accuracy", - "wmk_metric_key": "accuracy", + "winml_metric_key": "accuracy", "columns_mapping": { "label_column": "age" } @@ -268,7 +268,7 @@ "path": "detection-datasets/coco", "split": "val", "metric": "map", - "wmk_metric_key": "map", + "winml_metric_key": "map", "columns_mapping": { "annotation_column": "objects", "bbox_key": "bbox", @@ -287,7 +287,7 @@ "path": "detection-datasets/coco", "split": "val", "metric": "map", - "wmk_metric_key": "map", + "winml_metric_key": "map", "columns_mapping": { "annotation_column": "objects", "bbox_key": "bbox", @@ -306,7 +306,7 @@ "path": "detection-datasets/coco", "split": "val", "metric": "map", - "wmk_metric_key": "map", + "winml_metric_key": "map", "label_mapping_file": "scripts/e2e_eval/datasets/coco_to_rtdetr_labels.json", "columns_mapping": { "annotation_column": "objects", @@ -326,7 +326,7 @@ "path": "detection-datasets/coco", "split": "val", "metric": "map", - "wmk_metric_key": "map", + "winml_metric_key": "map", "label_mapping_file": "scripts/e2e_eval/datasets/coco_to_rtdetr_labels.json", "columns_mapping": { "annotation_column": "objects", @@ -346,7 +346,7 @@ "path": "detection-datasets/coco", "split": "val", "metric": "map", - "wmk_metric_key": "map", + "winml_metric_key": "map", "label_mapping_file": "scripts/e2e_eval/datasets/coco_to_rtdetr_labels.json", "columns_mapping": { "annotation_column": "objects", @@ -366,7 +366,7 @@ "path": "detection-datasets/fashionpedia", "split": "val", "metric": "map", - "wmk_metric_key": "map", + "winml_metric_key": "map", "columns_mapping": { "annotation_column": "objects", "bbox_key": "bbox", @@ -386,7 +386,7 @@ "path": "~/.cache/winml/eval_datasets/build_pubtables1m_detection", "split": "validation", "metric": "map", - "wmk_metric_key": "map", + "winml_metric_key": "map", "columns_mapping": { "annotation_column": "objects", "bbox_key": "bbox", @@ -406,7 +406,7 @@ "path": "~/.cache/winml/eval_datasets/build_pubtables1m_detection", "split": "validation", "metric": "map", - "wmk_metric_key": "map", + "winml_metric_key": "map", "label_mapping_file": "scripts/e2e_eval/datasets/pubtables_to_table_labels.json", "columns_mapping": { "annotation_column": "objects", @@ -427,7 +427,7 @@ "path": "~/.cache/winml/eval_datasets/build_pubtables1m_structure", "split": "validation", "metric": "map", - "wmk_metric_key": "map", + "winml_metric_key": "map", "columns_mapping": { "annotation_column": "objects", "bbox_key": "bbox", @@ -446,7 +446,7 @@ "path": "danjacobellis/scene_parse_150", "split": "validation", "metric": "mean_iou", - "wmk_metric_key": "mean_iou", + "winml_metric_key": "mean_iou", "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", "columns_mapping": { "annotation_column": "annotation" @@ -463,7 +463,7 @@ "path": "danjacobellis/scene_parse_150", "split": "validation", "metric": "mean_iou", - "wmk_metric_key": "mean_iou", + "winml_metric_key": "mean_iou", "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", "columns_mapping": { "annotation_column": "annotation" @@ -480,7 +480,7 @@ "path": "danjacobellis/scene_parse_150", "split": "validation", "metric": "mean_iou", - "wmk_metric_key": "mean_iou", + "winml_metric_key": "mean_iou", "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", "columns_mapping": { "annotation_column": "annotation" @@ -497,7 +497,7 @@ "path": "danjacobellis/scene_parse_150", "split": "validation", "metric": "mean_iou", - "wmk_metric_key": "mean_iou", + "winml_metric_key": "mean_iou", "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json", "columns_mapping": { "annotation_column": "annotation" @@ -514,7 +514,7 @@ "path": "mattmdjaga/human_parsing_dataset", "split": "train", "metric": "mean_iou", - "wmk_metric_key": "mean_iou", + "winml_metric_key": "mean_iou", "columns_mapping": { "annotation_column": "mask" } @@ -531,7 +531,7 @@ "split": "validation", "samples": 500, "metric": "mean_iou", - "wmk_metric_key": "mean_iou", + "winml_metric_key": "mean_iou", "label_mapping_file": "scripts/e2e_eval/datasets/cityscapes_label_to_train_id.json", "columns_mapping": { "annotation_column": "semantic_segmentation" diff --git a/scripts/e2e_eval/utils/accuracy.py b/scripts/e2e_eval/utils/accuracy.py index 78327b6c4..10a6b4c8b 100644 --- a/scripts/e2e_eval/utils/accuracy.py +++ b/scripts/e2e_eval/utils/accuracy.py @@ -1,3 +1,8 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + """Accuracy evaluation data structures, threshold logic, and summary generation. Mirrors the design of reporter.py (Signal 1): @@ -28,7 +33,7 @@ class AccuracyVerdict(str, Enum): ACCURACY_PASS = "ACCURACY_PASS" # noqa: S105 # |relative_delta| < 5% ACCURACY_AT_RISK = "ACCURACY_AT_RISK" # 5% ≤ |relative_delta| < 10% ACCURACY_REGRESSION = "ACCURACY_REGRESSION" # |relative_delta| ≥ 10% - EVAL_ERROR = "EVAL_ERROR" # wmk eval or baseline subprocess failed + EVAL_ERROR = "EVAL_ERROR" # winml eval or baseline subprocess failed SKIPPED = "SKIPPED" # perf_failed DATASET_CONFIG_MISSING = "DATASET_CONFIG_MISSING" # no dataset_config in registry @@ -43,7 +48,7 @@ class AccuracyVerdict(str, Enum): def compute_delta( - wmk_metric: dict | None, + winml_metric: dict | None, baseline_metric: dict | None, ) -> tuple[float | None, float | None]: """Return (delta_absolute, delta_relative) from metric dicts. @@ -55,13 +60,13 @@ def compute_delta( means the WMK pipeline is *worse*. The threshold in derive_verdict() uses abs(delta_relative) to handle both directions uniformly. """ - if wmk_metric is None or baseline_metric is None: + if winml_metric is None or baseline_metric is None: return None, None - wmk_val = wmk_metric.get("value") + winml_val = winml_metric.get("value") base_val = baseline_metric.get("value") - if wmk_val is None or base_val is None: + if winml_val is None or base_val is None: return None, None - delta_abs = wmk_val - base_val + delta_abs = winml_val - base_val if base_val == 0: return round(delta_abs, 6), None return round(delta_abs, 6), round(delta_abs / base_val, 6) @@ -89,9 +94,9 @@ def derive_verdict(accuracy: dict | None) -> AccuracyVerdict: return AccuracyVerdict.DATASET_CONFIG_MISSING return AccuracyVerdict.SKIPPED - wmk_ok = accuracy.get("wmk_eval_status") == "PASS" + winml_ok = accuracy.get("winml_eval_status") == "PASS" base_ok = accuracy.get("pytorch_baseline_status") == "PASS" - if not wmk_ok or not base_ok: + if not winml_ok or not base_ok: return AccuracyVerdict.EVAL_ERROR delta_rel = accuracy.get("delta_relative") @@ -244,7 +249,7 @@ def _pct(acc: dict) -> str: acc = r["accuracy"] lines.append( f"| {r['model']} | {r.get('task', '')} " - f"| {_val(acc, 'wmk_metric')} | {_val(acc, 'pytorch_baseline_metric')} " + f"| {_val(acc, 'winml_metric')} | {_val(acc, 'pytorch_baseline_metric')} " f"| {_pct(acc)} |" ) else: @@ -265,7 +270,7 @@ def _pct(acc: dict) -> str: acc = r["accuracy"] lines.append( f"| {r['model']} | {r.get('task', '')} " - f"| {_val(acc, 'wmk_metric')} | {_val(acc, 'pytorch_baseline_metric')} " + f"| {_val(acc, 'winml_metric')} | {_val(acc, 'pytorch_baseline_metric')} " f"| {_pct(acc)} |" ) else: diff --git a/scripts/e2e_eval/utils/dataset_config.py b/scripts/e2e_eval/utils/dataset_config.py index a7566e7c7..cc4c42566 100644 --- a/scripts/e2e_eval/utils/dataset_config.py +++ b/scripts/e2e_eval/utils/dataset_config.py @@ -1,3 +1,8 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + """Dataset configuration for accuracy evaluation (Signal 2). Single source of truth: the model registry (e.g. ``testsets/models_with_acc.json``). @@ -5,7 +10,7 @@ Resolution: 1. Per-model config registered from the registry's ``dataset_config`` field. -2. None — caller decides whether to skip or let wmk eval use its +2. None — caller decides whether to skip or let winml eval use its built-in task defaults. """ @@ -41,6 +46,6 @@ def get_dataset_config(hf_id: str, task: str) -> dict | None: """Return dataset config for a model, or None. None means no explicit config was found; the caller can either - skip or let wmk eval / pytorch baseline use built-in task defaults. + skip or let winml eval / pytorch baseline use built-in task defaults. """ return _DATASET_CONFIGS.get((hf_id, task)) diff --git a/src/winml/modelkit/analyze/core/model_validators/constant_folding_validator.py b/src/winml/modelkit/analyze/core/model_validators/constant_folding_validator.py index c17e1257b..ebd463792 100644 --- a/src/winml/modelkit/analyze/core/model_validators/constant_folding_validator.py +++ b/src/winml/modelkit/analyze/core/model_validators/constant_folding_validator.py @@ -122,7 +122,7 @@ def _create_information(self, constant_nodes: list[dict]) -> Information: [ { "title": "Normalize model", - "command": "wmk optimize --model model.onnx", + "command": "winml optimize --model model.onnx", } ], indent=2, diff --git a/src/winml/modelkit/analyze/core/model_validators/pattern_matching_validator.py b/src/winml/modelkit/analyze/core/model_validators/pattern_matching_validator.py index 22019a7af..faa478304 100644 --- a/src/winml/modelkit/analyze/core/model_validators/pattern_matching_validator.py +++ b/src/winml/modelkit/analyze/core/model_validators/pattern_matching_validator.py @@ -69,10 +69,10 @@ class PatternMatchingValidator(ModelValidator): "Pattern matching requires all nodes to have non-empty names.\n\n" ), # Todo: Update with actual command when available - action_method="wmk onnx_normalize", + action_method="winml onnx_normalize", action_description=("Add missing node names to the model using ONNX utilities"), action_command=( - "[Placeholder] wmk onnx_normalize " + "[Placeholder] winml onnx_normalize " ), ), PatternErrorConfig( diff --git a/src/winml/modelkit/analyze/core/model_validators/qdq_validation_validator.py b/src/winml/modelkit/analyze/core/model_validators/qdq_validation_validator.py index 2dc5dfbf9..12e09a3c7 100644 --- a/src/winml/modelkit/analyze/core/model_validators/qdq_validation_validator.py +++ b/src/winml/modelkit/analyze/core/model_validators/qdq_validation_validator.py @@ -101,7 +101,7 @@ def _create_information(self, invalid_nodes: list[str]) -> Information: [ { "title": "Re-quantize model", - "command": "wmk quantize --model model.onnx --output model-qdq.onnx", + "command": "winml quantize --model model.onnx --output model-qdq.onnx", } ], indent=2, diff --git a/src/winml/modelkit/analyze/core/model_validators/shape_inference_validator.py b/src/winml/modelkit/analyze/core/model_validators/shape_inference_validator.py index 67fb6ee10..b21ccc4cc 100644 --- a/src/winml/modelkit/analyze/core/model_validators/shape_inference_validator.py +++ b/src/winml/modelkit/analyze/core/model_validators/shape_inference_validator.py @@ -118,7 +118,7 @@ def _create_information(self, affected_ops: list[dict]) -> Information: [ { "title": "Normalize model", - "command": "wmk optimize --model model.onnx", + "command": "winml optimize --model model.onnx", } ], indent=2, diff --git a/src/winml/modelkit/build/hf.py b/src/winml/modelkit/build/hf.py index d4a1106e2..73d4f5d1d 100644 --- a/src/winml/modelkit/build/hf.py +++ b/src/winml/modelkit/build/hf.py @@ -455,11 +455,11 @@ def _load_model( "Options:\n" " 1. Provide --model to use pretrained weights\n" " 2. Ensure config has loader.model_type (e.g., 'bert', 'resnet')\n" - " 3. Regenerate config: wmk config -m -o config.json" + " 3. Regenerate config: winml config -m -o config.json" ) hf_config = AutoConfig.for_model(model_type) - # Prefer explicit model_class from loader config (set by wmk config), + # Prefer explicit model_class from loader config (set by winml config), # fall back to resolve_task_and_model_class for auto-detection. model_class = None if config.loader.model_class: diff --git a/src/winml/modelkit/cache/__init__.py b/src/winml/modelkit/cache/__init__.py index f5c1f950f..74648397e 100644 --- a/src/winml/modelkit/cache/__init__.py +++ b/src/winml/modelkit/cache/__init__.py @@ -5,7 +5,7 @@ """Cache management for ModelKit. Provides deterministic path computation for cached build artifacts. -Both ``from_pretrained()`` and ``wmk build --use-cache`` use these +Both ``from_pretrained()`` and ``winml build --use-cache`` use these functions to guarantee identical paths for the same model+config. Usage:: diff --git a/src/winml/modelkit/cli.py b/src/winml/modelkit/cli.py index 4a50847d0..263494c87 100644 --- a/src/winml/modelkit/cli.py +++ b/src/winml/modelkit/cli.py @@ -8,12 +8,12 @@ command discovery from the commands/ directory. Usage: - wmk --version - wmk --help - wmk export --model MODEL --output PATH [--backend BACKEND] [--verbose] + winml --version + winml --help + winml export --model MODEL --output PATH [--backend BACKEND] [--verbose] Entry Points: - - Standalone CLI: wmk + - Standalone CLI: winml - Module execution: python -m winml.modelkit """ @@ -32,7 +32,7 @@ @click.group() -@click.version_option(version=__version__, prog_name="wmk") +@click.version_option(version=__version__, prog_name="winml") @click.option( "--debug", is_flag=True, diff --git a/src/winml/modelkit/commands/analyze.py b/src/winml/modelkit/commands/analyze.py index 479211ec8..7c628a884 100644 --- a/src/winml/modelkit/commands/analyze.py +++ b/src/winml/modelkit/commands/analyze.py @@ -2,19 +2,19 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Analyze command for wmk CLI. +"""Analyze command for winml CLI. This module provides the analyze command that analyzes ONNX models for runtime support across NPU execution providers. Usage: - wmk analyze --model MODEL --ep EP --device DEVICE [OPTIONS] + winml analyze --model MODEL --ep EP --device DEVICE [OPTIONS] Examples: - wmk analyze --model model.onnx --ep QNNExecutionProvider --device NPU - wmk analyze --model model.onnx --ep qnn --device NPU - wmk analyze --model model.onnx --ep ov --device GPU --information - wmk analyze --model model.onnx --ep vitis --device GPU --output results.json + winml analyze --model model.onnx --ep QNNExecutionProvider --device NPU + winml analyze --model model.onnx --ep qnn --device NPU + winml analyze --model model.onnx --ep ov --device GPU --information + winml analyze --model model.onnx --ep vitis --device GPU --output results.json """ from __future__ import annotations @@ -100,27 +100,27 @@ def analyze( Examples: Analyze all supported EPs with default device: - wmk analyze --model model.onnx + winml analyze --model model.onnx Check QNN NPU support (full name): - wmk analyze --model model.onnx --ep QNNExecutionProvider --device NPU + winml analyze --model model.onnx --ep QNNExecutionProvider --device NPU Check QNN NPU support (using alias): - wmk analyze --model model.onnx --ep qnn --device NPU + winml analyze --model model.onnx --ep qnn --device NPU Check Intel OpenVINO GPU support with recommendations (using alias): - wmk analyze --model model.onnx --ep ov --device GPU --information + winml analyze --model model.onnx --ep ov --device GPU --information Analyze all EPs and save results to file: - wmk analyze --model model.onnx --output results.json + winml analyze --model model.onnx --output results.json Use HTP metadata for enhanced pattern extraction: - wmk analyze --model model.onnx + winml analyze --model model.onnx --ep OpenVINOExecutionProvider --driver GPU --information --htp-metadata metadata.json """ # Configure logging diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py index 1fbaf23bb..eebed1ff4 100644 --- a/src/winml/modelkit/commands/build.py +++ b/src/winml/modelkit/commands/build.py @@ -9,11 +9,11 @@ auto-detects ONNX vs HF input, calls the appropriate API, and reports results. Usage: - wmk build -c config.json -m microsoft/resnet-50 -o output/ - wmk build -c config.json -m model.onnx -o output/ - wmk build -c config.json -m bert-base-uncased -o output/ --no-quant --no-compile - wmk build -c config.json -m microsoft/resnet-50 --random-init -o output/ - wmk build -c config.json -m microsoft/resnet-50 -o output/ --rebuild -v + winml build -c config.json -m microsoft/resnet-50 -o output/ + winml build -c config.json -m model.onnx -o output/ + winml build -c config.json -m bert-base-uncased -o output/ --no-quant --no-compile + winml build -c config.json -m microsoft/resnet-50 --random-init -o output/ + winml build -c config.json -m microsoft/resnet-50 -o output/ --rebuild -v """ from __future__ import annotations @@ -213,7 +213,7 @@ def _build_modules( "config_file", type=click.Path(exists=True), required=True, - help="WinMLBuildConfig JSON file (from wmk config)", + help="WinMLBuildConfig JSON file (from winml config)", ) @click.option( "-m", @@ -331,8 +331,8 @@ def build( ) -> None: r"""Build a WinML-optimized ONNX model from a HuggingFace model or .onnx file. - Requires a config file generated by 'wmk config'. The config file already - contains device/precision settings (applied during 'wmk config' generation). + Requires a config file generated by 'winml config'. The config file already + contains device/precision settings (applied during 'winml config' generation). Specify either --output-dir or --use-cache for artifact destination. If -m points to an existing .onnx file, the build skips export and runs @@ -341,22 +341,22 @@ def build( \b Examples: # Full pipeline with pretrained weights - wmk build -c config.json -m microsoft/resnet-50 -o output/ + winml build -c config.json -m microsoft/resnet-50 -o output/ # Build from pre-exported ONNX file - wmk build -c config.json -m model.onnx -o output/ + winml build -c config.json -m model.onnx -o output/ # Export + optimize only - wmk build -c config.json -m bert-base-uncased -o output/ --no-quant --no-compile + winml build -c config.json -m bert-base-uncased -o output/ --no-quant --no-compile # Random-weight build (no weight download) - wmk build -c config.json -m microsoft/resnet-50 --random-init -o output/ + winml build -c config.json -m microsoft/resnet-50 --random-init -o output/ # Use global cache - wmk build -c config.json -m microsoft/resnet-50 --use-cache + winml build -c config.json -m microsoft/resnet-50 --use-cache # Force rebuild - wmk build -c config.json -m microsoft/resnet-50 -o output/ --rebuild + winml build -c config.json -m microsoft/resnet-50 -o output/ --rebuild """ # Inherit debug flag from parent context if ctx.obj and ctx.obj.get("debug"): @@ -426,7 +426,7 @@ def build( raise click.UsageError("Module config array is empty -- nothing to build.") console.print() - console.print("[bold]wmk build[/bold] (module mode)") + console.print("[bold]winml build[/bold] (module mode)") console.print(f" Config: {Path(config_file).name}") console.print(f" Modules: {len(configs)}") console.print(f" Output: {resolved_dir}") @@ -504,7 +504,7 @@ def build( # Report build plan model_label = f"{model_id} (random-init)" if random_init else model_id console.print() - console.print("[bold]wmk build[/bold]") + console.print("[bold]winml build[/bold]") console.print(f" Config: {Path(config_file).name}") console.print(f" Model: {model_label}") console.print(f" Output: {resolved_dir}") diff --git a/src/winml/modelkit/commands/compile.py b/src/winml/modelkit/commands/compile.py index a21b9ed77..eb849d044 100644 --- a/src/winml/modelkit/commands/compile.py +++ b/src/winml/modelkit/commands/compile.py @@ -2,19 +2,19 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Compile command for wmk CLI. +"""Compile command for winml CLI. This module provides the compile command that compiles ONNX models to EP-specific formats (e.g., QNN EPContext) with optional quantization. Usage: - wmk compile --model MODEL [OPTIONS] + winml compile --model MODEL [OPTIONS] Examples: - wmk compile -m model.onnx - wmk compile -m model.onnx --device npu - wmk compile -m model.onnx --device gpu --ep migraphx - wmk compile -m model_qdq.onnx --no-quantize + winml compile -m model.onnx + winml compile -m model.onnx --device npu + winml compile -m model.onnx --device gpu --ep migraphx + winml compile -m model_qdq.onnx --no-quantize """ from __future__ import annotations @@ -127,19 +127,19 @@ def compile( \b Examples: # Compile for NPU (default, uses QNN/VitisAI) - wmk compile -m model.onnx + winml compile -m model.onnx # Compile for NPU with explicit VitisAI EP - wmk compile -m model.onnx --ep vitisai + winml compile -m model.onnx --ep vitisai # Compile for GPU with MIGraphX - wmk compile -m model.onnx --device gpu --ep migraphx + winml compile -m model.onnx --device gpu --ep migraphx # Compile pre-quantized model - wmk compile -m model_qdq.onnx --no-quantize + winml compile -m model_qdq.onnx --no-quantize # Compile using QAIRT SDK - wmk compile -m model.onnx --compiler qairt --qnn-sdk-root /path/to/sdk + winml compile -m model.onnx --compiler qairt --qnn-sdk-root /path/to/sdk """ # Inherit debug mode from parent if ctx.obj and ctx.obj.get("debug"): @@ -179,7 +179,7 @@ def compile( console.print( "[yellow]Note:[/yellow] --no-quantize has no effect. " "Quantization is no longer performed during compile. " - "Use 'wmk quantize' before 'wmk compile' to control quantization." + "Use 'winml quantize' before 'winml compile' to control quantization." ) # Show info @@ -203,13 +203,9 @@ def compile( if result.output_path: console.print(f"[dim]Output: {result.output_path}[/dim]") if result.compile_time: - console.print( - f"[dim]Compile time: {result.compile_time:.2f}s[/dim]" - ) + console.print(f"[dim]Compile time: {result.compile_time:.2f}s[/dim]") if result.total_time: - console.print( - f"[dim]Total time: {result.total_time:.2f}s[/dim]" - ) + console.print(f"[dim]Total time: {result.total_time:.2f}s[/dim]") else: console.print("\n[bold red]Compilation failed:[/bold red]") for error in result.errors: diff --git a/src/winml/modelkit/commands/config.py b/src/winml/modelkit/commands/config.py index 809722048..76cbe5883 100644 --- a/src/winml/modelkit/commands/config.py +++ b/src/winml/modelkit/commands/config.py @@ -11,13 +11,13 @@ export=None (marking it as an ONNX build that skips the export stage). Usage: - wmk config -m microsoft/resnet-50 - wmk config -m bert-base-uncased --task text-classification - wmk config -m model.onnx - wmk config --model-type bert - wmk config --model-type bert --task fill-mask - wmk config -m microsoft/resnet-50 --module ResNetConvLayer - wmk config -m bert-base-uncased -o config.json + winml config -m microsoft/resnet-50 + winml config -m bert-base-uncased --task text-classification + winml config -m model.onnx + winml config --model-type bert + winml config --model-type bert --task fill-mask + winml config -m microsoft/resnet-50 --module ResNetConvLayer + winml config -m bert-base-uncased -o config.json """ from __future__ import annotations @@ -35,9 +35,7 @@ console = Console(stderr=True) -def _apply_stage_overrides( - cfg: Any, *, no_quant: bool, no_compile: bool -) -> None: +def _apply_stage_overrides(cfg: Any, *, no_quant: bool, no_compile: bool) -> None: """Apply --no-quant and --no-compile CLI overrides to a config.""" if no_quant: cfg.quant = None @@ -109,8 +107,7 @@ def _is_onnx_file(model_input: str) -> bool: "device", type=click.Choice(["auto", "npu", "gpu", "cpu"], case_sensitive=False), default="auto", - help="Target device (affects quant/compile config). " - "Default: auto (no changes to config).", + help="Target device (affects quant/compile config). Default: auto (no changes to config).", ) @click.option( "--ep", @@ -199,34 +196,34 @@ def config( \b Examples: # Basic usage - auto-detect everything - wmk config -m microsoft/resnet-50 + winml config -m microsoft/resnet-50 # Override task - wmk config -m bert-base-uncased --task text-classification + winml config -m bert-base-uncased --task text-classification # Target NPU with int8 quantization - wmk config -m microsoft/resnet-50 --device npu --precision int8 + winml config -m microsoft/resnet-50 --device npu --precision int8 # Target GPU with fp16 (no quantization) - wmk config -m bert-base-uncased --device gpu --precision fp16 + winml config -m bert-base-uncased --device gpu --precision fp16 # Model type only (uses default HF config, auto-detects task) - wmk config --model-type bert + winml config --model-type bert # Model type + task - wmk config --model-type bert --task fill-mask + winml config --model-type bert --task fill-mask # Override with JSON config file - wmk config -m bert-base-uncased -c overrides.json + winml config -m bert-base-uncased -c overrides.json # Vision model with shape overrides ({"height": 224, "width": 224}) - wmk config --model-type resnet -t image-classification --shape-config shapes.json + winml config --model-type resnet -t image-classification --shape-config shapes.json # Save to file - wmk config -m bert-base-uncased -o config.json + winml config -m bert-base-uncased -o config.json # Generate configs for submodules - wmk config -m microsoft/resnet-50 --module ResNetConvLayer + winml config -m microsoft/resnet-50 --module ResNetConvLayer """ if verbose: logging.basicConfig(level=logging.DEBUG) @@ -260,9 +257,7 @@ def config( ) override = WinMLBuildConfig.from_dict(data) except json.JSONDecodeError as e: - raise click.UsageError( - f"Invalid JSON in config file {config_path}: {e}" - ) from e + raise click.UsageError(f"Invalid JSON in config file {config_path}: {e}") from e console.print(f"[dim]Loaded overrides from {config_path.name}[/dim]") # Load shape_config (shape overrides) from JSON file if provided @@ -330,9 +325,7 @@ def config( # Apply --no-quant / --no-compile overrides to each config for cfg in configs: _apply_stage_overrides(cfg, no_quant=no_quant, no_compile=no_compile) - console.print( - f"[green]Found {len(configs)} submodules matching '{module}'[/green]" - ) + console.print(f"[green]Found {len(configs)} submodules matching '{module}'[/green]") output_data = [cfg.to_dict() for cfg in configs] else: # Normal mode: result is WinMLBuildConfig @@ -343,10 +336,7 @@ def config( if not task and not module: auto_task = config_obj.loader.task source = model_type or hf_model - console.print( - f"[dim]Auto-selected task: {auto_task} " - f"(from '{source}')[/dim]" - ) + console.print(f"[dim]Auto-selected task: {auto_task} (from '{source}')[/dim]") console.print( f"[green]Generated config for task '{config_obj.loader.task}'[/green]" ) diff --git a/src/winml/modelkit/commands/eval.py b/src/winml/modelkit/commands/eval.py index 9b9278f2d..93331cf27 100644 --- a/src/winml/modelkit/commands/eval.py +++ b/src/winml/modelkit/commands/eval.py @@ -142,15 +142,15 @@ def eval( \b Examples: # Use default dataset (auto-detected from task) - wmk eval -m microsoft/resnet-50 - wmk eval -m model.onnx --model-id dslim/bert-base-NER + winml eval -m microsoft/resnet-50 + winml eval -m model.onnx --model-id dslim/bert-base-NER # Specify dataset explicitly - wmk eval -m microsoft/resnet-50 --dataset imagenet-1k - wmk eval -m model.onnx --model-id microsoft/resnet-50 --dataset imagenet-1k + winml eval -m microsoft/resnet-50 --dataset imagenet-1k + winml eval -m model.onnx --model-id microsoft/resnet-50 --dataset imagenet-1k # Multi-config dataset with column overrides - wmk eval -m model.onnx --model-id Intel/bert-base-uncased-mrpc \\ + winml eval -m model.onnx --model-id Intel/bert-base-uncased-mrpc \\ --dataset glue --dataset-name mrpc \\ --column input_column=sentence1 """ @@ -163,7 +163,7 @@ def eval( if task is None: raise click.UsageError( - "--schema requires --task. Example: wmk eval --schema --task object-detection" + "--schema requires --task. Example: winml eval --schema --task object-detection" ) cls = _EVALUATOR_REGISTRY.get(task, WinMLEvaluator) _print_schema(task, cls.schema_info()) diff --git a/src/winml/modelkit/commands/export.py b/src/winml/modelkit/commands/export.py index fda018c1f..4e8f0b3f3 100644 --- a/src/winml/modelkit/commands/export.py +++ b/src/winml/modelkit/commands/export.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Export command for wmk CLI. +"""Export command for winml CLI. This module provides the export command that uses export_onnx() as the single implementation path for HuggingFace to ONNX model conversion. @@ -13,13 +13,13 @@ - Supports MODEL_BUILD_CONFIGS lookup for input_tensors fallback Usage: - wmk export --model MODEL --output PATH [--verbose] [--with-report] + winml export --model MODEL --output PATH [--verbose] [--with-report] Examples: - wmk export -m prajjwal1/bert-tiny -o model.onnx - wmk export -m facebook/convnext-tiny-224 -o convnext.onnx -v --with-report - wmk export -m bert-base-uncased -o bert.onnx --input-specs inputs.json - wmk export -m bert-base-uncased -o bert.onnx --export-config config.json + winml export -m prajjwal1/bert-tiny -o model.onnx + winml export -m facebook/convnext-tiny-224 -o convnext.onnx -v --with-report + winml export -m bert-base-uncased -o bert.onnx --input-specs inputs.json + winml export -m bert-base-uncased -o bert.onnx --export-config config.json """ from __future__ import annotations @@ -143,28 +143,28 @@ def export( \b Examples: # Basic export - wmk export --model prajjwal1/bert-tiny --output model.onnx + winml export --model prajjwal1/bert-tiny --output model.onnx # Short form - wmk export -m prajjwal1/bert-tiny -o model.onnx + winml export -m prajjwal1/bert-tiny -o model.onnx # With verbose output and full reporting - wmk export -m facebook/convnext-tiny-224 -o convnext.onnx -v --with-report + winml export -m facebook/convnext-tiny-224 -o convnext.onnx -v --with-report # Clean ONNX output (no hierarchy metadata, for optimization) - wmk export -m prajjwal1/bert-tiny -o model.onnx --clean-onnx + winml export -m prajjwal1/bert-tiny -o model.onnx --clean-onnx # Use PyTorch dynamo export (for rich node metadata) - wmk export -m prajjwal1/bert-tiny -o model.onnx --dynamo + winml export -m prajjwal1/bert-tiny -o model.onnx --dynamo # Include torch.nn modules in hierarchy - wmk export -m prajjwal1/bert-tiny -o model.onnx --torch-module LayerNorm,Embedding + winml export -m prajjwal1/bert-tiny -o model.onnx --torch-module LayerNorm,Embedding # Custom input specifications from JSON file - wmk export -m bert-base-uncased -o bert.onnx --input-specs inputs.json + winml export -m bert-base-uncased -o bert.onnx --input-specs inputs.json # Custom ONNX export configuration - wmk export -m bert-base-uncased -o bert.onnx --export-config config.json + winml export -m bert-base-uncased -o bert.onnx --export-config config.json """ # Inherit debug mode from parent if ctx.obj.get("debug"): diff --git a/src/winml/modelkit/commands/hub.py b/src/winml/modelkit/commands/hub.py index 7243921a2..cc5c1093b 100644 --- a/src/winml/modelkit/commands/hub.py +++ b/src/winml/modelkit/commands/hub.py @@ -16,11 +16,11 @@ Negative means the quantized model scored lower. Usage: - wmk hub - wmk hub --model-type bert - wmk hub --task text-classification - wmk hub --model ProsusAI/finbert - wmk hub --output catalog.json + winml hub + winml hub --model-type bert + winml hub --task text-classification + winml hub --model ProsusAI/finbert + winml hub --output catalog.json """ from __future__ import annotations @@ -146,7 +146,6 @@ def _overall_verdict(accuracy: dict[str, Any]) -> str: return "PASS" - # --------------------------------------------------------------------------- # List view # --------------------------------------------------------------------------- @@ -186,12 +185,12 @@ def _build_list_renderable(models: list[dict[str, Any]]) -> Group: panel = Panel( table, title=f"[bold]ModelKit Catalog[/bold] [dim]|[/dim] " - f"[bold cyan]{len(models)}[/bold cyan] validated model(s)", + f"[bold cyan]{len(models)}[/bold cyan] validated model(s)", border_style="blue", padding=(0, 1), ) hint = Text( - "Use wmk hub --model to see perf and accuracy details.", + "Use winml hub --model to see perf and accuracy details.", style="dim", ) return Group(panel, hint) @@ -310,9 +309,7 @@ def _build_detail_renderable(m: dict[str, Any]) -> Group: Text(f"{sign}{drop:.2f}%", style=ep_style), ) - panels.append( - Panel(acc, title=acc_panel_title, border_style="blue", padding=(0, 1)) - ) + panels.append(Panel(acc, title=acc_panel_title, border_style="blue", padding=(0, 1))) if not perf and not accuracy: panels.append( @@ -359,8 +356,7 @@ def _output_detail(models: list[dict[str, Any]], model_id: str) -> dict[str, Any msg += "\n".join(f" {c}" for c in candidates) else: msg = ( - f"Model '{model_id}' not found in the catalog. " - "Run 'wmk hub' to list all models." + f"Model '{model_id}' not found in the catalog. Run 'winml hub' to list all models." ) raise click.ClickException(msg) @@ -440,16 +436,16 @@ def hub( drop % -- relative change vs FP32 baseline \b - Use ``wmk hub --model `` for per-model perf and accuracy. - Use ``wmk inspect -m `` for architecture details. + Use ``winml hub --model `` for per-model perf and accuracy. + Use ``winml inspect -m `` for architecture details. \b Examples: - wmk hub - wmk hub --model-type bert - wmk hub --task text-classification - wmk hub --model ProsusAI/finbert - wmk hub --output results/catalog.json + winml hub + winml hub --model-type bert + winml hub --task text-classification + winml hub --model ProsusAI/finbert + winml hub --output results/catalog.json """ try: catalog = _load_catalog() diff --git a/src/winml/modelkit/commands/inspect.py b/src/winml/modelkit/commands/inspect.py index 9f599aec4..b79f6a649 100644 --- a/src/winml/modelkit/commands/inspect.py +++ b/src/winml/modelkit/commands/inspect.py @@ -8,10 +8,10 @@ with ModelKit, including loader, exporter, and WinML configurations. Usage: - wmk inspect -m openai/clip-vit-base-patch32 - wmk inspect -m google-bert/bert-base-uncased --format json - wmk inspect -m facebook/detr-resnet-50 --verbose - wmk inspect -m openai/clip-vit-base-patch32 --hierarchy + winml inspect -m openai/clip-vit-base-patch32 + winml inspect -m google-bert/bert-base-uncased --format json + winml inspect -m facebook/detr-resnet-50 --verbose + winml inspect -m openai/clip-vit-base-patch32 --hierarchy """ from __future__ import annotations @@ -84,19 +84,19 @@ def inspect( \b Examples: # Basic inspection - wmk inspect -m openai/clip-vit-base-patch32 + winml inspect -m openai/clip-vit-base-patch32 # JSON output for scripting - wmk inspect -m google-bert/bert-base-uncased --format json + winml inspect -m google-bert/bert-base-uncased --format json # Show full build configuration - wmk inspect -m facebook/detr-resnet-50 --verbose + winml inspect -m facebook/detr-resnet-50 --verbose # Include HF module hierarchy (no weight download) - wmk inspect -m openai/clip-vit-base-patch32 --hierarchy + winml inspect -m openai/clip-vit-base-patch32 --hierarchy # Combined verbose + hierarchy - wmk inspect -m google-bert/bert-base-uncased -v -H + winml inspect -m google-bert/bert-base-uncased -v -H """ # Import here to defer heavy transformers/torch imports from ..inspect import ( diff --git a/src/winml/modelkit/commands/optimize.py b/src/winml/modelkit/commands/optimize.py index 52e611cbe..96fdf4c8d 100644 --- a/src/winml/modelkit/commands/optimize.py +++ b/src/winml/modelkit/commands/optimize.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Optimize command for wmk CLI. +"""Optimize command for winml CLI. This module provides the optimize command that uses the capability-driven optimizer for ONNX model optimization with fusion and graph optimizations. @@ -11,12 +11,12 @@ the Open-Closed Principle from the design documentation. Usage: - wmk optimize --model MODEL --output OUTPUT [OPTIONS] + winml optimize --model MODEL --output OUTPUT [OPTIONS] Examples: - wmk optimize -m model.onnx -o model_opt.onnx - wmk optimize -m model.onnx -o model_opt.onnx --enable-gelu-fusion - wmk optimize -m model.onnx --preset transformer-optimized + winml optimize -m model.onnx -o model_opt.onnx + winml optimize -m model.onnx -o model_opt.onnx --enable-gelu-fusion + winml optimize -m model.onnx --preset transformer-optimized """ from __future__ import annotations @@ -253,26 +253,26 @@ def optimize( \b Examples: # List available capabilities - wmk optimize --list-capabilities + winml optimize --list-capabilities # List available rewrite pattern families - wmk optimize --list-rewrites + winml optimize --list-rewrites # Pattern rewrite flags follow: --enable-{source-slug}-{target-slug} # Run --list-rewrites to discover all available flag names. # Example (all GELU variants → single Gelu node): - wmk optimize -m model.onnx -o out.onnx --enable-gelu-singlegelu + winml optimize -m model.onnx -o out.onnx --enable-gelu-singlegelu # Example (only Gelu1 variant → single Gelu node): - wmk optimize -m model.onnx -o out.onnx --enable-gelu1-singlegelu + winml optimize -m model.onnx -o out.onnx --enable-gelu1-singlegelu # Basic optimization with GELU fusion - wmk optimize -m model.onnx -o model_opt.onnx --enable-gelu-fusion + winml optimize -m model.onnx -o model_opt.onnx --enable-gelu-fusion # Use transformer preset - wmk optimize -m bert.onnx --preset transformer-optimized + winml optimize -m bert.onnx --preset transformer-optimized # Use config file - wmk optimize -m model.onnx -c config.toml + winml optimize -m model.onnx -c config.toml """ # Import capabilities (late import to speed up CLI) from ..optim.pipes import get_all_capabilities diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py index e1b0ecadb..8affcd45c 100644 --- a/src/winml/modelkit/commands/perf.py +++ b/src/winml/modelkit/commands/perf.py @@ -7,9 +7,9 @@ Benchmarks model inference performance using WinMLAutoModel and WinMLSession. Usage: - wmk perf -m microsoft/resnet-50 - wmk perf -m microsoft/resnet-50 --device npu --iterations 100 - wmk perf -m bert-base-uncased --task text-classification + winml perf -m microsoft/resnet-50 + winml perf -m microsoft/resnet-50 --device npu --iterations 100 + winml perf -m bert-base-uncased --task text-classification """ from __future__ import annotations @@ -986,22 +986,22 @@ def perf( \b Examples: # Basic benchmark (HuggingFace model) - wmk perf -m microsoft/resnet-50 + winml perf -m microsoft/resnet-50 # Benchmark a pre-exported ONNX file directly - wmk perf -m model.onnx --device cpu + winml perf -m model.onnx --device cpu # With custom iterations on NPU - wmk perf -m microsoft/resnet-50 --iterations 500 --device npu + winml perf -m microsoft/resnet-50 --iterations 500 --device npu # Text model with explicit task - wmk perf -m bert-base-uncased --task text-classification + winml perf -m bert-base-uncased --task text-classification # Per-module benchmarking - wmk perf -m bert-base-uncased --module BertAttention + winml perf -m bert-base-uncased --module BertAttention # Operator-level profiling (QNN NPU) - wmk perf -m model.onnx --op-tracing basic + winml perf -m model.onnx --op-tracing basic """ # Resolve deprecated --hf-model alias if hf_model_deprecated and model_id: diff --git a/src/winml/modelkit/commands/quantize.py b/src/winml/modelkit/commands/quantize.py index 82586dfe8..5437837eb 100644 --- a/src/winml/modelkit/commands/quantize.py +++ b/src/winml/modelkit/commands/quantize.py @@ -2,19 +2,19 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Quantize command for wmk CLI. +"""Quantize command for winml CLI. This module provides the quantize command that inserts QDQ (Quantize-Dequantize) nodes into ONNX models for quantization-aware inference. Usage: - wmk quantize --model MODEL [OPTIONS] + winml quantize --model MODEL [OPTIONS] Examples: - wmk quantize -m model.onnx - wmk quantize -m model.onnx --precision int8 - wmk quantize -m model.onnx -o model_qdq.onnx --samples 100 - wmk quantize -m model.onnx --weight-type int8 --activation-type uint8 + winml quantize -m model.onnx + winml quantize -m model.onnx --precision int8 + winml quantize -m model.onnx -o model_qdq.onnx --samples 100 + winml quantize -m model.onnx --weight-type int8 --activation-type uint8 """ from __future__ import annotations @@ -121,19 +121,19 @@ def quantize( \b Examples: # Basic quantization with defaults (10 samples, uint8) - wmk quantize -m model.onnx + winml quantize -m model.onnx # Use precision shorthand (same as --weight-type uint8 --activation-type uint8) - wmk quantize -m model.onnx --precision int8 + winml quantize -m model.onnx --precision int8 # Int16 quantization - wmk quantize -m model.onnx --precision int16 + winml quantize -m model.onnx --precision int16 # Custom output path and more samples - wmk quantize -m model.onnx -o quantized.onnx --samples 100 + winml quantize -m model.onnx -o quantized.onnx --samples 100 # Explicit types with entropy calibration - wmk quantize -m model.onnx --weight-type int8 --method entropy + winml quantize -m model.onnx --weight-type int8 --method entropy """ # Inherit debug mode from parent if ctx.obj and ctx.obj.get("debug"): @@ -180,12 +180,8 @@ def quantize( if result.success: console.print("\n[bold green]Success![/bold green] Model quantized") console.print(f"[dim]Output: {result.output_path}[/dim]") - console.print( - f"[dim]QDQ nodes inserted: {result.nodes_quantized}[/dim]" - ) - console.print( - f"[dim]Total time: {result.total_time_seconds:.2f}s[/dim]" - ) + console.print(f"[dim]QDQ nodes inserted: {result.nodes_quantized}[/dim]") + console.print(f"[dim]Total time: {result.total_time_seconds:.2f}s[/dim]") else: console.print("\n[bold red]Quantization failed:[/bold red]") for error in result.errors: diff --git a/src/winml/modelkit/commands/sys.py b/src/winml/modelkit/commands/sys.py index 00c0e4fbd..abead4afb 100644 --- a/src/winml/modelkit/commands/sys.py +++ b/src/winml/modelkit/commands/sys.py @@ -13,12 +13,12 @@ - Available devices and execution providers Usage: - wmk sys - wmk sys --format json - wmk sys --format compact - wmk sys --verbose - wmk sys --list-device - wmk sys --list-ep + winml sys + winml sys --format json + winml sys --format compact + winml sys --verbose + winml sys --list-device + winml sys --list-ep """ from __future__ import annotations @@ -431,8 +431,7 @@ def _output_device_text(devices: list[dict[str, Any]]) -> None: console.print("\n[bold blue]Available Devices (priority order)[/bold blue]") for dev in devices: console.print( - f" [bold]#{dev['priority']}[/bold] " - f"[cyan]{dev['type']:5s}[/cyan] {dev['name']}" + f" [bold]#{dev['priority']}[/bold] [cyan]{dev['type']:5s}[/cyan] {dev['name']}" ) details = dev.get("details", {}) if "error" in details: @@ -514,9 +513,7 @@ def _output_ep_text(eps: list[dict[str, Any]]) -> None: for ep in eps: name_padded = ep["name"].ljust(30) - console.print( - f" [bold]{name_padded}[/bold] [dim]->[/dim] [cyan]{ep['device']}[/cyan]" - ) + console.print(f" [bold]{name_padded}[/bold] [dim]->[/dim] [cyan]{ep['device']}[/cyan]") if ep.get("path"): console.print(f" Path: {ep['path']}") else: @@ -576,22 +573,22 @@ def sysinfo( \b Examples: # Display system info (human-readable format) - wmk sys + winml sys # Get output as JSON for scripting - wmk sys --format json + winml sys --format json # Show detailed info - wmk sys --verbose + winml sys --verbose # Compact format for quick overview - wmk sys --format compact + winml sys --format compact # List available devices - wmk sys --list-device + winml sys --list-device # List execution providers as JSON - wmk sys --list-ep --format json + winml sys --list-ep --format json """ # Inherit debug mode from parent if ctx.obj.get("debug"): @@ -637,13 +634,9 @@ def sysinfo( else: _output_ep_text(eps) except Exception as e: - console.print( - f"[bold red]Error detecting execution providers:[/bold red] {e}" - ) + console.print(f"[bold red]Error detecting execution providers:[/bold red] {e}") logger.exception("Failed to detect execution providers") - raise click.ClickException( - f"Error detecting execution providers: {e}" - ) from e + raise click.ClickException(f"Error detecting execution providers: {e}") from e return # Default: full sysinfo including devices and EPs diff --git a/src/winml/modelkit/models/auto.py b/src/winml/modelkit/models/auto.py index 1e7d8b98f..3195d0fee 100644 --- a/src/winml/modelkit/models/auto.py +++ b/src/winml/modelkit/models/auto.py @@ -174,7 +174,7 @@ def from_onnx( else: import tempfile - cache_dir_path = Path(tempfile.mkdtemp(prefix="wmk_")) + cache_dir_path = Path(tempfile.mkdtemp(prefix="winml_")) output_dir = cache_dir_path force_rebuild = True logger.info("Cache disabled -- using temp directory: %s", output_dir) @@ -324,7 +324,7 @@ def from_pretrained( # No cache -- use temp directory, always rebuild import tempfile - cache_dir_path = Path(tempfile.mkdtemp(prefix="wmk_")) + cache_dir_path = Path(tempfile.mkdtemp(prefix="winml_")) force_rebuild = True logger.info("Cache disabled -- using temp directory: %s", cache_dir_path) diff --git a/src/winml/modelkit/session/qairt/compile_qairt_bin.py b/src/winml/modelkit/session/qairt/compile_qairt_bin.py index 6e4c01c37..9a63cdcaa 100644 --- a/src/winml/modelkit/session/qairt/compile_qairt_bin.py +++ b/src/winml/modelkit/session/qairt/compile_qairt_bin.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""QAIRT SDK compilation script - executed in isolated venv-wmk subprocess. +"""QAIRT SDK compilation script - executed in isolated venv-winml subprocess. This script is invoked by qnn_compiler._compile_qairt() and runs in a separate Python 3.10 virtual environment with QAIRT SDK dependencies installed. diff --git a/src/winml/modelkit/session/qairt/qairt_session.py b/src/winml/modelkit/session/qairt/qairt_session.py index 5c92bc7f5..f6406aa1e 100644 --- a/src/winml/modelkit/session/qairt/qairt_session.py +++ b/src/winml/modelkit/session/qairt/qairt_session.py @@ -22,7 +22,7 @@ logger = logging.getLogger(__name__) -# QAIRT SDK dependencies for venv-wmk virtual environment +# QAIRT SDK dependencies for venv-winml virtual environment QAIRT_DEPENDENCIES = [ "onnx>=1.14.0,<1.17", "torch==2.4.1", @@ -72,7 +72,7 @@ def compile(self) -> None: """Compile model using QAIRT SDK. Pipeline: - 1. Ensure venv-wmk in SDK directory + 1. Ensure venv-winml in SDK directory 2. Run compile_qairt_bin.py subprocess → .bin 3. Generate cache_info.json 4. Wrap binary into EPContext ONNX model @@ -89,7 +89,7 @@ def compile(self) -> None: # Step 1: Set up venv with QAIRT dependencies venv_python = ensure_venv( root_path=self._qnn_sdk_root, - venv_name="venv-wmk", + venv_name="venv-winml", python_version="3.10", requirements=QAIRT_DEPENDENCIES, ) diff --git a/tests/e2e/test_build_e2e.py b/tests/e2e/test_build_e2e.py index 7bc7933f2..535fc2f64 100644 --- a/tests/e2e/test_build_e2e.py +++ b/tests/e2e/test_build_e2e.py @@ -11,7 +11,7 @@ The build command uses @click.pass_context and requires obj={"debug": False}. We generate a proper config via ``generate_build_config()`` (same API -the ``wmk config`` command calls) to ensure export input_tensors are +the ``winml config`` command calls) to ensure export input_tensors are populated. A minimal hand-crafted config lacks I/O specs and will fail. Markers: @@ -19,6 +19,7 @@ slow: Tests that take > 30 seconds network: Requires network access to HuggingFace Hub """ + from __future__ import annotations import json @@ -93,6 +94,7 @@ def _make_minimal_config_file(tmp_path, task: str) -> str: # HF model build (export + optimize only) # =========================================================================== + class TestBuildHF: """Build from HuggingFace model with --no-quant --no-compile.""" @@ -102,7 +104,9 @@ def test_bert_text_classification(self, tmp_path: Path): Uses --no-quant --no-compile so only export + optimize run. """ config_path = _generate_config_file( - tmp_path, "bert-base-uncased", task="text-classification", + tmp_path, + "bert-base-uncased", + task="text-classification", ) output_dir = tmp_path / "output" @@ -110,18 +114,19 @@ def test_bert_text_classification(self, tmp_path: Path): result = runner.invoke( build, [ - "-c", config_path, - "-m", "bert-base-uncased", - "-o", str(output_dir), + "-c", + config_path, + "-m", + "bert-base-uncased", + "-o", + str(output_dir), "--no-quant", "--no-compile", ], obj={"debug": False}, catch_exceptions=False, ) - assert result.exit_code == 0, ( - f"build failed (exit {result.exit_code}):\n{result.output}" - ) + assert result.exit_code == 0, f"build failed (exit {result.exit_code}):\n{result.output}" # Build should produce an output directory assert output_dir.exists() # Should contain at least one ONNX file @@ -136,6 +141,7 @@ def test_bert_text_classification(self, tmp_path: Path): # ONNX input build # =========================================================================== + class TestBuildONNX: """Build from pre-exported ONNX file.""" @@ -148,16 +154,17 @@ def test_onnx_passthrough(self, tmp_path: Path, onnx_model_path: Path): result = runner.invoke( build, [ - "-c", config_path, - "-m", str(onnx_model_path), - "-o", str(output_dir), + "-c", + config_path, + "-m", + str(onnx_model_path), + "-o", + str(output_dir), "--no-quant", "--no-compile", ], obj={"debug": False}, catch_exceptions=False, ) - assert result.exit_code == 0, ( - f"build failed (exit {result.exit_code}):\n{result.output}" - ) + assert result.exit_code == 0, f"build failed (exit {result.exit_code}):\n{result.output}" assert output_dir.exists() diff --git a/tests/integration/test_module_build.py b/tests/integration/test_module_build.py index 3d79626c6..ff8df5ed2 100644 --- a/tests/integration/test_module_build.py +++ b/tests/integration/test_module_build.py @@ -18,7 +18,7 @@ class TestModuleConfigE2E: """End-to-end: generate_build_config(module=...) produces valid configs.""" def test_config_module_generates_array_with_module_path(self) -> None: - """Verify wmk config --module outputs a JSON array with module_path.""" + """Verify winml config --module outputs a JSON array with module_path.""" from winml.modelkit.config import generate_build_config # Use model_type only (no download, uses default HF config with random weights) diff --git a/tests/integration/test_quantization.py b/tests/integration/test_quantization.py index 9eb507f15..59b3a1ed5 100644 --- a/tests/integration/test_quantization.py +++ b/tests/integration/test_quantization.py @@ -19,9 +19,9 @@ def test_model_path(self, tmp_path_factory): temp_dir = tmp_path_factory.mktemp("quantization_e2e") model_path = temp_dir / "resnet-50.onnx" - # Export ResNet-50 model using wmk export + # Export ResNet-50 model using winml export cmd = [ - "wmk", + "winml", "export", "-m", "microsoft/resnet-50", @@ -46,8 +46,8 @@ def _run_quantization_and_validate(self, test_model_path, tmp_path, test_name, p output_path = tmp_path / f"resnet50_{test_name}_quantized.onnx" - # Build quantization command using actual wmk quantize CLI flags - cmd = ["wmk", "quantize", "--model", str(test_model_path), "--output", str(output_path)] + # Build quantization command using actual winml quantize CLI flags + cmd = ["winml", "quantize", "--model", str(test_model_path), "--output", str(output_path)] if precision: cmd.extend(["--precision", precision]) diff --git a/tests/unit/analyze/core/model_validators/test_validators.py b/tests/unit/analyze/core/model_validators/test_validators.py index 2cde85eb1..b4a82ebfb 100644 --- a/tests/unit/analyze/core/model_validators/test_validators.py +++ b/tests/unit/analyze/core/model_validators/test_validators.py @@ -128,7 +128,7 @@ def test_detect_constant_only_nodes(self): # Check that explanation mentions constant-only nodes assert "constant inputs" in info.explanation # Check that details contain tool recommendations (JSON format) - assert "wmk optimize" in info.actions[0].details + assert "winml optimize" in info.actions[0].details def test_no_constant_only_nodes(self): """Test that models without constant-only nodes return None.""" @@ -185,7 +185,7 @@ def test_constant_folding_with_initializer(self): # Check explanation mentions constant inputs assert "constant inputs" in info.explanation # Check details contain tool recommendations - assert "wmk optimize" in info.actions[0].details + assert "winml optimize" in info.actions[0].details def test_explanation_contains_node_count(self): """Test that explanation mentions correct node count.""" diff --git a/tests/unit/cache/test_model.py b/tests/unit/cache/test_model.py index 145d96a7d..910ad9d4c 100644 --- a/tests/unit/cache/test_model.py +++ b/tests/unit/cache/test_model.py @@ -160,7 +160,7 @@ def test_multiple_models(self, tmp_path: Path) -> None: class TestCallerConvergence: - """Verify that from_pretrained and wmk build --use-cache produce identical paths.""" + """Verify that from_pretrained and winml build --use-cache produce identical paths.""" def test_same_output_dir(self) -> None: """Both callers compute the same output_dir for a given model_id.""" diff --git a/tests/unit/commands/test_build_module.py b/tests/unit/commands/test_build_module.py index 042989640..8306ba270 100644 --- a/tests/unit/commands/test_build_module.py +++ b/tests/unit/commands/test_build_module.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Tests for wmk build module mode (array config detection and orchestration).""" +"""Tests for winml build module mode (array config detection and orchestration).""" from __future__ import annotations diff --git a/tests/unit/commands/test_cli.py b/tests/unit/commands/test_cli.py index b47a5a92f..e7a998685 100644 --- a/tests/unit/commands/test_cli.py +++ b/tests/unit/commands/test_cli.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""CLI integration tests for wmk command. +"""CLI integration tests for winml command. Tests the CLI interface using Click's CliRunner to ensure commands work correctly without executing actual model exports (which are slow). @@ -42,7 +42,7 @@ def test_version(self, runner: CliRunner) -> None: """Test --version flag shows version info.""" result = runner.invoke(main, ["--version"]) assert result.exit_code == 0 - assert "wmk" in result.output.lower() + assert "winml" in result.output.lower() def test_help(self, runner: CliRunner) -> None: """Test --help shows usage information.""" diff --git a/tests/unit/commands/test_hub.py b/tests/unit/commands/test_hub.py index 3f362ae03..0a9dea702 100644 --- a/tests/unit/commands/test_hub.py +++ b/tests/unit/commands/test_hub.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Tests for the wmk hub CLI command (no network calls, catalog mocked).""" +"""Tests for the winml hub CLI command (no network calls, catalog mocked).""" from __future__ import annotations @@ -40,19 +40,29 @@ "task": "token-classification", "perf": { "QNN": { - "avg_ms": 13.71, "p50_ms": 13.75, "p90_ms": 13.84, - "p95_ms": 13.84, "p99_ms": 13.84, - "min_ms": 13.59, "max_ms": 13.84, "throughput_qps": 72.93, + "avg_ms": 13.71, + "p50_ms": 13.75, + "p90_ms": 13.84, + "p95_ms": 13.84, + "p99_ms": 13.84, + "min_ms": 13.59, + "max_ms": 13.84, + "throughput_qps": 72.93, }, "OV": { - "avg_ms": 25.28, "p50_ms": 24.84, "p90_ms": 35.33, - "p95_ms": 35.33, "p99_ms": 35.33, - "min_ms": 20.6, "max_ms": 35.33, "throughput_qps": 39.56, + "avg_ms": 25.28, + "p50_ms": 24.84, + "p90_ms": 35.33, + "p95_ms": 35.33, + "p99_ms": 35.33, + "min_ms": 20.6, + "max_ms": 35.33, + "throughput_qps": 39.56, }, }, "accuracy": { "QNN": {"verdict": "PASS", "drop_pct": 0.0}, - "OV": {"verdict": "PASS", "drop_pct": 0.0}, + "OV": {"verdict": "PASS", "drop_pct": 0.0}, }, }, { @@ -62,7 +72,7 @@ "perf": None, "accuracy": { "QNN": {"verdict": "REGRESSION", "drop_pct": -36.84}, - "OV": {"verdict": "REGRESSION", "drop_pct": -32.67}, + "OV": {"verdict": "REGRESSION", "drop_pct": -32.67}, }, }, { @@ -167,7 +177,7 @@ def test_hub_default_shows_table(runner, patched_catalog): def test_hub_table_shows_hint(runner, patched_catalog): result = runner.invoke(hub, ["--output", "/dev/null"]) assert result.exit_code == 0 - assert "wmk hub --model" in result.output + assert "winml hub --model" in result.output def test_hub_saves_json_file(runner, patched_catalog, tmp_path): @@ -192,10 +202,7 @@ def test_hub_shows_accuracy_pass(runner, patched_catalog, tmp_path): assert result.exit_code == 0 data = json.loads(out.read_text()) verdicts = { - ep: info["verdict"] - for m in data - if m.get("accuracy") - for ep, info in m["accuracy"].items() + ep: info["verdict"] for m in data if m.get("accuracy") for ep, info in m["accuracy"].items() } assert "PASS" in verdicts.values() @@ -271,9 +278,7 @@ def test_hub_model_detail_shows_accuracy(runner, patched_catalog, tmp_path): def test_hub_model_detail_regression(runner, patched_catalog): - result = runner.invoke( - hub, ["--model", "facebook/detr-resnet-50", "--output", "/dev/null"] - ) + result = runner.invoke(hub, ["--model", "facebook/detr-resnet-50", "--output", "/dev/null"]) assert result.exit_code == 0 assert "REGRESSION" in result.output assert "-36.84%" in result.output diff --git a/tests/unit/commands/test_perf_module.py b/tests/unit/commands/test_perf_module.py index 2dc4619d0..46ae2c2c3 100644 --- a/tests/unit/commands/test_perf_module.py +++ b/tests/unit/commands/test_perf_module.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Tests for wmk perf --module flag.""" +"""Tests for winml perf --module flag.""" from __future__ import annotations @@ -13,10 +13,10 @@ class TestPerfModuleFlag: - """Tests for --module flag on wmk perf.""" + """Tests for --module flag on winml perf.""" def test_module_flag_in_help(self) -> None: - """Verify --module flag appears in wmk perf --help.""" + """Verify --module flag appears in winml perf --help.""" runner = CliRunner() result = runner.invoke(main, ["perf", "--help"]) assert result.exit_code == 0 diff --git a/tests/unit/config/test_build.py b/tests/unit/config/test_build.py index a856b4a78..4e03eae7c 100644 --- a/tests/unit/config/test_build.py +++ b/tests/unit/config/test_build.py @@ -1927,7 +1927,7 @@ def test_explicit_precision_triggers_resolve_device(self) -> None: # ============================================================================= -# TestDevicePrecisionCli - CLI tests for --device/--precision on wmk config +# TestDevicePrecisionCli - CLI tests for --device/--precision on winml config # ============================================================================= @@ -1964,7 +1964,7 @@ def _mock_deps( } def _invoke(self, tmp_path, extra_args: list[str] | None = None): - """Helper: invoke wmk config with standard mocks.""" + """Helper: invoke winml config with standard mocks.""" output_file = tmp_path / "result.json" args = ["-m", "bert-base-uncased", "-o", str(output_file)] if extra_args: @@ -2056,7 +2056,7 @@ def test_auto_precision_int8_triggers_detection(self, tmp_path) -> None: class TestConfigOnnxAutoDetect: - """Test ONNX file auto-detection in wmk config command.""" + """Test ONNX file auto-detection in winml config command.""" def test_config_auto_detect_onnx(self, tmp_path) -> None: """When -m points to an existing .onnx file, generates config with export=None.""" diff --git a/tests/unit/config/test_build_onnx.py b/tests/unit/config/test_build_onnx.py index 4374a6a31..b37a70ab8 100644 --- a/tests/unit/config/test_build_onnx.py +++ b/tests/unit/config/test_build_onnx.py @@ -88,7 +88,7 @@ def mock_export_config() -> WinMLExportConfig: class TestConfigOnnxAutoDetect: - """Test ONNX file auto-detection in wmk config command.""" + """Test ONNX file auto-detection in winml config command.""" def test_config_auto_detect_onnx(self, tmp_path) -> None: """When -m points to an existing .onnx file, generates config with export=None.""" diff --git a/tests/unit/optracing/test_perf_optracing_cli.py b/tests/unit/optracing/test_perf_optracing_cli.py index 0e532b7b5..3abbd0288 100644 --- a/tests/unit/optracing/test_perf_optracing_cli.py +++ b/tests/unit/optracing/test_perf_optracing_cli.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Tests for the --op-tracing CLI option on wmk perf.""" +"""Tests for the --op-tracing CLI option on winml perf.""" from __future__ import annotations From b1e3950bbea1c325095c327cd735626ea2371911 Mon Sep 17 00:00:00 2001 From: Zhipeng Wang Date: Wed, 1 Apr 2026 13:47:22 +0800 Subject: [PATCH 2/2] fix: rename remaining WMK references (case-insensitive) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - WMK variable in run_eval.py → WINML_CLI - WMK_CACHE_DIR env var → WINML_CACHE_DIR (cache/path.py + tests) - WMK table headers in accuracy.py → WinML - WMK_blip comment in blip.py → WinML blip --- scripts/e2e_eval/run_eval.py | 14 +++++++------- scripts/e2e_eval/utils/accuracy.py | 6 +++--- src/winml/modelkit/cache/path.py | 4 ++-- src/winml/modelkit/commands/optimize.py | 10 +++++----- src/winml/modelkit/models/hf/blip.py | 2 +- tests/unit/cache/test_path.py | 10 +++++----- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/scripts/e2e_eval/run_eval.py b/scripts/e2e_eval/run_eval.py index fa532e393..ea10c2b3f 100644 --- a/scripts/e2e_eval/run_eval.py +++ b/scripts/e2e_eval/run_eval.py @@ -71,7 +71,7 @@ # Constants # --------------------------------------------------------------------------- -WMK = [sys.executable, "-m", "winml.modelkit.cli"] +WINML_CLI = [sys.executable, "-m", "winml.modelkit.cli"] BASELINE_SCRIPT = Path(__file__).parent / "run_pytorch_baseline.py" BASELINE_CACHE_PATH = Path(__file__).parent / "cache" / "baseline_cache.json" EVAL_DATASETS_CACHE = Path.home() / ".cache" / "winml" / "eval_datasets" @@ -330,7 +330,7 @@ def _run_build( # Step 1: winml config config_args = [ - *WMK, + *WINML_CLI, "config", "-m", entry.hf_id, @@ -355,7 +355,7 @@ def _run_build( # Step 2: winml build --use-cache build_args = [ - *WMK, + *WINML_CLI, "build", "-c", str(config_path), @@ -433,10 +433,10 @@ def run_model( (skips internal build). Otherwise falls back to HF model ID. """ if onnx_path: - args = [*WMK, "perf", "-m", onnx_path, "--device", device] + args = [*WINML_CLI, "perf", "-m", onnx_path, "--device", device] else: args = [ - *WMK, + *WINML_CLI, "perf", "-m", entry.hf_id, @@ -543,7 +543,7 @@ def _run_winml_eval( eval_device = "npu" if device == "auto" else device if onnx_path: args = [ - *WMK, + *WINML_CLI, "eval", "-m", onnx_path, @@ -554,7 +554,7 @@ def _run_winml_eval( ] else: args = [ - *WMK, + *WINML_CLI, "eval", "-m", entry.hf_id, diff --git a/scripts/e2e_eval/utils/accuracy.py b/scripts/e2e_eval/utils/accuracy.py index 10a6b4c8b..f82baf153 100644 --- a/scripts/e2e_eval/utils/accuracy.py +++ b/scripts/e2e_eval/utils/accuracy.py @@ -57,7 +57,7 @@ def compute_delta( Returns (None, None) if either is missing or baseline value is zero. Note: For error-rate metrics (WER — lower is better) a positive delta - means the WMK pipeline is *worse*. The threshold in derive_verdict() + means the WinML pipeline is *worse*. The threshold in derive_verdict() uses abs(delta_relative) to handle both directions uniformly. """ if winml_metric is None or baseline_metric is None: @@ -242,7 +242,7 @@ def _pct(acc: dict) -> str: lines += ["", "## Accuracy Regressions", ""] if regressions: lines += [ - "| Model | Task | WMK | Baseline | Delta% |", + "| Model | Task | WinML | Baseline | Delta% |", "|-------|------|-----|----------|--------|", ] for r in regressions: @@ -263,7 +263,7 @@ def _pct(acc: dict) -> str: lines += ["", "## At-Risk Models", ""] if at_risk: lines += [ - "| Model | Task | WMK | Baseline | Delta% |", + "| Model | Task | WinML | Baseline | Delta% |", "|-------|------|-----|----------|--------|", ] for r in at_risk: diff --git a/src/winml/modelkit/cache/path.py b/src/winml/modelkit/cache/path.py index ba28ba61d..5ac7ca76d 100644 --- a/src/winml/modelkit/cache/path.py +++ b/src/winml/modelkit/cache/path.py @@ -32,7 +32,7 @@ def get_cache_dir(override: str | Path | None = None) -> Path: Priority: 1. ``override`` parameter (caller-specified) - 2. ``WMK_CACHE_DIR`` environment variable + 2. ``WINML_CACHE_DIR`` environment variable 3. ``~/.cache/winml/`` Args: @@ -43,7 +43,7 @@ def get_cache_dir(override: str | Path | None = None) -> Path: """ if override is not None: return Path(override) - env_dir = os.environ.get("WMK_CACHE_DIR") + env_dir = os.environ.get("WINML_CACHE_DIR") if env_dir: return Path(env_dir) return Path.home() / ".cache" / _DEFAULT_CACHE_DIR_NAME diff --git a/src/winml/modelkit/commands/optimize.py b/src/winml/modelkit/commands/optimize.py index 96fdf4c8d..c1f704da7 100644 --- a/src/winml/modelkit/commands/optimize.py +++ b/src/winml/modelkit/commands/optimize.py @@ -260,9 +260,9 @@ def optimize( # Pattern rewrite flags follow: --enable-{source-slug}-{target-slug} # Run --list-rewrites to discover all available flag names. - # Example (all GELU variants → single Gelu node): + # Example (all GELU variants -> single Gelu node): winml optimize -m model.onnx -o out.onnx --enable-gelu-singlegelu - # Example (only Gelu1 variant → single Gelu node): + # Example (only Gelu1 variant -> single Gelu node): winml optimize -m model.onnx -o out.onnx --enable-gelu1-singlegelu # Basic optimization with GELU fusion @@ -358,7 +358,7 @@ def optimize( console.print("[yellow]No rewrite capabilities discovered.[/yellow]") return - console.print("\n[bold]Rewrite capabilities (source → target):[/bold]\n") + console.print("\n[bold]Rewrite capabilities (source -> target):[/bold]\n") for group in REWRITE_GROUPS: rule_file = Path(group.rule_file).name is_multi = len(group.sources) > 1 @@ -440,7 +440,7 @@ def optimize( if all_errors: console.print("[bold red]Configuration validation errors:[/bold red]") for error in all_errors: - console.print(f" [red]• {error}[/red]") + console.print(f" [red]* {error}[/red]") sys.exit(1) # Convert capability names (kebab-case) to python names (snake_case) for optimizer @@ -471,7 +471,7 @@ def optimize( reduction = (1 - optimized_nodes / original_nodes) * 100 if original_nodes else 0 console.print(f"\n[bold green]Success![/bold green] Model optimized: {output}") - node_info = f"Nodes: {original_nodes} → {optimized_nodes} ({reduction:.1f}% reduction)" + node_info = f"Nodes: {original_nodes} -> {optimized_nodes} ({reduction:.1f}% reduction)" console.print(f"[dim]{node_info}[/dim]") except Exception as e: diff --git a/src/winml/modelkit/models/hf/blip.py b/src/winml/modelkit/models/hf/blip.py index 6dfd30c4d..b5cc48062 100644 --- a/src/winml/modelkit/models/hf/blip.py +++ b/src/winml/modelkit/models/hf/blip.py @@ -10,7 +10,7 @@ - Vision: ViT-B/16, 384x384 input, 577 sequence (1 CLS + 576 patches) - Text: BERT-based decoder, vocab 30524, max 512 positions -Optimization settings match WMK_blip production pipeline: +Optimization settings match WinML blip production pipeline: - GELU fusion enabled - LayerNorm fusion enabled - MatMul+Add fusion enabled (GEMM) diff --git a/tests/unit/cache/test_path.py b/tests/unit/cache/test_path.py index 9e2edf382..1f83b32b2 100644 --- a/tests/unit/cache/test_path.py +++ b/tests/unit/cache/test_path.py @@ -30,17 +30,17 @@ class TestGetCacheDir: """Test cache directory resolution.""" def test_default_is_home_cache_winml(self, monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.delenv("WMK_CACHE_DIR", raising=False) + monkeypatch.delenv("WINML_CACHE_DIR", raising=False) result = get_cache_dir() assert result == Path.home() / ".cache" / "winml" def test_env_var_override(self, monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setenv("WMK_CACHE_DIR", "/custom/cache") + monkeypatch.setenv("WINML_CACHE_DIR", "/custom/cache") result = get_cache_dir() assert result == Path("/custom/cache") def test_explicit_override_takes_priority(self, monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setenv("WMK_CACHE_DIR", "/env/cache") + monkeypatch.setenv("WINML_CACHE_DIR", "/env/cache") result = get_cache_dir(override="/explicit/cache") assert result == Path("/explicit/cache") @@ -49,7 +49,7 @@ def test_explicit_override_as_path(self) -> None: assert result == Path("/some/path") def test_none_override_falls_through(self, monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.delenv("WMK_CACHE_DIR", raising=False) + monkeypatch.delenv("WINML_CACHE_DIR", raising=False) result = get_cache_dir(override=None) assert result == Path.home() / ".cache" / "winml" @@ -67,7 +67,7 @@ def test_appends_artifacts(self) -> None: assert result == Path("/cache/root/artifacts") def test_none_resolves_default(self, monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.delenv("WMK_CACHE_DIR", raising=False) + monkeypatch.delenv("WINML_CACHE_DIR", raising=False) result = get_artifacts_dir() assert result == Path.home() / ".cache" / "winml" / "artifacts"