From 18a841b0ce55dc980cc0e2622a90bb208722fc22 Mon Sep 17 00:00:00 2001
From: Zhipeng Wang <zhiwang@microsoft.com>
Date: Wed, 1 Apr 2026 12:29:41 +0800
Subject: [PATCH 1/2] rename: CLI command from wmk to winml (#203)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename the CLI entry point and all references from `wmk` to `winml`:
- pyproject.toml entry point
- cli.py prog_name and docstrings
- All 12 command files (export, build, config, analyze, quantize, optimize, compile, inspect, sys, eval, perf, hub)
- Source code: error messages, validator hints, cache docstring, temp dir prefix, venv name
- Tests: 12 test files
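- Docs and scripts: README.md, e2e_eval scripts and JSON config (wmk_metric_key → winml_metric_key)
- run_eval.py output: result keys renamed (wmk_eval_* → winml_eval_*, wmk_metric → winml_metric,
  elapsed_wmk → elapsed_winml) and wmk_eval_output.json → winml_eval_output.json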
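- Deleted stale egg-info
- Incidental changes in scripts/e2e_eval: added MIT license headers, applied formatter
  line wrapping, and added `# noqa` annotations and missing docstrings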
---
 README.md                                     |   8 +-
 pyproject.toml                                |   2 +-
 scripts/e2e_eval/README.md                    |   4 +-
 scripts/e2e_eval/datasets/build_ai4privacy.py |  18 +-
 .../e2e_eval/datasets/build_indonlu_posp.py   |   8 +-
 .../datasets/build_pubtables1m_detection.py   |  69 +++---
 .../datasets/build_pubtables1m_structure.py   |  69 +++---
 scripts/e2e_eval/run_eval.py                  | 205 +++++++++++-------
 scripts/e2e_eval/run_pytorch_baseline.py      |   4 +-
 .../e2e_eval/testsets/models_with_acc.json    |  44 ++--
 scripts/e2e_eval/utils/accuracy.py            |  25 ++-
 scripts/e2e_eval/utils/dataset_config.py      |   9 +-
 .../constant_folding_validator.py             |   2 +-
 .../pattern_matching_validator.py             |   4 +-
 .../qdq_validation_validator.py               |   2 +-
 .../shape_inference_validator.py              |   2 +-
 src/winml/modelkit/build/hf.py                |   4 +-
 src/winml/modelkit/cache/__init__.py          |   2 +-
 src/winml/modelkit/cli.py                     |  10 +-
 src/winml/modelkit/commands/analyze.py        |  24 +-
 src/winml/modelkit/commands/build.py          |  32 +--
 src/winml/modelkit/commands/compile.py        |  32 ++-
 src/winml/modelkit/commands/config.py         |  54 ++---
 src/winml/modelkit/commands/eval.py           |  12 +-
 src/winml/modelkit/commands/export.py         |  28 +--
 src/winml/modelkit/commands/hub.py            |  36 ++-
 src/winml/modelkit/commands/inspect.py        |  18 +-
 src/winml/modelkit/commands/optimize.py       |  24 +-
 src/winml/modelkit/commands/perf.py           |  18 +-
 src/winml/modelkit/commands/quantize.py       |  30 ++-
 src/winml/modelkit/commands/sys.py            |  39 ++--
 src/winml/modelkit/models/auto.py             |   4 +-
 .../session/qairt/compile_qairt_bin.py        |   2 +-
 .../modelkit/session/qairt/qairt_session.py   |   6 +-
 tests/e2e/test_build_e2e.py                   |  35 +--
 tests/integration/test_module_build.py        |   2 +-
 tests/integration/test_quantization.py        |   8 +-
 .../core/model_validators/test_validators.py  |   4 +-
 tests/unit/cache/test_model.py                |   2 +-
 tests/unit/commands/test_build_module.py      |   2 +-
 tests/unit/commands/test_cli.py               |   4 +-
 tests/unit/commands/test_hub.py               |  39 ++--
 tests/unit/commands/test_perf_module.py       |   6 +-
 tests/unit/config/test_build.py               |   6 +-
 tests/unit/config/test_build_onnx.py          |   2 +-
 .../unit/optracing/test_perf_optracing_cli.py |   2 +-
 46 files changed, 513 insertions(+), 449 deletions(-)

diff --git a/README.md b/README.md
index d86dc8699..ee64a0de2 100644
--- a/README.md
+++ b/README.md
@@ -32,17 +32,17 @@ uv sync
 
 ### Usage
 
-ModelKit provides a CLI tool `wmk`:
+ModelKit provides a CLI tool `winml`:
 
 ```bash
 # Export a Hugging Face model to ONNX
-uv run wmk export --model microsoft/resnet-50 --output ./output
+uv run winml export --model microsoft/resnet-50 --output ./output
 
 # Analyze an ONNX model
-uv run wmk analyze --model ./output/model.onnx
+uv run winml analyze --model ./output/model.onnx
 
 # Quantize an ONNX model
-uv run wmk quantize --model ./output/model.onnx
+uv run winml quantize --model ./output/model.onnx
 ```
 
 ## Contributions and Feedback
diff --git a/pyproject.toml b/pyproject.toml
index 77b82bd6d..4f5d301e6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ urls.Repository = "https://github.com/microsoft/ModelKit.git"
 # but package-dir expects namespace names (winml.modelkit). These don't auto-connect.
 # For flat layout (modelkit/) with namespace imports (winml.modelkit), explicit listing
 # ensures the namespace prefix is correctly applied to all subpackages.
-scripts.wmk = "winml.modelkit.cli:main"
+scripts.winml = "winml.modelkit.cli:main"
 
 [dependency-groups]
 dev = [
diff --git a/scripts/e2e_eval/README.md b/scripts/e2e_eval/README.md
index efc475844..911d5320b 100644
--- a/scripts/e2e_eval/README.md
+++ b/scripts/e2e_eval/README.md
@@ -1,6 +1,6 @@
 # E2E Evaluation Scripts
 
-Batch-evaluate ModelKit's `wmk perf` pipeline against a curated set of HuggingFace models.
+Batch-evaluate ModelKit's `winml perf` pipeline against a curated set of HuggingFace models.
 Captures pass/fail, failure classification, and generates interactive reports.
 
 ## Quick Start
@@ -48,7 +48,7 @@ uv run python scripts/e2e_eval/build_registry.py --dry-run
 
 ### `run_eval.py` — Run Evaluation
 
-Executes `wmk perf` for each model in a subprocess, classifies failures, and
+Executes `winml perf` for each model in a subprocess, classifies failures, and
 generates reports (JSON, Markdown, HTML).
 
 ```bash
diff --git a/scripts/e2e_eval/datasets/build_ai4privacy.py b/scripts/e2e_eval/datasets/build_ai4privacy.py
index 54b22305f..daffd5a72 100644
--- a/scripts/e2e_eval/datasets/build_ai4privacy.py
+++ b/scripts/e2e_eval/datasets/build_ai4privacy.py
@@ -1,3 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
 """Build a local HF-compatible dataset for Isotonic/distilbert_finetuned_ai4privacy_v2.
 
 The ``ai4privacy/pii-masking-200k`` dataset uses string BIO labels
@@ -13,6 +18,7 @@
 import argparse
 from pathlib import Path
 
+
 _NUM_SAMPLES = 10000
 
 
@@ -37,10 +43,12 @@ def build_dataset(output_dir: Path) -> None:
     tokens_list = [s["mbert_text_tokens"] for s in samples]
     tags_list = [[label2id[lbl] for lbl in s["mbert_bio_labels"]] for s in samples]
 
-    features = Features({
-        "tokens": Sequence(Value("string")),
-        "ner_tags": Sequence(ClassLabel(names=all_labels)),
-    })
+    features = Features(
+        {
+            "tokens": Sequence(Value("string")),
+            "ner_tags": Sequence(ClassLabel(names=all_labels)),
+        }
+    )
     dataset = Dataset.from_dict(
         {"tokens": tokens_list, "ner_tags": tags_list},
         features=features,
@@ -51,7 +59,7 @@ def build_dataset(output_dir: Path) -> None:
     print("Done.")
 
 
-def main() -> None:
+def main() -> None:  # noqa: D103
     parser = argparse.ArgumentParser(description="Build ai4privacy PII dataset")
     parser.add_argument("--output", type=Path, required=True, help="Output directory")
     args = parser.parse_args()
diff --git a/scripts/e2e_eval/datasets/build_indonlu_posp.py b/scripts/e2e_eval/datasets/build_indonlu_posp.py
index 31efacfe7..8896bbb86 100644
--- a/scripts/e2e_eval/datasets/build_indonlu_posp.py
+++ b/scripts/e2e_eval/datasets/build_indonlu_posp.py
@@ -1,3 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
 """Build a local HF-compatible dataset for indonlp/indonlu (posp subset).
 
 The upstream ``indonlp/indonlu`` dataset uses a legacy loading script
@@ -13,6 +18,7 @@
 import argparse
 from pathlib import Path
 
+
 _PARQUET_REVISION = "refs/convert/parquet"
 _PARQUET_PATH = "posp/validation/0000.parquet"
 
@@ -38,7 +44,7 @@ def build_dataset(output_dir: Path) -> None:
     print("Done.")
 
 
-def main() -> None:
+def main() -> None:  # noqa: D103
     parser = argparse.ArgumentParser(description="Build indonlu posp dataset")
     parser.add_argument("--output", type=Path, required=True, help="Output directory")
     args = parser.parse_args()
diff --git a/scripts/e2e_eval/datasets/build_pubtables1m_detection.py b/scripts/e2e_eval/datasets/build_pubtables1m_detection.py
index 3b2b41140..0fad24478 100644
--- a/scripts/e2e_eval/datasets/build_pubtables1m_detection.py
+++ b/scripts/e2e_eval/datasets/build_pubtables1m_detection.py
@@ -1,3 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
 """Build a local HF-compatible object-detection dataset from PubTables-1M.
 
 Downloads the validation split annotations (5 MB) and images (7 GB) from
@@ -18,9 +23,10 @@
 import argparse
 import random
 import tarfile
-import xml.etree.ElementTree as ET
+import xml.etree.ElementTree as ET  # noqa: N817
 from pathlib import Path
 
+
 # Labels matching microsoft/table-transformer-detection config
 LABEL_NAMES = ["table", "table rotated"]
 
@@ -30,7 +36,7 @@
 
 def _parse_voc_xml(xml_bytes: bytes) -> dict | None:
     """Parse a PASCAL VOC XML annotation and return structured data."""
-    root = ET.fromstring(xml_bytes)
+    root = ET.fromstring(xml_bytes)  # noqa: S314
     filename = root.findtext("filename")
     size_el = root.find("size")
     if size_el is None or filename is None:
@@ -147,38 +153,43 @@ def build_dataset(output_dir: Path) -> None:
     print(f"  Extracted {len(images_by_name)} images")
 
     # Step 6: Build dataset rows (skip any missing images)
-    from datasets import ClassLabel, Dataset, Features, Image as HFImage, Sequence, Value
+    from datasets import ClassLabel, Dataset, Features, Sequence, Value
+    from datasets import Image as HFImage
 
     rows: list[dict] = []
     for idx, ann in enumerate(sampled):
         img = images_by_name.get(ann["filename"])
         if img is None:
             continue
-        rows.append({
-            "image_id": idx,
-            "image": img,
-            "width": ann["width"],
-            "height": ann["height"],
+        rows.append(
+            {
+                "image_id": idx,
+                "image": img,
+                "width": ann["width"],
+                "height": ann["height"],
+                "objects": {
+                    "bbox_id": ann["bbox_id"],
+                    "category": ann["category"],
+                    "bbox": ann["bbox"],
+                    "area": ann["area"],
+                },
+            }
+        )
+
+    features = Features(
+        {
+            "image_id": Value("int64"),
+            "image": HFImage(),
+            "width": Value("int64"),
+            "height": Value("int64"),
             "objects": {
-                "bbox_id": ann["bbox_id"],
-                "category": ann["category"],
-                "bbox": ann["bbox"],
-                "area": ann["area"],
+                "bbox_id": Sequence(Value("int64")),
+                "category": Sequence(ClassLabel(names=LABEL_NAMES)),
+                "bbox": Sequence(Sequence(Value("float64"), length=4)),
+                "area": Sequence(Value("float64")),
             },
-        })
-
-    features = Features({
-        "image_id": Value("int64"),
-        "image": HFImage(),
-        "width": Value("int64"),
-        "height": Value("int64"),
-        "objects": {
-            "bbox_id": Sequence(Value("int64")),
-            "category": Sequence(ClassLabel(names=LABEL_NAMES)),
-            "bbox": Sequence(Sequence(Value("float64"), length=4)),
-            "area": Sequence(Value("float64")),
-        },
-    })
+        }
+    )
 
     dataset = Dataset.from_list(rows, features=features)
     print(f"Saving {len(dataset)} samples to {output_dir} ...")
@@ -187,10 +198,8 @@ def build_dataset(output_dir: Path) -> None:
     print("Done.")
 
 
-def main() -> None:
-    parser = argparse.ArgumentParser(
-        description="Build PubTables-1M detection dataset"
-    )
+def main() -> None:  # noqa: D103
+    parser = argparse.ArgumentParser(description="Build PubTables-1M detection dataset")
     parser.add_argument("--output", type=Path, required=True, help="Output directory")
     args = parser.parse_args()
     build_dataset(args.output)
diff --git a/scripts/e2e_eval/datasets/build_pubtables1m_structure.py b/scripts/e2e_eval/datasets/build_pubtables1m_structure.py
index f967b45e7..e59618267 100644
--- a/scripts/e2e_eval/datasets/build_pubtables1m_structure.py
+++ b/scripts/e2e_eval/datasets/build_pubtables1m_structure.py
@@ -1,3 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
 """Build a local HF-compatible object-detection dataset from PubTables-1M (Structure).
 
 Downloads the validation split annotations (29 MB) and images (2.6 GB) from
@@ -21,9 +26,10 @@
 import argparse
 import random
 import tarfile
-import xml.etree.ElementTree as ET
+import xml.etree.ElementTree as ET  # noqa: N817
 from pathlib import Path
 
+
 # Labels matching microsoft/table-transformer-structure-recognition config
 LABEL_NAMES = [
     "table",
@@ -40,7 +46,7 @@
 
 def _parse_voc_xml(xml_bytes: bytes, label2id: dict[str, int]) -> dict | None:
     """Parse a PASCAL VOC XML annotation and return structured data."""
-    root = ET.fromstring(xml_bytes)
+    root = ET.fromstring(xml_bytes)  # noqa: S314
     filename = root.findtext("filename")
     size_el = root.find("size")
     if size_el is None or filename is None:
@@ -155,38 +161,43 @@ def build_dataset(output_dir: Path) -> None:
     print(f"  Extracted {len(images_by_name)} images")
 
     # Step 6: Build dataset rows (skip any missing images)
-    from datasets import ClassLabel, Dataset, Features, Image as HFImage, Sequence, Value
+    from datasets import ClassLabel, Dataset, Features, Sequence, Value
+    from datasets import Image as HFImage
 
     rows: list[dict] = []
     for idx, ann in enumerate(sampled):
         img = images_by_name.get(ann["filename"])
         if img is None:
             continue
-        rows.append({
-            "image_id": idx,
-            "image": img,
-            "width": ann["width"],
-            "height": ann["height"],
+        rows.append(
+            {
+                "image_id": idx,
+                "image": img,
+                "width": ann["width"],
+                "height": ann["height"],
+                "objects": {
+                    "bbox_id": ann["bbox_id"],
+                    "category": ann["category"],
+                    "bbox": ann["bbox"],
+                    "area": ann["area"],
+                },
+            }
+        )
+
+    features = Features(
+        {
+            "image_id": Value("int64"),
+            "image": HFImage(),
+            "width": Value("int64"),
+            "height": Value("int64"),
             "objects": {
-                "bbox_id": ann["bbox_id"],
-                "category": ann["category"],
-                "bbox": ann["bbox"],
-                "area": ann["area"],
+                "bbox_id": Sequence(Value("int64")),
+                "category": Sequence(ClassLabel(names=LABEL_NAMES)),
+                "bbox": Sequence(Sequence(Value("float64"), length=4)),
+                "area": Sequence(Value("float64")),
             },
-        })
-
-    features = Features({
-        "image_id": Value("int64"),
-        "image": HFImage(),
-        "width": Value("int64"),
-        "height": Value("int64"),
-        "objects": {
-            "bbox_id": Sequence(Value("int64")),
-            "category": Sequence(ClassLabel(names=LABEL_NAMES)),
-            "bbox": Sequence(Sequence(Value("float64"), length=4)),
-            "area": Sequence(Value("float64")),
-        },
-    })
+        }
+    )
 
     dataset = Dataset.from_list(rows, features=features)
     print(f"Saving {len(dataset)} samples to {output_dir} ...")
@@ -195,10 +206,8 @@ def build_dataset(output_dir: Path) -> None:
     print("Done.")
 
 
-def main() -> None:
-    parser = argparse.ArgumentParser(
-        description="Build PubTables-1M structure recognition dataset"
-    )
+def main() -> None:  # noqa: D103
+    parser = argparse.ArgumentParser(description="Build PubTables-1M structure recognition dataset")
     parser.add_argument("--output", type=Path, required=True, help="Output directory")
     args = parser.parse_args()
     build_dataset(args.output)
diff --git a/scripts/e2e_eval/run_eval.py b/scripts/e2e_eval/run_eval.py
index 1aa6b682f..fa532e393 100644
--- a/scripts/e2e_eval/run_eval.py
+++ b/scripts/e2e_eval/run_eval.py
@@ -1,11 +1,16 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
 """E2E evaluation runner — unified perf + accuracy.
 
-Batch-runs wmk perf (and optionally wmk eval + pytorch baseline) for models
+Batch-runs winml perf (and optionally winml eval + pytorch baseline) for models
 in a JSON registry, writes unified eval_result.json per model, and generates
 combined reports.
 
-Strategy B cache sharing: wmk perf runs first (build + benchmark, populates
-model cache). wmk eval then reuses the cache — no redundant build step.
+Strategy B cache sharing: winml perf runs first (build + benchmark, populates
+model cache). winml eval then reuses the cache — no redundant build step.
 
 Usage:
     # Perf only (default)
@@ -14,7 +19,7 @@
     # Both perf and accuracy in one batch
     python scripts/e2e_eval/run_eval.py --eval-type both --priority P0
 
-    # Accuracy only (wmk perf is skipped; wmk eval will build the model if cache is missing)
+    # Accuracy only (winml perf is skipped; winml eval will build the model if cache is missing)
     python scripts/e2e_eval/run_eval.py --eval-type accuracy --hf-model microsoft/resnet-50
 
     # Single model
@@ -102,10 +107,10 @@ def _get_timeout_skip_reason(hf_id: str, task: str) -> str:
 
 # Patterns that indicate the disk is full (cross-platform).
 _NO_SPACE_PATTERNS = (
-    "no space left on device",          # Linux/macOS OSError
-    "oserror: [errno 28]",              # Python errno string
+    "no space left on device",  # Linux/macOS OSError
+    "oserror: [errno 28]",  # Python errno string
     "there is not enough space on the disk",  # Windows
-    "winerror 112",                     # Windows disk-full error code
+    "winerror 112",  # Windows disk-full error code
     "disk full",
 )
 
@@ -167,7 +172,8 @@ def _kill_process_tree(pid: int) -> None:
         # Fallback: taskkill on Windows, killpg on Unix
         if platform.system() == "Windows":
             subprocess.run(  # noqa: S603
-                ["taskkill", "/F", "/T", "/PID", str(pid)], capture_output=True
+                ["taskkill", "/F", "/T", "/PID", str(pid)],  # noqa: S607
+                capture_output=True,
             )
         else:
             import signal
@@ -309,22 +315,31 @@ def _watchdog() -> None:
 
 
 def _run_build(
-    entry: ModelEntry, device: str, precision: str, timeout: int, model_dir: Path,
+    entry: ModelEntry,
+    device: str,
+    precision: str,
+    timeout: int,
+    model_dir: Path,
 ) -> dict:
-    """Run wmk config + wmk build for one model. Returns build result dict.
+    """Run winml config + winml build for one model. Returns build result dict.
 
-    Flow: wmk config → config.json → wmk build --use-cache → ONNX path.
+    Flow: winml config → config.json → winml build --use-cache → ONNX path.
     """
     config_path = model_dir / "build_config.json"
     model_dir.mkdir(parents=True, exist_ok=True)
 
-    # Step 1: wmk config
+    # Step 1: winml config
     config_args = [
-        *WMK, "config",
-        "-m", entry.hf_id,
-        "--device", device,
-        "--precision", precision,
-        "-o", str(config_path),
+        *WMK,
+        "config",
+        "-m",
+        entry.hf_id,
+        "--device",
+        device,
+        "--precision",
+        precision,
+        "-o",
+        str(config_path),
     ]
     if entry.task:
         config_args += ["--task", entry.task]
@@ -338,11 +353,14 @@ def _run_build(
             "proc": config_proc,
         }
 
-    # Step 2: wmk build --use-cache
+    # Step 2: winml build --use-cache
     build_args = [
-        *WMK, "build",
-        "-c", str(config_path),
-        "-m", entry.hf_id,
+        *WMK,
+        "build",
+        "-c",
+        str(config_path),
+        "-m",
+        entry.hf_id,
         "--use-cache",
     ]
 
@@ -356,7 +374,7 @@ def _run_build(
         }
 
     # Extract ONNX path from build output
-    # wmk build prints "Final artifact: <path>" in stderr
+    # winml build prints "Final artifact: <path>" in stderr
     onnx_path = None
     for line in build_proc["stderr"].splitlines():
         if "Final artifact:" in line:
@@ -404,9 +422,12 @@ def _find_cached_model(hf_id: str, build_proc: dict) -> str | None:
 
 
 def run_model(
-    entry: ModelEntry, device: str, timeout: int, onnx_path: str | None = None,
+    entry: ModelEntry,
+    device: str,
+    timeout: int,
+    onnx_path: str | None = None,
 ) -> dict:
-    """Execute wmk perf for one model. Returns raw subprocess result dict.
+    """Execute winml perf for one model. Returns raw subprocess result dict.
 
     When onnx_path is provided, benchmarks the pre-built ONNX directly
     (skips internal build). Otherwise falls back to HF model ID.
@@ -415,8 +436,14 @@ def run_model(
         args = [*WMK, "perf", "-m", onnx_path, "--device", device]
     else:
         args = [
-            *WMK, "perf", "-m", entry.hf_id,
-            "--device", device, "--precision", _DEFAULT_PRECISION,
+            *WMK,
+            "perf",
+            "-m",
+            entry.hf_id,
+            "--device",
+            device,
+            "--precision",
+            _DEFAULT_PRECISION,
         ]
         if entry.task:
             args += ["--task", entry.task]
@@ -458,10 +485,10 @@ def _parse_metric_from_stdout(stdout: str) -> dict | None:
     return None
 
 
-def _parse_metric_from_wmk_output(
+def _parse_metric_from_winml_output(
     output_path: Path, metric_name: str, num_samples: int
 ) -> dict | None:
-    """Parse wmk eval --output JSON file into the canonical metric dict."""
+    """Parse winml eval --output JSON file into the canonical metric dict."""
     try:
         data = json.loads(output_path.read_text(encoding="utf-8"))
     except (OSError, json.JSONDecodeError):
@@ -500,7 +527,7 @@ def _build_dataset(ds_config: dict, timeout: int) -> None:
                 safe_print(f"      {line}")
 
 
-def _run_wmk_eval(
+def _run_winml_eval(
     entry: ModelEntry,
     device: str,
     timeout: int,
@@ -508,27 +535,36 @@ def _run_wmk_eval(
     model_dir: Path,
     onnx_path: str | None = None,
 ) -> dict:
-    """Invoke wmk eval for one model. Returns process result + parsed metric."""
-    output_path = model_dir / "wmk_eval_output.json"
+    """Invoke winml eval for one model. Returns process result + parsed metric."""
+    output_path = model_dir / "winml_eval_output.json"
     model_dir.mkdir(parents=True, exist_ok=True)
 
-    # wmk eval requires explicit device ('cpu'/'gpu'/'npu'); 'auto' is not accepted
+    # winml eval requires explicit device ('cpu'/'gpu'/'npu'); 'auto' is not accepted
     eval_device = "npu" if device == "auto" else device
     if onnx_path:
         args = [
-            *WMK, "eval", "-m", onnx_path,
-            "--model-id", entry.hf_id,
-            "--device", eval_device,
+            *WMK,
+            "eval",
+            "-m",
+            onnx_path,
+            "--model-id",
+            entry.hf_id,
+            "--device",
+            eval_device,
         ]
     else:
         args = [
-            *WMK, "eval", "-m", entry.hf_id,
-            "--device", eval_device,
+            *WMK,
+            "eval",
+            "-m",
+            entry.hf_id,
+            "--device",
+            eval_device,
         ]
     if entry.task:
         args += ["--task", entry.task]
     # When ds_config is provided, pass explicit dataset args;
-    # otherwise wmk eval uses its built-in task defaults.
+    # otherwise winml eval uses its built-in task defaults.
     if ds_config.get("dataset"):
         args += ["--dataset", ds_config["dataset"]]
     if ds_config.get("split"):
@@ -550,14 +586,9 @@ def _run_wmk_eval(
 
     metric = None
     if proc["exit_code"] == 0 and output_path.exists():
-        wmk_key = (
-            ds_config.get("wmk_metric_key")
-            or ds_config.get("metric", "accuracy")
-        )
+        winml_key = ds_config.get("winml_metric_key") or ds_config.get("metric", "accuracy")
         num_samples = ds_config.get("num_samples", _DEFAULT_SAMPLES)
-        metric = _parse_metric_from_wmk_output(
-            output_path, wmk_key, num_samples
-        )
+        metric = _parse_metric_from_winml_output(output_path, winml_key, num_samples)
     status = "PASS" if (proc["exit_code"] == 0 and metric is not None) else "FAIL"
 
     return {
@@ -606,9 +637,7 @@ def _save_baseline_cache(cache: dict) -> None:
     )
 
 
-def _lookup_baseline_cache(
-    hf_id: str, task: str, ds_config: dict
-) -> dict | None:
+def _lookup_baseline_cache(hf_id: str, task: str, ds_config: dict) -> dict | None:
     """Return cached baseline result dict, or None if not cached."""
     cache = _load_baseline_cache()
     key = _baseline_cache_key(hf_id, task, ds_config)
@@ -631,9 +660,7 @@ def _shorten_command(cmd: str) -> str:
     return " ".join(shortened)
 
 
-def _store_baseline_cache(
-    hf_id: str, task: str, ds_config: dict, result: dict
-) -> None:
+def _store_baseline_cache(hf_id: str, task: str, ds_config: dict, result: dict) -> None:
     """Store a successful baseline result in cache."""
     if result.get("status") != "PASS":
         return
@@ -695,13 +722,13 @@ def _run_accuracy_phase(
     model_dir: Path,
     onnx_path: str | None = None,
 ) -> dict:
-    """Run wmk eval + pytorch baseline for one model. Returns accuracy sub-section dict."""
+    """Run winml eval + pytorch baseline for one model. Returns accuracy sub-section dict."""
     ds_config = get_dataset_config(entry.hf_id, entry.task) or {}
 
     # Build local dataset if a build_script is configured
     _build_dataset(ds_config, timeout)
 
-    wmk = _run_wmk_eval(entry, device, timeout, ds_config, model_dir, onnx_path)
+    winml = _run_winml_eval(entry, device, timeout, ds_config, model_dir, onnx_path)
 
     # Check baseline cache before running the expensive PyTorch baseline
     cached = _lookup_baseline_cache(entry.hf_id, entry.task, ds_config)
@@ -712,17 +739,17 @@ def _run_accuracy_phase(
         baseline = _run_pytorch_baseline(entry, device, timeout)
         _store_baseline_cache(entry.hf_id, entry.task, ds_config, baseline)
 
-    delta_abs, delta_rel = compute_delta(wmk["metric"], baseline["metric"])
+    delta_abs, delta_rel = compute_delta(winml["metric"], baseline["metric"])
 
     return {
         "skipped": False,
         "skip_reason": None,
-        "wmk_eval_status": wmk["status"],
-        "wmk_metric": wmk["metric"],
-        "wmk_eval_exit_code": wmk.get("exit_code"),
-        "wmk_eval_stdout": wmk.get("stdout", ""),
-        "wmk_eval_stderr": wmk.get("stderr", ""),
-        "elapsed_wmk": wmk["elapsed"],
+        "winml_eval_status": winml["status"],
+        "winml_metric": winml["metric"],
+        "winml_eval_exit_code": winml.get("exit_code"),
+        "winml_eval_stdout": winml.get("stdout", ""),
+        "winml_eval_stderr": winml.get("stderr", ""),
+        "elapsed_winml": winml["elapsed"],
         "pytorch_baseline_status": baseline["status"],
         "pytorch_baseline_metric": baseline["metric"],
         "pytorch_baseline_exit_code": baseline.get("exit_code"),
@@ -731,7 +758,7 @@ def _run_accuracy_phase(
         "delta_absolute": delta_abs,
         "delta_relative": delta_rel,
         "dataset_config": {k: v for k, v in ds_config.items() if k != "hf_token_required"},
-        "wmk_eval_command": wmk["command"],
+        "winml_eval_command": winml["command"],
         "pytorch_baseline_command": baseline["command"],
     }
 
@@ -758,8 +785,10 @@ def save_environment_info(path: Path) -> None:
     # Git HEAD commit info
     try:
         result = subprocess.run(
-            ["git", "log", "-1", "--format=%H%n%s%n%ai"],  # noqa: S603, S607
-            capture_output=True, text=True, timeout=5,
+            ["git", "log", "-1", "--format=%H%n%s%n%ai"],  # noqa: S607
+            capture_output=True,
+            text=True,
+            timeout=5,
         )
         if result.returncode == 0:
             lines = result.stdout.strip().splitlines()
@@ -807,6 +836,7 @@ def model_result_dir(output_dir: Path, hf_id: str, task: str = "") -> Path:
 
 
 def parse_args() -> argparse.Namespace:
+    """Parse command-line arguments."""
     parser = argparse.ArgumentParser(description="E2E evaluation runner — unified perf + accuracy")
     parser.add_argument(
         "--registry",
@@ -822,8 +852,8 @@ def parse_args() -> argparse.Namespace:
         default="perf",
         help=(
             "Evaluation signals to run (default: perf). "
-            "accuracy/both: wmk perf runs first to populate cache, "
-            "then wmk eval + pytorch baseline."
+            "accuracy/both: winml perf runs first to populate cache, "
+            "then winml eval + pytorch baseline."
         ),
     )
     parser.add_argument("--task", help="Filter by HF task")
@@ -878,6 +908,7 @@ def parse_args() -> argparse.Namespace:
 
 
 def main() -> None:
+    """Run E2E evaluation pipeline."""
     args = parse_args()
 
     # 1. Load registry
@@ -928,13 +959,7 @@ def main() -> None:
         safe_print(f"Registry: {len(entries)} models  (eval-type: {args.eval_type})")
         for e in entries:
             ds = get_dataset_config(e.hf_id, e.task)
-            skip_acc = (
-                ""
-                if args.eval_type == "perf"
-                else "  [task_default]"
-                if ds is None
-                else ""
-            )
+            skip_acc = "" if args.eval_type == "perf" else "  [task_default]" if ds is None else ""
             safe_print(
                 f"  [{e.priority}] {e.hf_id} / {e.task}  ({e.model_type}, {e.group}){skip_acc}"
             )
@@ -963,9 +988,9 @@ def main() -> None:
     save_environment_info(output_dir / "environment.json")
 
     # eval_types_run reflects what actually runs for each model:
-    #   "perf"     → wmk perf only
-    #   "accuracy" → wmk eval + pytorch baseline only (perf skipped)
-    #   "both"     → Strategy B: wmk perf first (populates cache), then wmk eval + baseline
+    #   "perf"     → winml perf only
+    #   "accuracy" → winml eval + pytorch baseline only (perf skipped)
+    #   "both"     → Strategy B: winml perf first (populates cache), then winml eval + baseline
     eval_types_run = (
         ["accuracy"]
         if args.eval_type == "accuracy"
@@ -1010,9 +1035,7 @@ def main() -> None:
         # Timeout skip list: skip known-timeout models and write a TIMEOUT result
         if (entry.hf_id, entry.task or "") in timeout_skip_set:
             reason = _get_timeout_skip_reason(entry.hf_id, entry.task or "")
-            safe_print(
-                f"\n[{i}/{len(entries)}] {label}  (SKIP - TIMEOUT: {reason})"
-            )
+            safe_print(f"\n[{i}/{len(entries)}] {label}  (SKIP - TIMEOUT: {reason})")
             model_dir.mkdir(parents=True, exist_ok=True)
             timeout_result = build_eval_result(
                 entry=entry,
@@ -1085,12 +1108,16 @@ def main() -> None:
             perf_proc: dict | None = None
             accuracy_result: dict | None = None
 
-            # Build phase: wmk config + wmk build → ONNX path
+            # Build phase: winml config + winml build → ONNX path
             # Build is shared by perf and eval, avoiding redundant builds.
             onnx_path: str | None = None
             if args.eval_type in ("perf", "both"):
                 build_result = _run_build(
-                    entry, args.device, _DEFAULT_PRECISION, args.timeout, model_dir,
+                    entry,
+                    args.device,
+                    _DEFAULT_PRECISION,
+                    args.timeout,
+                    model_dir,
                 )
                 if build_result["success"]:
                     onnx_path = build_result["onnx_path"]
@@ -1098,12 +1125,20 @@ def main() -> None:
             if args.eval_type == "accuracy":
                 # Accuracy-only: build + eval (no perf)
                 build_result = _run_build(
-                    entry, args.device, _DEFAULT_PRECISION, args.timeout, model_dir,
+                    entry,
+                    args.device,
+                    _DEFAULT_PRECISION,
+                    args.timeout,
+                    model_dir,
                 )
                 if build_result["success"]:
                     onnx_path = build_result["onnx_path"]
                     accuracy_result = _run_accuracy_phase(
-                        entry, args.device, args.timeout, model_dir, onnx_path,
+                        entry,
+                        args.device,
+                        args.timeout,
+                        model_dir,
+                        onnx_path,
                     )
                 else:
                     accuracy_result = {"skipped": True, "skip_reason": "build_failed"}
@@ -1124,7 +1159,11 @@ def main() -> None:
                         accuracy_result = {"skipped": True, "skip_reason": "perf_failed"}
                     else:
                         accuracy_result = _run_accuracy_phase(
-                            entry, args.device, args.timeout, model_dir, onnx_path,
+                            entry,
+                            args.device,
+                            args.timeout,
+                            model_dir,
+                            onnx_path,
                         )
                 else:
                     # Build failed
diff --git a/scripts/e2e_eval/run_pytorch_baseline.py b/scripts/e2e_eval/run_pytorch_baseline.py
index 68b2a10c6..e39bc44b1 100644
--- a/scripts/e2e_eval/run_pytorch_baseline.py
+++ b/scripts/e2e_eval/run_pytorch_baseline.py
@@ -6,11 +6,11 @@
 """PyTorch baseline inference for accuracy evaluation (Signal 2).
 
 Performs native PyTorch inference on a HuggingFace model using the same
-dataset configuration as ``wmk eval``, so both sides are always evaluated on
+dataset configuration as ``winml eval``, so both sides are always evaluated on
 identical inputs.
 
 Dataset config is read from ``utils/dataset_config.py`` — the authoritative
-source shared with run_eval.py.  When ``wmk eval`` is implemented inside
+source shared with run_eval.py.  When ``winml eval`` is implemented inside
 ModelKit, it should import from the same location.
 
 Output: prints a single JSON object as the last line on stdout:
diff --git a/scripts/e2e_eval/testsets/models_with_acc.json b/scripts/e2e_eval/testsets/models_with_acc.json
index b1dae87d1..109c6f578 100644
--- a/scripts/e2e_eval/testsets/models_with_acc.json
+++ b/scripts/e2e_eval/testsets/models_with_acc.json
@@ -155,7 +155,7 @@
       "path": "timm/mini-imagenet",
       "split": "test",
       "metric": "top1_accuracy",
-      "wmk_metric_key": "accuracy"
+      "winml_metric_key": "accuracy"
     }
   },
   {
@@ -168,7 +168,7 @@
       "path": "timm/mini-imagenet",
       "split": "test",
       "metric": "top1_accuracy",
-      "wmk_metric_key": "accuracy"
+      "winml_metric_key": "accuracy"
     }
   },
   {
@@ -181,7 +181,7 @@
       "build_script": "scripts/e2e_eval/datasets/build_fairface.py",
       "path": "~/.cache/winml/eval_datasets/build_fairface",
       "metric": "top1_accuracy",
-      "wmk_metric_key": "accuracy",
+      "winml_metric_key": "accuracy",
       "columns_mapping": {
         "label_column": "gender"
       }
@@ -197,7 +197,7 @@
       "path": "timm/mini-imagenet",
       "split": "test",
       "metric": "top1_accuracy",
-      "wmk_metric_key": "accuracy"
+      "winml_metric_key": "accuracy"
     }
   },
   {
@@ -210,7 +210,7 @@
       "path": "timm/mini-imagenet",
       "split": "test",
       "metric": "top1_accuracy",
-      "wmk_metric_key": "accuracy"
+      "winml_metric_key": "accuracy"
     }
   },
   {
@@ -239,7 +239,7 @@
       "path": "timm/mini-imagenet",
       "split": "test",
       "metric": "top1_accuracy",
-      "wmk_metric_key": "accuracy"
+      "winml_metric_key": "accuracy"
     }
   },
   {
@@ -252,7 +252,7 @@
       "build_script": "scripts/e2e_eval/datasets/build_fairface.py",
       "path": "~/.cache/winml/eval_datasets/build_fairface",
       "metric": "top1_accuracy",
-      "wmk_metric_key": "accuracy",
+      "winml_metric_key": "accuracy",
       "columns_mapping": {
         "label_column": "age"
       }
@@ -268,7 +268,7 @@
       "path": "detection-datasets/coco",
       "split": "val",
       "metric": "map",
-      "wmk_metric_key": "map",
+      "winml_metric_key": "map",
       "columns_mapping": {
         "annotation_column": "objects",
         "bbox_key": "bbox",
@@ -287,7 +287,7 @@
       "path": "detection-datasets/coco",
       "split": "val",
       "metric": "map",
-      "wmk_metric_key": "map",
+      "winml_metric_key": "map",
       "columns_mapping": {
         "annotation_column": "objects",
         "bbox_key": "bbox",
@@ -306,7 +306,7 @@
       "path": "detection-datasets/coco",
       "split": "val",
       "metric": "map",
-      "wmk_metric_key": "map",
+      "winml_metric_key": "map",
       "label_mapping_file": "scripts/e2e_eval/datasets/coco_to_rtdetr_labels.json",
       "columns_mapping": {
         "annotation_column": "objects",
@@ -326,7 +326,7 @@
       "path": "detection-datasets/coco",
       "split": "val",
       "metric": "map",
-      "wmk_metric_key": "map",
+      "winml_metric_key": "map",
       "label_mapping_file": "scripts/e2e_eval/datasets/coco_to_rtdetr_labels.json",
       "columns_mapping": {
         "annotation_column": "objects",
@@ -346,7 +346,7 @@
       "path": "detection-datasets/coco",
       "split": "val",
       "metric": "map",
-      "wmk_metric_key": "map",
+      "winml_metric_key": "map",
       "label_mapping_file": "scripts/e2e_eval/datasets/coco_to_rtdetr_labels.json",
       "columns_mapping": {
         "annotation_column": "objects",
@@ -366,7 +366,7 @@
       "path": "detection-datasets/fashionpedia",
       "split": "val",
       "metric": "map",
-      "wmk_metric_key": "map",
+      "winml_metric_key": "map",
       "columns_mapping": {
         "annotation_column": "objects",
         "bbox_key": "bbox",
@@ -386,7 +386,7 @@
       "path": "~/.cache/winml/eval_datasets/build_pubtables1m_detection",
       "split": "validation",
       "metric": "map",
-      "wmk_metric_key": "map",
+      "winml_metric_key": "map",
       "columns_mapping": {
         "annotation_column": "objects",
         "bbox_key": "bbox",
@@ -406,7 +406,7 @@
       "path": "~/.cache/winml/eval_datasets/build_pubtables1m_detection",
       "split": "validation",
       "metric": "map",
-      "wmk_metric_key": "map",
+      "winml_metric_key": "map",
       "label_mapping_file": "scripts/e2e_eval/datasets/pubtables_to_table_labels.json",
       "columns_mapping": {
         "annotation_column": "objects",
@@ -427,7 +427,7 @@
       "path": "~/.cache/winml/eval_datasets/build_pubtables1m_structure",
       "split": "validation",
       "metric": "map",
-      "wmk_metric_key": "map",
+      "winml_metric_key": "map",
       "columns_mapping": {
         "annotation_column": "objects",
         "bbox_key": "bbox",
@@ -446,7 +446,7 @@
       "path": "danjacobellis/scene_parse_150",
       "split": "validation",
       "metric": "mean_iou",
-      "wmk_metric_key": "mean_iou",
+      "winml_metric_key": "mean_iou",
       "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json",
       "columns_mapping": {
         "annotation_column": "annotation"
@@ -463,7 +463,7 @@
       "path": "danjacobellis/scene_parse_150",
       "split": "validation",
       "metric": "mean_iou",
-      "wmk_metric_key": "mean_iou",
+      "winml_metric_key": "mean_iou",
       "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json",
       "columns_mapping": {
         "annotation_column": "annotation"
@@ -480,7 +480,7 @@
       "path": "danjacobellis/scene_parse_150",
       "split": "validation",
       "metric": "mean_iou",
-      "wmk_metric_key": "mean_iou",
+      "winml_metric_key": "mean_iou",
       "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json",
       "columns_mapping": {
         "annotation_column": "annotation"
@@ -497,7 +497,7 @@
       "path": "danjacobellis/scene_parse_150",
       "split": "validation",
       "metric": "mean_iou",
-      "wmk_metric_key": "mean_iou",
+      "winml_metric_key": "mean_iou",
       "label_mapping_file": "scripts/e2e_eval/datasets/ade20k_gt_to_model_label.json",
       "columns_mapping": {
         "annotation_column": "annotation"
@@ -514,7 +514,7 @@
       "path": "mattmdjaga/human_parsing_dataset",
       "split": "train",
       "metric": "mean_iou",
-      "wmk_metric_key": "mean_iou",
+      "winml_metric_key": "mean_iou",
       "columns_mapping": {
         "annotation_column": "mask"
       }
@@ -531,7 +531,7 @@
       "split": "validation",
       "samples": 500,
       "metric": "mean_iou",
-      "wmk_metric_key": "mean_iou",
+      "winml_metric_key": "mean_iou",
       "label_mapping_file": "scripts/e2e_eval/datasets/cityscapes_label_to_train_id.json",
       "columns_mapping": {
         "annotation_column": "semantic_segmentation"
diff --git a/scripts/e2e_eval/utils/accuracy.py b/scripts/e2e_eval/utils/accuracy.py
index 78327b6c4..10a6b4c8b 100644
--- a/scripts/e2e_eval/utils/accuracy.py
+++ b/scripts/e2e_eval/utils/accuracy.py
@@ -1,3 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
 """Accuracy evaluation data structures, threshold logic, and summary generation.
 
 Mirrors the design of reporter.py (Signal 1):
@@ -28,7 +33,7 @@ class AccuracyVerdict(str, Enum):
     ACCURACY_PASS = "ACCURACY_PASS"  # noqa: S105  # |relative_delta| < 5%
     ACCURACY_AT_RISK = "ACCURACY_AT_RISK"  # 5% ≤ |relative_delta| < 10%
     ACCURACY_REGRESSION = "ACCURACY_REGRESSION"  # |relative_delta| ≥ 10%
-    EVAL_ERROR = "EVAL_ERROR"  # wmk eval or baseline subprocess failed
+    EVAL_ERROR = "EVAL_ERROR"  # winml eval or baseline subprocess failed
     SKIPPED = "SKIPPED"  # perf_failed
     DATASET_CONFIG_MISSING = "DATASET_CONFIG_MISSING"  # no dataset_config in registry
 
@@ -43,7 +48,7 @@ class AccuracyVerdict(str, Enum):
 
 
 def compute_delta(
-    wmk_metric: dict | None,
+    winml_metric: dict | None,
     baseline_metric: dict | None,
 ) -> tuple[float | None, float | None]:
     """Return (delta_absolute, delta_relative) from metric dicts.
@@ -55,13 +60,13 @@ def compute_delta(
-    means the WMK pipeline is *worse*.  The threshold in derive_verdict()
+    means the winml pipeline is *worse*.  The threshold in derive_verdict()
     uses abs(delta_relative) to handle both directions uniformly.
     """
-    if wmk_metric is None or baseline_metric is None:
+    if winml_metric is None or baseline_metric is None:
         return None, None
-    wmk_val = wmk_metric.get("value")
+    winml_val = winml_metric.get("value")
     base_val = baseline_metric.get("value")
-    if wmk_val is None or base_val is None:
+    if winml_val is None or base_val is None:
         return None, None
-    delta_abs = wmk_val - base_val
+    delta_abs = winml_val - base_val
     if base_val == 0:
         return round(delta_abs, 6), None
     return round(delta_abs, 6), round(delta_abs / base_val, 6)
@@ -89,9 +94,9 @@ def derive_verdict(accuracy: dict | None) -> AccuracyVerdict:
             return AccuracyVerdict.DATASET_CONFIG_MISSING
         return AccuracyVerdict.SKIPPED
 
-    wmk_ok = accuracy.get("wmk_eval_status") == "PASS"
+    winml_ok = accuracy.get("winml_eval_status") == "PASS"
     base_ok = accuracy.get("pytorch_baseline_status") == "PASS"
-    if not wmk_ok or not base_ok:
+    if not winml_ok or not base_ok:
         return AccuracyVerdict.EVAL_ERROR
 
     delta_rel = accuracy.get("delta_relative")
@@ -244,7 +249,7 @@ def _pct(acc: dict) -> str:
             acc = r["accuracy"]
             lines.append(
                 f"| {r['model']} | {r.get('task', '')} "
-                f"| {_val(acc, 'wmk_metric')} | {_val(acc, 'pytorch_baseline_metric')} "
+                f"| {_val(acc, 'winml_metric')} | {_val(acc, 'pytorch_baseline_metric')} "
                 f"| {_pct(acc)} |"
             )
     else:
@@ -265,7 +270,7 @@ def _pct(acc: dict) -> str:
             acc = r["accuracy"]
             lines.append(
                 f"| {r['model']} | {r.get('task', '')} "
-                f"| {_val(acc, 'wmk_metric')} | {_val(acc, 'pytorch_baseline_metric')} "
+                f"| {_val(acc, 'winml_metric')} | {_val(acc, 'pytorch_baseline_metric')} "
                 f"| {_pct(acc)} |"
             )
     else:
diff --git a/scripts/e2e_eval/utils/dataset_config.py b/scripts/e2e_eval/utils/dataset_config.py
index a7566e7c7..cc4c42566 100644
--- a/scripts/e2e_eval/utils/dataset_config.py
+++ b/scripts/e2e_eval/utils/dataset_config.py
@@ -1,3 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
 """Dataset configuration for accuracy evaluation (Signal 2).
 
 Single source of truth: the model registry (e.g. ``testsets/models_with_acc.json``).
@@ -5,7 +10,7 @@
 
 Resolution:
 1. Per-model config registered from the registry's ``dataset_config`` field.
-2. None — caller decides whether to skip or let wmk eval use its
+2. None — caller decides whether to skip or let winml eval use its
    built-in task defaults.
 """
 
@@ -41,6 +46,6 @@ def get_dataset_config(hf_id: str, task: str) -> dict | None:
     """Return dataset config for a model, or None.
 
     None means no explicit config was found; the caller can either
-    skip or let wmk eval / pytorch baseline use built-in task defaults.
+    skip or let winml eval / pytorch baseline use built-in task defaults.
     """
     return _DATASET_CONFIGS.get((hf_id, task))
diff --git a/src/winml/modelkit/analyze/core/model_validators/constant_folding_validator.py b/src/winml/modelkit/analyze/core/model_validators/constant_folding_validator.py
index c17e1257b..ebd463792 100644
--- a/src/winml/modelkit/analyze/core/model_validators/constant_folding_validator.py
+++ b/src/winml/modelkit/analyze/core/model_validators/constant_folding_validator.py
@@ -122,7 +122,7 @@ def _create_information(self, constant_nodes: list[dict]) -> Information:
                 [
                     {
                         "title": "Normalize model",
-                        "command": "wmk optimize --model model.onnx",
+                        "command": "winml optimize --model model.onnx",
                     }
                 ],
                 indent=2,
diff --git a/src/winml/modelkit/analyze/core/model_validators/pattern_matching_validator.py b/src/winml/modelkit/analyze/core/model_validators/pattern_matching_validator.py
index 22019a7af..faa478304 100644
--- a/src/winml/modelkit/analyze/core/model_validators/pattern_matching_validator.py
+++ b/src/winml/modelkit/analyze/core/model_validators/pattern_matching_validator.py
@@ -69,10 +69,10 @@ class PatternMatchingValidator(ModelValidator):
                 "Pattern matching requires all nodes to have non-empty names.\n\n"
             ),
             # Todo: Update with actual command when available
-            action_method="wmk onnx_normalize",
+            action_method="winml onnx_normalize",
             action_description=("Add missing node names to the model using ONNX utilities"),
             action_command=(
-                "[Placeholder] wmk onnx_normalize <input_model.onnx> <output_model.onnx>"
+                "[Placeholder] winml onnx_normalize <input_model.onnx> <output_model.onnx>"
             ),
         ),
         PatternErrorConfig(
diff --git a/src/winml/modelkit/analyze/core/model_validators/qdq_validation_validator.py b/src/winml/modelkit/analyze/core/model_validators/qdq_validation_validator.py
index 2dc5dfbf9..12e09a3c7 100644
--- a/src/winml/modelkit/analyze/core/model_validators/qdq_validation_validator.py
+++ b/src/winml/modelkit/analyze/core/model_validators/qdq_validation_validator.py
@@ -101,7 +101,7 @@ def _create_information(self, invalid_nodes: list[str]) -> Information:
                 [
                     {
                         "title": "Re-quantize model",
-                        "command": "wmk quantize --model model.onnx --output model-qdq.onnx",
+                        "command": "winml quantize --model model.onnx --output model-qdq.onnx",
                     }
                 ],
                 indent=2,
diff --git a/src/winml/modelkit/analyze/core/model_validators/shape_inference_validator.py b/src/winml/modelkit/analyze/core/model_validators/shape_inference_validator.py
index 67fb6ee10..b21ccc4cc 100644
--- a/src/winml/modelkit/analyze/core/model_validators/shape_inference_validator.py
+++ b/src/winml/modelkit/analyze/core/model_validators/shape_inference_validator.py
@@ -118,7 +118,7 @@ def _create_information(self, affected_ops: list[dict]) -> Information:
                 [
                     {
                         "title": "Normalize model",
-                        "command": "wmk optimize --model model.onnx",
+                        "command": "winml optimize --model model.onnx",
                     }
                 ],
                 indent=2,
diff --git a/src/winml/modelkit/build/hf.py b/src/winml/modelkit/build/hf.py
index d4a1106e2..73d4f5d1d 100644
--- a/src/winml/modelkit/build/hf.py
+++ b/src/winml/modelkit/build/hf.py
@@ -455,11 +455,11 @@ def _load_model(
                     "Options:\n"
                     "  1. Provide --model <model_id> to use pretrained weights\n"
                     "  2. Ensure config has loader.model_type (e.g., 'bert', 'resnet')\n"
-                    "  3. Regenerate config: wmk config -m <model_id> -o config.json"
+                    "  3. Regenerate config: winml config -m <model_id> -o config.json"
                 )
             hf_config = AutoConfig.for_model(model_type)
 
-        # Prefer explicit model_class from loader config (set by wmk config),
+        # Prefer explicit model_class from loader config (set by winml config),
         # fall back to resolve_task_and_model_class for auto-detection.
         model_class = None
         if config.loader.model_class:
diff --git a/src/winml/modelkit/cache/__init__.py b/src/winml/modelkit/cache/__init__.py
index f5c1f950f..74648397e 100644
--- a/src/winml/modelkit/cache/__init__.py
+++ b/src/winml/modelkit/cache/__init__.py
@@ -5,7 +5,7 @@
 """Cache management for ModelKit.
 
 Provides deterministic path computation for cached build artifacts.
-Both ``from_pretrained()`` and ``wmk build --use-cache`` use these
+Both ``from_pretrained()`` and ``winml build --use-cache`` use these
 functions to guarantee identical paths for the same model+config.
 
 Usage::
diff --git a/src/winml/modelkit/cli.py b/src/winml/modelkit/cli.py
index 4a50847d0..263494c87 100644
--- a/src/winml/modelkit/cli.py
+++ b/src/winml/modelkit/cli.py
@@ -8,12 +8,12 @@
 command discovery from the commands/ directory.
 
 Usage:
-    wmk --version
-    wmk --help
-    wmk export --model MODEL --output PATH [--backend BACKEND] [--verbose]
+    winml --version
+    winml --help
+    winml export --model MODEL --output PATH [--backend BACKEND] [--verbose]
 
 Entry Points:
-    - Standalone CLI: wmk
+    - Standalone CLI: winml
     - Module execution: python -m winml.modelkit
 """
 
@@ -32,7 +32,7 @@
 
 
 @click.group()
-@click.version_option(version=__version__, prog_name="wmk")
+@click.version_option(version=__version__, prog_name="winml")
 @click.option(
     "--debug",
     is_flag=True,
diff --git a/src/winml/modelkit/commands/analyze.py b/src/winml/modelkit/commands/analyze.py
index 479211ec8..7c628a884 100644
--- a/src/winml/modelkit/commands/analyze.py
+++ b/src/winml/modelkit/commands/analyze.py
@@ -2,19 +2,19 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""Analyze command for wmk CLI.
+"""Analyze command for winml CLI.
 
 This module provides the analyze command that analyzes ONNX models
 for runtime support across NPU execution providers.
 
 Usage:
-    wmk analyze --model MODEL --ep EP --device DEVICE [OPTIONS]
+    winml analyze --model MODEL --ep EP --device DEVICE [OPTIONS]
 
 Examples:
-    wmk analyze --model model.onnx --ep QNNExecutionProvider --device NPU
-    wmk analyze --model model.onnx --ep qnn --device NPU
-    wmk analyze --model model.onnx --ep ov --device GPU --information
-    wmk analyze --model model.onnx --ep vitis --device GPU --output results.json
+    winml analyze --model model.onnx --ep QNNExecutionProvider --device NPU
+    winml analyze --model model.onnx --ep qnn --device NPU
+    winml analyze --model model.onnx --ep ov --device GPU --information
+    winml analyze --model model.onnx --ep vitis --device GPU --output results.json
 """
 
 from __future__ import annotations
@@ -100,27 +100,27 @@ def analyze(
     Examples:
     Analyze all supported EPs with default device:
 
-        wmk analyze --model model.onnx
+        winml analyze --model model.onnx
 
     Check QNN NPU support (full name):
 
-        wmk analyze --model model.onnx --ep QNNExecutionProvider --device NPU
+        winml analyze --model model.onnx --ep QNNExecutionProvider --device NPU
 
     Check QNN NPU support (using alias):
 
-        wmk analyze --model model.onnx --ep qnn --device NPU
+        winml analyze --model model.onnx --ep qnn --device NPU
 
     Check Intel OpenVINO GPU support with recommendations (using alias):
 
-        wmk analyze --model model.onnx --ep ov --device GPU --information
+        winml analyze --model model.onnx --ep ov --device GPU --information
 
     Analyze all EPs and save results to file:
 
-        wmk analyze --model model.onnx --output results.json
+        winml analyze --model model.onnx --output results.json
 
     Use HTP metadata for enhanced pattern extraction:
 
-        wmk analyze --model model.onnx
+        winml analyze --model model.onnx
             --ep OpenVINOExecutionProvider --driver GPU --information --htp-metadata metadata.json
     """
     # Configure logging
diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py
index 1fbaf23bb..eebed1ff4 100644
--- a/src/winml/modelkit/commands/build.py
+++ b/src/winml/modelkit/commands/build.py
@@ -9,11 +9,11 @@
 auto-detects ONNX vs HF input, calls the appropriate API, and reports results.
 
 Usage:
-    wmk build -c config.json -m microsoft/resnet-50 -o output/
-    wmk build -c config.json -m model.onnx -o output/
-    wmk build -c config.json -m bert-base-uncased -o output/ --no-quant --no-compile
-    wmk build -c config.json -m microsoft/resnet-50 --random-init -o output/
-    wmk build -c config.json -m microsoft/resnet-50 -o output/ --rebuild -v
+    winml build -c config.json -m microsoft/resnet-50 -o output/
+    winml build -c config.json -m model.onnx -o output/
+    winml build -c config.json -m bert-base-uncased -o output/ --no-quant --no-compile
+    winml build -c config.json -m microsoft/resnet-50 --random-init -o output/
+    winml build -c config.json -m microsoft/resnet-50 -o output/ --rebuild -v
 """
 
 from __future__ import annotations
@@ -213,7 +213,7 @@ def _build_modules(
     "config_file",
     type=click.Path(exists=True),
     required=True,
-    help="WinMLBuildConfig JSON file (from wmk config)",
+    help="WinMLBuildConfig JSON file (from winml config)",
 )
 @click.option(
     "-m",
@@ -331,8 +331,8 @@ def build(
 ) -> None:
     r"""Build a WinML-optimized ONNX model from a HuggingFace model or .onnx file.
 
-    Requires a config file generated by 'wmk config'. The config file already
-    contains device/precision settings (applied during 'wmk config' generation).
+    Requires a config file generated by 'winml config'. The config file already
+    contains device/precision settings (applied during 'winml config' generation).
     Specify either --output-dir or --use-cache for artifact destination.
 
     If -m points to an existing .onnx file, the build skips export and runs
@@ -341,22 +341,22 @@ def build(
     \b
     Examples:
         # Full pipeline with pretrained weights
-        wmk build -c config.json -m microsoft/resnet-50 -o output/
+        winml build -c config.json -m microsoft/resnet-50 -o output/
 
         # Build from pre-exported ONNX file
-        wmk build -c config.json -m model.onnx -o output/
+        winml build -c config.json -m model.onnx -o output/
 
         # Export + optimize only
-        wmk build -c config.json -m bert-base-uncased -o output/ --no-quant --no-compile
+        winml build -c config.json -m bert-base-uncased -o output/ --no-quant --no-compile
 
         # Random-weight build (no weight download)
-        wmk build -c config.json -m microsoft/resnet-50 --random-init -o output/
+        winml build -c config.json -m microsoft/resnet-50 --random-init -o output/
 
         # Use global cache
-        wmk build -c config.json -m microsoft/resnet-50 --use-cache
+        winml build -c config.json -m microsoft/resnet-50 --use-cache
 
         # Force rebuild
-        wmk build -c config.json -m microsoft/resnet-50 -o output/ --rebuild
+        winml build -c config.json -m microsoft/resnet-50 -o output/ --rebuild
     """
     # Inherit debug flag from parent context
     if ctx.obj and ctx.obj.get("debug"):
@@ -426,7 +426,7 @@ def build(
                 raise click.UsageError("Module config array is empty -- nothing to build.")
 
             console.print()
-            console.print("[bold]wmk build[/bold] (module mode)")
+            console.print("[bold]winml build[/bold] (module mode)")
             console.print(f"  Config:     {Path(config_file).name}")
             console.print(f"  Modules:    {len(configs)}")
             console.print(f"  Output:     {resolved_dir}")
@@ -504,7 +504,7 @@ def build(
             # Report build plan
             model_label = f"{model_id} (random-init)" if random_init else model_id
             console.print()
-            console.print("[bold]wmk build[/bold]")
+            console.print("[bold]winml build[/bold]")
             console.print(f"  Config:     {Path(config_file).name}")
             console.print(f"  Model:      {model_label}")
             console.print(f"  Output:     {resolved_dir}")
diff --git a/src/winml/modelkit/commands/compile.py b/src/winml/modelkit/commands/compile.py
index a21b9ed77..eb849d044 100644
--- a/src/winml/modelkit/commands/compile.py
+++ b/src/winml/modelkit/commands/compile.py
@@ -2,19 +2,19 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""Compile command for wmk CLI.
+"""Compile command for winml CLI.
 
 This module provides the compile command that compiles ONNX models to
 EP-specific formats (e.g., QNN EPContext) with optional quantization.
 
 Usage:
-    wmk compile --model MODEL [OPTIONS]
+    winml compile --model MODEL [OPTIONS]
 
 Examples:
-    wmk compile -m model.onnx
-    wmk compile -m model.onnx --device npu
-    wmk compile -m model.onnx --device gpu --ep migraphx
-    wmk compile -m model_qdq.onnx --no-quantize
+    winml compile -m model.onnx
+    winml compile -m model.onnx --device npu
+    winml compile -m model.onnx --device gpu --ep migraphx
+    winml compile -m model_qdq.onnx --no-quantize
 """
 
 from __future__ import annotations
@@ -127,19 +127,19 @@ def compile(
     \b
     Examples:
         # Compile for NPU (default, uses QNN/VitisAI)
-        wmk compile -m model.onnx
+        winml compile -m model.onnx
 
         # Compile for NPU with explicit VitisAI EP
-        wmk compile -m model.onnx --ep vitisai
+        winml compile -m model.onnx --ep vitisai
 
         # Compile for GPU with MIGraphX
-        wmk compile -m model.onnx --device gpu --ep migraphx
+        winml compile -m model.onnx --device gpu --ep migraphx
 
         # Compile pre-quantized model
-        wmk compile -m model_qdq.onnx --no-quantize
+        winml compile -m model_qdq.onnx --no-quantize
 
         # Compile using QAIRT SDK
-        wmk compile -m model.onnx --compiler qairt --qnn-sdk-root /path/to/sdk
+        winml compile -m model.onnx --compiler qairt --qnn-sdk-root /path/to/sdk
     """
     # Inherit debug mode from parent
     if ctx.obj and ctx.obj.get("debug"):
@@ -179,7 +179,7 @@ def compile(
         console.print(
             "[yellow]Note:[/yellow] --no-quantize has no effect. "
             "Quantization is no longer performed during compile. "
-            "Use 'wmk quantize' before 'wmk compile' to control quantization."
+            "Use 'winml quantize' before 'winml compile' to control quantization."
         )
 
     # Show info
@@ -203,13 +203,9 @@ def compile(
             if result.output_path:
                 console.print(f"[dim]Output: {result.output_path}[/dim]")
             if result.compile_time:
-                console.print(
-                    f"[dim]Compile time: {result.compile_time:.2f}s[/dim]"
-                )
+                console.print(f"[dim]Compile time: {result.compile_time:.2f}s[/dim]")
             if result.total_time:
-                console.print(
-                    f"[dim]Total time: {result.total_time:.2f}s[/dim]"
-                )
+                console.print(f"[dim]Total time: {result.total_time:.2f}s[/dim]")
         else:
             console.print("\n[bold red]Compilation failed:[/bold red]")
             for error in result.errors:
diff --git a/src/winml/modelkit/commands/config.py b/src/winml/modelkit/commands/config.py
index 809722048..76cbe5883 100644
--- a/src/winml/modelkit/commands/config.py
+++ b/src/winml/modelkit/commands/config.py
@@ -11,13 +11,13 @@
 export=None (marking it as an ONNX build that skips the export stage).
 
 Usage:
-    wmk config -m microsoft/resnet-50
-    wmk config -m bert-base-uncased --task text-classification
-    wmk config -m model.onnx
-    wmk config --model-type bert
-    wmk config --model-type bert --task fill-mask
-    wmk config -m microsoft/resnet-50 --module ResNetConvLayer
-    wmk config -m bert-base-uncased -o config.json
+    winml config -m microsoft/resnet-50
+    winml config -m bert-base-uncased --task text-classification
+    winml config -m model.onnx
+    winml config --model-type bert
+    winml config --model-type bert --task fill-mask
+    winml config -m microsoft/resnet-50 --module ResNetConvLayer
+    winml config -m bert-base-uncased -o config.json
 """
 
 from __future__ import annotations
@@ -35,9 +35,7 @@
 console = Console(stderr=True)
 
 
-def _apply_stage_overrides(
-    cfg: Any, *, no_quant: bool, no_compile: bool
-) -> None:
+def _apply_stage_overrides(cfg: Any, *, no_quant: bool, no_compile: bool) -> None:
     """Apply --no-quant and --no-compile CLI overrides to a config."""
     if no_quant:
         cfg.quant = None
@@ -109,8 +107,7 @@ def _is_onnx_file(model_input: str) -> bool:
     "device",
     type=click.Choice(["auto", "npu", "gpu", "cpu"], case_sensitive=False),
     default="auto",
-    help="Target device (affects quant/compile config). "
-    "Default: auto (no changes to config).",
+    help="Target device (affects quant/compile config). Default: auto (no changes to config).",
 )
 @click.option(
     "--ep",
@@ -199,34 +196,34 @@ def config(
     \b
     Examples:
         # Basic usage - auto-detect everything
-        wmk config -m microsoft/resnet-50
+        winml config -m microsoft/resnet-50
 
         # Override task
-        wmk config -m bert-base-uncased --task text-classification
+        winml config -m bert-base-uncased --task text-classification
 
         # Target NPU with int8 quantization
-        wmk config -m microsoft/resnet-50 --device npu --precision int8
+        winml config -m microsoft/resnet-50 --device npu --precision int8
 
         # Target GPU with fp16 (no quantization)
-        wmk config -m bert-base-uncased --device gpu --precision fp16
+        winml config -m bert-base-uncased --device gpu --precision fp16
 
         # Model type only (uses default HF config, auto-detects task)
-        wmk config --model-type bert
+        winml config --model-type bert
 
         # Model type + task
-        wmk config --model-type bert --task fill-mask
+        winml config --model-type bert --task fill-mask
 
         # Override with JSON config file
-        wmk config -m bert-base-uncased -c overrides.json
+        winml config -m bert-base-uncased -c overrides.json
 
         # Vision model with shape overrides ({"height": 224, "width": 224})
-        wmk config --model-type resnet -t image-classification --shape-config shapes.json
+        winml config --model-type resnet -t image-classification --shape-config shapes.json
 
         # Save to file
-        wmk config -m bert-base-uncased -o config.json
+        winml config -m bert-base-uncased -o config.json
 
         # Generate configs for submodules
-        wmk config -m microsoft/resnet-50 --module ResNetConvLayer
+        winml config -m microsoft/resnet-50 --module ResNetConvLayer
     """
     if verbose:
         logging.basicConfig(level=logging.DEBUG)
@@ -260,9 +257,7 @@ def config(
                     )
                 override = WinMLBuildConfig.from_dict(data)
             except json.JSONDecodeError as e:
-                raise click.UsageError(
-                    f"Invalid JSON in config file {config_path}: {e}"
-                ) from e
+                raise click.UsageError(f"Invalid JSON in config file {config_path}: {e}") from e
             console.print(f"[dim]Loaded overrides from {config_path.name}[/dim]")
 
         # Load shape_config (shape overrides) from JSON file if provided
@@ -330,9 +325,7 @@ def config(
                 # Apply --no-quant / --no-compile overrides to each config
                 for cfg in configs:
                     _apply_stage_overrides(cfg, no_quant=no_quant, no_compile=no_compile)
-                console.print(
-                    f"[green]Found {len(configs)} submodules matching '{module}'[/green]"
-                )
+                console.print(f"[green]Found {len(configs)} submodules matching '{module}'[/green]")
                 output_data = [cfg.to_dict() for cfg in configs]
             else:
                 # Normal mode: result is WinMLBuildConfig
@@ -343,10 +336,7 @@ def config(
                 if not task and not module:
                     auto_task = config_obj.loader.task
                     source = model_type or hf_model
-                    console.print(
-                        f"[dim]Auto-selected task: {auto_task} "
-                        f"(from '{source}')[/dim]"
-                    )
+                    console.print(f"[dim]Auto-selected task: {auto_task} (from '{source}')[/dim]")
                 console.print(
                     f"[green]Generated config for task '{config_obj.loader.task}'[/green]"
                 )
diff --git a/src/winml/modelkit/commands/eval.py b/src/winml/modelkit/commands/eval.py
index 9b9278f2d..93331cf27 100644
--- a/src/winml/modelkit/commands/eval.py
+++ b/src/winml/modelkit/commands/eval.py
@@ -142,15 +142,15 @@ def eval(
     \b
     Examples:
         # Use default dataset (auto-detected from task)
-        wmk eval -m microsoft/resnet-50
-        wmk eval -m model.onnx --model-id dslim/bert-base-NER
+        winml eval -m microsoft/resnet-50
+        winml eval -m model.onnx --model-id dslim/bert-base-NER
 
         # Specify dataset explicitly
-        wmk eval -m microsoft/resnet-50 --dataset imagenet-1k
-        wmk eval -m model.onnx --model-id microsoft/resnet-50 --dataset imagenet-1k
+        winml eval -m microsoft/resnet-50 --dataset imagenet-1k
+        winml eval -m model.onnx --model-id microsoft/resnet-50 --dataset imagenet-1k
 
         # Multi-config dataset with column overrides
-        wmk eval -m model.onnx --model-id Intel/bert-base-uncased-mrpc \\
+        winml eval -m model.onnx --model-id Intel/bert-base-uncased-mrpc \\
             --dataset glue --dataset-name mrpc \\
             --column input_column=sentence1
     """
@@ -163,7 +163,7 @@ def eval(
 
         if task is None:
             raise click.UsageError(
-                "--schema requires --task. Example: wmk eval --schema --task object-detection"
+                "--schema requires --task. Example: winml eval --schema --task object-detection"
             )
         cls = _EVALUATOR_REGISTRY.get(task, WinMLEvaluator)
         _print_schema(task, cls.schema_info())
diff --git a/src/winml/modelkit/commands/export.py b/src/winml/modelkit/commands/export.py
index fda018c1f..4e8f0b3f3 100644
--- a/src/winml/modelkit/commands/export.py
+++ b/src/winml/modelkit/commands/export.py
@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""Export command for wmk CLI.
+"""Export command for winml CLI.
 
 This module provides the export command that uses export_onnx() as the single
 implementation path for HuggingFace to ONNX model conversion.
@@ -13,13 +13,13 @@
 - Supports MODEL_BUILD_CONFIGS lookup for input_tensors fallback
 
 Usage:
-    wmk export --model MODEL --output PATH [--verbose] [--with-report]
+    winml export --model MODEL --output PATH [--verbose] [--with-report]
 
 Examples:
-    wmk export -m prajjwal1/bert-tiny -o model.onnx
-    wmk export -m facebook/convnext-tiny-224 -o convnext.onnx -v --with-report
-    wmk export -m bert-base-uncased -o bert.onnx --input-specs inputs.json
-    wmk export -m bert-base-uncased -o bert.onnx --export-config config.json
+    winml export -m prajjwal1/bert-tiny -o model.onnx
+    winml export -m facebook/convnext-tiny-224 -o convnext.onnx -v --with-report
+    winml export -m bert-base-uncased -o bert.onnx --input-specs inputs.json
+    winml export -m bert-base-uncased -o bert.onnx --export-config config.json
 """
 
 from __future__ import annotations
@@ -143,28 +143,28 @@ def export(
     \b
     Examples:
         # Basic export
-        wmk export --model prajjwal1/bert-tiny --output model.onnx
+        winml export --model prajjwal1/bert-tiny --output model.onnx
 
         # Short form
-        wmk export -m prajjwal1/bert-tiny -o model.onnx
+        winml export -m prajjwal1/bert-tiny -o model.onnx
 
         # With verbose output and full reporting
-        wmk export -m facebook/convnext-tiny-224 -o convnext.onnx -v --with-report
+        winml export -m facebook/convnext-tiny-224 -o convnext.onnx -v --with-report
 
         # Clean ONNX output (no hierarchy metadata, for optimization)
-        wmk export -m prajjwal1/bert-tiny -o model.onnx --clean-onnx
+        winml export -m prajjwal1/bert-tiny -o model.onnx --clean-onnx
 
         # Use PyTorch dynamo export (for rich node metadata)
-        wmk export -m prajjwal1/bert-tiny -o model.onnx --dynamo
+        winml export -m prajjwal1/bert-tiny -o model.onnx --dynamo
 
         # Include torch.nn modules in hierarchy
-        wmk export -m prajjwal1/bert-tiny -o model.onnx --torch-module LayerNorm,Embedding
+        winml export -m prajjwal1/bert-tiny -o model.onnx --torch-module LayerNorm,Embedding
 
         # Custom input specifications from JSON file
-        wmk export -m bert-base-uncased -o bert.onnx --input-specs inputs.json
+        winml export -m bert-base-uncased -o bert.onnx --input-specs inputs.json
 
         # Custom ONNX export configuration
-        wmk export -m bert-base-uncased -o bert.onnx --export-config config.json
+        winml export -m bert-base-uncased -o bert.onnx --export-config config.json
     """
     # Inherit debug mode from parent
     if ctx.obj.get("debug"):
diff --git a/src/winml/modelkit/commands/hub.py b/src/winml/modelkit/commands/hub.py
index 7243921a2..cc5c1093b 100644
--- a/src/winml/modelkit/commands/hub.py
+++ b/src/winml/modelkit/commands/hub.py
@@ -16,11 +16,11 @@
   Negative means the quantized model scored lower.
 
 Usage:
-    wmk hub
-    wmk hub --model-type bert
-    wmk hub --task text-classification
-    wmk hub --model ProsusAI/finbert
-    wmk hub --output catalog.json
+    winml hub
+    winml hub --model-type bert
+    winml hub --task text-classification
+    winml hub --model ProsusAI/finbert
+    winml hub --output catalog.json
 """
 
 from __future__ import annotations
@@ -146,7 +146,6 @@ def _overall_verdict(accuracy: dict[str, Any]) -> str:
     return "PASS"
 
 
-
 # ---------------------------------------------------------------------------
 # List view
 # ---------------------------------------------------------------------------
@@ -186,12 +185,12 @@ def _build_list_renderable(models: list[dict[str, Any]]) -> Group:
     panel = Panel(
         table,
         title=f"[bold]ModelKit Catalog[/bold]  [dim]|[/dim]  "
-              f"[bold cyan]{len(models)}[/bold cyan] validated model(s)",
+        f"[bold cyan]{len(models)}[/bold cyan] validated model(s)",
         border_style="blue",
         padding=(0, 1),
     )
     hint = Text(
-        "Use  wmk hub --model <id>  to see perf and accuracy details.",
+        "Use  winml hub --model <id>  to see perf and accuracy details.",
         style="dim",
     )
     return Group(panel, hint)
@@ -310,9 +309,7 @@ def _build_detail_renderable(m: dict[str, Any]) -> Group:
                 Text(f"{sign}{drop:.2f}%", style=ep_style),
             )
 
-        panels.append(
-            Panel(acc, title=acc_panel_title, border_style="blue", padding=(0, 1))
-        )
+        panels.append(Panel(acc, title=acc_panel_title, border_style="blue", padding=(0, 1)))
 
     if not perf and not accuracy:
         panels.append(
@@ -359,8 +356,7 @@ def _output_detail(models: list[dict[str, Any]], model_id: str) -> dict[str, Any
             msg += "\n".join(f"  {c}" for c in candidates)
         else:
             msg = (
-                f"Model '{model_id}' not found in the catalog. "
-                "Run 'wmk hub' to list all models."
+                f"Model '{model_id}' not found in the catalog. Run 'winml hub' to list all models."
             )
         raise click.ClickException(msg)
 
@@ -440,16 +436,16 @@ def hub(
       drop %         -- relative change vs FP32 baseline
 
     \b
-    Use ``wmk hub --model <model_id>`` for per-model perf and accuracy.
-    Use ``wmk inspect -m <model_id>`` for architecture details.
+    Use ``winml hub --model <model_id>`` for per-model perf and accuracy.
+    Use ``winml inspect -m <model_id>`` for architecture details.
 
     \b
     Examples:
-        wmk hub
-        wmk hub --model-type bert
-        wmk hub --task text-classification
-        wmk hub --model ProsusAI/finbert
-        wmk hub --output results/catalog.json
+        winml hub
+        winml hub --model-type bert
+        winml hub --task text-classification
+        winml hub --model ProsusAI/finbert
+        winml hub --output results/catalog.json
     """
     try:
         catalog = _load_catalog()
diff --git a/src/winml/modelkit/commands/inspect.py b/src/winml/modelkit/commands/inspect.py
index 9f599aec4..b79f6a649 100644
--- a/src/winml/modelkit/commands/inspect.py
+++ b/src/winml/modelkit/commands/inspect.py
@@ -8,10 +8,10 @@
 with ModelKit, including loader, exporter, and WinML configurations.
 
 Usage:
-    wmk inspect -m openai/clip-vit-base-patch32
-    wmk inspect -m google-bert/bert-base-uncased --format json
-    wmk inspect -m facebook/detr-resnet-50 --verbose
-    wmk inspect -m openai/clip-vit-base-patch32 --hierarchy
+    winml inspect -m openai/clip-vit-base-patch32
+    winml inspect -m google-bert/bert-base-uncased --format json
+    winml inspect -m facebook/detr-resnet-50 --verbose
+    winml inspect -m openai/clip-vit-base-patch32 --hierarchy
 """
 
 from __future__ import annotations
@@ -84,19 +84,19 @@ def inspect(
     \b
     Examples:
         # Basic inspection
-        wmk inspect -m openai/clip-vit-base-patch32
+        winml inspect -m openai/clip-vit-base-patch32
 
         # JSON output for scripting
-        wmk inspect -m google-bert/bert-base-uncased --format json
+        winml inspect -m google-bert/bert-base-uncased --format json
 
         # Show full build configuration
-        wmk inspect -m facebook/detr-resnet-50 --verbose
+        winml inspect -m facebook/detr-resnet-50 --verbose
 
         # Include HF module hierarchy (no weight download)
-        wmk inspect -m openai/clip-vit-base-patch32 --hierarchy
+        winml inspect -m openai/clip-vit-base-patch32 --hierarchy
 
         # Combined verbose + hierarchy
-        wmk inspect -m google-bert/bert-base-uncased -v -H
+        winml inspect -m google-bert/bert-base-uncased -v -H
     """
     # Import here to defer heavy transformers/torch imports
     from ..inspect import (
diff --git a/src/winml/modelkit/commands/optimize.py b/src/winml/modelkit/commands/optimize.py
index 52e611cbe..96fdf4c8d 100644
--- a/src/winml/modelkit/commands/optimize.py
+++ b/src/winml/modelkit/commands/optimize.py
@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""Optimize command for wmk CLI.
+"""Optimize command for winml CLI.
 
 This module provides the optimize command that uses the capability-driven
 optimizer for ONNX model optimization with fusion and graph optimizations.
@@ -11,12 +11,12 @@
 the Open-Closed Principle from the design documentation.
 
 Usage:
-    wmk optimize --model MODEL --output OUTPUT [OPTIONS]
+    winml optimize --model MODEL --output OUTPUT [OPTIONS]
 
 Examples:
-    wmk optimize -m model.onnx -o model_opt.onnx
-    wmk optimize -m model.onnx -o model_opt.onnx --enable-gelu-fusion
-    wmk optimize -m model.onnx --preset transformer-optimized
+    winml optimize -m model.onnx -o model_opt.onnx
+    winml optimize -m model.onnx -o model_opt.onnx --enable-gelu-fusion
+    winml optimize -m model.onnx --preset transformer-optimized
 """
 
 from __future__ import annotations
@@ -253,26 +253,26 @@ def optimize(
     \b
     Examples:
         # List available capabilities
-        wmk optimize --list-capabilities
+        winml optimize --list-capabilities
 
         # List available rewrite pattern families
-        wmk optimize --list-rewrites
+        winml optimize --list-rewrites
 
         # Pattern rewrite flags follow: --enable-{source-slug}-{target-slug}
         # Run --list-rewrites to discover all available flag names.
         # Example (all GELU variants → single Gelu node):
-        wmk optimize -m model.onnx -o out.onnx --enable-gelu-singlegelu
+        winml optimize -m model.onnx -o out.onnx --enable-gelu-singlegelu
         # Example (only Gelu1 variant → single Gelu node):
-        wmk optimize -m model.onnx -o out.onnx --enable-gelu1-singlegelu
+        winml optimize -m model.onnx -o out.onnx --enable-gelu1-singlegelu
 
         # Basic optimization with GELU fusion
-        wmk optimize -m model.onnx -o model_opt.onnx --enable-gelu-fusion
+        winml optimize -m model.onnx -o model_opt.onnx --enable-gelu-fusion
 
         # Use transformer preset
-        wmk optimize -m bert.onnx --preset transformer-optimized
+        winml optimize -m bert.onnx --preset transformer-optimized
 
         # Use config file
-        wmk optimize -m model.onnx -c config.toml
+        winml optimize -m model.onnx -c config.toml
     """
     # Import capabilities (late import to speed up CLI)
     from ..optim.pipes import get_all_capabilities
diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py
index e1b0ecadb..8affcd45c 100644
--- a/src/winml/modelkit/commands/perf.py
+++ b/src/winml/modelkit/commands/perf.py
@@ -7,9 +7,9 @@
 Benchmarks model inference performance using WinMLAutoModel and WinMLSession.
 
 Usage:
-    wmk perf -m microsoft/resnet-50
-    wmk perf -m microsoft/resnet-50 --device npu --iterations 100
-    wmk perf -m bert-base-uncased --task text-classification
+    winml perf -m microsoft/resnet-50
+    winml perf -m microsoft/resnet-50 --device npu --iterations 100
+    winml perf -m bert-base-uncased --task text-classification
 """
 
 from __future__ import annotations
@@ -986,22 +986,22 @@ def perf(
     \b
     Examples:
         # Basic benchmark (HuggingFace model)
-        wmk perf -m microsoft/resnet-50
+        winml perf -m microsoft/resnet-50
 
         # Benchmark a pre-exported ONNX file directly
-        wmk perf -m model.onnx --device cpu
+        winml perf -m model.onnx --device cpu
 
         # With custom iterations on NPU
-        wmk perf -m microsoft/resnet-50 --iterations 500 --device npu
+        winml perf -m microsoft/resnet-50 --iterations 500 --device npu
 
         # Text model with explicit task
-        wmk perf -m bert-base-uncased --task text-classification
+        winml perf -m bert-base-uncased --task text-classification
 
         # Per-module benchmarking
-        wmk perf -m bert-base-uncased --module BertAttention
+        winml perf -m bert-base-uncased --module BertAttention
 
         # Operator-level profiling (QNN NPU)
-        wmk perf -m model.onnx --op-tracing basic
+        winml perf -m model.onnx --op-tracing basic
     """
     # Resolve deprecated --hf-model alias
     if hf_model_deprecated and model_id:
diff --git a/src/winml/modelkit/commands/quantize.py b/src/winml/modelkit/commands/quantize.py
index 82586dfe8..5437837eb 100644
--- a/src/winml/modelkit/commands/quantize.py
+++ b/src/winml/modelkit/commands/quantize.py
@@ -2,19 +2,19 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""Quantize command for wmk CLI.
+"""Quantize command for winml CLI.
 
 This module provides the quantize command that inserts QDQ (Quantize-Dequantize)
 nodes into ONNX models for quantization-aware inference.
 
 Usage:
-    wmk quantize --model MODEL [OPTIONS]
+    winml quantize --model MODEL [OPTIONS]
 
 Examples:
-    wmk quantize -m model.onnx
-    wmk quantize -m model.onnx --precision int8
-    wmk quantize -m model.onnx -o model_qdq.onnx --samples 100
-    wmk quantize -m model.onnx --weight-type int8 --activation-type uint8
+    winml quantize -m model.onnx
+    winml quantize -m model.onnx --precision int8
+    winml quantize -m model.onnx -o model_qdq.onnx --samples 100
+    winml quantize -m model.onnx --weight-type int8 --activation-type uint8
 """
 
 from __future__ import annotations
@@ -121,19 +121,19 @@ def quantize(
     \b
     Examples:
         # Basic quantization with defaults (10 samples, uint8)
-        wmk quantize -m model.onnx
+        winml quantize -m model.onnx
 
         # Use precision shorthand (same as --weight-type uint8 --activation-type uint8)
-        wmk quantize -m model.onnx --precision int8
+        winml quantize -m model.onnx --precision int8
 
         # Int16 quantization
-        wmk quantize -m model.onnx --precision int16
+        winml quantize -m model.onnx --precision int16
 
         # Custom output path and more samples
-        wmk quantize -m model.onnx -o quantized.onnx --samples 100
+        winml quantize -m model.onnx -o quantized.onnx --samples 100
 
         # Explicit types with entropy calibration
-        wmk quantize -m model.onnx --weight-type int8 --method entropy
+        winml quantize -m model.onnx --weight-type int8 --method entropy
     """
     # Inherit debug mode from parent
     if ctx.obj and ctx.obj.get("debug"):
@@ -180,12 +180,8 @@ def quantize(
         if result.success:
             console.print("\n[bold green]Success![/bold green] Model quantized")
             console.print(f"[dim]Output: {result.output_path}[/dim]")
-            console.print(
-                f"[dim]QDQ nodes inserted: {result.nodes_quantized}[/dim]"
-            )
-            console.print(
-                f"[dim]Total time: {result.total_time_seconds:.2f}s[/dim]"
-            )
+            console.print(f"[dim]QDQ nodes inserted: {result.nodes_quantized}[/dim]")
+            console.print(f"[dim]Total time: {result.total_time_seconds:.2f}s[/dim]")
         else:
             console.print("\n[bold red]Quantization failed:[/bold red]")
             for error in result.errors:
diff --git a/src/winml/modelkit/commands/sys.py b/src/winml/modelkit/commands/sys.py
index 00c0e4fbd..abead4afb 100644
--- a/src/winml/modelkit/commands/sys.py
+++ b/src/winml/modelkit/commands/sys.py
@@ -13,12 +13,12 @@
 - Available devices and execution providers
 
 Usage:
-    wmk sys
-    wmk sys --format json
-    wmk sys --format compact
-    wmk sys --verbose
-    wmk sys --list-device
-    wmk sys --list-ep
+    winml sys
+    winml sys --format json
+    winml sys --format compact
+    winml sys --verbose
+    winml sys --list-device
+    winml sys --list-ep
 """
 
 from __future__ import annotations
@@ -431,8 +431,7 @@ def _output_device_text(devices: list[dict[str, Any]]) -> None:
     console.print("\n[bold blue]Available Devices (priority order)[/bold blue]")
     for dev in devices:
         console.print(
-            f"  [bold]#{dev['priority']}[/bold]  "
-            f"[cyan]{dev['type']:5s}[/cyan] {dev['name']}"
+            f"  [bold]#{dev['priority']}[/bold]  [cyan]{dev['type']:5s}[/cyan] {dev['name']}"
         )
         details = dev.get("details", {})
         if "error" in details:
@@ -514,9 +513,7 @@ def _output_ep_text(eps: list[dict[str, Any]]) -> None:
 
     for ep in eps:
         name_padded = ep["name"].ljust(30)
-        console.print(
-            f"  [bold]{name_padded}[/bold] [dim]->[/dim] [cyan]{ep['device']}[/cyan]"
-        )
+        console.print(f"  [bold]{name_padded}[/bold] [dim]->[/dim] [cyan]{ep['device']}[/cyan]")
         if ep.get("path"):
             console.print(f"    Path: {ep['path']}")
         else:
@@ -576,22 +573,22 @@ def sysinfo(
     \b
     Examples:
         # Display system info (human-readable format)
-        wmk sys
+        winml sys
 
         # Get output as JSON for scripting
-        wmk sys --format json
+        winml sys --format json
 
         # Show detailed info
-        wmk sys --verbose
+        winml sys --verbose
 
         # Compact format for quick overview
-        wmk sys --format compact
+        winml sys --format compact
 
         # List available devices
-        wmk sys --list-device
+        winml sys --list-device
 
         # List execution providers as JSON
-        wmk sys --list-ep --format json
+        winml sys --list-ep --format json
     """
     # Inherit debug mode from parent
     if ctx.obj.get("debug"):
@@ -637,13 +634,9 @@ def sysinfo(
                     else:
                         _output_ep_text(eps)
                 except Exception as e:
-                    console.print(
-                        f"[bold red]Error detecting execution providers:[/bold red] {e}"
-                    )
+                    console.print(f"[bold red]Error detecting execution providers:[/bold red] {e}")
                     logger.exception("Failed to detect execution providers")
-                    raise click.ClickException(
-                        f"Error detecting execution providers: {e}"
-                    ) from e
+                    raise click.ClickException(f"Error detecting execution providers: {e}") from e
             return
 
         # Default: full sysinfo including devices and EPs
diff --git a/src/winml/modelkit/models/auto.py b/src/winml/modelkit/models/auto.py
index 1e7d8b98f..3195d0fee 100644
--- a/src/winml/modelkit/models/auto.py
+++ b/src/winml/modelkit/models/auto.py
@@ -174,7 +174,7 @@ def from_onnx(
         else:
             import tempfile
 
-            cache_dir_path = Path(tempfile.mkdtemp(prefix="wmk_"))
+            cache_dir_path = Path(tempfile.mkdtemp(prefix="winml_"))
             output_dir = cache_dir_path
             force_rebuild = True
             logger.info("Cache disabled -- using temp directory: %s", output_dir)
@@ -324,7 +324,7 @@ def from_pretrained(
             # No cache -- use temp directory, always rebuild
             import tempfile
 
-            cache_dir_path = Path(tempfile.mkdtemp(prefix="wmk_"))
+            cache_dir_path = Path(tempfile.mkdtemp(prefix="winml_"))
             force_rebuild = True
             logger.info("Cache disabled -- using temp directory: %s", cache_dir_path)
 
diff --git a/src/winml/modelkit/session/qairt/compile_qairt_bin.py b/src/winml/modelkit/session/qairt/compile_qairt_bin.py
index 6e4c01c37..9a63cdcaa 100644
--- a/src/winml/modelkit/session/qairt/compile_qairt_bin.py
+++ b/src/winml/modelkit/session/qairt/compile_qairt_bin.py
@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""QAIRT SDK compilation script - executed in isolated venv-wmk subprocess.
+"""QAIRT SDK compilation script - executed in isolated venv-winml subprocess.
 
 This script is invoked by qnn_compiler._compile_qairt() and runs in a separate
 Python 3.10 virtual environment with QAIRT SDK dependencies installed.
diff --git a/src/winml/modelkit/session/qairt/qairt_session.py b/src/winml/modelkit/session/qairt/qairt_session.py
index 5c92bc7f5..f6406aa1e 100644
--- a/src/winml/modelkit/session/qairt/qairt_session.py
+++ b/src/winml/modelkit/session/qairt/qairt_session.py
@@ -22,7 +22,7 @@
 
 logger = logging.getLogger(__name__)
 
-# QAIRT SDK dependencies for venv-wmk virtual environment
+# QAIRT SDK dependencies for venv-winml virtual environment
 QAIRT_DEPENDENCIES = [
     "onnx>=1.14.0,<1.17",
     "torch==2.4.1",
@@ -72,7 +72,7 @@ def compile(self) -> None:
         """Compile model using QAIRT SDK.
 
         Pipeline:
-            1. Ensure venv-wmk in SDK directory
+            1. Ensure venv-winml in SDK directory
             2. Run compile_qairt_bin.py subprocess → .bin
             3. Generate cache_info.json
             4. Wrap binary into EPContext ONNX model
@@ -89,7 +89,7 @@ def compile(self) -> None:
         # Step 1: Set up venv with QAIRT dependencies
         venv_python = ensure_venv(
             root_path=self._qnn_sdk_root,
-            venv_name="venv-wmk",
+            venv_name="venv-winml",
             python_version="3.10",
             requirements=QAIRT_DEPENDENCIES,
         )
diff --git a/tests/e2e/test_build_e2e.py b/tests/e2e/test_build_e2e.py
index 7bc7933f2..535fc2f64 100644
--- a/tests/e2e/test_build_e2e.py
+++ b/tests/e2e/test_build_e2e.py
@@ -11,7 +11,7 @@
 The build command uses @click.pass_context and requires obj={"debug": False}.
 
 We generate a proper config via ``generate_build_config()`` (same API
-the ``wmk config`` command calls) to ensure export input_tensors are
+the ``winml config`` command calls) to ensure export input_tensors are
 populated. A minimal hand-crafted config lacks I/O specs and will fail.
 
 Markers:
@@ -19,6 +19,7 @@
     slow: Tests that take > 30 seconds
     network: Requires network access to HuggingFace Hub
 """
+
 from __future__ import annotations
 
 import json
@@ -93,6 +94,7 @@ def _make_minimal_config_file(tmp_path, task: str) -> str:
 # HF model build (export + optimize only)
 # ===========================================================================
 
+
 class TestBuildHF:
     """Build from HuggingFace model with --no-quant --no-compile."""
 
@@ -102,7 +104,9 @@ def test_bert_text_classification(self, tmp_path: Path):
         Uses --no-quant --no-compile so only export + optimize run.
         """
         config_path = _generate_config_file(
-            tmp_path, "bert-base-uncased", task="text-classification",
+            tmp_path,
+            "bert-base-uncased",
+            task="text-classification",
         )
         output_dir = tmp_path / "output"
 
@@ -110,18 +114,19 @@ def test_bert_text_classification(self, tmp_path: Path):
         result = runner.invoke(
             build,
             [
-                "-c", config_path,
-                "-m", "bert-base-uncased",
-                "-o", str(output_dir),
+                "-c",
+                config_path,
+                "-m",
+                "bert-base-uncased",
+                "-o",
+                str(output_dir),
                 "--no-quant",
                 "--no-compile",
             ],
             obj={"debug": False},
             catch_exceptions=False,
         )
-        assert result.exit_code == 0, (
-            f"build failed (exit {result.exit_code}):\n{result.output}"
-        )
+        assert result.exit_code == 0, f"build failed (exit {result.exit_code}):\n{result.output}"
         # Build should produce an output directory
         assert output_dir.exists()
         # Should contain at least one ONNX file
@@ -136,6 +141,7 @@ def test_bert_text_classification(self, tmp_path: Path):
 # ONNX input build
 # ===========================================================================
 
+
 class TestBuildONNX:
     """Build from pre-exported ONNX file."""
 
@@ -148,16 +154,17 @@ def test_onnx_passthrough(self, tmp_path: Path, onnx_model_path: Path):
         result = runner.invoke(
             build,
             [
-                "-c", config_path,
-                "-m", str(onnx_model_path),
-                "-o", str(output_dir),
+                "-c",
+                config_path,
+                "-m",
+                str(onnx_model_path),
+                "-o",
+                str(output_dir),
                 "--no-quant",
                 "--no-compile",
             ],
             obj={"debug": False},
             catch_exceptions=False,
         )
-        assert result.exit_code == 0, (
-            f"build failed (exit {result.exit_code}):\n{result.output}"
-        )
+        assert result.exit_code == 0, f"build failed (exit {result.exit_code}):\n{result.output}"
         assert output_dir.exists()
diff --git a/tests/integration/test_module_build.py b/tests/integration/test_module_build.py
index 3d79626c6..ff8df5ed2 100644
--- a/tests/integration/test_module_build.py
+++ b/tests/integration/test_module_build.py
@@ -18,7 +18,7 @@ class TestModuleConfigE2E:
     """End-to-end: generate_build_config(module=...) produces valid configs."""
 
     def test_config_module_generates_array_with_module_path(self) -> None:
-        """Verify wmk config --module outputs a JSON array with module_path."""
+        """Verify winml config --module outputs a JSON array with module_path."""
         from winml.modelkit.config import generate_build_config
 
         # Use model_type only (no download, uses default HF config with random weights)
diff --git a/tests/integration/test_quantization.py b/tests/integration/test_quantization.py
index 9eb507f15..59b3a1ed5 100644
--- a/tests/integration/test_quantization.py
+++ b/tests/integration/test_quantization.py
@@ -19,9 +19,9 @@ def test_model_path(self, tmp_path_factory):
         temp_dir = tmp_path_factory.mktemp("quantization_e2e")
         model_path = temp_dir / "resnet-50.onnx"
 
-        # Export ResNet-50 model using wmk export
+        # Export ResNet-50 model using winml export
         cmd = [
-            "wmk",
+            "winml",
             "export",
             "-m",
             "microsoft/resnet-50",
@@ -46,8 +46,8 @@ def _run_quantization_and_validate(self, test_model_path, tmp_path, test_name, p
 
         output_path = tmp_path / f"resnet50_{test_name}_quantized.onnx"
 
-        # Build quantization command using actual wmk quantize CLI flags
-        cmd = ["wmk", "quantize", "--model", str(test_model_path), "--output", str(output_path)]
+        # Build quantization command using actual winml quantize CLI flags
+        cmd = ["winml", "quantize", "--model", str(test_model_path), "--output", str(output_path)]
 
         if precision:
             cmd.extend(["--precision", precision])
diff --git a/tests/unit/analyze/core/model_validators/test_validators.py b/tests/unit/analyze/core/model_validators/test_validators.py
index 2cde85eb1..b4a82ebfb 100644
--- a/tests/unit/analyze/core/model_validators/test_validators.py
+++ b/tests/unit/analyze/core/model_validators/test_validators.py
@@ -128,7 +128,7 @@ def test_detect_constant_only_nodes(self):
         # Check that explanation mentions constant-only nodes
         assert "constant inputs" in info.explanation
         # Check that details contain tool recommendations (JSON format)
-        assert "wmk optimize" in info.actions[0].details
+        assert "winml optimize" in info.actions[0].details
 
     def test_no_constant_only_nodes(self):
         """Test that models without constant-only nodes return None."""
@@ -185,7 +185,7 @@ def test_constant_folding_with_initializer(self):
         # Check explanation mentions constant inputs
         assert "constant inputs" in info.explanation
         # Check details contain tool recommendations
-        assert "wmk optimize" in info.actions[0].details
+        assert "winml optimize" in info.actions[0].details
 
     def test_explanation_contains_node_count(self):
         """Test that explanation mentions correct node count."""
diff --git a/tests/unit/cache/test_model.py b/tests/unit/cache/test_model.py
index 145d96a7d..910ad9d4c 100644
--- a/tests/unit/cache/test_model.py
+++ b/tests/unit/cache/test_model.py
@@ -160,7 +160,7 @@ def test_multiple_models(self, tmp_path: Path) -> None:
 
 
 class TestCallerConvergence:
-    """Verify that from_pretrained and wmk build --use-cache produce identical paths."""
+    """Verify that from_pretrained and winml build --use-cache produce identical paths."""
 
     def test_same_output_dir(self) -> None:
         """Both callers compute the same output_dir for a given model_id."""
diff --git a/tests/unit/commands/test_build_module.py b/tests/unit/commands/test_build_module.py
index 042989640..8306ba270 100644
--- a/tests/unit/commands/test_build_module.py
+++ b/tests/unit/commands/test_build_module.py
@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""Tests for wmk build module mode (array config detection and orchestration)."""
+"""Tests for winml build module mode (array config detection and orchestration)."""
 
 from __future__ import annotations
 
diff --git a/tests/unit/commands/test_cli.py b/tests/unit/commands/test_cli.py
index b47a5a92f..e7a998685 100644
--- a/tests/unit/commands/test_cli.py
+++ b/tests/unit/commands/test_cli.py
@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""CLI integration tests for wmk command.
+"""CLI integration tests for winml command.
 
 Tests the CLI interface using Click's CliRunner to ensure commands work
 correctly without executing actual model exports (which are slow).
@@ -42,7 +42,7 @@ def test_version(self, runner: CliRunner) -> None:
         """Test --version flag shows version info."""
         result = runner.invoke(main, ["--version"])
         assert result.exit_code == 0
-        assert "wmk" in result.output.lower()
+        assert "winml" in result.output.lower()
 
     def test_help(self, runner: CliRunner) -> None:
         """Test --help shows usage information."""
diff --git a/tests/unit/commands/test_hub.py b/tests/unit/commands/test_hub.py
index 3f362ae03..0a9dea702 100644
--- a/tests/unit/commands/test_hub.py
+++ b/tests/unit/commands/test_hub.py
@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""Tests for the wmk hub CLI command (no network calls, catalog mocked)."""
+"""Tests for the winml hub CLI command (no network calls, catalog mocked)."""
 
 from __future__ import annotations
 
@@ -40,19 +40,29 @@
             "task": "token-classification",
             "perf": {
                 "QNN": {
-                    "avg_ms": 13.71, "p50_ms": 13.75, "p90_ms": 13.84,
-                    "p95_ms": 13.84, "p99_ms": 13.84,
-                    "min_ms": 13.59, "max_ms": 13.84, "throughput_qps": 72.93,
+                    "avg_ms": 13.71,
+                    "p50_ms": 13.75,
+                    "p90_ms": 13.84,
+                    "p95_ms": 13.84,
+                    "p99_ms": 13.84,
+                    "min_ms": 13.59,
+                    "max_ms": 13.84,
+                    "throughput_qps": 72.93,
                 },
                 "OV": {
-                    "avg_ms": 25.28, "p50_ms": 24.84, "p90_ms": 35.33,
-                    "p95_ms": 35.33, "p99_ms": 35.33,
-                    "min_ms": 20.6, "max_ms": 35.33, "throughput_qps": 39.56,
+                    "avg_ms": 25.28,
+                    "p50_ms": 24.84,
+                    "p90_ms": 35.33,
+                    "p95_ms": 35.33,
+                    "p99_ms": 35.33,
+                    "min_ms": 20.6,
+                    "max_ms": 35.33,
+                    "throughput_qps": 39.56,
                 },
             },
             "accuracy": {
                 "QNN": {"verdict": "PASS", "drop_pct": 0.0},
-                "OV":  {"verdict": "PASS", "drop_pct": 0.0},
+                "OV": {"verdict": "PASS", "drop_pct": 0.0},
             },
         },
         {
@@ -62,7 +72,7 @@
             "perf": None,
             "accuracy": {
                 "QNN": {"verdict": "REGRESSION", "drop_pct": -36.84},
-                "OV":  {"verdict": "REGRESSION", "drop_pct": -32.67},
+                "OV": {"verdict": "REGRESSION", "drop_pct": -32.67},
             },
         },
         {
@@ -167,7 +177,7 @@ def test_hub_default_shows_table(runner, patched_catalog):
 def test_hub_table_shows_hint(runner, patched_catalog):
     result = runner.invoke(hub, ["--output", "/dev/null"])
     assert result.exit_code == 0
-    assert "wmk hub --model" in result.output
+    assert "winml hub --model" in result.output
 
 
 def test_hub_saves_json_file(runner, patched_catalog, tmp_path):
@@ -192,10 +202,7 @@ def test_hub_shows_accuracy_pass(runner, patched_catalog, tmp_path):
     assert result.exit_code == 0
     data = json.loads(out.read_text())
     verdicts = {
-        ep: info["verdict"]
-        for m in data
-        if m.get("accuracy")
-        for ep, info in m["accuracy"].items()
+        ep: info["verdict"] for m in data if m.get("accuracy") for ep, info in m["accuracy"].items()
     }
     assert "PASS" in verdicts.values()
 
@@ -271,9 +278,7 @@ def test_hub_model_detail_shows_accuracy(runner, patched_catalog, tmp_path):
 
 
 def test_hub_model_detail_regression(runner, patched_catalog):
-    result = runner.invoke(
-        hub, ["--model", "facebook/detr-resnet-50", "--output", "/dev/null"]
-    )
+    result = runner.invoke(hub, ["--model", "facebook/detr-resnet-50", "--output", "/dev/null"])
     assert result.exit_code == 0
     assert "REGRESSION" in result.output
     assert "-36.84%" in result.output
diff --git a/tests/unit/commands/test_perf_module.py b/tests/unit/commands/test_perf_module.py
index 2dc4619d0..46ae2c2c3 100644
--- a/tests/unit/commands/test_perf_module.py
+++ b/tests/unit/commands/test_perf_module.py
@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""Tests for wmk perf --module flag."""
+"""Tests for winml perf --module flag."""
 
 from __future__ import annotations
 
@@ -13,10 +13,10 @@
 
 
 class TestPerfModuleFlag:
-    """Tests for --module flag on wmk perf."""
+    """Tests for --module flag on winml perf."""
 
     def test_module_flag_in_help(self) -> None:
-        """Verify --module flag appears in wmk perf --help."""
+        """Verify --module flag appears in winml perf --help."""
         runner = CliRunner()
         result = runner.invoke(main, ["perf", "--help"])
         assert result.exit_code == 0
diff --git a/tests/unit/config/test_build.py b/tests/unit/config/test_build.py
index a856b4a78..4e03eae7c 100644
--- a/tests/unit/config/test_build.py
+++ b/tests/unit/config/test_build.py
@@ -1927,7 +1927,7 @@ def test_explicit_precision_triggers_resolve_device(self) -> None:
 
 
 # =============================================================================
-# TestDevicePrecisionCli - CLI tests for --device/--precision on wmk config
+# TestDevicePrecisionCli - CLI tests for --device/--precision on winml config
 # =============================================================================
 
 
@@ -1964,7 +1964,7 @@ def _mock_deps(
         }
 
     def _invoke(self, tmp_path, extra_args: list[str] | None = None):
-        """Helper: invoke wmk config with standard mocks."""
+        """Helper: invoke winml config with standard mocks."""
         output_file = tmp_path / "result.json"
         args = ["-m", "bert-base-uncased", "-o", str(output_file)]
         if extra_args:
@@ -2056,7 +2056,7 @@ def test_auto_precision_int8_triggers_detection(self, tmp_path) -> None:
 
 
 class TestConfigOnnxAutoDetect:
-    """Test ONNX file auto-detection in wmk config command."""
+    """Test ONNX file auto-detection in winml config command."""
 
     def test_config_auto_detect_onnx(self, tmp_path) -> None:
         """When -m points to an existing .onnx file, generates config with export=None."""
diff --git a/tests/unit/config/test_build_onnx.py b/tests/unit/config/test_build_onnx.py
index 4374a6a31..b37a70ab8 100644
--- a/tests/unit/config/test_build_onnx.py
+++ b/tests/unit/config/test_build_onnx.py
@@ -88,7 +88,7 @@ def mock_export_config() -> WinMLExportConfig:
 
 
 class TestConfigOnnxAutoDetect:
-    """Test ONNX file auto-detection in wmk config command."""
+    """Test ONNX file auto-detection in winml config command."""
 
     def test_config_auto_detect_onnx(self, tmp_path) -> None:
         """When -m points to an existing .onnx file, generates config with export=None."""
diff --git a/tests/unit/optracing/test_perf_optracing_cli.py b/tests/unit/optracing/test_perf_optracing_cli.py
index 0e532b7b5..3abbd0288 100644
--- a/tests/unit/optracing/test_perf_optracing_cli.py
+++ b/tests/unit/optracing/test_perf_optracing_cli.py
@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""Tests for the --op-tracing CLI option on wmk perf."""
+"""Tests for the --op-tracing CLI option on winml perf."""
 
 from __future__ import annotations
 

From b1e3950bbea1c325095c327cd735626ea2371911 Mon Sep 17 00:00:00 2001
From: Zhipeng Wang <zhiwang@microsoft.com>
Date: Wed, 1 Apr 2026 13:47:22 +0800
Subject: [PATCH 2/2] fix: rename remaining WMK references (case-insensitive)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

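- WMK variable in run_eval.py → WINML_CLI
- WMK_CACHE_DIR env var → WINML_CACHE_DIR (cache/path.py + tests)
- WMK table headers and docstring note in accuracy.py → WinML
- WMK_blip comment in blip.py → WinML blip
- Non-ASCII `→` / `•` in optimize.py help text and console output → ASCII `->` / `*`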
---
 scripts/e2e_eval/run_eval.py            | 14 +++++++-------
 scripts/e2e_eval/utils/accuracy.py      |  6 +++---
 src/winml/modelkit/cache/path.py        |  4 ++--
 src/winml/modelkit/commands/optimize.py | 10 +++++-----
 src/winml/modelkit/models/hf/blip.py    |  2 +-
 tests/unit/cache/test_path.py           | 10 +++++-----
 6 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/scripts/e2e_eval/run_eval.py b/scripts/e2e_eval/run_eval.py
index fa532e393..ea10c2b3f 100644
--- a/scripts/e2e_eval/run_eval.py
+++ b/scripts/e2e_eval/run_eval.py
@@ -71,7 +71,7 @@
 # Constants
 # ---------------------------------------------------------------------------
 
-WMK = [sys.executable, "-m", "winml.modelkit.cli"]
+WINML_CLI = [sys.executable, "-m", "winml.modelkit.cli"]
 BASELINE_SCRIPT = Path(__file__).parent / "run_pytorch_baseline.py"
 BASELINE_CACHE_PATH = Path(__file__).parent / "cache" / "baseline_cache.json"
 EVAL_DATASETS_CACHE = Path.home() / ".cache" / "winml" / "eval_datasets"
@@ -330,7 +330,7 @@ def _run_build(
 
     # Step 1: winml config
     config_args = [
-        *WMK,
+        *WINML_CLI,
         "config",
         "-m",
         entry.hf_id,
@@ -355,7 +355,7 @@ def _run_build(
 
     # Step 2: winml build --use-cache
     build_args = [
-        *WMK,
+        *WINML_CLI,
         "build",
         "-c",
         str(config_path),
@@ -433,10 +433,10 @@ def run_model(
     (skips internal build). Otherwise falls back to HF model ID.
     """
     if onnx_path:
-        args = [*WMK, "perf", "-m", onnx_path, "--device", device]
+        args = [*WINML_CLI, "perf", "-m", onnx_path, "--device", device]
     else:
         args = [
-            *WMK,
+            *WINML_CLI,
             "perf",
             "-m",
             entry.hf_id,
@@ -543,7 +543,7 @@ def _run_winml_eval(
     eval_device = "npu" if device == "auto" else device
     if onnx_path:
         args = [
-            *WMK,
+            *WINML_CLI,
             "eval",
             "-m",
             onnx_path,
@@ -554,7 +554,7 @@ def _run_winml_eval(
         ]
     else:
         args = [
-            *WMK,
+            *WINML_CLI,
             "eval",
             "-m",
             entry.hf_id,
diff --git a/scripts/e2e_eval/utils/accuracy.py b/scripts/e2e_eval/utils/accuracy.py
index 10a6b4c8b..f82baf153 100644
--- a/scripts/e2e_eval/utils/accuracy.py
+++ b/scripts/e2e_eval/utils/accuracy.py
@@ -57,7 +57,7 @@ def compute_delta(
     Returns (None, None) if either is missing or baseline value is zero.
 
     Note: For error-rate metrics (WER — lower is better) a positive delta
-    means the WMK pipeline is *worse*.  The threshold in derive_verdict()
+    means the WinML pipeline is *worse*.  The threshold in derive_verdict()
     uses abs(delta_relative) to handle both directions uniformly.
     """
     if winml_metric is None or baseline_metric is None:
@@ -242,7 +242,7 @@ def _pct(acc: dict) -> str:
     lines += ["", "## Accuracy Regressions", ""]
     if regressions:
         lines += [
-            "| Model | Task | WMK | Baseline | Delta% |",
+            "| Model | Task | WinML | Baseline | Delta% |",
             "|-------|------|-----|----------|--------|",
         ]
         for r in regressions:
@@ -263,7 +263,7 @@ def _pct(acc: dict) -> str:
     lines += ["", "## At-Risk Models", ""]
     if at_risk:
         lines += [
-            "| Model | Task | WMK | Baseline | Delta% |",
+            "| Model | Task | WinML | Baseline | Delta% |",
             "|-------|------|-----|----------|--------|",
         ]
         for r in at_risk:
diff --git a/src/winml/modelkit/cache/path.py b/src/winml/modelkit/cache/path.py
index ba28ba61d..5ac7ca76d 100644
--- a/src/winml/modelkit/cache/path.py
+++ b/src/winml/modelkit/cache/path.py
@@ -32,7 +32,7 @@ def get_cache_dir(override: str | Path | None = None) -> Path:
 
     Priority:
         1. ``override`` parameter (caller-specified)
-        2. ``WMK_CACHE_DIR`` environment variable
+        2. ``WINML_CACHE_DIR`` environment variable
         3. ``~/.cache/winml/``
 
     Args:
@@ -43,7 +43,7 @@ def get_cache_dir(override: str | Path | None = None) -> Path:
     """
     if override is not None:
         return Path(override)
-    env_dir = os.environ.get("WMK_CACHE_DIR")
+    env_dir = os.environ.get("WINML_CACHE_DIR")
     if env_dir:
         return Path(env_dir)
     return Path.home() / ".cache" / _DEFAULT_CACHE_DIR_NAME
diff --git a/src/winml/modelkit/commands/optimize.py b/src/winml/modelkit/commands/optimize.py
index 96fdf4c8d..c1f704da7 100644
--- a/src/winml/modelkit/commands/optimize.py
+++ b/src/winml/modelkit/commands/optimize.py
@@ -260,9 +260,9 @@ def optimize(
 
         # Pattern rewrite flags follow: --enable-{source-slug}-{target-slug}
         # Run --list-rewrites to discover all available flag names.
-        # Example (all GELU variants → single Gelu node):
+        # Example (all GELU variants -> single Gelu node):
         winml optimize -m model.onnx -o out.onnx --enable-gelu-singlegelu
-        # Example (only Gelu1 variant → single Gelu node):
+        # Example (only Gelu1 variant -> single Gelu node):
         winml optimize -m model.onnx -o out.onnx --enable-gelu1-singlegelu
 
         # Basic optimization with GELU fusion
@@ -358,7 +358,7 @@ def optimize(
             console.print("[yellow]No rewrite capabilities discovered.[/yellow]")
             return
 
-        console.print("\n[bold]Rewrite capabilities (source → target):[/bold]\n")
+        console.print("\n[bold]Rewrite capabilities (source -> target):[/bold]\n")
         for group in REWRITE_GROUPS:
             rule_file = Path(group.rule_file).name
             is_multi = len(group.sources) > 1
@@ -440,7 +440,7 @@ def optimize(
     if all_errors:
         console.print("[bold red]Configuration validation errors:[/bold red]")
         for error in all_errors:
-            console.print(f"  [red]• {error}[/red]")
+            console.print(f"  [red]* {error}[/red]")
         sys.exit(1)
 
     # Convert capability names (kebab-case) to python names (snake_case) for optimizer
@@ -471,7 +471,7 @@ def optimize(
         reduction = (1 - optimized_nodes / original_nodes) * 100 if original_nodes else 0
 
         console.print(f"\n[bold green]Success![/bold green] Model optimized: {output}")
-        node_info = f"Nodes: {original_nodes} → {optimized_nodes} ({reduction:.1f}% reduction)"
+        node_info = f"Nodes: {original_nodes} -> {optimized_nodes} ({reduction:.1f}% reduction)"
         console.print(f"[dim]{node_info}[/dim]")
 
     except Exception as e:
diff --git a/src/winml/modelkit/models/hf/blip.py b/src/winml/modelkit/models/hf/blip.py
index 6dfd30c4d..b5cc48062 100644
--- a/src/winml/modelkit/models/hf/blip.py
+++ b/src/winml/modelkit/models/hf/blip.py
@@ -10,7 +10,7 @@
 - Vision: ViT-B/16, 384x384 input, 577 sequence (1 CLS + 576 patches)
 - Text: BERT-based decoder, vocab 30524, max 512 positions
 
-Optimization settings match WMK_blip production pipeline:
+Optimization settings match WinML blip production pipeline:
 - GELU fusion enabled
 - LayerNorm fusion enabled
 - MatMul+Add fusion enabled (GEMM)
diff --git a/tests/unit/cache/test_path.py b/tests/unit/cache/test_path.py
index 9e2edf382..1f83b32b2 100644
--- a/tests/unit/cache/test_path.py
+++ b/tests/unit/cache/test_path.py
@@ -30,17 +30,17 @@ class TestGetCacheDir:
     """Test cache directory resolution."""
 
     def test_default_is_home_cache_winml(self, monkeypatch: pytest.MonkeyPatch) -> None:
-        monkeypatch.delenv("WMK_CACHE_DIR", raising=False)
+        monkeypatch.delenv("WINML_CACHE_DIR", raising=False)
         result = get_cache_dir()
         assert result == Path.home() / ".cache" / "winml"
 
     def test_env_var_override(self, monkeypatch: pytest.MonkeyPatch) -> None:
-        monkeypatch.setenv("WMK_CACHE_DIR", "/custom/cache")
+        monkeypatch.setenv("WINML_CACHE_DIR", "/custom/cache")
         result = get_cache_dir()
         assert result == Path("/custom/cache")
 
     def test_explicit_override_takes_priority(self, monkeypatch: pytest.MonkeyPatch) -> None:
-        monkeypatch.setenv("WMK_CACHE_DIR", "/env/cache")
+        monkeypatch.setenv("WINML_CACHE_DIR", "/env/cache")
         result = get_cache_dir(override="/explicit/cache")
         assert result == Path("/explicit/cache")
 
@@ -49,7 +49,7 @@ def test_explicit_override_as_path(self) -> None:
         assert result == Path("/some/path")
 
     def test_none_override_falls_through(self, monkeypatch: pytest.MonkeyPatch) -> None:
-        monkeypatch.delenv("WMK_CACHE_DIR", raising=False)
+        monkeypatch.delenv("WINML_CACHE_DIR", raising=False)
         result = get_cache_dir(override=None)
         assert result == Path.home() / ".cache" / "winml"
 
@@ -67,7 +67,7 @@ def test_appends_artifacts(self) -> None:
         assert result == Path("/cache/root/artifacts")
 
     def test_none_resolves_default(self, monkeypatch: pytest.MonkeyPatch) -> None:
-        monkeypatch.delenv("WMK_CACHE_DIR", raising=False)
+        monkeypatch.delenv("WINML_CACHE_DIR", raising=False)
         result = get_artifacts_dir()
         assert result == Path.home() / ".cache" / "winml" / "artifacts"