microsoft · timenick · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026
@@ -32,17 +32,17 @@ uv sync
 
 ### Usage
 
-ModelKit provides a CLI tool `wmk`:
+ModelKit provides a CLI tool `winml`:
 
 ```bash
 # Export a Hugging Face model to ONNX
-uv run wmk export --model microsoft/resnet-50 --output ./output
+uv run winml export --model microsoft/resnet-50 --output ./output
 
 # Analyze an ONNX model
-uv run wmk analyze --model ./output/model.onnx
+uv run winml analyze --model ./output/model.onnx
 
 # Quantize an ONNX model
-uv run wmk quantize --model ./output/model.onnx
+uv run winml quantize --model ./output/model.onnx
 ```
 
 ## Contributions and Feedback

@@ -91,7 +91,7 @@ urls.Repository = "https://github.com/microsoft/ModelKit.git"
 # but package-dir expects namespace names (winml.modelkit). These don't auto-connect.
 # For flat layout (modelkit/) with namespace imports (winml.modelkit), explicit listing
 # ensures the namespace prefix is correctly applied to all subpackages.
-scripts.wmk = "winml.modelkit.cli:main"
+scripts.winml = "winml.modelkit.cli:main"
 
 [dependency-groups]
 dev = [

@@ -1,6 +1,6 @@
 # E2E Evaluation Scripts
 
-Batch-evaluate ModelKit's `wmk perf` pipeline against a curated set of HuggingFace models.
+Batch-evaluate ModelKit's `winml perf` pipeline against a curated set of Hugging Face models.
 Captures pass/fail, failure classification, and generates interactive reports.
 
 ## Quick Start
@@ -48,7 +48,7 @@ uv run python scripts/e2e_eval/build_registry.py --dry-run
 
 ### `run_eval.py` — Run Evaluation
 
-Executes `wmk perf` for each model in a subprocess, classifies failures, and
+Executes `winml perf` for each model in a subprocess, classifies failures, and
 generates reports (JSON, Markdown, HTML).
 
 ```bash

@@ -1,3 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
 """Build a local HF-compatible dataset for Isotonic/distilbert_finetuned_ai4privacy_v2.
 
 The ``ai4privacy/pii-masking-200k`` dataset uses string BIO labels
@@ -13,6 +18,7 @@
 import argparse
 from pathlib import Path
 
+
 _NUM_SAMPLES = 10000
 
 
@@ -37,10 +43,12 @@ def build_dataset(output_dir: Path) -> None:
     tokens_list = [s["mbert_text_tokens"] for s in samples]
     tags_list = [[label2id[lbl] for lbl in s["mbert_bio_labels"]] for s in samples]
 
-    features = Features({
-        "tokens": Sequence(Value("string")),
-        "ner_tags": Sequence(ClassLabel(names=all_labels)),
-    })
+    features = Features(
+        {
+            "tokens": Sequence(Value("string")),
+            "ner_tags": Sequence(ClassLabel(names=all_labels)),
+        }
+    )
     dataset = Dataset.from_dict(
         {"tokens": tokens_list, "ner_tags": tags_list},
         features=features,
@@ -51,7 +59,7 @@ def build_dataset(output_dir: Path) -> None:
     print("Done.")
 
 
-def main() -> None:
+def main() -> None:  # noqa: D103
     parser = argparse.ArgumentParser(description="Build ai4privacy PII dataset")
     parser.add_argument("--output", type=Path, required=True, help="Output directory")
     args = parser.parse_args()

@@ -1,3 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
 """Build a local HF-compatible dataset for indonlp/indonlu (posp subset).
 
 The upstream ``indonlp/indonlu`` dataset uses a legacy loading script
@@ -13,6 +18,7 @@
 import argparse
 from pathlib import Path
 
+
 _PARQUET_REVISION = "refs/convert/parquet"
 _PARQUET_PATH = "posp/validation/0000.parquet"
 
@@ -38,7 +44,7 @@ def build_dataset(output_dir: Path) -> None:
     print("Done.")
 
 
-def main() -> None:
+def main() -> None:  # noqa: D103
     parser = argparse.ArgumentParser(description="Build indonlu posp dataset")
     parser.add_argument("--output", type=Path, required=True, help="Output directory")
     args = parser.parse_args()

@@ -1,3 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
 """Build a local HF-compatible object-detection dataset from PubTables-1M.
 
 Downloads the validation split annotations (5 MB) and images (7 GB) from
@@ -18,9 +23,10 @@
 import argparse
 import random
 import tarfile
-import xml.etree.ElementTree as ET
+import xml.etree.ElementTree as ET  # noqa: N817
 from pathlib import Path
 
+
 # Labels matching microsoft/table-transformer-detection config
 LABEL_NAMES = ["table", "table rotated"]
 
@@ -30,7 +36,7 @@
 
 def _parse_voc_xml(xml_bytes: bytes) -> dict | None:
     """Parse a PASCAL VOC XML annotation and return structured data."""
-    root = ET.fromstring(xml_bytes)
+    root = ET.fromstring(xml_bytes)  # noqa: S314
     filename = root.findtext("filename")
     size_el = root.find("size")
     if size_el is None or filename is None:
@@ -147,38 +153,43 @@ def build_dataset(output_dir: Path) -> None:
     print(f"  Extracted {len(images_by_name)} images")
 
     # Step 6: Build dataset rows (skip any missing images)
-    from datasets import ClassLabel, Dataset, Features, Image as HFImage, Sequence, Value
+    from datasets import ClassLabel, Dataset, Features, Sequence, Value
+    from datasets import Image as HFImage
 
     rows: list[dict] = []
     for idx, ann in enumerate(sampled):
         img = images_by_name.get(ann["filename"])
         if img is None:
             continue
-        rows.append({
-            "image_id": idx,
-            "image": img,
-            "width": ann["width"],
-            "height": ann["height"],
+        rows.append(
+            {
+                "image_id": idx,
+                "image": img,
+                "width": ann["width"],
+                "height": ann["height"],
+                "objects": {
+                    "bbox_id": ann["bbox_id"],
+                    "category": ann["category"],
+                    "bbox": ann["bbox"],
+                    "area": ann["area"],
+                },
+            }
+        )
+
+    features = Features(
+        {
+            "image_id": Value("int64"),
+            "image": HFImage(),
+            "width": Value("int64"),
+            "height": Value("int64"),
             "objects": {
-                "bbox_id": ann["bbox_id"],
-                "category": ann["category"],
-                "bbox": ann["bbox"],
-                "area": ann["area"],
+                "bbox_id": Sequence(Value("int64")),
+                "category": Sequence(ClassLabel(names=LABEL_NAMES)),
+                "bbox": Sequence(Sequence(Value("float64"), length=4)),
+                "area": Sequence(Value("float64")),
             },
-        })
-
-    features = Features({
-        "image_id": Value("int64"),
-        "image": HFImage(),
-        "width": Value("int64"),
-        "height": Value("int64"),
-        "objects": {
-            "bbox_id": Sequence(Value("int64")),
-            "category": Sequence(ClassLabel(names=LABEL_NAMES)),
-            "bbox": Sequence(Sequence(Value("float64"), length=4)),
-            "area": Sequence(Value("float64")),
-        },
-    })
+        }
+    )
 
     dataset = Dataset.from_list(rows, features=features)
     print(f"Saving {len(dataset)} samples to {output_dir} ...")
@@ -187,10 +198,8 @@ def build_dataset(output_dir: Path) -> None:
     print("Done.")
 
 
-def main() -> None:
-    parser = argparse.ArgumentParser(
-        description="Build PubTables-1M detection dataset"
-    )
+def main() -> None:  # noqa: D103
+    parser = argparse.ArgumentParser(description="Build PubTables-1M detection dataset")
     parser.add_argument("--output", type=Path, required=True, help="Output directory")
     args = parser.parse_args()
     build_dataset(args.output)

@@ -1,3 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
 """Build a local HF-compatible object-detection dataset from PubTables-1M (Structure).
 
 Downloads the validation split annotations (29 MB) and images (2.6 GB) from
@@ -21,9 +26,10 @@
 import argparse
 import random
 import tarfile
-import xml.etree.ElementTree as ET
+import xml.etree.ElementTree as ET  # noqa: N817
 from pathlib import Path
 
+
 # Labels matching microsoft/table-transformer-structure-recognition config
 LABEL_NAMES = [
     "table",
@@ -40,7 +46,7 @@
 
 def _parse_voc_xml(xml_bytes: bytes, label2id: dict[str, int]) -> dict | None:
     """Parse a PASCAL VOC XML annotation and return structured data."""
-    root = ET.fromstring(xml_bytes)
+    root = ET.fromstring(xml_bytes)  # noqa: S314
     filename = root.findtext("filename")
     size_el = root.find("size")
     if size_el is None or filename is None:
@@ -155,38 +161,43 @@ def build_dataset(output_dir: Path) -> None:
     print(f"  Extracted {len(images_by_name)} images")
 
     # Step 6: Build dataset rows (skip any missing images)
-    from datasets import ClassLabel, Dataset, Features, Image as HFImage, Sequence, Value
+    from datasets import ClassLabel, Dataset, Features, Sequence, Value
+    from datasets import Image as HFImage
 
     rows: list[dict] = []
     for idx, ann in enumerate(sampled):
         img = images_by_name.get(ann["filename"])
         if img is None:
             continue
-        rows.append({
-            "image_id": idx,
-            "image": img,
-            "width": ann["width"],
-            "height": ann["height"],
+        rows.append(
+            {
+                "image_id": idx,
+                "image": img,
+                "width": ann["width"],
+                "height": ann["height"],
+                "objects": {
+                    "bbox_id": ann["bbox_id"],
+                    "category": ann["category"],
+                    "bbox": ann["bbox"],
+                    "area": ann["area"],
+                },
+            }
+        )
+
+    features = Features(
+        {
+            "image_id": Value("int64"),
+            "image": HFImage(),
+            "width": Value("int64"),
+            "height": Value("int64"),
             "objects": {
-                "bbox_id": ann["bbox_id"],
-                "category": ann["category"],
-                "bbox": ann["bbox"],
-                "area": ann["area"],
+                "bbox_id": Sequence(Value("int64")),
+                "category": Sequence(ClassLabel(names=LABEL_NAMES)),
+                "bbox": Sequence(Sequence(Value("float64"), length=4)),
+                "area": Sequence(Value("float64")),
             },
-        })
-
-    features = Features({
-        "image_id": Value("int64"),
-        "image": HFImage(),
-        "width": Value("int64"),
-        "height": Value("int64"),
-        "objects": {
-            "bbox_id": Sequence(Value("int64")),
-            "category": Sequence(ClassLabel(names=LABEL_NAMES)),
-            "bbox": Sequence(Sequence(Value("float64"), length=4)),
-            "area": Sequence(Value("float64")),
-        },
-    })
+        }
+    )
 
     dataset = Dataset.from_list(rows, features=features)
     print(f"Saving {len(dataset)} samples to {output_dir} ...")
@@ -195,10 +206,8 @@ def build_dataset(output_dir: Path) -> None:
     print("Done.")
 
 
-def main() -> None:
-    parser = argparse.ArgumentParser(
-        description="Build PubTables-1M structure recognition dataset"
-    )
+def main() -> None:  # noqa: D103
+    parser = argparse.ArgumentParser(description="Build PubTables-1M structure recognition dataset")
     parser.add_argument("--output", type=Path, required=True, help="Output directory")
     args = parser.parse_args()
     build_dataset(args.output)