fix(cli): restore winml --help startup speed (6.1s → 0.44s)

Zheng Te · Zheng Te · commit ef01bb80e09d · 2026-04-28T13:52:47.000+08:00
_warnings.py was eagerly importing torch.jit at module load, dragging
all of torch (~1.7s) into every winml CLI invocation. The except
ImportError: branch of the guard was unreachable since torch is a hard
dependency in this project. Removed the filter; build.py already wraps
export_onnx() in catch_warnings()+filterwarnings("ignore"), which is
strictly broader than the deleted TracerWarning-only filter.

Also:
- onnx/__init__.py: standardize on _LAZY_IMPORTS dict pattern, matching
  the other 6 subpackages and fixing 3 TestLazyImportsDict failures.
- sysinfo/device.py: add @lru_cache(maxsize=1) to _get_available_devices,
  mirroring the existing decorator on _get_available_eps. Fixes a CI
  flake where winml config -m <hf-model> --device <X> would re-run
  Windows WMI/PowerShell hardware probes on every resolve_device call,
  ballooning to 280s+ on cold runners. With the cache, the 2nd call is
  ~1M× faster (subprocess work happens once per process).
- tests/cli/: new top-level category for cross-cutting CLI-surface
  tests; moved test_import_time.py and test_main.py here.
- tests/cli/test_import_time.py: removed TestCommandWithModel — those
  tests invoke handler bodies (feature pipeline territory), not CLI
  surface. Per-command runtime import budgets belong in
  tests/unit/commands/ where mocks isolate dispatch from feature code.
- modelkit-ci.yml: include tests/cli in the "remaining" matrix group.
  Previously test_import_time.py at tests/ root sat outside every
  enumerated CI path, so the regression-detecting tests never ran.
- tests/CLAUDE.md: document the tests/cli/ category and require CI
  matrix updates when adding new top-level test directories.

Constraint: torch is a hard dependency, so try:/except ImportError on torch.* is unreachable
Constraint: Hardware doesn't change during a process lifetime; lru_cache pattern is already established by _get_available_eps
Rejected: Relocate TracerWarning filter into a torch-loaded code path | build.py's catch_warnings is strictly broader; duplication not worthwhile
Rejected: Change _get_available_devices to return frozenset/tuple for cache safety | larger refactor with public-API ripples; current callers only iterate
Confidence: high
Scope-risk: narrow
Directive: Never use try:/except ImportError on a required dependency at startup — use a function-scoped lazy import if you don't want to pay the cost. Never add a top-level tests/ category without also adding it to .github/workflows/modelkit-ci.yml's path matrix. When two probe helpers have parallel structure and identical "doesn't change at runtime" justification, they should both have @lru_cache.
Not-tested: winml export (direct CLI path) now emits TracerWarning noise (UX-only). Eager-probe before device check in resolve_device is now near-zero cost on cached repeat calls — could be cleaned up for clarity, not perf.
diff --git a/.github/workflows/modelkit-ci.yml b/.github/workflows/modelkit-ci.yml
@@ -39,7 +39,7 @@ jobs:
             paths: >-
               tests/unit/core tests/unit/onnx tests/unit/cache
               tests/unit/utils tests/unit/sysinfo tests/unit/inspect
-              tests/unit/optracing tests/regression
+              tests/unit/optracing tests/regression tests/cli
 
     name: test (${{ matrix.group }})
 
diff --git a/src/winml/modelkit/_warnings.py b/src/winml/modelkit/_warnings.py
@@ -78,14 +78,13 @@ def filter(self, record: logging.LogRecord) -> bool:
     for _cat in (FutureWarning, DeprecationWarning, UserWarning):
         warnings.filterwarnings("ignore", category=_cat, module=r"torch\..*")
 
-    # TracerWarning (from torch.jit, inherits Warning not UserWarning)
-    # fires during ONNX export tracing — safe to suppress in both torch and transformers
-    try:
-        from torch.jit import TracerWarning
-
-        warnings.filterwarnings("ignore", category=TracerWarning)
-    except ImportError:
-        pass  # torch not installed
+    # NOTE: TracerWarning (from torch.jit) is intentionally NOT filtered here.
+    # Importing torch.jit at startup would pull all of torch (~1.7s) into
+    # `winml --help` and violate the CLI import budget (tests/cli/test_import_time.py).
+    # During ONNX export, build.py already wraps the export call in
+    # `warnings.catch_warnings()` + `filterwarnings("ignore")`, which is strictly
+    # broader than a TracerWarning-only filter. Direct callers of export_pytorch()
+    # that want the same suppression can apply it locally at the call site.
 
     # Diffusers
     warnings.filterwarnings(
diff --git a/src/winml/modelkit/onnx/__init__.py b/src/winml/modelkit/onnx/__init__.py
@@ -47,16 +47,24 @@
 ]
 
 
+_LAZY_IMPORTS: dict[str, tuple[str, str]] = {
+    "is_compiled_onnx": (".detection", "is_compiled_onnx"),
+    "is_quantized_onnx": (".detection", "is_quantized_onnx"),
+}
+
+
 def __getattr__(name: str):
     """Lazy-load detection module to avoid circular import with compiler."""
-    if name in ("is_compiled_onnx", "is_quantized_onnx"):
-        from .detection import is_compiled_onnx, is_quantized_onnx
+    if name in _LAZY_IMPORTS:
+        module_path, attr_name = _LAZY_IMPORTS[name]
+        import importlib
 
-        globals()["is_compiled_onnx"] = is_compiled_onnx
-        globals()["is_quantized_onnx"] = is_quantized_onnx
-        return globals()[name]
+        mod = importlib.import_module(module_path, __name__)
+        val = getattr(mod, attr_name)
+        globals()[name] = val
+        return val
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
 
 def __dir__() -> list[str]:
-    return __all__
+    return list(set(list(globals()) + __all__))
diff --git a/src/winml/modelkit/sysinfo/device.py b/src/winml/modelkit/sysinfo/device.py
@@ -74,13 +74,24 @@ def get_ep_device_map() -> dict[str, str]:
     return dict(_EP_DEVICE_MAP)
 
 
+@functools.lru_cache(maxsize=1)
 def _get_available_devices() -> list[str]:
-    """Return prioritized list of available devices.
+    """Return prioritized list of available devices (cached).
 
     Priority: NPU > GPU > CPU.
     Always includes "cpu" as fallback.
     Uses SysInfo hardware classes for detection.
 
+    Hardware does not change during a process lifetime, so this result is
+    cached via lru_cache (mirrors ``_get_available_eps``). Without this
+    cache, ``resolve_device`` calls within a single CLI invocation each
+    re-run Windows WMI/PowerShell subprocesses (~1.2s/call locally,
+    5-10x slower on cold CI), which on Windows CI runners has caused
+    user-facing commands like ``winml config -m <model> --device npu``
+    to balloon past 280s.
+
+    Callers must not mutate the returned list (it is shared across calls).
+
     This is an internal helper for :func:`resolve_device` and should not
     be called directly by external code.
 
diff --git a/tests/CLAUDE.md b/tests/CLAUDE.md
@@ -8,10 +8,16 @@ Reference: [`/docs/pytest-best-practices.md`](/docs/pytest-best-practices.md)
 
 - Place unit tests under `tests/unit/<module>/` mirroring `src/winml/modelkit/<module>/`
 - Place integration tests under `tests/integration/`, e2e under `tests/e2e/`
+- Place cross-cutting CLI-surface tests (startup, import budget, arg parsing,
+  command discovery, version/help output) under `tests/cli/` — these don't
+  mirror any single `src/` module, so they don't fit under `tests/unit/<module>/`
 - Put shared fixtures in the narrowest `conftest.py` that covers all consumers
 
 ## Never
 
 - Create module directories directly under `tests/` — use `tests/unit/<module>/` instead
 - Put `test_*.py` files in `assets/`, `fixtures/`, or `mock_data/` — those are helpers only
 - Duplicate fixtures across multiple `conftest.py` files
+- Add a new top-level category under `tests/` without also adding it to the
+  `.github/workflows/modelkit-ci.yml` path matrix — CI enumerates paths
+  explicitly, so a new directory is invisible to CI until it's listed
diff --git a/tests/cli/test_import_time.py b/tests/cli/test_import_time.py
@@ -34,7 +34,9 @@
 def _discover_command_names() -> list[str]:
     from pathlib import Path
 
-    root = Path(__file__).resolve().parent.parent
+    # Walk up until we find the repo root (marked by pyproject.toml).
+    # Resilient to this file's depth within tests/.
+    root = next(p for p in Path(__file__).resolve().parents if (p / "pyproject.toml").exists())
     commands_dir = root / "src" / "winml" / "modelkit" / "commands"
     return sorted(f.stem for f in commands_dir.glob("*.py") if not f.name.startswith("_"))
 
@@ -151,7 +153,6 @@ class TestModuleIsolation:
             "winml.modelkit.loader",
             "winml.modelkit.onnx",
             "winml.modelkit.optim",
-            "winml.modelkit.optracing",
             "winml.modelkit.quant",
             "winml.modelkit.session",
             "winml.modelkit.analyze",
@@ -346,20 +347,25 @@ def test_lazy_imports_all_consistent(self, module: str) -> None:
 
     @pytest.mark.parametrize("module", _LAZY_MODULES)
     def test_lazy_imports_all_resolvable(self, module: str) -> None:
-        """Every _LAZY_IMPORTS entry must resolve to a real attribute."""
+        """Every _LAZY_IMPORTS entry must resolve to a real attribute.
+
+        Convention: ``_LAZY_IMPORTS`` maps a lazy attribute name to a
+        ``(submodule_path, real_attr_name)`` tuple, where ``submodule_path``
+        is relative (e.g. ``".config"``) resolved against the host package.
+        """
         script = textwrap.dedent(f"""\
             import importlib
             import {module} as mod
             errors = []
-            for attr_name, submodule_path in mod._LAZY_IMPORTS.items():
+            for lazy_name, (submodule_path, real_attr) in mod._LAZY_IMPORTS.items():
                 try:
-                    sub = importlib.import_module(submodule_path)
-                    if not hasattr(sub, attr_name):
+                    sub = importlib.import_module(submodule_path, package={module!r})
+                    if not hasattr(sub, real_attr):
                         errors.append(
-                            f'{{attr_name}}: {{submodule_path}} has no attribute {{attr_name}}'
+                            f'{{lazy_name}}: {{submodule_path}}.{{real_attr}} not found'
                         )
                 except ImportError as exc:
-                    errors.append(f'{{attr_name}}: cannot import {{submodule_path}} ({{exc}})')
+                    errors.append(f'{{lazy_name}}: cannot import {{submodule_path}} ({{exc}})')
             if errors:
                 raise AssertionError(
                     f'Unresolvable _LAZY_IMPORTS in {module}:\\n' + '\\n'.join(errors)
@@ -393,68 +399,12 @@ def test_command_help_no_heavy_deps(self, cmd: str) -> None:
         assert_cli_no_heavy_imports([cmd, "--help"])
 
 
-# ===========================================================================
-# (B) Per-Command Tests — with --model (actual command execution)
-# ===========================================================================
-
-_FAKE_ONNX = "nonexistent_test_model.onnx"
-_HF_MODEL = "microsoft/resnet-50"
-
-
-class TestCommandWithModel:
-    """Verify import budgets when commands are invoked with --model.
-
-    Commands that operate on ONNX files should NOT need torch/transformers.
-    Commands that operate on HF models legitimately need them.
-
-    We use a fake model path so commands fail at file I/O, but the import
-    chain is already established by that point.
-    """
-
-    @pytest.mark.parametrize(
-        ("cmd_args", "allowed"),
-        [
-            # ONNX-path commands — should NOT need torch/transformers
-            (
-                ["compile", "--model", _FAKE_ONNX, "-o", "o.onnx", "--ep", "qnn"],
-                (),
-            ),
-            (
-                ["quantize", "--model", _FAKE_ONNX, "-o", "o.onnx", "--ep", "qnn"],
-                (),
-            ),
-            (
-                ["optimize", "--model", _FAKE_ONNX, "-o", "o.onnx"],
-                ("torch", "torchgen"),  # ORT tools.__init__ pulls torch
-            ),
-            (
-                ["perf", "--model", _FAKE_ONNX],
-                (),
-            ),
-            (
-                ["static-analyzer", "check", "--model", _FAKE_ONNX, "--ep", "qnn"],
-                ("torch", "torchgen"),  # ORT tools.__init__ pulls torch
-            ),
-            # HF model commands — legitimately need heavy deps
-            (
-                ["inspect", "-m", _HF_MODEL],
-                (*HEAVY_PREFIXES, "torchgen", "torchvision"),
-            ),
-            (
-                ["config", "-m", _HF_MODEL, "--device", "npu", "--precision", "int8"],
-                (*HEAVY_PREFIXES, "torchgen", "torchvision"),
-            ),
-        ],
-        ids=[
-            "compile-onnx",
-            "quantize-onnx",
-            "optimize-onnx",
-            "perf-onnx",
-            "static-analyzer-onnx",
-            "inspect-hf",
-            "config-hf",
-        ],
-    )
-    def test_command_import_budget(self, cmd_args: list[str], allowed: tuple[str, ...]) -> None:
-        """Verify each command's import budget with --model."""
-        assert_cli_no_heavy_imports(cmd_args, allowed=allowed)
+# Note: tests that invoke commands with --model belong elsewhere (they
+# exercise handler bodies — feature-pipeline territory, not CLI surface).
+# The init-time guarantees here cover what's loaded by:
+#   - importing winml.modelkit.* subpackages (TestModuleIsolation)
+#   - winml --help and winml <cmd> --help (TestCommandHelp)
+#   - lazy-import dict structure (TestLazyImportsDict)
+# Per-command runtime import budgets (e.g., "winml compile --model X.onnx
+# should not pull torch") should be verified in tests/unit/commands/ where
+# mocks can isolate the dispatch from the feature pipeline.
diff --git a/tests/cli/test_main.py b/tests/cli/test_main.py