diff --git a/python/tests/_runner_infra/run/runners/coreai_runner.py b/python/tests/_runner_infra/run/runners/coreai_runner.py index 9908fae..9214466 100644 --- a/python/tests/_runner_infra/run/runners/coreai_runner.py +++ b/python/tests/_runner_infra/run/runners/coreai_runner.py @@ -9,22 +9,16 @@ import shutil from contextlib import AsyncExitStack from pathlib import Path -from typing import TYPE_CHECKING import torch +from coreai.authoring import AIProgram +from coreai.runtime import InferenceFunction, NDArray from typing_extensions import Self, override from ...common.types.dependency_types import Tensor from ...common.utils.coreai.tensor import ndarray_to_torch_tensor from .runner import Runner -if TYPE_CHECKING: - from coreai.authoring import AIProgram - from coreai.runtime import InferenceFunction, NDArray -else: - # NDArray is needed at runtime inside this module to construct inputs. - from coreai.runtime import InferenceFunction, NDArray - class CoreaiRuntime: async def _async_load_model( @@ -32,6 +26,8 @@ async def _async_load_model( coreai_program: AIProgram, asset_path: Path, ) -> InferenceFunction: + from tests.conftest import get_test_specialization_options + # `asset.executable()` requires the asset directory to end in `.aimodel`. aimodel_path = asset_path.parent / "model.aimodel" if aimodel_path.exists(): @@ -40,7 +36,9 @@ async def _async_load_model( # Keep the executable context open for the lifetime of this runtime so # subsequent forward() calls reuse the loaded AIModel. The context is # released when self._exit_stack is closed (or at process exit). - ai_model = await self._exit_stack.enter_async_context(asset.executable()) + ai_model = await self._exit_stack.enter_async_context( + asset.executable(specialization_options=get_test_specialization_options()) + ) return ai_model.load_function("main") def __init__( @@ -53,6 +51,7 @@ def __init__( self._output_names = output_names self._exit_stack = AsyncExitStack() self._function = asyncio.run(self._async_load_model(coreai_program, asset_path)) + self._dumped = False async def _async_forward( self: Self, named_inputs: dict[str, Tensor] @@ -70,6 +69,22 @@ async def _async_forward( coreai_outputs: dict[str, NDArray] = await self._function(coreai_inputs) + if not self._dumped: + from tests.conftest import ( + _dump_optest_artifacts, + dump_optests_enabled, + get_current_test_id, + optest_dump_path, + ) + + if dump_optests_enabled() and get_current_test_id(): + dump_path = optest_dump_path(get_current_test_id()) + dump_path.mkdir(parents=True, exist_ok=True) + _dump_optest_artifacts( + self._coreai_program, coreai_inputs, coreai_outputs, dump_path + ) + self._dumped = True + outputs: dict[str, torch.Tensor] = { name: ndarray_to_torch_tensor(tensor) for name, tensor in coreai_outputs.items() } diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 0000000..dcb6da0 --- /dev/null +++ b/python/tests/conftest.py @@ -0,0 +1,152 @@ +# Copyright 2026 Apple Inc. +# +# Use of this source code is governed by a BSD-3-clause license that can +# be found in the LICENSE file or at https://opensource.org/licenses/BSD-3-Clause + +"""Root test configuration.""" + +from __future__ import annotations + +import os +import platform +from pathlib import Path +from typing import Any + +import numpy as np +import pytest +from coreai.runtime import NDArray, SpecializationOptions + +_current_test_id: str = "" +_dump_optests: bool = False + +_COMPUTE_UNIT_KIND_CHOICES = ("interpreter", "cpu", "gpu", "neural_engine") +_COMPUTE_UNIT_KIND_DEFAULT = "interpreter" +_compute_unit_kind: str = _COMPUTE_UNIT_KIND_DEFAULT + + +@pytest.fixture(autouse=True) +def update_current_test_id(request: pytest.FixtureRequest) -> None: + """Track the running test id so the runtime layer can name dump dirs.""" + global _current_test_id + _current_test_id = request.node.nodeid + + +def get_current_test_id() -> str: + return _current_test_id + + +def dump_optests_enabled() -> bool: + return _dump_optests + + +def get_test_specialization_options() -> SpecializationOptions | None: + """Translate ``--compute-unit-kind`` into ``SpecializationOptions`` (or None). + + On non-macOS platforms only ``interpreter`` is supported — the runtime + does not expose ``SpecializationOptions`` outside Darwin. + """ + if _compute_unit_kind == "interpreter": + return None + if platform.system() != "Darwin": + msg = ( + f"--compute-unit-kind={_compute_unit_kind} is only supported on macOS; " + "use --compute-unit-kind=interpreter on this platform." + ) + raise RuntimeError(msg) + from coreai.runtime import ( # type: ignore[attr-defined] + ComputeUnitKind, + SpecializationOptions, + ) + + if _compute_unit_kind == "cpu": + return SpecializationOptions.cpu_only() + if _compute_unit_kind == "gpu": + return SpecializationOptions.from_preferred_compute_unit_kind( + compute_unit_kind=ComputeUnitKind.gpu(), + ) + if _compute_unit_kind == "neural_engine": + return SpecializationOptions.from_preferred_compute_unit_kind( + compute_unit_kind=ComputeUnitKind.neural_engine(), + ) + msg = f"Unknown compute unit kind: {_compute_unit_kind!r}" + raise ValueError(msg) + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Register CLI options.""" + parser.addoption( + "--compute-unit-kind", + choices=list(_COMPUTE_UNIT_KIND_CHOICES), + default=_COMPUTE_UNIT_KIND_DEFAULT, + help=( + "Compute unit used by validate_numerical_output:\n" + " interpreter (default) - bundled runtime (USE_LOCAL_COREAI=1)\n" + " cpu - SpecializationOptions.cpu_only() (BNNS)\n" + " gpu - preferred ComputeUnitKind.gpu() (MPSGraph)\n" + " neural_engine - preferred ComputeUnitKind.neural_engine()\n" + "Anything other than 'interpreter' unsets USE_LOCAL_COREAI so the OS\n" + "runtime is used." + ), + ) + parser.addoption( + "--dump-optests", + action="store_true", + default=False, + help="Trigger optest dumping", + ) + + +def pytest_configure(config: pytest.Config) -> None: + global _dump_optests, _compute_unit_kind + _dump_optests = config.getoption("--dump-optests") + _compute_unit_kind = config.getoption("--compute-unit-kind") + if _compute_unit_kind == "interpreter": + os.environ.setdefault("USE_LOCAL_COREAI", "1") + else: + os.environ.pop("USE_LOCAL_COREAI", None) + + +def optest_dump_path(test_id: str) -> Path: + """Map a pytest nodeid to ``op_tests//``. + + Example: ``python/tests/test_model_units/test_primitives/test_macos/test_rope.py::TestRoPE::test_rope[Llama3RoPE-f32]`` + -> ``op_tests/test_model_units/test_primitives/test_macos/test_rope/TestRoPE_test_rope-params-Llama3RoPE-f32`` + """ + raw = test_id + for prefix in ("python/tests/", "tests/"): + if raw.startswith(prefix): + raw = raw[len(prefix) :] + break + dir_part, test = raw.split(".py", maxsplit=1) + test = test.removeprefix("::").replace("::", "_").replace("[", "-params-").replace("]", "") + return Path(f"op_tests/{dir_part}") / test + + +def _add_npz_entry(io_numpy: dict[str, np.ndarray], key: str, arr: np.ndarray) -> None: + """Add an array to the npz, emitting a ``_dtype_='bf16'`` companion if void16.""" + io_numpy[key] = arr + if arr.dtype.str == "|V2": + io_numpy[f"_dtype_{key}"] = np.array("bf16") + + +def _dump_optest_artifacts( + coreai_program: Any, + inputs: dict[str, NDArray], + rt_outputs: dict[str, NDArray], + dump_path: Path, +) -> None: + """Write a `.aimodel` + `_test_data.npz` pair. + + Format: aimodel prefix == npz prefix == dump_path + leaf name; npz holds an ``op_name`` scalar plus ``input_`` / + ``output_`` keys. + """ + testname = dump_path.name + coreai_program.save_asset(dump_path / f"{testname}.aimodel") + + io_numpy: dict[str, np.ndarray] = {"op_name": np.array("main")} + for name, arr in inputs.items(): + _add_npz_entry(io_numpy, f"input_{name}", arr.numpy()) + for name, arr in rt_outputs.items(): + _add_npz_entry(io_numpy, f"output_{name}", arr.numpy()) + np.savez(dump_path / f"{testname}_test_data.npz", **io_numpy)