diff --git a/demos/README.md b/demos/README.md index e7f610a756..9e36ba5072 100644 --- a/demos/README.md +++ b/demos/README.md @@ -21,3 +21,13 @@ python3 demos/check_ckpt_for_gelu_shift.py \ `adam_vs_adamw.sh` trains two tiny Shakespeare models, one with Adam and one with AdamW, then compares their statistics using `view_model_stats.py`. + +## ExecuTorch export + +Use `export_ckpt_to_executorch.sh` to convert a training checkpoint into an ExecuTorch `.pte` program. + +```bash +./demos/export_ckpt_to_executorch.sh out/ckpt.pt +``` + +Pass a second argument to set the output path; any arguments after it are forwarded to the Python exporter. For example, `./demos/export_ckpt_to_executorch.sh out/ckpt.pt exports/shakespeare.pte --delegate xnnpack` writes the program to `exports/shakespeare.pte` using the XNNPACK delegate. diff --git a/demos/export_ckpt_to_executorch.sh b/demos/export_ckpt_to_executorch.sh new file mode 100755 index 0000000000..eb03ab1876 --- /dev/null +++ b/demos/export_ckpt_to_executorch.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ ${1:-} == "" ]]; then + echo "Usage: $0 <ckpt-path> [pte-path] [exporter options...]" + exit 1 +fi + +CKPT_PATH=$1 +PTE_PATH=${2:-} + +if [[ -n "$PTE_PATH" ]]; then + python -m model_exports.executorch.export_checkpoint --ckpt "$CKPT_PATH" --pte-path "$PTE_PATH" "${@:3}" +else + python -m model_exports.executorch.export_checkpoint --ckpt "$CKPT_PATH" "${@:2}" +fi diff --git a/hardware_targets/README.md b/hardware_targets/README.md new file mode 100644 index 0000000000..32f3853822 --- /dev/null +++ b/hardware_targets/README.md @@ -0,0 +1,18 @@ +# Hardware profiling targets + +This directory contains automation helpers for running exported ExecuTorch programs on specific devices. + +## Android + +Use `android/profile_pte.py` to stage a runner and `.pte` file onto an attached device via `adb`, invoke the runner, and parse energy/latency metrics emitted between `EXECUTORCH_METRICS_BEGIN` and `EXECUTORCH_METRICS_END` markers.
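+
+A typical invocation looks like the following. The runner path is illustrative (it depends on where you built the ExecuTorch runner), and the `.pte` path assumes the exporter's default output location next to `out/ckpt.pt`:
+
+```bash
+python hardware_targets/android/profile_pte.py \
+  --runner build/android/nanogpt_runner \
+  --pte out/executorch/ckpt.pte \
+  --prompt "Once upon a time"
+```
+
+Pass `--serial <device-serial>` when more than one device is attached, and `--remote-dir` to change the staging directory on the device (it defaults to `/data/local/tmp/nanogpt`).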
diff --git a/hardware_targets/android/profile_pte.py b/hardware_targets/android/profile_pte.py new file mode 100644 index 0000000000..91494a1842 --- /dev/null +++ b/hardware_targets/android/profile_pte.py @@ -0,0 +1,139 @@ +"""Utility for profiling ExecuTorch `.pte` programs on Android devices via `adb`.""" + +from __future__ import annotations + +import argparse +import json +import re +import shlex +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, Optional + +METRICS_BEGIN = "EXECUTORCH_METRICS_BEGIN" +METRICS_END = "EXECUTORCH_METRICS_END" + + +@dataclass(slots=True) +class MetricsSummary: + phase: str + tokens: int + latency_ms: float + energy_mj: float + + @property + def latency_per_token_ms(self) -> float: + return self.latency_ms / max(self.tokens, 1) + + @property + def energy_per_token_mj(self) -> float: + return self.energy_mj / max(self.tokens, 1) + + +def _adb_cmd(args: list[str], serial: Optional[str] = None, **kwargs: Any) -> subprocess.CompletedProcess[str]: + base = ["adb"] + if serial: + base += ["-s", serial] + result = subprocess.run(base + args, check=True, capture_output=True, text=True, **kwargs) + return result + + +def _extract_metrics(stdout: str) -> Dict[str, MetricsSummary]: + pattern = re.compile(rf"{METRICS_BEGIN}(.*?){METRICS_END}", re.DOTALL) + match = pattern.search(stdout) + if not match: + return {} + payload = match.group(1).strip() + data = json.loads(payload) + summaries: Dict[str, MetricsSummary] = {} + for phase, values in data.items(): + summaries[phase] = MetricsSummary( + phase=phase, + tokens=int(values.get("tokens", 0)), + latency_ms=float(values.get("latency_ms", 0.0)), + energy_mj=float(values.get("energy_mj", 0.0)), + ) + return summaries + + +def _format_summary(summary: MetricsSummary) -> str: + return ( + f"{summary.phase}: tokens={summary.tokens} " + f"latency={summary.latency_ms:.2f}ms (per token {summary.latency_per_token_ms:.2f}ms) " + f"energy={summary.energy_mj:.3f}mJ (per token {summary.energy_per_token_mj:.3f}mJ)" + ) + + +def profile(args: argparse.Namespace) -> None: + remote_dir = Path(args.remote_dir) + remote_dir_str = str(remote_dir) + remote_runner = remote_dir / Path(args.runner).name + remote_pte = remote_dir / Path(args.pte).name + + print(f"[INFO] Pushing runner to {remote_runner}") + _adb_cmd(["push", args.runner, str(remote_runner)], serial=args.serial) + print(f"[INFO] Pushing PTE to {remote_pte}") + _adb_cmd(["push", args.pte, str(remote_pte)], serial=args.serial) + + prompt = args.prompt or "Hello world!" + runner_invocation = ( + f"cd {shlex.quote(remote_dir_str)} && " + f"chmod +x {shlex.quote(remote_runner.name)} && " + f"echo {shlex.quote(prompt)} | " + f"{shlex.quote('./' + remote_runner.name)}" + ) + + print(f"[INFO] Launching runner via adb shell: {runner_invocation}") + result = _adb_cmd(["shell", runner_invocation], serial=args.serial) + stdout = result.stdout + if stdout: + print("[DEVICE OUTPUT]") + print(stdout) + + summaries = _extract_metrics(stdout) + if not summaries: + print( + "[WARN] No ExecuTorch metrics detected. Ensure the runner prints JSON between " + f"{METRICS_BEGIN} and {METRICS_END}." 
+ ) + return + + print("[INFO] Parsed metrics:") + for summary in summaries.values(): + print(" " + _format_summary(summary)) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--runner", required=True, help="Path to the compiled ExecuTorch runner binary.") + parser.add_argument("--pte", required=True, help="Path to the exported ExecuTorch .pte program.") + parser.add_argument( + "--remote-dir", + default="/data/local/tmp/nanogpt", + help="Directory on the device where artifacts will be staged.", + ) + parser.add_argument( + "--prompt", + help="Prompt text to feed into the runner. Defaults to 'Hello world!'.", + ) + parser.add_argument( + "--serial", + help="Optional adb serial number when multiple devices are connected.", + ) + return parser.parse_args() + + +def main() -> None: + args = parse_args() + try: + profile(args) + except FileNotFoundError as exc: + print(f"[ERROR] Failed to invoke external tool: {exc}") + except subprocess.CalledProcessError as exc: + print("[ERROR] adb command failed:") + print(exc.stderr) + + +if __name__ == "__main__": + main() diff --git a/hyperparam_search.py b/hyperparam_search.py index e738868dc4..f4e479762f 100644 --- a/hyperparam_search.py +++ b/hyperparam_search.py @@ -21,6 +21,7 @@ import os import subprocess import sys +from dataclasses import dataclass from contextlib import contextmanager import re from copy import deepcopy @@ -34,6 +35,56 @@ import ast +# ExecuTorch export settings +@dataclass(slots=True) +class ExecuTorchExportOptions: + enabled: bool = False + delegate: str = 'none' + smoke_test_tokens: int = 0 + smoke_test_prompt: str | None = None + tokenizer_vocab: Path | None = None + max_output_tokens: int = 32 + + +def maybe_export_executorch(ckpt_dir: Path, run_label: str, options: ExecuTorchExportOptions) -> None: + if not options.enabled: + return + + ckpt_path = ckpt_dir / 'ckpt.pt' + if not ckpt_path.exists(): + print(f"[WARN] ExecuTorch export skipped (missing {ckpt_path}).") + return + + try: + from model_exports.executorch.exporter import ExportConfig, export_checkpoint_to_pte + except ImportError as exc: + print(f"[WARN] ExecuTorch export unavailable: {exc}") + return + + export_dir = ckpt_path.parent / 'executorch' + export_dir.mkdir(parents=True, exist_ok=True) + safe_name = re.sub(r'[^A-Za-z0-9._-]+', '_', run_label).strip('_') or 'model' + pte_path = export_dir / f"{safe_name}.pte" + + config = ExportConfig( + delegate=options.delegate, + generate_etrecord=False, + smoke_test_tokens=max(0, options.smoke_test_tokens), + smoke_test_prompt=options.smoke_test_prompt, + tokenizer_path=options.tokenizer_vocab, + max_output_tokens=options.max_output_tokens, + metadata=True, + ) + + try: + export_checkpoint_to_pte(ckpt_path, pte_path, config) + print(f"[INFO] ExecuTorch export ready: {pte_path}") + except ImportError as exc: + print(f"[WARN] ExecuTorch export failed (missing dependency): {exc}") + except Exception as exc: + print(f"[ERROR] ExecuTorch export failed for {run_label}: {exc}") + + # ───────────────────────── helpers ────────────────────────── def dict_to_cli(d: Dict[str, Any]) -> List[str]: """ @@ -70,7 +121,11 @@ def patched_argv(argv: List[str]): sys.argv = old -def run_trial_inproc(cfg: Dict[str, Any]) -> Tuple[float, float, int, float, float]: +def run_trial_inproc( + cfg: Dict[str, Any], + export_options: ExecuTorchExportOptions, + run_label: str, +) -> Tuple[float, float, int, float, float]: """Return (best_val_loss, num_params, best_iter, 
peak_gpu_mb, iter_latency_ms).""" from train import Trainer from train_args import parse_args as parse_train_args @@ -85,13 +140,18 @@ def run_trial_inproc(cfg: Dict[str, Any]) -> Tuple[float, float, int, float, flo best_iter = int(getattr(tr, "iter_num_best_val_loss", 0)) peak_gpu_mb = float(getattr(tr, "peak_gpu_usage", 0.0) / (1024 ** 2)) iter_latency_ms = float(getattr(tr, "iter_latency_avg", 0.0)) + maybe_export_executorch(Path(cfg.get("out_dir", "out")), run_label, export_options) del tr torch.cuda.empty_cache() gc.collect() return loss, nparam, best_iter, peak_gpu_mb, iter_latency_ms -def run_trial_subproc(cfg: Dict[str, Any]) -> Tuple[float, float, int, float, float]: +def run_trial_subproc( + cfg: Dict[str, Any], + export_options: ExecuTorchExportOptions, + run_label: str, +) -> Tuple[float, float, int, float, float]: script_dir = Path(__file__).parent cmd = [sys.executable, str(script_dir / "train.py")] + dict_to_cli(cfg) env = {k: v for k, v in os.environ.items() if k not in {"RANK", "WORLD_SIZE"}} @@ -101,12 +161,13 @@ def run_trial_subproc(cfg: Dict[str, Any]) -> Tuple[float, float, int, float, fl raise RuntimeError("train.py failed") out_dir = Path(cfg.get("out_dir", "out")) - line = (out_dir / "best_val_loss_and_iter.txt").read_text().strip().split(",") + line = (out_dir / "best_val_loss_and_iter.txt").read_text().strip().split(',') loss = float(line[0]) best_iter = int(line[1]) nparam = float(line[2]) peak_gpu_mb = float(line[5]) iter_latency_ms = float(line[6]) + maybe_export_executorch(out_dir, run_label, export_options) torch.cuda.empty_cache() gc.collect() return loss, nparam, best_iter, peak_gpu_mb, iter_latency_ms @@ -174,6 +235,45 @@ def main(): "'vram' for peak GPU memory in MB, or 'iter' for average iteration latency in ms." 
), ) + ap.add_argument( + "--executorch_export", + dest="executorch_export", + action='store_true', + default=True, + help="Automatically export ExecuTorch programs for each candidate run.", + ) + ap.add_argument( + "--no-executorch-export", + dest="executorch_export", + action='store_false', + help="Disable ExecuTorch exports.", + ) + ap.add_argument( + "--executorch_delegate", + choices=['none', 'xnnpack'], + default='none', + help="Delegate to target when exporting to ExecuTorch.", + ) + ap.add_argument( + "--executorch_smoke_test_tokens", + type=int, + default=0, + help="If >0, run a random-token smoke test after export.", + ) + ap.add_argument( + "--executorch_smoke_test_prompt", + help="Optional prompt to evaluate with the exported program.", + ) + ap.add_argument( + "--executorch_tokenizer_vocab", + help="Path to a vocab.json for ExecuTorch prompt smoke tests.", + ) + ap.add_argument( + "--executorch_max_output_tokens", + type=int, + default=32, + help="Maximum decode tokens when running ExecuTorch smoke tests.", + ) @@ -186,7 +286,21 @@ def main(): sys.exit("--increments length mismatch") inc_map = dict(zip(args.param_names, args.increments)) - run_fn = run_trial_subproc if args.spawn_subprocess else run_trial_inproc + export_options = ExecuTorchExportOptions( + enabled=args.executorch_export, + delegate=args.executorch_delegate, + smoke_test_tokens=max(0, args.executorch_smoke_test_tokens), + smoke_test_prompt=args.executorch_smoke_test_prompt, + tokenizer_vocab=Path(args.executorch_tokenizer_vocab) if args.executorch_tokenizer_vocab else None, + max_output_tokens=args.executorch_max_output_tokens, + ) + + if args.spawn_subprocess: + def run_trial(cfg: Dict[str, Any], label: str) -> Tuple[float, float, int, float, float]: + return run_trial_subproc(cfg, export_options, label) + else: + def run_trial(cfg: Dict[str, Any], label: str) -> Tuple[float, float, int, float, float]: + return run_trial_inproc(cfg, export_options, label) baseline_cfg_master = yaml.safe_load(Path(args.orig_settings).read_text()) log_path = Path(args.results_file) @@ -259,9 +373,9 @@ def _extend_layerlists(cfg: Dict[str, Any], dup_idx: int) -> None: _apply_overrides_to_active_config(baseline_cfg, args.override_cfg, "initial baseline_cfg for new sweep") print("[BASELINE] measuring initial config …") - # run_fn receives a deepcopy of the (potentially overridden) baseline_cfg + # run_trial receives a deepcopy of the (potentially overridden) baseline_cfg - base_loss, base_params, base_best_iter, base_gpu, base_iter_ms = run_fn(deepcopy(baseline_cfg)) + base_loss, base_params, base_best_iter, base_gpu, base_iter_ms = run_trial(deepcopy(baseline_cfg), 'baseline') base_score = 1 / math.exp(base_loss) log["iterations"].append( { @@ -319,7 +433,7 @@ def _evaluate(cfg_template: Dict[str, Any], print(f"[TEST] {label_for_log}={value_for_log} seed={cfg_run['seed']}") try: - loss, nparam, best_it, peak_mb, iter_ms = run_fn(cfg_run) + loss, nparam, best_it, peak_mb, iter_ms = run_trial(cfg_run, f"{label_for_log}-seed{cfg_run['seed']}") except Exception as exc: print(" ⚠", exc) return # discard this candidate diff --git a/model_exports/executorch/__init__.py b/model_exports/executorch/__init__.py new file mode 100644 index 0000000000..5f72ef5b7d --- /dev/null +++ b/model_exports/executorch/__init__.py @@ -0,0 +1,5 @@ +"""ExecuTorch export utilities for nanoGPT checkpoints.""" + +from .exporter import ExportConfig, export_checkpoint_to_pte + +__all__ = ["ExportConfig", "export_checkpoint_to_pte"] diff --git 
a/model_exports/executorch/export_checkpoint.py b/model_exports/executorch/export_checkpoint.py new file mode 100644 index 0000000000..88a37aedb0 --- /dev/null +++ b/model_exports/executorch/export_checkpoint.py @@ -0,0 +1,86 @@ +"""Command line interface for exporting nanoGPT checkpoints to ExecuTorch.""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +from .exporter import ExportConfig, export_checkpoint_to_pte + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--ckpt", required=True, help="Path to the ckpt.pt file produced by training.") + parser.add_argument( + "--pte-path", + help=( + "Destination for the generated .pte file. If omitted, the exporter writes to " + "<checkpoint dir>/executorch/<checkpoint name>.pte" + ), + ) + parser.add_argument( + "--delegate", + default="none", + choices=["none", "xnnpack"], + help="ExecuTorch delegate to target during export.", + ) + parser.add_argument( + "--generate-etrecord", + action="store_true", + help="Generate an ETRecord artifact alongside the .pte file.", + ) + parser.add_argument( + "--smoke-test-tokens", + type=int, + default=0, + help="If >0, run a random-token smoke test against the exported program.", + ) + parser.add_argument( + "--smoke-test-prompt", + help="Optional prompt to evaluate with the exported program (requires --tokenizer-vocab).", + ) + parser.add_argument( + "--tokenizer-vocab", + type=Path, + help="Path to a vocab.json file compatible with the model. Required for prompt smoke tests.", + ) + parser.add_argument( + "--max-output-tokens", + type=int, + default=32, + help="Maximum number of tokens to request during smoke tests.", + ) + parser.add_argument( + "--no-metadata", + action="store_true", + help="Skip writing an export metadata JSON alongside the .pte file.", + ) + return parser.parse_args() + + +def main() -> None: + args = _parse_args() + ckpt_path = Path(args.ckpt) + if args.pte_path: + pte_path = Path(args.pte_path) + else: + default_dir = ckpt_path.parent / "executorch" + default_dir.mkdir(parents=True, exist_ok=True) + pte_path = default_dir / f"{ckpt_path.stem}.pte" + + config = ExportConfig( + delegate=args.delegate, + generate_etrecord=args.generate_etrecord, + smoke_test_tokens=args.smoke_test_tokens, + smoke_test_prompt=args.smoke_test_prompt, + tokenizer_path=args.tokenizer_vocab, + max_output_tokens=args.max_output_tokens, + metadata=not args.no_metadata, + ) + + export_checkpoint_to_pte(ckpt_path, pte_path, config) + print(f"[executorch] Exported program written to {pte_path}") + + +if __name__ == "__main__": + main() diff --git a/model_exports/executorch/exporter.py b/model_exports/executorch/exporter.py new file mode 100644 index 0000000000..80ae7bd4ef --- /dev/null +++ b/model_exports/executorch/exporter.py @@ -0,0 +1,304 @@ +"""Utilities to export nanoGPT checkpoints to ExecuTorch ``.pte`` programs.""" + +from __future__ import annotations + +import copy +import json +import os +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + +import torch + +from gpt_conf import GPTConfig +from model import GPT + + +@dataclass(slots=True) +class ExportConfig: + """Configuration parameters that control ExecuTorch exports.""" + + delegate: str = "none" + generate_etrecord: bool = False + smoke_test_tokens: int = 0 + smoke_test_prompt: Optional[str] = None + tokenizer_path: Optional[Path] = None + max_output_tokens: int = 32 + metadata: bool = True + + def validate(self) -> None: + if
self.delegate not in {"none", "xnnpack"}: + raise ValueError(f"Unsupported delegate '{self.delegate}'.") + if self.smoke_test_tokens < 0: + raise ValueError("smoke_test_tokens must be non-negative.") + if self.max_output_tokens <= 0: + raise ValueError("max_output_tokens must be positive.") + + +def _infer_vocab_size(model_args: dict) -> int: + vocab_size = model_args.get("vocab_size") + if vocab_size is None: + raise ValueError("Model arguments do not include 'vocab_size'.") + return int(vocab_size) + + +def _infer_block_size(model_args: dict) -> int: + block_size = model_args.get("block_size") + if block_size is None: + raise ValueError("Model arguments do not include 'block_size'.") + return int(block_size) + + +def _prepare_state_dict(state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: + """Normalize checkpoint parameter keys so they load with the current model.""" + + prepared = dict(state_dict) + prepared = _strip_module_prefix(prepared) + prepared = _convert_legacy_attention_weights(prepared) + return prepared + + +def _strip_module_prefix(state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: + """Drop common wrapper prefixes (DDP, torch.compile) from parameter names.""" + + if not state_dict: + return state_dict + + prefixes = ( + "module.", + "_orig_mod.", + "_orig_mod.module.", + "module._orig_mod.", + ) + + def strip_prefix( + params: dict[str, torch.Tensor], prefix: str + ) -> dict[str, torch.Tensor]: + if all(key.startswith(prefix) for key in params): + return {key[len(prefix) :]: value for key, value in params.items()} + return params + + prepared = state_dict + for prefix in prefixes: + updated = strip_prefix(prepared, prefix) + if updated is not prepared: + prepared = updated + + return prepared + + +def _convert_legacy_attention_weights( + state_dict: dict[str, torch.Tensor] +) -> dict[str, torch.Tensor]: + """Split combined QKV projections saved by older checkpoints.""" + + legacy_keys = [key for key in state_dict if key.endswith("attn.c_attn.weight")] + if not legacy_keys: + return state_dict + + updated = dict(state_dict) + for weight_key in legacy_keys: + bias_key = weight_key.replace(".weight", ".bias") + + weight = updated.pop(weight_key) + bias = updated.pop(bias_key, None) + + try: + q_weight, k_weight, v_weight = weight.chunk(3, dim=0) + except RuntimeError as err: # pragma: no cover - defensive path + raise RuntimeError( + f"Failed to split legacy attention weights for '{weight_key}': {err}" + ) from err + + base = weight_key.replace("c_attn.weight", "c_attn_") + updated[f"{base}q.weight"] = q_weight + updated[f"{base}k.weight"] = k_weight + updated[f"{base}v.weight"] = v_weight + + if bias is not None: + q_bias, k_bias, v_bias = bias.chunk(3, dim=0) + bias_base = bias_key.replace("c_attn.bias", "c_attn_") + updated[f"{bias_base}q.bias"] = q_bias + updated[f"{bias_base}k.bias"] = k_bias + updated[f"{bias_base}v.bias"] = v_bias + + return updated + + +def export_checkpoint_to_pte( + ckpt_path: os.PathLike | str, + output_path: os.PathLike | str, + export_config: Optional[ExportConfig] = None, +) -> Path: + """Convert ``ckpt.pt`` files generated by nanoGPT into ExecuTorch ``.pte`` files.""" + + export_config = export_config or ExportConfig() + export_config.validate() + + ckpt_path = Path(ckpt_path) + output_path = Path(output_path) + if ckpt_path.suffix == "": + raise ValueError("Checkpoint path must include a filename, not just a directory.") + if not ckpt_path.exists(): + raise FileNotFoundError(f"Checkpoint not found: {ckpt_path}") + 
output_path.parent.mkdir(parents=True, exist_ok=True) + + checkpoint = torch.load(ckpt_path, map_location="cpu") + if "model" not in checkpoint: + raise KeyError("Checkpoint does not contain 'model' weights.") + if "model_args" not in checkpoint: + raise KeyError("Checkpoint does not contain 'model_args'.") + + model_args = checkpoint["model_args"] + gptconf = GPTConfig(**model_args) + model = GPT(gptconf) + + state_dict = _prepare_state_dict(checkpoint["model"]) + missing, unexpected = model.load_state_dict(state_dict, strict=False) + if missing or unexpected: + raise RuntimeError( + "Checkpoint parameters do not match the GPT architecture. " + f"Missing keys: {sorted(missing)}; Unexpected keys: {sorted(unexpected)}" + ) + model.eval() + + vocab_size = _infer_vocab_size(model_args) + block_size = _infer_block_size(model_args) + + example_inputs = ( + torch.randint(0, vocab_size, (1, block_size), dtype=torch.long), + ) + dynamic_shape = ( + {1: torch.export.Dim("token_dim", max=block_size)}, + ) + + from torch.nn.attention import SDPBackend, sdpa_kernel + from torch.export import export, export_for_training + + with sdpa_kernel([SDPBackend.MATH]), torch.no_grad(): + training_program = export_for_training( + model, example_inputs, dynamic_shapes=dynamic_shape + ).module() + traced_program = export(training_program, example_inputs, dynamic_shapes=dynamic_shape) + + if export_config.delegate == "xnnpack": + from executorch.backends.xnnpack.partition.xnnpack_partitioner import ( + XnnpackPartitioner, + ) + from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config + from executorch.exir import to_edge_transform_and_lower + + edge_config = get_xnnpack_edge_compile_config() + edge_manager = to_edge_transform_and_lower( + traced_program, + partitioner=[XnnpackPartitioner()], + compile_config=edge_config, + ) + else: + from executorch.exir import EdgeCompileConfig, to_edge + + edge_config = EdgeCompileConfig(_check_ir_validity=False) + edge_manager = to_edge(traced_program, compile_config=edge_config) + + edge_manager_copy = copy.deepcopy(edge_manager) if export_config.generate_etrecord else None + et_program = edge_manager.to_executorch() + + output_path.write_bytes(et_program.buffer) + + if export_config.generate_etrecord and edge_manager_copy is not None: + from executorch.devtools import generate_etrecord + + etrecord_path = output_path.with_suffix(output_path.suffix + ".etrecord") + generate_etrecord(str(etrecord_path), edge_manager_copy, et_program) + + if export_config.metadata: + metadata_path = output_path.with_suffix(output_path.suffix + ".json") + metadata = { + "checkpoint": str(ckpt_path.resolve()), + "pte": str(output_path.resolve()), + "delegate": export_config.delegate, + "generate_etrecord": export_config.generate_etrecord, + "vocab_size": vocab_size, + "block_size": block_size, + "max_output_tokens": export_config.max_output_tokens, + } + metadata_path.write_text(json.dumps(metadata, indent=2)) + + if export_config.smoke_test_tokens: + _smoke_test_export( + output_path, + block_size=block_size, + vocab_size=vocab_size, + num_tokens=export_config.smoke_test_tokens, + ) + + if export_config.smoke_test_prompt: + _smoke_test_prompt( + output_path, + prompt=export_config.smoke_test_prompt, + max_input_length=block_size, + tokenizer_path=export_config.tokenizer_path, + max_output_tokens=export_config.max_output_tokens, + ) + + return output_path + + +def _smoke_test_export( + pte_path: Path, *, block_size: int, vocab_size: int, num_tokens: int +) -> None: + 
import numpy as np + + from executorch.extension.module import Module + from executorch.extension.tensor import from_numpy + + module = Module(str(pte_path)) + max_tokens = min(block_size, max(1, num_tokens)) + tokens = np.random.randint( + 0, + max(vocab_size, 1), + size=(1, max_tokens), + dtype=np.int64, + ) + inputs = from_numpy(tokens) + module.forward(inputs) + + +def _smoke_test_prompt( + pte_path: Path, + prompt: str, + max_input_length: int, + tokenizer_path: Optional[Path], + max_output_tokens: int, +) -> None: + if tokenizer_path is None: + raise ValueError("A tokenizer vocabulary is required to run prompt-based smoke tests.") + + import json as json_lib + + import numpy as np + + from executorch.extension.module import Module + from executorch.extension.tensor import from_numpy + + vocab_map = json_lib.loads(Path(tokenizer_path).read_text()) + token_ids: list[int] = [] + if isinstance(vocab_map, dict): + if prompt in vocab_map: + token_ids.append(int(vocab_map[prompt])) + else: + for piece in prompt.split(): + token_ids.append(int(vocab_map.get(piece, 0))) + if not token_ids: + token_ids = [0] + + token_array = np.array([token_ids[:max_input_length]], dtype=np.int64) + _ = max_output_tokens # placeholder to document intent for decode length handling + + module = Module(str(pte_path)) + inputs = from_numpy(token_array) + outputs = module.forward(inputs) + + if not outputs: + raise RuntimeError("ExecuTorch module returned no outputs during smoke test.") diff --git a/optimization_and_search/run_experiments.py b/optimization_and_search/run_experiments.py index 6c0c8632fc..f1181118ab 100644 --- a/optimization_and_search/run_experiments.py +++ b/optimization_and_search/run_experiments.py @@ -5,6 +5,7 @@ from itertools import product import argparse import os +import re from copy import deepcopy import yaml @@ -37,10 +38,10 @@ ] + + def parse_args() -> argparse.Namespace: - """ - Parse command-line arguments. - """ + """Parse command-line arguments.""" parser = argparse.ArgumentParser( description="Run experiments based on a configuration file (JSON or YAML)." ) @@ -80,66 +81,51 @@ def parse_args() -> argparse.Namespace: "to keep run identifiers shorter." ), ) + parser.add_argument( + '--executorch-export', + dest='executorch_export', + action='store_true', + default=True, + help='Automatically export ExecuTorch programs after each run.', + ) + parser.add_argument( + '--no-executorch-export', + dest='executorch_export', + action='store_false', + help='Disable automatic ExecuTorch exports.', + ) + parser.add_argument( + '--executorch-delegate', + choices=['none', 'xnnpack'], + default='none', + help='Delegate to target when exporting to ExecuTorch.', + ) + parser.add_argument( + '--executorch-smoke-test-tokens', + type=int, + default=0, + help='If >0, run a random-token smoke test after export.', + ) + parser.add_argument( + '--executorch-smoke-test-prompt', + help='Optional prompt to evaluate with the exported program.', + ) + parser.add_argument( + '--executorch-tokenizer-vocab', + help='Path to a vocab.json for ExecuTorch prompt smoke tests.', + ) + parser.add_argument( + '--executorch-max-output-tokens', + type=int, + default=32, + help='Maximum decode tokens when running ExecuTorch smoke tests.', + ) return parser.parse_args() -def load_configurations(path: str, fmt: str) -> list[dict]: - """ - Load experiment configurations from a JSON or YAML file. - - Args: - path: File path. - fmt: 'json' or 'yaml'. - - Returns: - A list of configuration dictionaries. 
- """ - text = Path(path).read_text() - if fmt == 'yaml': - # YAML may contain multiple documents or a single list - loaded = list(yaml.safe_load_all(text)) - # Flatten if outer list-of-lists - if len(loaded) == 1 and isinstance(loaded[0], list): - return loaded[0] - return loaded - else: - return json.loads(text) - - -RUN_NAME_VAR = "${RUN_NAME}" - - -def expand_range(val): - """Expand dicts with 'range' into a list of values.""" - if isinstance(val, dict) and 'range' in val: - r = val['range'] - start, end = r['start'], r['end'] - step = r.get('step', 1 if isinstance(start, int) else 0.1) - if isinstance(start, int): - return list(range(start, end + 1, step)) - count = int(round((end - start) / step)) + 1 - return [start + i * step for i in range(count)] - return val - - -def _substitute_run_name(obj, run_name: str): - """Recursively substitute the run name placeholder inside ``obj``.""" - if isinstance(obj, str): - return obj.replace(RUN_NAME_VAR, run_name) - if isinstance(obj, list): - return [_substitute_run_name(o, run_name) for o in obj] - if isinstance(obj, dict): - return {k: _substitute_run_name(v, run_name) for k, v in obj.items()} - return obj - - -def _ensure_list(value): - if value is None: - return [] - if isinstance(value, list): - return value - return [value] - +def _sanitize_run_name(name: str) -> str: + sanitized = re.sub(r'[^A-Za-z0-9._-]+', '_', name).strip('_') + return sanitized or 'model' def _merge_parameter_groups(existing, new): existing_list = [] @@ -582,6 +568,49 @@ def append_progress(log_file: Path, message: str) -> None: f.write(f"[{timestamp}] {message}\n") +def maybe_export_executorch(run_name: str, out_dir: str, args: argparse.Namespace) -> None: + if not getattr(args, 'executorch_export', False): + return + + ckpt_path = Path(out_dir) / 'ckpt.pt' + if not ckpt_path.exists(): + print(f"[yellow]ExecuTorch export skipped (missing {ckpt_path}).") + return + + try: + from model_exports.executorch.exporter import ExportConfig, export_checkpoint_to_pte + except ImportError as exc: + print(f"[yellow]ExecuTorch export unavailable: {exc}") + return + + export_dir = ckpt_path.parent / 'executorch' + export_dir.mkdir(parents=True, exist_ok=True) + export_name = _sanitize_run_name(run_name) + pte_path = export_dir / f"{export_name}.pte" + + tokenizer_path = getattr(args, 'executorch_tokenizer_vocab', None) + if tokenizer_path: + tokenizer_path = Path(tokenizer_path) + + config = ExportConfig( + delegate=getattr(args, 'executorch_delegate', 'none'), + generate_etrecord=False, + smoke_test_tokens=max(0, getattr(args, 'executorch_smoke_test_tokens', 0)), + smoke_test_prompt=getattr(args, 'executorch_smoke_test_prompt', None), + tokenizer_path=tokenizer_path, + max_output_tokens=getattr(args, 'executorch_max_output_tokens', 32), + metadata=True, + ) + + try: + export_checkpoint_to_pte(ckpt_path, pte_path, config) + print(f"[green]ExecuTorch export complete:[/] {pte_path}") + except ImportError as exc: + print(f"[yellow]ExecuTorch export failed (missing dependency): {exc}") + except Exception as exc: + print(f"[red]ExecuTorch export failed for {run_name}: {exc}") + + def build_command(combo: dict) -> list[str]: """ Construct the command-line invocation for train.py. @@ -654,6 +683,8 @@ def run_experiment( except subprocess.CalledProcessError: print(f"[red]Process exited with error for run:[/] {run_name}") + maybe_export_executorch(run_name, combo['out_dir'], args) + # Read metrics (use existing or nan on failure) try: metrics = read_metrics(str(combo['out_dir']))