diff --git a/demos/README.md b/demos/README.md index e7f610a756..9e36ba5072 100644 --- a/demos/README.md +++ b/demos/README.md @@ -21,3 +21,13 @@ python3 demos/check_ckpt_for_gelu_shift.py \ `adam_vs_adamw.sh` trains two tiny Shakespeare models, one with Adam and one with AdamW, then compares their statistics using `view_model_stats.py`. + +## ExecuTorch export + +Use `export_ckpt_to_executorch.sh` to convert a training checkpoint into an ExecuTorch `.pte` program. + +```bash +./demos/export_ckpt_to_executorch.sh out/ckpt.pt +``` + +Pass a second argument to set the output path; any arguments after it are forwarded to the Python exporter. For example, `./demos/export_ckpt_to_executorch.sh out/ckpt.pt exports/shakespeare.pte --delegate xnnpack` writes the program to `exports/shakespeare.pte` using the XNNPACK delegate. diff --git a/demos/export_ckpt_to_executorch.sh b/demos/export_ckpt_to_executorch.sh new file mode 100755 index 0000000000..eb03ab1876 --- /dev/null +++ b/demos/export_ckpt_to_executorch.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ ${1:-} == "" ]]; then + echo "Usage: $0 <ckpt-path> [pte-path] [exporter options...]" + exit 1 +fi + +CKPT_PATH=$1 +PTE_PATH=${2:-} + +if [[ -n "$PTE_PATH" ]]; then + python -m model_exports.executorch.export_checkpoint --ckpt "$CKPT_PATH" --pte-path "$PTE_PATH" "${@:3}" +else + python -m model_exports.executorch.export_checkpoint --ckpt "$CKPT_PATH" "${@:2}" +fi diff --git a/hardware_targets/README.md b/hardware_targets/README.md new file mode 100644 index 0000000000..32f3853822 --- /dev/null +++ b/hardware_targets/README.md @@ -0,0 +1,18 @@ +# Hardware profiling targets + +This directory contains automation helpers for running exported ExecuTorch programs on specific devices. + +## Android + +Use `android/profile_pte.py` to stage a runner and `.pte` file onto an attached device via `adb`, invoke the runner, and parse energy/latency metrics emitted between `EXECUTORCH_METRICS_BEGIN` and `EXECUTORCH_METRICS_END` markers.
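+
+A typical invocation looks like the following. The runner path is illustrative (it depends on where you built the ExecuTorch runner), and the `.pte` path assumes the exporter's default output location next to `out/ckpt.pt`:
+
+```bash
+python hardware_targets/android/profile_pte.py \
+  --runner build/android/nanogpt_runner \
+  --pte out/executorch/ckpt.pte \
+  --prompt "Once upon a time"
+```
+
+Pass `--serial <device-serial>` when more than one device is attached, and `--remote-dir` to change the staging directory on the device (it defaults to `/data/local/tmp/nanogpt`).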
diff --git a/hardware_targets/android/profile_pte.py b/hardware_targets/android/profile_pte.py new file mode 100644 index 0000000000..91494a1842 --- /dev/null +++ b/hardware_targets/android/profile_pte.py @@ -0,0 +1,139 @@ +"""Utility for profiling ExecuTorch `.pte` programs on Android devices via `adb`.""" + +from __future__ import annotations + +import argparse +import json +import re +import shlex +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, Optional + +METRICS_BEGIN = "EXECUTORCH_METRICS_BEGIN" +METRICS_END = "EXECUTORCH_METRICS_END" + + +@dataclass(slots=True) +class MetricsSummary: + phase: str + tokens: int + latency_ms: float + energy_mj: float + + @property + def latency_per_token_ms(self) -> float: + return self.latency_ms / max(self.tokens, 1) + + @property + def energy_per_token_mj(self) -> float: + return self.energy_mj / max(self.tokens, 1) + + +def _adb_cmd(args: list[str], serial: Optional[str] = None, **kwargs: Any) -> subprocess.CompletedProcess[str]: + base = ["adb"] + if serial: + base += ["-s", serial] + result = subprocess.run(base + args, check=True, capture_output=True, text=True, **kwargs) + return result + + +def _extract_metrics(stdout: str) -> Dict[str, MetricsSummary]: + pattern = re.compile(rf"{METRICS_BEGIN}(.*?){METRICS_END}", re.DOTALL) + match = pattern.search(stdout) + if not match: + return {} + payload = match.group(1).strip() + data = json.loads(payload) + summaries: Dict[str, MetricsSummary] = {} + for phase, values in data.items(): + summaries[phase] = MetricsSummary( + phase=phase, + tokens=int(values.get("tokens", 0)), + latency_ms=float(values.get("latency_ms", 0.0)), + energy_mj=float(values.get("energy_mj", 0.0)), + ) + return summaries + + +def _format_summary(summary: MetricsSummary) -> str: + return ( + f"{summary.phase}: tokens={summary.tokens} " + f"latency={summary.latency_ms:.2f}ms (per token {summary.latency_per_token_ms:.2f}ms) " + f"energy={summary.energy_mj:.3f}mJ (per token {summary.energy_per_token_mj:.3f}mJ)" + ) + + +def profile(args: argparse.Namespace) -> None: + remote_dir = Path(args.remote_dir) + remote_dir_str = str(remote_dir) + remote_runner = remote_dir / Path(args.runner).name + remote_pte = remote_dir / Path(args.pte).name + + print(f"[INFO] Pushing runner to {remote_runner}") + _adb_cmd(["push", args.runner, str(remote_runner)], serial=args.serial) + print(f"[INFO] Pushing PTE to {remote_pte}") + _adb_cmd(["push", args.pte, str(remote_pte)], serial=args.serial) + + prompt = args.prompt or "Hello world!" + runner_invocation = ( + f"cd {shlex.quote(remote_dir_str)} && " + f"chmod +x {shlex.quote(remote_runner.name)} && " + f"echo {shlex.quote(prompt)} | " + f"{shlex.quote('./' + remote_runner.name)}" + ) + + print(f"[INFO] Launching runner via adb shell: {runner_invocation}") + result = _adb_cmd(["shell", runner_invocation], serial=args.serial) + stdout = result.stdout + if stdout: + print("[DEVICE OUTPUT]") + print(stdout) + + summaries = _extract_metrics(stdout) + if not summaries: + print( + "[WARN] No ExecuTorch metrics detected. Ensure the runner prints JSON between " + f"{METRICS_BEGIN} and {METRICS_END}." 
+ ) + return + + print("[INFO] Parsed metrics:") + for summary in summaries.values(): + print(" " + _format_summary(summary)) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--runner", required=True, help="Path to the compiled ExecuTorch runner binary.") + parser.add_argument("--pte", required=True, help="Path to the exported ExecuTorch .pte program.") + parser.add_argument( + "--remote-dir", + default="/data/local/tmp/nanogpt", + help="Directory on the device where artifacts will be staged.", + ) + parser.add_argument( + "--prompt", + help="Prompt text to feed into the runner. Defaults to 'Hello world!'.", + ) + parser.add_argument( + "--serial", + help="Optional adb serial number when multiple devices are connected.", + ) + return parser.parse_args() + + +def main() -> None: + args = parse_args() + try: + profile(args) + except FileNotFoundError as exc: + print(f"[ERROR] Failed to invoke external tool: {exc}") + except subprocess.CalledProcessError as exc: + print("[ERROR] adb command failed:") + print(exc.stderr) + + +if __name__ == "__main__": + main() diff --git a/hyperparam_search.py b/hyperparam_search.py index e738868dc4..f4e479762f 100644 --- a/hyperparam_search.py +++ b/hyperparam_search.py @@ -21,6 +21,7 @@ import os import subprocess import sys +from dataclasses import dataclass from contextlib import contextmanager import re from copy import deepcopy @@ -34,6 +35,56 @@ import ast +# ExecuTorch export settings +@dataclass(slots=True) +class ExecuTorchExportOptions: + enabled: bool = False + delegate: str = 'none' + smoke_test_tokens: int = 0 + smoke_test_prompt: str | None = None + tokenizer_vocab: Path | None = None + max_output_tokens: int = 32 + + +def maybe_export_executorch(ckpt_dir: Path, run_label: str, options: ExecuTorchExportOptions) -> None: + if not options.enabled: + return + + ckpt_path = ckpt_dir / 'ckpt.pt' + if not ckpt_path.exists(): + print(f"[WARN] ExecuTorch export skipped (missing {ckpt_path}).") + return + + try: + from model_exports.executorch.exporter import ExportConfig, export_checkpoint_to_pte + except ImportError as exc: + print(f"[WARN] ExecuTorch export unavailable: {exc}") + return + + export_dir = ckpt_path.parent / 'executorch' + export_dir.mkdir(parents=True, exist_ok=True) + safe_name = re.sub(r'[^A-Za-z0-9._-]+', '_', run_label).strip('_') or 'model' + pte_path = export_dir / f"{safe_name}.pte" + + config = ExportConfig( + delegate=options.delegate, + generate_etrecord=False, + smoke_test_tokens=max(0, options.smoke_test_tokens), + smoke_test_prompt=options.smoke_test_prompt, + tokenizer_path=options.tokenizer_vocab, + max_output_tokens=options.max_output_tokens, + metadata=True, + ) + + try: + export_checkpoint_to_pte(ckpt_path, pte_path, config) + print(f"[INFO] ExecuTorch export ready: {pte_path}") + except ImportError as exc: + print(f"[WARN] ExecuTorch export failed (missing dependency): {exc}") + except Exception as exc: + print(f"[ERROR] ExecuTorch export failed for {run_label}: {exc}") + + # ───────────────────────── helpers ────────────────────────── def dict_to_cli(d: Dict[str, Any]) -> List[str]: """ @@ -70,7 +121,11 @@ def patched_argv(argv: List[str]): sys.argv = old -def run_trial_inproc(cfg: Dict[str, Any]) -> Tuple[float, float, int, float, float]: +def run_trial_inproc( + cfg: Dict[str, Any], + export_options: ExecuTorchExportOptions, + run_label: str, +) -> Tuple[float, float, int, float, float]: """Return (best_val_loss, num_params, best_iter, 
peak_gpu_mb, iter_latency_ms).""" from train import Trainer from train_args import parse_args as parse_train_args @@ -85,13 +140,18 @@ def run_trial_inproc(cfg: Dict[str, Any]) -> Tuple[float, float, int, float, flo best_iter = int(getattr(tr, "iter_num_best_val_loss", 0)) peak_gpu_mb = float(getattr(tr, "peak_gpu_usage", 0.0) / (1024 ** 2)) iter_latency_ms = float(getattr(tr, "iter_latency_avg", 0.0)) + maybe_export_executorch(Path(cfg.get("out_dir", "out")), run_label, export_options) del tr torch.cuda.empty_cache() gc.collect() return loss, nparam, best_iter, peak_gpu_mb, iter_latency_ms -def run_trial_subproc(cfg: Dict[str, Any]) -> Tuple[float, float, int, float, float]: +def run_trial_subproc( + cfg: Dict[str, Any], + export_options: ExecuTorchExportOptions, + run_label: str, +) -> Tuple[float, float, int, float, float]: script_dir = Path(__file__).parent cmd = [sys.executable, str(script_dir / "train.py")] + dict_to_cli(cfg) env = {k: v for k, v in os.environ.items() if k not in {"RANK", "WORLD_SIZE"}} @@ -101,12 +161,13 @@ def run_trial_subproc(cfg: Dict[str, Any]) -> Tuple[float, float, int, float, fl raise RuntimeError("train.py failed") out_dir = Path(cfg.get("out_dir", "out")) - line = (out_dir / "best_val_loss_and_iter.txt").read_text().strip().split(",") + line = (out_dir / "best_val_loss_and_iter.txt").read_text().strip().split(',') loss = float(line[0]) best_iter = int(line[1]) nparam = float(line[2]) peak_gpu_mb = float(line[5]) iter_latency_ms = float(line[6]) + maybe_export_executorch(out_dir, run_label, export_options) torch.cuda.empty_cache() gc.collect() return loss, nparam, best_iter, peak_gpu_mb, iter_latency_ms @@ -174,6 +235,45 @@ def main(): "'vram' for peak GPU memory in MB, or 'iter' for average iteration latency in ms." 
), ) + ap.add_argument( + "--executorch_export", + dest="executorch_export", + action='store_true', + default=True, + help="Automatically export ExecuTorch programs for each candidate run.", + ) + ap.add_argument( + "--no-executorch-export", + dest="executorch_export", + action='store_false', + help="Disable ExecuTorch exports.", + ) + ap.add_argument( + "--executorch_delegate", + choices=['none', 'xnnpack'], + default='none', + help="Delegate to target when exporting to ExecuTorch.", + ) + ap.add_argument( + "--executorch_smoke_test_tokens", + type=int, + default=0, + help="If >0, run a random-token smoke test after export.", + ) + ap.add_argument( + "--executorch_smoke_test_prompt", + help="Optional prompt to evaluate with the exported program.", + ) + ap.add_argument( + "--executorch_tokenizer_vocab", + help="Path to a vocab.json for ExecuTorch prompt smoke tests.", + ) + ap.add_argument( + "--executorch_max_output_tokens", + type=int, + default=32, + help="Maximum decode tokens when running ExecuTorch smoke tests.", + ) @@ -186,7 +286,21 @@ def main(): sys.exit("--increments length mismatch") inc_map = dict(zip(args.param_names, args.increments)) - run_fn = run_trial_subproc if args.spawn_subprocess else run_trial_inproc + export_options = ExecuTorchExportOptions( + enabled=args.executorch_export, + delegate=args.executorch_delegate, + smoke_test_tokens=max(0, args.executorch_smoke_test_tokens), + smoke_test_prompt=args.executorch_smoke_test_prompt, + tokenizer_vocab=Path(args.executorch_tokenizer_vocab) if args.executorch_tokenizer_vocab else None, + max_output_tokens=args.executorch_max_output_tokens, + ) + + if args.spawn_subprocess: + def run_trial(cfg: Dict[str, Any], label: str) -> Tuple[float, float, int, float, float]: + return run_trial_subproc(cfg, export_options, label) + else: + def run_trial(cfg: Dict[str, Any], label: str) -> Tuple[float, float, int, float, float]: + return run_trial_inproc(cfg, export_options, label) baseline_cfg_master = yaml.safe_load(Path(args.orig_settings).read_text()) log_path = Path(args.results_file) @@ -259,9 +373,9 @@ def _extend_layerlists(cfg: Dict[str, Any], dup_idx: int) -> None: _apply_overrides_to_active_config(baseline_cfg, args.override_cfg, "initial baseline_cfg for new sweep") print("[BASELINE] measuring initial config …") - # run_fn receives a deepcopy of the (potentially overridden) baseline_cfg + # run_trial receives a deepcopy of the (potentially overridden) baseline_cfg - base_loss, base_params, base_best_iter, base_gpu, base_iter_ms = run_fn(deepcopy(baseline_cfg)) + base_loss, base_params, base_best_iter, base_gpu, base_iter_ms = run_trial(deepcopy(baseline_cfg), 'baseline') base_score = 1 / math.exp(base_loss) log["iterations"].append( { @@ -319,7 +433,7 @@ def _evaluate(cfg_template: Dict[str, Any], print(f"[TEST] {label_for_log}={value_for_log} seed={cfg_run['seed']}") try: - loss, nparam, best_it, peak_mb, iter_ms = run_fn(cfg_run) + loss, nparam, best_it, peak_mb, iter_ms = run_trial(cfg_run, f"{label_for_log}-seed{cfg_run['seed']}") except Exception as exc: print(" ⚠", exc) return # discard this candidate diff --git a/model_exports/executorch/__init__.py b/model_exports/executorch/__init__.py new file mode 100644 index 0000000000..5f72ef5b7d --- /dev/null +++ b/model_exports/executorch/__init__.py @@ -0,0 +1,5 @@ +"""ExecuTorch export utilities for nanoGPT checkpoints.""" + +from .exporter import ExportConfig, export_checkpoint_to_pte + +__all__ = ["ExportConfig", "export_checkpoint_to_pte"] diff --git 
a/model_exports/executorch/export_checkpoint.py b/model_exports/executorch/export_checkpoint.py new file mode 100644 index 0000000000..88a37aedb0 --- /dev/null +++ b/model_exports/executorch/export_checkpoint.py @@ -0,0 +1,86 @@ +"""Command line interface for exporting nanoGPT checkpoints to ExecuTorch.""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +from .exporter import ExportConfig, export_checkpoint_to_pte + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--ckpt", required=True, help="Path to the ckpt.pt file produced by training.") + parser.add_argument( + "--pte-path", + help=( + "Destination for the generated .pte file. If omitted, the exporter writes to " + "<checkpoint dir>/executorch/<checkpoint name>.pte" + ), + ) + parser.add_argument( + "--delegate", + default="none", + choices=["none", "xnnpack"], + help="ExecuTorch delegate to target during export.", + ) + parser.add_argument( + "--generate-etrecord", + action="store_true", + help="Generate an ETRecord artifact alongside the .pte file.", + ) + parser.add_argument( + "--smoke-test-tokens", + type=int, + default=0, + help="If >0, run a random-token smoke test against the exported program.", + ) + parser.add_argument( + "--smoke-test-prompt", + help="Optional prompt to evaluate with the exported program (requires --tokenizer-vocab).", + ) + parser.add_argument( + "--tokenizer-vocab", + type=Path, + help="Path to a vocab.json file compatible with the model. Required for prompt smoke tests.", + ) + parser.add_argument( + "--max-output-tokens", + type=int, + default=32, + help="Maximum number of tokens to request during smoke tests.", + ) + parser.add_argument( + "--no-metadata", + action="store_true", + help="Skip writing an export metadata JSON alongside the .pte file.", + ) + return parser.parse_args() + + +def main() -> None: + args = _parse_args() + ckpt_path = Path(args.ckpt) + if args.pte_path: + pte_path = Path(args.pte_path) + else: + default_dir = ckpt_path.parent / "executorch" + default_dir.mkdir(parents=True, exist_ok=True) + pte_path = default_dir / f"{ckpt_path.stem}.pte" + + config = ExportConfig( + delegate=args.delegate, + generate_etrecord=args.generate_etrecord, + smoke_test_tokens=args.smoke_test_tokens, + smoke_test_prompt=args.smoke_test_prompt, + tokenizer_path=args.tokenizer_vocab, + max_output_tokens=args.max_output_tokens, + metadata=not args.no_metadata, + ) + + export_checkpoint_to_pte(ckpt_path, pte_path, config) + print(f"[executorch] Exported program written to {pte_path}") + + +if __name__ == "__main__": + main() diff --git a/model_exports/executorch/exporter.py b/model_exports/executorch/exporter.py new file mode 100644 index 0000000000..80ae7bd4ef --- /dev/null +++ b/model_exports/executorch/exporter.py @@ -0,0 +1,304 @@ +"""Utilities to export nanoGPT checkpoints to ExecuTorch ``.pte`` programs.""" + +from __future__ import annotations + +import copy +import json +import os +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + +import torch + +from gpt_conf import GPTConfig +from model import GPT + + +@dataclass(slots=True) +class ExportConfig: + """Configuration parameters that control ExecuTorch exports.""" + + delegate: str = "none" + generate_etrecord: bool = False + smoke_test_tokens: int = 0 + smoke_test_prompt: Optional[str] = None + tokenizer_path: Optional[Path] = None + max_output_tokens: int = 32 + metadata: bool = True + + def validate(self) -> None: + if
self.delegate not in {"none", "xnnpack"}: + raise ValueError(f"Unsupported delegate '{self.delegate}'.") + if self.smoke_test_tokens < 0: + raise ValueError("smoke_test_tokens must be non-negative.") + if self.max_output_tokens <= 0: + raise ValueError("max_output_tokens must be positive.") + + +def _infer_vocab_size(model_args: dict) -> int: + vocab_size = model_args.get("vocab_size") + if vocab_size is None: + raise ValueError("Model arguments do not include 'vocab_size'.") + return int(vocab_size) + + +def _infer_block_size(model_args: dict) -> int: + block_size = model_args.get("block_size") + if block_size is None: + raise ValueError("Model arguments do not include 'block_size'.") + return int(block_size) + + +def _prepare_state_dict(state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: + """Normalize checkpoint parameter keys so they load with the current model.""" + + prepared = dict(state_dict) + prepared = _strip_module_prefix(prepared) + prepared = _convert_legacy_attention_weights(prepared) + return prepared + + +def _strip_module_prefix(state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: + """Drop common wrapper prefixes (DDP, torch.compile) from parameter names.""" + + if not state_dict: + return state_dict + + prefixes = ( + "module.", + "_orig_mod.", + "_orig_mod.module.", + "module._orig_mod.", + ) + + def strip_prefix( + params: dict[str, torch.Tensor], prefix: str + ) -> dict[str, torch.Tensor]: + if all(key.startswith(prefix) for key in params): + return {key[len(prefix) :]: value for key, value in params.items()} + return params + + prepared = state_dict + for prefix in prefixes: + updated = strip_prefix(prepared, prefix) + if updated is not prepared: + prepared = updated + + return prepared + + +def _convert_legacy_attention_weights( + state_dict: dict[str, torch.Tensor] +) -> dict[str, torch.Tensor]: + """Split combined QKV projections saved by older checkpoints.""" + + legacy_keys = [key for key in state_dict if key.endswith("attn.c_attn.weight")] + if not legacy_keys: + return state_dict + + updated = dict(state_dict) + for weight_key in legacy_keys: + bias_key = weight_key.replace(".weight", ".bias") + + weight = updated.pop(weight_key) + bias = updated.pop(bias_key, None) + + try: + q_weight, k_weight, v_weight = weight.chunk(3, dim=0) + except RuntimeError as err: # pragma: no cover - defensive path + raise RuntimeError( + f"Failed to split legacy attention weights for '{weight_key}': {err}" + ) from err + + base = weight_key.replace("c_attn.weight", "c_attn_") + updated[f"{base}q.weight"] = q_weight + updated[f"{base}k.weight"] = k_weight + updated[f"{base}v.weight"] = v_weight + + if bias is not None: + q_bias, k_bias, v_bias = bias.chunk(3, dim=0) + bias_base = bias_key.replace("c_attn.bias", "c_attn_") + updated[f"{bias_base}q.bias"] = q_bias + updated[f"{bias_base}k.bias"] = k_bias + updated[f"{bias_base}v.bias"] = v_bias + + return updated + + +def export_checkpoint_to_pte( + ckpt_path: os.PathLike | str, + output_path: os.PathLike | str, + export_config: Optional[ExportConfig] = None, +) -> Path: + """Convert ``ckpt.pt`` files generated by nanoGPT into ExecuTorch ``.pte`` files.""" + + export_config = export_config or ExportConfig() + export_config.validate() + + ckpt_path = Path(ckpt_path) + output_path = Path(output_path) + if ckpt_path.suffix == "": + raise ValueError("Checkpoint path must include a filename, not just a directory.") + if not ckpt_path.exists(): + raise FileNotFoundError(f"Checkpoint not found: {ckpt_path}") + 
output_path.parent.mkdir(parents=True, exist_ok=True) + + checkpoint = torch.load(ckpt_path, map_location="cpu") + if "model" not in checkpoint: + raise KeyError("Checkpoint does not contain 'model' weights.") + if "model_args" not in checkpoint: + raise KeyError("Checkpoint does not contain 'model_args'.") + + model_args = checkpoint["model_args"] + gptconf = GPTConfig(**model_args) + model = GPT(gptconf) + + state_dict = _prepare_state_dict(checkpoint["model"]) + missing, unexpected = model.load_state_dict(state_dict, strict=False) + if missing or unexpected: + raise RuntimeError( + "Checkpoint parameters do not match the GPT architecture. " + f"Missing keys: {sorted(missing)}; Unexpected keys: {sorted(unexpected)}" + ) + model.eval() + + vocab_size = _infer_vocab_size(model_args) + block_size = _infer_block_size(model_args) + + example_inputs = ( + torch.randint(0, vocab_size, (1, block_size), dtype=torch.long), + ) + dynamic_shape = ( + {1: torch.export.Dim("token_dim", max=block_size)}, + ) + + from torch.nn.attention import SDPBackend, sdpa_kernel + from torch.export import export, export_for_training + + with sdpa_kernel([SDPBackend.MATH]), torch.no_grad(): + training_program = export_for_training( + model, example_inputs, dynamic_shapes=dynamic_shape + ).module() + traced_program = export(training_program, example_inputs, dynamic_shapes=dynamic_shape) + + if export_config.delegate == "xnnpack": + from executorch.backends.xnnpack.partition.xnnpack_partitioner import ( + XnnpackPartitioner, + ) + from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config + from executorch.exir import to_edge_transform_and_lower + + edge_config = get_xnnpack_edge_compile_config() + edge_manager = to_edge_transform_and_lower( + traced_program, + partitioner=[XnnpackPartitioner()], + compile_config=edge_config, + ) + else: + from executorch.exir import EdgeCompileConfig, to_edge + + edge_config = EdgeCompileConfig(_check_ir_validity=False) + edge_manager = to_edge(traced_program, compile_config=edge_config) + + edge_manager_copy = copy.deepcopy(edge_manager) if export_config.generate_etrecord else None + et_program = edge_manager.to_executorch() + + output_path.write_bytes(et_program.buffer) + + if export_config.generate_etrecord and edge_manager_copy is not None: + from executorch.devtools import generate_etrecord + + etrecord_path = output_path.with_suffix(output_path.suffix + ".etrecord") + generate_etrecord(str(etrecord_path), edge_manager_copy, et_program) + + if export_config.metadata: + metadata_path = output_path.with_suffix(output_path.suffix + ".json") + metadata = { + "checkpoint": str(ckpt_path.resolve()), + "pte": str(output_path.resolve()), + "delegate": export_config.delegate, + "generate_etrecord": export_config.generate_etrecord, + "vocab_size": vocab_size, + "block_size": block_size, + "max_output_tokens": export_config.max_output_tokens, + } + metadata_path.write_text(json.dumps(metadata, indent=2)) + + if export_config.smoke_test_tokens: + _smoke_test_export( + output_path, + block_size=block_size, + vocab_size=vocab_size, + num_tokens=export_config.smoke_test_tokens, + ) + + if export_config.smoke_test_prompt: + _smoke_test_prompt( + output_path, + prompt=export_config.smoke_test_prompt, + max_input_length=block_size, + tokenizer_path=export_config.tokenizer_path, + max_output_tokens=export_config.max_output_tokens, + ) + + return output_path + + +def _smoke_test_export( + pte_path: Path, *, block_size: int, vocab_size: int, num_tokens: int +) -> None: + 
import numpy as np + + from executorch.extension.module import Module + from executorch.extension.tensor import from_numpy + + module = Module(str(pte_path)) + max_tokens = min(block_size, max(1, num_tokens)) + tokens = np.random.randint( + 0, + max(vocab_size, 1), + size=(1, max_tokens), + dtype=np.int64, + ) + inputs = from_numpy(tokens) + module.forward(inputs) + + +def _smoke_test_prompt( + pte_path: Path, + prompt: str, + max_input_length: int, + tokenizer_path: Optional[Path], + max_output_tokens: int, +) -> None: + if tokenizer_path is None: + raise ValueError("A tokenizer vocabulary is required to run prompt-based smoke tests.") + + import json as json_lib + + import numpy as np + + from executorch.extension.module import Module + from executorch.extension.tensor import from_numpy + + vocab_map = json_lib.loads(Path(tokenizer_path).read_text()) + token_ids: list[int] = [] + if isinstance(vocab_map, dict): + if prompt in vocab_map: + token_ids.append(int(vocab_map[prompt])) + else: + for piece in prompt.split(): + token_ids.append(int(vocab_map.get(piece, 0))) + if not token_ids: + token_ids = [0] + + token_array = np.array([token_ids[:max_input_length]], dtype=np.int64) + _ = max_output_tokens # placeholder to document intent for decode length handling + + module = Module(str(pte_path)) + inputs = from_numpy(token_array) + outputs = module.forward(inputs) + + if not outputs: + raise RuntimeError("ExecuTorch module returned no outputs during smoke test.") diff --git a/optimization_and_search/run_experiments.py b/optimization_and_search/run_experiments.py index 6c0c8632fc..f1181118ab 100644 --- a/optimization_and_search/run_experiments.py +++ b/optimization_and_search/run_experiments.py @@ -5,6 +5,7 @@ from itertools import product import argparse import os +import re from copy import deepcopy import yaml @@ -37,10 +38,10 @@ ] + + def parse_args() -> argparse.Namespace: - """ - Parse command-line arguments. - """ + """Parse command-line arguments.""" parser = argparse.ArgumentParser( description="Run experiments based on a configuration file (JSON or YAML)." ) @@ -80,66 +81,51 @@ def parse_args() -> argparse.Namespace: "to keep run identifiers shorter." ), ) + parser.add_argument( + '--executorch-export', + dest='executorch_export', + action='store_true', + default=True, + help='Automatically export ExecuTorch programs after each run.', + ) + parser.add_argument( + '--no-executorch-export', + dest='executorch_export', + action='store_false', + help='Disable automatic ExecuTorch exports.', + ) + parser.add_argument( + '--executorch-delegate', + choices=['none', 'xnnpack'], + default='none', + help='Delegate to target when exporting to ExecuTorch.', + ) + parser.add_argument( + '--executorch-smoke-test-tokens', + type=int, + default=0, + help='If >0, run a random-token smoke test after export.', + ) + parser.add_argument( + '--executorch-smoke-test-prompt', + help='Optional prompt to evaluate with the exported program.', + ) + parser.add_argument( + '--executorch-tokenizer-vocab', + help='Path to a vocab.json for ExecuTorch prompt smoke tests.', + ) + parser.add_argument( + '--executorch-max-output-tokens', + type=int, + default=32, + help='Maximum decode tokens when running ExecuTorch smoke tests.', + ) return parser.parse_args() -def load_configurations(path: str, fmt: str) -> list[dict]: - """ - Load experiment configurations from a JSON or YAML file. - - Args: - path: File path. - fmt: 'json' or 'yaml'. - - Returns: - A list of configuration dictionaries. 
- """ - text = Path(path).read_text() - if fmt == 'yaml': - # YAML may contain multiple documents or a single list - loaded = list(yaml.safe_load_all(text)) - # Flatten if outer list-of-lists - if len(loaded) == 1 and isinstance(loaded[0], list): - return loaded[0] - return loaded - else: - return json.loads(text) - - -RUN_NAME_VAR = "${RUN_NAME}" - - -def expand_range(val): - """Expand dicts with 'range' into a list of values.""" - if isinstance(val, dict) and 'range' in val: - r = val['range'] - start, end = r['start'], r['end'] - step = r.get('step', 1 if isinstance(start, int) else 0.1) - if isinstance(start, int): - return list(range(start, end + 1, step)) - count = int(round((end - start) / step)) + 1 - return [start + i * step for i in range(count)] - return val - - -def _substitute_run_name(obj, run_name: str): - """Recursively substitute the run name placeholder inside ``obj``.""" - if isinstance(obj, str): - return obj.replace(RUN_NAME_VAR, run_name) - if isinstance(obj, list): - return [_substitute_run_name(o, run_name) for o in obj] - if isinstance(obj, dict): - return {k: _substitute_run_name(v, run_name) for k, v in obj.items()} - return obj - - -def _ensure_list(value): - if value is None: - return [] - if isinstance(value, list): - return value - return [value] - +def _sanitize_run_name(name: str) -> str: + sanitized = re.sub(r'[^A-Za-z0-9._-]+', '_', name).strip('_') + return sanitized or 'model' def _merge_parameter_groups(existing, new): existing_list = [] @@ -582,6 +568,49 @@ def append_progress(log_file: Path, message: str) -> None: f.write(f"[{timestamp}] {message}\n") +def maybe_export_executorch(run_name: str, out_dir: str, args: argparse.Namespace) -> None: + if not getattr(args, 'executorch_export', False): + return + + ckpt_path = Path(out_dir) / 'ckpt.pt' + if not ckpt_path.exists(): + print(f"[yellow]ExecuTorch export skipped (missing {ckpt_path}).") + return + + try: + from model_exports.executorch.exporter import ExportConfig, export_checkpoint_to_pte + except ImportError as exc: + print(f"[yellow]ExecuTorch export unavailable: {exc}") + return + + export_dir = ckpt_path.parent / 'executorch' + export_dir.mkdir(parents=True, exist_ok=True) + export_name = _sanitize_run_name(run_name) + pte_path = export_dir / f"{export_name}.pte" + + tokenizer_path = getattr(args, 'executorch_tokenizer_vocab', None) + if tokenizer_path: + tokenizer_path = Path(tokenizer_path) + + config = ExportConfig( + delegate=getattr(args, 'executorch_delegate', 'none'), + generate_etrecord=False, + smoke_test_tokens=max(0, getattr(args, 'executorch_smoke_test_tokens', 0)), + smoke_test_prompt=getattr(args, 'executorch_smoke_test_prompt', None), + tokenizer_path=tokenizer_path, + max_output_tokens=getattr(args, 'executorch_max_output_tokens', 32), + metadata=True, + ) + + try: + export_checkpoint_to_pte(ckpt_path, pte_path, config) + print(f"[green]ExecuTorch export complete:[/] {pte_path}") + except ImportError as exc: + print(f"[yellow]ExecuTorch export failed (missing dependency): {exc}") + except Exception as exc: + print(f"[red]ExecuTorch export failed for {run_name}: {exc}") + + def build_command(combo: dict) -> list[str]: """ Construct the command-line invocation for train.py. @@ -654,6 +683,8 @@ def run_experiment( except subprocess.CalledProcessError: print(f"[red]Process exited with error for run:[/] {run_name}") + maybe_export_executorch(run_name, combo['out_dir'], args) + # Read metrics (use existing or nan on failure) try: metrics = read_metrics(str(combo['out_dir']))