diff --git a/configs/CONTRIBUTING.md b/configs/CONTRIBUTING.md
new file mode 100644
index 000000000..5ddb2b46b
--- /dev/null
+++ b/configs/CONTRIBUTING.md
@@ -0,0 +1,178 @@
+# Contributing a New Profile
+
+Thank you for contributing a benchmark result. Follow these five steps exactly.
+
+---
+
+## Step 1 — Run the benchmark and save the log
+
+Run the inference command and capture stdout/stderr to a file:
+
+```bash
+<your binary> <flags> 2>&1 | tee .sisyphus/notes/<experiment>/<your-run>.log
+```
+
+The log file is the ground truth for the numbers in your profile. Without it
+the profile will be rejected.
+
+---
+
+## Step 2 — Create the profile TOML
+
+Copy the template from `configs/profiles/base.toml` or the most similar
+existing profile. Name it `<hw>-<model>-<method>-<ctx>.toml`, e.g.:
+`rtx4090-dense31b-mtp-128k.toml`.
+
+### Required keys
+
+```toml
+extends = "base"          # or another profile stem
+backend = "dflash"        # must match a file in configs/backends/
+
+[hardware]
+gpu = "RTX 4090"
+sm = 89
+
+[model]
+target = "${LUCEBOX_ROOT}/models/your-model.gguf"
+# mtp_assistant required when spec.method = "mtp"
+# dflash_draft  required when spec.method = "dflash"
+
+[runtime]
+ctx = 131072
+kv_k = "tq3_0"
+kv_v = "tq3_0"
+
+[runtime.spec]
+method = "mtp"      # "none" | "mtp" | "dflash"
+gamma = 2           # required for mtp
+# draft_max = 4     # required for dflash
+
+[expected_floors]
+decode_tok_s = 15.0
+# ttft_ms_max = 80.0
+# prefill_tok_s = 500.0
+
+[provenance]
+source_log = ".sisyphus/notes/<experiment>/<your-run>.log"
+measured_at = "2026-01-15"       # ISO date
+hardware_id = "yourname-rtx4090-linux"
+commit = "abc1234"               # optional git SHA
+```
+
+### Auto-rejection rules (the linter will reject these)
+
+- `provenance.source_log = "<NEEDS_RUN>"` — fill in the real log path (reported as a warning by default; an error with `--strict`)
+- Hardcoded `/absolute/paths` anywhere — use `${VAR}/...` or relative paths
+- `spec.method = "mtp"` without `model.mtp_assistant`
+- `spec.method = "dflash"` without `model.dflash_draft`
+- Empty `[expected_floors]` — set at least one floor
+- Missing `[provenance]` section or any of its three required fields
+- `source_log` pointing to a file that does not exist (warning, not error,
+  but reviewers will ask you to provide it)
+
+---
+
+## Step 3 — Lint before submitting
+
+```bash
+python dflash/scripts/config_lint.py --profile <your-profile-stem>
+```
+
+Must exit 0 (warnings about missing binaries are OK).
+
+For strict checking (promotes warnings to errors):
+
+```bash
+python dflash/scripts/config_lint.py --profile <your-profile-stem> --strict
+```
+
+---
+
+## Step 4 — Add or validate the backend
+
+If your profile uses a backend that already exists, skip this step.
+
+To add a backend, create `configs/backends/<name>.toml`:
+
+```toml
+name = "my-backend"      # must match filename stem exactly
+upstream = "https://..."
+build_hint = "..."       # optional build instructions
+
+[binary]
+# exactly one of:
+in_tree = "path/relative/to/git/root"
+# env_var = "MY_BINARY_VAR"
+
+[supports]
+spec_types = ["none", "mtp"]   # which methods this binary supports
+kv_quants = ["q8_0", "tq3_0"]
+
+[flags]
+# map canonical key -> CLI flag string
+model = "--model"
+ctx = "--ctx-size"
+kv_k = "--kv-k"
+kv_v = "--kv-v"
+# if "mtp" in spec_types:
+spec_model = "--mtp"
+spec_gamma = "--gamma"
+# if "dflash" in spec_types:
+# draft_model = "--draft"
+# draft_max  = "--draft-max"
+
+[stdout_parse]
+# Backslashes must be doubled inside TOML basic ("...") strings.
+tok_s   = "eval time.*?([0-9]+\\.[0-9]+) tokens per second"
+ttft_ms = "time to first token.*?([0-9]+\\.[0-9]+) ms"
+```
+
+Backend validation rules:
+- `name` must equal the filename stem
+- Exactly one of `binary.in_tree` or `binary.env_var` must be set
+- All required flags for declared `spec_types` must be present
+
+---
+
+## Step 5 — Open a pull request
+
+Include in the PR body:
+- A snippet from the log file showing the measured tok/s and TTFT
+- The exact hardware (GPU model, driver version, VRAM)
+- The date of measurement
+- Confirmation that `config_lint.py --strict` exits 0
+
+### Disclosure requirement
+
+If any part of the profile, code, or PR description was AI-generated, state
+this explicitly. PRs with AI-generated content that is not disclosed will be
+closed.
+
+---
+
+## Schema reference summary
+
+### Profile keys
+
+| Key | Type | Required | Notes |
+|-----|------|----------|-------|
+| extends | string | yes | parent profile stem or "" for none |
+| backend | string | yes | stem of a file in configs/backends/ |
+| hardware.gpu | string | yes | GPU model name |
+| hardware.sm | int | yes | CUDA SM version (e.g. 86 for Ampere) |
+| model.target | path | yes | main model GGUF |
+| model.mtp_assistant | path | when method=mtp | MTP assistant GGUF |
+| model.dflash_draft | path | when method=dflash | DFlash draft GGUF |
+| runtime.ctx | int | yes | context length in tokens |
+| runtime.kv_k | string | yes | KV cache key quantization |
+| runtime.kv_v | string | yes | KV cache value quantization |
+| runtime.spec.method | string | yes | "none", "mtp", or "dflash" |
+| runtime.spec.gamma | int | when method=mtp | speculative tokens per step |
+| runtime.spec.draft_max | int | when method=dflash | max draft tokens |
+| runtime.flash_attn | bool | no | enable flash attention |
+| runtime.pflash | bool | no | enable pflash (MoE models) |
+| expected_floors | table | yes | at least one floor metric |
+| provenance.source_log | path | yes | path to benchmark log |
+| provenance.measured_at | date | yes | ISO 8601 date |
+| provenance.hardware_id | string | yes | unique hardware identifier |
+| provenance.commit | string | no | git SHA of code under test |
diff --git a/configs/README.md b/configs/README.md
new file mode 100644
index 000000000..859145e65
--- /dev/null
+++ b/configs/README.md
@@ -0,0 +1,73 @@
+# configs — Declarative Inference Profiles
+
+This directory contains declarative TOML profiles and backend definitions for
+running Gemma-4 inference on lucebox-hub. Each profile captures a specific
+(model, context length, speculative decode method, hardware) combination along
+with measured performance floors, so every run is reproducible and comparable.
+
+## Why this exists
+
+Ad-hoc shell commands diverge over time. Profiles make the connection between
+a benchmark log and the exact flags used to produce it explicit and machine-checkable.
+
+## Directory layout
+
+```
+configs/
+  profiles/        — one .toml per (model, ctx, method, hw) combination
+  backends/        — one .toml per inference binary variant
+```
+
+## Quick start
+
+```bash
+# Lint everything (exits 0 if no errors, prints warnings)
+python dflash/scripts/config_lint.py
+
+# Dry-run a profile (validates env, paths, backend; does NOT run inference)
+python dflash/scripts/profile_run.py --profile rtx3090-moe26b-dflash-256k --dry-run
+
+# Print the resolved command (for inspection or shell scripting)
+python dflash/scripts/profile_run.py --profile rtx3090-moe26b-dflash-256k --print-cmd
+
+# Run (execvp — replaces the Python process)
+LUCEBOX_ROOT=/your/root python dflash/scripts/profile_run.py --profile rtx3090-dense31b-mtp-64k
+
+# Override a single field at runtime
+python dflash/scripts/profile_run.py --profile rtx3090-moe26b-dflash-256k \
+    --override runtime.ctx=131072
+
+# Verify a running server meets the floors declared in the profile
+python dflash/scripts/verify_server.py --profile rtx3090-moe26b-dflash-256k \
+    --base-url http://127.0.0.1:8080 --runs 5
+```
+
+## Required environment variables
+
+| Profile | Variable | Purpose |
+|---------|----------|---------|
+| rtx3090-dense31b-mtp-64k | `LUCEBOX_ROOT` | Root containing models/ |
+| rtx3090-moe26b-dflash-256k | `HOME` (auto-set) | Root for ~/models/ paths |
+| rtx3090-moe26b-mtp-1m | `HOME` (auto-set) | Root for ~/models/ paths |
+| llama-upstream backend | `LUCEBOX_LLAMA_BIN` | Path to llama-server or llama-cli |
+
+## Shipped profiles
+
+| Profile | Model | Method | CTX | Measured decode | Floor |
+|---------|-------|--------|-----|-----------------|-------|
+| rtx3090-dense31b-mtp-64k | Gemma-4 31B dense Q4_K_M | MTP γ=2 | 64K | 10.07 tok/s | 9.5 tok/s |
+| rtx3090-moe26b-dflash-256k | Gemma-4 26B-A4B MoE Q4_K_M | DFlash dm=4+pflash | 256K | 67.95 tok/s / 55ms TTFT | 65.0 tok/s / 65ms |
+| rtx3090-moe26b-mtp-1m | Gemma-4 26B-A4B MoE Q4_K_M | MTP γ=2+pflash | 1M | 23.65 tok/s / 108ms TTFT | 22.0 tok/s / 120ms |
+
+All measurements taken on RTX 3090 (24 GB VRAM) running WSL2 (peppi-rtx3090-wsl).
+
+## Backends
+
+| Backend | Binary | Spec methods |
+|---------|--------|-------------|
+| dflash | `dflash/build/test_gemma4_dflash` (in-tree) | none, mtp, dflash |
+| llama-upstream | `$LUCEBOX_LLAMA_BIN` (external) | none |
+
+## Schema reference
+
+See `configs/CONTRIBUTING.md` for the full schema and contribution guide.
diff --git a/configs/backends/dflash.toml b/configs/backends/dflash.toml
new file mode 100644
index 000000000..2c58232ae
--- /dev/null
+++ b/configs/backends/dflash.toml
@@ -0,0 +1,37 @@
+# dflash backend — in-tree speculative decode binary
+name = "dflash"
+upstream = "https://github.com/dusterbloom/lucebox-hub"
+build_hint = "mkdir -p dflash/build && cd dflash/build && cmake .. -DCMAKE_BUILD_TYPE=Release && make -j$(nproc) test_gemma4_dflash"
+
+[binary]
+in_tree = "dflash/build/test_gemma4_dflash"
+
+[supports]
+spec_types = ["none", "mtp", "dflash"]
+kv_quants = ["q8_0", "tq3_0", "f16"]
+
+[flags]
+# Core
+model = "--model"
+ctx = "--ctx-size"
+kv_k = "--kv-k"
+kv_v = "--kv-v"
+# MTP speculative decode
+spec_model = "--mtp"
+spec_gamma = "--gamma"
+# DFlash speculative decode
+draft_model = "--draft"
+draft_max = "--draft-max"
+# Optional
+pflash = "--pflash"
+flash_attn = "--flash-attn"
+temp = "--temp"
+seed = "--seed"
+n_predict = "--n-predict"
+ignore_eos = "--ignore-eos"
+batch = "--batch-size"
+ubatch = "--ubatch-size"
+
+[stdout_parse]
+tok_s = "eval time.*?([0-9]+\\.[0-9]+) tokens per second"
+ttft_ms = "time to first token.*?([0-9]+\\.[0-9]+) ms"
diff --git a/configs/backends/llama-upstream.toml b/configs/backends/llama-upstream.toml
new file mode 100644
index 000000000..dd23c90ca
--- /dev/null
+++ b/configs/backends/llama-upstream.toml
@@ -0,0 +1,29 @@
+# llama-upstream backend — external llama.cpp server binary
+# Set LUCEBOX_LLAMA_BIN to the path of your compiled llama-server or llama-cli.
+name = "llama-upstream"
+upstream = "https://github.com/ggerganov/llama.cpp"
+build_hint = "cmake -B build -DGGML_CUDA=ON && cmake --build build --config Release -t llama-cli"
+
+[binary]
+env_var = "LUCEBOX_LLAMA_BIN"
+
+[supports]
+spec_types = ["none"]
+kv_quants = ["q8_0", "f16", "f32"]
+
+[flags]
+model = "--model"
+ctx = "--ctx-size"
+kv_k = "--kv-cache-type-k"
+kv_v = "--kv-cache-type-v"
+flash_attn = "--flash-attn"
+temp = "--temp"
+seed = "--seed"
+n_predict = "--n-predict"
+ignore_eos = "--ignore-eos"
+batch = "--batch-size"
+ubatch = "--ubatch-size"
+
+[stdout_parse]
+tok_s = "eval time.*?([0-9]+\\.[0-9]+) tokens per second"
+ttft_ms = "load time.*?([0-9]+\\.[0-9]+) ms"
diff --git a/configs/profiles/base.toml b/configs/profiles/base.toml
new file mode 100644
index 000000000..e5cf77bbe
--- /dev/null
+++ b/configs/profiles/base.toml
@@ -0,0 +1,25 @@
+# Base profile template — all profiles extend this or a child of it.
+# This file is NOT directly runnable; it lacks provenance and measured data.
+extends = ""
+backend = "dflash"
+
+[hardware]
+gpu = ""
+sm = 0
+
+[model]
+target = ""
+
+[runtime]
+ctx = 4096
+kv_k = "q8_0"
+kv_v = "q8_0"
+flash_attn = true
+
+[runtime.spec]
+method = "none"
+
+[expected_floors]
+# at least one of: decode_tok_s, prefill_tok_s, ttft_ms_max
+
+# [provenance] intentionally absent — fill in when deriving a real profile
diff --git a/configs/profiles/rtx3090-dense31b-mtp-64k.toml b/configs/profiles/rtx3090-dense31b-mtp-64k.toml
new file mode 100644
index 000000000..c2a34c224
--- /dev/null
+++ b/configs/profiles/rtx3090-dense31b-mtp-64k.toml
@@ -0,0 +1,31 @@
+# RTX 3090 — Gemma-4 Dense 31B + MTP (gamma=2) @ 64K context
+# Measured: 10.07 tok/s decode, acceptance_length=0.73 (+61% over no-MTP)
+extends = "base"
+backend = "dflash"
+
+[hardware]
+gpu = "RTX 3090"
+sm = 86
+
+[model]
+target = "${LUCEBOX_ROOT}/models/gemma-4-31B-it-Q4_K_M.gguf"
+mtp_assistant = "${LUCEBOX_ROOT}/models/gemma4-mtp-31B/gemma-4-31B-it-assistant.Q4_K_M.gguf"
+
+[runtime]
+ctx = 65536
+kv_k = "tq3_0"
+kv_v = "tq3_0"
+flash_attn = true
+
+[runtime.spec]
+method = "mtp"
+gamma = 2
+
+[expected_floors]
+decode_tok_s = 9.5
+
+[provenance]
+source_log = ".sisyphus/notes/gemma4-baseline/mtp-gamma/phase4-b/mtp_g2_ctx65536.log"
+measured_at = "2026-05-11"
+hardware_id = "peppi-rtx3090-wsl"
+commit = "4bcb972"
diff --git a/configs/profiles/rtx3090-moe26b-dflash-256k.toml b/configs/profiles/rtx3090-moe26b-dflash-256k.toml
new file mode 100644
index 000000000..cb5e09598
--- /dev/null
+++ b/configs/profiles/rtx3090-moe26b-dflash-256k.toml
@@ -0,0 +1,32 @@
+# RTX 3090 — Gemma-4 MoE 26B-A4B + DFlash (dm=4) + pflash @ 256K context
+# Measured: 67.95 tok/s decode, TTFT ~55ms, VRAM 21.73 GB
+extends = "base"
+backend = "dflash"
+
+[hardware]
+gpu = "RTX 3090"
+sm = 86
+
+[model]
+target = "${HOME}/models/gemma4-26b-a4b-it/gemma-4-26B-A4B-it-UD-Q4_K_M.gguf"
+dflash_draft = "${HOME}/models/gemma4-26b-a4b-dflash/draft-q8_0.gguf"
+
+[runtime]
+ctx = 262144
+kv_k = "q8_0"
+kv_v = "q8_0"
+flash_attn = true
+pflash = true
+
+[runtime.spec]
+method = "dflash"
+draft_max = 4
+
+[expected_floors]
+decode_tok_s = 65.0
+ttft_ms_max = 65.0
+
+[provenance]
+source_log = ".sisyphus/notes/gemma4-baseline/tq3-frontier/C3_dflash_pflash_256K_q8_dm4.log"
+measured_at = "2026-05-10"
+hardware_id = "peppi-rtx3090-wsl"
diff --git a/configs/profiles/rtx3090-moe26b-mtp-1m.toml b/configs/profiles/rtx3090-moe26b-mtp-1m.toml
new file mode 100644
index 000000000..5e80fbda0
--- /dev/null
+++ b/configs/profiles/rtx3090-moe26b-mtp-1m.toml
@@ -0,0 +1,32 @@
+# RTX 3090 — Gemma-4 MoE 26B-A4B + MTP (gamma=2) @ 1M context
+# Measured: 23.65 tok/s decode, TTFT 107.9ms, VRAM 23.88 GB
+extends = "base"
+backend = "dflash"
+
+[hardware]
+gpu = "RTX 3090"
+sm = 86
+
+[model]
+target = "${HOME}/models/gemma4-26b-a4b-it/gemma-4-26B-A4B-it-UD-Q4_K_M.gguf"
+mtp_assistant = "${HOME}/models/gemma4-mtp-26b-a4b/gemma-4-26B-A4B-it-assistant.Q4_K_M.gguf"
+
+[runtime]
+ctx = 1048576
+kv_k = "tq3_0"
+kv_v = "tq3_0"
+flash_attn = true
+pflash = true
+
+[runtime.spec]
+method = "mtp"
+gamma = 2
+
+[expected_floors]
+decode_tok_s = 22.0
+ttft_ms_max = 120.0
+
+[provenance]
+source_log = ".sisyphus/notes/gemma4-baseline/mtp-gamma/moe-scientific/mtp_g2_ctx1048576.log"
+measured_at = "2026-05-11"
+hardware_id = "peppi-rtx3090-wsl"
diff --git a/dflash/__init__.py b/dflash/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/dflash/scripts/__init__.py b/dflash/scripts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/dflash/scripts/config_lint.py b/dflash/scripts/config_lint.py
new file mode 100644
index 000000000..051615b0a
--- /dev/null
+++ b/dflash/scripts/config_lint.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""config_lint.py — validate all profiles and backends.
+
+Usage:
+    config_lint.py [--profile NAME] [--strict]
+
+Exit codes:
+    0 — all valid (warnings may be printed)
+    1 — one or more errors
+
+Note: Missing binaries and unset env vars produce warnings (not errors) in lint
+mode, since the binary may not be built and env vars may differ per workstation.
+Use profile_run.py --dry-run to fully validate a profile against the current env.
+"""
+import argparse
+import sys
+from pathlib import Path
+
+
+def _find_git_root(start: Path) -> Path:
+    """Return the closest directory at or above *start* that contains ``.git``.
+
+    The filesystem root itself is never probed.
+
+    Raises:
+        RuntimeError: If no such directory is found.
+    """
+    origin = start.resolve()
+    lineage = [origin, *origin.parents]
+    # The final entry is the filesystem root, which the search skips.
+    for candidate in lineage[:-1]:
+        if (candidate / ".git").exists():
+            return candidate
+    raise RuntimeError(f"Could not find git root from {start}")
+
+
+# Substrings that identify workstation-specific failures (an unset env var or
+# a binary that has not been built). They are kept narrow on purpose: the bare
+# words "env_var" and "not found" also occur in structural errors such as
+# "binary.in_tree and binary.env_var are mutually exclusive" or
+# "Profile not found: ...", which must stay hard errors.
+_ENV_VAR_MSGS = ("Unset environment variable", "is not set")
+_BINARY_MSGS = ("binary not found", "does not exist")
+
+
+def _is_env_or_binary_error(msg: str) -> bool:
+    """Return True if *msg* reports an unset env var or a missing binary.
+
+    Such failures depend on the local machine rather than on the config file,
+    so lint downgrades them to warnings. Every other message is an error.
+    """
+    return any(marker in msg for marker in _ENV_VAR_MSGS + _BINARY_MSGS)
+
+
+def lint_profile(profile_path: Path, git_root: str, profiles_dir: str, strict: bool):
+    """Load and validate one profile file.
+
+    Returns:
+        (errors, warnings) — two lists of human-readable strings.
+    """
+    from dflash.scripts.configlib.loader import load_profile, ProfileError
+    from dflash.scripts.configlib.validate import validate_profile
+
+    label = profile_path.stem
+    try:
+        loaded = load_profile(profile_path, git_root=git_root, profiles_dir=profiles_dir)
+    except ProfileError as exc:
+        reason = str(exc)
+        if not _is_env_or_binary_error(reason):
+            return [f"[{label}] Load error: {reason}"], []
+        # The workstation is simply not configured for this profile; report
+        # it without failing the lint run.
+        return [], [f"[{label}] Env/path warning (set vars to run): {reason}"]
+
+    found_errors, found_warnings = validate_profile(
+        loaded,
+        profile_name=profile_path.name,
+        strict=strict,
+        git_root=git_root,
+    )
+    return found_errors, found_warnings
+
+
+def lint_backend(backend_path: Path, git_root: str):
+    """Load one backend file and report what went wrong, if anything.
+
+    Returns:
+        (errors, warnings) — two lists of human-readable strings. Both are
+        empty when the backend loads cleanly.
+    """
+    from dflash.scripts.configlib.backends import load_backend, BackendError
+
+    label = backend_path.stem
+    try:
+        load_backend(backend_path, git_root=git_root)
+    except BackendError as exc:
+        reason = str(exc)
+        if _is_env_or_binary_error(reason):
+            return [], [f"[{label}] Binary/env warning (build or set vars): {reason}"]
+        return [f"[{label}] Backend error: {reason}"], []
+    return [], []
+
+
+def main():
+    """CLI entry point: lint one profile, or every profile and backend.
+
+    Warnings go to stdout and errors to stderr. The process exits 0 when no
+    errors remain and 1 otherwise; with --strict every warning is counted as
+    an error.
+    """
+    cli = argparse.ArgumentParser(description="Lint lucebox-hub config profiles and backends")
+    cli.add_argument("--profile", help="Lint only this profile name (stem)")
+    cli.add_argument("--strict", action="store_true", help="Escalate warnings to errors")
+    opts = cli.parse_args()
+
+    here = Path(__file__).resolve().parent
+    try:
+        git_root = str(_find_git_root(here))
+    except RuntimeError as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    # The lint helpers import the `dflash` package lazily; put the repository
+    # root on sys.path first so those imports resolve.
+    sys.path.insert(0, git_root)
+
+    config_root = Path(git_root) / "configs"
+    profiles_dir = config_root / "profiles"
+    backends_dir = config_root / "backends"
+
+    problems = []
+    cautions = []
+
+    def record(outcome):
+        # Fold one (errors, warnings) pair into the running totals.
+        found_errors, found_warnings = outcome
+        problems.extend(found_errors)
+        cautions.extend(found_warnings)
+
+    if not opts.profile:
+        for candidate in sorted(profiles_dir.glob("*.toml")):
+            if candidate.stem != "base":
+                record(lint_profile(candidate, git_root, str(profiles_dir), opts.strict))
+                continue
+            # base.toml is a template without provenance: only check that it
+            # loads, and skip full validation.
+            try:
+                from dflash.scripts.configlib.loader import load_profile, ProfileError
+                load_profile(candidate, git_root=git_root, profiles_dir=str(profiles_dir))
+                print("  base.toml: OK (template, provenance skipped)")
+            except ProfileError as exc:
+                detail = str(exc)
+                if _is_env_or_binary_error(detail):
+                    cautions.append(f"[base] {detail}")
+                else:
+                    problems.append(f"[base] Parse error: {detail}")
+
+        for candidate in sorted(backends_dir.glob("*.toml")):
+            record(lint_backend(candidate, git_root))
+    else:
+        chosen = profiles_dir / f"{opts.profile}.toml"
+        if not chosen.exists():
+            print(f"ERROR: Profile {opts.profile!r} not found at {chosen}", file=sys.stderr)
+            sys.exit(1)
+        record(lint_profile(chosen, git_root, str(profiles_dir), opts.strict))
+
+    if opts.strict and cautions:
+        problems.extend(f"(strict) {caution}" for caution in cautions)
+        cautions = []
+
+    for caution in cautions:
+        print(f"WARNING: {caution}")
+    for problem in problems:
+        print(f"ERROR: {problem}", file=sys.stderr)
+
+    if not problems:
+        print(f"\n0 errors, {len(cautions)} warning(s). OK.")
+        sys.exit(0)
+    print(f"\n{len(problems)} error(s), {len(cautions)} warning(s). FAIL.", file=sys.stderr)
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/dflash/scripts/configlib/__init__.py b/dflash/scripts/configlib/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/dflash/scripts/configlib/backends.py b/dflash/scripts/configlib/backends.py
new file mode 100644
index 000000000..f7d9d414a
--- /dev/null
+++ b/dflash/scripts/configlib/backends.py
@@ -0,0 +1,168 @@
+"""Backend TOML loader and argv builder."""
+import os
+from pathlib import Path
+
+try:
+    import tomllib
+except ImportError:
+    try:
+        import tomli as tomllib
+    except ImportError:
+        import tomllib
+
+
+class BackendError(Exception):
+    """Raised for any backend loading or validation failure.
+
+    load_backend raises it for a missing backend file, a TOML parse error, a
+    name/filename mismatch, an invalid [binary] table, an unresolvable binary,
+    and missing flags for a declared spec type.
+    """
+
+
+# Required flag keys for each spec type beyond "none"
+_SPEC_FLAG_REQUIREMENTS = {
+    "mtp": {"spec_model", "spec_gamma"},
+    "dflash": {"draft_model", "draft_max"},
+}
+
+# Runtime keys that are boolean flags (added as bare flags when True)
+_BOOL_RUNTIME_KEYS = {"pflash", "ignore_eos", "flash_attn"}
+
+
+def load_backend(
+    backend_path,
+    git_root: str,
+) -> dict:
+    """Load and validate a backend TOML file.
+
+    Structural checks (name, [binary] exclusivity, required spec flags) run
+    before the binary is resolved. A backend whose binary has not been built,
+    or whose env var is unset on this machine, therefore still has its TOML
+    fully validated.
+
+    Args:
+        backend_path: Path-like to the backend .toml file.
+        git_root: Repository root for resolving in_tree paths.
+
+    Returns:
+        Backend dict with additional key ``resolved_binary``.
+
+    Raises:
+        BackendError: On any validation or resolution failure.
+    """
+    backend_path = Path(backend_path)
+    stem = backend_path.stem
+
+    if not backend_path.exists():
+        raise BackendError(f"Backend file not found: {backend_path}")
+
+    try:
+        data = tomllib.loads(backend_path.read_bytes().decode())
+    except Exception as exc:
+        raise BackendError(f"TOML parse error in {backend_path.name!r}: {exc}") from exc
+
+    # name == filename stem
+    name = data.get("name", "")
+    if name != stem:
+        raise BackendError(
+            f"Backend name {name!r} does not match filename stem {stem!r} in {backend_path.name!r}"
+        )
+
+    # binary: exactly one of in_tree or env_var
+    binary = data.get("binary", {})
+    in_tree = binary.get("in_tree")
+    env_var = binary.get("env_var")
+
+    if in_tree and env_var:
+        raise BackendError(
+            f"[{stem}] binary.in_tree and binary.env_var are mutually exclusive"
+        )
+    if not in_tree and not env_var:
+        raise BackendError(
+            f"[{stem}] [binary] must have exactly one of in_tree or env_var"
+        )
+
+    # Validate required flags for declared spec_types. This happens before
+    # binary resolution so a missing binary cannot mask a malformed [flags].
+    spec_types = data.get("supports", {}).get("spec_types", [])
+    flags = data.get("flags", {})
+    for spec_type in spec_types:
+        required = _SPEC_FLAG_REQUIREMENTS.get(spec_type, set())
+        missing = required - set(flags.keys())
+        if missing:
+            raise BackendError(
+                f"[{stem}] Missing required flags for spec_type={spec_type!r}: {sorted(missing)}"
+            )
+
+    # Resolve binary path
+    if in_tree:
+        # Joining onto an absolute in_tree yields in_tree itself, so a single
+        # join covers both the relative and the absolute case.
+        resolved = Path(git_root) / in_tree
+        if not resolved.exists():
+            raise BackendError(
+                f"[{stem}] in_tree binary not found: {in_tree!r} (resolved to {resolved})"
+            )
+        resolved_binary = str(resolved)
+    else:
+        val = os.environ.get(env_var)
+        # An empty value counts as unset: Path("") is the current directory,
+        # which always exists and would slip past the existence check below.
+        if not val:
+            raise BackendError(
+                f"[{stem}] env_var {env_var!r} is not set (or is empty) — cannot resolve binary path"
+            )
+        if not Path(val).exists():
+            raise BackendError(
+                f"[{stem}] binary from ${env_var}={val!r} does not exist"
+            )
+        resolved_binary = val
+
+    result = dict(data)
+    result["resolved_binary"] = resolved_binary
+    return result
+
+
+def build_argv(backend: dict, profile: dict) -> list[str]:
+    """Build the command-line argv from a loaded backend and merged profile.
+
+    A value is emitted only when the backend's [flags] table maps its
+    canonical key to a CLI flag. The argument order is deterministic, so the
+    same profile always yields the same command line.
+
+    Args:
+        backend: dict returned by load_backend (must have resolved_binary).
+        profile: merged profile dict.
+
+    Returns:
+        List of strings [binary, flag, value, ...] suitable for os.execvp.
+
+    Raises:
+        KeyError: If a mapped core key (model.target, runtime.ctx,
+            runtime.kv_k, runtime.kv_v) is absent from the profile.
+    """
+    flags = backend.get("flags", {})
+    runtime = profile.get("runtime", {})
+    model = profile.get("model", {})
+    spec = runtime.get("spec", {})
+    method = spec.get("method", "none")
+
+    argv = [backend["resolved_binary"]]
+
+    # model (always)
+    if "model" in flags:
+        argv += [flags["model"], str(model["target"])]
+
+    # ctx
+    if "ctx" in flags:
+        argv += [flags["ctx"], str(runtime["ctx"])]
+
+    # kv_k, kv_v
+    if "kv_k" in flags:
+        argv += [flags["kv_k"], str(runtime["kv_k"])]
+    if "kv_v" in flags:
+        argv += [flags["kv_v"], str(runtime["kv_v"])]
+
+    # Optional scalar runtime flags
+    for key in ("temp", "seed", "n_predict", "batch", "ubatch"):
+        if key in flags and key in runtime:
+            argv += [flags[key], str(runtime[key])]
+
+    # Boolean flags — add bare flag only when True. The keys live in a set,
+    # whose iteration order for strings can differ between interpreter runs;
+    # sorting keeps the printed/executed command stable.
+    for key in sorted(_BOOL_RUNTIME_KEYS):
+        if key in flags and runtime.get(key) is True:
+            argv.append(flags[key])
+
+    # Speculative decode
+    if method == "mtp":
+        if "spec_model" in flags:
+            argv += [flags["spec_model"], str(model.get("mtp_assistant", ""))]
+        if "spec_gamma" in flags:
+            argv += [flags["spec_gamma"], str(spec.get("gamma", 1))]
+
+    elif method == "dflash":
+        if "draft_model" in flags:
+            argv += [flags["draft_model"], str(model.get("dflash_draft", ""))]
+        if "draft_max" in flags:
+            argv += [flags["draft_max"], str(spec.get("draft_max", 4))]
+
+    return argv
diff --git a/dflash/scripts/configlib/loader.py b/dflash/scripts/configlib/loader.py
new file mode 100644
index 000000000..e053eb031
--- /dev/null
+++ b/dflash/scripts/configlib/loader.py
@@ -0,0 +1,169 @@
+"""Profile TOML loader with inheritance, env expansion, and path validation."""
+import os
+import re
+import copy
+from pathlib import Path
+
+try:
+    import tomllib
+except ImportError:
+    try:
+        import tomli as tomllib
+    except ImportError:
+        import tomllib  # Python 3.11+
+
+
+class ProfileError(Exception):
+    """Raised for any profile loading or validation failure.
+
+    In this module it signals a missing profile file, a TOML parse error, a
+    circular ``extends`` chain, an unset environment variable without a
+    default, or a hardcoded absolute path.
+    """
+
+
+# Regex to match ${VAR} or ${VAR:-default}
+_ENV_RE = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)(?::-(.*?))?\}")
+
+
+def _expand_env(value: str, profile_name: str) -> tuple[str, bool]:
+    """Substitute ${VAR} and ${VAR:-default} references inside *value*.
+
+    Returns (expanded, had_env_var), where had_env_var reports whether the
+    original string contained at least one ${...} reference.
+
+    Raises:
+        ProfileError: If a referenced variable is unset and has no default.
+    """
+    pieces = []
+    cursor = 0
+    saw_reference = False
+
+    for ref in _ENV_RE.finditer(value):
+        saw_reference = True
+        pieces.append(value[cursor:ref.start()])
+        cursor = ref.end()
+
+        var_name, fallback = ref.group(1), ref.group(2)
+        current = os.environ.get(var_name)
+        if current is not None:
+            pieces.append(current)
+        elif fallback is not None:
+            pieces.append(fallback)
+        else:
+            raise ProfileError(
+                f"Unset environment variable ${{{var_name}}} referenced in profile {profile_name!r}"
+            )
+
+    pieces.append(value[cursor:])
+    return "".join(pieces), saw_reference
+
+
+def _resolve_path(raw: str, git_root: str, profile_name: str) -> str:
+    """Resolve a path string according to spec rules.
+
+    1. Expand ${VAR} / ${VAR:-default}.
+    2. Expand leading ~.
+    3. If resolved starts with / AND raw had no ${...} AND raw did not start with ~ -> raise.
+    4. Otherwise resolve relative to git_root.
+
+    Returns:
+        The expanded path: unchanged when it is already absolute, otherwise
+        joined onto git_root.
+
+    Raises:
+        ProfileError: For a hardcoded absolute path, or (via _expand_env) for
+            an unset environment variable without a default.
+    """
+    had_tilde = raw.startswith("~")
+    expanded, had_env_var = _expand_env(raw, profile_name)
+    expanded = os.path.expanduser(expanded)
+
+    if expanded.startswith("/") and not had_env_var and not had_tilde:
+        # Only the first literal is an f-string. The hint below is a plain
+        # string, so its braces must be single to render as ${VAR}.
+        raise ProfileError(
+            f"Hardcoded absolute path {raw!r} in profile {profile_name!r}. "
+            "Use ${VAR}/... or a relative path instead."
+        )
+
+    if os.path.isabs(expanded):
+        return expanded  # env-expanded absolute or tilde-expanded — allowed
+
+    # Relative — resolve against git root
+    return str(Path(git_root) / expanded)
+
+
+def _is_path_key(key: str) -> bool:
+    """Tell whether *key* names a value that should be resolved as a path."""
+    return key in frozenset(("target", "mtp_assistant", "dflash_draft", "source_log"))
+
+
+def _resolve_paths_in(obj, git_root: str, profile_name: str, resolve_paths: bool = True):
+    """Return a copy of *obj* with path-like string values resolved.
+
+    Dicts and lists are walked recursively; any other value is returned
+    unchanged. A string is resolved only when it sits under a path key and
+    resolve_paths is true.
+    """
+    if isinstance(obj, list):
+        return [_resolve_paths_in(item, git_root, profile_name, resolve_paths) for item in obj]
+    if not isinstance(obj, dict):
+        return obj
+
+    walked = {}
+    for key, val in obj.items():
+        if resolve_paths and isinstance(val, str) and _is_path_key(key):
+            walked[key] = _resolve_path(val, git_root, profile_name)
+        else:
+            walked[key] = _resolve_paths_in(val, git_root, profile_name, resolve_paths)
+    return walked
+
+
+def _deep_merge(base: dict, override: dict) -> dict:
+    """Return a new dict holding *base* with *override* layered on top.
+
+    Where both sides hold a dict under the same key the two are merged
+    recursively; otherwise the override value replaces the base value.
+    Neither input is modified.
+    """
+    merged = copy.deepcopy(base)
+    for key, incoming in override.items():
+        existing = merged.get(key)
+        if isinstance(incoming, dict) and isinstance(existing, dict):
+            merged[key] = _deep_merge(existing, incoming)
+        else:
+            merged[key] = copy.deepcopy(incoming)
+    return merged
+
+
+def load_profile(
+    profile_path,
+    git_root: str,
+    profiles_dir: str = None,
+    _seen: set = None,
+    _resolve_paths: bool = True,
+) -> dict:
+    """Load and merge a profile TOML, resolving inheritance and paths.
+
+    Paths are resolved exactly once, on the fully merged result. Parents in
+    the ``extends`` chain are loaded raw: resolving them first would turn
+    inherited values into plain absolute paths, which the second resolution
+    pass would then reject as hardcoded.
+
+    Args:
+        profile_path: Path-like to the profile TOML file.
+        git_root: Absolute path to the repository root (used for relative paths).
+        profiles_dir: Directory containing profiles for extends resolution.
+                      Defaults to the directory of profile_path.
+        _seen: Internal set for circular dependency detection.
+        _resolve_paths: Internal; False while loading a parent so that path
+                        values stay raw until the final merge.
+
+    Returns:
+        Merged profile dict with all paths resolved.
+
+    Raises:
+        ProfileError: On any loading, parsing, or path validation error.
+    """
+    profile_path = Path(profile_path)
+    profile_name = profile_path.name
+
+    if not profile_path.exists():
+        raise ProfileError(f"Profile not found: {profile_path}")
+
+    if _seen is None:
+        _seen = set()
+
+    canonical = str(profile_path.resolve())
+    if canonical in _seen:
+        raise ProfileError(
+            f"Circular extends chain detected involving {profile_name!r}"
+        )
+    # Build a new set rather than mutating the caller's.
+    _seen = _seen | {canonical}
+
+    # Parse TOML
+    try:
+        raw_bytes = profile_path.read_bytes()
+        data = tomllib.loads(raw_bytes.decode())
+    except Exception as exc:
+        raise ProfileError(f"TOML parse error in {profile_name!r}: {exc}") from exc
+
+    # Handle inheritance
+    extends = data.get("extends")
+    if extends and extends != "null":
+        if profiles_dir is None:
+            profiles_dir = str(profile_path.parent)
+        parent_path = Path(profiles_dir) / f"{extends}.toml"
+        parent = load_profile(
+            parent_path,
+            git_root=git_root,
+            profiles_dir=profiles_dir,
+            _seen=_seen,
+            _resolve_paths=False,
+        )
+        # Merge: parent is base, child overrides
+        merged = _deep_merge(parent, data)
+        merged["extends"] = extends
+    else:
+        merged = data
+
+    if not _resolve_paths:
+        # Called for a parent: hand back raw values for the child to merge.
+        return merged
+
+    # Resolve paths once, in the merged result
+    return _resolve_paths_in(merged, git_root, profile_name)
diff --git a/dflash/scripts/configlib/validate.py b/dflash/scripts/configlib/validate.py
new file mode 100644
index 000000000..8e12cd8f5
--- /dev/null
+++ b/dflash/scripts/configlib/validate.py
@@ -0,0 +1,71 @@
+"""Profile validation: structural rules and provenance checks."""
+import os
+from pathlib import Path
+
+
+class ProfileError(Exception):
+    """Signals that a profile did not pass validation."""
+
+
+def validate_profile(
+    profile: dict,
+    profile_name: str = "unknown",
+    strict: bool = False,
+    git_root: str = None,
+) -> tuple[list[str], list[str]]:
+    """Validate a merged profile dict.
+
+    Args:
+        profile: Merged profile mapping to check.
+        profile_name: Label prefixed to every message as ``[profile_name]``.
+        strict: When true, a ``<NEEDS_RUN>`` source_log is an error, not a warning.
+        git_root: Base for a relative source_log; if falsy the existence check is skipped.
+
+    Returns:
+        (errors, warnings) — lists of human-readable strings.
+        Caller should treat any non-empty errors list as fatal.
+    """
+    errors: list[str] = []
+    warnings: list[str] = []
+
+    def err(msg):
+        errors.append(f"[{profile_name}] {msg}")
+
+    def warn(msg):
+        warnings.append(f"[{profile_name}] {msg}")
+
+    # --- provenance ---
+    prov = profile.get("provenance")
+    if not prov:
+        err("Missing [provenance] section (required: source_log, measured_at, hardware_id)")
+    else:
+        for field in ("source_log", "measured_at", "hardware_id"):
+            if not prov.get(field):
+                err(f"Missing provenance.{field}")
+        source_log = prov.get("source_log", "")
+        if source_log == "<NEEDS_RUN>":
+            msg = "provenance.source_log is <NEEDS_RUN> — run the benchmark and fill in the real log path"
+            (err if strict else warn)(msg)
+        elif source_log and git_root:
+            log_path = Path(git_root) / source_log if not os.path.isabs(source_log) else Path(source_log)
+            if not log_path.exists():
+                warn(f"provenance.source_log points to nonexistent file: {source_log!r}")
+
+    # --- expected_floors ---
+    # Require a recognised floor key, not merely a non-empty table: a table holding
+    # only unknown or misspelled keys would otherwise validate yet enforce nothing.
+    floors = profile.get("expected_floors") or {}
+    if not any(key in floors for key in ("decode_tok_s", "prefill_tok_s", "ttft_ms_max")):
+        err("Empty or missing [expected_floors] — at least one of decode_tok_s, prefill_tok_s, ttft_ms_max required")
+
+    # --- spec method cross-checks ---
+    runtime = profile.get("runtime", {})
+    spec = runtime.get("spec", {})
+    method = spec.get("method", "none")
+    model = profile.get("model", {})
+    if method == "mtp" and not model.get("mtp_assistant"):
+        err("spec.method=mtp requires model.mtp_assistant to be set")
+    if method == "dflash" and not model.get("dflash_draft"):
+        err("spec.method=dflash requires model.dflash_draft to be set")
+
+    return errors, warnings
diff --git a/dflash/scripts/profile_run.py b/dflash/scripts/profile_run.py
new file mode 100644
index 000000000..71d8f9c42
--- /dev/null
+++ b/dflash/scripts/profile_run.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""profile_run.py — run a lucebox-hub profile.
+
+Usage:
+    profile_run.py --profile NAME [--override key.path=value ...] [--dry-run] [--print-cmd]
+"""
+import argparse
+import os
+import sys
+from pathlib import Path
+
+
+def _find_git_root(start: Path) -> Path:
+    """Return the nearest directory at or above *start* that contains .git."""
+    here = start.resolve()
+    # Covers `here` itself and the filesystem root; a `p != p.parent` loop never tests the root.
+    for candidate in (here, *here.parents):
+        if (candidate / ".git").exists():
+            return candidate
+    raise RuntimeError(f"Could not find git root from {start}")
+
+
+def _dot_path_set(obj: dict, dot_path: str, value):
+    """Assign *value* at the dot-separated *dot_path* inside the nested dict *obj*."""
+    *parents, leaf = dot_path.split(".")
+    for segment in parents:  # missing intermediate levels are created as empty dicts
+        obj = obj.setdefault(segment, {})
+    obj[leaf] = value
+
+
+def _coerce(value: str):
+    """Convert an override string to bool, int or float, falling back to the raw str."""
+    lowered = value.lower()
+    if lowered in ("true", "false"):
+        # Boolean words are matched case-insensitively, before any numeric parsing.
+        return lowered == "true"
+
+    # int is tried before float so that "3" stays an int instead of becoming 3.0.
+    for parse in (int, float):
+        try:
+            return parse(value)
+        except ValueError:
+            continue
+    # Neither a boolean nor a number: hand the text back unchanged.
+    return value
+
+
+def main():
+    """Load, override and validate a profile, then exec the backend command built from it."""
+    parser = argparse.ArgumentParser(description="Run a lucebox-hub inference profile")
+    parser.add_argument("--profile", required=True, help="Profile name (stem of TOML file in configs/profiles/)")
+    parser.add_argument("--override", action="append", default=[], metavar="KEY=VALUE",
+                        help="Dot-path override (e.g. runtime.ctx=131072)")
+    parser.add_argument("--dry-run", action="store_true", help="Validate only, exit 0")
+    parser.add_argument("--print-cmd", action="store_true", help="Print resolved argv, exit 0")
+    args = parser.parse_args()
+
+    # Resolve paths
+    script_dir = Path(__file__).resolve().parent
+    try:
+        git_root = _find_git_root(script_dir)
+    except RuntimeError as exc:
+        sys.exit(f"ERROR: {exc}")  # a str argument is written to stderr and exits with status 1
+
+    sys.path.insert(0, str(git_root))
+
+    from dflash.scripts.configlib.loader import load_profile, ProfileError
+    from dflash.scripts.configlib.validate import validate_profile
+    from dflash.scripts.configlib.backends import load_backend, build_argv, BackendError
+
+    profiles_dir = git_root / "configs" / "profiles"
+    backends_dir = git_root / "configs" / "backends"
+    profile_path = profiles_dir / f"{args.profile}.toml"
+
+    # Load profile
+    try:
+        profile = load_profile(profile_path, git_root=str(git_root), profiles_dir=str(profiles_dir))
+    except ProfileError as exc:
+        sys.exit(f"ERROR loading profile {args.profile!r}: {exc}")
+
+    # Apply overrides
+    for ov in args.override:
+        key, sep, val = ov.partition("=")
+        if not sep or not key:
+            sys.exit(f"ERROR: --override {ov!r} must be in KEY=VALUE format")
+        try:
+            _dot_path_set(profile, key, _coerce(val))
+        except (AttributeError, TypeError):  # a parent segment names a scalar, not a table
+            sys.exit(f"ERROR: --override {ov!r}: cannot set a key beneath a non-table value")
+
+    # Validate
+    errors, warnings = validate_profile(profile, profile_name=args.profile, git_root=str(git_root))
+    for w in warnings:
+        print(f"WARNING: {w}", file=sys.stderr)
+    if errors:
+        for e in errors:
+            print(f"ERROR: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    if args.dry_run:
+        print(f"Profile {args.profile!r} is valid.")
+        sys.exit(0)
+
+    # Load backend
+    backend_name = profile.get("backend", "")
+    backend_path = backends_dir / f"{backend_name}.toml"
+    try:
+        backend = load_backend(backend_path, git_root=str(git_root))
+    except BackendError as exc:
+        sys.exit(f"ERROR loading backend {backend_name!r}: {exc}")
+
+    # Build argv
+    try:
+        argv = build_argv(backend, profile)
+    except Exception as exc:
+        sys.exit(f"ERROR building command: {exc}")
+
+    if args.print_cmd:
+        for tok in argv:
+            print(tok)
+        sys.exit(0)
+
+    # Execute: on success execvp replaces this process and never returns.
+    try:
+        os.execvp(argv[0], argv)
+    except (OSError, IndexError, ValueError) as exc:
+        sys.exit(f"ERROR: could not execute backend command: {exc}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/dflash/scripts/verify_server.py b/dflash/scripts/verify_server.py
new file mode 100644
index 000000000..bc7278bce
--- /dev/null
+++ b/dflash/scripts/verify_server.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+"""verify_server.py — verify a running server meets profile floor metrics.
+
+Usage:
+    verify_server.py --profile NAME [--base-url URL] [--runs N] [--json-out FILE]
+
+Exit codes:
+    0 — all floors met
+    1 — config/connection error
+    2 — floor(s) failed
+"""
+import argparse
+import json
+import sys
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+
+def _find_git_root(start: Path) -> Path:
+    """Return the nearest directory at or above *start* that contains .git."""
+    here = start.resolve()
+    for candidate in (here, *here.parents):  # unlike `p != p.parent`, also tests the root
+        if (candidate / ".git").exists():
+            return candidate
+    raise RuntimeError(f"Could not find git root from {start}")
+
+
+def _http_json(url: str, payload: dict = None, timeout: float = 30.0):
+    """Send a JSON request (with a body when payload is given); return (data, elapsed_s, first_byte_s)."""
+    # `is not None` so that an empty-dict payload is still sent as a request body.
+    body = json.dumps(payload).encode() if payload is not None else None
+    headers = {"Content-Type": "application/json"} if body is not None else {}
+    req = urllib.request.Request(url, data=body, headers=headers)
+    t0 = time.monotonic()
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            t_first = time.monotonic()  # taken as soon as urlopen() hands back the response
+            data = json.loads(resp.read().decode())
+            t_end = time.monotonic()
+        return data, t_end - t0, t_first - t0
+    except (OSError, ValueError) as exc:  # URLError, socket timeouts/resets, bad JSON or UTF-8
+        raise ConnectionError(f"Request to {url} failed: {exc}") from exc
+
+
+def main():
+    """Measure a running server against the profile's expected_floors; exits 0, 1 or 2."""
+    parser = argparse.ArgumentParser(description="Verify server meets profile floor metrics")
+    parser.add_argument("--profile", required=True)
+    parser.add_argument("--base-url", default="http://127.0.0.1:8080")
+    parser.add_argument("--runs", type=int, default=3)
+    parser.add_argument("--json-out", metavar="FILE")
+    args = parser.parse_args()
+    if args.runs < 1:
+        # With no runs every average is 0.0, so a ttft_ms_max floor would pass vacuously.
+        sys.exit("ERROR: --runs must be at least 1")
+
+    script_dir = Path(__file__).resolve().parent
+    try:
+        git_root = _find_git_root(script_dir)
+    except RuntimeError as exc:
+        sys.exit(f"ERROR: {exc}")  # a str argument is written to stderr and exits with status 1
+
+    sys.path.insert(0, str(git_root))
+
+    from dflash.scripts.configlib.loader import load_profile, ProfileError
+    from dflash.scripts.configlib.validate import validate_profile
+
+    profiles_dir = git_root / "configs" / "profiles"
+    profile_path = profiles_dir / f"{args.profile}.toml"
+
+    try:
+        profile = load_profile(profile_path, git_root=str(git_root), profiles_dir=str(profiles_dir))
+    except ProfileError as exc:
+        sys.exit(f"ERROR loading profile {args.profile!r}: {exc}")
+
+    errors, warnings = validate_profile(profile, profile_name=args.profile, git_root=str(git_root))
+    for w in warnings:
+        print(f"WARNING: {w}", file=sys.stderr)
+    if errors:
+        for e in errors:
+            print(f"ERROR: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    floors = profile.get("expected_floors", {})
+    base_url = args.base_url.rstrip("/")
+    if "prefill_tok_s" in floors:
+        print("WARNING: expected_floors.prefill_tok_s is not measured by this script", file=sys.stderr)
+
+    # Health check
+    try:
+        _http_json(f"{base_url}/health", timeout=5.0)
+    except ConnectionError as exc:
+        sys.exit(f"ERROR: Health check failed: {exc}")
+
+    # Completion runs
+    prompt = "Hello, world! Please respond briefly."
+    payload = {
+        "model": "default",
+        "messages": [{"role": "user", "content": prompt}],
+        "max_tokens": 64,
+        "stream": False,
+    }
+    decode_rates = []
+    ttfts = []
+
+    for i in range(args.runs):
+        try:
+            resp, elapsed, ttft = _http_json(f"{base_url}/v1/chat/completions", payload=payload, timeout=60.0)
+        except ConnectionError as exc:
+            sys.exit(f"ERROR: Run {i+1} failed: {exc}")
+
+        usage = resp.get("usage") or {}  # tolerate an explicit null usage object
+        completion_tokens = usage.get("completion_tokens") or 0
+        if completion_tokens > 0 and elapsed > 0:
+            decode_rates.append(completion_tokens / elapsed)
+        ttfts.append(ttft * 1000)  # convert to ms
+
+    avg_decode = sum(decode_rates) / len(decode_rates) if decode_rates else 0.0
+    avg_ttft = sum(ttfts) / len(ttfts) if ttfts else 0.0
+
+    # Compare to floors
+    floor_results = {}
+    passed = True
+
+    if "decode_tok_s" in floors:
+        floor_val = floors["decode_tok_s"]
+        ok = avg_decode >= floor_val
+        floor_results["decode_tok_s"] = {"measured": avg_decode, "floor": floor_val, "passed": ok}
+        if not ok:
+            passed = False
+            print(f"FAIL: decode_tok_s={avg_decode:.2f} < floor={floor_val}", file=sys.stderr)
+        else:
+            print(f"PASS: decode_tok_s={avg_decode:.2f} >= floor={floor_val}")
+
+    if "ttft_ms_max" in floors:
+        floor_val = floors["ttft_ms_max"]
+        ok = avg_ttft <= floor_val
+        floor_results["ttft_ms_max"] = {"measured": avg_ttft, "floor": floor_val, "passed": ok}
+        if not ok:
+            passed = False
+            print(f"FAIL: ttft_ms={avg_ttft:.1f} > max={floor_val}", file=sys.stderr)
+        else:
+            print(f"PASS: ttft_ms={avg_ttft:.1f} <= max={floor_val}")
+
+    result = {
+        "profile": args.profile,
+        "runs": args.runs,
+        "avg_decode_tok_s": avg_decode,
+        "avg_ttft_ms": avg_ttft,
+        "floors": floor_results,
+        "passed": passed,
+    }
+
+    if args.json_out:
+        with open(args.json_out, "w", encoding="utf-8") as f:
+            json.dump(result, f, indent=2)
+        print(f"Results written to {args.json_out}")
+
+    sys.exit(0 if passed else 2)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 000000000..27eec68ed
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+testpaths = tests
+python_files = test_*.py
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/configs/__init__.py b/tests/configs/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/configs/fixtures/base.toml b/tests/configs/fixtures/base.toml
new file mode 100644
index 000000000..e1da20f49
--- /dev/null
+++ b/tests/configs/fixtures/base.toml
@@ -0,0 +1,25 @@
+extends = ""
+backend = "dflash"
+
+[hardware]
+gpu = "RTX 3090"
+sm = 86
+
+[model]
+target = "models/base-model.gguf"
+
+[runtime]
+ctx = 4096
+kv_k = "q8_0"
+kv_v = "q8_0"
+
+[runtime.spec]
+method = "none"
+
+[expected_floors]
+decode_tok_s = 5.0
+
+[provenance]
+source_log = "tests/configs/fixtures/base.toml"
+measured_at = "2026-01-01"
+hardware_id = "test-device"
diff --git a/tests/configs/fixtures/child.toml b/tests/configs/fixtures/child.toml
new file mode 100644
index 000000000..82882a1ba
--- /dev/null
+++ b/tests/configs/fixtures/child.toml
@@ -0,0 +1,25 @@
+extends = "base"
+backend = "dflash"
+
+[hardware]
+gpu = "RTX 3090"
+sm = 86
+
+[model]
+target = "models/child-model.gguf"
+
+[runtime]
+ctx = 8192
+kv_k = "tq3_0"
+kv_v = "tq3_0"
+
+[runtime.spec]
+method = "none"
+
+[expected_floors]
+decode_tok_s = 8.0
+
+[provenance]
+source_log = "tests/configs/fixtures/child.toml"
+measured_at = "2026-01-02"
+hardware_id = "test-device"
diff --git a/tests/configs/test_backends.py b/tests/configs/test_backends.py
new file mode 100644
index 000000000..3a2530694
--- /dev/null
+++ b/tests/configs/test_backends.py
@@ -0,0 +1,269 @@
+"""Tests for configlib.backends — TDD harness."""
+import os
+import pytest
+from dflash.scripts.configlib.backends import load_backend, build_argv, BackendError
+
+
+def _write(tmp_path, name, content):  # store content at tmp_path/name and return that path
+    target = tmp_path / name
+    target.write_text(content)
+    return target
+
+
+def _minimal_backend_toml(bin_path, name="dflash", spec_types=None, extra_flags=""):
+    """Render a backend TOML string; mtp/dflash flag entries appear only if listed in spec_types."""
+    methods = spec_types or ["none"]
+    quoted = ", ".join(f'"{m}"' for m in methods)
+    optional = ""
+    if "mtp" in methods:
+        optional += 'spec_model = "--mtp"\nspec_gamma = "--gamma"\n'
+    if "dflash" in methods:
+        optional += 'draft_model = "--draft"\ndraft_max = "--draft-max"\n'
+    header = (
+        f'name = "{name}"\n'
+        "[binary]\n"
+        f'in_tree = "{bin_path}"\n'
+        "[supports]\n"
+        f"spec_types = [{quoted}]\n"
+        'kv_quants = ["q8_0"]\n'
+        "[flags]\n"
+        'ctx = "--ctx-size"\n'
+        'kv_k = "--kv-k"\n'
+        'kv_v = "--kv-v"\n'
+        'model = "--model"\n'
+    )
+    return header + optional + extra_flags
+
+
+def test_in_tree_binary_exists_resolves(tmp_path):
+    """An in_tree binary that exists on disk is reported as resolved_binary."""
+    binary = tmp_path / "mybin"
+    binary.touch()
+    cfg = _write(tmp_path, "dflash.toml", _minimal_backend_toml(binary))
+    loaded = load_backend(cfg, git_root=str(tmp_path))
+    assert loaded["resolved_binary"] == str(binary)
+
+
+def test_in_tree_binary_missing_raises_error(tmp_path):
+    """Loading fails with a "not found" BackendError when the in_tree binary is absent."""
+    cfg = _write(tmp_path, "dflash.toml", _minimal_backend_toml("nonexistent/binary"))
+    with pytest.raises(BackendError, match="not found"):
+        load_backend(cfg, git_root=str(tmp_path))
+
+
+def test_env_var_backend_unset_raises(tmp_path, monkeypatch):
+    """An env_var backend whose variable is unset raises an error that names the variable."""
+    monkeypatch.delenv("LUCEBOX_LLAMA_BIN", raising=False)
+    backend_src = (
+        'name = "llama-upstream"\n'
+        '[binary]\nenv_var = "LUCEBOX_LLAMA_BIN"\n'
+        '[supports]\n'
+        'spec_types = ["none"]\n'
+        'kv_quants = ["q8_0"]\n'
+        '[flags]\n'
+        'ctx = "--ctx-size"\n'
+        'kv_k = "--kv-cache-type-k"\n'
+        'kv_v = "--kv-cache-type-v"\n'
+        'model = "--model"\n'
+    )
+    cfg = _write(tmp_path, "llama-upstream.toml", backend_src)
+    with pytest.raises(BackendError, match="LUCEBOX_LLAMA_BIN"):
+        load_backend(cfg, git_root=str(tmp_path))
+
+
+def test_env_var_backend_set_to_existing_resolves(tmp_path, monkeypatch):
+    """An env_var backend resolves to the file that the variable points at."""
+    binary = tmp_path / "llamabin"
+    binary.touch()
+    monkeypatch.setenv("LUCEBOX_LLAMA_BIN", str(binary))
+    backend_src = (
+        'name = "llama-upstream"\n'
+        '[binary]\nenv_var = "LUCEBOX_LLAMA_BIN"\n'
+        '[supports]\n'
+        'spec_types = ["none"]\n'
+        'kv_quants = ["q8_0"]\n'
+        '[flags]\n'
+        'ctx = "--ctx-size"\n'
+        'kv_k = "--kv-cache-type-k"\n'
+        'kv_v = "--kv-cache-type-v"\n'
+        'model = "--model"\n'
+    )
+    cfg = _write(tmp_path, "llama-upstream.toml", backend_src)
+    loaded = load_backend(cfg, git_root=str(tmp_path))
+    assert loaded["resolved_binary"] == str(binary)
+
+
+def test_env_var_backend_set_to_nonexistent_raises(tmp_path, monkeypatch):
+    """An env_var pointing at a missing file is rejected with a "does not exist" error."""
+    monkeypatch.setenv("LUCEBOX_LLAMA_BIN", "/nonexistent/path/llama")
+    backend_src = (
+        'name = "llama-upstream"\n'
+        '[binary]\nenv_var = "LUCEBOX_LLAMA_BIN"\n'
+        '[supports]\n'
+        'spec_types = ["none"]\n'
+        'kv_quants = ["q8_0"]\n'
+        '[flags]\n'
+        'ctx = "--ctx-size"\n'
+        'kv_k = "--kv-cache-type-k"\n'
+        'kv_v = "--kv-cache-type-v"\n'
+        'model = "--model"\n'
+    )
+    cfg = _write(tmp_path, "llama-upstream.toml", backend_src)
+    with pytest.raises(BackendError, match="does not exist"):
+        load_backend(cfg, git_root=str(tmp_path))
+
+
+def test_name_mismatch_raises(tmp_path):
+    """A backend named "wrong-name" but stored as dflash.toml is rejected."""
+    backend_src = (
+        'name = "wrong-name"\n'
+        '[binary]\nin_tree = "somewhere"\n'
+        '[supports]\n'
+        'spec_types = ["none"]\n'
+        'kv_quants = ["q8_0"]\n'
+        '[flags]\n'
+        'ctx = "--ctx-size"\n'
+        'kv_k = "--kv-k"\n'
+        'kv_v = "--kv-v"\n'
+        'model = "--model"\n'
+    )
+    cfg = _write(tmp_path, "dflash.toml", backend_src)
+    with pytest.raises(BackendError, match="name"):
+        load_backend(cfg, git_root=str(tmp_path))
+
+
+def test_both_in_tree_and_env_var_raises(tmp_path):
+    """Declaring in_tree and env_var together is rejected as mutually exclusive."""
+    backend_src = (
+        'name = "dflash"\n'
+        '[binary]\nin_tree = "somewhere"\n'
+        'env_var = "SOME_VAR"\n'
+        '[supports]\n'
+        'spec_types = ["none"]\n'
+        'kv_quants = ["q8_0"]\n'
+        '[flags]\n'
+        'ctx = "--ctx-size"\n'
+        'kv_k = "--kv-k"\n'
+        'kv_v = "--kv-v"\n'
+        'model = "--model"\n'
+    )
+    cfg = _write(tmp_path, "dflash.toml", backend_src)
+    with pytest.raises(BackendError, match="[Mm]utually exclusive"):
+        load_backend(cfg, git_root=str(tmp_path))
+
+
+def test_neither_in_tree_nor_env_var_raises(tmp_path):
+    """A [binary] table with neither in_tree nor env_var is rejected."""
+    backend_src = (
+        'name = "dflash"\n'
+        '[binary]\n[supports]\n'
+        'spec_types = ["none"]\n'
+        'kv_quants = ["q8_0"]\n'
+        '[flags]\n'
+        'ctx = "--ctx-size"\n'
+        'kv_k = "--kv-k"\n'
+        'kv_v = "--kv-v"\n'
+        'model = "--model"\n'
+    )
+    cfg = _write(tmp_path, "dflash.toml", backend_src)
+    with pytest.raises(BackendError):
+        load_backend(cfg, git_root=str(tmp_path))
+
+
+def test_missing_flags_for_spec_types_raises(tmp_path):
+    """Listing mtp in spec_types without any mtp entries under [flags] is rejected."""
+    backend_src = (
+        'name = "dflash"\n'
+        '[binary]\nin_tree = "somewhere"\n'
+        '[supports]\n'
+        'spec_types = ["none", "mtp"]\n'
+        'kv_quants = ["q8_0"]\n'
+        '[flags]\n'
+        'ctx = "--ctx-size"\n'
+        'kv_k = "--kv-k"\n'
+        'kv_v = "--kv-v"\n'
+        'model = "--model"\n'
+    )
+    cfg = _write(tmp_path, "dflash.toml", backend_src)
+    with pytest.raises(BackendError, match="[Ff]lag"):
+        load_backend(cfg, git_root=str(tmp_path))
+
+
+def test_build_argv_includes_ctx_kv_flags(tmp_path):
+    """The built argv carries the ctx and kv_k flags together with their values."""
+    binary = tmp_path / "mybin"
+    binary.touch()
+    cfg = _write(tmp_path, "dflash.toml", _minimal_backend_toml(binary))
+    backend = load_backend(cfg, git_root=str(tmp_path))
+    profile = {
+        "model": {"target": "/model.gguf"},
+        "runtime": {"ctx": 4096, "kv_k": "q8_0", "kv_v": "q8_0", "spec": {"method": "none"}},
+    }
+    command = build_argv(backend, profile)
+    assert "--ctx-size" in command
+    assert "4096" in command
+    assert "--kv-k" in command
+    assert "q8_0" in command
+
+
+def test_build_argv_boolean_flag_only_when_true(tmp_path):
+    """Boolean runtime options emit their flag only when the profile sets them to True."""
+    binary = tmp_path / "mybin"
+    binary.touch()
+    extra = 'pflash = "--pflash"\nignore_eos = "--ignore-eos"\n'
+    cfg = _write(tmp_path, "dflash.toml", _minimal_backend_toml(binary, extra_flags=extra))
+    backend = load_backend(cfg, git_root=str(tmp_path))
+
+    switched_off = {
+        "model": {"target": "/model.gguf"},
+        "runtime": {"ctx": 4096, "kv_k": "q8_0", "kv_v": "q8_0",
+                    "pflash": False, "ignore_eos": False, "spec": {"method": "none"}},
+    }
+    command = build_argv(backend, switched_off)
+    assert "--pflash" not in command
+    assert "--ignore-eos" not in command
+
+    switched_on = {
+        "model": {"target": "/model.gguf"},
+        "runtime": {"ctx": 4096, "kv_k": "q8_0", "kv_v": "q8_0",
+                    "pflash": True, "ignore_eos": True, "spec": {"method": "none"}},
+    }
+    command = build_argv(backend, switched_on)
+    assert "--pflash" in command
+    assert "--ignore-eos" in command
+
+
+def test_build_argv_mtp_method_adds_mtp_and_gamma(tmp_path):
+    """With spec.method=mtp the argv gains the assistant model and gamma flags."""
+    binary = tmp_path / "mybin"
+    binary.touch()
+    cfg = _write(tmp_path, "dflash.toml", _minimal_backend_toml(binary, spec_types=["none", "mtp"]))
+    backend = load_backend(cfg, git_root=str(tmp_path))
+    profile = {
+        "model": {"target": "/model.gguf", "mtp_assistant": "/assistant.gguf"},
+        "runtime": {"ctx": 4096, "kv_k": "q8_0", "kv_v": "q8_0",
+                    "spec": {"method": "mtp", "gamma": 2}},
+    }
+    command = build_argv(backend, profile)
+    assert "--mtp" in command
+    assert "/assistant.gguf" in command
+    assert "--gamma" in command
+    assert "2" in command
+
+
+def test_build_argv_dflash_method_adds_draft_and_draft_max(tmp_path):
+    """With spec.method=dflash the argv gains the draft model and draft-max flags."""
+    binary = tmp_path / "mybin"
+    binary.touch()
+    cfg = _write(tmp_path, "dflash.toml", _minimal_backend_toml(binary, spec_types=["none", "dflash"]))
+    backend = load_backend(cfg, git_root=str(tmp_path))
+    profile = {
+        "model": {"target": "/model.gguf", "dflash_draft": "/draft.gguf"},
+        "runtime": {"ctx": 4096, "kv_k": "q8_0", "kv_v": "q8_0",
+                    "spec": {"method": "dflash", "draft_max": 4}},
+    }
+    command = build_argv(backend, profile)
+    assert "--draft" in command
+    assert "/draft.gguf" in command
+    assert "--draft-max" in command
+    assert "4" in command
diff --git a/tests/configs/test_loader.py b/tests/configs/test_loader.py
new file mode 100644
index 000000000..117bc5faa
--- /dev/null
+++ b/tests/configs/test_loader.py
@@ -0,0 +1,161 @@
+"""Tests for configlib.loader — TDD harness."""
+import os
+import pytest
+
+from dflash.scripts.configlib.loader import load_profile, ProfileError
+
+
+def _write(tmp_path, name, content):  # store content at tmp_path/name and return that path
+    target = tmp_path / name
+    target.write_text(content)
+    return target
+
+
+MINIMAL_TOML = (  # baseline profile text; tests derive variants from it via str.replace
+    'extends = ""\n'
+    'backend = "dflash"\n'
+    '\n'
+    '[hardware]\n'
+    'gpu = "RTX 3090"\n'
+    'sm = 86\n'
+    '\n'
+    '[model]\n'
+    'target = "models/base.gguf"\n'
+    '\n'
+    '[runtime]\n'
+    'ctx = 4096\n'
+    'kv_k = "q8_0"\n'
+    'kv_v = "q8_0"\n'
+    '\n'
+    '[runtime.spec]\n'
+    'method = "none"\n'
+    '\n'
+    '[expected_floors]\n'
+    'decode_tok_s = 5.0\n'
+    '\n'
+    '[provenance]\n'
+    'source_log = "tests/configs/fixtures/base.toml"\n'
+    'measured_at = "2026-01-01"\n'
+    'hardware_id = "test-device"\n'
+)
+
+
+def test_valid_base_parses(tmp_path):
+    """The baseline TOML loads and exposes its backend, hardware and runtime values."""
+    loaded = load_profile(_write(tmp_path, "base.toml", MINIMAL_TOML), git_root=str(tmp_path))
+    assert loaded["backend"] == "dflash"
+    assert loaded["hardware"]["gpu"] == "RTX 3090"
+    assert loaded["runtime"]["ctx"] == 4096
+
+
+def test_child_inherits_and_overrides(tmp_path):
+    """A profile extending "base" loads with its own ctx and kv_k values."""
+    _write(tmp_path, "base.toml", MINIMAL_TOML)
+    child_src = (
+        MINIMAL_TOML
+        .replace('extends = ""', 'extends = "base"')
+        .replace('target = "models/base.gguf"', 'target = "models/child.gguf"')
+        .replace("ctx = 4096", "ctx = 8192")
+        .replace('kv_k = "q8_0"', 'kv_k = "tq3_0"')
+        .replace('kv_v = "q8_0"', 'kv_v = "tq3_0"')
+        .replace("decode_tok_s = 5.0", "decode_tok_s = 8.0")
+    )
+    merged = load_profile(_write(tmp_path, "child.toml", child_src), git_root=str(tmp_path), profiles_dir=str(tmp_path))
+    assert merged["runtime"]["ctx"] == 8192
+    assert merged["runtime"]["kv_k"] == "tq3_0"
+    assert merged["hardware"]["sm"] == 86
+
+
+def test_missing_profile_file_raises(tmp_path):  # a path that was never written must be reported
+    with pytest.raises(ProfileError, match="not found"):
+        load_profile(tmp_path / "nonexistent.toml", git_root=str(tmp_path))
+
+
+def test_toml_parse_error_raises(tmp_path):
+    """Malformed TOML surfaces as a ProfileError whose message mentions TOML."""
+    with pytest.raises(ProfileError, match="TOML"):
+        load_profile(_write(tmp_path, "bad.toml", "this = [broken toml {{{"), git_root=str(tmp_path))
+
+
+def test_circular_extends_raises(tmp_path):
+    """a.toml extends b and b.toml extends a; loading a must report the cycle."""
+    for stem, parent in (("a", "b"), ("b", "a")):
+        _write(tmp_path, f"{stem}.toml", MINIMAL_TOML.replace('extends = ""', f'extends = "{parent}"'))
+    entry = tmp_path / "a.toml"
+    with pytest.raises(ProfileError, match="[Cc]ircular"):
+        load_profile(entry, git_root=str(tmp_path), profiles_dir=str(tmp_path))
+
+
+def test_env_var_expands(tmp_path, monkeypatch):
+    """A ${VAR} reference in model.target is replaced by the variable's value."""
+    monkeypatch.setenv("MY_MODELS", str(tmp_path))
+    templated = MINIMAL_TOML.replace(
+        'target = "models/base.gguf"',
+        'target = "${MY_MODELS}/models/base.gguf"'
+    )
+    loaded = load_profile(_write(tmp_path, "profile.toml", templated), git_root=str(tmp_path))
+    assert loaded["model"]["target"] == f"{tmp_path}/models/base.gguf"
+
+
+def test_env_var_default_used_when_unset(tmp_path, monkeypatch):
+    """The ${VAR:-default} form falls back to the default when VAR is unset."""
+    monkeypatch.delenv("UNSET_VAR_XYZ", raising=False)
+    templated = MINIMAL_TOML.replace(
+        'target = "models/base.gguf"',
+        'target = "${UNSET_VAR_XYZ:-models}/base.gguf"'
+    )
+    loaded = load_profile(_write(tmp_path, "profile.toml", templated), git_root=str(tmp_path))
+    assert loaded["model"]["target"].endswith("models/base.gguf")
+
+
+def test_unset_required_var_raises(tmp_path, monkeypatch):
+    """An unset ${VAR} without a default fails, naming both the variable and the file."""
+    monkeypatch.delenv("LUCEBOX_ROOT", raising=False)
+    templated = MINIMAL_TOML.replace(
+        'target = "models/base.gguf"',
+        'target = "${LUCEBOX_ROOT}/models/base.gguf"'
+    )
+    with pytest.raises(ProfileError) as caught:
+        load_profile(_write(tmp_path, "profile.toml", templated), git_root=str(tmp_path))
+    message = str(caught.value)
+    assert "LUCEBOX_ROOT" in message
+    assert "profile.toml" in message
+
+
+def test_hardcoded_absolute_path_raises(tmp_path):
+    """A literal absolute path in model.target is rejected as hardcoded."""
+    absolute = MINIMAL_TOML.replace(
+        'target = "models/base.gguf"',
+        'target = "/absolute/path/model.gguf"'
+    )
+    with pytest.raises(ProfileError, match="[Hh]ardcoded absolute"):
+        load_profile(_write(tmp_path, "profile.toml", absolute), git_root=str(tmp_path))
+
+
+def test_env_expanded_absolute_allowed(tmp_path, monkeypatch):
+    """An absolute path is accepted when it comes from expanding a ${VAR} reference."""
+    monkeypatch.setenv("MY_ROOT", "/some/absolute/root")
+    templated = MINIMAL_TOML.replace(
+        'target = "models/base.gguf"',
+        'target = "${MY_ROOT}/models/base.gguf"'
+    )
+    loaded = load_profile(_write(tmp_path, "profile.toml", templated), git_root=str(tmp_path))
+    assert loaded["model"]["target"] == "/some/absolute/root/models/base.gguf"
+
+
+def test_tilde_expands(tmp_path):
+    """A leading ~ in model.target expands to the current user's home directory."""
+    with_tilde = MINIMAL_TOML.replace(
+        'target = "models/base.gguf"',
+        'target = "~/models/base.gguf"'
+    )
+    loaded = load_profile(_write(tmp_path, "profile.toml", with_tilde), git_root=str(tmp_path))
+    home_dir = os.path.expanduser("~")
+    assert loaded["model"]["target"] == f"{home_dir}/models/base.gguf"
+
+
+def test_relative_path_resolves_against_git_root(tmp_path):
+    """A relative model.target is joined onto the git_root passed to load_profile."""
+    cfg = _write(tmp_path, "profile.toml", MINIMAL_TOML)
+    loaded = load_profile(cfg, git_root=str(tmp_path))
+    assert loaded["model"]["target"] == str(tmp_path / "models" / "base.gguf")
diff --git a/tests/configs/test_validate.py b/tests/configs/test_validate.py
new file mode 100644
index 000000000..ab1703b5a
--- /dev/null
+++ b/tests/configs/test_validate.py
@@ -0,0 +1,95 @@
+"""Tests for configlib.validate."""
+import pytest
+from dflash.scripts.configlib.validate import validate_profile, ProfileError
+
+
+def _make_profile(overrides=None):
+    """Return a baseline profile dict with *overrides* deep-merged on top."""
+    spec = {"method": "none"}
+    runtime = {"ctx": 4096, "kv_k": "q8_0", "kv_v": "q8_0", "spec": spec}
+    provenance = {
+        "source_log": "some/existing/file.toml",
+        "measured_at": "2026-01-01",
+        "hardware_id": "test-device",
+    }
+    profile = {
+        "extends": None,
+        "backend": "dflash",
+        "hardware": {"gpu": "RTX 3090", "sm": 86},
+        "model": {"target": "/some/model.gguf"},
+        "runtime": runtime,
+        "expected_floors": {"decode_tok_s": 5.0},
+        "provenance": provenance,
+    }
+    # An omitted or empty overrides mapping leaves the baseline untouched.
+    if overrides:
+        _deep_update(profile, overrides)
+    return profile
+
+
+def _deep_update(base, updates):  # in-place recursive merge; an empty dict override replaces
+    for key, override in updates.items():
+        if not (override and isinstance(override, dict) and isinstance(base.get(key), dict)):
+            base[key] = override
+        else:
+            _deep_update(base[key], override)
+
+
+def test_needs_run_source_log_is_warning():  # non-strict: a placeholder log only warns
+    placeholder = _make_profile({"provenance": {"source_log": "<NEEDS_RUN>"}})
+    errors, warnings = validate_profile(placeholder, profile_name="p.toml", strict=False)
+    assert not errors
+    assert any("NEEDS_RUN" in w for w in warnings)
+
+
+def test_needs_run_with_strict_is_error():  # strict=True: a placeholder log is an error
+    placeholder = {"provenance": {"source_log": "<NEEDS_RUN>"}}
+    errors, _ = validate_profile(_make_profile(placeholder), profile_name="p.toml", strict=True)
+    assert any("NEEDS_RUN" in message for message in errors)
+
+
+def test_missing_provenance_is_error():
+    """A profile lacking the provenance table entirely is reported as an error."""
+    stripped = {k: v for k, v in _make_profile().items() if k != "provenance"}
+    errors, _ = validate_profile(stripped, profile_name="p.toml")
+    assert any("provenance" in message.lower() for message in errors)
+
+
+def test_empty_floors_is_error():
+    """An empty expected_floors table must be reported as an error."""
+    errors, _ = validate_profile(_make_profile({"expected_floors": {}}), profile_name="p.toml")
+    assert any("floor" in e.lower() for e in errors)
+
+
+def test_mtp_without_assistant_is_error():
+    """spec.method = "mtp" without model.mtp_assistant must be reported as an error."""
+    spec = {"method": "mtp", "gamma": 2}
+    profile = _make_profile({"runtime": {"spec": spec}})
+    errors, _ = validate_profile(profile, profile_name="p.toml")
+    assert any("assistant" in e.lower() for e in errors)
+
+
+def test_dflash_without_draft_is_error():
+    """spec.method = "dflash" without model.dflash_draft must be reported as an error."""
+    spec = {"method": "dflash", "draft_max": 4}
+    profile = _make_profile({"runtime": {"spec": spec}})
+    errors, _ = validate_profile(profile, profile_name="p.toml")
+    assert any("draft" in e.lower() for e in errors)
+
+
+def test_valid_mtp_profile_no_errors():
+    """An mtp profile that supplies model.mtp_assistant validates without errors."""
+    model = {"target": "/some/model.gguf", "mtp_assistant": "/some/assistant.gguf"}
+    spec = {"method": "mtp", "gamma": 2}
+    candidate = _make_profile({"model": model, "runtime": {"spec": spec}})
+    errors, _ = validate_profile(candidate, profile_name="p.toml")
+    assert not errors
+
+
+def test_valid_dflash_profile_no_errors():
+    """A dflash profile that supplies model.dflash_draft validates without errors."""
+    model = {"target": "/some/model.gguf", "dflash_draft": "/some/draft.gguf"}
+    spec = {"method": "dflash", "draft_max": 4}
+    candidate = _make_profile({"model": model, "runtime": {"spec": spec}})
+    errors, _ = validate_profile(candidate, profile_name="p.toml")
+    assert not errors