diff --git a/configs/CONTRIBUTING.md b/configs/CONTRIBUTING.md new file mode 100644 index 000000000..5ddb2b46b --- /dev/null +++ b/configs/CONTRIBUTING.md @@ -0,0 +1,178 @@ +# Contributing a New Profile + +Thank you for contributing a benchmark result. Follow these five steps exactly. + +--- + +## Step 1 — Run the benchmark and save the log + +Run the inference command and capture stdout/stderr to a file: + +```bash +<your-inference-command> 2>&1 | tee .sisyphus/notes/<topic>/<run-name>.log +``` + +The log file is the ground truth for the numbers in your profile. Without it +the profile will be rejected. + +--- + +## Step 2 — Create the profile TOML + +Copy the template from `configs/profiles/base.toml` or the most similar +existing profile. Name it `<gpu>-<model>-<method>-<ctx>.toml`, e.g.: +`rtx4090-dense31b-mtp-128k.toml`. + +### Required keys + +```toml +extends = "base" # or another profile stem +backend = "dflash" # must match a file in configs/backends/ + +[hardware] +gpu = "RTX 4090" +sm = 89 + +[model] +target = "${LUCEBOX_ROOT}/models/your-model.gguf" +# mtp_assistant required when spec.method = "mtp" +# dflash_draft required when spec.method = "dflash" + +[runtime] +ctx = 131072 +kv_k = "tq3_0" +kv_v = "tq3_0" + +[runtime.spec] +method = "mtp" # "none" | "mtp" | "dflash" +gamma = 2 # required for mtp +# draft_max = 4 # required for dflash + +[expected_floors] +decode_tok_s = 15.0 +# ttft_ms_max = 80.0 +# prefill_tok_s = 500.0 + +[provenance] +source_log = ".sisyphus/notes/<topic>/<run-name>.log" +measured_at = "2026-01-15" # ISO date +hardware_id = "yourname-rtx4090-linux" +commit = "abc1234" # optional git SHA +``` + +### Auto-rejection rules (the linter will reject these) + +- `provenance.source_log = ""` — fill in the real log path +- Hardcoded `/absolute/paths` anywhere — use `${VAR}/...` or relative paths +- `spec.method = "mtp"` without `model.mtp_assistant` +- `spec.method = "dflash"` without `model.dflash_draft` +- Empty `[expected_floors]` — set at least one floor +- Missing `[provenance]` section or any of its three required 
fields +- `source_log` pointing to a file that does not exist (warning, not error, + but reviewers will ask you to provide it) + +--- + +## Step 3 — Lint before submitting + +```bash +python dflash/scripts/config_lint.py --profile <profile-name> +``` + +Must exit 0 (warnings about missing binaries are OK). + +For strict checking (promotes warnings to errors): + +```bash +python dflash/scripts/config_lint.py --profile <profile-name> --strict +``` + +--- + +## Step 4 — Add or validate the backend + +If your profile uses a backend that already exists, skip this step. + +To add a backend, create `configs/backends/<name>.toml`: + +```toml +name = "my-backend" # must match filename stem exactly +upstream = "https://..." +build_hint = "..." # optional build instructions + +[binary] +# exactly one of: +in_tree = "path/relative/to/git/root" +# env_var = "MY_BINARY_VAR" + +[supports] +spec_types = ["none", "mtp"] # which methods this binary supports +kv_quants = ["q8_0", "tq3_0"] + +[flags] +# map canonical key -> CLI flag string +model = "--model" +ctx = "--ctx-size" +kv_k = "--kv-k" +kv_v = "--kv-v" +# if "mtp" in spec_types: +spec_model = "--mtp" +spec_gamma = "--gamma" +# if "dflash" in spec_types: +# draft_model = "--draft" +# draft_max = "--draft-max" + +[stdout_parse] +tok_s = "eval time.*?([0-9]+\\.[0-9]+) tokens per second" +ttft_ms = "time to first token.*?([0-9]+\\.[0-9]+) ms" +``` + +Backend validation rules: +- `name` must equal the filename stem +- Exactly one of `binary.in_tree` or `binary.env_var` must be set +- All required flags for declared `spec_types` must be present + +--- + +## Step 5 — Open a pull request + +Include in the PR body: +- A snippet from the log file showing the measured tok/s and TTFT +- The exact hardware (GPU model, driver version, VRAM) +- The date of measurement +- Confirmation that `config_lint.py --strict` exits 0 + +### Disclosure requirement + +If any part of the profile, code, or PR description was AI-generated, state +this explicitly. 
PRs with AI-generated content that is not disclosed will be +closed. + +--- + +## Schema reference summary + +### Profile keys + +| Key | Type | Required | Notes | +|-----|------|----------|-------| +| extends | string | yes | parent profile stem or "" for none | +| backend | string | yes | stem of a file in configs/backends/ | +| hardware.gpu | string | yes | GPU model name | +| hardware.sm | int | yes | CUDA SM version (e.g. 86 for Ampere) | +| model.target | path | yes | main model GGUF | +| model.mtp_assistant | path | when method=mtp | MTP assistant GGUF | +| model.dflash_draft | path | when method=dflash | DFlash draft GGUF | +| runtime.ctx | int | yes | context length in tokens | +| runtime.kv_k | string | yes | KV cache key quantization | +| runtime.kv_v | string | yes | KV cache value quantization | +| runtime.spec.method | string | yes | "none", "mtp", or "dflash" | +| runtime.spec.gamma | int | when method=mtp | speculative tokens per step | +| runtime.spec.draft_max | int | when method=dflash | max draft tokens | +| runtime.flash_attn | bool | no | enable flash attention | +| runtime.pflash | bool | no | enable pflash (MoE models) | +| expected_floors | table | yes | at least one floor metric | +| provenance.source_log | path | yes | path to benchmark log | +| provenance.measured_at | date | yes | ISO 8601 date | +| provenance.hardware_id | string | yes | unique hardware identifier | +| provenance.commit | string | no | git SHA of code under test | diff --git a/configs/README.md b/configs/README.md new file mode 100644 index 000000000..859145e65 --- /dev/null +++ b/configs/README.md @@ -0,0 +1,73 @@ +# configs — Declarative Inference Profiles + +This directory contains declarative TOML profiles and backend definitions for +running Gemma-4 inference on lucebox-hub. Each profile captures a specific +(model, context length, speculative decode method, hardware) combination along +with measured performance floors, so every run is reproducible and comparable. 
+ +## Why this exists + +Ad-hoc shell commands diverge over time. Profiles make the connection between +a benchmark log and the exact flags used to produce it explicit and machine-checkable. + +## Directory layout + +``` +configs/ + profiles/ — one .toml per (model, ctx, method, hw) combination + backends/ — one .toml per inference binary variant +``` + +## Quick start + +```bash +# Lint everything (exits 0 if no errors, prints warnings) +python dflash/scripts/config_lint.py + +# Dry-run a profile (validates env, paths, backend; does NOT run inference) +python dflash/scripts/profile_run.py --profile rtx3090-moe26b-dflash-256k --dry-run + +# Print the resolved command (for inspection or shell scripting) +python dflash/scripts/profile_run.py --profile rtx3090-moe26b-dflash-256k --print-cmd + +# Run (execvp — replaces the Python process) +LUCEBOX_ROOT=/your/root python dflash/scripts/profile_run.py --profile rtx3090-dense31b-mtp-64k + +# Override a single field at runtime +python dflash/scripts/profile_run.py --profile rtx3090-moe26b-dflash-256k \ + --override runtime.ctx=131072 + +# Verify a running server meets the floors declared in the profile +python dflash/scripts/verify_server.py --profile rtx3090-moe26b-dflash-256k \ + --base-url http://127.0.0.1:8080 --runs 5 +``` + +## Required environment variables + +| Profile | Variable | Purpose | +|---------|----------|---------| +| rtx3090-dense31b-mtp-64k | `LUCEBOX_ROOT` | Root containing models/ | +| rtx3090-moe26b-dflash-256k | `HOME` (auto-set) | Root for ~/models/ paths | +| rtx3090-moe26b-mtp-1m | `HOME` (auto-set) | Root for ~/models/ paths | +| llama-upstream backend | `LUCEBOX_LLAMA_BIN` | Path to llama-server or llama-cli | + +## Shipped profiles + +| Profile | Model | Method | CTX | Measured decode | Floor | +|---------|-------|--------|-----|-----------------|-------| +| rtx3090-dense31b-mtp-64k | Gemma-4 31B dense Q4_K_M | MTP γ=2 | 64K | 10.07 tok/s | 9.5 tok/s | +| rtx3090-moe26b-dflash-256k | Gemma-4 
26B-A4B MoE Q4_K_M | DFlash dm=4+pflash | 256K | 67.95 tok/s / 55ms TTFT | 65.0 tok/s / 65ms | +| rtx3090-moe26b-mtp-1m | Gemma-4 26B-A4B MoE Q4_K_M | MTP γ=2+pflash | 1M | 23.65 tok/s / 108ms TTFT | 22.0 tok/s / 120ms | + +All measurements taken on RTX 3090 (24 GB VRAM) running WSL2 (peppi-rtx3090-wsl). + +## Backends + +| Backend | Binary | Spec methods | +|---------|--------|-------------| +| dflash | `dflash/build/test_gemma4_dflash` (in-tree) | none, mtp, dflash | +| llama-upstream | `$LUCEBOX_LLAMA_BIN` (external) | none | + +## Schema reference + +See `configs/CONTRIBUTING.md` for the full schema and contribution guide. diff --git a/configs/backends/dflash.toml b/configs/backends/dflash.toml new file mode 100644 index 000000000..2c58232ae --- /dev/null +++ b/configs/backends/dflash.toml @@ -0,0 +1,37 @@ +# dflash backend — in-tree speculative decode binary +name = "dflash" +upstream = "https://github.com/dusterbloom/lucebox-hub" +build_hint = "mkdir -p dflash/build && cd dflash/build && cmake .. 
-DCMAKE_BUILD_TYPE=Release && make -j$(nproc) test_gemma4_dflash" + +[binary] +in_tree = "dflash/build/test_gemma4_dflash" + +[supports] +spec_types = ["none", "mtp", "dflash"] +kv_quants = ["q8_0", "tq3_0", "f16"] + +[flags] +# Core +model = "--model" +ctx = "--ctx-size" +kv_k = "--kv-k" +kv_v = "--kv-v" +# MTP speculative decode +spec_model = "--mtp" +spec_gamma = "--gamma" +# DFlash speculative decode +draft_model = "--draft" +draft_max = "--draft-max" +# Optional +pflash = "--pflash" +flash_attn = "--flash-attn" +temp = "--temp" +seed = "--seed" +n_predict = "--n-predict" +ignore_eos = "--ignore-eos" +batch = "--batch-size" +ubatch = "--ubatch-size" + +[stdout_parse] +tok_s = "eval time.*?([0-9]+\\.[0-9]+) tokens per second" +ttft_ms = "time to first token.*?([0-9]+\\.[0-9]+) ms" diff --git a/configs/backends/llama-upstream.toml b/configs/backends/llama-upstream.toml new file mode 100644 index 000000000..dd23c90ca --- /dev/null +++ b/configs/backends/llama-upstream.toml @@ -0,0 +1,29 @@ +# llama-upstream backend — external llama.cpp server binary +# Set LUCEBOX_LLAMA_BIN to the path of your compiled llama-server or llama-cli. 
+name = "llama-upstream" +upstream = "https://github.com/ggerganov/llama.cpp" +build_hint = "cmake -B build -DGGML_CUDA=ON && cmake --build build --config Release -t llama-cli" + +[binary] +env_var = "LUCEBOX_LLAMA_BIN" + +[supports] +spec_types = ["none"] +kv_quants = ["q8_0", "f16", "f32"] + +[flags] +model = "--model" +ctx = "--ctx-size" +kv_k = "--kv-cache-type-k" +kv_v = "--kv-cache-type-v" +flash_attn = "--flash-attn" +temp = "--temp" +seed = "--seed" +n_predict = "--n-predict" +ignore_eos = "--ignore-eos" +batch = "--batch-size" +ubatch = "--ubatch-size" + +[stdout_parse] +tok_s = "eval time.*?([0-9]+\\.[0-9]+) tokens per second" +ttft_ms = "load time.*?([0-9]+\\.[0-9]+) ms" diff --git a/configs/profiles/base.toml b/configs/profiles/base.toml new file mode 100644 index 000000000..e5cf77bbe --- /dev/null +++ b/configs/profiles/base.toml @@ -0,0 +1,25 @@ +# Base profile template — all profiles extend this or a child of it. +# This file is NOT directly runnable; it lacks provenance and measured data. 
+extends = "" +backend = "dflash" + +[hardware] +gpu = "" +sm = 0 + +[model] +target = "" + +[runtime] +ctx = 4096 +kv_k = "q8_0" +kv_v = "q8_0" +flash_attn = true + +[runtime.spec] +method = "none" + +[expected_floors] +# at least one of: decode_tok_s, prefill_tok_s, ttft_ms_max + +# [provenance] intentionally absent — fill in when deriving a real profile diff --git a/configs/profiles/rtx3090-dense31b-mtp-64k.toml b/configs/profiles/rtx3090-dense31b-mtp-64k.toml new file mode 100644 index 000000000..c2a34c224 --- /dev/null +++ b/configs/profiles/rtx3090-dense31b-mtp-64k.toml @@ -0,0 +1,31 @@ +# RTX 3090 — Gemma-4 Dense 31B + MTP (gamma=2) @ 64K context +# Measured: 10.07 tok/s decode, acceptance_length=0.73 (+61% over no-MTP) +extends = "base" +backend = "dflash" + +[hardware] +gpu = "RTX 3090" +sm = 86 + +[model] +target = "${LUCEBOX_ROOT}/models/gemma-4-31B-it-Q4_K_M.gguf" +mtp_assistant = "${LUCEBOX_ROOT}/models/gemma4-mtp-31B/gemma-4-31B-it-assistant.Q4_K_M.gguf" + +[runtime] +ctx = 65536 +kv_k = "tq3_0" +kv_v = "tq3_0" +flash_attn = true + +[runtime.spec] +method = "mtp" +gamma = 2 + +[expected_floors] +decode_tok_s = 9.5 + +[provenance] +source_log = ".sisyphus/notes/gemma4-baseline/mtp-gamma/phase4-b/mtp_g2_ctx65536.log" +measured_at = "2026-05-11" +hardware_id = "peppi-rtx3090-wsl" +commit = "4bcb972" diff --git a/configs/profiles/rtx3090-moe26b-dflash-256k.toml b/configs/profiles/rtx3090-moe26b-dflash-256k.toml new file mode 100644 index 000000000..cb5e09598 --- /dev/null +++ b/configs/profiles/rtx3090-moe26b-dflash-256k.toml @@ -0,0 +1,32 @@ +# RTX 3090 — Gemma-4 MoE 26B-A4B + DFlash (dm=4) + pflash @ 256K context +# Measured: 67.95 tok/s decode, TTFT ~55ms, VRAM 21.73 GB +extends = "base" +backend = "dflash" + +[hardware] +gpu = "RTX 3090" +sm = 86 + +[model] +target = "${HOME}/models/gemma4-26b-a4b-it/gemma-4-26B-A4B-it-UD-Q4_K_M.gguf" +dflash_draft = "${HOME}/models/gemma4-26b-a4b-dflash/draft-q8_0.gguf" + +[runtime] +ctx = 262144 +kv_k = "q8_0" +kv_v 
= "q8_0" +flash_attn = true +pflash = true + +[runtime.spec] +method = "dflash" +draft_max = 4 + +[expected_floors] +decode_tok_s = 65.0 +ttft_ms_max = 65.0 + +[provenance] +source_log = ".sisyphus/notes/gemma4-baseline/tq3-frontier/C3_dflash_pflash_256K_q8_dm4.log" +measured_at = "2026-05-10" +hardware_id = "peppi-rtx3090-wsl" diff --git a/configs/profiles/rtx3090-moe26b-mtp-1m.toml b/configs/profiles/rtx3090-moe26b-mtp-1m.toml new file mode 100644 index 000000000..5e80fbda0 --- /dev/null +++ b/configs/profiles/rtx3090-moe26b-mtp-1m.toml @@ -0,0 +1,32 @@ +# RTX 3090 — Gemma-4 MoE 26B-A4B + MTP (gamma=2) @ 1M context +# Measured: 23.65 tok/s decode, TTFT 107.9ms, VRAM 23.88 GB +extends = "base" +backend = "dflash" + +[hardware] +gpu = "RTX 3090" +sm = 86 + +[model] +target = "${HOME}/models/gemma4-26b-a4b-it/gemma-4-26B-A4B-it-UD-Q4_K_M.gguf" +mtp_assistant = "${HOME}/models/gemma4-mtp-26b-a4b/gemma-4-26B-A4B-it-assistant.Q4_K_M.gguf" + +[runtime] +ctx = 1048576 +kv_k = "tq3_0" +kv_v = "tq3_0" +flash_attn = true +pflash = true + +[runtime.spec] +method = "mtp" +gamma = 2 + +[expected_floors] +decode_tok_s = 22.0 +ttft_ms_max = 120.0 + +[provenance] +source_log = ".sisyphus/notes/gemma4-baseline/mtp-gamma/moe-scientific/mtp_g2_ctx1048576.log" +measured_at = "2026-05-11" +hardware_id = "peppi-rtx3090-wsl" diff --git a/dflash/__init__.py b/dflash/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dflash/scripts/__init__.py b/dflash/scripts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dflash/scripts/config_lint.py b/dflash/scripts/config_lint.py new file mode 100644 index 000000000..051615b0a --- /dev/null +++ b/dflash/scripts/config_lint.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +"""config_lint.py — validate all profiles and backends. 
import argparse
import sys
from pathlib import Path


def _find_git_root(start: Path) -> Path:
    """Return the closest ancestor of *start* (itself included) holding ``.git``.

    The walk stops before the filesystem root, so the root directory itself
    is never tested.

    Raises:
        RuntimeError: If no such directory is found.
    """
    p = start.resolve()
    while p != p.parent:
        if (p / ".git").exists():
            return p
        p = p.parent
    raise RuntimeError(f"Could not find git root from {start}")


# Substrings that identify "this workstation is not set up" failures.
# They are deliberately specific: bare fragments such as "env_var" or
# "not found" also occur in structural errors (e.g. "binary.in_tree and
# binary.env_var are mutually exclusive", "Profile not found: ...") and would
# wrongly downgrade those to warnings.
_ENV_VAR_MSGS = ("Unset environment variable", "is not set")
_BINARY_MSGS = ("binary not found", "does not exist")


def _is_env_or_binary_error(msg: str) -> bool:
    """Return True if *msg* reports an unset env var or a missing binary.

    Only environment-dependent failures match; structural configuration
    errors return False so the linter reports them as errors.
    """
    return any(k in msg for k in _ENV_VAR_MSGS + _BINARY_MSGS)


def lint_profile(profile_path: Path, git_root: str, profiles_dir: str, strict: bool):
    """Lint a single profile.

    Args:
        profile_path: Path to the profile TOML file.
        git_root: Repository root, forwarded to the loader and validator.
        profiles_dir: Directory used to resolve ``extends`` parents.
        strict: Forwarded to ``validate_profile``.

    Returns:
        ``(errors, warnings)`` — two lists of human-readable strings.
    """
    # Imported lazily: main() puts the git root on sys.path before calling us.
    from dflash.scripts.configlib.loader import load_profile, ProfileError
    from dflash.scripts.configlib.validate import validate_profile

    name = profile_path.stem
    try:
        profile = load_profile(profile_path, git_root=git_root, profiles_dir=profiles_dir)
    except ProfileError as exc:
        msg = str(exc)
        if _is_env_or_binary_error(msg):
            # Env not configured on this workstation — warn, do not fail lint
            return [], [f"[{name}] Env/path warning (set vars to run): {msg}"]
        return [f"[{name}] Load error: {msg}"], []

    errors, warnings = validate_profile(
        profile,
        profile_name=profile_path.name,
        strict=strict,
        git_root=git_root,
    )
    return errors, warnings
Returns (errors, warnings).""" + from dflash.scripts.configlib.backends import load_backend, BackendError + + name = backend_path.stem + try: + load_backend(backend_path, git_root=git_root) + return [], [] + except BackendError as exc: + msg = str(exc) + if _is_env_or_binary_error(msg): + return [], [f"[{name}] Binary/env warning (build or set vars): {msg}"] + return [f"[{name}] Backend error: {msg}"], [] + + +def main(): + parser = argparse.ArgumentParser(description="Lint lucebox-hub config profiles and backends") + parser.add_argument("--profile", help="Lint only this profile name (stem)") + parser.add_argument("--strict", action="store_true", help="Escalate warnings to errors") + args = parser.parse_args() + + script_dir = Path(__file__).resolve().parent + try: + git_root = str(_find_git_root(script_dir)) + except RuntimeError as exc: + print(f"ERROR: {exc}", file=sys.stderr) + sys.exit(1) + + sys.path.insert(0, git_root) + + profiles_dir = Path(git_root) / "configs" / "profiles" + backends_dir = Path(git_root) / "configs" / "backends" + + total_errors = [] + total_warnings = [] + + if args.profile: + profile_path = profiles_dir / f"{args.profile}.toml" + if not profile_path.exists(): + print(f"ERROR: Profile {args.profile!r} not found at {profile_path}", file=sys.stderr) + sys.exit(1) + errs, warns = lint_profile(profile_path, git_root, str(profiles_dir), args.strict) + total_errors.extend(errs) + total_warnings.extend(warns) + else: + # Lint all profiles (skip base.toml — template, no provenance) + for profile_path in sorted(profiles_dir.glob("*.toml")): + if profile_path.stem == "base": + try: + from dflash.scripts.configlib.loader import load_profile, ProfileError + load_profile(profile_path, git_root=git_root, profiles_dir=str(profiles_dir)) + print(f" base.toml: OK (template, provenance skipped)") + except ProfileError as exc: + msg = str(exc) + if _is_env_or_binary_error(msg): + total_warnings.append(f"[base] {msg}") + else: + 
total_errors.append(f"[base] Parse error: {msg}") + continue + errs, warns = lint_profile(profile_path, git_root, str(profiles_dir), args.strict) + total_errors.extend(errs) + total_warnings.extend(warns) + + for backend_path in sorted(backends_dir.glob("*.toml")): + errs, warns = lint_backend(backend_path, git_root) + total_errors.extend(errs) + total_warnings.extend(warns) + + if args.strict and total_warnings: + total_errors.extend([f"(strict) {w}" for w in total_warnings]) + total_warnings = [] + + for w in total_warnings: + print(f"WARNING: {w}") + for e in total_errors: + print(f"ERROR: {e}", file=sys.stderr) + + if total_errors: + print(f"\n{len(total_errors)} error(s), {len(total_warnings)} warning(s). FAIL.", file=sys.stderr) + sys.exit(1) + else: + print(f"\n0 errors, {len(total_warnings)} warning(s). OK.") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/dflash/scripts/configlib/__init__.py b/dflash/scripts/configlib/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dflash/scripts/configlib/backends.py b/dflash/scripts/configlib/backends.py new file mode 100644 index 000000000..f7d9d414a --- /dev/null +++ b/dflash/scripts/configlib/backends.py @@ -0,0 +1,168 @@ +"""Backend TOML loader and argv builder.""" +import os +from pathlib import Path + +try: + import tomllib +except ImportError: + try: + import tomli as tomllib + except ImportError: + import tomllib + + +class BackendError(Exception): + """Raised for any backend loading or validation failure.""" + + +# Required flag keys for each spec type beyond "none" +_SPEC_FLAG_REQUIREMENTS = { + "mtp": {"spec_model", "spec_gamma"}, + "dflash": {"draft_model", "draft_max"}, +} + +# Runtime keys that are boolean flags (added as bare flags when True) +_BOOL_RUNTIME_KEYS = {"pflash", "ignore_eos", "flash_attn"} + + +def load_backend( + backend_path, + git_root: str, +) -> dict: + """Load and validate a backend TOML file. 
+ + Args: + backend_path: Path-like to the backend .toml file. + git_root: Repository root for resolving in_tree paths. + + Returns: + Backend dict with additional key ``resolved_binary``. + + Raises: + BackendError: On any validation or resolution failure. + """ + backend_path = Path(backend_path) + stem = backend_path.stem + + if not backend_path.exists(): + raise BackendError(f"Backend file not found: {backend_path}") + + try: + data = tomllib.loads(backend_path.read_bytes().decode()) + except Exception as exc: + raise BackendError(f"TOML parse error in {backend_path.name!r}: {exc}") from exc + + # name == filename stem + name = data.get("name", "") + if name != stem: + raise BackendError( + f"Backend name {name!r} does not match filename stem {stem!r} in {backend_path.name!r}" + ) + + # binary: exactly one of in_tree or env_var + binary = data.get("binary", {}) + in_tree = binary.get("in_tree") + env_var = binary.get("env_var") + + if in_tree and env_var: + raise BackendError( + f"[{stem}] binary.in_tree and binary.env_var are mutually exclusive" + ) + if not in_tree and not env_var: + raise BackendError( + f"[{stem}] [binary] must have exactly one of in_tree or env_var" + ) + + # Resolve binary path + if in_tree: + resolved = Path(git_root) / in_tree if not Path(in_tree).is_absolute() else Path(in_tree) + if not resolved.exists(): + raise BackendError( + f"[{stem}] in_tree binary not found: {in_tree!r} (resolved to {resolved})" + ) + resolved_binary = str(resolved) + else: + # env_var + val = os.environ.get(env_var) + if val is None: + raise BackendError( + f"[{stem}] env_var {env_var!r} is not set — cannot resolve binary path" + ) + if not Path(val).exists(): + raise BackendError( + f"[{stem}] binary from ${env_var}={val!r} does not exist" + ) + resolved_binary = val + + # Validate required flags for declared spec_types + spec_types = data.get("supports", {}).get("spec_types", []) + flags = data.get("flags", {}) + for spec_type in spec_types: + required = 
_SPEC_FLAG_REQUIREMENTS.get(spec_type, set()) + missing = required - set(flags.keys()) + if missing: + raise BackendError( + f"[{stem}] Missing required flags for spec_type={spec_type!r}: {sorted(missing)}" + ) + + result = dict(data) + result["resolved_binary"] = resolved_binary + return result + + +def build_argv(backend: dict, profile: dict) -> list[str]: + """Build the command-line argv from a loaded backend and merged profile. + + Args: + backend: dict returned by load_backend (must have resolved_binary). + profile: merged profile dict. + + Returns: + List of strings [binary, flag, value, ...] suitable for os.execvp. + """ + flags = backend.get("flags", {}) + runtime = profile.get("runtime", {}) + model = profile.get("model", {}) + spec = runtime.get("spec", {}) + method = spec.get("method", "none") + + argv = [backend["resolved_binary"]] + + # model (always) + if "model" in flags: + argv += [flags["model"], str(model["target"])] + + # ctx + if "ctx" in flags: + argv += [flags["ctx"], str(runtime["ctx"])] + + # kv_k, kv_v + if "kv_k" in flags: + argv += [flags["kv_k"], str(runtime["kv_k"])] + if "kv_v" in flags: + argv += [flags["kv_v"], str(runtime["kv_v"])] + + # Optional scalar runtime flags + for key in ("temp", "seed", "n_predict", "batch", "ubatch"): + if key in flags and key in runtime: + argv += [flags[key], str(runtime[key])] + + # Boolean flags — add bare flag only when True + for key in _BOOL_RUNTIME_KEYS: + if key in flags and runtime.get(key) is True: + argv.append(flags[key]) + + # Speculative decode + if method == "mtp": + if "spec_model" in flags: + argv += [flags["spec_model"], str(model.get("mtp_assistant", ""))] + if "spec_gamma" in flags: + argv += [flags["spec_gamma"], str(spec.get("gamma", 1))] + + elif method == "dflash": + if "draft_model" in flags: + argv += [flags["draft_model"], str(model.get("dflash_draft", ""))] + if "draft_max" in flags: + argv += [flags["draft_max"], str(spec.get("draft_max", 4))] + + return argv diff --git 
a/dflash/scripts/configlib/loader.py b/dflash/scripts/configlib/loader.py new file mode 100644 index 000000000..e053eb031 --- /dev/null +++ b/dflash/scripts/configlib/loader.py @@ -0,0 +1,169 @@ +"""Profile TOML loader with inheritance, env expansion, and path validation.""" +import os +import re +import copy +from pathlib import Path + +try: + import tomllib +except ImportError: + try: + import tomli as tomllib + except ImportError: + import tomllib # Python 3.11+ + + +class ProfileError(Exception): + """Raised for any profile loading or validation failure.""" + + +# Regex to match ${VAR} or ${VAR:-default} +_ENV_RE = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)(?::-(.*?))?\}") + + +def _expand_env(value: str, profile_name: str) -> tuple[str, bool]: + """Expand ${VAR} and ${VAR:-default} in value. + + Returns (expanded, had_env_var) where had_env_var is True if any ${...} + was present in the original string (even after expansion). + """ + had_env_var = bool(_ENV_RE.search(value)) + + def _replace(m): + var = m.group(1) + default = m.group(2) + val = os.environ.get(var) + if val is None: + if default is not None: + return default + raise ProfileError( + f"Unset environment variable ${{{var}}} referenced in profile {profile_name!r}" + ) + return val + + return _ENV_RE.sub(_replace, value), had_env_var + + +def _resolve_path(raw: str, git_root: str, profile_name: str) -> str: + """Resolve a path string according to spec rules. + + 1. Expand ${VAR} / ${VAR:-default}. + 2. Expand leading ~. + 3. If resolved starts with / AND raw had no ${...} AND raw did not start with ~ -> raise. + 4. Otherwise resolve relative to git_root. + """ + had_tilde = raw.startswith("~") + expanded, had_env_var = _expand_env(raw, profile_name) + expanded = os.path.expanduser(expanded) + + if expanded.startswith("/") and not had_env_var and not had_tilde: + raise ProfileError( + f"Hardcoded absolute path {raw!r} in profile {profile_name!r}. " + "Use ${{VAR}}/... or a relative path instead." 
+ ) + + if os.path.isabs(expanded): + return expanded # env-expanded absolute or tilde-expanded — allowed + + # Relative — resolve against git root + return str(Path(git_root) / expanded) + + +def _is_path_key(key: str) -> bool: + """Heuristic: keys whose values should be treated as paths.""" + path_keys = {"target", "mtp_assistant", "dflash_draft", "source_log"} + return key in path_keys + + +def _resolve_paths_in(obj, git_root: str, profile_name: str, resolve_paths: bool = True): + """Recursively walk obj and resolve path-like string values.""" + if isinstance(obj, dict): + return { + k: ( + _resolve_path(v, git_root, profile_name) + if resolve_paths and isinstance(v, str) and _is_path_key(k) + else _resolve_paths_in(v, git_root, profile_name, resolve_paths) + ) + for k, v in obj.items() + } + if isinstance(obj, list): + return [_resolve_paths_in(i, git_root, profile_name, resolve_paths) for i in obj] + return obj + + +def _deep_merge(base: dict, override: dict) -> dict: + """Deep merge override into base (override wins).""" + result = copy.deepcopy(base) + for k, v in override.items(): + if isinstance(v, dict) and isinstance(result.get(k), dict): + result[k] = _deep_merge(result[k], v) + else: + result[k] = copy.deepcopy(v) + return result + + +def load_profile( + profile_path, + git_root: str, + profiles_dir: str = None, + _seen: set = None, +) -> dict: + """Load and merge a profile TOML, resolving inheritance and paths. + + Args: + profile_path: Path-like to the profile TOML file. + git_root: Absolute path to the repository root (used for relative paths). + profiles_dir: Directory containing profiles for extends resolution. + Defaults to the directory of profile_path. + _seen: Internal set for circular dependency detection. + + Returns: + Merged profile dict with all paths resolved. + + Raises: + ProfileError: On any loading, parsing, or path validation error. 
+ """ + profile_path = Path(profile_path) + profile_name = profile_path.name + + if not profile_path.exists(): + raise ProfileError(f"Profile not found: {profile_path}") + + if _seen is None: + _seen = set() + + canonical = str(profile_path.resolve()) + if canonical in _seen: + raise ProfileError( + f"Circular extends chain detected involving {profile_name!r}" + ) + _seen = _seen | {canonical} + + # Parse TOML + try: + raw_bytes = profile_path.read_bytes() + data = tomllib.loads(raw_bytes.decode()) + except Exception as exc: + raise ProfileError(f"TOML parse error in {profile_name!r}: {exc}") from exc + + # Handle inheritance + extends = data.get("extends") + if extends and extends != "null": + if profiles_dir is None: + profiles_dir = str(profile_path.parent) + parent_path = Path(profiles_dir) / f"{extends}.toml" + parent = load_profile( + parent_path, + git_root=git_root, + profiles_dir=profiles_dir, + _seen=_seen, + ) + # Merge: parent is base, child overrides + merged = _deep_merge(parent, data) + merged["extends"] = extends + else: + merged = data + + # Resolve paths in the merged result + resolved = _resolve_paths_in(merged, git_root, profile_name) + return resolved diff --git a/dflash/scripts/configlib/validate.py b/dflash/scripts/configlib/validate.py new file mode 100644 index 000000000..8e12cd8f5 --- /dev/null +++ b/dflash/scripts/configlib/validate.py @@ -0,0 +1,71 @@ +"""Profile validation: structural rules and provenance checks.""" +import os +from pathlib import Path + + +class ProfileError(Exception): + """Raised when a profile fails validation.""" + + +def validate_profile( + profile: dict, + profile_name: str = "unknown", + strict: bool = False, + git_root: str = None, +) -> tuple[list[str], list[str]]: + """Validate a merged profile dict. + + Returns: + (errors, warnings) — lists of human-readable strings. + Caller should treat any non-empty errors list as fatal. 
+ """ + errors: list[str] = [] + warnings: list[str] = [] + + def err(msg): + errors.append(f"[{profile_name}] {msg}") + + def warn(msg): + warnings.append(f"[{profile_name}] {msg}") + + # --- provenance --- + prov = profile.get("provenance") + if not prov: + err("Missing [provenance] section (required: source_log, measured_at, hardware_id)") + else: + for field in ("source_log", "measured_at", "hardware_id"): + if not prov.get(field): + err(f"Missing provenance.{field}") + + source_log = prov.get("source_log", "") + if source_log == "": + msg = "provenance.source_log is — run the benchmark and fill in the real log path" + if strict: + err(msg) + else: + warn(msg) + elif source_log and git_root: + log_path = Path(git_root) / source_log if not os.path.isabs(source_log) else Path(source_log) + if not log_path.exists(): + warn(f"provenance.source_log points to nonexistent file: {source_log!r}") + + # --- expected_floors --- + floors = profile.get("expected_floors", {}) + if not floors: + err("Empty or missing [expected_floors] — at least one of decode_tok_s, prefill_tok_s, ttft_ms_max required") + + # --- spec method cross-checks --- + runtime = profile.get("runtime", {}) + spec = runtime.get("spec", {}) + method = spec.get("method", "none") + model = profile.get("model", {}) + + if method == "mtp": + if not model.get("mtp_assistant"): + err("spec.method=mtp requires model.mtp_assistant to be set") + + if method == "dflash": + if not model.get("dflash_draft"): + err("spec.method=dflash requires model.dflash_draft to be set") + + return errors, warnings diff --git a/dflash/scripts/profile_run.py b/dflash/scripts/profile_run.py new file mode 100644 index 000000000..71d8f9c42 --- /dev/null +++ b/dflash/scripts/profile_run.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +"""profile_run.py — run a lucebox-hub profile. + +Usage: + profile_run.py --profile NAME [--override key.path=value ...] 
[--dry-run] [--print-cmd] +""" +import argparse +import os +import sys +from pathlib import Path + + +def _find_git_root(start: Path) -> Path: + """Walk up to find the git root (directory containing .git).""" + p = start.resolve() + while p != p.parent: + if (p / ".git").exists(): + return p + p = p.parent + raise RuntimeError(f"Could not find git root from {start}") + + +def _dot_path_set(obj: dict, dot_path: str, value): + """Set a nested dict value given a dot-separated path.""" + keys = dot_path.split(".") + for key in keys[:-1]: + obj = obj.setdefault(key, {}) + obj[keys[-1]] = value + + +def _coerce(value: str): + """Auto-coerce a string value to bool, int, float, or leave as str.""" + if value.lower() == "true": + return True + if value.lower() == "false": + return False + try: + return int(value) + except ValueError: + pass + try: + return float(value) + except ValueError: + pass + return value + + +def main(): + parser = argparse.ArgumentParser( + description="Run a lucebox-hub inference profile" + ) + parser.add_argument("--profile", required=True, help="Profile name (stem of TOML file in configs/profiles/)") + parser.add_argument("--override", action="append", default=[], metavar="KEY=VALUE", + help="Dot-path override (e.g. 
runtime.ctx=131072)") + parser.add_argument("--dry-run", action="store_true", help="Validate only, exit 0") + parser.add_argument("--print-cmd", action="store_true", help="Print resolved argv, exit 0") + args = parser.parse_args() + + # Resolve paths + script_dir = Path(__file__).resolve().parent + try: + git_root = _find_git_root(script_dir) + except RuntimeError as exc: + print(f"ERROR: {exc}", file=sys.stderr) + sys.exit(1) + + sys.path.insert(0, str(git_root)) + + from dflash.scripts.configlib.loader import load_profile, ProfileError + from dflash.scripts.configlib.validate import validate_profile + from dflash.scripts.configlib.backends import load_backend, build_argv, BackendError + + profiles_dir = git_root / "configs" / "profiles" + backends_dir = git_root / "configs" / "backends" + profile_path = profiles_dir / f"{args.profile}.toml" + + # Load profile + try: + profile = load_profile(profile_path, git_root=str(git_root), profiles_dir=str(profiles_dir)) + except ProfileError as exc: + print(f"ERROR loading profile {args.profile!r}: {exc}", file=sys.stderr) + sys.exit(1) + + # Apply overrides + for ov in args.override: + if "=" not in ov: + print(f"ERROR: --override {ov!r} must be in KEY=VALUE format", file=sys.stderr) + sys.exit(1) + key, _, val = ov.partition("=") + _dot_path_set(profile, key, _coerce(val)) + + # Validate + errors, warnings = validate_profile(profile, profile_name=args.profile, git_root=str(git_root)) + for w in warnings: + print(f"WARNING: {w}", file=sys.stderr) + if errors: + for e in errors: + print(f"ERROR: {e}", file=sys.stderr) + sys.exit(1) + + if args.dry_run: + print(f"Profile {args.profile!r} is valid.") + sys.exit(0) + + # Load backend + backend_name = profile.get("backend", "") + backend_path = backends_dir / f"{backend_name}.toml" + try: + backend = load_backend(backend_path, git_root=str(git_root)) + except BackendError as exc: + print(f"ERROR loading backend {backend_name!r}: {exc}", file=sys.stderr) + sys.exit(1) + + # 
Build argv + try: + argv = build_argv(backend, profile) + except Exception as exc: + print(f"ERROR building command: {exc}", file=sys.stderr) + sys.exit(1) + + if args.print_cmd: + for tok in argv: + print(tok) + sys.exit(0) + + # Execute + os.execvp(argv[0], argv) + + +if __name__ == "__main__": + main() diff --git a/dflash/scripts/verify_server.py b/dflash/scripts/verify_server.py new file mode 100644 index 000000000..bc7278bce --- /dev/null +++ b/dflash/scripts/verify_server.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 +"""verify_server.py — verify a running server meets profile floor metrics. + +Usage: + verify_server.py --profile NAME [--base-url URL] [--runs N] [--json-out FILE] + +Exit codes: + 0 — all floors met + 1 — config/connection error + 2 — floor(s) failed +""" +import argparse +import json +import sys +import time +import urllib.error +import urllib.request +from pathlib import Path + + +def _find_git_root(start: Path) -> Path: + p = start.resolve() + while p != p.parent: + if (p / ".git").exists(): + return p + p = p.parent + raise RuntimeError(f"Could not find git root from {start}") + + +def _http_json(url: str, payload: dict = None, timeout: float = 30.0): + """Make a JSON HTTP request. 
Returns (response_dict, elapsed_s, first_byte_s).""" + body = json.dumps(payload).encode() if payload else None + headers = {"Content-Type": "application/json"} if body else {} + req = urllib.request.Request(url, data=body, headers=headers) + + t0 = time.monotonic() + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + t_first = time.monotonic() + data = json.loads(resp.read().decode()) + t_end = time.monotonic() + return data, t_end - t0, t_first - t0 + except urllib.error.URLError as exc: + raise ConnectionError(f"Request to {url} failed: {exc}") from exc + + +def main(): + parser = argparse.ArgumentParser(description="Verify server meets profile floor metrics") + parser.add_argument("--profile", required=True) + parser.add_argument("--base-url", default="http://127.0.0.1:8080") + parser.add_argument("--runs", type=int, default=3) + parser.add_argument("--json-out", metavar="FILE") + args = parser.parse_args() + + script_dir = Path(__file__).resolve().parent + try: + git_root = _find_git_root(script_dir) + except RuntimeError as exc: + print(f"ERROR: {exc}", file=sys.stderr) + sys.exit(1) + + sys.path.insert(0, str(git_root)) + + from dflash.scripts.configlib.loader import load_profile, ProfileError + from dflash.scripts.configlib.validate import validate_profile + + profiles_dir = git_root / "configs" / "profiles" + profile_path = profiles_dir / f"{args.profile}.toml" + + try: + profile = load_profile(profile_path, git_root=str(git_root), profiles_dir=str(profiles_dir)) + except ProfileError as exc: + print(f"ERROR loading profile {args.profile!r}: {exc}", file=sys.stderr) + sys.exit(1) + + errors, warnings = validate_profile(profile, profile_name=args.profile, git_root=str(git_root)) + for w in warnings: + print(f"WARNING: {w}", file=sys.stderr) + if errors: + for e in errors: + print(f"ERROR: {e}", file=sys.stderr) + sys.exit(1) + + floors = profile.get("expected_floors", {}) + base_url = args.base_url.rstrip("/") + + # Health check + try: + 
health, _, _ = _http_json(f"{base_url}/health", timeout=5.0) + except ConnectionError as exc: + print(f"ERROR: Health check failed: {exc}", file=sys.stderr) + sys.exit(1) + + # Completion runs + prompt = "Hello, world! Please respond briefly." + decode_rates = [] + ttfts = [] + + for i in range(args.runs): + payload = { + "model": "default", + "messages": [{"role": "user", "content": prompt}], + "max_tokens": 64, + "stream": False, + } + try: + resp, elapsed, ttft = _http_json( + f"{base_url}/v1/chat/completions", payload=payload, timeout=60.0 + ) + except ConnectionError as exc: + print(f"ERROR: Run {i+1} failed: {exc}", file=sys.stderr) + sys.exit(1) + + usage = resp.get("usage", {}) + completion_tokens = usage.get("completion_tokens", 0) + if completion_tokens > 0 and elapsed > 0: + decode_rates.append(completion_tokens / elapsed) + ttfts.append(ttft * 1000) # convert to ms + + avg_decode = sum(decode_rates) / len(decode_rates) if decode_rates else 0.0 + avg_ttft = sum(ttfts) / len(ttfts) if ttfts else 0.0 + + # Compare to floors + floor_results = {} + passed = True + + if "decode_tok_s" in floors: + floor_val = floors["decode_tok_s"] + ok = avg_decode >= floor_val + floor_results["decode_tok_s"] = {"measured": avg_decode, "floor": floor_val, "passed": ok} + if not ok: + passed = False + print(f"FAIL: decode_tok_s={avg_decode:.2f} < floor={floor_val}", file=sys.stderr) + else: + print(f"PASS: decode_tok_s={avg_decode:.2f} >= floor={floor_val}") + + if "ttft_ms_max" in floors: + floor_val = floors["ttft_ms_max"] + ok = avg_ttft <= floor_val + floor_results["ttft_ms_max"] = {"measured": avg_ttft, "floor": floor_val, "passed": ok} + if not ok: + passed = False + print(f"FAIL: ttft_ms={avg_ttft:.1f} > max={floor_val}", file=sys.stderr) + else: + print(f"PASS: ttft_ms={avg_ttft:.1f} <= max={floor_val}") + + result = { + "profile": args.profile, + "runs": args.runs, + "avg_decode_tok_s": avg_decode, + "avg_ttft_ms": avg_ttft, + "floors": floor_results, + "passed": 
passed, + } + + if args.json_out: + with open(args.json_out, "w") as f: + json.dump(result, f, indent=2) + print(f"Results written to {args.json_out}") + + sys.exit(0 if passed else 2) + + +if __name__ == "__main__": + main() diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..27eec68ed --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +python_files = test_*.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/configs/__init__.py b/tests/configs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/configs/fixtures/base.toml b/tests/configs/fixtures/base.toml new file mode 100644 index 000000000..e1da20f49 --- /dev/null +++ b/tests/configs/fixtures/base.toml @@ -0,0 +1,25 @@ +extends = "" +backend = "dflash" + +[hardware] +gpu = "RTX 3090" +sm = 86 + +[model] +target = "models/base-model.gguf" + +[runtime] +ctx = 4096 +kv_k = "q8_0" +kv_v = "q8_0" + +[runtime.spec] +method = "none" + +[expected_floors] +decode_tok_s = 5.0 + +[provenance] +source_log = "tests/configs/fixtures/base.toml" +measured_at = "2026-01-01" +hardware_id = "test-device" diff --git a/tests/configs/fixtures/child.toml b/tests/configs/fixtures/child.toml new file mode 100644 index 000000000..82882a1ba --- /dev/null +++ b/tests/configs/fixtures/child.toml @@ -0,0 +1,25 @@ +extends = "base" +backend = "dflash" + +[hardware] +gpu = "RTX 3090" +sm = 86 + +[model] +target = "models/child-model.gguf" + +[runtime] +ctx = 8192 +kv_k = "tq3_0" +kv_v = "tq3_0" + +[runtime.spec] +method = "none" + +[expected_floors] +decode_tok_s = 8.0 + +[provenance] +source_log = "tests/configs/fixtures/child.toml" +measured_at = "2026-01-02" +hardware_id = "test-device" diff --git a/tests/configs/test_backends.py b/tests/configs/test_backends.py new file mode 100644 index 000000000..3a2530694 --- /dev/null +++ b/tests/configs/test_backends.py @@ -0,0 +1,269 @@ +"""Tests for 
configlib.backends — TDD harness.""" +import os +import pytest +from dflash.scripts.configlib.backends import load_backend, build_argv, BackendError + + +def _write(tmp_path, name, content): + p = tmp_path / name + p.write_text(content) + return p + + +def _minimal_backend_toml(bin_path, name="dflash", spec_types=None, extra_flags=""): + spec_types = spec_types or ["none"] + spec_list = "[" + ", ".join('"' + s + '"' for s in spec_types) + "]" + flags_for_mtp = "" + flags_for_dflash = "" + if "mtp" in spec_types: + flags_for_mtp = 'spec_model = "--mtp"\nspec_gamma = "--gamma"\n' + if "dflash" in spec_types: + flags_for_dflash = 'draft_model = "--draft"\ndraft_max = "--draft-max"\n' + return ( + 'name = "' + name + '"\n' + '[binary]\n' + 'in_tree = "' + str(bin_path) + '"\n' + '[supports]\n' + 'spec_types = ' + spec_list + '\n' + 'kv_quants = ["q8_0"]\n' + '[flags]\n' + 'ctx = "--ctx-size"\n' + 'kv_k = "--kv-k"\n' + 'kv_v = "--kv-v"\n' + 'model = "--model"\n' + + flags_for_mtp + flags_for_dflash + extra_flags + ) + + +def test_in_tree_binary_exists_resolves(tmp_path): + bin_path = tmp_path / "mybin" + bin_path.touch() + toml = _minimal_backend_toml(bin_path) + p = _write(tmp_path, "dflash.toml", toml) + backend = load_backend(p, git_root=str(tmp_path)) + assert backend["resolved_binary"] == str(bin_path) + + +def test_in_tree_binary_missing_raises_error(tmp_path): + toml = _minimal_backend_toml("nonexistent/binary") + p = _write(tmp_path, "dflash.toml", toml) + with pytest.raises(BackendError, match="not found"): + load_backend(p, git_root=str(tmp_path)) + + +def test_env_var_backend_unset_raises(tmp_path, monkeypatch): + monkeypatch.delenv("LUCEBOX_LLAMA_BIN", raising=False) + toml = ( + 'name = "llama-upstream"\n' + '[binary]\n' + 'env_var = "LUCEBOX_LLAMA_BIN"\n' + '[supports]\n' + 'spec_types = ["none"]\n' + 'kv_quants = ["q8_0"]\n' + '[flags]\n' + 'ctx = "--ctx-size"\n' + 'kv_k = "--kv-cache-type-k"\n' + 'kv_v = "--kv-cache-type-v"\n' + 'model = "--model"\n' + ) 
+ p = _write(tmp_path, "llama-upstream.toml", toml) + with pytest.raises(BackendError, match="LUCEBOX_LLAMA_BIN"): + load_backend(p, git_root=str(tmp_path)) + + +def test_env_var_backend_set_to_existing_resolves(tmp_path, monkeypatch): + bin_path = tmp_path / "llamabin" + bin_path.touch() + monkeypatch.setenv("LUCEBOX_LLAMA_BIN", str(bin_path)) + toml = ( + 'name = "llama-upstream"\n' + '[binary]\n' + 'env_var = "LUCEBOX_LLAMA_BIN"\n' + '[supports]\n' + 'spec_types = ["none"]\n' + 'kv_quants = ["q8_0"]\n' + '[flags]\n' + 'ctx = "--ctx-size"\n' + 'kv_k = "--kv-cache-type-k"\n' + 'kv_v = "--kv-cache-type-v"\n' + 'model = "--model"\n' + ) + p = _write(tmp_path, "llama-upstream.toml", toml) + backend = load_backend(p, git_root=str(tmp_path)) + assert backend["resolved_binary"] == str(bin_path) + + +def test_env_var_backend_set_to_nonexistent_raises(tmp_path, monkeypatch): + monkeypatch.setenv("LUCEBOX_LLAMA_BIN", "/nonexistent/path/llama") + toml = ( + 'name = "llama-upstream"\n' + '[binary]\n' + 'env_var = "LUCEBOX_LLAMA_BIN"\n' + '[supports]\n' + 'spec_types = ["none"]\n' + 'kv_quants = ["q8_0"]\n' + '[flags]\n' + 'ctx = "--ctx-size"\n' + 'kv_k = "--kv-cache-type-k"\n' + 'kv_v = "--kv-cache-type-v"\n' + 'model = "--model"\n' + ) + p = _write(tmp_path, "llama-upstream.toml", toml) + with pytest.raises(BackendError, match="does not exist"): + load_backend(p, git_root=str(tmp_path)) + + +def test_name_mismatch_raises(tmp_path): + toml = ( + 'name = "wrong-name"\n' + '[binary]\n' + 'in_tree = "somewhere"\n' + '[supports]\n' + 'spec_types = ["none"]\n' + 'kv_quants = ["q8_0"]\n' + '[flags]\n' + 'ctx = "--ctx-size"\n' + 'kv_k = "--kv-k"\n' + 'kv_v = "--kv-v"\n' + 'model = "--model"\n' + ) + p = _write(tmp_path, "dflash.toml", toml) + with pytest.raises(BackendError, match="name"): + load_backend(p, git_root=str(tmp_path)) + + +def test_both_in_tree_and_env_var_raises(tmp_path): + toml = ( + 'name = "dflash"\n' + '[binary]\n' + 'in_tree = "somewhere"\n' + 'env_var = 
"SOME_VAR"\n' + '[supports]\n' + 'spec_types = ["none"]\n' + 'kv_quants = ["q8_0"]\n' + '[flags]\n' + 'ctx = "--ctx-size"\n' + 'kv_k = "--kv-k"\n' + 'kv_v = "--kv-v"\n' + 'model = "--model"\n' + ) + p = _write(tmp_path, "dflash.toml", toml) + with pytest.raises(BackendError, match="[Mm]utually exclusive"): + load_backend(p, git_root=str(tmp_path)) + + +def test_neither_in_tree_nor_env_var_raises(tmp_path): + toml = ( + 'name = "dflash"\n' + '[binary]\n' + '[supports]\n' + 'spec_types = ["none"]\n' + 'kv_quants = ["q8_0"]\n' + '[flags]\n' + 'ctx = "--ctx-size"\n' + 'kv_k = "--kv-k"\n' + 'kv_v = "--kv-v"\n' + 'model = "--model"\n' + ) + p = _write(tmp_path, "dflash.toml", toml) + with pytest.raises(BackendError): + load_backend(p, git_root=str(tmp_path)) + + +def test_missing_flags_for_spec_types_raises(tmp_path): + toml = ( + 'name = "dflash"\n' + '[binary]\n' + 'in_tree = "somewhere"\n' + '[supports]\n' + 'spec_types = ["none", "mtp"]\n' + 'kv_quants = ["q8_0"]\n' + '[flags]\n' + 'ctx = "--ctx-size"\n' + 'kv_k = "--kv-k"\n' + 'kv_v = "--kv-v"\n' + 'model = "--model"\n' + ) + p = _write(tmp_path, "dflash.toml", toml) + with pytest.raises(BackendError, match="[Ff]lag"): + load_backend(p, git_root=str(tmp_path)) + + +def test_build_argv_includes_ctx_kv_flags(tmp_path): + bin_path = tmp_path / "mybin" + bin_path.touch() + toml = _minimal_backend_toml(bin_path) + p = _write(tmp_path, "dflash.toml", toml) + backend = load_backend(p, git_root=str(tmp_path)) + profile = { + "model": {"target": "/model.gguf"}, + "runtime": {"ctx": 4096, "kv_k": "q8_0", "kv_v": "q8_0", "spec": {"method": "none"}}, + } + argv = build_argv(backend, profile) + assert "--ctx-size" in argv + assert "4096" in argv + assert "--kv-k" in argv + assert "q8_0" in argv + + +def test_build_argv_boolean_flag_only_when_true(tmp_path): + bin_path = tmp_path / "mybin" + bin_path.touch() + extra = 'pflash = "--pflash"\nignore_eos = "--ignore-eos"\n' + toml = _minimal_backend_toml(bin_path, extra_flags=extra) 
+ p = _write(tmp_path, "dflash.toml", toml) + backend = load_backend(p, git_root=str(tmp_path)) + + profile_false = { + "model": {"target": "/model.gguf"}, + "runtime": {"ctx": 4096, "kv_k": "q8_0", "kv_v": "q8_0", + "pflash": False, "ignore_eos": False, "spec": {"method": "none"}}, + } + argv = build_argv(backend, profile_false) + assert "--pflash" not in argv + assert "--ignore-eos" not in argv + + profile_true = { + "model": {"target": "/model.gguf"}, + "runtime": {"ctx": 4096, "kv_k": "q8_0", "kv_v": "q8_0", + "pflash": True, "ignore_eos": True, "spec": {"method": "none"}}, + } + argv = build_argv(backend, profile_true) + assert "--pflash" in argv + assert "--ignore-eos" in argv + + +def test_build_argv_mtp_method_adds_mtp_and_gamma(tmp_path): + bin_path = tmp_path / "mybin" + bin_path.touch() + toml = _minimal_backend_toml(bin_path, spec_types=["none", "mtp"]) + p = _write(tmp_path, "dflash.toml", toml) + backend = load_backend(p, git_root=str(tmp_path)) + profile = { + "model": {"target": "/model.gguf", "mtp_assistant": "/assistant.gguf"}, + "runtime": {"ctx": 4096, "kv_k": "q8_0", "kv_v": "q8_0", + "spec": {"method": "mtp", "gamma": 2}}, + } + argv = build_argv(backend, profile) + assert "--mtp" in argv + assert "/assistant.gguf" in argv + assert "--gamma" in argv + assert "2" in argv + + +def test_build_argv_dflash_method_adds_draft_and_draft_max(tmp_path): + bin_path = tmp_path / "mybin" + bin_path.touch() + toml = _minimal_backend_toml(bin_path, spec_types=["none", "dflash"]) + p = _write(tmp_path, "dflash.toml", toml) + backend = load_backend(p, git_root=str(tmp_path)) + profile = { + "model": {"target": "/model.gguf", "dflash_draft": "/draft.gguf"}, + "runtime": {"ctx": 4096, "kv_k": "q8_0", "kv_v": "q8_0", + "spec": {"method": "dflash", "draft_max": 4}}, + } + argv = build_argv(backend, profile) + assert "--draft" in argv + assert "/draft.gguf" in argv + assert "--draft-max" in argv + assert "4" in argv diff --git a/tests/configs/test_loader.py 
b/tests/configs/test_loader.py new file mode 100644 index 000000000..117bc5faa --- /dev/null +++ b/tests/configs/test_loader.py @@ -0,0 +1,161 @@ +"""Tests for configlib.loader — TDD harness.""" +import os +import pytest + +from dflash.scripts.configlib.loader import load_profile, ProfileError + + +def _write(tmp_path, name, content): + p = tmp_path / name + p.write_text(content) + return p + + +MINIMAL_TOML = ( + 'extends = ""\n' + 'backend = "dflash"\n' + '\n' + '[hardware]\n' + 'gpu = "RTX 3090"\n' + 'sm = 86\n' + '\n' + '[model]\n' + 'target = "models/base.gguf"\n' + '\n' + '[runtime]\n' + 'ctx = 4096\n' + 'kv_k = "q8_0"\n' + 'kv_v = "q8_0"\n' + '\n' + '[runtime.spec]\n' + 'method = "none"\n' + '\n' + '[expected_floors]\n' + 'decode_tok_s = 5.0\n' + '\n' + '[provenance]\n' + 'source_log = "tests/configs/fixtures/base.toml"\n' + 'measured_at = "2026-01-01"\n' + 'hardware_id = "test-device"\n' +) + + +def test_valid_base_parses(tmp_path): + p = _write(tmp_path, "base.toml", MINIMAL_TOML) + profile = load_profile(p, git_root=str(tmp_path)) + assert profile["backend"] == "dflash" + assert profile["hardware"]["gpu"] == "RTX 3090" + assert profile["runtime"]["ctx"] == 4096 + + +def test_child_inherits_and_overrides(tmp_path): + _write(tmp_path, "base.toml", MINIMAL_TOML) + child_toml = ( + MINIMAL_TOML + .replace('extends = ""', 'extends = "base"') + .replace('target = "models/base.gguf"', 'target = "models/child.gguf"') + .replace("ctx = 4096", "ctx = 8192") + .replace('kv_k = "q8_0"', 'kv_k = "tq3_0"') + .replace('kv_v = "q8_0"', 'kv_v = "tq3_0"') + .replace("decode_tok_s = 5.0", "decode_tok_s = 8.0") + ) + p = _write(tmp_path, "child.toml", child_toml) + profile = load_profile(p, git_root=str(tmp_path), profiles_dir=str(tmp_path)) + assert profile["runtime"]["ctx"] == 8192 + assert profile["runtime"]["kv_k"] == "tq3_0" + assert profile["hardware"]["sm"] == 86 + + +def test_missing_profile_file_raises(tmp_path): + with pytest.raises(ProfileError, match="not 
found"): + load_profile(tmp_path / "nonexistent.toml", git_root=str(tmp_path)) + + +def test_toml_parse_error_raises(tmp_path): + p = _write(tmp_path, "bad.toml", "this = [broken toml {{{") + with pytest.raises(ProfileError, match="TOML"): + load_profile(p, git_root=str(tmp_path)) + + +def test_circular_extends_raises(tmp_path): + a_toml = MINIMAL_TOML.replace('extends = ""', 'extends = "b"') + b_toml = MINIMAL_TOML.replace('extends = ""', 'extends = "a"') + _write(tmp_path, "a.toml", a_toml) + _write(tmp_path, "b.toml", b_toml) + with pytest.raises(ProfileError, match="[Cc]ircular"): + load_profile(tmp_path / "a.toml", git_root=str(tmp_path), profiles_dir=str(tmp_path)) + + +def test_env_var_expands(tmp_path, monkeypatch): + monkeypatch.setenv("MY_MODELS", str(tmp_path)) + toml = MINIMAL_TOML.replace( + 'target = "models/base.gguf"', + 'target = "${MY_MODELS}/models/base.gguf"' + ) + p = _write(tmp_path, "profile.toml", toml) + profile = load_profile(p, git_root=str(tmp_path)) + assert profile["model"]["target"] == f"{tmp_path}/models/base.gguf" + + +def test_env_var_default_used_when_unset(tmp_path, monkeypatch): + monkeypatch.delenv("UNSET_VAR_XYZ", raising=False) + toml = MINIMAL_TOML.replace( + 'target = "models/base.gguf"', + 'target = "${UNSET_VAR_XYZ:-models}/base.gguf"' + ) + p = _write(tmp_path, "profile.toml", toml) + profile = load_profile(p, git_root=str(tmp_path)) + assert profile["model"]["target"].endswith("models/base.gguf") + + +def test_unset_required_var_raises(tmp_path, monkeypatch): + monkeypatch.delenv("LUCEBOX_ROOT", raising=False) + toml = MINIMAL_TOML.replace( + 'target = "models/base.gguf"', + 'target = "${LUCEBOX_ROOT}/models/base.gguf"' + ) + p = _write(tmp_path, "profile.toml", toml) + with pytest.raises(ProfileError) as exc_info: + load_profile(p, git_root=str(tmp_path)) + err = str(exc_info.value) + assert "LUCEBOX_ROOT" in err + assert "profile.toml" in err + + +def test_hardcoded_absolute_path_raises(tmp_path): + toml = 
MINIMAL_TOML.replace( + 'target = "models/base.gguf"', + 'target = "/absolute/path/model.gguf"' + ) + p = _write(tmp_path, "profile.toml", toml) + with pytest.raises(ProfileError, match="[Hh]ardcoded absolute"): + load_profile(p, git_root=str(tmp_path)) + + +def test_env_expanded_absolute_allowed(tmp_path, monkeypatch): + monkeypatch.setenv("MY_ROOT", "/some/absolute/root") + toml = MINIMAL_TOML.replace( + 'target = "models/base.gguf"', + 'target = "${MY_ROOT}/models/base.gguf"' + ) + p = _write(tmp_path, "profile.toml", toml) + profile = load_profile(p, git_root=str(tmp_path)) + assert profile["model"]["target"] == "/some/absolute/root/models/base.gguf" + + +def test_tilde_expands(tmp_path): + toml = MINIMAL_TOML.replace( + 'target = "models/base.gguf"', + 'target = "~/models/base.gguf"' + ) + p = _write(tmp_path, "profile.toml", toml) + profile = load_profile(p, git_root=str(tmp_path)) + home = os.path.expanduser("~") + assert profile["model"]["target"] == f"{home}/models/base.gguf" + + +def test_relative_path_resolves_against_git_root(tmp_path): + toml = MINIMAL_TOML + p = _write(tmp_path, "profile.toml", toml) + profile = load_profile(p, git_root=str(tmp_path)) + assert profile["model"]["target"] == str(tmp_path / "models" / "base.gguf") diff --git a/tests/configs/test_validate.py b/tests/configs/test_validate.py new file mode 100644 index 000000000..ab1703b5a --- /dev/null +++ b/tests/configs/test_validate.py @@ -0,0 +1,95 @@ +"""Tests for configlib.validate.""" +import pytest +from dflash.scripts.configlib.validate import validate_profile, ProfileError + + +def _make_profile(overrides=None): + p = { + "extends": None, + "backend": "dflash", + "hardware": {"gpu": "RTX 3090", "sm": 86}, + "model": {"target": "/some/model.gguf"}, + "runtime": { + "ctx": 4096, + "kv_k": "q8_0", + "kv_v": "q8_0", + "spec": {"method": "none"}, + }, + "expected_floors": {"decode_tok_s": 5.0}, + "provenance": { + "source_log": "some/existing/file.toml", + "measured_at": "2026-01-01", 
+ "hardware_id": "test-device", + }, + } + if overrides: + _deep_update(p, overrides) + return p + + +def _deep_update(base, updates): + for k, v in updates.items(): + if isinstance(v, dict) and isinstance(base.get(k), dict) and v: + _deep_update(base[k], v) + else: + base[k] = v + + +def test_needs_run_source_log_is_warning(tmp_path): + profile = _make_profile({"provenance": {"source_log": "<NEEDS_RUN>"}}) + errors, warnings = validate_profile(profile, profile_name="p.toml", strict=False) + assert not errors + assert any("NEEDS_RUN" in w for w in warnings) + + +def test_needs_run_with_strict_is_error(): + profile = _make_profile({"provenance": {"source_log": "<NEEDS_RUN>"}}) + errors, warnings = validate_profile(profile, profile_name="p.toml", strict=True) + assert any("NEEDS_RUN" in e for e in errors) + + +def test_missing_provenance_is_error(): + profile = _make_profile() + del profile["provenance"] + errors, warnings = validate_profile(profile, profile_name="p.toml") + assert any("provenance" in e.lower() for e in errors) + + +def test_empty_floors_is_error(): + profile = _make_profile({"expected_floors": {}}) + errors, warnings = validate_profile(profile, profile_name="p.toml") + assert any("floor" in e.lower() or "expected_floors" in e.lower() for e in errors) + + +def test_mtp_without_assistant_is_error(): + profile = _make_profile({ + "runtime": {"spec": {"method": "mtp", "gamma": 2}}, + }) + errors, warnings = validate_profile(profile, profile_name="p.toml") + assert any("mtp_assistant" in e.lower() or "assistant" in e.lower() for e in errors) + + +def test_dflash_without_draft_is_error(): + profile = _make_profile({ + "runtime": {"spec": {"method": "dflash", "draft_max": 4}}, + }) + errors, warnings = validate_profile(profile, profile_name="p.toml") + assert any("dflash_draft" in e.lower() or "draft" in e.lower() for e in errors) + + +def test_valid_mtp_profile_no_errors(): + profile = _make_profile({ + "model": {"target": "/some/model.gguf", "mtp_assistant": 
"/some/assistant.gguf"}, + "runtime": {"spec": {"method": "mtp", "gamma": 2}}, + }) + errors, warnings = validate_profile(profile, profile_name="p.toml") + assert not errors + + +def test_valid_dflash_profile_no_errors(): + profile = _make_profile({ + "model": {"target": "/some/model.gguf", "dflash_draft": "/some/draft.gguf"}, + "runtime": {"spec": {"method": "dflash", "draft_max": 4}}, + }) + errors, warnings = validate_profile(profile, profile_name="p.toml") + assert not errors