diff --git a/CHANGELOG.md b/CHANGELOG.md
index 224ce863..3c1ebf70 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Ask this book — user-book RAG eval, P1 (live grounding validation) — backend (2026-06-22)
+
+Automated live grounding validation for **any user-uploaded book** — the user-book sibling to the catalog `RagEvalRunner` (AI-027). The catalog eval scores fixed golden sets keyed by `editionId`; user books have no goldens, so the new `UserBookRagEvalRunner` **synthesises probes from the book's own chunks**: it seed-retrieves a spread of chunks (`RetrieveUserBookAsync`, no gate), asks the generator for one self-contained question per chunk (`FeatureTag eval.userbook.gen`), runs the **real Ask path** per question, and judges the resulting answer's citations with the **shared `CitationJudge`** — same rubric + SupportRate (D1≥4) as the catalog. Two behaviour probes round it out: a **warm greeting** ("hi") checked *structurally* (answers, no citations, no `[n]` marker, not refused — no judge call) and a fixed **off-book** question judged for invented facts (passes iff the answer declines or stays grounded). **Empty/un-embedded book → short-circuit**: NO generator/judge LLM call, persist a failed 0-row with a note (mirrors the catalog no-LLM-on-empty invariant). Refactor: the catalog citation judge (`JudgeCitationsAsync`) + the `EvalRun` row factory (`MakeRun`) are extracted into one internal `CitationJudge` helper that both runners call — the rubric never forks; the catalog `RagEvalRunner` + its tests stay byte-for-byte green. New endpoint `POST /admin/rag/userbook/{id}/eval?judge=openai` (admin-auth inherited) resolves the owner via `db.UserBooks`, **logs the target userId** (privacy: admin eval reads private user content), runs 6 probes, persists, and returns `UserBookRagEvalDto` (citation score/supportRate, retrieval fraction, behaviour pass, per-probe breakdown). 404 on unknown book, 503 with no OpenAI key. Persists `rag.userbook.citation` / `rag.userbook.behavior` / `rag.userbook.retrieval` `eval_run` rows. 
`dotnet build -c Release` clean; AiEvals 61 + UnitTests 886 green (synthesised-probe aggregate, greeting structural pass/fail, off-book judge pass/fail, **empty-chunks → asserted zero generator/judge calls** via a throwing fake, SupportRate math, persist on/off). P2 admin UI is a separate slice.
+
 ### Ask this book — conversational, streaming web chat — backend (AI-028) (2026-06-19)
 
 Backend for the conversational "Ask this book" upgrade: **model bump + multi-turn memory + warm-companion prompt + SSE streaming**, with grounding, citations, and the spoiler gate intact. `rag.ask` now routes to a dedicated keyed provider `openai-rag` on **gpt-4.1-mini** (was gpt-4.1-nano), mirroring `openai-explain` (`OpenAI:RagAsk:Model`, `Ai:Routes:rag.ask → openai-rag`, decorator-loop entry, `ModelRegistrySeeder` row). The system prompt is rewritten from "answer ONLY from excerpts else refuse" to a **warm reading companion** that is still strictly grounded — every book-fact claim must come from the numbered excerpts and cite `[n]` (citation contract + parser unchanged), but greetings/meta ("hi", "what can you do") get a warm invite with **no forced citation and no refusal**, and a genuine question with no matching excerpt gets a graceful "I don't see that in what you've read so far" rather than an invented fact. **Multi-turn**: `AskRequest` gains `History: AskTurnDto[]` (role `"user"`/`"assistant"`); the server defensively clamps to the **last 6 turns**, caps each turn at 4000 chars, normalizes roles, and assembles a real chat (system → numbered-excerpts context block → prior turns → new question last). Retrieval still runs on the latest question only, so the grounding eval is byte-identical with `[]` history. **SSE** (content-negotiated, mirrors Explain): `Accept: text/event-stream` → `delta` events (token fragments) then a terminal `done` carrying `{ citations, lastReadOrd, insufficient }` (camelCase, identical citation shape to the JSON path); empty-chunks → one friendly `delta` + `done {insufficient:true}` with **no model call**; provider/mid-stream failure → terminal `error`. JSON path returns the unchanged `AskResponse` (eval + mobile keep working). Ask `MaxOutputTokens` raised 320 → 400 for conversational length. 
`dotnet build -c Release` clean; 868 unit tests green (history clamp, multi-turn message assembly, SSE event sequencing over a fake delta stream, companion greeting-vs-content prompt structure) + integration (catalog spoiler-gate, owner-404, SSE content-type + framing, JSON history passthrough — skip-on-unavailable). **Note: the grounding golden eval (`RagEvalRunner`) MUST be re-run on mini post-deploy** — the companion prompt loosened the refusal rule, so this is the real hallucination-risk gate (paid; not runnable in CI). Frontend = parallel agent (AI-026e).
diff --git a/backend/src/Ai/TextStack.Ai.EvalSuite/CitationJudge.cs b/backend/src/Ai/TextStack.Ai.EvalSuite/CitationJudge.cs
new file mode 100644
index 00000000..e5ff8c77
--- /dev/null
+++ b/backend/src/Ai/TextStack.Ai.EvalSuite/CitationJudge.cs
@@ -0,0 +1,134 @@
+using Application.Rag;
+using Domain.Entities;
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.AI.Evaluation;
+using TextStack.Ai.Core;
+using TextStack.Ai.Evals;
+using TextStack.Ai.Llm;
+using TextStack.Ai.Rag;
+
+namespace TextStack.Ai.EvalSuite;
+
+/// <summary>
+/// Shared citation-correctness machinery for the RAG evals (AI-027b + user-book P1). Both the catalog
+/// <see cref="RagEvalRunner"/> and the <see cref="UserBookRagEvalRunner"/> call ONE copy of the judge
+/// rubric + scoring + the <see cref="EvalRun"/> row factory, so the support metric never forks. The
+/// "support" axis MUST stay Dim1 — SupportRate reads <see cref="JudgeScore.D1"/>.
+/// </summary>
+internal static class CitationJudge
+{
+    internal const string CitationFeature = "rag.citation";
+    internal const int SupportPassThreshold = 4; // judge ≥4/5 on the support axis = a correct citation
+    internal const string NoJudge = "n/a";
+
+    internal static readonly Rubric Rubric = new(
+        "support: does the cited excerpt actually contain or directly imply the specific claim it is attached to?",
+        "relevance: is the excerpt genuinely on-topic for the answer, not a loosely-related passage?",
+        "faithfulness: does the answer avoid asserting anything the cited excerpts do not support (no outside knowledge)?");
+
+    private static readonly ChatMessage[] JudgePlaceholderMessages = [new ChatMessage(ChatRole.User, string.Empty)];
+
+    /// <summary>
+    /// Generates a grounded answer per question (over its already-retrieved chunks) and judges each
+    /// citation against the FULL text of the excerpt it points to (one rubric evaluation per citation).
+    /// Answers flagged insufficient or carrying no citations are skipped and not counted as generated.
+    /// Returns the mean 1–5 score and the support rate (citations scored ≥<see cref="SupportPassThreshold"/> on the support axis), or a zero score/rate/count when nothing was judged. Shared verbatim by both runners.
+    /// </summary>
+    internal static async Task<RagCitationSummary> JudgeCitationsAsync(
+        IRagAskService ask,
+        ILlmService judge,
+        IReadOnlyList<(string Question, IReadOnlyList<RetrievedChunk> Chunks)> retrieved,
+        CancellationToken ct)
+    {
+        var chatConfig = new ChatConfiguration(new LlmServiceChatClient(judge, defaultFeatureTag: "eval.judge"));
+        var scores = new List<JudgeScore>();
+        var supported = 0;
+        var answersGenerated = 0;
+
+        foreach (var (question, chunks) in retrieved)
+        {
+            ct.ThrowIfCancellationRequested();
+            // lastReadOrd is irrelevant here — chunks are supplied directly (no user gating).
+            var answer = await ask.AskFromChunksAsync(question, chunks, [], [], lastReadOrd: int.MaxValue, ct);
+            if (answer.Insufficient || answer.Citations.Count == 0)
+                continue; // nothing cited, so nothing to judge for this question
+            answersGenerated++;
+
+            foreach (var cited in answer.Citations)
+            {
+                ct.ThrowIfCancellationRequested();
+                var evidence =
+                    $"Question: {question}\n\nAnswer:\n{answer.Answer}\n\n" +
+                    $"Cited excerpt [{cited.Marker}] (the answer attributes a claim to this passage):\n{cited.Chunk.Text}";
+
+                var evaluator = new RubricEvaluator(CitationFeature, Rubric);
+                var result = await evaluator.EvaluateAsync(
+                    JudgePlaceholderMessages,
+                    new ChatResponse(new ChatMessage(ChatRole.Assistant, answer.Answer)),
+                    chatConfig, [new RubricEvidenceContext(evidence)], ct);
+
+                var score = new JudgeScore(
+                    ReadAxis(result, Rubric.Dim1),
+                    ReadAxis(result, Rubric.Dim2),
+                    ReadAxis(result, Rubric.Dim3),
+                    string.Empty);
+                scores.Add(score);
+                if (score.D1 >= SupportPassThreshold) // D1 is read from Rubric.Dim1, the support axis
+                    supported++;
+            }
+        }
+
+        if (scores.Count == 0) // no citation was judged — skip aggregation and the 0/0 support rate below
+            return new RagCitationSummary(0, 0, 0, answersGenerated);
+
+        var summary = JudgeRunner.Aggregate(scores);
+        var supportRate = (double)supported / scores.Count;
+        return new RagCitationSummary(summary.MeanOverall, supportRate, scores.Count, answersGenerated);
+    }
+
+    /// <summary>
+    /// Lightweight faithfulness check for the user-book off-book probe: does <paramref name="answer"/>
+    /// introduce facts not grounded in the book? Returns true when the answer is clean (gracefully
+    /// declines or stays grounded — no invented facts), false when it hallucinates outside knowledge.
+    /// One judge call; parses a leading yes/no (case-insensitive, after trimming whitespace).
+    /// </summary>
+    internal static async Task<bool> JudgeNoInventedFactsAsync(
+        ILlmService judge, string question, string answer, CancellationToken ct)
+    {
+        var prompt =
+            "You are grading whether an assistant answer about a book introduces facts NOT grounded in " +
+            "that book. A good answer either gracefully declines (says it can't find this in the book) " +
+            "or stays grounded. A bad answer asserts specific outside facts (dates, names, scores, " +
+            "events) as if from the book.\n\n" +
+            $"Question: {question}\n\nAnswer:\n{answer}\n\n" +
+            "Does the answer introduce facts not grounded in the book? Reply with exactly YES or NO.";
+        var request = new LlmRequest(
+            SystemPrompt: "You are a strict faithfulness grader. Reply YES or NO only.",
+            Messages: [new LlmMessage("user", prompt)],
+            MaxOutputTokens: 4,
+            FeatureTag: "eval.judge");
+        var response = await judge.CompleteAsync(request, ct);
+        var verdict = response.Text.Trim();
+        // "yes" (any casing) = invented facts present → NOT clean. Anything else (incl. "no") = clean. NOTE(review): this fails open — empty or malformed judge output also counts as clean; confirm that is intended.
+        return !verdict.StartsWith("yes", StringComparison.OrdinalIgnoreCase);
+    }
+
+    // RubricEvaluator names each axis "{feature}.{label}" (label = text before ':'). Reads that metric for one rubric dimension and rounds it to an int; a metric without a value counts as 0.
+    private static int ReadAxis(EvaluationResult result, string dim) =>
+        (int)Math.Round(result.Get<NumericMetric>($"{CitationFeature}.{dim.Split(':')[0].Trim()}").Value ?? 0);
+
+    /// <summary>Shared <see cref="EvalRun"/> row factory — one copy for every RAG eval feature. Stamps a new Guid id and the current UTC time, and rounds <paramref name="score"/> to 3 decimals; it does not add the row to any context.</summary>
+    internal static EvalRun MakeRun(
+        string feature, string modelId, string judgeModelId, decimal score, int n, string? gitSha, string breakdown) => new()
+        {
+            Id = Guid.NewGuid(),
+            Feature = feature,
+            ModelId = modelId,
+            JudgeModelId = judgeModelId,
+            Score = Math.Round(score, 3),
+            N = n,
+            BreakdownJson = breakdown,
+            GitSha = gitSha,
+            CreatedAt = DateTimeOffset.UtcNow,
+        };
+}
diff --git a/backend/src/Ai/TextStack.Ai.EvalSuite/RagEvalRunner.cs b/backend/src/Ai/TextStack.Ai.EvalSuite/RagEvalRunner.cs
index 1ce45dbe..2d3c3b81 100644
--- a/backend/src/Ai/TextStack.Ai.EvalSuite/RagEvalRunner.cs
+++ b/backend/src/Ai/TextStack.Ai.EvalSuite/RagEvalRunner.cs
@@ -1,12 +1,8 @@
 using Application.Common.Interfaces;
 using Application.Rag;
-using Domain.Entities;
-using Microsoft.Extensions.AI;
-using Microsoft.Extensions.AI.Evaluation;
 using Microsoft.Extensions.Logging;
 using TextStack.Ai.Core;
 using TextStack.Ai.Evals;
-using TextStack.Ai.Llm;
 using TextStack.Ai.Rag;
 
 namespace TextStack.Ai.EvalSuite;
@@ -49,17 +45,7 @@ public sealed class RagEvalRunner(ILogger<RagEvalRunner> logger)
 {
     // Retrieval scores 0–1 (recall / 1−leak), unlike the 1–5 judged features — the feature key disambiguates.
     private const string RetrievalModelId = "hybrid-retrieval";
-    private const string NoJudge = "n/a";
-    private const int SupportPassThreshold = 4; // judge ≥4/5 on the support axis = a correct citation
-    private const string CitationFeature = "rag.citation";
-
-    // The "support" axis MUST stay Dim1 — SupportRate reads JudgeScore.D1.
-    private static readonly Rubric CitationRubric = new(
-        "support: does the cited excerpt actually contain or directly imply the specific claim it is attached to?",
-        "relevance: is the excerpt genuinely on-topic for the answer, not a loosely-related passage?",
-        "faithfulness: does the answer avoid asserting anything the cited excerpts do not support (no outside knowledge)?");
-
-    private static readonly ChatMessage[] JudgePlaceholderMessages = [new ChatMessage(ChatRole.User, string.Empty)];
+    private const string NoJudge = CitationJudge.NoJudge;
 
     public async Task<RagEvalResult> RunAsync(
         IRagService rag,
@@ -110,7 +96,7 @@ public async Task<RagEvalResult> RunAsync(
         // Citation correctness (027b) — only when a generator + judge are supplied.
         RagCitationSummary? citation = null;
         if (ask is not null && judge is not null)
-            citation = await JudgeCitationsAsync(ask, judge, retrievedByQuestion, ct);
+            citation = await CitationJudge.JudgeCitationsAsync(ask, judge, retrievedByQuestion, ct);
 
         logger.LogInformation(
             "RAG eval edition={Edition} recall@{K}={Recall:0.00} (N={RecallN}) spoilerLeakRate={Leak:0.00} (N={SpoilerN}) citation={Cit}",
@@ -119,12 +105,12 @@ public async Task<RagEvalResult> RunAsync(
 
         if (persist && db is not null)
         {
-            db.EvalRuns.Add(MakeRun("rag.retrieval", RetrievalModelId, NoJudge, (decimal)recall, recallCases.Count, gitSha,
+            db.EvalRuns.Add(CitationJudge.MakeRun("rag.retrieval", RetrievalModelId, NoJudge, (decimal)recall, recallCases.Count, gitSha,
                 $"{{\"recallAtK\":{recall:0.000},\"k\":{k},\"hits\":{recallDetail.Count(c => c.Hit)}}}"));
-            db.EvalRuns.Add(MakeRun("rag.spoiler", RetrievalModelId, NoJudge, (decimal)(1.0 - leakRate), spoilerCases.Count, gitSha,
+            db.EvalRuns.Add(CitationJudge.MakeRun("rag.spoiler", RetrievalModelId, NoJudge, (decimal)(1.0 - leakRate), spoilerCases.Count, gitSha,
                 $"{{\"leakRate\":{leakRate:0.000},\"leakingCases\":{spoilerDetail.Count(c => c.LeakCount > 0)}}}"));
             if (citation is not null)
-                db.EvalRuns.Add(MakeRun(CitationFeature, RagAskService.FeatureTag, judgeModelId ?? NoJudge,
+                db.EvalRuns.Add(CitationJudge.MakeRun(CitationJudge.CitationFeature, RagAskService.FeatureTag, judgeModelId ?? NoJudge,
                     (decimal)citation.Score, citation.CitationsJudged, gitSha,
                     $"{{\"supportRate\":{citation.SupportRate:0.000},\"answers\":{citation.AnswersGenerated}}}"));
             await db.SaveChangesAsync(ct);
@@ -132,79 +118,4 @@ public async Task<RagEvalResult> RunAsync(
 
         return new RagEvalResult(recall, recallCases.Count, leakRate, spoilerCases.Count, recallDetail, spoilerDetail, citation);
     }
-
-    /// <summary>
-    /// Generates a grounded answer per question (over its already-retrieved chunks) and judges each
-    /// citation against the FULL text of the excerpt it points to. Returns the mean 1–5 score and the
-    /// support rate (citations scored ≥<see cref="SupportPassThreshold"/> on the support axis).
-    /// </summary>
-    private async Task<RagCitationSummary> JudgeCitationsAsync(
-        IRagAskService ask,
-        ILlmService judge,
-        IReadOnlyList<(string Question, IReadOnlyList<RetrievedChunk> Chunks)> retrieved,
-        CancellationToken ct)
-    {
-        var chatConfig = new ChatConfiguration(new LlmServiceChatClient(judge, defaultFeatureTag: "eval.judge"));
-        var scores = new List<JudgeScore>();
-        var supported = 0;
-        var answersGenerated = 0;
-
-        foreach (var (question, chunks) in retrieved)
-        {
-            ct.ThrowIfCancellationRequested();
-            // lastReadOrd is irrelevant here — chunks are supplied directly (no user gating).
-            var answer = await ask.AskFromChunksAsync(question, chunks, [], [], lastReadOrd: int.MaxValue, ct);
-            if (answer.Insufficient || answer.Citations.Count == 0)
-                continue;
-            answersGenerated++;
-
-            foreach (var cited in answer.Citations)
-            {
-                ct.ThrowIfCancellationRequested();
-                var evidence =
-                    $"Question: {question}\n\nAnswer:\n{answer.Answer}\n\n" +
-                    $"Cited excerpt [{cited.Marker}] (the answer attributes a claim to this passage):\n{cited.Chunk.Text}";
-
-                var evaluator = new RubricEvaluator(CitationFeature, CitationRubric);
-                var result = await evaluator.EvaluateAsync(
-                    JudgePlaceholderMessages,
-                    new ChatResponse(new ChatMessage(ChatRole.Assistant, answer.Answer)),
-                    chatConfig, [new RubricEvidenceContext(evidence)], ct);
-
-                var score = new JudgeScore(
-                    ReadAxis(result, CitationRubric.Dim1),
-                    ReadAxis(result, CitationRubric.Dim2),
-                    ReadAxis(result, CitationRubric.Dim3),
-                    string.Empty);
-                scores.Add(score);
-                if (score.D1 >= SupportPassThreshold)
-                    supported++;
-            }
-        }
-
-        if (scores.Count == 0)
-            return new RagCitationSummary(0, 0, 0, answersGenerated);
-
-        var summary = JudgeRunner.Aggregate(scores);
-        var supportRate = (double)supported / scores.Count;
-        return new RagCitationSummary(summary.MeanOverall, supportRate, scores.Count, answersGenerated);
-    }
-
-    // RubricEvaluator names each axis "{feature}.{label}" (label = text before ':').
-    private static int ReadAxis(EvaluationResult result, string dim) =>
-        (int)Math.Round(result.Get<NumericMetric>($"{CitationFeature}.{dim.Split(':')[0].Trim()}").Value ?? 0);
-
-    private static EvalRun MakeRun(
-        string feature, string modelId, string judgeModelId, decimal score, int n, string? gitSha, string breakdown) => new()
-        {
-            Id = Guid.NewGuid(),
-            Feature = feature,
-            ModelId = modelId,
-            JudgeModelId = judgeModelId,
-            Score = Math.Round(score, 3),
-            N = n,
-            BreakdownJson = breakdown,
-            GitSha = gitSha,
-            CreatedAt = DateTimeOffset.UtcNow,
-        };
 }
diff --git a/backend/src/Ai/TextStack.Ai.EvalSuite/UserBookRagEvalRunner.cs b/backend/src/Ai/TextStack.Ai.EvalSuite/UserBookRagEvalRunner.cs
new file mode 100644
index 00000000..06eda4fe
--- /dev/null
+++ b/backend/src/Ai/TextStack.Ai.EvalSuite/UserBookRagEvalRunner.cs
@@ -0,0 +1,239 @@
+using Application.Common.Interfaces;
+using Application.Rag;
+using Microsoft.Extensions.Logging;
+using TextStack.Ai.Core;
+using TextStack.Ai.Evals;
+using TextStack.Ai.Llm;
+using TextStack.Ai.Rag;
+
+namespace TextStack.Ai.EvalSuite;
+
+/// <summary>One generated grounding probe's outcome — the synthesised question, how many citations the Ask answer carried, and whether Ask flagged the answer as insufficient.</summary>
+public sealed record UserBookProbeCase(string Question, int Citations, bool Insufficient);
+
+/// <summary>
+/// The two behavioural probes (greeting + off-book) surfaced for the admin UI — each pass/fail with a
+/// short note so the owner can see WHY a companion run failed without re-reading private content. The runner sets <see cref="Kind"/> to "greeting" or "off_book".
+/// </summary>
+public sealed record UserBookBehaviorCase(string Kind, string Question, bool Pass, string Note);
+
+/// <summary>
+/// Result of a user-book RAG eval (P1): the citation summary over the generated grounding probes, the
+/// fraction of those probes that retrieved ≥1 chunk (<see cref="Retrieval"/>), the combined behaviour
+/// pass-fraction (greeting + off-book), and per-probe detail. <see cref="Citation"/> is null when the
+/// book has no indexed chunks (empty-chunks short-circuit — no LLM call was made); in that case <see cref="Note"/> carries the reason, otherwise it is null. <see cref="ProbeN"/> is the number of generated probe questions.
+/// </summary>
+public sealed record UserBookRagEvalResult(
+    RagCitationSummary? Citation,
+    double Retrieval,
+    int ProbeN,
+    double BehaviorPass,
+    IReadOnlyList<UserBookProbeCase> ProbeCases,
+    IReadOnlyList<UserBookBehaviorCase> BehaviorCases,
+    string? Note);
+
+/// <summary>
+/// Live grounding validation for ANY user-uploaded book (P1, sibling to <see cref="RagEvalRunner"/>).
+/// The catalog eval scores fixed golden sets keyed by editionId; user books have no goldens, so this
+/// runner SYNTHESISES probes from the book's own chunks instead:
+/// <list type="bullet">
+///   <item><b>Generated grounding (N probes):</b> seed retrieval for a spread of chunks, ask the
+///   generator to write one self-contained question per chunk, then run the REAL Ask path per question
+///   (full-book retrieval, no gate) and judge the answer's citations with the shared
+///   <see cref="CitationJudge"/> — same rubric + SupportRate as the catalog.</item>
+///   <item><b>Greeting ("hi"):</b> a warm greeting must answer, not cite, and not refuse — a purely
+///   structural check (no judge call).</item>
+///   <item><b>Off-book:</b> a clearly off-book question must either decline or stay grounded — judged
+///   for invented facts.</item>
+/// </list>
+/// Empty/un-embedded book → short-circuit: NO generator/judge LLM call, persist failed 0-score rows with an
+/// "empty-chunks" note (mirrors the catalog "no-LLM-on-empty" invariant). When persisting, writes <c>rag.userbook.citation</c>
+/// and <c>rag.userbook.behavior</c> <see cref="EvalRun"/> rows on every run, plus <c>rag.userbook.retrieval</c> on non-empty runs only.
+/// </summary>
+public sealed class UserBookRagEvalRunner(ILogger<UserBookRagEvalRunner> logger)
+{
+    public const string CitationFeature = "rag.userbook.citation";
+    public const string BehaviorFeature = "rag.userbook.behavior";
+    public const string RetrievalFeature = "rag.userbook.retrieval";
+    private const string GeneratorModelId = "userbook-probe-gen";
+
+    // A broad seed that surfaces a spread of the book's chunks regardless of subject matter.
+    private const string SeedQuery = "main ideas and key topics summary";
+
+    // A fixed off-book question — its answer must decline or stay grounded, never invent.
+    private const string OffBookQuestion =
+        "What does this book say about the 2026 FIFA World Cup final score?";
+
+    private const string ProbeGenSystemPrompt =
+        "You write a single reading-comprehension question grounded in one passage.";
+
+    public async Task<UserBookRagEvalResult> RunAsync(
+        IRagService rag,
+        IRagAskService ask,
+        ILlmService generator,
+        ILlmService judge,
+        string judgeModelId,
+        Guid userId,
+        Guid userBookId,
+        int probeCount, // seed chunks requested = upper bound on generated probes
+        int k,          // chunks retrieved per probe / behaviour question
+        bool persist,   // rows are written only when this is true AND db is non-null
+        IAppDbContext? db,
+        string? gitSha,
+        CancellationToken ct)
+    {
+        // Seed retrieval for a spread of chunks. Empty => un-embedded/empty book: short-circuit with NO
+        // generator/judge LLM call (mirrors the catalog no-LLM-on-empty invariant).
+        var seed = await rag.RetrieveUserBookAsync(userId, userBookId, SeedQuery, probeCount, maxChapterOrd: null, ct);
+        if (seed.Count == 0)
+        {
+            const string note = "No indexed chunks for this user book — not embedded yet (no LLM call made).";
+            logger.LogInformation(
+                "user-book RAG eval userBook={UserBook} skipped — no indexed chunks", userBookId);
+            if (persist && db is not null)
+            {
+                db.EvalRuns.Add(CitationJudge.MakeRun(CitationFeature, GeneratorModelId, judgeModelId, 0m, 0, gitSha,
+                    $"{{\"note\":\"empty-chunks\",\"probes\":0}}"));
+                db.EvalRuns.Add(CitationJudge.MakeRun(BehaviorFeature, GeneratorModelId, judgeModelId, 0m, 0, gitSha,
+                    $"{{\"note\":\"empty-chunks\"}}"));
+                await db.SaveChangesAsync(ct);
+            }
+            return new UserBookRagEvalResult(
+                Citation: null, Retrieval: 0, ProbeN: 0, BehaviorPass: 0, ProbeCases: [], BehaviorCases: [], Note: note);
+        }
+
+        // 1) Generate one self-contained question per seed chunk (one generator call each).
+        var questions = new List<string>(seed.Count);
+        foreach (var chunk in seed)
+        {
+            ct.ThrowIfCancellationRequested();
+            questions.Add(await GenerateProbeAsync(generator, chunk.Text, ct));
+        }
+
+        // 2) Run the REAL Ask path per question (full-book retrieval, no gate) and keep the retrieved
+        //    chunks so the shared CitationJudge can score the resulting answer's citations.
+        var retrievedByQuestion = new List<(string Question, IReadOnlyList<RetrievedChunk> Chunks)>();
+        var probeCases = new List<UserBookProbeCase>();
+        var probesWithChunks = 0;
+        foreach (var question in questions)
+        {
+            ct.ThrowIfCancellationRequested();
+            var chunks = await rag.RetrieveUserBookAsync(userId, userBookId, question, k, maxChapterOrd: null, ct);
+            if (chunks.Count > 0)
+                probesWithChunks++;
+            retrievedByQuestion.Add((question, chunks));
+        }
+
+        var citation = await CitationJudge.JudgeCitationsAsync(ask, judge, retrievedByQuestion, ct);
+
+        // Per-probe detail: Ask is invoked a SECOND time per probe here (JudgeCitationsAsync already asked once but
+        // does not return its answers), so generation cost doubles. NOTE(review): if the generator is not deterministic, these counts may differ from the answers that were judged.
+        foreach (var (question, chunks) in retrievedByQuestion)
+        {
+            ct.ThrowIfCancellationRequested();
+            var answer = await ask.AskFromChunksAsync(question, chunks, [], [], lastReadOrd: int.MaxValue, ct);
+            probeCases.Add(new UserBookProbeCase(question, answer.Citations.Count, answer.Insufficient));
+        }
+
+        var retrievalRate = (double)probesWithChunks / questions.Count;
+
+        // 3) Behaviour probes: greeting (structural) + off-book (judged for invented facts).
+        var greeting = await EvaluateGreetingAsync(rag, ask, userId, userBookId, k, ct);
+        var offBook = await EvaluateOffBookAsync(rag, ask, judge, userId, userBookId, k, ct);
+        var behaviorCases = new[] { greeting, offBook };
+        var behaviorPass = behaviorCases.Count(c => c.Pass) / (double)behaviorCases.Length;
+
+        logger.LogInformation(
+            "user-book RAG eval userBook={UserBook} probes={Probes} citation={Cit}/support {Support:0.00} retrieval={Retrieval:0.00} behavior={Behavior:0.00}",
+            userBookId, questions.Count, citation.Score, citation.SupportRate, retrievalRate, behaviorPass);
+
+        if (persist && db is not null)
+        {
+            db.EvalRuns.Add(CitationJudge.MakeRun(CitationFeature, GeneratorModelId, judgeModelId,
+                (decimal)citation.Score, citation.CitationsJudged, gitSha,
+                FormattableString.Invariant($"{{\"supportRate\":{citation.SupportRate:0.000},\"answers\":{citation.AnswersGenerated},\"probes\":{questions.Count}}}"))); // invariant culture: a comma decimal separator would corrupt the JSON
+            db.EvalRuns.Add(CitationJudge.MakeRun(BehaviorFeature, GeneratorModelId, judgeModelId,
+                (decimal)behaviorPass, behaviorCases.Length, gitSha,
+                $"{{\"greeting\":{greeting.Pass.ToString().ToLowerInvariant()},\"offBook\":{offBook.Pass.ToString().ToLowerInvariant()}}}"));
+            db.EvalRuns.Add(CitationJudge.MakeRun(RetrievalFeature, GeneratorModelId, CitationJudge.NoJudge,
+                (decimal)retrievalRate, questions.Count, gitSha,
+                $"{{\"probesWithChunks\":{probesWithChunks}}}"));
+            await db.SaveChangesAsync(ct);
+        }
+
+        return new UserBookRagEvalResult(
+            citation, retrievalRate, questions.Count, behaviorPass, probeCases, behaviorCases, Note: null);
+    }
+
+    /// <summary>One generator call: a self-contained, non-yes/no question answerable only from the passage. Returns the model text trimmed and otherwise unvalidated (it may be empty if the model returns nothing).</summary>
+    private static async Task<string> GenerateProbeAsync(ILlmService generator, string passage, CancellationToken ct)
+    {
+        var prompt =
+            "Write one self-contained question answerable ONLY from this passage. No yes/no. " +
+            "Output the question only.\n\nPassage:\n" + passage;
+        var request = new LlmRequest(
+            SystemPrompt: ProbeGenSystemPrompt,
+            Messages: [new LlmMessage("user", prompt)],
+            MaxOutputTokens: 80,
+            FeatureTag: "eval.userbook.gen");
+        var response = await generator.CompleteAsync(request, ct);
+        return response.Text.Trim();
+    }
+
+    /// <summary>
+    /// Greeting probe: a warm "hi" must answer (non-empty), NOT cite (no citations, no <c>[n]</c> marker
+    /// in the text), and NOT refuse (not insufficient). Purely structural — no judge call. Retrieval and one Ask call still run; on failure the note lists the outcome of each of the four checks.
+    /// </summary>
+    private static async Task<UserBookBehaviorCase> EvaluateGreetingAsync(
+        IRagService rag, IRagAskService ask, Guid userId, Guid userBookId, int k, CancellationToken ct)
+    {
+        const string question = "hi";
+        var chunks = await rag.RetrieveUserBookAsync(userId, userBookId, question, k, maxChapterOrd: null, ct); // retrieval still runs for "hi"; its chunks go straight to Ask
+        var answer = await ask.AskFromChunksAsync(question, chunks, [], [], lastReadOrd: int.MaxValue, ct);
+
+        var nonEmpty = !string.IsNullOrWhiteSpace(answer.Answer);
+        var noCitations = answer.Citations.Count == 0;
+        var notRefused = !answer.Insufficient;
+        var noMarker = !HasCitationMarker(answer.Answer); // catches a literal [n] left in the text even when Citations is empty
+        var pass = nonEmpty && noCitations && notRefused && noMarker;
+
+        var note = pass
+            ? "Warm greeting: answered, no citations, not refused."
+            : $"Failed structural greeting check (nonEmpty={nonEmpty}, noCitations={noCitations}, notRefused={notRefused}, noMarker={noMarker}).";
+        return new UserBookBehaviorCase("greeting", question, pass, note);
+    }
+
+    /// <summary>
+    /// Off-book probe: a clearly off-book question must gracefully decline OR stay grounded with no
+    /// invented facts. Passes iff the answer is insufficient OR the judge finds no invented facts. The judge is only called when Ask did not decline.
+    /// </summary>
+    private static async Task<UserBookBehaviorCase> EvaluateOffBookAsync(
+        IRagService rag, IRagAskService ask, ILlmService judge, Guid userId, Guid userBookId, int k, CancellationToken ct)
+    {
+        var chunks = await rag.RetrieveUserBookAsync(userId, userBookId, OffBookQuestion, k, maxChapterOrd: null, ct);
+        var answer = await ask.AskFromChunksAsync(OffBookQuestion, chunks, [], [], lastReadOrd: int.MaxValue, ct);
+
+        if (answer.Insufficient) // declined → pass without spending a judge call
+            return new UserBookBehaviorCase("off_book", OffBookQuestion, true,
+                "Declined an off-book question (insufficient context).");
+
+        var noInvented = await CitationJudge.JudgeNoInventedFactsAsync(judge, OffBookQuestion, answer.Answer, ct);
+        var note = noInvented
+            ? "Stayed grounded on an off-book question (no invented facts)."
+            : "Introduced facts not grounded in the book on an off-book question.";
+        return new UserBookBehaviorCase("off_book", OffBookQuestion, noInvented, note);
+    }
+
+    private static bool HasCitationMarker(string text) // true when text holds "[" + one or more digits + "]"
+    {
+        var open = text.IndexOf('[');
+        while (open >= 0)
+        {
+            var end = open + 1; // first char after the bracket
+            while (end < text.Length && char.IsDigit(text[end])) end++;
+            if (end > open + 1 && end < text.Length && text[end] == ']') return true;
+            open = text.IndexOf('[', end); // the skipped chars were digits, so no '[' is missed
+        }
+        return false;
+    }
+}
diff --git a/backend/src/Api/Endpoints/AdminRagEndpoints.cs b/backend/src/Api/Endpoints/AdminRagEndpoints.cs
index 1fb78847..a375d1b3 100644
--- a/backend/src/Api/Endpoints/AdminRagEndpoints.cs
+++ b/backend/src/Api/Endpoints/AdminRagEndpoints.cs
@@ -3,6 +3,7 @@
 using Application.Rag;
 using Contracts.Admin;
 using Microsoft.AspNetCore.Mvc;
+using Microsoft.EntityFrameworkCore;
 using Microsoft.Extensions.DependencyInjection;
 using TextStack.Ai.Core;
 using TextStack.Ai.EvalSuite;
@@ -27,6 +28,79 @@ public static void MapAdminRagEndpoints(this WebApplication app)
         group.MapGet("/{editionId:guid}/search", Search);
         group.MapGet("/{editionId:guid}/context", Context);
         group.MapPost("/{editionId:guid}/eval", RunEval);
+        group.MapPost("/userbook/{id:guid}/eval", RunUserBookEval);
+    }
+
+    // User-book RAG eval (P1): live grounding validation for ANY user-uploaded book — no goldens, so it
+    // synthesises probes from the book's own chunks (generated grounding + greeting + off-book). Reads
+    // PRIVATE user content under admin auth, so it logs the target userId. `judge` = openai (default,
+    // Eval:JudgeModel) | ollama. Persists rag.userbook.citation / .behavior / .retrieval EvalRun rows.
+    private static async Task<IResult> RunUserBookEval(
+        Guid id,
+        [FromQuery] int? k,
+        [FromQuery] string? judge,
+        IServiceProvider services,
+        IConfiguration config,
+        UserBookRagEvalRunner runner,
+        IAppDbContext db,
+        ILogger<UserBookRagEvalRunner> logger,
+        CancellationToken ct)
+    {
+        var userId = await db.UserBooks
+            .Where(b => b.Id == id)
+            .Select(b => (Guid?)b.UserId) // nullable projection so "no such book" is distinguishable from a real owner
+            .FirstOrDefaultAsync(ct);
+        if (userId is null)
+            return Results.NotFound("Book not found"); // 404: unknown user book
+
+        // Privacy note: admin-triggered eval reads this user's private uploaded content.
+        logger.LogInformation("admin RAG eval reads private user content for userId {UserId}", userId.Value);
+
+        if (!TryResolve<IRagService>(services, out var rag, out var unavailable))
+            return unavailable;
+
+        IRagAskService ask;
+        ILlmService generator;
+        try
+        {
+            ask = services.GetRequiredService<IRagAskService>();
+            generator = services.GetRequiredService<ILlmService>();
+        }
+        catch (InvalidOperationException)
+        {
+            return Results.Problem("Embeddings are not configured (no OpenAI key).", statusCode: 503);
+        }
+
+        // Case-insensitive match without allocating a lowered copy; any value but "ollama" uses the OpenAI judge.
+        var useOllama = string.Equals(judge?.Trim(), "ollama", StringComparison.OrdinalIgnoreCase);
+        var (judgeKey, judgeModelId) = useOllama
+            ? ("ollama", config["Ollama:Model"] ?? "gemma4:e2b")
+            : ("openai-judge", config["Eval:JudgeModel"] ?? "gpt-4.1");
+        var judgeClient = services.GetRequiredKeyedService<ILlmService>(judgeKey);
+
+        var limit = Math.Clamp(k ?? IRagService.DefaultK, 1, MaxK);
+        var gitSha = Environment.GetEnvironmentVariable("GIT_SHA");
+
+        var result = await runner.RunAsync(
+            rag, ask, generator, judgeClient, judgeModelId, userId.Value, id,
+            probeCount: 6, limit, persist: true, db, gitSha, ct);
+
+        var citation = result.Citation is null
+            ? null
+            : new RagCitationDto(
+                Math.Round(result.Citation.Score, 3),
+                Math.Round(result.Citation.SupportRate, 4),
+                result.Citation.CitationsJudged,
+                result.Citation.AnswersGenerated);
+
+        return Results.Ok(new UserBookRagEvalDto(
+            citation,
+            Math.Round(result.Retrieval, 4),
+            result.ProbeN,
+            Math.Round(result.BehaviorPass, 4),
+            result.ProbeCases.Select(c => new UserBookProbeDto(c.Question, c.Citations, c.Insufficient)).ToList(),
+            result.BehaviorCases.Select(c => new UserBookBehaviorDto(c.Kind, c.Question, c.Pass, c.Note)).ToList(),
+            result.Note));
     }
 
     // Phase 4 DoD gate (AI-027): runs the RAG eval against a real, embedded edition. Retrieval
diff --git a/backend/src/Api/Program.cs b/backend/src/Api/Program.cs
index c2780178..10f19229 100644
--- a/backend/src/Api/Program.cs
+++ b/backend/src/Api/Program.cs
@@ -86,6 +86,7 @@
 builder.Services.AddSingleton<Application.Ai.IEvalRunGate, Application.Ai.EvalRunGate>();
 builder.Services.AddSingleton<Application.Ai.EvalRegressionDetector>();
 builder.Services.AddSingleton<TextStack.Ai.EvalSuite.RagEvalRunner>();
+builder.Services.AddSingleton<TextStack.Ai.EvalSuite.UserBookRagEvalRunner>();
 builder.Services.AddSingleton<TextStack.Ai.EvalSuite.ToolCallEvalRunner>();
 builder.Services.AddSingleton<TextStack.Ai.EvalSuite.StudyBuddyEvalRunner>();
 builder.Services.AddSingleton<TextStack.Ai.EvalSuite.CriticDefectEvalRunner>();
diff --git a/backend/src/Contracts/Admin/RagDtos.cs b/backend/src/Contracts/Admin/RagDtos.cs
index d1968145..edea1976 100644
--- a/backend/src/Contracts/Admin/RagDtos.cs
+++ b/backend/src/Contracts/Admin/RagDtos.cs
@@ -51,3 +51,24 @@ public record RagEvalDto(
     RagCitationDto? Citation,
     IReadOnlyList<RagRecallCaseDto> RecallCases,
     IReadOnlyList<RagSpoilerCaseDto> SpoilerCases);
+
+/// <summary>One synthesised grounding probe's outcome in the user-book eval (P1): the probe question, the citation count of its answer, and whether the answer was flagged insufficient (a refusal).</summary>
+public record UserBookProbeDto(string Question, int Citations, bool Insufficient);
+
+/// <summary>One behaviour probe's outcome: <c>Kind</c> is "greeting" or "off_book", <c>Pass</c> is the verdict, and <c>Note</c> is a short pass/fail explanation.</summary>
+public record UserBookBehaviorDto(string Kind, string Question, bool Pass, string Note);
+
+/// <summary>
+/// Result of the user-book RAG eval (P1). <see cref="Citation"/> is citation correctness over the generated
+/// grounding probes; it is null, with <see cref="Note"/> set, when the book has no indexed chunks (empty
+/// short-circuit — no LLM call made). <see cref="Retrieval"/> is the fraction of probes that retrieved ≥1
+/// chunk, <see cref="BehaviorPass"/> the combined greeting+off-book pass-fraction; the lists are per-probe detail.
+/// </summary>
+public record UserBookRagEvalDto(
+    RagCitationDto? Citation,
+    double Retrieval,
+    int ProbeN,
+    double BehaviorPass,
+    IReadOnlyList<UserBookProbeDto> Probes,
+    IReadOnlyList<UserBookBehaviorDto> Behavior,
+    string? Note);
diff --git a/tests/TextStack.AiEvals/UserBookRagEvalRunnerTests.cs b/tests/TextStack.AiEvals/UserBookRagEvalRunnerTests.cs
new file mode 100644
index 00000000..fcee50d7
--- /dev/null
+++ b/tests/TextStack.AiEvals/UserBookRagEvalRunnerTests.cs
@@ -0,0 +1,283 @@
+using Application.Rag;
+using Microsoft.Extensions.Logging.Abstractions;
+using TextStack.Ai.Core;
+using TextStack.Ai.EvalSuite;
+using TextStack.Ai.Rag;
+
+namespace TextStack.AiEvals;
+
+/// <summary>
+/// Deterministic coverage for <see cref="UserBookRagEvalRunner"/> (user-book RAG eval, P1) with fake
+/// retrieval + generator + judge + Ask (no real DB, no embeddings, no key). Proves the synthesised-probe
+/// path: seed retrieval → one generated question per chunk → real Ask path per question → shared
+/// CitationJudge → greeting (structural) + off-book (judged) behaviour. A throwing fake proves the
+/// empty-chunks short-circuit makes NO generator/judge LLM call (the catalog no-LLM-on-empty invariant).
+/// </summary>
+public class UserBookRagEvalRunnerTests
+{
+    private static readonly Guid UserId = Guid.NewGuid(); // FakeUserBookRag asserts every retrieval targets this user
+    private static readonly Guid BookId = Guid.NewGuid(); // ...and this book
+
+    private static UserBookRagEvalRunner Runner() => // new runner per call; NullLogger discards log output
+        new(NullLogger<UserBookRagEvalRunner>.Instance);
+
+    private static RetrievedChunk Chunk(string text) => // NOTE(review): positional args follow RetrievedChunk's declaration (not visible here) — confirm order
+        new(Guid.NewGuid(), Guid.NewGuid(), 0, 0, text, 0, text.Length, 1.0);
+
+    /// <summary>Fake retrieval over a pool of <paramref name="count"/> chunks: every user-book query returns min(count, k) chunks (none when count is 0).</summary>
+    private sealed class FakeUserBookRag(int count) : IRagService
+    {
+        public Task<IReadOnlyList<RetrievedChunk>> RetrieveAsync(
+            Guid editionId, string query, int k, int? maxChapterOrd, CancellationToken ct) =>
+            throw new NotSupportedException(); // edition-keyed retrieval is not supported by this fake
+
+        public Task<IReadOnlyList<RetrievedChunk>> RetrieveUserBookAsync(
+            Guid userId, Guid userBookId, string query, int k, int? maxChapterOrd, CancellationToken ct)
+        {
+            Assert.Equal(UserId, userId);
+            Assert.Equal(BookId, userBookId);
+            Assert.Null(maxChapterOrd); // user books are never gated
+            // `count` is the book's available chunk pool; a retrieval returns up to k of them.
+            var chunks = Enumerable.Range(0, Math.Min(count, k)).Select(i => Chunk($"passage {i} for '{query}'")).ToList();
+            return Task.FromResult<IReadOnlyList<RetrievedChunk>>(chunks);
+        }
+    }
+
+    /// <summary>Returns a one-citation answer over the first chunk; empty chunks → insufficient with no citation (the answer text still contains "[1]").</summary>
+    private sealed class FakeAsk : IRagAskService
+    {
+        public Task<AskAnswer> AskAsync(Guid u, Guid s, Guid e, string q, int k, Guid? currentChapterId, IReadOnlyList<Contracts.Books.AskTurnDto> history, CancellationToken ct) =>
+            throw new NotSupportedException(); // only AskFromChunksAsync is implemented by this fake
+
+        public Task<AskAnswer> AskFromChunksAsync(
+            string question, IReadOnlyList<RetrievedChunk> chunks, IReadOnlyList<string> notes, IReadOnlyList<Contracts.Books.AskTurnDto> history, int lastReadOrd, CancellationToken ct)
+        {
+            var citations = chunks.Count == 0
+                ? Array.Empty<AskCitationSource>()
+                : [new AskCitationSource(1, chunks[0])];
+            return Task.FromResult(new AskAnswer($"Grounded answer [1]. ({question})", citations, lastReadOrd, Insufficient: chunks.Count == 0));
+        }
+    }
+
+    /// <summary>A warm-greeting Ask: same fixed reply for every question — non-empty, no citation, no [n] marker, never insufficient.</summary>
+    private sealed class WarmGreetingAsk : IRagAskService
+    {
+        public Task<AskAnswer> AskAsync(Guid u, Guid s, Guid e, string q, int k, Guid? currentChapterId, IReadOnlyList<Contracts.Books.AskTurnDto> history, CancellationToken ct) =>
+            throw new NotSupportedException(); // only AskFromChunksAsync is implemented by this fake
+
+        public Task<AskAnswer> AskFromChunksAsync(
+            string question, IReadOnlyList<RetrievedChunk> chunks, IReadOnlyList<string> notes, IReadOnlyList<Contracts.Books.AskTurnDto> history, int lastReadOrd, CancellationToken ct) =>
+            Task.FromResult(new AskAnswer("Hello! Happy to chat about this book.", [], lastReadOrd, Insufficient: false));
+    }
+
+    /// <summary>An Ask whose reply always carries a "[1]" marker (and a citation when chunks exist) — fails the structural greeting check.</summary>
+    private sealed class CitingGreetingAsk : IRagAskService
+    {
+        public Task<AskAnswer> AskAsync(Guid u, Guid s, Guid e, string q, int k, Guid? currentChapterId, IReadOnlyList<Contracts.Books.AskTurnDto> history, CancellationToken ct) =>
+            throw new NotSupportedException(); // only AskFromChunksAsync is implemented by this fake
+
+        public Task<AskAnswer> AskFromChunksAsync(
+            string question, IReadOnlyList<RetrievedChunk> chunks, IReadOnlyList<string> notes, IReadOnlyList<Contracts.Books.AskTurnDto> history, int lastReadOrd, CancellationToken ct)
+        {
+            var cite = chunks.Count == 0 ? Array.Empty<AskCitationSource>() : [new AskCitationSource(1, chunks[0])];
+            return Task.FromResult(new AskAnswer("Hi [1].", cite, lastReadOrd, Insufficient: false));
+        }
+    }
+
+    /// <summary>Generator that returns a fixed question for every request and counts its calls.</summary>
+    private sealed class CountingGenerator : ILlmService
+    {
+        public int Calls;
+        public Task<LlmResponse> CompleteAsync(LlmRequest request, CancellationToken ct)
+        {
+            Calls++;
+            return Task.FromResult(new LlmResponse(
+                "What is the main idea of this passage?", [], new LlmUsage(0, 0, 0m), "gen-fake", Guid.NewGuid()));
+        }
+
+        public IAsyncEnumerable<LlmDelta> StreamAsync(LlmRequest request, CancellationToken ct) =>
+            throw new NotSupportedException(); // streaming is not implemented by this fake
+    }
+
+    /// <summary>
+    /// Judge returning a fixed rubric verdict (d1/d2/d3 JSON) for citation grading, and a fixed
+    /// <paramref name="offBookVerdict"/> string for the off-book faithfulness grade. Counts its calls.
+    /// </summary>
+    private sealed class CountingJudge(int d1, int d2, int d3, string offBookVerdict) : ILlmService
+    {
+        public int Calls;
+        public Task<LlmResponse> CompleteAsync(LlmRequest request, CancellationToken ct)
+        {
+            Calls++;
+            // The off-book faithfulness grader uses MaxOutputTokens=4 and asks for YES/NO; everything
+            // else is the rubric JSON judge.
+            var text = request.MaxOutputTokens <= 4
+                ? offBookVerdict
+                : $"{{\"d1\": {d1}, \"d2\": {d2}, \"d3\": {d3}, \"rationale\": \"ok\"}}";
+            return Task.FromResult(new LlmResponse(text, [], new LlmUsage(0, 0, 0m), "judge-fake", Guid.NewGuid()));
+        }
+
+        public IAsyncEnumerable<LlmDelta> StreamAsync(LlmRequest request, CancellationToken ct) =>
+            throw new NotSupportedException(); // streaming is not implemented by this fake
+    }
+
+    /// <summary>A fake that throws on any completion call — used as both generator and judge to prove the empty-chunks short-circuit.</summary>
+    private sealed class ThrowingLlm : ILlmService
+    {
+        public Task<LlmResponse> CompleteAsync(LlmRequest request, CancellationToken ct) =>
+            throw new InvalidOperationException("LLM must not be called on empty chunks");
+
+        public IAsyncEnumerable<LlmDelta> StreamAsync(LlmRequest request, CancellationToken ct) =>
+            throw new NotSupportedException();
+    }
+
+    [Fact]
+    public async Task RunAsync_IndexedBook_GeneratesNProbesAndAggregatesCitations()
+    {
+        var gen = new CountingGenerator();
+        var result = await Runner().RunAsync(
+            new FakeUserBookRag(count: 6), new FakeAsk(), gen, new CountingJudge(5, 4, 5, "no"),
+            judgeModelId: "judge-fake", UserId, BookId, probeCount: 6, k: 8,
+            persist: false, db: null, gitSha: null, TestContext.Current.CancellationToken);
+
+        // Seed retrieval returns 6 chunks → 6 generated questions → 6 generator calls.
+        Assert.Equal(6, gen.Calls);
+        Assert.Equal(6, result.ProbeN);
+        Assert.Equal(6, result.ProbeCases.Count);
+        Assert.NotNull(result.Citation);
+        Assert.Equal(6, result.Citation!.CitationsJudged); // one citation per probe answer
+        Assert.Equal((5 + 4 + 5) / 3.0, result.Citation.Score, 3); // mean of the judge's fixed d1/d2/d3
+        Assert.Equal(1.0, result.Citation.SupportRate, 12); // D1=5 ≥4 for every citation
+        Assert.Equal(1.0, result.Retrieval, 12);            // every probe retrieved ≥1 chunk
+        Assert.Null(result.Note);
+    }
+
+    [Fact]
+    public async Task RunAsync_LowSupportAxis_ZeroSupportRate()
+    {
+        var result = await Runner().RunAsync(
+            new FakeUserBookRag(count: 1), new FakeAsk(), new CountingGenerator(),
+            new CountingJudge(2, 5, 5, "no"), judgeModelId: "judge-fake", UserId, BookId,
+            probeCount: 6, k: 8, persist: false, db: null, gitSha: null, TestContext.Current.CancellationToken);
+
+        Assert.NotNull(result.Citation);
+        Assert.Equal(0.0, result.Citation!.SupportRate, 12); // D1=2 < 4 → no citation supported
+    }
+
+    [Fact]
+    public async Task RunAsync_WarmGreeting_StructuralPass()
+    {
+        var result = await Runner().RunAsync(
+            new FakeUserBookRag(count: 1), new WarmGreetingAsk(), new CountingGenerator(),
+            new CountingJudge(5, 5, 5, "no"), judgeModelId: "judge-fake", UserId, BookId,
+            probeCount: 6, k: 8, persist: false, db: null, gitSha: null, TestContext.Current.CancellationToken);
+
+        var greeting = result.BehaviorCases.Single(c => c.Kind == "greeting");
+        Assert.True(greeting.Pass); // non-empty, 0 citations, not insufficient, no [n] marker
+    }
+
+    [Fact]
+    public async Task RunAsync_GreetingThatCites_StructuralFail()
+    {
+        var result = await Runner().RunAsync(
+            new FakeUserBookRag(count: 1), new CitingGreetingAsk(), new CountingGenerator(),
+            new CountingJudge(5, 5, 5, "no"), judgeModelId: "judge-fake", UserId, BookId,
+            probeCount: 6, k: 8, persist: false, db: null, gitSha: null, TestContext.Current.CancellationToken);
+
+        var greeting = result.BehaviorCases.Single(c => c.Kind == "greeting");
+        Assert.False(greeting.Pass); // cited + has [1] marker → fails the warm-greeting check
+    }
+
+    [Fact]
+    public async Task RunAsync_OffBookGrounded_JudgePass()
+    {
+        // Judge says "no" (no invented facts) → off-book probe passes. WarmGreetingAsk keeps the off-book
+        // answer non-insufficient so it actually reaches the judge.
+        var result = await Runner().RunAsync(
+            new FakeUserBookRag(count: 1), new WarmGreetingAsk(), new CountingGenerator(),
+            new CountingJudge(5, 5, 5, offBookVerdict: "no"), judgeModelId: "judge-fake", UserId, BookId,
+            probeCount: 6, k: 8, persist: false, db: null, gitSha: null, TestContext.Current.CancellationToken);
+
+        var offBook = result.BehaviorCases.Single(c => c.Kind == "off_book");
+        Assert.True(offBook.Pass);
+    }
+
+    [Fact]
+    public async Task RunAsync_OffBookHallucinates_JudgeFail()
+    {
+        // Judge says "yes" (invented facts) → off-book probe fails. WarmGreetingAsk makes the greeting
+        // pass and keeps the off-book answer non-insufficient so the "yes" verdict is what fails it.
+        var result = await Runner().RunAsync(
+            new FakeUserBookRag(count: 1), new WarmGreetingAsk(), new CountingGenerator(),
+            new CountingJudge(5, 5, 5, offBookVerdict: "yes"), judgeModelId: "judge-fake", UserId, BookId,
+            probeCount: 6, k: 8, persist: false, db: null, gitSha: null, TestContext.Current.CancellationToken);
+
+        var offBook = result.BehaviorCases.Single(c => c.Kind == "off_book");
+        Assert.False(offBook.Pass);
+        Assert.Equal(0.5, result.BehaviorPass, 12); // greeting passes, off-book fails → 1 of 2
+    }
+
+    [Fact]
+    public async Task RunAsync_Persist_WritesCitationBehaviorRetrievalRows()
+    {
+        var db = new CapturingDb();
+        await Runner().RunAsync(
+            new FakeUserBookRag(count: 1), new FakeAsk(), new CountingGenerator(),
+            new CountingJudge(5, 5, 5, "no"), judgeModelId: "judge-fake", UserId, BookId,
+            probeCount: 6, k: 8, persist: true, db, gitSha: "abc123", TestContext.Current.CancellationToken);
+
+        Assert.Equal(3, db.Added.Count); // exactly one row per feature asserted below
+        Assert.Contains(db.Added, r => r.Feature == UserBookRagEvalRunner.CitationFeature);
+        Assert.Contains(db.Added, r => r.Feature == UserBookRagEvalRunner.BehaviorFeature);
+        Assert.Contains(db.Added, r => r.Feature == UserBookRagEvalRunner.RetrievalFeature);
+        Assert.All(db.Added, r => Assert.Equal("abc123", r.GitSha)); // the supplied gitSha is stamped on every row
+        Assert.Equal(1, db.SaveCalls); // all rows go out in a single save
+    }
+
+    [Fact]
+    public async Task RunAsync_PersistOff_WritesNothing()
+    {
+        var db = new CapturingDb(); // supplied, but must stay untouched because persist is false
+        await Runner().RunAsync(
+            new FakeUserBookRag(count: 1), new FakeAsk(), new CountingGenerator(),
+            new CountingJudge(5, 5, 5, "no"), judgeModelId: "judge-fake", UserId, BookId,
+            probeCount: 6, k: 8, persist: false, db, gitSha: null, TestContext.Current.CancellationToken);
+
+        Assert.Empty(db.Added);
+        Assert.Equal(0, db.SaveCalls);
+    }
+
+    [Fact]
+    public async Task RunAsync_EmptyChunks_PersistsTwoFailedRows()
+    {
+        var db = new CapturingDb();
+        await Runner().RunAsync(
+            new FakeUserBookRag(count: 0), new FakeAsk(), new ThrowingLlm(), new ThrowingLlm(),
+            judgeModelId: "judge-fake", UserId, BookId, probeCount: 6, k: 8,
+            persist: true, db, gitSha: null, TestContext.Current.CancellationToken);
+
+        // citation + behavior rows, both 0-score with an empty-chunks note; NO retrieval row.
+        Assert.Equal(2, db.Added.Count);
+        Assert.All(db.Added, r => Assert.Equal(0m, r.Score));
+        Assert.All(db.Added, r => Assert.Contains("empty-chunks", r.BreakdownJson));
+        Assert.Equal(1, db.SaveCalls); // both failed rows go out in a single save
+    }
+
+    [Fact]
+    public async Task RunAsync_EmptyChunks_ShortCircuitsWithNoLlmCall()
+    {
+        var result = await Runner().RunAsync(
+            new FakeUserBookRag(count: 0), new FakeAsk(), new ThrowingLlm(), new ThrowingLlm(),
+            judgeModelId: "judge-fake", UserId, BookId, probeCount: 6, k: 8,
+            persist: false, db: null, gitSha: null, TestContext.Current.CancellationToken);
+
+        // Empty seed → no generator/judge call (ThrowingLlm would have thrown), failed 0-row + note.
+        Assert.Null(result.Citation);
+        Assert.Equal(0, result.ProbeN);
+        Assert.Equal(0.0, result.Retrieval, 12);
+        Assert.Equal(0.0, result.BehaviorPass, 12);
+        Assert.Empty(result.ProbeCases);
+        Assert.Empty(result.BehaviorCases);
+        Assert.NotNull(result.Note);
+    }
+}