diff --git a/CLAUDE.md b/CLAUDE.md
index 72333e97f7..83bcfa27dc 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -137,7 +137,7 @@ All code MUST use gen2 syntax (add `options gen2` at the top of every file). Key
 - Structs/arrays/tables always pass by reference — no `&` needed.
 - Only **workhorse types** (`int`, `float`, `bool`, `string`, …, `isWorkhorseType` on the C++ side) pass by value.
 - **AST pointers (gc_node) pass by value** — copying the pointer, no refcount, no allocation. `def foo(p : ExpressionPtr)` shares the node; `var p` lets you reassign locally; `var p : ExpressionPtr&` propagates reassignment back. For mutable field access, take the param as `var`.
-- **Lambdas pass by value (copy aliases the capture frame).** A `lambda<…>` is a fat pointer to a heap-allocated capture frame, so `=` copies the pointer (creates an alias) and pass-by-value is free. **`delete lam` requires `unsafe`** since other aliases may still be live — same rule as raw pointer / class `delete`. The rule cascades: `array<lambda<…>>`, structs with a lambda field, tuple/variant containing a lambda — all inherit the unsafe-delete requirement.
+- **Lambdas are copyable.** A `lambda<…>` is a fat pointer to a heap-allocated capture frame; `=` and pass-by-value copy the pointer (creating an alias), and `push`/array storage works without `push_clone`. **`delete lam` requires `unsafe`** since other aliases may still be live — same rule as raw pointer / class `delete`. The unsafe-delete rule cascades: `array<lambda<…>>`, structs with a lambda field, and tuples/variants containing a lambda all inherit it.
 - **Strings:** `var s : string` is a writable local copy (no propagation). `var s : string&` propagates. `:=` clones into current context's heap (required across contexts); plain `=` copies the pointer.
 - **Residual `smart_ptr` types** (`ProgramPtr`, `ContextPtr`, `FileAccessPtr`, `DebugAgentPtr`, `VisitorAdapterPtr`) still use refcount semantics — variables holding them need `var inscope`. AST types do NOT — see below.
 
@@ -178,6 +178,7 @@ Full migration table (when reading older docs that say `var inscope` or `<-` for
 - `panic("message")`, `assert(condition)`, `verify(condition)` (stays in release)
 - **Postfix conditional:** `return expr if (cond)`, `break if (cond)`, `continue if (cond)` — early-exit guard on one line
 - **Braceless early-exit:** prefer `if (cond) return X` (or postfix `return X if (cond)`) over `if (cond) { return X }` — STYLE005 flags the braced single-terminator form as noise
+- **Panic is fatal, not an exception.** daslang has no C++/JS-style exception model. A `panic` (or failed `assert` / `verify`) means the program is broken — the only correct response is to print diagnostics and exit. `try/recover` exists to capture the message before exit so you can log it nicely, NOT to recover-and-continue. Do not write code that relies on continuing after `recover`; do not design APIs around panic-as-control-flow. Corollary: `{ body } finally { cleanup }` deliberately skips `cleanup` on panic (the cleanup can't run safely on a broken program); this is not a bug. Don't try to "fix" it; don't use `finally` for cleanup that needs to run on panic. If you need post-statements that run after a block in the normal path, just put them after the block — panic skips everything, and that's the design.
 
 ### Generic function dispatch
 
@@ -232,7 +233,7 @@ Full migration table (when reading older docs that say `var inscope` or `<-` for
 - `require foo public` — re-exports `foo` transitively
 - `[export] def main()` defaults to returning `void`, but you can declare it as `def main() : int { ... return rc }` when you need to surface a non-zero process exit code (e.g. CLI tools where callers — MCP wrappers, CI, parent shells — branch on exit). See `dastest/dastest.das` for the canonical pattern. Don't reach for `panic` just to force a non-zero exit; declare `: int` and `return rc` instead.
 - `push` copies (fails for non-copyable types), `emplace` moves (zeros source), `push_clone` clones (preserves source)
-- Non-copyable types (`array<T>`, `table<K;V>`, lambdas): use `:=`, `push_clone`, or `<-`
+- Non-copyable types (`array<T>`, `table<K;V>`): use `:=`, `push_clone`, or `<-`. (Lambdas are copyable — see above.)
 - Blocks cannot be stored/returned/captured — use lambdas or function pointers
 - Class methods: `def const`, `def abstract const`, `def static`; call syntax `obj.method()`, `obj->method()`, `obj |> method()`
 - **`is`/`as` on handled types checks EXACT type**, not C++ inheritance — `expr is ExprField` is `false` when `expr` is `ExprSafeField`. `as` on wrong type crashes. Must handle each concrete type explicitly.
diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das
index c5b36c26c9..4d9d403852 100644
--- a/daslib/linq_fold.das
+++ b/daslib/linq_fold.das
@@ -35,6 +35,7 @@ require daslib/templates_boost
 require daslib/macro_boost
 require strings
 
+// LinqCall — registry record describing an operator name (where_/select/distinct/...) and its fold disposition.
 struct private LinqCall {
     name : string
     moduleName : string = "linq"
@@ -44,6 +45,289 @@ struct private LinqCall {
     recursive : array<int>          // indices of arguments to apply fold_linq_default on
 }
 
+// ===== Pattern-table refactor kernel =====
+// See daslib/linq_fold.md for masterplan.
+
+variant private SourceAdapter {           // describes the source a fold chain reads from; stored in EmitCtx.src
+    Array : tuple<Expression?; string>     // (top, srcName); only arm so far — PR C widens with Decs/DecsFind/Zip/DecsJoin
+}
+
+variant SlotMatcher {                     // which call names a slot accepts (evaluated by slot_matches_call)
+    literal : string                       // exact name match
+    one_of  : array<string>                // any-of name set (order is irrelevant)
+    alias   : string                       // key into alias_table; the slot accepts any name listed under that key
+}
+
+variant SlotCardinality {                 // how many consecutive calls a slot may consume; the `void?` payloads are unused (helpers pass null)
+    one      : void?                       // required, exactly 1
+    optional : void?                       // 0 or 1
+    chain    : void?                       // 0 or more (greedy); captures as array<ExprCall?> via Captures.many
+}
+
+struct Slot {                             // one position in a SplicePattern chain
+    matcher      : SlotMatcher             // which call names this position accepts
+    cardinality  : SlotCardinality         // how many calls it may consume (one / optional / chain)
+    capture_name : string = ""             // "" = don't capture; otherwise the key in Captures.single (one/optional) or Captures.many (chain)
+    arity        : int = -1                // -1 = any; otherwise the matched call must have exactly this many arguments
+}
+
+// Captures bundle filled by match_pattern: `single` for c_one / c_opt slots; `many` for c_chain (a contiguous run of calls captured in chain order).
+struct Captures {
+    single : table<string; ExprCall?>          // capture_name → matched call; no entry when an optional slot matched nothing
+    many   : table<string; array<ExprCall?>>   // capture_name → captured run; entry exists (possibly empty) for every named chain slot
+}
+
+variant private MatchResult {             // outcome of match_pattern
+    no_match : void?                       // daslang-idiomatic Option<Captures>: the "none" arm, constructed with null
+    matched  : Captures                    // the pattern matched; carries whatever the slots captured
+}
+
+typedef private RequiresPredicate = function<(var c : Captures; var top : Expression?) : bool>    // pattern guard: match_pattern invokes each one after all slots matched; a false result rejects the row
+
+// Fold-time context passed to every emit archetype. Carries the peeled source expression, source adapter, and the outer
+// `_fold(...)` expression's iterator-ness (drives `buffer_return` wrap so iterator-typed contexts wrap survivors with
+// `.to_sequence_move()`; array-typed contexts return the buffer directly).
+struct EmitCtx {
+    top              : Expression?         // peel_each'd; stubs pre-clone per invoke
+    src              : SourceAdapter       // source adapter; emit_loop_or_count_lane reads the source variable name from its Array arm
+    expr_is_iterator : bool                // true when the outer `_fold(...)` expression is iterator-typed (drives the `buffer_return` wrap)
+}
+
+typedef EmitFn = function<(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression?>    // emit callback stored in SplicePattern.emit; may return null (emit_loop_or_count_lane does so when it bails)
+
+struct SplicePattern {                     // one row of a pattern table
+    name     : string                      // debug / lint diagnostics (also quoted in the unknown-alias panic message)
+    chain    : array<Slot>                 // slots matched in order against the flattened call chain
+    requires : array<RequiresPredicate>    // extra guards; match_pattern needs every one to return true
+    emit     : EmitFn                      // emit callback associated with this row
+}
+
+// Slot construction helpers (variant arms need named-field syntax; helpers keep pattern rows compact)
+def m_literal(s : string) : SlotMatcher {       // matcher that accepts exactly the name `s`
+    return SlotMatcher(literal = s)
+}
+def m_alias(s : string) : SlotMatcher {         // matcher that accepts any name listed under alias_table key `s`
+    return SlotMatcher(alias = s)
+}
+def c_one() : SlotCardinality {                 // cardinality: required, exactly one call
+    return SlotCardinality(one = null)
+}
+def c_opt() : SlotCardinality {                 // cardinality: zero or one call
+    return SlotCardinality(optional = null)
+}
+def c_chain() : SlotCardinality {               // cardinality: zero or more calls, matched greedily
+    return SlotCardinality(chain = null)
+}
+
+// Convenience: c_chain always pairs with a `one_of` matcher (head shape "0+ contiguous calls whose name is in `names`, captured as `cap`"). `names` is consumed (moved into the slot matcher); arity keeps its default of -1 (any).
+def slot_chain_of(var names : array<string>; cap : string) : Slot {
+    return Slot(matcher = SlotMatcher(one_of <- names), cardinality = c_chain(), capture_name = cap)
+}
+
+// Prefix-conflict lint: pattern A shadows pattern B (B unreachable) when A's chain is a structural prefix of B's chain.
+// PR A scope: simple structural check (matcher + cardinality + arity equality, capture_name ignored). Doesn't catch all subsumption cases (e.g. opt slots interacting with required slots) — exhaustive subsumption deferred to a later PR.
+def private slot_matchers_equal(a : SlotMatcher; b : SlotMatcher) : bool {
+    // Matchers of different kinds never compare equal; same-kind matchers compare by payload.
+    if (a is literal && b is literal) return (a as literal) == (b as literal)
+    if (a is alias && b is alias) return (a as alias) == (b as alias)
+    if (!(a is one_of) || !(b is one_of)) return false
+    // Both are `one_of`: treated as name sets, so the comparison is order-insensitive —
+    // equal lengths plus every name of `a` being present somewhere in `b`.
+    let lhs & = unsafe(a as one_of)
+    let rhs & = unsafe(b as one_of)
+    if (length(lhs) != length(rhs)) return false
+    for (wanted in lhs) {
+        var present = false
+        for (candidate in rhs) {
+            if (candidate == wanted) { present = true; break; }
+        }
+        if (!present) return false
+    }
+    return true
+}
+
+def private slots_structurally_match(a : Slot; b : Slot) : bool {    // capture_name is deliberately not compared
+    if (a.arity != b.arity) return false
+    if ((a.cardinality is one) != (b.cardinality is one)) return false
+    if ((a.cardinality is optional) != (b.cardinality is optional)) return false
+    if ((a.cardinality is chain) != (b.cardinality is chain)) return false
+    return slot_matchers_equal(a.matcher, b.matcher)
+}
+
+// Strict-prefix test: true only when `a` is SHORTER than `b` and each slot of `a` structurally matches the slot of `b`
+// at the same position. Equal-length chains never count: two rows with identical chains but different `requires`
+// predicates are both reachable (the walker falls through to the second when the first's requires fail).
+def chain_prefix_of(a : array<Slot>; b : array<Slot>) : bool {
+    if (length(b) <= length(a)) return false    // strict prefix only
+    for (sa, sb in a, b) {                      // lockstep walk; ends with the shorter chain, which is `a` here
+        if (!slots_structurally_match(sa, sb)) return false
+    }
+    return true
+}
+
+// Reachability lint over a pattern table: true iff no row has an EARLIER row whose chain is a strict prefix of its own.
+// Silent — the returned bool is the whole verdict (tests assert on it; a future compile-time hook can add diagnostics). `name` is not read by the body yet.
+def check_pattern_table_reachable(name : string; patterns : array<SplicePattern>) : bool {
+    for (earlier in 0 .. length(patterns)) {
+        for (later in earlier + 1 .. length(patterns)) {
+            if (chain_prefix_of(patterns[earlier].chain, patterns[later].chain)) return false    // `later` is shadowed by `earlier`
+        }
+    }
+    return true
+}
+
+// Normalize an ExprCall's name through linqCalls (e.g. "distinct_by_to_array" → "distinct_by"). Walks the func.fromGeneric chain — ExprCall.name itself is the mangled generic-instance name (`__::linq\`distinct_by\`<hash>`); the original user-facing name lives at the root of that chain.
+// Robustness: when `call.func` is null there is no chain to walk, so the call's own name is used instead of dereferencing null; such a name normally matches nothing in linqCalls and is returned unchanged.
+def private call_norm_name(call : ExprCall?) : string {
+    var topFunc = call.func
+    while (topFunc != null && topFunc.fromGeneric != null) {
+        topFunc = topFunc.fromGeneric
+    }
+    let raw = topFunc != null ? string(topFunc.name) : string(call.name)
+    if (linqCalls |> key_exists(raw)) return linqCalls[raw].name
+    return raw
+}
+
+var alias_table : table<string; array<string>> <- {    // alias key → operator names it stands for; read by slot_matches_call for `alias` matchers
+    "distinct_family"            => ["distinct", "distinct_by"],
+    "first_family"               => ["first", "first_or_default"],
+    "count_family"               => ["count", "long_count"],
+    // Narrowed to the terminators emit_hashtable_dedup actually handles — first[_or_default] would be over-matched and then cascade.
+    "distinct_terminator_family" => ["count", "long_count", "sum"],
+    // PR B additions (every entry below):
+    "order_family"               => ["order", "order_descending", "order_by", "order_by_descending"],
+    "range_op_family"            => ["skip", "skip_while", "take_while", "take"],
+    // accum_family: all reducing terminators routed via emit_accumulator_lane in plan_loop_or_count.
+    "accum_family"               => ["sum", "min", "max", "average", "aggregate",
+                                     "min_by", "max_by", "min_max", "min_max_by",
+                                     "min_max_average", "min_max_average_by", "long_count"],
+    "early_exit_family"          => ["any", "all", "contains", "first", "first_or_default"],
+    // loop_terminator_family: union used by the optional terminator slot of plan_loop_or_count's single pattern row.
+    "loop_terminator_family"     => ["count", "long_count", "sum", "min", "max", "average", "aggregate",
+                                     "min_by", "max_by", "min_max", "min_max_by",
+                                     "min_max_average", "min_max_average_by",
+                                     "any", "all", "contains", "first", "first_or_default",
+                                     "last", "last_or_default", "single", "single_or_default",
+                                     "element_at", "element_at_or_default"]
+}
+
+// Walker — see daslib/linq_fold.md Step 3 contract. Returns MatchResult by move;
+// caller binds with `var r <- match_pattern(...)` and reads via `if (r is matched) { let c & = r as matched; ... }`.
+
+def private slot_matches_call(slot : Slot; call : ExprCall?; name : string; pattern_name : string) : bool {
+    var nameOk = false    // does `name` satisfy the slot's matcher?
+    if (slot.matcher is literal) {
+        nameOk = name == (slot.matcher as literal)
+    } elif (slot.matcher is one_of) {
+        for (candidate in slot.matcher as one_of) {
+            if (candidate == name) { nameOk = true; break; }
+        }
+    } elif (slot.matcher is alias) {
+        let key = slot.matcher as alias
+        if (!(alias_table |> key_exists(key))) {    // a misspelled alias is a pattern-table bug, not a non-match
+            panic("match_pattern: unknown alias '{key}' in pattern '{pattern_name}'")
+        }
+        for (candidate in alias_table[key]) {
+            if (candidate == name) { nameOk = true; break; }
+        }
+    }
+    if (!nameOk) return false    // arity is only inspected once the name matched
+    return slot.arity == -1 || length(call.arguments) == slot.arity
+}
+
+def private match_pattern(p : SplicePattern;
+                          var calls : array<tuple<ExprCall?; LinqCall?>>;
+                          var top : Expression?) : MatchResult {    // matches p.chain against `calls` left to right; `top` is only forwarded to the `requires` predicates
+    var captures : Captures
+    var slot_i = 0    // next slot of p.chain to satisfy
+    var call_i = 0    // next unconsumed entry of `calls`
+    while (slot_i < length(p.chain)) {
+        let slot & = unsafe(p.chain[slot_i])
+        if (slot.cardinality is chain) {
+            // c_chain: greedy match-while-in-set. Always succeeds (0+); an empty match still creates a Captures.many entry (when the slot has a capture_name) so emit fns can rely on the key existing.
+            var captured : array<ExprCall?>
+            while (call_i < length(calls) && slot_matches_call(slot, calls[call_i]._0, calls[call_i]._1.name, p.name)) {
+                captured |> push(calls[call_i]._0)
+                call_i ++
+            }
+            if (slot.capture_name != "") {
+                captures.many[slot.capture_name] <- captured
+            }
+            slot_i ++
+            continue
+        }
+        var matched_here = false    // one / optional slot: try the current call only (stays false when calls are exhausted)
+        if (call_i < length(calls)) {
+            let cur & = unsafe(calls[call_i])
+            matched_here = slot_matches_call(slot, cur._0, cur._1.name, p.name)    // name tested is the LinqCall registry name (`_1.name`)
+        }
+        if (matched_here) {
+            if (slot.capture_name != "") {
+                captures.single |> insert(slot.capture_name, calls[call_i]._0)
+            }
+            slot_i ++
+            call_i ++
+        } elif (slot.cardinality is one) {
+            return MatchResult(no_match = null)    // a required slot failed to match → the whole pattern fails
+        } else {
+            slot_i ++    // optional slot skipped: nothing consumed, nothing captured
+        }
+    }
+    if (call_i < length(calls)) return MatchResult(no_match = null)    // leftover calls: the pattern must cover the entire chain
+    for (pred in p.requires) {
+        if (!invoke(pred, captures, top)) return MatchResult(no_match = null)    // first failing guard rejects the row
+    }
+    return MatchResult(matched <- captures)    // captures are moved into the result
+}
+
+// Predicate library — named module-level functions (named so JIT can take their address; inline `@@(...)` lambdas
+// produce `_localfunction_*` symbols the JIT pass can't resolve). Pattern rows wrap each with `@@<RequiresPredicate>`.
+// Starts minimal; grows with use per masterplan.
+
+def private array_source(var c : Captures; var top : Expression?) : bool {
+    if (top == null || top._type == null) return false       // no expression / no type info: cannot vouch for an array source
+    return top._type.isGoodArrayType || top._type.isArray    // `top` arrives peel_each'd from the calling stub; array-typed means indexed iteration is safe
+}
+
+def private take_arg_is_int(var c : Captures; var top : Expression?) : bool {
+    if (!key_exists(c.single, "take")) return true       // nothing captured as "take" → vacuously satisfied
+    let takeCall = c.single["take"]
+    if (takeCall == null || length(takeCall.arguments) < 2 || takeCall.arguments[1] == null) return false
+    return takeCall.arguments[1]._type != null && takeCall.arguments[1]._type.baseType == Type.tInt
+}
+
+// `no_terminator` — holds when nothing was captured under "term", i.e. the chain ends bare (no `count` / `sum` / `first[_or_default]` / `to_array` captured).
+// The return shape (array vs iterator) is decided by the emit fn from `ctx.expr_is_iterator`, not by this predicate.
+def private no_terminator(var c : Captures; var top : Expression?) : bool {
+    return !key_exists(c.single, "term")
+}
+
+// Holds only for an `order_by[_descending]` capture whose key lambda try_make_inline_cmp can turn into an inline comparison. `order` / `order_descending` route elsewhere.
+def private inline_cmp_available(var c : Captures; var top : Expression?) : bool {
+    if (!key_exists(c.single, "order")) return false
+    let ordering = c.single["order"]
+    if (ordering == null || ordering._type == null || ordering._type.firstType == null) return false
+    if (length(ordering.arguments) < 2) return false    // argument 1 (the key lambda) must exist
+    let opName = call_norm_name(ordering)
+    if (opName != "order_by" && opName != "order_by_descending") return false
+    return try_make_inline_cmp(ordering.arguments[1], opName, ordering._type.firstType, ordering.at) != null
+}
+
+// `has_where_or_distinct` — lets the order_fused_prefilter row tell itself apart from bare buffer_helper_dispatch.
+// The fused-loop path only makes sense when at least one prefilter ("where" or "distinct") was captured.
+def private has_where_or_distinct(var c : Captures; var top : Expression?) : bool {
+    return key_exists(c.single, "where") || key_exists(c.single, "distinct")
+}
+
+// Per-plan pattern tables — collapsed into splice_patterns in PR D.
+
+var private plan_reverse_patterns : array<SplicePattern>          // declared empty here; rows are presumably added elsewhere in the file — confirm
+var private plan_distinct_patterns : array<SplicePattern>         // same shape: one SplicePattern per recognised chain
+var private plan_loop_or_count_patterns : array<SplicePattern>    // table for the plan_loop_or_count migration (PR B)
+var private splice_patterns : array<SplicePattern>     // populated in PR D when per-plan tables collapse
+
+// ===== End of pattern-table kernel =====
+
 var private linqCalls = {
 // filtering data
     "where_" => LinqCall(name = "where_"),
@@ -274,6 +558,63 @@ def private collapse_chained_selects(var calls : array<tuple<ExprCall?; LinqCall
     }
 }
 
+[macro_function]
+def private collapse_chained_wheres(var calls : array<tuple<ExprCall?; LinqCall?>>) {
+    // Mirror of collapse_chained_selects shape (find adjacent same-op pairs, rename to fresh param, rewire backlink, erase inner, stay at i) but composes via `pred1 && pred2` instead of function compose. Both predicates take the source element, so the composed body uses ONE fresh param shared by both halves. No has_sideeffects bail needed — composition doesn't duplicate either body (each runs at most once per element, same as the imperative chain with short-circuit `&&`).
+    var i = 0
+    while (i + 1 < length(calls)) {
+        if (calls[i]._1.name != "where_" || calls[i + 1]._1.name != "where_") {    // only an adjacent where_/where_ pair is a candidate
+            i ++
+            continue
+        }
+        var innerLam = calls[i]._0.arguments[1]        // argument 1 of each call is taken as its predicate
+        var outerLam = calls[i + 1]._0.arguments[1]
+        if (innerLam == null || outerLam == null
+                || !(innerLam is ExprMakeBlock) || !(outerLam is ExprMakeBlock)) {    // both predicates must be ExprMakeBlock nodes
+            i ++
+            continue
+        }
+        var innerMblk = innerLam as ExprMakeBlock
+        var innerBlk = innerMblk._block as ExprBlock
+        var outerMblk = outerLam as ExprMakeBlock
+        var outerBlk = outerMblk._block as ExprBlock
+        if (innerBlk == null || outerBlk == null
+                || innerBlk.arguments |> length != 1 || outerBlk.arguments |> length != 1
+                || innerBlk.list |> length != 1 || outerBlk.list |> length != 1
+                || !(innerBlk.list[0] is ExprReturn) || !(outerBlk.list[0] is ExprReturn)) {    // shape required: one parameter, body is a single `return`
+            i ++
+            continue
+        }
+        var innerRet = innerBlk.list[0] as ExprReturn
+        var outerRet = outerBlk.list[0] as ExprReturn
+        if (innerRet.subexpr == null || outerRet.subexpr == null) {    // a bare `return` has nothing to compose
+            i ++
+            continue
+        }
+        // Rename both predicate params to the same fresh name — composed body references one shared bind.
+        let freshName = qn("cw", innerLam.at)
+        var innerBodyFresh = peel_lambda_rename_var(innerLam, freshName)
+        var outerBodyFresh = peel_lambda_rename_var(outerLam, freshName)
+        if (innerBodyFresh == null || outerBodyFresh == null) {    // peel failed for either side → leave the pair untouched
+            i ++
+            continue
+        }
+        var newLam = clone_expression(innerLam)    // the merged predicate starts as a clone of the inner one
+        var newMblk = newLam as ExprMakeBlock
+        var newBlk = newMblk._block as ExprBlock
+        if (newBlk == null || newBlk.list |> length != 1 || !(newBlk.list[0] is ExprReturn)) {    // re-check the clone's shape before mutating it
+            i ++
+            continue
+        }
+        var newRet = newBlk.list[0] as ExprReturn
+        newBlk.arguments[0].name := freshName
+        newRet.subexpr = merge_where_cond(innerBodyFresh, outerBodyFresh)    // inner condition first, then outer
+        calls[i + 1]._0.arguments[1] = newLam                                // the outer call now carries the merged predicate
+        calls[i + 1]._0.arguments[0] = calls[i]._0.arguments[0]              // ...and takes over the inner call's argument 0
+        calls |> erase(i)    // drop the inner call; `i` is not advanced, so the merged call is re-checked against its new neighbour
+    }
+}
+
 [macro_function]
 def private flatten_linq(var expr : Expression?)  {
     var top = expr
@@ -707,12 +1048,11 @@ def private emit_counter_lane(var top : Expression?; srcName, accName, itName :
 }
 
 [clone(top), macro_function]
-def private emit_array_lane(var top : Expression?; var expr : Expression?; var loopBody : Expression?; var elementType : TypeDeclPtr;
+def private emit_array_lane(var top : Expression?; isIter : bool; var loopBody : Expression?; var elementType : TypeDeclPtr;
                             srcName, accName, itName : string; names : RangeStateNames;
                             var skipExpr, takeExpr, skipWhileCond : Expression?;
                             at : LineInfo) : Expression? {
     // Array lane: `[skip/take init]; var acc : array<T>; [reserve]; for (it in src) { $loopBody }; return <- acc`
-    let isIter = expr._type.isIterator
     let sourceHasLength = type_has_length(top._type)
     var topExpr = clone_expression(top)
     topExpr.genFlags.alwaysSafe = true
@@ -1885,53 +2225,40 @@ def private plan_order_family(var expr : Expression?) : Expression? {
     return finalize_invoke(emission, at)
 }
 
+// ===== plan_loop_or_count migration (PR B) =====
+// Single emit + single pattern row. c_chain captures where_/select head; canonical-order slots carry the rest.
+
 [macro_function]
-def private plan_loop_or_count(var expr : Expression?) : Expression? {
-    // Phase-2C loop planner. Recognizes chains of shape `[where_*][select*][skip?][take?]`
-    var (top, calls) = flatten_linq(expr)
-    if (empty(calls)) return null
-    top = peel_each(top)
-    let lastName = calls.back()._1.name
-    let lane = classify_terminator(lastName)
-    // Marker: future PRs add BufferTopN / BufferDistinct / etc. for `is_buffer_required_op`
-    if (lane == LinqLane.UNKNOWN) return null
-    let counterLane = lane == LinqLane.COUNTER
-    let hasTerminator = lane != LinqLane.ARRAY
-    let intermediateCount = hasTerminator ? length(calls) - 1 : length(calls)
-    let at = calls[0]._0.at
-    let srcName = qn("source", at)
-    let itName  = qn("it", at)
+def private emit_loop_or_count_lane(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? {
+    var top = ctx.top
+    let srcName = (ctx.src as Array)._1
+    let itName = qn("it", at)
     let accName = qn("acc", at)
     let names <- make_range_names(at)
+    let hasTerminator = c.single |> key_exists("term")
+    var lastName : string
+    if (hasTerminator) {
+        lastName = call_norm_name(c.single["term"])
+    }
+    let lane = hasTerminator ? classify_terminator(lastName) : LinqLane.ARRAY
+    if (lane == LinqLane.UNKNOWN) return null
+    let counterLane = lane == LinqLane.COUNTER
+    if (top._type == null || top._type.firstType == null) return null
     var whereCond : Expression?
-    // postTakeWhereCond — Theme 2 5c: gates per-element contribution AFTER the take cap fires. Distinct from whereCond (which wraps the entire take/skip body); this preserves take.where semantics ("first N elements, then filter") that auto-rewriting can't reproduce.
     var postTakeWhereCond : Expression?
     var projection : Expression?
     var intermediateBinds : array<Expression?>
-    // preConditionStmts evaluate UNCONDITIONALLY per element, BEFORE the where filter —
     var preCondStmts : array<Expression?>
-    var skipExpr : Expression?
-    var takeExpr : Expression?
-    // skip_while / take_while: predicate-driven ranges. Both peel with itName (source elem); seenSelect bails to tier 2.
-    var skipWhileCond : Expression?
-    var takeWhileCond : Expression?
     var seenSelect = false
-    var seenSkip = false
-    var seenSkipWhile = false
-    var seenTakeWhile = false
-    var seenTake = false
     var allProjectionsPure = true
     var elementType = clone_type(top._type.firstType)
     var lastBindName = itName
-    for (i in 0 .. intermediateCount) {
-        var cll & = unsafe(calls[i])
-        let opName = cll._1.name
+    for (call in c.many["head"]) {
+        let opName = call_norm_name(call)
         if (opName == "where_") {
-            // Theme 2 5c — `take(N)._where(p)` allowed (routed to postTakeWhereCond, gates contribution only); other prior range ops still bail; single post-take where in v1.
-            if (seenSkip || seenSkipWhile || seenTakeWhile || (seenTake && postTakeWhereCond != null)) return null
             var predicate : Expression?
             if (seenSelect) {
-                // Phase 3d / single-eval: where-after-select. Bind the current projection
+                // where-after-select: bind the running projection to a typed local so the
                 if (has_sideeffects(projection)) return null
                 if (lane != LinqLane.COUNTER) {
                     let wbName = "`vw`{at.line}`{at.column}`{length(preCondStmts)}"
@@ -1939,26 +2266,21 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
                     preCondStmts |> push <| qmacro_expr() {
                         var $i(wbName) : $t(projType) := $e(projection)
                     }
-                    // Replace projection with a typed ExprVar so downstream typer passes
                     var pvar = new ExprVar(at = at, name := wbName)
                     pvar._type = clone_type(elementType)
                     pvar._type.flags.ref = true
                     projection = pvar
                 }
-                predicate = peel_lambda_replace_var(cll._0.arguments[1], projection)
+                predicate = peel_lambda_replace_var(call.arguments[1], projection)
             } else {
-                predicate = peel_lambda_rename_var(cll._0.arguments[1], itName)
+                predicate = peel_lambda_rename_var(call.arguments[1], itName)
             }
-            if (seenTake) {
-                postTakeWhereCond = predicate
-            } elif (whereCond == null) {
+            if (whereCond == null) {
                 whereCond = predicate
             } else {
                 whereCond = qmacro($e(whereCond) && $e(predicate))
             }
         } elif (opName == "select") {
-            if (seenSkip || seenSkipWhile || seenTakeWhile || seenTake) return null
-            // Chained selects: bind the previous projection to a fresh local now so the next
             if (projection != null) {
                 if (has_sideeffects(projection)) {
                     allProjectionsPure = false
@@ -1969,41 +2291,9 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
                 }
                 lastBindName = bindName
             }
-            projection = peel_lambda_rename_var(cll._0.arguments[1], lastBindName)
-            elementType = clone_type(cll._0._type.firstType)
+            projection = peel_lambda_rename_var(call.arguments[1], lastBindName)
+            elementType = clone_type(call._type.firstType)
             seenSelect = true
-        } elif (opName == "skip") {
-            // Canonical chain: at most one skip, before any skip_while/take_while/take.
-            if (seenSkip || seenSkipWhile || seenTakeWhile || seenTake) return null
-            var skipArg = cll._0.arguments[1]
-            if (skipArg == null || skipArg._type == null || skipArg._type.baseType != Type.tInt) return null
-            skipExpr = clone_expression(skipArg)
-            seenSkip = true
-        } elif (opName == "skip_while") {
-            // pred uses itName; seenSelect bails (chained-bind peel is a follow-up). Canonical: after skip, before take_while/take.
-            if (seenSelect || seenSkipWhile || seenTakeWhile || seenTake) return null
-            var swArg = cll._0.arguments[1]
-            if (swArg == null) return null
-            skipWhileCond = peel_lambda_rename_var(swArg, itName)
-            if (skipWhileCond == null) return null
-            seenSkipWhile = true
-        } elif (opName == "take_while") {
-            // take_while pred sees source element (itName). Same select-cascade rule as skip_while.
-            if (seenSelect || seenTakeWhile || seenTake) return null
-            var twArg = cll._0.arguments[1]
-            if (twArg == null) return null
-            takeWhileCond = peel_lambda_rename_var(twArg, itName)
-            if (takeWhileCond == null) return null
-            seenTakeWhile = true
-        } elif (opName == "take") {
-            if (seenTake) return null
-            var takeArg = cll._0.arguments[1]
-            if (takeArg == null || takeArg._type == null || takeArg._type.baseType != Type.tInt) return null
-            takeExpr = clone_expression(takeArg)
-            seenTake = true
-        } elif (is_buffer_required_op(opName)) {     // nolint:LINT009
-            // TODO Phase 2X: BufferTopN (order_by + take/skip), BufferDistinct (distinct/_by),
-            return null
         } else {
             return null
         }
@@ -2011,14 +2301,61 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
     if (projection != null && has_sideeffects(projection)) {
         allProjectionsPure = false
     }
+    var skipExpr : Expression?
+    var skipWhileCond : Expression?
+    var takeWhileCond : Expression?
+    var takeExpr : Expression?
+    if (c.single |> key_exists("skip")) {
+        var skipArg = c.single["skip"].arguments[1]
+        if (skipArg == null || skipArg._type == null || skipArg._type.baseType != Type.tInt) return null
+        skipExpr = clone_expression(skipArg)
+    }
+    if (c.single |> key_exists("skip_while")) {
+        if (seenSelect) return null
+        var swArg = c.single["skip_while"].arguments[1]
+        if (swArg == null) return null
+        skipWhileCond = peel_lambda_rename_var(swArg, itName)
+        if (skipWhileCond == null) return null
+    }
+    if (c.single |> key_exists("take_while")) {
+        if (seenSelect) return null
+        var twArg = c.single["take_while"].arguments[1]
+        if (twArg == null) return null
+        takeWhileCond = peel_lambda_rename_var(twArg, itName)
+        if (takeWhileCond == null) return null
+    }
+    if (c.single |> key_exists("take")) {
+        var takeArg = c.single["take"].arguments[1]
+        if (takeArg == null || takeArg._type == null || takeArg._type.baseType != Type.tInt) return null
+        takeExpr = clone_expression(takeArg)
+    }
+    if (c.single |> key_exists("post_take_where")) {
+        var ptwArg = c.single["post_take_where"].arguments[1]
+        if (ptwArg == null) return null
+        if (seenSelect) {
+            if (has_sideeffects(projection)) return null
+            if (lane != LinqLane.COUNTER) {
+                let wbName = "`vw`{at.line}`{at.column}`{length(preCondStmts)}"
+                var projType = clone_type(elementType)
+                preCondStmts |> push <| qmacro_expr() {
+                    var $i(wbName) : $t(projType) := $e(projection)
+                }
+                var pvar = new ExprVar(at = at, name := wbName)
+                pvar._type = clone_type(elementType)
+                pvar._type.flags.ref = true
+                projection = pvar
+            }
+            postTakeWhereCond = peel_lambda_replace_var(ptwArg, projection)
+        } else {
+            postTakeWhereCond = peel_lambda_rename_var(ptwArg, itName)
+        }
+    }
     let noLimits = skipExpr == null && takeExpr == null && skipWhileCond == null && takeWhileCond == null
-    // Count-shaped shortcut: when terminator is `count` (→ int) or `long_count` (→ int64),
     let isCountShaped = (lane == LinqLane.COUNTER
         || (lane == LinqLane.ACCUMULATOR && lastName == "long_count"))
     if (isCountShaped && whereCond == null && allProjectionsPure && noLimits
             && type_has_length(top._type))
         return emit_length_shortcut(lastName, top, srcName, at)
-    // Ring 1: accumulator lane builds its own per-op loop body (typed accumulator, optional
     if (lane == LinqLane.ACCUMULATOR) {
         var laneTops : array<Expression?>
         laneTops |> push(top)
@@ -2028,9 +2365,8 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
             intermediateBinds, preCondStmts, elementType, laneSrcs, accName, itName, names,
             skipExpr, takeExpr, skipWhileCond, takeWhileCond, at)
     }
-    // Ring 2: early-exit lane — `any` no-pred + no upstream work + no limits + length-bearing
     if (lane == LinqLane.EARLY_EXIT) {
-        let terminatorCall = calls.back()._0
+        let terminatorCall = c.single["term"]
         let isAnyNoPred = lastName == "any" && length(terminatorCall.arguments) == 1
         if (isAnyNoPred && whereCond == null && allProjectionsPure && noLimits
                 && type_has_length(top._type))
@@ -2043,10 +2379,8 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
             intermediateBinds, preCondStmts, elementType, terminatorCall, laneSrcs, itName, names,
             skipExpr, takeExpr, skipWhileCond, takeWhileCond, at)
     }
-    // Build the per-element loop body for COUNTER / ARRAY. Both lanes follow the same shape:
     var loopBody : Expression?
     if (counterLane) {
-        // Counter lane must evaluate the projection (and any chained intermediates) per
         var stmts : array<Expression?>
         if (projection != null && has_sideeffects(projection)) {
             let finalBindName = qn("vfinal", at)
@@ -2054,7 +2388,6 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
                 var $i(finalBindName) = $e(projection)
             }
         }
-        // Theme 2 5c: when postTakeWhereCond is set, gate JUST the acc++ — the take cap still ticks unconditionally above.
         var incExpr = qmacro_expr() {
             $i(accName) ++
         }
@@ -2063,7 +2396,6 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
         wrap_with_ranges(stmts, skipExpr, takeExpr, skipWhileCond, takeWhileCond, names)
         loopBody = prepend_precond(wrap_with_condition(stmts_to_expr(stmts), whereCond), preCondStmts)
     } else {
-        // Array lane. `push_clone` is the safe append everywhere: for workhorse types it's a
         var stmts : array<Expression?>
         var pushExpr : Expression?
         if (projection != null) {
@@ -2072,15 +2404,12 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
             }
         } elif (whereCond != null || postTakeWhereCond != null || skipExpr != null || takeExpr != null
                 || skipWhileCond != null || takeWhileCond != null) {
-            // Identity push: `it` aliases the source element. Reached when chain is bare
             pushExpr = qmacro_expr() {
                 $i(accName) |> push_clone($i(itName))
             }
         } else {
-            // identity chain — nothing to fuse; let the caller fall through.
             return null
         }
-        // Theme 2 5c: postTakeWhereCond gates JUST the push — same shape as counter lane.
         stmts |> push(wrap_with_condition(pushExpr, postTakeWhereCond))
         prepend_binds(stmts, intermediateBinds)
         wrap_with_ranges(stmts, skipExpr, takeExpr, skipWhileCond, takeWhileCond, names)
@@ -2090,290 +2419,365 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
         return emit_counter_lane(top, srcName, accName, itName, names,
             skipExpr, takeExpr, skipWhileCond, loopBody, at)
     } else {
-        return emit_array_lane(top, expr, loopBody, elementType, srcName, accName, itName,
+        return emit_array_lane(top, ctx.expr_is_iterator, loopBody, elementType, srcName, accName, itName,
             names, skipExpr, takeExpr, skipWhileCond, at)
     }
 }
 
+// Table-driven entry point: flattens the chain with flatten_linq, runs collapse_chained_selects / collapse_chained_wheres / peel_each, then tries each row of plan_loop_or_count_patterns in table order and returns the first non-null emit result; returns null when the chain is empty, no row matches, or every matching emit declines.
 [macro_function]
-def private plan_reverse(var expr : Expression?) : Expression? {
+def private plan_loop_or_count(var expr : Expression?) : Expression? {
     var (top, calls) = flatten_linq(expr)
     if (empty(calls)) return null
-    normalize_order_reverse(calls)
     collapse_chained_selects(calls)
+    collapse_chained_wheres(calls)
     top = peel_each(top)
-    var terminatorName : string  = ""
-    var terminatorCall : ExprCall?
-    {
-        let lastName = calls.back()._1.name
-        if (lastName == "count" || lastName == "first" || lastName == "first_or_default") {
-            terminatorName = lastName
-            terminatorCall = calls.back()._0
-            calls |> pop
+    let at = calls[0]._0.at // location of the first call in the chain; handed to every emitter
+    let exprIsIter = expr._type != null && expr._type.isIterator // false when expr has no type yet
+    let srcName = qn("source", at)
+    for (p in plan_loop_or_count_patterns) {
+        var r <- match_pattern(p, calls, top)
+        if (r is matched) {
+            var topClone = clone_expression(top) // each matching row gets its own clone of top
+            var ctx = EmitCtx(top = topClone, src = SourceAdapter(Array = (topClone, srcName)), expr_is_iterator = exprIsIter) // source is always wrapped as the Array variant here
+            var result = invoke(p.emit, r as matched, ctx, at)
+            if (result != null) return result // null = this emitter declined; keep trying later rows
         }
     }
-    if (empty(calls)) return null
+    return null // no row produced an emission
+}
+
+[_macro]
+def private populate_plan_loop_or_count_patterns {
+    if (!is_compiling_macros_in_module("linq_fold") || !empty(plan_loop_or_count_patterns)) return // no-op unless linq_fold's macros are being compiled, and no-op once the table already has rows
+    // Single row — c_chain head matches the where_/select run (0+ contiguous calls), then canonical-order range ops, optional post-take where, optional terminator from loop_terminator_family. The capture_name of each slot is the key the emitter later looks up in c.single ("skip", "skip_while", "take_while", "take", "post_take_where", "term").
+    plan_loop_or_count_patterns |> emplace <| SplicePattern(
+        name = "loop_or_count_general",
+        chain <- [
+            slot_chain_of(["where_", "select"], "head"),
+            Slot(matcher = m_literal("skip"),       cardinality = c_opt(), capture_name = "skip"),
+            Slot(matcher = m_literal("skip_while"), cardinality = c_opt(), capture_name = "skip_while"),
+            Slot(matcher = m_literal("take_while"), cardinality = c_opt(), capture_name = "take_while"),
+            Slot(matcher = m_literal("take"),       cardinality = c_opt(), capture_name = "take"),
+            Slot(matcher = m_literal("where_"),     cardinality = c_opt(), capture_name = "post_take_where"),
+            Slot(matcher = m_alias("loop_terminator_family"), cardinality = c_opt(), capture_name = "term")
+        ],
+        requires <- array<RequiresPredicate>(), // empty predicate list for this row
+        emit = @@ < EmitFn > emit_loop_or_count_lane
+    )
+}
+
+// ===== plan_reverse migration (PR A — pattern-table refactor) =====
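+// 5 emit archetypes + 5 pattern rows + stub. Each archetype is lifted from the prior imperative plan_reverse branches
+// (see git history before PR A) with small deliberate deviations: e.g. the `take` argument is now bound once, and the first_or_default default is no longer re-projected through the terminal select.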
+
+// Ra — counter (reverse is identity for count). Emits `var cnt = 0; for (it in source) { ...; cnt++ }; return cnt`, with the per-element step gated by the optional "where" capture; a side-effecting "proj" capture is still evaluated per match, a pure one is not emitted at all.
+[macro_function]
+def private emit_reverse_counter(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? {
+    var top = ctx.top
+    let srcName = (ctx.src as Array)._1 // ctx.src is read as the Array variant; _1 is used below as the source variable's name
+    let itName = qn("it", at)
+    let cntName = qn("cnt", at)
     var whereCond : Expression?
+    if (c.single |> key_exists("where")) {
+        whereCond = peel_lambda_rename_var(c.single["where"].arguments[1], itName)
+    }
     var projection : Expression?
-    var hasReverse = false
-    var seenSelect = false
-    var takeExpr : Expression?
-    var terminalSelectLam : Expression?
-    var terminalSelectElemType : TypeDeclPtr
-    // Theme 8 (audit 2a): trailing `distinct[_by]` after reverse. Single backward source walk with set-gated push — saves the cascade's reverse_to_array + distinct_by_inplace second walk. v1 limited to array source + implicit to_array terminator.
-    var distinctName : string
-    var distinctKey : Expression?
-    let at = calls[0]._0.at
-    let srcName = qn("source", at)
-    let itName  = qn("it", at)
-    let bufName = qn("buf", at)
+    if (c.single |> key_exists("proj")) {
+        projection = peel_lambda_rename_var(c.single["proj"].arguments[1], itName)
+    }
+    var perElement : Expression?
+    if (projection != null && has_sideeffects(projection)) { // only a side-effecting projection is bound per element; its value is otherwise unused
+        let vfinalName = qn("vfinal", at)
+        perElement = qmacro_block() {
+            var $i(vfinalName) = $e(projection)
+            $i(cntName) ++
+        }
+    } else {
+        perElement = qmacro_expr() {
+            $i(cntName) ++
+        }
+    }
+    perElement = wrap_with_condition(perElement, whereCond) // gates both the projection bind and the increment
+    var body : Expression? = qmacro_block() {
+        var $i(cntName) = 0
+        for ($i(itName) in $i(srcName)) {
+            $e(perElement)
+        }
+        return $i(cntName)
+    }
+    var bodyStmts : array<Expression?>
+    bodyStmts |> push_block_list(body)
+    return finalize_emission_stmts(top, srcName, at, bodyStmts)
+}
+
+// Rb — walk + overwrite-last scalar (terminator: first / first_or_default). "first of reversed" = LAST surviving element; walk source, overwrite `last` on each match. No buffer.
+// Reads the optional captures "where", "proj", "termsel" and the required capture "term"; returns null (declines) when "term" is missing, when a captured "termsel" has no lambda argument, or when the element type cannot be determined.
+[macro_function]
+def private emit_reverse_walk_overwrite_scalar(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? {
+    var top = ctx.top
+    let srcName = (ctx.src as Array)._1
+    let itName = qn("it", at)
     let foundName = qn("found", at)
     let lastName = qn("last", at)
-    let cntName = qn("cnt", at)
     let dBindName = qn("d", at)
-    let dkeyName = qn("rev_dkey", at)
-    let dsetName = qn("rev_dset", at)
-    let idxName = qn("rev_k", at)
-    let rlenName = qn("rev_len", at)
-    var reverseCall : ExprCall?
-    for (i in 0 .. length(calls)) {
-        var cll & = unsafe(calls[i])
-        let name = cll._1.name
-        if (name == "where_") {
-            if (hasReverse || seenSelect) return null
-            whereCond = merge_where_cond(whereCond, peel_lambda_rename_var(cll._0.arguments[1], itName))
-        } elif (name == "select") {
-            if (!hasReverse && !seenSelect) {
-                // Pre-reverse select: existing path (buffer holds projected values).
-                seenSelect = true
-                projection = peel_lambda_rename_var(cll._0.arguments[1], itName)
-            } elif (hasReverse && !seenSelect && terminalSelectLam == null && i == length(calls) - 1) {
-                // Terminal post-reverse select: project at return (R1-R4 buf or first scalar).
-                terminalSelectLam = cll._0.arguments[1]
-                if (terminalSelectLam == null
-                        || cll._0._type == null || cll._0._type.firstType == null) return null
-                terminalSelectElemType = clone_type(cll._0._type.firstType)
-            } else {
-                return null
-            }
-        } elif (name == "reverse") {
-            if (hasReverse) return null
-            hasReverse = true
-            reverseCall = cll._0
-        } elif (name == "take") {
-            if (!hasReverse || takeExpr != null) return null
-            var arg = cll._0.arguments[1]
-            if (arg == null || arg._type == null || arg._type.baseType != Type.tInt) return null
-            takeExpr = clone_expression(arg)
-        } elif (name == "distinct" || name == "distinct_by") {
-            // Theme 8 (audit 2a): trailing distinct[_by] after reverse, no other chain ops, implicit to_array terminator, array source. Walks source backward with set-gated push.
-            if (!hasReverse || distinctName != "" || i != length(calls) - 1) return null
-            distinctName = name
-            if (name == "distinct_by") {
-                if ((cll._0.arguments |> length) < 2) return null
-                distinctKey = clone_expression(cll._0.arguments[1])
-            }
-        } else {
-            return null
+    var whereCond : Expression?
+    if (c.single |> key_exists("where")) {
+        whereCond = peel_lambda_rename_var(c.single["where"].arguments[1], itName)
+    }
+    var projection : Expression?
+    if (c.single |> key_exists("proj")) {
+        projection = peel_lambda_rename_var(c.single["proj"].arguments[1], itName)
+    }
+    var terminalSelectLam : Expression?
+    if (c.single |> key_exists("termsel")) {
+        terminalSelectLam = c.single["termsel"].arguments[1]
+    }
+    if (!(c.single |> key_exists("term")) || ((c.single |> key_exists("termsel")) && terminalSelectLam == null)) return null // a captured termsel without a lambda would silently return the unprojected value — decline instead, as the buffer archetype does
+    let terminatorCall = c.single["term"]
+    let terminatorName = call_norm_name(terminatorCall)
+    // Bail to cascade if upstream typing is incomplete — `lastType` reads `projection._type` / `top._type.firstType`, either can be null on partially-typed exprs.
+    if (projection != null) {
+        if (projection._type == null) return null
+    } else {
+        if (top._type == null || top._type.firstType == null) return null
+    }
+    var lastType = strip_const_ref(clone_type(projection != null ? projection._type : top._type.firstType))
+    var valueExpr : Expression?
+    if (projection != null) {
+        valueExpr = clone_expression(projection)
+    } else {
+        valueExpr = qmacro_expr() {
+            $i(itName)
         }
     }
-    // Bail conditions: no reverse; take+terminator (take only with implicit to_array); count+terminal-select (would drop side effects); Theme 8 (audit 2a) distinct path constrained to array source + implicit to_array + no other chain ops.
-    if (!hasReverse || (takeExpr != null && terminatorName != "")
-            || (terminalSelectLam != null && terminatorName == "count")
-            || (distinctName != "" && (terminatorName != "" || takeExpr != null || projection != null
-                    || whereCond != null || terminalSelectLam != null
-                    || !(top._type.isGoodArrayType || top._type.isArray)))) return null
+    var matchBlock : Expression? = qmacro_block() {
+        $i(lastName) := $e(valueExpr)
+        $i(foundName) = true
+    }
+    var perElement = wrap_with_condition(matchBlock, whereCond)
+    var lastRetExpr : Expression?
+    if (terminalSelectLam != null) {
+        lastRetExpr = peel_lambda_replace_var(terminalSelectLam, qmacro($i(lastName)))
+    } else {
+        lastRetExpr = qmacro($i(lastName))
+    }
+    // first_or_default's user default is already at post-termsel type — re-projecting through `termsel` double-applies.
+    var dRetExpr : Expression? = qmacro($i(dBindName))
     var body : Expression?
-    if (terminatorName == "count") {
-        // Reverse is identity for count — counter loop, no buffer. Side-effecting projection still fires per match.
-        var perElement : Expression?
-        if (projection != null && has_sideeffects(projection)) {
-            let vfinalName = qn("vfinal", at)
-            perElement = qmacro_block() {
-                var $i(vfinalName) = $e(projection)
-                $i(cntName) ++
+    if (terminatorName == "first") { // `first` panics when nothing matched; every other terminator takes the default-value branch below
+        body = qmacro_block() {
+            var $i(foundName) = false
+            var $i(lastName) : $t(lastType) = default<$t(lastType)>
+            for ($i(itName) in $i(srcName)) {
+                $e(perElement)
             }
-        } else {
-            perElement = qmacro_expr() {
-                $i(cntName) ++
+            if (!$i(foundName)) {
+                panic("sequence contains no elements")
             }
+            return $e(lastRetExpr)
         }
-        perElement = wrap_with_condition(perElement, whereCond)
+    } else { // default-value branch: terminator argument [1] is bound once, before the walk
         body = qmacro_block() {
-            var $i(cntName) = 0
+            let $i(dBindName) = $e(terminatorCall.arguments[1])
+            var $i(foundName) = false
+            var $i(lastName) : $t(lastType) = default<$t(lastType)>
             for ($i(itName) in $i(srcName)) {
                 $e(perElement)
             }
-            return $i(cntName)
+            return $i(foundName) ? $e(lastRetExpr) : $e(dRetExpr)
         }
-    } elif (terminatorName == "first" || terminatorName == "first_or_default") {
-        // "first of reversed" = LAST surviving element. Walk source, overwrite `last` on each match. No buffer.
-        var lastType = strip_const_ref(clone_type(projection != null ? projection._type : top._type.firstType))
-        var valueExpr : Expression?
-        if (projection != null) {
-            valueExpr = clone_expression(projection)
-        } else {
-            valueExpr = qmacro_expr() {
-                $i(itName)
-            }
+    }
+    var bodyStmts : array<Expression?>
+    bodyStmts |> push_block_list(body)
+    return finalize_emission_stmts(top, srcName, at, bodyStmts)
+}
+
+// R6 — backward index walk (bare reverse + take(N) + implicit to_array on array source).
+// Visits only the last takeN indices — skips full-source push + O(length) reverse_inplace. Emits: clamp the take limit to [0, length(source)], reserve that many slots, push_clone source[len - 1 - k] for k in 0 .. takeN, then return the buffer via buffer_return.
+[macro_function]
+def private emit_reverse_backward_index_walk(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? {
+    var top = ctx.top
+    let srcName = (ctx.src as Array)._1
+    if (!(c.single |> key_exists("take")) || top._type == null || top._type.firstType == null) return null // decline without a captured `take` or when the source element type is unknown
+    var bufElemType = strip_const_ref(clone_type(top._type.firstType))
+    let bufName = qn("buf", at)
+    let lenName = qn("rlen", at)
+    let takeLimName = qn("rtakeLim", at)
+    let takeNName = qn("rtn", at)
+    let kName = qn("rk", at)
+    var returnExpr = buffer_return(bufName, ctx.expr_is_iterator)
+    // Bind `take` arg once — user expression may have side effects; matches normal call semantics. NOTE(review): the argument's type is not validated here (the removed imperative code required Type.tInt) — presumably the pattern row guarantees it; confirm.
+    var body : Expression? = qmacro_block() {
+        let $i(lenName) = length($i(srcName))
+        let $i(takeLimName) = $e(c.single["take"].arguments[1])
+        let $i(takeNName) = $i(takeLimName) <= 0 ? 0 : ($i(takeLimName) < $i(lenName) ? $i(takeLimName) : $i(lenName))
+        var $i(bufName) : array<$t(bufElemType)>
+        $i(bufName) |> reserve($i(takeNName))
+        for ($i(kName) in 0 .. $i(takeNName)) {
+            $i(bufName) |> push_clone($i(srcName)[$i(lenName) - 1 - $i(kName)])
         }
-        var matchBlock : Expression? = qmacro_block() {
-            $i(lastName) := $e(valueExpr)
-            $i(foundName) = true
+        $e(returnExpr)
+    }
+    var bodyStmts : array<Expression?>
+    bodyStmts |> push_block_list(body)
+    return finalize_emission_stmts(top, srcName, at, bodyStmts)
+}
+
+// R-2a — backward walk + dset gate (Theme 8 / audit 2a: reverse + distinct[_by] + implicit to_array).
+// Single backward source walk with set-gated push — saves the cascade's reverse_to_array + distinct_by_inplace second walk. Requires the "dist" capture; returns null (declines) when it is missing, when distinct_by has no key argument, when the key lambda cannot be peeled, or when the source element type is unknown.
+[macro_function]
+def private emit_reverse_backward_walk_dset_gate(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? {
+    var top = ctx.top
+    let srcName = (ctx.src as Array)._1
+    if (!(c.single |> key_exists("dist"))) return null
+    let distinctCall = c.single["dist"]
+    let distinctName = call_norm_name(distinctCall)
+    var distinctKey : Expression?
+    if (distinctName == "distinct_by") {
+        if ((distinctCall.arguments |> length) < 2) return null
+        distinctKey = clone_expression(distinctCall.arguments[1]) // cloned key lambda: peeled for the per-element key and invoked inside typedecl for the table's key type
+    }
+    if (top._type == null || top._type.firstType == null) return null
+    var bufElemType = strip_const_ref(clone_type(top._type.firstType))
+    let itName = qn("it", at)
+    let bufName = qn("buf", at)
+    let dkeyName = qn("rev_dkey", at)
+    let dsetName = qn("rev_dset", at)
+    let idxName = qn("rev_k", at)
+    let rlenName = qn("rev_len", at)
+    var dkeyExpr : Expression?
+    if (distinctName == "distinct_by") {
+        dkeyExpr = peel_lambda_rename_var(distinctKey, itName)
+        if (dkeyExpr == null) return null
+    } else {
+        dkeyExpr = qmacro($i(itName)) // plain distinct: the element itself is the dedup key
+    }
+    var dsetDecl : Expression?
+    if (distinctName == "distinct_by") {
+        dsetDecl = qmacro_expr() {
+            var inscope $i(dsetName) : table<typedecl(_::unique_key(invoke($e(distinctKey), default<$t(bufElemType)>)))>
         }
-        var perElement = wrap_with_condition(matchBlock, whereCond)
-        // Terminal _select: `last` stays source-typed; project (and the default) at return.
-        var lastRetExpr : Expression?
-        var dRetExpr : Expression?
-        if (terminalSelectLam != null) {
-            lastRetExpr = peel_lambda_replace_var(terminalSelectLam, qmacro($i(lastName)))
-            dRetExpr = peel_lambda_replace_var(terminalSelectLam, qmacro($i(dBindName)))
-        } else {
-            lastRetExpr = qmacro($i(lastName))
-            dRetExpr = qmacro($i(dBindName))
+    } else {
+        dsetDecl = qmacro_expr() {
+            var inscope $i(dsetName) : table<typedecl(_::unique_key(default<$t(bufElemType)>))>
         }
-        if (terminatorName == "first") {
-            body = qmacro_block() {
-                var $i(foundName) = false
-                var $i(lastName) : $t(lastType) = default<$t(lastType)>
-                for ($i(itName) in $i(srcName)) {
-                    $e(perElement)
-                }
-                if (!$i(foundName)) {
-                    panic("sequence contains no elements")
-                }
-                return $e(lastRetExpr)
-            }
-        } else {
-            body = qmacro_block() {
-                let $i(dBindName) = $e(terminatorCall.arguments[1])
-                var $i(foundName) = false
-                var $i(lastName) : $t(lastType) = default<$t(lastType)>
-                for ($i(itName) in $i(srcName)) {
-                    $e(perElement)
-                }
-                return $i(foundName) ? $e(lastRetExpr) : $e(dRetExpr)
+    }
+    var returnExpr = buffer_return(bufName, ctx.expr_is_iterator)
+    var body : Expression? = qmacro_block() {
+        let $i(rlenName) = length($i(srcName))
+        var $i(bufName) : array<$t(bufElemType)>
+        $e(dsetDecl)
+        for ($i(idxName) in 0 .. $i(rlenName)) {
+            let $i(itName) = $i(srcName)[$i(rlenName) - 1 - $i(idxName)]
+            let $i(dkeyName) = _::unique_key($e(dkeyExpr))
+            if (!$i(dsetName) |> key_exists($i(dkeyName))) {
+                $i(dsetName) |> insert($i(dkeyName))
+                $i(bufName) |> push_clone($i(itName))
             }
         }
+        $e(returnExpr)
+    }
+    var bodyStmts : array<Expression?>
+    bodyStmts |> push_block_list(body)
+    return finalize_emission_stmts(top, srcName, at, bodyStmts)
+}
+
+// R1-R4 — catch-all buffer + reverse_inplace + optional resize + optional terminal _select. Reads the optional captures "where", "preproj", "take", "termsel"; returns null (declines) when a captured termsel is missing its lambda or result element type, when the source element type is unknown, or when a captured preproj could not be peeled or is untyped.
+[macro_function]
+def private emit_reverse_buffer_inplace(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? {
+    var top = ctx.top
+    let srcName = (ctx.src as Array)._1
+    let itName = qn("it", at)
+    let bufName = qn("buf", at)
+    let outBufName = qn("rev_proj_buf", at)
+    let elemName = qn("rev_proj_e", at)
+    var whereCond : Expression?
+    if (c.single |> key_exists("where")) {
+        whereCond = peel_lambda_rename_var(c.single["where"].arguments[1], itName)
+    }
+    var projection : Expression?
+    if (c.single |> key_exists("preproj")) {
+        projection = peel_lambda_rename_var(c.single["preproj"].arguments[1], itName)
+    }
+    var takeExpr : Expression?
+    if (c.single |> key_exists("take")) {
+        takeExpr = clone_expression(c.single["take"].arguments[1]) // NOTE(review): the argument's type is not validated here — presumably the pattern row guarantees an int; confirm
+    }
+    var terminalSelectLam : Expression?
+    var terminalSelectElemType : TypeDeclPtr
+    if (c.single |> key_exists("termsel")) {
+        terminalSelectLam = c.single["termsel"].arguments[1]
+        if (terminalSelectLam == null || c.single["termsel"]._type == null || c.single["termsel"]._type.firstType == null) return null
+        terminalSelectElemType = clone_type(c.single["termsel"]._type.firstType)
+    }
+    if (top._type == null || top._type.firstType == null || ((c.single |> key_exists("preproj")) && (projection == null || projection._type == null))) return null // also decline when a captured pre-reverse select failed to peel or is untyped — bufElemType below reads projection._type (same bail as the scalar archetype)
+    // Buffer element type: post-select if a pre-reverse projection narrowed the element shape; otherwise source type.
+    var bufElemType : TypeDeclPtr
+    if (projection != null) {
+        bufElemType = strip_const_ref(clone_type(projection._type))
     } else {
-        // R1-R4 path: buffer + reverse_inplace + optional resize + return buffer.
-        let needIterWrap = expr._type.isIterator
-        var bufElemType = strip_const_ref(clone_type(reverseCall._type.firstType))
-        // Terminal _select projects buffer survivors at return (after resize trims to take(N)).
-        let outBufName = qn("rev_proj_buf", at)
-        let elemName = qn("rev_proj_e", at)
-        var projBody : Expression?
-        if (terminalSelectLam != null) {
-            projBody = peel_lambda_replace_var(terminalSelectLam, qmacro($i(elemName)))
+        bufElemType = strip_const_ref(clone_type(top._type.firstType))
+    }
+    var projBody : Expression?
+    if (terminalSelectLam != null) {
+        projBody = peel_lambda_replace_var(terminalSelectLam, qmacro($i(elemName)))
+    }
+    var pushExpr : Expression?
+    if (projection != null) {
+        pushExpr = qmacro_expr() {
+            $i(bufName) |> push_clone($e(projection))
         }
-        // Theme 8 (audit 2a): reverse + distinct[_by] + to_array on array source — single backward walk with set-gated push. Mirrors canBackwardIndex below but gates push by dedup key instead of indexing into the last takeN slots.
-        if (distinctName != "") {
-            var dkeyExpr : Expression?
-            if (distinctName == "distinct_by") {
-                dkeyExpr = peel_lambda_rename_var(distinctKey, itName)
-                if (dkeyExpr == null) return null
-            } else {
-                dkeyExpr = qmacro($i(itName))
-            }
-            var dsetDecl : Expression?
-            if (distinctName == "distinct_by") {
-                dsetDecl = qmacro_expr() {
-                    var inscope $i(dsetName) : table<typedecl(_::unique_key(invoke($e(distinctKey), default<$t(bufElemType)>)))>
-                }
-            } else {
-                dsetDecl = qmacro_expr() {
-                    var inscope $i(dsetName) : table<typedecl(_::unique_key(default<$t(bufElemType)>))>
-                }
-            }
-            var returnExpr = buffer_return(bufName, needIterWrap)
-            body = qmacro_block() {
-                let $i(rlenName) = length($i(srcName))
-                var $i(bufName) : array<$t(bufElemType)>
-                $e(dsetDecl)
-                for ($i(idxName) in 0 .. $i(rlenName)) {
-                    let $i(itName) = $i(srcName)[$i(rlenName) - 1 - $i(idxName)]
-                    let $i(dkeyName) = _::unique_key($e(dkeyExpr))
-                    if (!$i(dsetName) |> key_exists($i(dkeyName))) {
-                        $i(dsetName) |> insert($i(dkeyName))
-                        $i(bufName) |> push_clone($i(itName))
-                    }
-                }
-                $e(returnExpr)
-            }
-        } elif (takeExpr != null && projection == null && whereCond == null
-                && terminalSelectLam == null
-                && (top._type.isGoodArrayType || top._type.isArray)) {
-            // R6: visit only the last takeN indices — skips full-source push + O(length) reverse_inplace.
-            let lenName   = qn("rlen", at)
-            let takeNName = qn("rtn", at)
-            let kName     = qn("rk", at)
-            var returnExpr = buffer_return(bufName, needIterWrap)
-            body = qmacro_block() {
-                let $i(lenName) = length($i(srcName))
-                let $i(takeNName) = $e(takeExpr) <= 0 ? 0 : ($e(takeExpr) < $i(lenName) ? $e(takeExpr) : $i(lenName))
-                var $i(bufName) : array<$t(bufElemType)>
-                $i(bufName) |> reserve($i(takeNName))
-                for ($i(kName) in 0 .. $i(takeNName)) {
-                    $i(bufName) |> push_clone($i(srcName)[$i(lenName) - 1 - $i(kName)])
-                }
-                $e(returnExpr)
-            }
-        } else {
-            var pushExpr : Expression?
-            if (projection != null) {
-                pushExpr = qmacro_expr() {
-                    $i(bufName) |> push_clone($e(projection))
-                }
-            } else {
-                pushExpr = qmacro_expr() {
-                    $i(bufName) |> push_clone($i(itName))
-                }
-            }
-            pushExpr = wrap_with_condition(pushExpr, whereCond)
-            var reserveStmts : array<Expression?>
-            if (type_has_length(top._type) && whereCond == null) {
-                reserveStmts |> push <| qmacro_expr() {
-                    $i(bufName) |> reserve(length($i(srcName)))
-                }
+    } else {
+        pushExpr = qmacro_expr() {
+            $i(bufName) |> push_clone($i(itName))
+        }
+    }
+    pushExpr = wrap_with_condition(pushExpr, whereCond)
+    var reserveStmts : array<Expression?>
+    if (type_has_length(top._type) && whereCond == null) { // reserve up front only when the source has a length and no where filter is present
+        reserveStmts |> push <| qmacro_expr() {
+            $i(bufName) |> reserve(length($i(srcName)))
+        }
+    }
+    var resizeStmts : array<Expression?>
+    if (takeExpr != null) {
+        // Bind `take` arg once — user expression may have side effects; matches normal call semantics.
+        let takeLimName = qn("rev_takeLim", at)
+        resizeStmts |> push_from <| qmacro_block_to_array() {
+            let $i(takeLimName) = $e(takeExpr)
+            $i(bufName) |> resize($i(takeLimName) <= 0 ? 0 : ($i(takeLimName) < length($i(bufName)) ? $i(takeLimName) : length($i(bufName))))
+        }
+    }
+    var body : Expression?
+    if (terminalSelectLam != null) { // terminal select: project the reversed (and trimmed) buffer into a second buffer and return that one
+        var returnExpr = buffer_return(outBufName, ctx.expr_is_iterator)
+        body = qmacro_block() {
+            var $i(bufName) : array<$t(bufElemType)>
+            $b(reserveStmts)
+            for ($i(itName) in $i(srcName)) {
+                $e(pushExpr)
             }
-            var resizeStmts : array<Expression?>
-            if (takeExpr != null) {
-                // take(N) of reversed-buffer = last N of source reversed. Three clones since no math::min import.
-                resizeStmts |> push <| qmacro_expr() {
-                    $i(bufName) |> resize($e(takeExpr) <= 0 ? 0 : ($e(takeExpr) < length($i(bufName)) ? $e(takeExpr) : length($i(bufName))))
-                }
+            _::reverse_inplace($i(bufName))
+            $b(resizeStmts)
+            var $i(outBufName) : array<$t(terminalSelectElemType)>
+            $i(outBufName) |> reserve(length($i(bufName)))
+            for ($i(elemName) in $i(bufName)) {
+                $i(outBufName) |> push_clone($e(projBody))
             }
-            if (terminalSelectLam != null) {
-                // Post-reverse projection: outBuf returned in place of bufName.
-                var returnExpr = buffer_return(outBufName, needIterWrap)
-                body = qmacro_block() {
-                    var $i(bufName) : array<$t(bufElemType)>
-                    $b(reserveStmts)
-                    for ($i(itName) in $i(srcName)) {
-                        $e(pushExpr)
-                    }
-                    _::reverse_inplace($i(bufName))
-                    $b(resizeStmts)
-                    var $i(outBufName) : array<$t(terminalSelectElemType)>
-                    $i(outBufName) |> reserve(length($i(bufName)))
-                    for ($i(elemName) in $i(bufName)) {
-                        $i(outBufName) |> push_clone($e(projBody))
-                    }
-                    $e(returnExpr)
-                }
-            } else {
-                var returnExpr = buffer_return(bufName, needIterWrap)
-                body = qmacro_block() {
-                    var $i(bufName) : array<$t(bufElemType)>
-                    $b(reserveStmts)
-                    for ($i(itName) in $i(srcName)) {
-                        $e(pushExpr)
-                    }
-                    _::reverse_inplace($i(bufName))
-                    $b(resizeStmts)
-                    $e(returnExpr)
-                }
+            $e(returnExpr)
+        }
+    } else {
+        var returnExpr = buffer_return(bufName, ctx.expr_is_iterator)
+        body = qmacro_block() {
+            var $i(bufName) : array<$t(bufElemType)>
+            $b(reserveStmts)
+            for ($i(itName) in $i(srcName)) {
+                $e(pushExpr)
             }
+            _::reverse_inplace($i(bufName))
+            $b(resizeStmts)
+            $e(returnExpr)
         }
     }
     var bodyStmts : array<Expression?>
@@ -2381,72 +2785,150 @@ def private plan_reverse(var expr : Expression?) : Expression? {
     return finalize_emission_stmts(top, srcName, at, bodyStmts)
 }
 
+// Stub — table-driven dispatch into plan_reverse_patterns. Populated by populate_plan_reverse_patterns [_macro].
 [macro_function]
-def private plan_distinct(var expr : Expression?) : Expression? {
+def private plan_reverse(var expr : Expression?) : Expression? {
     var (top, calls) = flatten_linq(expr)
     if (empty(calls)) return null
+    normalize_order_reverse(calls)
     collapse_chained_selects(calls)
+    collapse_chained_wheres(calls)
     top = peel_each(top)
-    var terminatorName : string  = ""
-    var countPred : Expression?
-    {
-        let lastName = calls.back()._1.name
-        let lastArgs = calls.back()._0.arguments |> length
-        // `sum` has no selector overload that interacts cleanly with distinct buffering — keep 1-arg only. count/long_count(predicate) (Theme 4): dedup runs UNCONDITIONALLY (distinct_by keeps the FIRST occurrence per key); a separate `acc` counter increments only when the predicate matches that first occurrence. Wrapping dedup in `if(P)` would diverge from tier-2 when a later duplicate matches but the first didn't.
-        if ((lastName == "count" || lastName == "sum" || lastName == "long_count") && lastArgs == 1) {
-            terminatorName = lastName
-            calls |> pop
-        } elif ((lastName == "count" || lastName == "long_count") && lastArgs == 2) {
-            countPred = calls.back()._0.arguments[1]
-            if (countPred == null) return null
-            terminatorName = lastName
-            calls |> pop
-        }
-    }
-    if (empty(calls)) return null
-    var whereCond : Expression?
-    var projection : Expression?
-    var hasDistinct = false
-    var seenSelect = false
-    var isDistinctBy = false
-    var distinctKeyBlock : Expression?
-    var takeExpr : Expression?
     let at = calls[0]._0.at
+    let exprIsIter = expr._type != null && expr._type.isIterator
     let srcName = qn("source", at)
-    let itName  = qn("it", at)
+    for (p in plan_reverse_patterns) {
+        var r <- match_pattern(p, calls, top)
+        if (r is matched) {
+            var topClone = clone_expression(top)
+            var ctx = EmitCtx(top = topClone, src = SourceAdapter(Array = (topClone, srcName)), expr_is_iterator = exprIsIter)
+            var result = invoke(p.emit, r as matched, ctx, at)
+            if (result != null) return result
+        }
+    }
+    return null
+}
+
+// [_macro] runs at macro-compile time and stays off the JIT runtime graph — the rows carry @@<EmitFn>
+// addresses of [macro_function] emit fns whose quote() bodies the LLVM JIT can't lower.
+[_macro]
+def private populate_plan_reverse_patterns {
+    if (!is_compiling_macros_in_module("linq_fold") || !empty(plan_reverse_patterns)) return
+    // R-2a (most specific) — Theme 8 / audit 2a — backward walk + dset gate
+    plan_reverse_patterns |> emplace <| SplicePattern(
+        name = "reverse_distinct_backward_walk",
+        chain <- [
+            Slot(matcher = m_literal("reverse"), cardinality = c_one()),
+            Slot(matcher = m_alias("distinct_family"), cardinality = c_one(), capture_name = "dist")
+        ],
+        requires <- [@@ < RequiresPredicate > array_source, @@ < RequiresPredicate > no_terminator],
+        emit = @@ < EmitFn > emit_reverse_backward_walk_dset_gate
+    )
+    // R6 — bare reverse + take + to_array, backward index walk
+    plan_reverse_patterns |> emplace <| SplicePattern(
+        name = "reverse_take_backward_index",
+        chain <- [
+            Slot(matcher = m_literal("reverse"), cardinality = c_one()),
+            Slot(matcher = m_literal("take"), cardinality = c_one(), capture_name = "take")
+        ],
+        requires <- [@@ < RequiresPredicate > array_source, @@ < RequiresPredicate > take_arg_is_int, @@ < RequiresPredicate > no_terminator],
+        emit = @@ < EmitFn > emit_reverse_backward_index_walk
+    )
+    // Ra — reverse + count (reverse is identity for count)
+    plan_reverse_patterns |> emplace <| SplicePattern(
+        name = "reverse_counter",
+        chain <- [
+            Slot(matcher = m_literal("where_"), cardinality = c_opt(), capture_name = "where"),
+            Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "proj"),
+            Slot(matcher = m_literal("reverse"), cardinality = c_one()),
+            Slot(matcher = m_literal("count"), cardinality = c_one(), capture_name = "term", arity = 1)
+        ],
+        requires <- array<RequiresPredicate>(),     // Ra works on iterator and array sources (for-loop body, no indexed access)
+        emit = @@ < EmitFn > emit_reverse_counter
+    )
+    // Rb — reverse + first[_or_default], walk-and-overwrite-last scalar; optional terminal _select
+    plan_reverse_patterns |> emplace <| SplicePattern(
+        name = "reverse_walk_overwrite",
+        chain <- [
+            Slot(matcher = m_literal("where_"), cardinality = c_opt(), capture_name = "where"),
+            Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "proj"),
+            Slot(matcher = m_literal("reverse"), cardinality = c_one()),
+            Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "termsel"),
+            Slot(matcher = m_alias("first_family"), cardinality = c_one(), capture_name = "term")
+        ],
+        requires <- array<RequiresPredicate>(),
+        emit = @@ < EmitFn > emit_reverse_walk_overwrite_scalar
+    )
+    // R1-R4 — catch-all buffer + reverse_inplace + optional resize + optional terminal _select
+    plan_reverse_patterns |> emplace <| SplicePattern(
+        name = "reverse_buffer_inplace",
+        chain <- [
+            Slot(matcher = m_literal("where_"), cardinality = c_opt(), capture_name = "where"),
+            Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "preproj"),
+            Slot(matcher = m_literal("reverse"), cardinality = c_one()),
+            Slot(matcher = m_literal("take"), cardinality = c_opt(), capture_name = "take"),
+            Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "termsel")
+        ],
+        requires <- [@@ < RequiresPredicate > take_arg_is_int],
+        emit = @@ < EmitFn > emit_reverse_buffer_inplace
+    )
+}
+
+// ===== plan_distinct migration (PR A — pattern-table refactor) =====
+// 1 emit archetype with internal terminator-shape dispatch + 2 pattern rows + stub.
+
+// Hashtable dedup + per-fresh-key consume; terminator picks return wiring (count/long_count/sum/implicit to_array).
+// take(N) bounds outer loop with cross-iteration break for true O(N)-source streaming exit.
+[macro_function]
+def private emit_hashtable_dedup(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? {
+    var top = ctx.top
+    let srcName = (ctx.src as Array)._1
+    let itName = qn("it", at)
     let bufName = qn("buf", at)
     let seenName = qn("seen", at)
     let keyName = qn("k", at)
     let takenName = qn("taken", at)
     let accName = qn("acc", at)
-    for (i in 0 .. length(calls)) {
-        var cll & = unsafe(calls[i])
-        let name = cll._1.name
-        if (name == "where_") {
-            if (hasDistinct || seenSelect) return null
-            whereCond = merge_where_cond(whereCond, peel_lambda_rename_var(cll._0.arguments[1], itName))
-        } elif (name == "select") {
-            if (hasDistinct || seenSelect) return null
-            seenSelect = true
-            projection = peel_lambda_rename_var(cll._0.arguments[1], itName)
-        } elif (name == "distinct") {
-            if (hasDistinct) return null
-            hasDistinct = true
-        } elif (name == "distinct_by") {
-            if (hasDistinct) return null
-            hasDistinct = true
-            isDistinctBy = true
-            distinctKeyBlock = clone_expression(cll._0.arguments[1])
-        } elif (name == "take") {
-            if (!hasDistinct || takeExpr != null) return null
-            var arg = cll._0.arguments[1]
-            if (arg == null || arg._type == null || arg._type.baseType != Type.tInt) return null
-            takeExpr = clone_expression(arg)
+    let takeLimName = qn("takeLim", at)
+    let pvName = qn("pv", at)
+    var whereCond : Expression?
+    if (c.single |> key_exists("where")) {
+        whereCond = peel_lambda_rename_var(c.single["where"].arguments[1], itName)
+    }
+    var projection : Expression?
+    if (c.single |> key_exists("proj")) {
+        projection = peel_lambda_rename_var(c.single["proj"].arguments[1], itName)
+    }
+    if (!(c.single |> key_exists("dist"))) return null
+    let distinctCall = c.single["dist"]
+    let isDistinctBy = call_norm_name(distinctCall) == "distinct_by"
+    var distinctKeyBlock : Expression?
+    if (isDistinctBy) {
+        if ((distinctCall.arguments |> length) < 2) return null
+        distinctKeyBlock = clone_expression(distinctCall.arguments[1])
+    }
+    var takeExpr : Expression?
+    if (c.single |> key_exists("take")) {
+        takeExpr = clone_expression(c.single["take"].arguments[1])
+    }
+    var terminatorName : string  = ""
+    var countPred : Expression?
+    if (c.single |> key_exists("term")) {
+        let terminatorCall = c.single["term"]
+        let termName = call_norm_name(terminatorCall)
+        let termArgs = terminatorCall.arguments |> length
+        // `sum` has no selector overload that interacts cleanly with distinct buffering — keep 1-arg only. count/long_count(predicate) (Theme 4): dedup runs UNCONDITIONALLY (distinct_by keeps the FIRST occurrence per key); a separate `acc` counter increments only when the predicate matches that first occurrence.
+        if ((termName == "count" || termName == "sum" || termName == "long_count") && termArgs == 1) {
+            terminatorName = termName
+        } elif ((termName == "count" || termName == "long_count") && termArgs == 2) {
+            if (terminatorCall.arguments[1] == null) return null
+            countPred = clone_expression(terminatorCall.arguments[1])
+            terminatorName = termName
         } else {
             return null
         }
     }
-    if (!hasDistinct || (takeExpr != null && terminatorName != "")) return null
+    if ((takeExpr != null && terminatorName != "") || top._type == null || top._type.firstType == null) return null
     var elemType = strip_const_ref(clone_type(projection != null ? projection._type : top._type.firstType))
     var stmts : array<Expression?>
     if (isDistinctBy) {
@@ -2478,16 +2960,12 @@ def private plan_distinct(var expr : Expression?) : Expression? {
             var $i(accName) = 0l
         }
     }
-    // Bind take(N) limit once at outer scope so a side-effecting arg fires once, not on every fresh-key check.
-    let takeLimName = qn("takeLim", at)
     if (takeExpr != null) {
         stmts |> push_from <| qmacro_block_to_array() {
             let $i(takeLimName) = $e(takeExpr)
             var $i(takenName) = 0
         }
     }
-    // Bind side-effecting projection once per element; key + buffer/acc share the bind (matches original LINQ's one-eval per source elem).
-    let pvName = qn("pv", at)
     let bindProjection = projection != null && has_sideeffects(projection)
     var pushExpr : Expression?
     if (projection != null) {
@@ -2509,12 +2987,10 @@ def private plan_distinct(var expr : Expression?) : Expression? {
     } else {
         keyExpr = clone_expression(pushExpr)
     }
-    // count(p) / long_count(p): peel against pushExpr so the predicate sees the post-projection element (`_select(_.brand)._distinct().count(b => b > 0)` → b binds to the projected brand, not the source tuple).
     if (countPred != null) {
         countPred = peel_lambda_replace_var(countPred, pushExpr)
         if (countPred == null) return null
     }
-    // Per-match consume: only inside the fresh-key branch; take-break is the outer-loop guard below.
     var consumeStmts : array<Expression?>
     if (takeExpr != null) {
         consumeStmts |> push <| qmacro_expr() {
@@ -2533,7 +3009,6 @@ def private plan_distinct(var expr : Expression?) : Expression? {
             $i(accName) += $e(pushExpr)
         }
     }
-    // count(p) / long_count(p): dedup happens unconditionally so `distinct_by` semantics stay correct (FIRST occurrence per key); the predicate gates only the matched counter. Without this split the chain `distinct_by(k).count(p)` would diverge from tier-2 when a later duplicate matches p but the first one didn't.
     if (countPred != null && isCountTerminator) {
         consumeStmts |> push <| qmacro_expr() {
             if ($e(countPred)) {
@@ -2559,7 +3034,6 @@ def private plan_distinct(var expr : Expression?) : Expression? {
     perMatchStmts |> push(ifNew)
     var perElement = stmts_to_expr(perMatchStmts)
     perElement = wrap_with_condition(perElement, whereCond)
-    // Outer-loop take guard: top-of-loop break for true O(N)-source streaming exit (skips duplicates after Nth distinct).
     if (takeExpr != null) {
         stmts |> push <| qmacro_expr() {
             for ($i(itName) in $i(srcName)) {
@@ -2601,11 +3075,64 @@ def private plan_distinct(var expr : Expression?) : Expression? {
             return $i(accName)
         }
     } else {
-        stmts |> push(buffer_return(bufName, expr._type.isIterator))
+        stmts |> push(buffer_return(bufName, ctx.expr_is_iterator))
     }
     return finalize_emission_stmts(top, srcName, at, stmts)
 }
 
+// Stub — table-driven dispatch into plan_distinct_patterns.
+[macro_function]
+def private plan_distinct(var expr : Expression?) : Expression? {
+    var (top, calls) = flatten_linq(expr)
+    if (empty(calls)) return null
+    collapse_chained_selects(calls)
+    collapse_chained_wheres(calls)
+    top = peel_each(top)
+    let at = calls[0]._0.at
+    let exprIsIter = expr._type != null && expr._type.isIterator
+    let srcName = qn("source", at)
+    for (p in plan_distinct_patterns) {
+        var r <- match_pattern(p, calls, top)
+        if (r is matched) {
+            var topClone = clone_expression(top)
+            var ctx = EmitCtx(top = topClone, src = SourceAdapter(Array = (topClone, srcName)), expr_is_iterator = exprIsIter)
+            var result = invoke(p.emit, r as matched, ctx, at)
+            if (result != null) return result
+        }
+    }
+    return null
+}
+
+[_macro]
+def private populate_plan_distinct_patterns {
+    if (!is_compiling_macros_in_module("linq_fold") || !empty(plan_distinct_patterns)) return
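+    // distinct_take — distinct[_by] |> take(N); source loop breaks at the take cap. The trailing terminator slot only lets the row consume the chain: emit_hashtable_dedup returns null when a terminator is captured alongside take.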
+    plan_distinct_patterns |> emplace <| SplicePattern(
+        name = "distinct_take",
+        chain <- [
+            Slot(matcher = m_literal("where_"), cardinality = c_opt(), capture_name = "where"),
+            Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "proj"),
+            Slot(matcher = m_alias("distinct_family"), cardinality = c_one(), capture_name = "dist"),
+            Slot(matcher = m_literal("take"), cardinality = c_one(), capture_name = "take"),
+            Slot(matcher = m_alias("distinct_terminator_family"), cardinality = c_opt(), capture_name = "term")
+        ],
+        requires <- [@@ < RequiresPredicate > take_arg_is_int],
+        emit = @@ < EmitFn > emit_hashtable_dedup
+    )
+    // distinct_main — distinct[_by]; optional terminator (count / long_count / sum / implicit to_array)
+    plan_distinct_patterns |> emplace <| SplicePattern(
+        name = "distinct_main",
+        chain <- [
+            Slot(matcher = m_literal("where_"), cardinality = c_opt(), capture_name = "where"),
+            Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "proj"),
+            Slot(matcher = m_alias("distinct_family"), cardinality = c_one(), capture_name = "dist"),
+            Slot(matcher = m_alias("distinct_terminator_family"), cardinality = c_opt(), capture_name = "term")
+        ],
+        requires <- array<RequiresPredicate>(),
+        emit = @@ < EmitFn > emit_hashtable_dedup
+    )
+}
+
 // ── Group-by helpers ──────────────────────────────────────────────────
 
 [macro_function]
diff --git a/daslib/linq_fold.md b/daslib/linq_fold.md
new file mode 100644
index 0000000000..c58eca4a61
--- /dev/null
+++ b/daslib/linq_fold.md
@@ -0,0 +1,466 @@
+# linq_fold.das refactor — masterplan
+
+Living document. Update **Status** + **Decision log** as phases ship.
+
+## Status
+
+- [x] **PR A** — Foundation + first migrations (plan_reverse, plan_distinct) — branch `bbatkin/linq-fold-patterns-foundation`
+- [x] **PR B1** — KR-1 closure (`collapse_chained_wheres`) + `c_chain` cardinality + `Captures` wrapper struct + `plan_loop_or_count` migration — branch `bbatkin/linq-fold-pattern-table-prb`
+- [ ] **PR B2** — `plan_order_family` migration (5 emit archetypes + 5 rows) — deferred follow-up; foundation (aliases / predicates / c_chain) shipped in B1
+- [ ] **PR C** — SourceAdapter + decs mirrors (plan_decs_reverse / _distinct / _order_family / _unroll)
+- [ ] **PR D** — Group-by + special cases (plan_group_by family, plan_zip, plan_decs_join, reducer-spec data table)
+
+## Goal
+
+Split `_fold` splice machinery into two layers:
+
+- **Pattern recognition** — declared via a data table (`splice_patterns`). Each row names a chain shape, a list of requires-predicates, and a target emit fn.
+- **Code generation** — reusable emit archetypes parameterized by a `SourceAdapter` (array / decs / decs-find, plus future zip / decs-join variants).
+
+Today's 13 `plan_*` functions (~3300 LOC across the file) re-implement the same boilerplate: `flatten_linq → declare 8-30 tracking flags → giant for-loop with if/elif on op name → ad-hoc co-occurrence guards → giant final bail → emit dispatch`. 70-80% of plan_* code is recognition state + co-occurrence checking, not codegen. Adding a splice arm means patching 1-3 plan_* cores at 5-8 sites each, hunting through if/elif walls.
+
+End state: adding an arm = adding a row to `splice_patterns`. Coverage gaps surface as missing rows, not as negative bails buried at the bottom of a function.
+
+Estimated savings: **~1750 LOC removed** across PRs A–D (~25% of the file).
+
+## Today's situation (input to the refactor)
+
+13 `plan_*` functions cover all splice cases. From the census:
+
+| Plan | LOC | Anchor | Emit shapes |
+|---|---|---|---|
+| plan_order_family | 543 | `order*` | 11 (3 archetypes × variants) |
+| plan_loop_or_count | 208 | terminator dispatch | 6 (already lane-factored) |
+| plan_reverse | 284 | `reverse` | 5 |
+| plan_distinct | 223 | `distinct*` | 1 archetype × 7 returns |
+| plan_group_by_core | 364 | (pre-stripped contract) | 8 + 12-arm reducer table |
+| plan_group_by | 62 | `group_by_lazy` | delegate |
+| plan_decs_unroll | 61 | decs + terminator | 7 (already dispatcher) |
+| plan_decs_group_by | 103 | `group_by_lazy` + decs | delegate |
+| plan_decs_order_family | 384 | `order*` + decs | ~12 (near-mirror of array sibling) |
+| plan_decs_reverse | 288 | `reverse` + decs | 4 (near-mirror) |
+| plan_decs_distinct | 254 | `distinct*` + decs | 1 × 4 × 2 (near-mirror) |
+| plan_decs_join | 189 | `join` + 2 decs bridges | 1 × 4 (no array sibling) |
+| plan_zip | 352 | `zip` | 8 |
+
+Decs-side plans are near-mirrors of their array-side siblings modulo source-loop wrap. `GroupBySourceAdapter` (existing) is the proof-case: `plan_group_by_core` is fully source-agnostic, with the array-side and decs-side wrappers each ~60-100 LOC.
+
+## Grammar kernel (lives inline at top of linq_fold.das)
+
+Visibility follows the rule in **Tests / exports philosophy** below: default `private`, public only what walker tests must name (`Slot` / `SlotMatcher` / `SlotCardinality` + their `m_*`/`c_*` constructors, `SplicePattern`, the `Captures` / `EmitCtx` structs and the `EmitFn` typedef, `chain_prefix_of`, `check_pattern_table_reachable`, `alias_table`). The snippet below omits the `private` keyword for readability; the implementation in `linq_fold.das` carries it on everything not in that public list.
+
+```das
+variant SourceAdapter {
+    Array        : tuple<Expression?; string>   // (top, srcName) — PR A scope
+    // PR C widens: Decs(DecsBridgeShape), DecsFind(DecsBridgeShape)
+    // PR D widens: Zip(...), DecsJoin(...)
+}
+
+variant SlotMatcher {
+    literal      : string                       // exact name match
+    one_of       : array<string>                // any-of name set
+    alias        : string                       // named group looked up in alias_table
+}
+
+variant SlotCardinality {
+    one          : void?                        // required, exactly 1
+    optional     : void?                        // 0 or 1
+    chain        : void?                        // 0 or more (greedy); captures as array<ExprCall?> via Captures.many — PR B1
+}
+
+struct Slot {
+    matcher      : SlotMatcher
+    cardinality  : SlotCardinality
+    capture_name : string = ""                  // "" = don't capture
+    arity        : int = -1                     // -1 = any; positive = require N args
+}
+
+// PR B1 — Captures is a wrapper struct: `single` for c_one/c_opt slots, `many` for c_chain slots.
+struct Captures {
+    single : table<string; ExprCall?>
+    many   : table<string; array<ExprCall?>>
+}
+
+variant MatchResult {
+    no_match : void?                            // daslang-idiomatic Option<Captures>
+    matched  : Captures
+}
+
+typedef RequiresPredicate = function<(var c : Captures; var top : Expression?) : bool>
+
+// Fold-time context passed to every emit archetype. Carries the peeled source expression, source adapter,
+// and the outer `_fold(...)` expression's iterator-ness (drives `buffer_return` wrap).
+struct EmitCtx {
+    top              : Expression?              // peel_each'd; stubs pre-clone per invoke
+    src              : SourceAdapter
+    expr_is_iterator : bool
+}
+
+typedef EmitFn = function<(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression?>
+
+struct SplicePattern {
+    name     : string                           // for debug / lint diagnostics
+    chain    : array<Slot>
+    requires : array<RequiresPredicate>         // all must hold
+    emit     : EmitFn
+}
+
+var plan_reverse_patterns : array<SplicePattern>
+var plan_distinct_patterns : array<SplicePattern>
+var plan_loop_or_count_patterns : array<SplicePattern>   // PR B1
+var splice_patterns : array<SplicePattern>     // PR D: collapsed from per-plan tables; first match wins
+```
+
+Predicates and emit archetypes are NAMED module-level `def` functions wrapped at use sites with `@@<RequiresPredicate>` / `@@<EmitFn>` (anonymous `@@(...)` lambdas produce `_localfunction_*` symbols that the LLVM JIT pass can't resolve — named functions take a stable address).
+
+### Walker contract
+
+```das
+def match_pattern(p : SplicePattern;
+                  var calls : array<tuple<ExprCall?; LinqCall?>>;
+                  var top : Expression?) : MatchResult
+```
+
+Walks `calls` left-to-right. For each slot:
+
+- `one` — current call must match (name + arity if specified); both cursors advance.
+- `optional` — if current call matches, both cursors advance; otherwise the slot is skipped without consuming.
+- `chain` (PR B1) — greedy match-while-in-set. Captured as `array<ExprCall?>` into `captures.many[capture_name]`. Always succeeds (0+); empty match still creates the `many` entry so emit fns can rely on `c.many |> key_exists("…")`. Pairs with `m_one_of` via the `slot_chain_of(names, cap)` convenience constructor.
+
+After all slots are processed, no unconsumed calls may remain. If any of the above fails → `MatchResult(no_match = null)`.
+
+Then each `RequiresPredicate` in `p.requires` is evaluated against the populated `Captures` and the peeled `top`. All must return true. If any fails → `MatchResult(no_match = null)`.
+
+Returns `MatchResult(matched <- captures)` on full success (move semantics — `Captures` is a struct wrapping two tables). The caller binds `var r <- match_pattern(...)` and reads via `if (r is matched) { let c & = r as matched; … }`.
+
+### Alias table (named op-name groups)
+
+The snippet below is the projected end-state at PR D. The authoritative live list is the `alias_table` literal in [daslib/linq_fold.das](linq_fold.das). Status reflects what's populated through PR B1.
+
+```das
+// projected end-state at PR D
+var alias_table : table<string; array<string>> <- {
+    "order_family"               => ["order", "order_descending", "order_by", "order_by_descending"],  // PR B1 ✓
+    "distinct_family"            => ["distinct", "distinct_by"],                                       // PR A ✓
+    "first_family"               => ["first", "first_or_default"],                                     // PR A ✓
+    "count_family"               => ["count", "long_count"],                                           // PR A ✓
+    "accum_family"               => ["sum", "min", "max", "average", "aggregate",                      // PR B1 ✓
+                                     "min_by", "max_by", "min_max", "min_max_by",
+                                     "min_max_average", "min_max_average_by", "long_count"],
+    "early_exit_family"          => ["any", "all", "contains", "first", "first_or_default"],           // PR B1 ✓
+    "range_op_family"            => ["skip", "skip_while", "take_while", "take"],                      // PR B1 ✓
+    "loop_terminator_family"     => union of count + accum + early_exit + last/single/element_at,      // PR B1 ✓ (loop_or_count terminator slot)
+    "distinct_terminator_family" => ["count", "long_count", "sum"]                                     // PR A ✓ — narrow to terminators emit_hashtable_dedup actually handles
+}
+```
+
+### Predicate library
+
+Module-level named `RequiresPredicate` constants for reuse across patterns. As with `alias_table`, this table shows the projected end-state — see [daslib/linq_fold.das](linq_fold.das) for what's actually defined today (PR A: `array_source`, `take_arg_is_int`, `no_terminator`).
+
+| Name | Status | Meaning |
+|---|---|---|
+| `array_source` | PR A ✓ | `top._type.isGoodArrayType \|\| top._type.isArray` (after `peel_each`) |
+| `array_random_access` | planned | `array_source && top._type.isGoodArrayType` |
+| `decs_source` | planned | `extract_decs_bridge(top) != null` |
+| `inline_cmp_available` | PR B1 ✓ | `try_make_inline_cmp` succeeds on `c.single["order"]` (only `order_by[_descending]` with inline-splice-able key). Hard-wired to `"order"` capture key; promote to factory on second use |
+| `has_where_or_distinct` | PR B1 ✓ | `c.single \|> key_exists("where") \|\| c.single \|> key_exists("distinct")`. For `order_fused_prefilter` row to distinguish from bare `buffer_helper_dispatch` |
+| `take_arg_is_int` | PR A ✓ | captured `take`'s 2nd arg `_type.baseType == Type.tInt`; vacuous if no `take` slot |
+| `arity_eq(cap, n)` / `arity_ge(cap, n)` | planned (factory) | structural checks on captured calls |
+| `no_terminator` | PR A ✓ | no terminator captured (final optional slot empty); return shape decided by `ctx.expr_is_iterator` in the emit fn |
+| `is_primitive_key(cap, argIdx)` | planned (factory) | `is_primitive_join_key_type` on captured arg |
+
+PR A's `take_arg_is_int` is hard-wired to the `"take"` capture key (not a factory) because every PR A consumer uses that name. Promote to a `make_arity_eq(cap, n)` / `make_arg_type_is(cap, idx, type)` factory shape on second use, per the masterplan rule of thumb.
+
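+Inline closures (`@@(c, top) => …`) would be convenient for one-off pattern-specific checks, but note the `_localfunction_*` JIT caveat under the kernel snippet above — prefer a private named function unless that caveat is confirmed not to apply. **Rule of thumb:** promote to a shared named predicate (or factory) on second use.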
+
+## Migration phases
+
+| PR | Phase | Scope | Status |
+|---|---|---|---|
+| **A** | 0 — Foundation | Kernel types + walker + alias_table + predicate library + per-archetype unit tests. `splice_patterns` empty initially (safe state — all cascades unchanged). | complete |
+| **A** | 1 — First migrations | `plan_reverse` (5 rows: Ra/Rb/R6/R-2a/R1-R4), `plan_distinct` (2 rows + return-shape switch in emit). Archetypes: `emit_reverse_counter`, `emit_reverse_walk_overwrite_scalar`, `emit_reverse_backward_walk_dset_gate`, `emit_reverse_backward_index_walk`, `emit_reverse_buffer_inplace`, `emit_hashtable_dedup`. **Hard-delete imperative bodies.** | complete |
+| **B1** | 2a — Array core (`plan_loop_or_count`) | `c_chain` cardinality + `Captures` wrapper struct (`single` / `many`) + `slot_chain_of(names, cap)` constructor. `collapse_chained_wheres` pre-pass (KR-1 fix). `plan_loop_or_count` migration (1 row + lane dispatch — preserves existing factoring; head c_chain matches `["where_", "select"]` greedy). | complete |
+| **B2** | 2b — Array core (`plan_order_family`) | `plan_order_family` (5 rows: streaming-min / bounded-heap / fused-prefilter / buffer-helper-dispatch / order_then_plain_distinct). Archetypes: `emit_streaming_min`, `emit_bounded_heap`, `emit_fused_prefilter`, `emit_buffer_helper_dispatch`, shared `emit_terminal_select_project`. **Hard-delete imperative body.** | not started |
+| **C** | 3 — SourceAdapter + decs mirrors | Widen `SourceAdapter` to multi-variant + methods. Migrate `plan_decs_reverse / _distinct / _order_family / _unroll` — **reuse array-side rows + emit fns** modulo adapter swap. **Hard-delete decs imperative bodies.** | not started |
+| **D** | 4 — Group-by + special cases | Reconcile `GroupBySourceAdapter` with `SourceAdapter`. `plan_group_by` + `plan_decs_group_by` → thin pattern rows delegating to existing `plan_group_by_core` (which stays as a sub-codegen). `plan_zip` (1-2 rows, possibly `SourceAdapter::Zip`). `plan_decs_join` (1 row, `SourceAdapter::DecsJoin` or special-case emit). Migrate `emit_reducer_branches` 12-arm if/elif into a `ReducerSpec` data table. | not started |
+
+**Net at end: ~-1750 LOC.**
+
+## Migration mechanics
+
+During migration, today's planner cascade at lines 6302-6337 stays unchanged. Each migrated `plan_X` function body becomes a short dispatch stub (sketch — the prelude that builds `calls`, `top`, `ctx`, and `at` is elided):
+
+```das
+def private plan_reverse(var expr : Expression?) : Expression? {
+    for (p in plan_reverse_patterns) {  // per-plan table; the real stub also falls through to the next row when emit returns null
+        var r <- match_pattern(p, calls, top)
+        if (r is matched) return invoke(p.emit, r as matched, ctx, at)
+    }
+    return null
+}
+```
+
+After PR D, collapse all stubs + cascade into one flat walk over `splice_patterns` (the per-plan `plan_<X>_patterns` tables go away once every plan is expressed as rows).
+
+## What we KEEP from today's code
+
+All shared helpers stay as building blocks for emit archetypes:
+
+- `flatten_linq`, `peel_each`, `extract_decs_bridge`
+- `normalize_order_reverse`, `collapse_chained_selects`
+- `peel_lambda_rename_var`, `peel_lambda_replace_var`, `peel_lambda_rename_2vars`
+- `merge_where_cond`, `wrap_with_condition`, `wrap_with_ranges`
+- `try_make_inline_cmp`, `make_inline_less_call`
+- `buffer_return`, `finalize_emission_stmts`, `finalize_decs_emission`
+- `qn`, `clone_expression`, `clone_type`, `strip_const_ref`
+- `has_sideeffects`, `type_has_length`, `classify_terminator`
+- `plan_group_by_core` (stays as sub-codegen; only its wrapper plans migrate)
+- `GroupBySourceAdapter` (PR D folds it into the new `SourceAdapter`)
+
+## Tests / exports philosophy
+
+Default private; promote ONLY what a synthetic-input test must name. PR A's actual public surface is narrow:
+
+- Slot construction: `Slot`, `SlotMatcher`, `SlotCardinality`, `m_literal`, `m_alias`, `c_one`, `c_opt`, `c_chain`, `slot_chain_of`
+- Pattern row: `SplicePattern`
+- Struct/typedefs used in test fn signatures: `Captures` (struct), `EmitCtx`, `EmitFn`
+- Lint helpers tests assert on: `chain_prefix_of`, `check_pattern_table_reachable`
+- `alias_table` (so tests can read which aliases populated)
+
+Everything else stays private: the walker (`match_pattern`), the per-plan tables, the predicate library, all `emit_*` archetypes, all populate fns, `SourceAdapter` / `MatchResult` / `RequiresPredicate` / `LinqCall`. They're only called inside this module — bare names compose without cross-module visibility.
+
+Per-archetype unit testing via direct calls is impractical anyway: emit fns are `[macro_function]` whose bodies contain `quote()` nodes the runtime can't lower (LLVM JIT bail). End-to-end behavioral tests carry the per-archetype coverage in `tests/linq/test_linq_fold_*` (each user chain exercises one or more archetypes through the splice).
+
+## Naming (decided)
+
+| Name | Role |
+|---|---|
+| `splice_patterns` | The master table — `array<SplicePattern>` |
+| `SplicePattern` | Per-row struct |
+| `Slot` | Chain slot |
+| `SlotMatcher`, `SlotCardinality` | Variant types |
+| `Captures` | Struct `{ single : table<string;ExprCall?>; many : table<string;array<ExprCall?>> }`. `single` for c_one/c_opt slots, `many` for c_chain slots. The `LinqCall` record is accessible separately via `linqCalls[call_norm_name(c)]` |
+| `MatchResult` | Variant `no_match : void? \| matched : Captures` — walker return type |
+| `c_chain` / `slot_chain_of(names, cap)` | Greedy run cardinality + (matcher = m_one_of(names), cardinality = c_chain()) convenience constructor — PR B1 |
+| `RequiresPredicate`, `EmitFn` | Function-typedef types — see kernel snippet for current signatures |
+| `EmitCtx` | Struct `{ top; src; expr_is_iterator }` passed to every emit archetype |
+| `SourceAdapter` | Source-loop abstraction variant |
+| `alias_table` | Named op-name groups |
+| `match_pattern(...)` | Walker function |
+| `plan_<X>_patterns` | Per-plan filtered subset (only during migration; deleted in PR D) |
+
+## PR B1 (shipped) + PR B2 (planned) sketch
+
+### PR B1 — shipped
+
+**Branch:** `bbatkin/linq-fold-pattern-table-prb`
+
+**Scope (delivered):** KR-1 closure (`collapse_chained_wheres`) + `c_chain` cardinality + `Captures` wrapper struct (`single` / `many`) + `slot_chain_of(names, cap)` constructor + `plan_loop_or_count` migration (1 row, replaces 210 LOC imperative). PR A's 6 emit fns + 5 predicates mechanically migrated to `c.single[…]` (~47 sites).
+
+### PR B2 — planned
+
+**Scope:** `plan_order_family` migration (5 rows). All foundation (aliases / predicates / `c_chain`) shipped in B1; B2 is row + emit-archetype work only.
+
+### Pre-pass (PR B1 ✓)
+
+- `collapse_chained_wheres(calls)` — mirrors `collapse_chained_selects` modulo composition: clone inner where lambda, rename param to fresh name, build composed body via `merge_where_cond(innerBodyFresh, outerBodyFresh)` (which is `inner && outer`), rewire chain backlink, erase inner from `calls`. **No `has_sideeffects` bail** — composition uses cloned ASTs and AND-merge preserves left-to-right evaluation order with short-circuit semantics identical to the imperative cascade. Called from `plan_reverse`, `plan_distinct`, `plan_loop_or_count` stubs. **KR-1 fix; load-bearing for plan_loop_or_count row.**
+
+### Pattern row shipped (PR B1)
+
+**`plan_loop_or_count`** — 1 row using the new `c_chain` cardinality for the head:
+
+```das
+SplicePattern(
+    name = "loop_or_count_general",
+    chain = [
+        slot_chain_of(["where_", "select"], "head"),                   // c_chain — 0+ contiguous where/select
+        Slot(m_literal("skip"),       c_opt(), "skip"),
+        Slot(m_literal("skip_while"), c_opt(), "skip_while"),
+        Slot(m_literal("take_while"), c_opt(), "take_while"),
+        Slot(m_literal("take"),       c_opt(), "take"),
+        Slot(m_literal("where_"),     c_opt(), "post_take_where"),     // Theme 2 5c
+        Slot(m_alias("loop_terminator_family"), c_opt(), "term")
+    ],
+    requires = [],   // intrinsic — chain shape carries the constraints
+    emit = @@<EmitFn> emit_loop_or_count_lane)
+```
+
+`emit_loop_or_count_lane` walks `c.many["head"]` left-to-right applying the same where_/select arms (AND-merge, chained-select rebinding, where-after-select projection-replace) the imperative loop did. Range ops + post-take-where + terminator come from `c.single[…]`. Pre-dispatch fast paths: `emit_length_shortcut`, `emit_any_empty_shortcut`. Lane dispatch: `classify_terminator(call_norm_name(c.single["term"]))` → `emit_counter_lane` / `emit_array_lane` / `emit_accumulator_lane` / `emit_early_exit_lane`. `emit_array_lane` refactored to take `isIter : bool` directly (was `expr : Expression?` just to read `.isIterator`) so the new emit fn can pass `ctx.expr_is_iterator` cleanly.
+
+### Predicates added (PR B1 ✓)
+
+- `inline_cmp_available(c, top)` — `try_make_inline_cmp(c.single["order"].arguments[1], …)`. For PR B2's `order_streaming_min` + `order_bounded_heap` rows.
+- `has_where_or_distinct(c, top)` — `c.single |> key_exists("where") || c.single |> key_exists("distinct")`. For PR B2's `order_fused_prefilter` row.
+
+### PR B2 — planned rows
+
+**`plan_order_family`** — 5 rows, priority order 1 → 5:
+
+```das
+// Row 1 — streaming_min: inline-cmp + first[_or_default]
+SplicePattern(
+    name = "order_streaming_min",
+    chain = [
+        Slot(m_literal("where_"),       c_opt(), "where"),
+        Slot(m_alias("distinct_family"), c_opt(), "distinct"),
+        Slot(m_alias("order_family"),    c_one(), "order"),
+        Slot(m_alias("first_family"),    c_one(), "term"),
+        Slot(m_literal("select"),        c_opt(), "termsel"),
+    ],
+    requires = [@@<RequiresPredicate> inline_cmp_available],
+    emit = @@<EmitFn> emit_streaming_min)
+
+// Row 2 — bounded_heap: inline-cmp + take(N)
+SplicePattern(
+    name = "order_bounded_heap",
+    chain = [
+        Slot(m_literal("where_"),       c_opt(), "where"),
+        Slot(m_alias("distinct_family"), c_opt(), "distinct"),
+        Slot(m_alias("order_family"),    c_one(), "order"),
+        Slot(m_literal("take"),          c_one(), "take"),
+        Slot(m_literal("select"),        c_opt(), "termsel"),
+    ],
+    requires = [@@<RequiresPredicate> inline_cmp_available, @@<RequiresPredicate> take_arg_is_int],
+    emit = @@<EmitFn> emit_bounded_heap)
+
+// Row 3 — fused_prefilter: where or distinct present, no inline-cmp shortcut
+SplicePattern(
+    name = "order_fused_prefilter",
+    chain = [
+        Slot(m_literal("where_"),       c_opt(), "where"),
+        Slot(m_alias("distinct_family"), c_opt(), "distinct"),
+        Slot(m_alias("order_family"),    c_one(), "order"),
+        Slot(m_literal("take"),          c_opt(), "take"),
+        Slot(m_alias("first_family"),    c_opt(), "term"),
+        Slot(m_literal("select"),        c_opt(), "termsel"),
+    ],
+    requires = [@@<RequiresPredicate> has_where_or_distinct, @@<RequiresPredicate> take_arg_is_int],
+    emit = @@<EmitFn> emit_fused_prefilter)
+
+// Row 4 — buffer_helper_dispatch: bare order, direct call to daslib helpers
+SplicePattern(
+    name = "order_buffer_helper_dispatch",
+    chain = [
+        Slot(m_alias("order_family"), c_one(), "order"),
+        Slot(m_literal("take"),       c_opt(), "take"),
+        Slot(m_alias("first_family"), c_opt(), "term"),
+    ],
+    requires = [@@<RequiresPredicate> take_arg_is_int],
+    emit = @@<EmitFn> emit_buffer_helper_dispatch)
+
+// Row 5 — order_then_plain_distinct: `order + distinct (plain)` accepted by master imperative
+// (whole-tuple equality is position-invariant). distinct_by AFTER order_by would NOT be safe
+// (distinct_by picks an arbitrary K1 representative regardless of sort order). distinct is
+// literal "distinct" only (no alias) to forbid distinct_by here.
+SplicePattern(
+    name = "order_then_plain_distinct",
+    chain = [
+        Slot(m_alias("order_family"), c_one(), "order"),
+        Slot(m_literal("distinct"),   c_one(), "distinct_after"),
+        Slot(m_literal("take"),       c_opt(), "take"),
+        Slot(m_alias("first_family"), c_opt(), "term"),
+        Slot(m_literal("select"),     c_opt(), "termsel"),
+    ],
+    requires = [@@<RequiresPredicate> take_arg_is_int],
+    emit = @@<EmitFn> emit_fused_prefilter)   // reuses emit_fused_prefilter with distinct_after capture
+```
+
+### Emit archetypes
+
+| Name | Source lines | LOC | Notes |
+|---|---|---|---|
+| `emit_streaming_min` | 1662-1727 | ~65 | Single-best state, per-element less-test |
+| `emit_bounded_heap` | 1729-1855 | ~125 | Size-N heap during walk; distinct gate variant (Theme 3 Phase 3); terminal `_select` variant |
+| `emit_fused_prefilter` | 1928-2107 | ~180 | Walk into buffer with where/distinct gate; sort/min/top_n on buffer; terminal `_select` variant; internal dispatch on take/first/bare |
+| `emit_buffer_helper_dispatch` | 1857-1927 | ~70 | Direct call to `order` / `top_n*` / `min_max` helpers; 4 sub-paths |
+| `emit_loop_or_count_lane` | 2243-2317 + recognition state | ~150 | Single row with internal `classify_terminator` dispatch into 4 existing lane emit fns |
+| `emit_terminal_select_project` | NEW shared helper | ~30 | Used by `emit_bounded_heap` + `emit_fused_prefilter` for `outBuf` projection-from-`buf` |
+
+### Co-occurrence audit (to verify during implementation)
+
+The imperative code has a few subtle co-occurrence rules that may not map cleanly onto the pattern table:
+
+- **`order + distinct (plain)`**: imperative `plan_order_family` accepts `distinct` (not `distinct_by`) AFTER `order_by` because whole-tuple equality is position-invariant. **Decision (2026-05-26)**: add row 5 `order_then_plain_distinct` so PR B has byte-equivalent splice coverage to master. The row's `distinct_after` slot is `m_literal("distinct")` (not the alias), structurally forbidding `distinct_by`. Emit reuses `emit_fused_prefilter` with the new capture name.
+- **`select` mid-chain in plan_loop_or_count**: chained selects need `intermediateBinds` for side-effect ordering. Already handled by emit-fn-internal recognition; pattern row captures via single `select` slot post-collapse.
+- **`where` after `select` in plan_loop_or_count**: imperative does `peel_lambda_replace_var(predicate, projection)` to rebind the where pred to the projection result. Critical correctness — emit fn must replicate (the recognition state in `emit_loop_or_count_lane` covers this).
+
+### LOC budget
+
+| Component | Delta |
+|---|---|
+| New: `collapse_chained_wheres` | +30 |
+| New: 5 emit archetypes (lifted from imperative) | +610 |
+| New: shared `emit_terminal_select_project` | +30 |
+| New: 5 pattern rows | +60 |
+| New: 2 populate `[_macro]` fns | +30 |
+| New: 2 predicates + 5 aliases | +25 |
+| Delete: imperative `plan_loop_or_count` body | -208 |
+| Delete: imperative `plan_order_family` body | -543 |
+| New: stubs + KR-1 wiring | +30 |
+| New: tests (`collapse_chained_wheres` + per-archetype + regression) | +200 |
+| **Net** | **~+264 LOC** (refactor, code redistributed; tests dominate) |
+
+### Test plan (additions to existing per-archetype + walker integrity)
+
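+- `test_linq_fold_collapse_chained_wheres.das` — N=2, N=3 chains; side-effecting predicates (no bail — the composed `inner && outer` must fire side effects exactly as the cascade does); on plan_reverse + plan_distinct + plan_loop_or_count + plan_order_family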
+- `test_linq_fold_order_streaming_min.das` — inline-cmp + first / first_or_default; with/without where; with/without distinct; with terminal `_select`
+- `test_linq_fold_order_bounded_heap.das` — inline-cmp + take(N); distinct gate; terminal `_select`
+- `test_linq_fold_order_fused_prefilter.das` — where + order + take/first; distinct + order + take/first; terminal `_select`
+- `test_linq_fold_order_buffer_helper.das` — bare order; order + take; order + first
+- `test_linq_fold_loop_or_count_terminators.das` — all 4 lanes (counter / accumulator / early_exit / array); fast paths; range ops; chained wheres post-collapse
+
+## Known regressions to address in follow-ups
+
+| # | Surface | Symptom | Severity | Owner PR |
+|---|---|---|---|---|
+| KR-1 | `plan_reverse` + `plan_distinct` pattern rows allow a single optional `where_` slot; pre-PR-A imperative `plan_*` accepted N consecutive `where_` calls and `&&`-merged via `merge_where_cond`. | `..._where(p1)._where(p2).reverse()...` and `..._where(p1)._where(p2)._distinct()...` no longer spliced; fell back to cascade. | medium | **CLOSED in PR B1** — `collapse_chained_wheres` pre-pass mirroring `collapse_chained_selects` (~50 LOC + 18 sub-runs). Called from `plan_reverse` / `plan_distinct` / `plan_loop_or_count` stubs; will be called from `plan_order_family` in PR B2. |
+
+## Risks
+
+1. **Pattern ordering hazard.** Pattern A's chain being a strict prefix of B's means A wins. Discipline: more-specific patterns declared first; add a lint pass that walks the table at module-init time and flags prefix conflicts.
+2. **Hidden cross-plan helpers.** Some emit branches consume helpers used by other plans. These stay as-is — emit archetypes call them like the imperative code did.
+3. **Mid-flight `SourceAdapter` redesign.** PR C is the highest-uncertainty phase. If `wrap_per_element` doesn't generalize cleanly to `DecsFind`, we either widen the interface or special-case. **Mid-flight redesign approved.**
+4. **Bench refresh per [feedback-living-results-md]:** each PR re-runs INTERP+JIT for any bench shape it touches and refreshes `results.md`. Goal is **byte-identical or strictly faster** at each phase (refactor, not perf change).
+5. **RST refresh per [feedback-living-linq-fold-patterns-rst]:** each promoted arm's row in `doc/source/reference/linq_fold_patterns.rst` gets touched — phrasing changes from "plan_X handles …" to "pattern `<name>` (archetype `<emit_fn>`) handles …".
+
+## Decision log
+
+- **2026-05-25** — Hybrid (declared patterns table + reusable archetypes) over pure EDSL or pure data-table.
+- **2026-05-25** — Inline kernel in `linq_fold.das` (not a separate module file).
+- **2026-05-25** — Bundle PR A foundation with first migrations (`plan_reverse`, `plan_distinct`) — foundation-only PR is grounded in nothing.
+- **2026-05-25** — Stop and align between phases; mid-flight redesign approved.
+- **2026-05-25** — Hard-cutover (no legacy imperative alongside the table).
+- **2026-05-25** — `Captures` as `table<string; ExprCall?>`; emit fns reach the `LinqCall` registry record on demand via `linqCalls[call_norm_name(c)]` (avoids carrying the per-call pair in every capture entry). Initial sketch carried `tuple<ExprCall?; LinqCall?>` but it bought nothing — emit fns mostly read terminator/call arg shape from the `ExprCall`, and `call_norm_name` is the canonical way to derive the registry key anyway. *(Shape superseded on 2026-05-26 (PR B1): `Captures` became a struct with `single` / `many` tables — see the PR B1 entry further down this log. The on-demand `LinqCall` lookup still applies.)*
+- **2026-05-25** — `SourceAdapter` stub (Array-only) in PR A; widened in PR C.
+- **2026-05-25** — Per-plan stubs during migration; flat walk after PR D.
+- **2026-05-25** — `arity` lives on `Slot` (structural check), not requires-predicate.
+- **2026-05-25** — Named predicates over inline closures by default; inline acceptable for one-off, promote on second use.
+- **2026-05-25 (PR A impl)** — `let` is const, `var` is non-const. Walker stub binds `var result = invoke(p.emit, …)` to receive non-const Expression?. Lint LINT005 misreports the required reinterpret as redundant — known asymmetry; tracked via this entry instead of suppressing in code.
+- **2026-05-25 (PR A impl)** — `ExprCall.name` is the mangled generic-instance name (e.g. `__::linq\`distinct_by\`<hash>`); the user-facing name lives at the root of the `func.fromGeneric` chain. Helper `call_norm_name(ExprCall?)` walks the chain and normalizes through `linqCalls`.
+- **2026-05-25 (PR A impl)** — Variant construction in gen2 uses the named-field constructor form (`SlotMatcher(literal = "x")`, `MatchResult(matched <- captures)`), same syntax as struct init. Helpers `m_literal`/`m_alias`/`c_one`/`c_opt` keep pattern rows compact.
+- **2026-05-25 (PR A impl)** — Empty typed array literal: `array<T>()`. The bare `[]` lacks the element-type inference base.
+- **2026-05-25 (PR A impl)** — `array_source` predicate gates only patterns that need indexed access (R-2a backward-walk, R6 backward-index). Patterns using `for (it in src)` body (Ra / Rb / R1-R4 / distinct_*) work on iterator sources too and have no source-shape gate.
+- **2026-05-25 (PR A impl)** — `take_arg_is_int` predicate is vacuously true when no `take` capture is present (so it's safe on patterns with optional take). Same pattern applies for any future capture-conditional predicate.
+- **2026-05-25 (PR A impl)** — PR A is a pure refactor (no arm add/extend/tighten). Per `[[feedback-living-linq-fold-patterns-rst]]` and `[[feedback-living-results-md]]`, RST and bench refresh are skipped — both are arm-shape-tracking docs, not implementation-tracking docs.
+- **2026-05-26 (PR A R3)** — Intentional extension over master in `plan_reverse`: chains with BOTH a pre-reverse `_select(f)` AND a post-reverse `_select(g)` now splice (R1-R4 + Rb patterns) where master's imperative code had a `!seenSelect` guard that bailed to cascade. The two selects compose cleanly — pre-projection feeds `pushExpr`, post-projection projects the reversed survivors at return. Strictly faster, semantics preserved. Covered by `test_reverse_pre_and_post_select_array` / `_first` in `test_linq_fold_terminal_select.das`.
+- **2026-05-26 (PR B1)** — Split PR B into B1 (KR-1 + `c_chain` + `plan_loop_or_count`) and B2 (`plan_order_family`). The c_chain cardinality is a kernel extension that's load-bearing for plan_loop_or_count's variable-shape head (`[where_*][select*]` interleaved); without it the row would explode into N positional optional slots and still not cover everything. Bundling kernel + first user of kernel in one PR keeps the kernel grounded.
+- **2026-05-26 (PR B1)** — `Captures` migrated from `typedef Captures = table<string; ExprCall?>` to `struct Captures { single : table<string;ExprCall?>; many : table<string;array<ExprCall?>> }`. Alternatives considered: (a) overload one table with sentinel encoding — ugly and type-unsafe; (b) store all captures as `array<ExprCall?>` and index `[0]` for c_one/c_opt — fixed-shape callsites pay an awkward bracket tax. The split struct is mechanical for emit fns (`c["x"]` → `c.single["x"]`, ~47 sites swept) and leaves room for future cardinality types (`c_repeat_n`, etc.) to land in their own table.
+- **2026-05-26 (PR B1)** — `c_chain` walker rule: empty match still creates an entry in `captures.many[name]` (empty array). Emit fns can rely on `c.many |> key_exists("…")` instead of branching on the array's length being > 0. Mirrors how `c_opt` slots that miss still leave `c.single |> key_exists` returning false — predictable existence semantics for emit-fn reads.
+- **2026-05-26 (PR B1)** — `slot_chain_of(names, cap)` convenience constructor takes `var names : array<string>` and moves it into the SlotMatcher via `<-`. `array<string>` is non-copyable; pass-by-value-and-copy would require an explicit clone. Move-consume is the more honest signature.
+- **2026-05-26 (PR B1)** — `collapse_chained_wheres` does NOT gate on `has_sideeffects` (whereas `collapse_chained_selects` does for one specific case). Reason: AND-composing two `where_` predicates preserves left-to-right short-circuit semantics — `inner(x) && outer(x)` evaluates `inner` first and short-circuits, identical to the imperative `if(inner) { if(outer) { … } }` cascade. Side effects in `inner` always fire (per element); side effects in `outer` fire only when `inner` returns true. Cascade and composition match exactly.
+- **2026-05-26 (PR B1)** — `loop_terminator_family` alias must include ALL terminators `classify_terminator` returns non-UNKNOWN for. First B1 cut missed `last`/`single`/`element_at` × `_or_default` (6 EARLY_EXIT terminators); matrix run caught it via `test_linq_fold_ast` "expected 1 for-loop, got 0" failures (terminator wasn't matching the alias → planner cascaded to tier-2 imperative which emits 2 loops). Single-line fix: extend the alias. Lesson: any new alias for a c_opt terminator slot needs an audit against `classify_terminator`'s domain.
+- **2026-05-26 (PR B1)** — `emit_array_lane` signature refactored: `var expr : Expression?` → `isIter : bool`. The only thing the original `expr` parameter was used for was reading `expr._type.isIterator`. The new `EmitCtx.expr_is_iterator` already carries that bool, so the refactor flows cleanly. Single callsite update (imperative caller computed `expr._type != null && expr._type.isIterator` inline before the call).
+
+## Open questions
+
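+- **Prefix-conflict lint pass** — in PR A scope or deferred? Lean PR A so it grows with the table. *(Appears settled: the PR A export list under "Tests / exports philosophy" already names the lint helpers `chain_prefix_of` and `check_pattern_table_reachable` — confirm, then move this item to the decision log.)*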
+- **`plan_zip` / `plan_decs_join` SourceAdapter shape** — defer until PR D scoping. They feel special-case.
+- **Reducer-spec data table** — exact shape (miss/hit template per row) — design during PR D.
+- **`SourceAdapter` method surface** — `wrap_per_element(body, allows_early_exit)` is the minimal contract. Whether `finalize(stmts, retType)` belongs on the adapter or stays as a separate `finalize_*_emission` family — decide during PR C.
+
+## See also
+
+- `doc/source/reference/linq_fold_patterns.rst` — user-facing splice-pattern reference (refreshed per arm-touching PR).
+- `benchmarks/sql/linq_fold_chain_audit.md` — closed-out audit that drove Themes 1-8 (PRs #2851 / #2852 / #2857 / #2861 / #2862 / #2865 / #2866 / #2874 / #2875).
+- `benchmarks/sql/results.md` — INTERP+JIT matrix refreshed per splice-touching PR.
diff --git a/daslib/with_boost.das b/daslib/with_boost.das
new file mode 100644
index 0000000000..b8052de8cb
--- /dev/null
+++ b/daslib/with_boost.das
@@ -0,0 +1,221 @@
+options gen2
+options indenting = 4
+options no_unused_block_arguments = false
+options no_unused_function_arguments = false
+
+options strict_smart_pointers
+
+module with_boost shared private
+
+//! ``with_`` call macro: bind one or more array / table element references
+//! inside a block, with an automatic ``lock`` around the body so
+//! push/erase/resize/clear panic at runtime instead of silently dangling.
+//! The macro emits the full lock / invoke / unlock sequence inline — no
+//! helper functions — so any arity and any mix of array / table args works
+//! uniformly. See ``tutorials/macros/18_with_boost.das`` for examples.
+//!
+//! Restrictions:
+//!
+//! * Each container arg must be an ``ExprAt`` (``arr[i]`` / ``tab[k]``). Plain
+//!   locals, struct fields on locals, and function-call results are refused —
+//!   use built-in ``with`` for those.
+//! * At most one table-keyed arg per call (a second insert/erase would
+//!   rehash and invalidate the pinned entry).
+//! * Block bodies are void (no ``return`` value); ``with_`` is for in-place
+//!   mutation. To compute a value, write to a local:
+//!   ``var v : T; with_(arr[0]) { v = _.f }``.
+
+require daslib/ast
+require daslib/ast_boost
+require daslib/templates_boost
+require daslib/macro_boost
+
+// ──────────────────────────────────────────────────────────────────────────────
+// Call macro
+// ──────────────────────────────────────────────────────────────────────────────
+
+def private container_kind(atNode : ExprAt?) : string {
+    //! Classifies the container indexed by ``atNode``: "array", "table",
+    //! or "" when it is neither (including a not-yet-inferred type).
+    let containerT = atNode.subexpr._type
+    return "" if (containerT == null)
+    let base = containerT.baseType
+    return base == Type.tArray ? "array" : (base == Type.tTable ? "table" : "")
+}
+
+def private is_lvalue_chain(e : ExpressionPtr) : bool {
+    //! Walks ``e`` through ExprField / ExprSafeField hops and reports
+    //! whether the walk bottoms out in an ExprVar. Null and every other
+    //! node kind yield ``false``. ExprAt / ExprSafeAt are deliberately
+    //! not followed: for `outer[i].innerArr[j]` only `innerArr` would be
+    //! locked, so `outer` could still reallocate under the bound ref
+    //! from inside the body.
+    return false if (e == null)
+    if (e is ExprField) {
+        return is_lvalue_chain((e as ExprField).value)
+    } elif (e is ExprSafeField) {
+        return is_lvalue_chain((e as ExprSafeField).value)
+    }
+    return e is ExprVar
+}
+
+[call_macro(name="with_")]
+class private WithMacro : AstCallMacro {
+    //! Implements the ``with_`` call macro. Each container arg must be an
+    //! ``ExprAt`` (``arr[i]`` / ``tab[k]``); at most one table-keyed arg
+    //! per call. Emits a pre-bind / lock / invoke / unlock sequence inline
+    //! per container, with block-param types pinned to ``T &`` (element
+    //! ref) so workhorse and struct elements mutate uniformly.
+    def override visit(prog : ProgramPtr; mod : Module?; var call : ExprCallMacro?) : ExpressionPtr {
+        macro_verify(length(call.arguments) >= 2, prog, call.at,
+            "with_ needs at least one container arg plus a block")
+        let totalArgs = length(call.arguments)
+        let containerCount = totalArgs - 1
+        let blockArg = call.arguments[totalArgs - 1]
+        macro_verify(blockArg is ExprMakeBlock, prog, call.at,
+            "with_ last argument must be a block, got {describe(blockArg)}")
+        // Validate every container arg + count tables in one pass.
+        var tableCount = 0
+        for (i in range(containerCount)) {
+            let ai = call.arguments[i]
+            macro_verify(ai is ExprAt, prog, call.at,
+                "with_ arg #{i} must be `arr[i]` or `tab[k]` — use built-in `with` for locals; got {describe(ai)}")
+            let aiNode = ai as ExprAt
+            macro_verify(aiNode.subexpr._type != null, prog, call.at,
+                "with_ arg #{i} container type not inferred yet — got null _type on {describe(aiNode.subexpr)}")
+            // The element type is cloned and mutated below, so it must be known too.
+            macro_verify(ai._type != null, prog, call.at,
+                "with_ arg #{i} element type not inferred yet — got null _type on {describe(ai)}")
+            let ki = container_kind(aiNode)
+            macro_verify(ki != "", prog, call.at,
+                "with_ arg #{i} container must be array or table, got {describe(aiNode.subexpr._type)}")
+            macro_verify(is_lvalue_chain(aiNode.subexpr), prog, call.at,
+                "with_ arg #{i} container must be a named variable (or a field chain rooted in one); nested index chains are refused because only the innermost container would be locked, and array literals / function-call results have temp lifetime and can't be safely ref-bound. Got {describe(aiNode.subexpr)}")
+            if (ki == "table") {
+                tableCount ++
+            }
+        }
+        macro_verify(tableCount <= 1, prog, call.at,
+            "with_ allows at most one table-keyed arg per call (got {tableCount}); a 2nd table lookup could rehash and invalidate the first pinned entry")
+        // Build the rewritten block. Block-param types are NOT left to
+        // inference — each one is pinned explicitly (see the note below).
+        let mblk = blockArg as ExprMakeBlock
+        let userBlock = mblk._block as ExprBlock
+        let userArgCount = length(userBlock.arguments)
+        var rewrittenBlock : ExprMakeBlock?
+        var rewrittenInner : ExprBlock?
+        // Per-param type: clone the container's element type (already a
+        // ref after typing of `arr[i]` / `tab[k]`), strip const, force ref.
+        // Inline ExprInvoke doesn't drive inference from arg → block-param,
+        // so the macro pins each param to the concrete element-ref type
+        // directly. Workhorse types pick up the explicit `&` here too.
+        if (userArgCount == 0) {
+            macro_verify(containerCount == 1, prog, call.at,
+                "with_ multi-arg form requires explicit block params: $(a, b, ...)")
+            var pT = clone_type(call.arguments[0]._type)
+            pT.flags.ref = true
+            pT.flags.constant = false
+            pT.flags.removeConstant = true
+            var injected = new Variable(at = call.at, name := "_", _type = pT)
+            var newBlock = new ExprBlock(at = userBlock.at,
+                returnType = new TypeDecl(at = userBlock.at, baseType = Type.tVoid),
+                blockFlags = userBlock.blockFlags | ExprBlockFlags.isClosure)
+            newBlock.arguments |> emplace_new(injected)
+            for (st in userBlock.list) {
+                newBlock.list |> emplace_new(clone_expression(st))
+            }
+            rewrittenBlock = new ExprMakeBlock(at = call.at, _block = newBlock)
+            rewrittenInner = newBlock
+        } else {
+            macro_verify(userArgCount == containerCount, prog, call.at,
+                "with_ block-param count must match container-arg count: {containerCount} containers, {userArgCount} block params")
+            rewrittenBlock = clone_expression(mblk) as ExprMakeBlock
+            rewrittenInner = rewrittenBlock._block as ExprBlock
+            for (i in range(containerCount)) {
+                var arg = rewrittenInner.arguments[i]
+                arg._type = clone_type(call.arguments[i]._type)
+                arg._type.flags.ref = true
+                arg._type.flags.constant = false
+                arg._type.flags.removeConstant = true
+            }
+        }
+        // ── Inline-emission ────────────────────────────────────────────
+        // For each container arg, pre-bind the subexpr to a local ref so
+        // every downstream lock / invoke / unlock references the SAME
+        // materialised value instead of re-evaluating the subexpr once
+        // per splice (lock + invoke + unlock).
+        //
+        // Emitted statement order (matches the assembly at the end):
+        //   1. pre-binds, per container in argument order:
+        //        var __with_c_<i> & = unsafe(<subexpr>)
+        //        tables only: var __with_tref_<i> & = unsafe(__with_c_<i>[<key>])
+        //   2. locks:   __builtin_<kind>_lock_mutable(__with_c_<i>)
+        //   3. invoke:  args are __with_c_<i>[<idx>] (array) or __with_tref_<i> (table)
+        //   4. unlocks: __builtin_<kind>_unlock_mutable(__with_c_<i>), reversed
+        // Note the table element lookup runs BEFORE any lock is taken.
+        var preStmts : array<ExpressionPtr>
+        var lockStmts : array<ExpressionPtr>
+        var unlockStmts : array<ExpressionPtr>
+        var argRefs : array<ExpressionPtr>
+        preStmts |> reserve(containerCount * 2)
+        lockStmts |> reserve(containerCount)
+        unlockStmts |> reserve(containerCount)
+        argRefs |> reserve(containerCount)
+        for (i in range(containerCount)) {
+            let ai = call.arguments[i] as ExprAt
+            let ki = container_kind(ai)
+            let cName = "__with_c_{i}"
+            preStmts |> push <| qmacro_expr() {
+                var $i(cName) & = unsafe($e(ai.subexpr))
+            }
+            if (ki == "array") {
+                lockStmts |> push <| qmacro_expr() {
+                    __builtin_array_lock_mutable($i(cName))
+                }
+                unlockStmts |> push <| qmacro_expr() {
+                    __builtin_array_unlock_mutable($i(cName))
+                }
+                argRefs |> push <| qmacro($i(cName)[$e(ai.index)])
+            } else {  // table
+                let trefName = "__with_tref_{i}"
+                preStmts |> push <| qmacro_expr() {
+                    var $i(trefName) & = unsafe($i(cName)[$e(ai.index)])
+                }
+                lockStmts |> push <| qmacro_expr() {
+                    __builtin_table_lock_mutable($i(cName))
+                }
+                unlockStmts |> push <| qmacro_expr() {
+                    __builtin_table_unlock_mutable($i(cName))
+                }
+                argRefs |> push <| qmacro($i(trefName))
+            }
+        }
+        // Reverse unlock order so cleanup mirrors acquisition.
+        let unlockN = length(unlockStmts)
+        var unlocksReversed <- [for (i in range(unlockN)); unlockStmts[unlockN - 1 - i]]
+        // Build the invoke statement via ExprInvoke (the dedicated AST node
+        // for lambda/block calls — ExprCall("invoke") doesn't unify the
+        // block's auto-typed params against the actual refs).
+        var invokeCall = new ExprInvoke(at = call.at, name := "invoke")
+        invokeCall.arguments |> emplace_new(rewrittenBlock)
+        for (r in argRefs) {
+            invokeCall.arguments |> emplace_new(r)
+        }
+        // Assemble the block: pre-resolves, locks, invoke, then unlocks
+        // (reversed). No ``finally`` — daslang panic is fatal, not a C++
+        // exception; if the invoke panics the process is exiting anyway
+        // and skipped unlocks don't matter.
+        var blk = new ExprBlock(at = call.at,
+            returnType = new TypeDecl(at = call.at, baseType = Type.tVoid))
+        for (s in preStmts) {
+            blk.list |> emplace_new(s)
+        }
+        for (s in lockStmts) {
+            blk.list |> emplace_new(s)
+        }
+        blk.list |> emplace_new(invokeCall)
+        for (s in unlocksReversed) {
+            blk.list |> emplace_new(s)
+        }
+        return <- blk
+    }
+}
diff --git a/doc/reflections/das2rst.das b/doc/reflections/das2rst.das
index 5725f75d6c..2765dcda1a 100644
--- a/doc/reflections/das2rst.das
+++ b/doc/reflections/das2rst.das
@@ -26,6 +26,7 @@ require daslib/jobque_boost
 require daslib/apply_in_context
 require daslib/contracts
 require daslib/defer
+require daslib/with_boost
 require daslib/delegate
 require daslib/option
 require daslib/result
@@ -587,6 +588,14 @@ def document_module_defer(root : string) {
     document("defer and defer_delete macros", mod, "defer.rst", groups)
 }
 
+def document_module_with_boost(root : string) { // documents the "with_boost" module; `root` is not read in this body
+    var mod = find_module("with_boost")
+    var groups <- array<DocGroup>( // a single doc group, selected by the regex on the next line
+        group_by_regex("Locked element binding helpers", mod, %regex~(_with_locked_.*)$%%)
+    )
+    document("``with_`` macro: locked array/table element binding", mod, "with_boost.rst", groups) // "with_boost.rst" is the file name handed to document()
+}
+
 def document_module_if_not_null(root : string) {
     var mod = find_module("if_not_null")
     var groups <- array<DocGroup>(
@@ -1644,6 +1653,7 @@ def main {
     document_module_decs_boost(root)
     document_module_decs_state(root)
     document_module_defer(root)
+    document_module_with_boost(root)
     document_module_delegate(root)
     document_module_dynamic_cast_rtti(root)
     document_module_enum_trait(root)
diff --git a/doc/source/reference/tutorials.rst b/doc/source/reference/tutorials.rst
index 0ee3609d36..554c2f9890 100644
--- a/doc/source/reference/tutorials.rst
+++ b/doc/source/reference/tutorials.rst
@@ -204,6 +204,7 @@ Run any tutorial from the project root::
    tutorials/macros/15_type_macro.rst
    tutorials/macros/16_template_type_macro.rst
    tutorials/macros/17_qmacro.rst
+   tutorials/macros/18_with_boost.rst
 
 .. _tutorials_dashv:
 
diff --git a/doc/source/reference/tutorials/macros/18_with_boost.rst b/doc/source/reference/tutorials/macros/18_with_boost.rst
new file mode 100644
index 0000000000..08c903bbe4
--- /dev/null
+++ b/doc/source/reference/tutorials/macros/18_with_boost.rst
@@ -0,0 +1,193 @@
+.. _tutorial_macro_with_boost:
+
+.. index::
+   single: Tutorial; Macros; with_
+   single: Tutorial; Macros; with_boost
+   single: Tutorial; Macros; array lock
+   single: Tutorial; Macros; table lock
+
+==================================================================
+Macro Tutorial 18: ``with_`` — locked binding of container slots
+==================================================================
+
+``daslib/with_boost`` adds a ``with_`` call-macro that solves a recurring
+ergonomics problem: binding a local reference to an array or table element
+so its fields can be updated in place. The naive form is rejected by daslang's typer:
+
+.. code-block:: das
+
+   var arr = [A(f1 = 1, f2 = 2)]
+   var a : A& = arr[0]    // error[31300]: local reference to non-local expression is unsafe
+   a.f1 = 99
+
+Between binding ``a`` and writing through it, code could push/resize/erase
+``arr``, leaving ``a`` dangling. ``with_`` solves this by:
+
+1. Binding the element inside a block, named ``_`` by default;
+2. Wrapping the block in an **automatic lock** on the container, so
+   push/erase/resize/clear inside the body panic at runtime instead of
+   silently corrupting memory.
+
+The single-arg form is a 1:1 replacement for the rejected pattern above:
+
+.. code-block:: das
+
+   require daslib/with_boost
+
+   var arr = [A(f1 = 1, f2 = 2)]
+   with_(arr[0]) {
+       _.f1 = 99       // mutation persists in arr[0]
+   }
+
+
+Section 1 — The single-arg form
+================================
+
+Default-name ``_`` binding works for both struct-element and
+workhorse-element arrays (workhorse coverage in Section 3):
+
+.. code-block:: das
+
+   var arr = [A(f1 = 1, f2 = 2), A(f1 = 3, f2 = 4)]
+   with_(arr[0]) {
+       _.f1 = 99
+       _.f2 = 100
+   }
+   // arr[0] is now A(f1 = 99, f2 = 100)
+
+
+Named binding via ``$(name)`` is identical in effect — the macro strips
+constness so mutations always persist:
+
+.. code-block:: das
+
+   with_(arr[1]) $(elem) {
+       elem.f1 = 555
+   }
+
+
+Section 2 — Multi-arg positional form
+======================================
+
+Passing multiple containers locks each independently. The block params
+are positional (no ``=``-named args; the macro reads them in order):
+
+.. code-block:: das
+
+   var dst = [A(f1 = 0, f2 = 0)]
+   var src = [A(f1 = 10, f2 = 20)]
+   with_(dst[0], src[0]) $(d, s) {
+       d.f1 = s.f1 + 1
+       d.f2 = s.f2 + 2
+   }
+
+Any arity works — the macro emits the full lock / invoke / unlock sequence inline, with one lock per container, so a call like ``with_(a[0], b[1], c[2], d[3], e[4]) $(va, vb, vc, vd, ve) { ... }`` scales naturally. Mix arrays and tables freely, subject to the single-table-arg rule (see Section 4).
+
+
+Section 3 — Workhorse element types (int, float, ...)
+======================================================
+
+The block-arg is bound by reference, so workhorse-element containers
+work the same as struct-element ones — mutation through ``_ = X`` (or
+the named ``x = X``) propagates back to the underlying slot:
+
+.. code-block:: das
+
+   var ints = [1, 2, 3]
+   with_(ints[1]) {
+       _ = 222
+   }
+   // ints == [1, 222, 3]
+
+The macro emits each block parameter pinned to the container's element
+type with the ref flag set, so daslang resolves the binding as ``int&``
+(or whichever workhorse type the element happens to be). No special-case
+in the macro for struct vs workhorse — the same pinning path covers both.
+
+
+Section 4 — Tables
+===================
+
+Tables work the same way; ``tab[key]`` upserts (creates a default entry
+if the key is missing). Only **one** table-keyed arg per call — any
+second insert into a table during the body would rehash and invalidate
+the pinned entry, so the macro refuses anything past the first:
+
+.. code-block:: das
+
+   var tab : table<string; A>
+   tab |> insert("k", A(f1 = 11, f2 = 22))
+   with_(tab["k"]) $(v) {
+       v.f1 = 777
+   }
+
+
+Section 5 — Lock is real
+=========================
+
+Mutation of the container inside the body panics at runtime — exactly
+the failure mode the typer was trying to prevent at compile time:
+
+.. code-block:: das
+
+   var arr = [A(f1 = 1, f2 = 2)]
+   with_(arr[0]) $(a) {
+       arr |> push(A(f1 = 1000, f2 = 2000))   // panics: "can't push into locked array"
+   }
+
+daslang panic is fatal (not a C++/JS-style exception) — the program
+prints the diagnostic and exits. ``try/recover`` exists to capture the
+message before exit for nicer logging, NOT to recover-and-continue.
+
+
+Section 6 — Refused container shapes
+=====================================
+
+``with_`` is intentionally narrow:
+
+* **Non-``ExprAt`` containers** (plain locals, struct fields on locals,
+  function-call results, array literals) are refused. The macro needs
+  to ref-bind the container to a local, and only ExprVar-rooted
+  lvalue chains (variables, ``obj.field``, ``arr[i]``) have stable
+  addressable storage outside the expression. Use built-in ``with`` for
+  locals; for literal-or-call containers, hoist to a ``var`` first.
+
+* **More than one table-keyed arg** is refused per the rehash hazard
+  noted above.
+
+* **Bodies that ``return`` a value** are refused at typecheck time —
+  the synthesized invoke target declares a ``: void`` block return.
+  ``with_`` is for in-place mutation; compute values via a local:
+  ``var v : T; with_(arr[0]) { v = _.f }``.
+
+The first two refusals fire at macro-expansion time with the macro-error
+code ``50503`` and a message describing the failing arg; the third is reported by the typechecker.
+
+
+Running the tutorial
+=====================
+
+::
+
+   daslang.exe tutorials/macros/18_with_boost.das
+
+Expected output::
+
+   section 2: arr[0] = 99, 100
+   section 3: arr[1].f1 = 555
+   section 4: dst[0] = 11, 22
+   section 5: ints = [ 1, 222, 3]
+   section 6: tab[k].f1 = 777
+   section 7: see comment for the lock-panic shape
+
+
+.. seealso::
+
+   Full source:
+   :download:`18_with_boost.das <../../../../../tutorials/macros/18_with_boost.das>`
+
+   Previous tutorial: :ref:`tutorial_macro_qmacro`
+
+   Standard library: ``daslib/with_boost.das``
+
+   Language reference: :ref:`Macros <macros>` — full macro system documentation
diff --git a/doc/source/stdlib/handmade/module-with_boost.rst b/doc/source/stdlib/handmade/module-with_boost.rst
new file mode 100644
index 0000000000..fea23268aa
--- /dev/null
+++ b/doc/source/stdlib/handmade/module-with_boost.rst
@@ -0,0 +1,43 @@
+The WITH_BOOST module provides the ``with_`` call macro: bind one or more
+array / table element references inside a block, with an automatic
+container lock around the body so push / erase / resize / clear inside
+the body panic at runtime instead of silently leaving the bound references
+dangling. The macro emits the lock / invoke / unlock sequence fully inline,
+so any arity works with a single ``require``; array and table args can be
+mixed, but at most one table-keyed arg is accepted per call.
+
+All functions and symbols are in "with_boost" module, use require to get access to it.
+
+.. code-block:: das
+
+    require daslib/with_boost
+
+Example:
+
+.. code-block:: das
+
+    require daslib/with_boost
+
+    struct A {
+        f1 : int
+        f2 : int
+    }
+
+    [export]
+    def main {
+        var arr = [A(f1=1, f2=2), A(f1=3, f2=4)]
+
+        // single-arg, default `_` binding
+        with_(arr[0]) {
+            _.f1 = 99
+        }
+
+        // multi-arg positional, struct + workhorse
+        var ints = [10, 20, 30]
+        with_(arr[1], ints[0]) $(s, n) {
+            s.f1 = n + 100
+        }
+
+        print("arr[0]={arr[0].f1}, arr[1]={arr[1].f1}\n")
+        // output: arr[0]=99, arr[1]=110
+    }
diff --git a/doc/source/stdlib/sec_annotations.rst b/doc/source/stdlib/sec_annotations.rst
index fa533d6f70..ae346316fe 100644
--- a/doc/source/stdlib/sec_annotations.rst
+++ b/doc/source/stdlib/sec_annotations.rst
@@ -12,6 +12,7 @@ and other compile-time utilities.
    generated/contracts.rst
    generated/apply.rst
    generated/defer.rst
+   generated/with_boost.rst
    generated/if_not_null.rst
    generated/is_local.rst
    generated/safe_addr.rst
diff --git a/examples/audio/hrtf/main.das b/examples/audio/hrtf/main.das
index 3c34ecd6ce..46506a6bc3 100644
--- a/examples/audio/hrtf/main.das
+++ b/examples/audio/hrtf/main.das
@@ -12,7 +12,9 @@ require daslib/defer
 require daslib/fio
 require daslib/safe_addr
 require daslib/math_boost
+require daslib/jobque_boost
 require math
+require strings
 
 // -- Shaders --
 
@@ -97,6 +99,11 @@ let SOURCE_COLORS = fixed_array(
     float3(1.0, 0.3, 1.0)    // magenta
 )
 
+// HRTF/simulated routing budget — cycle on B key.
+let HRTF_BUDGETS = fixed_array(32, 0, 999) // 0 is the "all simulated" setting and 999 the "all HRTF" setting, per the labels below
+let HRTF_BUDGET_LABELS = fixed_array("mixed top-32", "all simulated", "all HRTF") // parallel to HRTF_BUDGETS, index for index
+var hrtf_budget_idx = 0 // current index into both arrays above
+
 var sound_data : array<float>
 var sound_channels = 1
 var sound_rate = MA_SAMPLE_RATE
@@ -167,9 +174,7 @@ def gl_to_audio(p : float3) : float3 {
 
 def add_sound_source(pos : float3; volume : float = 1.0) {
     let idx = length(sources) % 5
-    var src : SoundSource
-    src.position = pos
-    src.color = SOURCE_COLORS[idx]
+    var src = SoundSource(position = pos, color = SOURCE_COLORS[idx])
     var samples <- clone(sound_data)
     src.sid = play_3d_sound_loop_from_pcm(gl_to_audio(pos), linear_attenuation(10.0), sound_rate, sound_channels, samples)
     src.sid |> set_volume(volume)
@@ -178,8 +183,21 @@ def add_sound_source(pos : float3; volume : float = 1.0) {
 
 // -- Main --
 
+// Returns the integer following the last `--max-frames` flag in argv, or 0 when no such flag/value pair exists.
+def parse_max_frames {
+    let argv <- get_command_line_arguments()
+    var limit = 0
+    for (k in range(length(argv))) {
+        if (k + 1 < length(argv) && argv[k] == "--max-frames") {
+            limit = to_int(argv[k + 1])
+        }
+    }
+    return limit
+}
+
 [export]
 def main {
+    let maxFrames = parse_max_frames()
     // GLFW init
     if (glfwInit() == 0) {
         panic("can't init glfw")
@@ -239,27 +257,38 @@ def main {
     // Load sound
     load_sound_data()
 
+    // Stats LockBox for per-second utilization + HRTF/simulated split readout.
+    // Lifecycle wraps the audio system: the audio thread releases its share-ref during
+    // audio_system_finalize (inside with_audio_system below), THEN this outer defer
+    // calls lock_box_remove to do the final delete. `release` alone never deletes.
+    var stats_box <- lock_box_create()
+    defer() {
+        unsafe(lock_box_remove(stats_box))
+    }
+
     // Audio system
     with_audio_system() {
         // Start with one source in front
         add_sound_source(float3(0.0, 0.0, -3.0))
 
+        set_audio_stats_box(stats_box)
+
         print("HRTF 3D Audio Demo\n")
         print("  Click to capture mouse, click again to release\n")
         print("  WASD to move, mouse to look\n")
-        print("  N to add a sound source, ESC to quit\n")
+        print("  N to add a sound source, M to add 30, ESC to quit\n")
+        print("  B to cycle HRTF budget (mixed top-32 / all simulated / all HRTF)\n")
 
         var last_time = glfwGetTime()
         var n_was_pressed = false
         var m_was_pressed = false
+        var b_was_pressed = false
+        var since_stats_print = 0.0
+        var frameCount = 0
 
         eval_main_loop() {
-            if (glfwWindowShouldClose(window) != 0) {
-                return false
-            }
-            if (glfwGetKey(window, int(GLFW_KEY_ESCAPE)) == int(GLFW_PRESS)) {
-                return false
-            }
+            if (glfwWindowShouldClose(window) != 0 || glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS || (maxFrames > 0 && frameCount >= maxFrames)) return false
+            frameCount ++
             glfwPollEvents()
 
             // Delta time
@@ -271,21 +300,21 @@ def main {
             let speed = 5.0 * dt
             let fwd = camera_forward()
             let rgt = camera_right()
-            if (glfwGetKey(window, int(GLFW_KEY_W)) == int(GLFW_PRESS)) {
+            if (glfwGetKey(window, GLFW_KEY_W) == GLFW_PRESS) {
                 camera_pos += fwd * speed
             }
-            if (glfwGetKey(window, int(GLFW_KEY_S)) == int(GLFW_PRESS)) {
+            if (glfwGetKey(window, GLFW_KEY_S) == GLFW_PRESS) {
                 camera_pos -= fwd * speed
             }
-            if (glfwGetKey(window, int(GLFW_KEY_A)) == int(GLFW_PRESS)) {
+            if (glfwGetKey(window, GLFW_KEY_A) == GLFW_PRESS) {
                 camera_pos -= rgt * speed
             }
-            if (glfwGetKey(window, int(GLFW_KEY_D)) == int(GLFW_PRESS)) {
+            if (glfwGetKey(window, GLFW_KEY_D) == GLFW_PRESS) {
                 camera_pos += rgt * speed
             }
 
             // Add source on N press
-            let n_pressed = glfwGetKey(window, int(GLFW_KEY_N)) == int(GLFW_PRESS)
+            let n_pressed = glfwGetKey(window, GLFW_KEY_N) == GLFW_PRESS
             if (n_pressed && !n_was_pressed) {
                 let spawn_pos = camera_pos + fwd * 3.0
                 add_sound_source(spawn_pos)
@@ -294,7 +323,7 @@ def main {
             n_was_pressed = n_pressed
 
             // Add 30 sources around camera on M press
-            let m_pressed = glfwGetKey(window, int(GLFW_KEY_M)) == int(GLFW_PRESS)
+            let m_pressed = glfwGetKey(window, GLFW_KEY_M) == GLFW_PRESS
             if (m_pressed && !m_was_pressed) {
                 for (i in range(30)) {
                     let angle = 2.0 * PI * float(i) / 30.0
@@ -306,6 +335,25 @@ def main {
             }
             m_was_pressed = m_pressed
 
+            // Cycle HRTF budget on B press
+            let b_pressed = glfwGetKey(window, GLFW_KEY_B) == GLFW_PRESS
+            if (b_pressed && !b_was_pressed) {
+                hrtf_budget_idx = (hrtf_budget_idx + 1) % 3
+                let newBudget = HRTF_BUDGETS[hrtf_budget_idx]
+                set_hrtf_budget(newBudget)
+                print("HRTF budget: {newBudget} ({HRTF_BUDGET_LABELS[hrtf_budget_idx]})\n")
+            }
+            b_was_pressed = b_pressed
+
+            // Per-second audio-system stats line. `get` is read-only — keeps the box alive across reads.
+            // `grab` would consume the notification and break the periodic publish/poll loop.
+            since_stats_print += dt
+            if (since_stats_print >= 1.0) {
+                stats_box |> get() $(s : AudioSystemStats#) {
+                    print("audio: util={s.utilization_pct}%, hrtf={s.hrtf_count}/{s.total_3d} (budget={HRTF_BUDGETS[hrtf_budget_idx]})\n")
+                }
+                since_stats_print = 0.0
+            }
 
             // Update audio listener
             set_head_position(gl_to_audio(camera_pos), gl_to_audio(fwd))
diff --git a/modules/dasAudio/audio/audio_boost.das b/modules/dasAudio/audio/audio_boost.das
index 55197002ae..4974171f0e 100644
--- a/modules/dasAudio/audio/audio_boost.das
+++ b/modules/dasAudio/audio/audio_boost.das
@@ -1,5 +1,7 @@
 options gen2
 options indenting = 4
+options persistent_heap
+options gc
 options no_global_variables = false
 options no_unused_block_arguments = false
 options no_unused_function_arguments = false
@@ -76,6 +78,16 @@ struct public AudioChannelStatus {
     //! Number of pending PCM chunks in the stream queue.
 }
 
+//! Snapshot of the audio system's recent CPU utilization and HRTF/simulated routing split, published to a caller-provided LockBox.
+struct public AudioSystemStats {
+    utilization_pct : float
+    //! Mixer CPU utilization over the last ~1 second, in percent (0..100+).
+    hrtf_count : int
+    //! Number of 3D channels currently routed through HRTF (numerator of the HRTF/simulated split).
+    total_3d : int
+    //! Total active is3D channels, HRTF and simulated combined (denominator of the HRTF/simulated split).
+}
+
 //! Distance attenuation coefficients for 3D audio.
 //! Use the helper functions (inverse_distance_attenuation, linear_attenuation, etc.) to create instances.
 [safe_when_uninitialized]
@@ -147,12 +159,15 @@ class AudioChannel {
     paused : bool = false
     pause_fade : float = 1.0        // current fade level (0=silent, 1=full)
     pause_fade_target : float = 1.0 // 0=fading to pause, 1=fading to play
+    pause_fade_step : float = 1.0 / (0.002 * float(MA_SAMPLE_RATE)) // ~2ms ramp at the configured sample rate
+    ignoreGlobalVolume : bool = false  // if true, g_volume does not multiply this channel
     stop   : bool = false
     pitch  : float = 1.
     volume : float = 1.
     source : AudioSource?
     resampler : ma_resampler
-    channel_converter : ma_channel_converter
+    channel_converter : ma_channel_converter      // HRTF mode for is3D: 2 -> MA_CHANNELS. Non-3D: source.channels -> MA_CHANNELS
+    channel_converter_sim : ma_channel_converter  // simulated-3D mode: source.channels -> MA_CHANNELS (built in set3D, used when !is_hrtf)
     volume_mixer : ma_volume_mixer
     playback_position : uint64 = 0ul
     position3d : float3
@@ -160,6 +175,8 @@ class AudioChannel {
     doppler : float = 1.
     attenuation : Attenuation = default_attenuation()
     is3D : bool = false
+    is_hrtf : bool = true      // runtime per-channel mode; rewritten each frame by update_hrtf's budget
+    setup3D : bool = true
     @do_not_delete status : LockBox? = null
     @do_not_delete reverb : I3DL2Reverb?
     @do_not_delete chorus : ma_chorus?
@@ -191,9 +208,20 @@ class AudioChannel {
     }
     def set3D {
         is3D = true
+        // simulated-3D converter: source.channels -> MA_CHANNELS (mono->stereo upmix for mono sources)
+        var sim_config <- ma_channel_converter_config_init(
+            ma_format.ma_format_f32,
+            uint(source.channels),
+            null,
+            uint(MA_CHANNELS),
+            null,
+            ma_channel_mix_mode.ma_channel_mix_mode_default
+        )
+        ma_channel_converter_init(unsafe(addr(sim_config)), unsafe(addr(channel_converter_sim)))
         if (MA_HRTF) {
             ma_hrtf_init(unsafe(addr(hrtf)), uint(MA_SAMPLE_RATE))
             ma_channel_converter_uninit(unsafe(addr(channel_converter)))
+            // HRTF always emits stereo, so the converter's input is 2ch regardless of source.channels
             var channel_converter_config <- ma_channel_converter_config_init(
                 ma_format.ma_format_f32,
                 2u,
@@ -211,6 +239,9 @@ class AudioChannel {
         }
         ma_volume_mixer_uninit(unsafe(addr(volume_mixer)))
         ma_channel_converter_uninit(unsafe(addr(channel_converter)))
+        if (is3D) {
+            ma_channel_converter_uninit(unsafe(addr(channel_converter_sim)))
+        }
         ma_resampler_uninit(unsafe(addr(resampler)))
         if (status != null) {
             status |> notify_and_release
@@ -271,9 +302,9 @@ class AudioChannel {
             delete samples
             samples <- temp
         }
-        // hrtf
+        // hrtf — only when this channel is routed through HRTF (top-N by distance per update_hrtf budget)
         var nSoundChannels = source.channels
-        if (is3D && MA_HRTF) {
+        if (is3D && MA_HRTF && is_hrtf) {
             // void ma_hrtf_process_frames(ma_hrtf * hrtf, float * pOut, const float * pIn, ma_uint32 nChannels, ma_uint32 frameCount)
             var temp : array<float>
             temp |> resize(int(outputFrames) * 2)
@@ -308,10 +339,15 @@ class AudioChannel {
             delete samples
             samples <- temp
         }
-        // convert channels
+        // convert channels — for is3D channels, pick the converter sized to the upstream stage:
+        //   HRTF mode    -> samples are 2ch, use `channel_converter` (2 -> MA_CHANNELS)
+        //   simulated 3D -> samples are source.channels, use `channel_converter_sim` (handles the mono->stereo upmix)
         var channel_data : array<float>
         channel_data |> resize(data |> length)
-        ma_channel_converter_process_pcm_frames(unsafe(addr(channel_converter)),
+        let converter_to_use = (is3D && !is_hrtf
+            ? unsafe(addr(channel_converter_sim))
+            : unsafe(addr(channel_converter)))
+        ma_channel_converter_process_pcm_frames(converter_to_use,
             unsafe(addr(channel_data[0])),
             unsafe(addr(samples[0])),
             outputFrames)
@@ -326,9 +362,9 @@ class AudioChannel {
             unsafe(addr(data[0])),
             outputFrames)
         delete channel_data
-        // apply pause fade (2ms ramp to avoid clicks)
+        // apply pause fade (per-channel ramp speed, default ~2ms to avoid clicks)
         if (pause_fade != pause_fade_target) {
-            let fade_step = 1.0 / 96.0  // ~2ms at 48kHz
+            let fade_step = pause_fade_step
             for (f in range(int(outputFrames))) {
                 let fade = pause_fade
                 for (c in range(channels)) {
@@ -582,27 +618,90 @@ var g_head_position : float3
 var g_head_direction : float3 = float3(0., 1., 0.)
 var g_head_velocity : float3
 
+//! HRTF routing budget — target number of is3D channels (closest to head) that run through HRTF each frame; the rest run simulated-3D (pan + attenuation). Channels that were already HRTF may exceed it by the sticky margin in hrtf_budget_classify. Negative values sent via the ``hrtf_budget`` command are clamped to 0.
+var g_hrtf_budget : int = 32
+
+//! Calibrated normalizer applied to simulated-3D channels so their perceived loudness matches HRTF channels at the same position. Measured once in initialize_mixer.
+var g_hrtf_frontal_gain : float = 1.0
+
+// Scratch array reused each update_hrtf frame to avoid per-frame allocation; tuple of (distance² to head, g_channels index).
+var g_hrtf_scratch : array<tuple<dist2 : float; idx : int>>
+
+def public hrtf_budget_classify(rank, budget : int; wasHrtf : bool) : bool {
+    //! Returns true when the 3D channel at closest-to-head position ``rank`` should be routed through HRTF,
+    //! false when it should run simulated 3D. Channels that were not HRTF last frame qualify only below ``budget``.
+    //! Channels that were (``wasHrtf``) keep HRTF up to ``budget + max(2, budget / 10)`` so rank swaps do not flap,
+    //! except that a non-positive budget removes the margin, letting "all simulated" clear in-flight HRTF channels.
+    if (!wasHrtf) return rank < budget
+    return rank < (budget > 0 ? budget + max(2, budget / 10) : 0)
+}
+
 def update_hrtf(dt : float; nFrames : uint64) {
+    // Pass 1: rank 3D channels by distance² and assign is_hrtf with a sticky-margin rule.
+    // The sticky margin prevents thrashing when two channels swap rank near the budget boundary.
+    g_hrtf_scratch |> clear()
+    for (i in range(length(g_channels))) {
+        var ch = g_channels[i]
+        if (ch.is3D && !ch.stop) {
+            let delta = ch.position3d - g_head_position
+            g_hrtf_scratch |> push((dist2 = dot(delta, delta), idx = i))
+        }
+    }
+    g_hrtf_scratch |> sort() $(a, b) => a.dist2 < b.dist2
+    let total3D = length(g_hrtf_scratch)
+    let budget = g_hrtf_budget
+    var hrtfCount = 0
+    for (rank in range(total3D)) {
+        let chIdx = g_hrtf_scratch[rank].idx
+        var ch = g_channels[chIdx]
+        let wasHrtf = ch.is_hrtf
+        let newHrtf = hrtf_budget_classify(rank, budget, wasHrtf)
+        if (newHrtf != wasHrtf) {
+            // linear_pan=true for HRTF (input is stereo from HRTF, unity at center); =false for simulated (constant-power panning of mono)
+            ma_volume_mixer_set_linear_pan(unsafe(addr(ch.volume_mixer)), newHrtf)
+            ch.is_hrtf = newHrtf
+        }
+        if (newHrtf) {
+            hrtfCount ++
+        }
+    }
+    g_stats_hrtf_count = hrtfCount
+    g_stats_3d_count = total3D
+
+    // Pass 2: per-channel position/pan/volume — routing decision already made above.
     for (ch in g_channels) {
         if (ch.is3D && !ch.stop) {
             var rxy = ch.position3d.xy - g_head_position.xy
             rxy = float2(rxy.x * g_head_direction.x + rxy.y * g_head_direction.y,
                         -rxy.x * g_head_direction.y + rxy.y * g_head_direction.x)
             let nrxy = normalize(rxy)
-            static_if (MA_HRTF) {
-                let asimuth = atan2(nrxy.y, nrxy.x)
-                let elevation = atan2(ch.position3d.z - g_head_position.z, length(rxy))
-                let iasimuth = int(asimuth * 180. / PI)
-                let ielevation = int(elevation * 180. / PI)
-                ma_hrtf_set_direction(unsafe(addr(ch.hrtf)), iasimuth, ielevation)
+            if (ch.is_hrtf) {
+                static_if (MA_HRTF) {
+                    let asimuth = atan2(nrxy.y, nrxy.x)
+                    let elevation = atan2(ch.position3d.z - g_head_position.z, length(rxy))
+                    let iasimuth = int(asimuth * 180. / PI)
+                    let ielevation = int(elevation * 180. / PI)
+                    ma_hrtf_set_direction(unsafe(addr(ch.hrtf)), iasimuth, ielevation)
+                }
+                // HRTF carries spatial cues; keep the volume_mixer's pan centered
+                ma_volume_mixer_set_pan(unsafe(addr(ch.volume_mixer)), 0.0)
             } else {
-                // panning
+                // simulated 3D — constant-power pan via the volume_mixer
                 ma_volume_mixer_set_pan(unsafe(addr(ch.volume_mixer)), nrxy.y)
             }
-            // volume attenuation — ramp over frame to avoid clicks
+            // volume attenuation — ramp over frame to avoid clicks. Simulated channels are scaled by the
+            // calibrated frontal-gain normalizer so they match HRTF channels at the same position.
             let distance = length(ch.position3d - g_head_position)
             let attn = compute_attenuation(ch.attenuation, distance)
-            ma_volume_mixer_set_volume_over_time(unsafe(addr(ch.volume_mixer)), ch.volume * attn, nFrames)
+            let g = ch.ignoreGlobalVolume ? 1.0 : g_volume
+            let modeGain = ch.is_hrtf ? 1.0 : g_hrtf_frontal_gain
+            let target = ch.volume * attn * g * modeGain
+            if (ch.setup3D) {
+                ma_volume_mixer_set_volume(unsafe(addr(ch.volume_mixer)), target)
+                ch.setup3D = false
+            } else {
+                ma_volume_mixer_set_volume_over_time(unsafe(addr(ch.volume_mixer)), target, nFrames)
+            }
             // doppler
             let vrel = g_head_velocity - ch.velocity3d
             let r = normalize(ch.position3d - g_head_position)
@@ -686,7 +785,7 @@ variant AudioCommand {
     add_pcm_stream_3d       : AudioCommandAddPCMStream3D
     append_pcm              : tuple<sid : SID; samples : array<float>>
     append_box_pcm          : tuple<sid : SID; box : uint64>  //! opaque LockBox? (archive-safe)
-    pause                   : tuple<sid : SID; paused : bool>
+    pause                   : tuple<sid : SID; paused : bool; time : float>
     volume                  : tuple<sid : SID; volume : float; time : float>
     pan                     : tuple<sid : SID; pan : float>
     pitch                   : tuple<sid : SID; pitch : float>
@@ -699,6 +798,10 @@ variant AudioCommand {
     chorus_cmd              : tuple<sid : SID; config : ma_chorus_config>
     set_playback_position   : tuple<sid : SID; position : uint64>
     global_pause            : bool
+    global_volume           : float
+    ignore_global_volume    : tuple<sid : SID; value : bool>
+    hrtf_budget             : int
+    system_stats_box        : uint64       //! opaque LockBox? (archive-safe)
 }
 
 var g_command_stream : Stream?
@@ -715,6 +818,8 @@ def add_channel(sid : SID; var channel : AudioChannel?) {
         channel.sid = sid
         g_sid_2_channel |> insert(sid, channel)
     }
+    let g = channel.ignoreGlobalVolume ? 1.0 : g_volume
+    ma_volume_mixer_set_volume(unsafe(addr(channel.volume_mixer)), channel.volume * g)
 }
 
 def add_channel_3d(sid : SID; position : float3; attenuation : Attenuation; var channel : AudioChannel?) {
@@ -740,6 +845,12 @@ def command_processor {
                 delete g_channels
             }
             g_sid_2_channel |> clear()
+            if (g_stats_box != null) {
+                // stats box is a read-only long-lived publication target, not a one-shot;
+                // never `notify_and_release` (the caller's `get()` doesn't consume notifications).
+                g_stats_box |> release
+                g_stats_box = null
+            }
             g_command_stream |> release
         } elif (cmd is add_decoder) {
             assume dcmd = cmd as add_decoder
@@ -793,6 +904,8 @@ def command_processor {
         } elif (cmd is pause) {
             let pcmd = cmd as pause
             g_sid_2_channel |> get(pcmd.sid) $(var ch : AudioChannel?&) {
+                let nFrames = max(1.0, pcmd.time * float(MA_SAMPLE_RATE))
+                ch.pause_fade_step = 1.0 / nFrames
                 if (pcmd.paused) {
                     ch.pause_fade_target = 0.0  // fade out to pause
                 } else {
@@ -804,11 +917,13 @@ def command_processor {
             let vcmd = cmd as volume
             g_sid_2_channel |> get(vcmd.sid) $(var ch : AudioChannel?&) {
                 ch.volume = vcmd.volume
+                let g = ch.ignoreGlobalVolume ? 1.0 : g_volume
+                let effective = vcmd.volume * g
                 if (vcmd.time > 0.) {
                     let nFrames = uint64(vcmd.time * float(MA_SAMPLE_RATE))
-                    ma_volume_mixer_set_volume_over_time(unsafe(addr(ch.volume_mixer)), vcmd.volume, nFrames)
+                    ma_volume_mixer_set_volume_over_time(unsafe(addr(ch.volume_mixer)), effective, nFrames)
                 } else {
-                    ma_volume_mixer_set_volume(unsafe(addr(ch.volume_mixer)), vcmd.volume)
+                    ma_volume_mixer_set_volume(unsafe(addr(ch.volume_mixer)), effective)
                 }
             }
         } elif (cmd is pan) {
@@ -825,6 +940,35 @@ def command_processor {
             g_pitch = cmd as global_pitch
         } elif (cmd is global_pause) {
             g_pause = cmd as global_pause // todo: envelope?
+        } elif (cmd is global_volume) {
+            g_volume = cmd as global_volume
+            let nFrames = uint64(0.025 * float(MA_SAMPLE_RATE))
+            for (it in values(g_sid_2_channel)) {
+                // 3D channels are handled in update_hrtf which reads g_volume each callback
+                if (!it.stop && !it.ignoreGlobalVolume && !it.is3D) {
+                    ma_volume_mixer_set_volume_over_time(unsafe(addr(it.volume_mixer)), it.volume * g_volume, nFrames)
+                }
+            }
+        } elif (cmd is ignore_global_volume) {
+            let icmd = cmd as ignore_global_volume
+            g_sid_2_channel |> get(icmd.sid) $(var ch : AudioChannel?&) {
+                ch.ignoreGlobalVolume = icmd.value
+                let effective = ch.volume * (ch.ignoreGlobalVolume ? 1.0 : g_volume)
+                ma_volume_mixer_set_volume(unsafe(addr(ch.volume_mixer)), effective)
+            }
+        } elif (cmd is hrtf_budget) {
+            g_hrtf_budget = max(0, cmd as hrtf_budget)
+        } elif (cmd is system_stats_box) {
+            let sbox = cmd as system_stats_box
+            if (g_stats_box != null) {
+                g_stats_box |> release
+                g_stats_box = null
+            }
+            // Reset the rolling window so a re-registered box starts with a clean accumulator
+            // instead of inheriting partial data from the previous registration.
+            g_stats_window_time_ms = 0.0lf
+            g_stats_window_samples = 0ul
+            g_stats_box = unsafe(reinterpret<LockBox?>(sbox))
         } elif (cmd is stop) {
             let scmd = cmd as stop
             g_sid_2_channel |> get(scmd.sid) $(var ch : AudioChannel?&) {
@@ -900,6 +1044,15 @@ def command_processor {
 var g_limiter : ma_limiter
 var g_mix_buffer : array<float>
 var g_pause : bool = false
+var g_volume : float = 1.  //! global master volume multiplier (1.0 = full, 0.0 = mute)
+
+// AudioSystemStats publication state (audio-context side; the main side holds the LockBox handle and reads it via get()).
+var g_stats_box : LockBox? = null
+var g_stats_window_time_ms = 0.0lf      // accumulated mixer wall time over the current window
+var g_stats_window_samples = 0ul        // accumulated audio frames over the current window (flush gate: realTimeMs >= 1000)
+var g_stats_recent_util_pct : float = 0.0
+var g_stats_3d_count : int = 0
+var g_stats_hrtf_count : int = 0
 
 [export]
 def mixer(var data : array<float>#; channels, rate : int; dt : float) {
@@ -935,8 +1088,32 @@ def mixer(var data : array<float>#; channels, rate : int; dt : float) {
             uint64(output_samples))
         g_mix_buffer |> erase(0, output_samples * channels)
     }
-    g_mixer_total_time += double(get_time_usec(t0)) / 1000.lf
+    let dt_ms = double(get_time_usec(t0)) / 1000.lf
+    g_mixer_total_time += dt_ms
     g_mixer_total_samples += uint64(length(data) / channels)
+    publish_audio_stats(dt_ms, uint64(length(data) / channels))
+}
+
+// Accumulate per-callback mixer time + frames until the window spans >= 1 second of audio. On flush,
+// compute utilization in percent (mixer time / real time * 100), reset the window, and update the stats LockBox.
+def private publish_audio_stats(dt_ms : double; frames : uint64) {
+    return if (g_stats_box == null)
+    g_stats_window_time_ms += dt_ms
+    g_stats_window_samples += frames
+    let realTimeMs = double(g_stats_window_samples) * 1000.lf / double(MA_SAMPLE_RATE)
+    if (realTimeMs < 1000.lf) return
+    let util = g_stats_window_time_ms / realTimeMs * 100.lf
+    g_stats_recent_util_pct = float(util)
+    g_stats_window_time_ms = 0.0lf
+    g_stats_window_samples = 0ul
+    let snap = g_stats_recent_util_pct
+    let hrtfCount = g_stats_hrtf_count
+    let total3D = g_stats_3d_count
+    g_stats_box |> update() $(var s : AudioSystemStats#) {
+        s.utilization_pct = snap
+        s.hrtf_count = hrtfCount
+        s.total_3d = total3D
+    }
 }
 
 [init]
@@ -949,7 +1126,68 @@ def initialize_mixer {
             MA_LIMITER_RELEASE_TIME,
             float(MA_SAMPLE_RATE),
             uint(MA_CHANNELS))
+        static_if (MA_HRTF) {
+            calibrate_hrtf_frontal_gain()
+        }
+    }
+}
+
+// Measure the HRTF's broadband gain at azimuth=0, elevation=0 with a 1 kHz sine probe and
+// compute the loudness-matching normalizer for the simulated-3D path.
+//
+// Derivation (per-channel amplitude at the volume_mixer output, head-on source):
+//   * HRTF path:      linear pan, identity at pan=0  -> per-channel = HRIR_gain * input
+//   * Simulated path: constant-power pan, 1/sqrt(2)  -> per-channel = 0.707 * input * normalizer
+//   Equality at center =>  normalizer = HRIR_gain / 0.707 = HRIR_gain * sqrt(2)
+//
+// HRIR_gain is the output/input amplitude ratio for the sine probe (≈ |H(1 kHz)|, the magnitude of the
+// HRIR's transfer function at the probe frequency). A DC (constant) probe would be a poor choice because
+// the HRIR filters attenuate DC strongly; a ~1 kHz sine gives a more representative broadband estimate.
+def private calibrate_hrtf_frontal_gain {
+    let probeFrames = 2048
+    let measureStart = 512    // skip the HRTF crossfade region; measure the steady-state tail
+    var probe_hrtf : ma_hrtf
+    ma_hrtf_init(unsafe(addr(probe_hrtf)), uint(MA_SAMPLE_RATE))
+    // Per hrtf.h: first set_direction triggers a 256-sample crossfade against the zero filter;
+    // the second call leaves needs_crossfade=0 so the steady-state output is a clean measurement.
+    ma_hrtf_set_direction(unsafe(addr(probe_hrtf)), 0, 0)
+    ma_hrtf_set_direction(unsafe(addr(probe_hrtf)), 0, 0)
+    var input : array<float>
+    input |> resize(probeFrames)
+    let probeFreq = 1000.0
+    let phaseStep = 2.0 * PI * probeFreq / float(MA_SAMPLE_RATE)
+    for (i in range(probeFrames)) {
+        input[i] = sin(phaseStep * float(i))
+    }
+    var output : array<float>
+    output |> resize(probeFrames * 2)
+    ma_hrtf_process_frames(unsafe(addr(probe_hrtf)),
+        unsafe(addr(output[0])),
+        unsafe(addr(input[0])),
+        1u,
+        uint(probeFrames))
+    // Per-channel RMS over the steady-state tail; the frontal source is symmetric, so L and R energies are pooled.
+    var sumSq = 0.0
+    let nSamples = probeFrames - measureStart
+    for (i in range(measureStart, probeFrames)) {
+        let l = output[i * 2]
+        let r = output[i * 2 + 1]
+        sumSq += l * l + r * r
+    }
+    let perChannelRms = sqrt(sumSq / float(2 * nSamples))
+    let inputRms = sqrt(0.5)                            // sine of amplitude 1.0
+    let hrirGain = perChannelRms / inputRms             // ≈ |H(1 kHz)| of the frontal HRIR
+    let normalizer = hrirGain * sqrt(2.0)               // compensates the -3 dB const-power center pan
+    if (normalizer > 0.25 && normalizer < 4.0) {
+        g_hrtf_frontal_gain = normalizer
+    } else {
+        to_log(LOG_WARNING, "HRTF calibration produced out-of-range normalizer={normalizer} (hrirGain={hrirGain}); using 1.414 fallback\n")
+        g_hrtf_frontal_gain = sqrt(2.0)
     }
+    ma_hrtf_uninit(unsafe(addr(probe_hrtf)))
+    delete input
+    delete output
+    to_log(LOG_INFO, "HRTF frontal-gain calibration: perChannelRms={perChannelRms}, hrirGain={hrirGain}, normalizer={g_hrtf_frontal_gain}\n")
 }
         /*
         ma_limiter_init_linear(unsafe(addr(g_limiter)),
@@ -1022,8 +1260,9 @@ def make_decoder(filename : string; rate, channels : int) : ma_decoder? {
 // then pushed atomically as one Stream batch.
 var global_batch : array<array<uint8>>?
 
+[deprecated(message="use `batch(cb)` instead")]
 def public begin_batch() {
-    //! Begin batching audio commands. All commands until end_batch are sent atomically.
+    //! Deprecated. Use ``batch() { ... }`` instead.
     if (global_batch != null) {
         panic("nested batch")
     }
@@ -1031,8 +1270,9 @@ def public begin_batch() {
     global_batch = tempBatch
 }
 
+[deprecated(message="use `batch(cb)` instead")]
 def public end_batch() {
-    //! End batching and send all batched commands atomically.
+    //! Deprecated. Use ``batch() { ... }`` instead.
     if (global_batch == null) {
         panic("no batch")
     }
@@ -1072,69 +1312,70 @@ def push_cmd(var cmd : AudioCommand) {
     }
 }
 
-def public play_sound_from_file(filename : string; rate, channels : int) {
-    //! plays sound from file
+def public play_sound_from_file(filename : string; rate, channels : int; sid : SID = INVALID_SID) {
+    //! plays sound from file. If sid is INVALID_SID, a new SID is generated.
     //! note - this function is blocking for the duration of the decoder creation
     var decoder = make_decoder(filename, rate, channels)
     if (decoder == null) return INVALID_SID
-    let sid = generate_sound_sid()
-    push_cmd(AudioCommand(add_decoder = AudioCommandAddDecoder(sid = sid, decoder = intptr(decoder), rate = rate, channels = channels)))
-    return sid
+    let useSid = sid != INVALID_SID ? sid : generate_sound_sid()
+    push_cmd(AudioCommand(add_decoder = AudioCommandAddDecoder(sid = useSid, decoder = intptr(decoder), rate = rate, channels = channels)))
+    return useSid
 }
 
-def public play_3d_sound_from_file(filename : string; position : float3; attenuation : Attenuation; rate, channels : int) {
-    //! plays 3D sound from file
+def public play_3d_sound_from_file(filename : string; position : float3; attenuation : Attenuation; rate, channels : int; sid : SID = INVALID_SID) {
+    //! plays 3D sound from file. If sid is INVALID_SID, a new SID is generated.
     //! note - this function is blocking for the duration of the decoder creation
     var decoder = make_decoder(filename, rate, channels)
     if (decoder == null) return INVALID_SID
-    let sid = generate_sound_sid()
+    let useSid = sid != INVALID_SID ? sid : generate_sound_sid()
     push_cmd(AudioCommand(add_decoder_3d =
-        AudioCommandAddDecoder3D(sid = sid, decoder = intptr(decoder), rate = rate, channels = channels, position = position, attenuation = attenuation)))
-    return sid
+        AudioCommandAddDecoder3D(sid = useSid, decoder = intptr(decoder), rate = rate, channels = channels, position = position, attenuation = attenuation)))
+    return useSid
 }
 
-def public play_sound_from_pcm_stream(rate, channels : int; sid : SID = generate_sound_sid()) {
-    //! Create a PCM streaming channel. Feed it samples with append_to_pcm.
-    push_cmd(AudioCommand(add_pcm_stream <- AudioCommandAddPCMStream(sid = sid, rate = rate, channels = channels)))
-    return sid
+def public play_sound_from_pcm_stream(rate, channels : int; sid : SID = INVALID_SID) {
+    //! Create a PCM streaming channel. Feed it samples with append_to_pcm. If sid is INVALID_SID, a new SID is generated.
+    let useSid = sid != INVALID_SID ? sid : generate_sound_sid()
+    push_cmd(AudioCommand(add_pcm_stream <- AudioCommandAddPCMStream(sid = useSid, rate = rate, channels = channels)))
+    return useSid
 }
 
-def public play_3d_sound_from_pcm_stream(position : float3; attenuation : Attenuation; rate, channels : int) {
-    //! Create a 3D PCM streaming channel. Feed it samples with append_to_pcm.
-    let sid = generate_sound_sid()
+def public play_3d_sound_from_pcm_stream(position : float3; attenuation : Attenuation; rate, channels : int; sid : SID = INVALID_SID) {
+    //! Create a 3D PCM streaming channel. Feed it samples with append_to_pcm. If sid is INVALID_SID, a new SID is generated.
+    let useSid = sid != INVALID_SID ? sid : generate_sound_sid()
     push_cmd(AudioCommand(add_pcm_stream_3d <-
-        AudioCommandAddPCMStream3D(sid = sid, rate = rate, channels = channels, position = position, attenuation = attenuation)))
-    return sid
+        AudioCommandAddPCMStream3D(sid = useSid, rate = rate, channels = channels, position = position, attenuation = attenuation)))
+    return useSid
 }
 
-def public play_sound_from_pcm(rate, channels : int; var samples : array<float>) {
-    //! plays sound from PCM data
-    let sid = generate_sound_sid()
-    push_cmd(AudioCommand(add_pcm <- AudioCommandAddPCM(sid = sid, rate = rate, channels = channels, samples <- samples, loop = false)))
-    return sid
+def public play_sound_from_pcm(rate, channels : int; var samples : array<float>; sid : SID = INVALID_SID) {
+    //! plays sound from PCM data. If sid is INVALID_SID, a new SID is generated.
+    let useSid = sid != INVALID_SID ? sid : generate_sound_sid()
+    push_cmd(AudioCommand(add_pcm <- AudioCommandAddPCM(sid = useSid, rate = rate, channels = channels, samples <- samples, loop = false)))
+    return useSid
 }
 
-def public play_sound_loop_from_pcm(rate, channels : int; var samples : array<float>) {
-    //! plays looping sound from PCM data
-    let sid = generate_sound_sid()
-    push_cmd(AudioCommand(add_pcm <- AudioCommandAddPCM(sid = sid, rate = rate, channels = channels, samples <- samples, loop = true)))
-    return sid
+def public play_sound_loop_from_pcm(rate, channels : int; var samples : array<float>; sid : SID = INVALID_SID) {
+    //! plays looping sound from PCM data. If sid is INVALID_SID, a new SID is generated.
+    let useSid = sid != INVALID_SID ? sid : generate_sound_sid()
+    push_cmd(AudioCommand(add_pcm <- AudioCommandAddPCM(sid = useSid, rate = rate, channels = channels, samples <- samples, loop = true)))
+    return useSid
 }
 
-def public play_3d_sound_from_pcm(position : float3; attenuation : Attenuation; rate, channels : int; var samples : array<float>) {
-    //! plays 3D sound from PCM data
-    let sid = generate_sound_sid()
+def public play_3d_sound_from_pcm(position : float3; attenuation : Attenuation; rate, channels : int; var samples : array<float>; sid : SID = INVALID_SID) {
+    //! plays 3D sound from PCM data. If sid is INVALID_SID, a new SID is generated.
+    let useSid = sid != INVALID_SID ? sid : generate_sound_sid()
     push_cmd(AudioCommand(add_pcm_3d <-
-        AudioCommandAddPCM3D(sid = sid, rate = rate, channels = channels, samples <- samples, loop = false, position = position, attenuation = attenuation)))
-    return sid
+        AudioCommandAddPCM3D(sid = useSid, rate = rate, channels = channels, samples <- samples, loop = false, position = position, attenuation = attenuation)))
+    return useSid
 }
 
-def public play_3d_sound_loop_from_pcm(position : float3; attenuation : Attenuation; rate, channels : int; var samples : array<float>) {
-    //! plays 3D looping sound from PCM data
-    let sid = generate_sound_sid()
+def public play_3d_sound_loop_from_pcm(position : float3; attenuation : Attenuation; rate, channels : int; var samples : array<float>; sid : SID = INVALID_SID) {
+    //! plays 3D looping sound from PCM data. If sid is INVALID_SID, a new SID is generated.
+    let useSid = sid != INVALID_SID ? sid : generate_sound_sid()
     push_cmd(AudioCommand(add_pcm_3d <-
-        AudioCommandAddPCM3D(sid = sid, rate = rate, channels = channels, samples <- samples, loop = true, position = position, attenuation = attenuation)))
-    return sid
+        AudioCommandAddPCM3D(sid = useSid, rate = rate, channels = channels, samples <- samples, loop = true, position = position, attenuation = attenuation)))
+    return useSid
 }
 
 def public append_to_pcm(sid : SID; var samples : array<float>) {
@@ -1151,9 +1392,9 @@ def public append_box_to_pcm(sid : SID; box : LockBox?; var samples : array<floa
     return sid
 }
 
-def public set_pause(sid : SID; paused : bool) {
-    //! pause or unpause sound
-    push_cmd(AudioCommand(pause = (sid = sid, paused = paused)))
+def public set_pause(sid : SID; paused : bool; time : float = 0.002f) {
+    //! pause or unpause sound; ``time`` is the fade duration in seconds (default ~2ms to avoid clicks)
+    push_cmd(AudioCommand(pause = (sid = sid, paused = paused, time = time)))
     return sid
 }
 
@@ -1185,6 +1426,38 @@ def public set_global_pause(pause : bool) {
     push_cmd(AudioCommand(global_pause = pause))
 }
 
+def public set_global_volume(volume : float) {
+    //! Set global master volume (multiplier applied on top of each channel's own volume; channels flagged via set_ignore_global_volume are exempt).
+    push_cmd(AudioCommand(global_volume = volume))
+}
+
+def public set_ignore_global_volume(sid : SID; value : bool) {
+    //! Make a specific channel ignore the global master volume (used by editor preview so muting the
+    //! game's master volume does not silence the asset preview).
+    push_cmd(AudioCommand(ignore_global_volume = (sid = sid, value = value)))
+}
+
+def public set_hrtf_budget(n : int) {
+    //! Set the maximum number of 3D channels routed through HRTF each frame; the rest run simulated 3D
+    //! (constant-power pan + distance attenuation, no convolution). Use 0 for all-simulated, or a large
+    //! value (e.g. 999) for all-HRTF. Default is 32. The closest-to-head channels win the HRTF slots.
+    push_cmd(AudioCommand(hrtf_budget = n))
+}
+
+def public set_audio_stats_box(var box : LockBox?) {
+    //! Register a LockBox to receive periodic AudioSystemStats updates from the audio thread.
+    //! The audio thread writes one snapshot per ~1-second window; the caller reads it on demand
+    //! via ``box |> get() $(var s : AudioSystemStats) { ... }`` (read-only — keeps the box alive
+    //! across multiple reads). Pass null to clear. Do NOT use ``grab()``: that's a one-shot consume.
+    if (box == null) {
+        push_cmd(AudioCommand(system_stats_box = 0ul))
+        return
+    }
+    box |> add_ref
+    box |> set() <| new AudioSystemStats(utilization_pct = 0.0, hrtf_count = 0, total_3d = 0)
+    push_cmd(AudioCommand(system_stats_box = intptr(box)))
+}
+
 def public stop(sid : SID; time : float = 0.0f) {
     //! stop sound
     push_cmd(AudioCommand(stop = (sid = sid, time = time)))
diff --git a/modules/dasAudio/src/dasAudio.cpp b/modules/dasAudio/src/dasAudio.cpp
index 05378f2f8a..2a4e287db7 100644
--- a/modules/dasAudio/src/dasAudio.cpp
+++ b/modules/dasAudio/src/dasAudio.cpp
@@ -299,6 +299,7 @@ bool dasAudio_init ( TFunc<void,TTemporary<TArray<float>>,int32_t,int32_t,float>
         return false;
     }
     g_mixer_context.reset(get_clone_context(&context,uint32_t(ContextCategory::audio_context)));
+    g_mixer_context->verySafeContext = false;
     g_mixer_function = mixer;
     g_mixer_env = daScriptEnvironment::getBound();
     if ( ma_device_start(&g_device) != MA_SUCCESS ) {
diff --git a/modules/dasAudio/strudel/strudel_midi_player.das b/modules/dasAudio/strudel/strudel_midi_player.das
index 3b14fe5983..3cee68862b 100644
--- a/modules/dasAudio/strudel/strudel_midi_player.das
+++ b/modules/dasAudio/strudel/strudel_midi_player.das
@@ -1,5 +1,7 @@
 options gen2
 options indenting = 4
+options persistent_heap
+options gc
 options no_unused_block_arguments = false
 options no_unused_function_arguments = false
 
@@ -553,7 +555,7 @@ def midi_tick(var state : MidiPlaybackState; chunk_seconds : float) : array<floa
         state.mixer_ready = true
     }
     let iSr = 1.0 / float(SAMPLE_RATE)
-    let tp = float(TWO_PI)
+    let tp = TWO_PI
     var i = length(state.voices) - 1
     while (i >= 0) {
         var voice = state.voices[i]
@@ -828,9 +830,9 @@ def private midi_thread_main(sid : uint64; var cmd_stream : Stream?; var done_st
     let TARGET_CHUNKS = 4
     let LOW_WATERMARK = 2
     var tracks : array<MidiPlaybackState?>
-    var status_box <- unsafe(lock_box_create())
+    var status_box <- lock_box_create()
     set_status_update(sid, status_box)
-    var pcm_box <- unsafe(lock_box_create())
+    var pcm_box <- lock_box_create()
     // initialize reverb (convolution: 2s decay, 15kHz→1kHz lowpass sweep)
     g_reverb = new ConvolutionReverb
     conv_reverb_init(g_reverb, SAMPLE_RATE, 2.0, 15000.0, 1000.0, 0.01)
@@ -862,7 +864,7 @@ def private midi_thread_main(sid : uint64; var cmd_stream : Stream?; var done_st
                     looping = tc.looping
                 ))
             } elif (cmd is remove_track) {
-                let name = string(cmd as remove_track)
+                let name = cmd as remove_track
                 var k = length(tracks) - 1
                 while (k >= 0) {
                     if (tracks[k].name == name) {
@@ -1024,7 +1026,7 @@ def public midi_init() {
         g_midi_sid = play_sound_from_pcm_stream(MA_SAMPLE_RATE, MA_CHANNELS)
     }
     g_midi_cmd_stream = unsafe(stream_create())
-    g_midi_done_status = unsafe(job_status_create())
+    g_midi_done_status = job_status_create()
     g_midi_done_status |> append(1)                   // expect one notification on worker exit
     // @capture auto-bumps refcount for Stream and JobStatus — no manual add_ref needed
     let sid = g_midi_sid
diff --git a/modules/dasAudio/strudel/strudel_player.das b/modules/dasAudio/strudel/strudel_player.das
index 37431d8a10..4e3aa023ed 100644
--- a/modules/dasAudio/strudel/strudel_player.das
+++ b/modules/dasAudio/strudel/strudel_player.das
@@ -1,5 +1,7 @@
 options gen2
 options indenting = 4
+options persistent_heap
+options gc
 options no_unused_block_arguments = false
 options no_unused_function_arguments = false
 
@@ -355,7 +357,7 @@ def private strudel_process_commands(cmd_fn : function<(cmd : string) : void>) :
         } elif (cmd is set_cps) {
             g_cps = cmd as set_cps
         } elif (cmd is user_cmd) {
-            let ucmd = string(cmd as user_cmd)
+            let ucmd = cmd as user_cmd
             invoke(cmd_fn, ucmd)
         }
     }
@@ -368,10 +370,10 @@ def public strudel_create_channel() {
     //! Create the PCM stream for main-thread playback. Call once after audio_system_create() and before strudel_tick.
     if (g_sid == 0ul) {
         g_sid = play_sound_from_pcm_stream(MA_SAMPLE_RATE, MA_CHANNELS)
-        g_tick_status_box = unsafe(lock_box_create())
+        g_tick_status_box = lock_box_create()
         g_tick_status_box |> add_ref()
         set_status_update(g_sid, g_tick_status_box)
-        g_tick_pcm_box = unsafe(lock_box_create())
+        g_tick_pcm_box = lock_box_create()
         // memory tracking baseline
         g_mem_heap_baseline = heap_bytes_allocated()
         g_mem_str_baseline = string_heap_bytes_allocated()
@@ -396,7 +398,7 @@ def public strudel_tick() {
     }
     let t0 = ref_time_ticks()
     let CHUNK_SEC = g_chunk_seconds
-    let chunkSamples = int(float(CHUNK_SEC) * float(SAMPLE_RATE)) * 2
+    let chunkSamples = int(CHUNK_SEC * float(SAMPLE_RATE)) * 2
     // prepare master output
     g_master_pcm |> resize(chunkSamples)
     for (s in g_master_pcm) {
@@ -414,7 +416,7 @@ def public strudel_tick() {
             track.sched.lastQueryEnd = g_wall_time * g_cps
             track.sched.cps = g_cps
         }
-        tick(track.sched, track.pat, g_bank, g_wall_time, float(CHUNK_SEC), g_look_ahead)
+        tick(track.sched, track.pat, g_bank, g_wall_time, CHUNK_SEC, g_look_ahead)
         // mix scheduler output into master with track gain
         if (!empty(track.sched.output) && track.gain > 0.001) {
             // copy to per-track PCM for visualizer access
@@ -435,9 +437,9 @@ def public strudel_tick() {
         // update fade envelope
         if (track.fade_speed > 0.0) {
             if (track.gain < track.target_gain) {
-                track.gain = min(track.gain + track.fade_speed * float(CHUNK_SEC), track.target_gain)
+                track.gain = min(track.gain + track.fade_speed * CHUNK_SEC, track.target_gain)
             } elif (track.gain > track.target_gain) {
-                track.gain = max(track.gain - track.fade_speed * float(CHUNK_SEC), track.target_gain)
+                track.gain = max(track.gain - track.fade_speed * CHUNK_SEC, track.target_gain)
             }
             if (abs(track.gain - track.target_gain) < 0.001) {
                 track.gain = track.target_gain
@@ -455,7 +457,7 @@ def public strudel_tick() {
     if (!empty(g_master_pcm)) {
         append_box_to_pcm(g_sid, g_tick_pcm_box, g_master_pcm)
     }
-    let chunkFrames = int(float(CHUNK_SEC) * float(SAMPLE_RATE))
+    let chunkFrames = int(CHUNK_SEC * float(SAMPLE_RATE))
     g_wall_samples += int64(chunkFrames)
     g_wall_time = double(g_wall_samples) / double(SAMPLE_RATE)
     ma_volume_mixer_uninit(unsafe(addr(mixer)))
@@ -481,7 +483,7 @@ def public strudel_tick_offline() {
     //! Renders into g_master_pcm and advances g_wall_time; call in a tight loop for offline WAV rendering.
     let t0 = ref_time_ticks()
     let CHUNK_SEC = g_chunk_seconds
-    let chunkSamples = int(float(CHUNK_SEC) * float(SAMPLE_RATE)) * 2
+    let chunkSamples = int(CHUNK_SEC * float(SAMPLE_RATE)) * 2
     // prepare master output
     g_master_pcm |> resize(chunkSamples)
     for (s in g_master_pcm) {
@@ -499,7 +501,7 @@ def public strudel_tick_offline() {
             track.sched.lastQueryEnd = g_wall_time * g_cps
             track.sched.cps = g_cps
         }
-        tick(track.sched, track.pat, g_bank, g_wall_time, float(CHUNK_SEC), g_look_ahead)
+        tick(track.sched, track.pat, g_bank, g_wall_time, CHUNK_SEC, g_look_ahead)
         // mix scheduler output into master with track gain
         if (!empty(track.sched.output) && track.gain > 0.001) {
             // copy to per-track PCM for visualizer access
@@ -520,9 +522,9 @@ def public strudel_tick_offline() {
         // update fade envelope
         if (track.fade_speed > 0.0) {
             if (track.gain < track.target_gain) {
-                track.gain = min(track.gain + track.fade_speed * float(CHUNK_SEC), track.target_gain)
+                track.gain = min(track.gain + track.fade_speed * CHUNK_SEC, track.target_gain)
             } elif (track.gain > track.target_gain) {
-                track.gain = max(track.gain - track.fade_speed * float(CHUNK_SEC), track.target_gain)
+                track.gain = max(track.gain - track.fade_speed * CHUNK_SEC, track.target_gain)
             }
             if (abs(track.gain - track.target_gain) < 0.001) {
                 track.gain = track.target_gain
@@ -536,7 +538,7 @@ def public strudel_tick_offline() {
             g_tracks[i] = null
         }
     }
-    let chunkFrames = int(float(CHUNK_SEC) * float(SAMPLE_RATE))
+    let chunkFrames = int(CHUNK_SEC * float(SAMPLE_RATE))
     g_wall_samples += int64(chunkFrames)
     g_wall_time = double(g_wall_samples) / double(SAMPLE_RATE)
     ma_volume_mixer_uninit(unsafe(addr(mixer)))
@@ -568,12 +570,12 @@ def public strudel_play(cmd_fn : function<(cmd : string) : void> = @@strudel_noo
     var total_time = 0.lf
     var total_samples = 0ul
     var max_chunk_usec = 0.0lf
-    var status_box <- unsafe(lock_box_create())
+    var status_box <- lock_box_create()
     set_status_update(g_sid, status_box)
-    var pcm_box <- unsafe(lock_box_create())
+    var pcm_box <- lock_box_create()
     var mixer : ma_volume_mixer
     ma_volume_mixer_init(unsafe(addr(mixer)), 2u)
-    let chunkSamples = int(float(CHUNK_SEC) * float(SAMPLE_RATE)) * 2
+    let chunkSamples = int(CHUNK_SEC * float(SAMPLE_RATE)) * 2
     var output : array<float>
     output |> resize(chunkSamples)
     // memory tracking
@@ -603,7 +605,7 @@ def public strudel_play(cmd_fn : function<(cmd : string) : void> = @@strudel_noo
                 track.sched.lastQueryEnd = g_wall_time * g_cps
                 track.sched.cps = g_cps
             }
-            tick(track.sched, track.pat, *g_bank_ptr, g_wall_time, float(CHUNK_SEC), g_look_ahead)
+            tick(track.sched, track.pat, *g_bank_ptr, g_wall_time, CHUNK_SEC, g_look_ahead)
             // mix into output with track gain
             if (!empty(track.sched.output) && track.gain > 0.001) {
                 ma_volume_mixer_set_volume(unsafe(addr(mixer)), track.gain)
@@ -620,9 +622,9 @@ def public strudel_play(cmd_fn : function<(cmd : string) : void> = @@strudel_noo
             // update fade envelope
             if (track.fade_speed > 0.0) {
                 if (track.gain < track.target_gain) {
-                    track.gain = min(track.gain + track.fade_speed * float(CHUNK_SEC), track.target_gain)
+                    track.gain = min(track.gain + track.fade_speed * CHUNK_SEC, track.target_gain)
                 } elif (track.gain > track.target_gain) {
-                    track.gain = max(track.gain - track.fade_speed * float(CHUNK_SEC), track.target_gain)
+                    track.gain = max(track.gain - track.fade_speed * CHUNK_SEC, track.target_gain)
                 }
                 if (abs(track.gain - track.target_gain) < 0.001) {
                     track.gain = track.target_gain
@@ -721,12 +723,12 @@ def public strudel_init(fn : function<() : void>; cmd_fn : function<(cmd : strin
     g_cmd_fn = cmd_fn
     // create playback time lockbox
     if (g_playback_box == null) {
-        g_playback_box = unsafe(lock_box_create())
+        g_playback_box = lock_box_create()
     }
     // create command stream and done status (thread-exit signal, wait group of 1).
     // @capture auto-bumps refcount for Stream and JobStatus — no manual add_ref needed.
     g_cmd_stream = unsafe(stream_create())
-    g_done_status = unsafe(job_status_create())
+    g_done_status = job_status_create()
     g_done_status |> append(1)
     // create PCM stream on main thread (audio globals are initialized here)
     let thread_sid = play_sound_from_pcm_stream(MA_SAMPLE_RATE, MA_CHANNELS)
diff --git a/tests/aot/CMakeLists.txt b/tests/aot/CMakeLists.txt
index acd517b48a..61a542f1b7 100644
--- a/tests/aot/CMakeLists.txt
+++ b/tests/aot/CMakeLists.txt
@@ -197,6 +197,7 @@ IF(NOT DAS_AUDIO_DISABLED)
         tests/strudel/test_signals.das
         tests/strudel/test_synthesis.das
         tests/strudel/test_vowel.das
+        tests/audio/test_hrtf_budget.das
     )
 ENDIF()
 
@@ -290,6 +291,11 @@ FILE(GLOB AOT_MACRO_BOOST_FILES RELATIVE ${PROJECT_SOURCE_DIR} CONFIGURE_DEPENDS
 # by the actual test file.
 list(FILTER AOT_MACRO_BOOST_FILES EXCLUDE REGEX "/_")
 
+# AOT for with_boost test files
+FILE(GLOB AOT_WITH_BOOST_FILES RELATIVE ${PROJECT_SOURCE_DIR} CONFIGURE_DEPENDS "tests/with_boost/*.das")
+# Exclude failed_* expected-failure compile tests — they're not AOT-able.
+list(FILTER AOT_WITH_BOOST_FILES EXCLUDE REGEX "/failed_")
+
 # Macro_boost test module files (probe call_macro required transitively by tests)
 SET(AOT_MACRO_BOOST_MODULE_FILES
     tests/macro_boost/_has_sideeffects_probe.das
@@ -591,6 +597,10 @@ add_custom_target(test_aot_macro_boost)
 SET(MACRO_BOOST_AOT_GENERATED_SRC)
 DAS_AOT("${AOT_MACRO_BOOST_FILES}" MACRO_BOOST_AOT_GENERATED_SRC test_aot_macro_boost daslang)
 
+add_custom_target(test_aot_with_boost)
+SET(WITH_BOOST_AOT_GENERATED_SRC)
+DAS_AOT("${AOT_WITH_BOOST_FILES}" WITH_BOOST_AOT_GENERATED_SRC test_aot_with_boost daslang)
+
 add_custom_target(test_aot_macro_boost_modules)
 SET(MACRO_BOOST_MODULES_AOT_GENERATED_SRC)
 DAS_AOT_LIB("${AOT_MACRO_BOOST_MODULE_FILES}" MACRO_BOOST_MODULES_AOT_GENERATED_SRC test_aot_macro_boost_modules daslang)
@@ -735,6 +745,7 @@ SOURCE_GROUP_FILES("aot generated" LINQ_MODULES_AOT_GENERATED_SRC)
 SOURCE_GROUP_FILES("aot generated" MACRO_CALL_AOT_GENERATED_SRC)
 SOURCE_GROUP_FILES("aot generated" MACRO_BOOST_AOT_GENERATED_SRC)
 SOURCE_GROUP_FILES("aot generated" MACRO_BOOST_MODULES_AOT_GENERATED_SRC)
+SOURCE_GROUP_FILES("aot generated" WITH_BOOST_AOT_GENERATED_SRC)
 SOURCE_GROUP_FILES("aot generated" MATCH_AOT_GENERATED_SRC)
 SOURCE_GROUP_FILES("aot generated" MATH_AOT_GENERATED_SRC)
 SOURCE_GROUP_FILES("aot generated" MATH_MODULES_AOT_GENERATED_SRC)
@@ -806,6 +817,7 @@ add_executable(test_aot ${DAS_DASCRIPT_MAIN_SRC}
     ${MACRO_CALL_AOT_GENERATED_SRC}
     ${MACRO_BOOST_AOT_GENERATED_SRC}
     ${MACRO_BOOST_MODULES_AOT_GENERATED_SRC}
+    ${WITH_BOOST_AOT_GENERATED_SRC}
     ${MATCH_AOT_GENERATED_SRC}
     ${MATH_AOT_GENERATED_SRC}
     ${MATH_MODULES_AOT_GENERATED_SRC}
@@ -859,6 +871,7 @@ ADD_DEPENDENCIES(test_aot libDaScriptAot
     test_aot_linq test_aot_linq_modules
     test_aot_macro_call
     test_aot_macro_boost test_aot_macro_boost_modules
+    test_aot_with_boost
     test_aot_match
     test_aot_math test_aot_math_modules test_aot_module_tests
     test_aot_option
diff --git a/tests/audio/test_hrtf_budget.das b/tests/audio/test_hrtf_budget.das
new file mode 100644
index 0000000000..ae368c8b45
--- /dev/null
+++ b/tests/audio/test_hrtf_budget.das
@@ -0,0 +1,72 @@
+options gen2
+options indenting = 4
+
+require dastest/testing_boost
+require audio/audio_boost
+
+// Unit tests for the HRTF/simulated-3D budget classifier. The classifier is the pure rank-vs-budget
+// decision update_hrtf consults each frame to decide whether a channel runs HRTF (expensive, top-N
+// closest) or simulated 3D (constant-power pan + attenuation, cheaper). Sticky margin in rank space
+// prevents flapping when channels swap rank between frames, but must NOT preserve HRTF status when
+// the budget is 0 (otherwise "all simulated" doesn't clear in-flight HRTF channels).
+
+[test]
+def test_full_hrtf_budget_keeps_everything_hrtf(t : T?) {
+    t |> run("budget=999: every rank routed to HRTF regardless of prior state") @(tt : T?) {
+        success(tt, hrtf_budget_classify(0, 999, true), "rank 0, wasHrtf -> HRTF")
+        success(tt, hrtf_budget_classify(0, 999, false), "rank 0, !wasHrtf -> HRTF (flip in)")
+        success(tt, hrtf_budget_classify(31, 999, true), "rank 31, wasHrtf -> HRTF")
+        success(tt, hrtf_budget_classify(998, 999, false), "rank below budget flips in")
+        success(tt, !hrtf_budget_classify(999, 999, false), "rank == budget for !wasHrtf is NOT under-budget")   // boundary check
+    }
+}
+
+[test]
+def test_zero_budget_clears_all_hrtf(t : T?) {
+    t |> run("budget=0: sticky margin must NOT preserve HRTF — all channels go simulated") @(tt : T?) {
+        // Regression case for the sticky-margin bug: with budget=0 the stickyTop is clamped to 0,
+        // so a channel that was HRTF on the previous frame must not retain HRTF status.
+        success(tt, !hrtf_budget_classify(0, 0, true), "rank 0, wasHrtf -> SIM (clears in-flight)")
+        success(tt, !hrtf_budget_classify(0, 0, false), "rank 0, !wasHrtf -> SIM")
+        success(tt, !hrtf_budget_classify(5, 0, true), "rank 5, wasHrtf -> SIM")
+        success(tt, !hrtf_budget_classify(99, 0, false), "rank 99, !wasHrtf -> SIM")
+    }
+}
+
+[test]
+def test_mixed_budget_routes_top_n(t : T?) {
+    t |> run("budget=32: top-32 ranks HRTF, rest simulated (no sticky effect for fresh entries)") @(tt : T?) {
+        success(tt, hrtf_budget_classify(0, 32, false), "rank 0 below budget -> HRTF (flip in)")
+        success(tt, hrtf_budget_classify(31, 32, false), "rank 31 below budget -> HRTF")
+        success(tt, !hrtf_budget_classify(32, 32, false), "rank 32 == budget for !wasHrtf -> SIM")   // first rank outside the budget
+        success(tt, !hrtf_budget_classify(100, 32, false), "rank 100 -> SIM")
+    }
+}
+
+[test]
+def test_sticky_margin_in_rank_space(t : T?) {
+    t |> run("HRTF channels keep HRTF status across the budget+10% margin to avoid rank-swap flapping") @(tt : T?) {
+        // budget=32 gives stickyTop = 32 + max(2, 32/10) = 32 + 3 = 35 (exclusive upper bound)
+        success(tt, hrtf_budget_classify(32, 32, true), "rank 32, wasHrtf -> stays HRTF (sticky)")
+        success(tt, hrtf_budget_classify(34, 32, true), "rank 34, wasHrtf -> stays HRTF (sticky)")
+        success(tt, !hrtf_budget_classify(35, 32, true), "rank 35, wasHrtf -> SIM (past stickyTop)")
+        success(tt, !hrtf_budget_classify(36, 32, true), "rank 36, wasHrtf -> SIM")
+        // The margin only helps channels that already were HRTF: rank 33 coming from simulated stays simulated.
+        success(tt, !hrtf_budget_classify(33, 32, false), "rank 33, !wasHrtf -> SIM (budget unchanged)")
+    }
+}
+
+[test]
+def test_sticky_margin_floor_at_small_budgets(t : T?) {
+    t |> run("small budgets enforce a minimum margin of 2 so single-channel swaps don't immediately flip") @(tt : T?) {
+        // budget=10 -> stickyTop = 10 + max(2, 1) = 12
+        success(tt, hrtf_budget_classify(10, 10, true), "rank 10, wasHrtf -> stays HRTF")
+        success(tt, hrtf_budget_classify(11, 10, true), "rank 11, wasHrtf -> stays HRTF")
+        success(tt, !hrtf_budget_classify(12, 10, true), "rank 12 -> SIM (past min-margin top)")
+
+        // budget=1 -> stickyTop = 1 + max(2, 0) = 3
+        success(tt, hrtf_budget_classify(0, 1, true), "budget=1: rank 0 stays HRTF")
+        success(tt, hrtf_budget_classify(2, 1, true), "budget=1: rank 2 still sticky")
+        success(tt, !hrtf_budget_classify(3, 1, true), "budget=1: rank 3 falls off")
+    }
+}
diff --git a/tests/linq/test_linq_fold_collapse_chained_wheres.das b/tests/linq/test_linq_fold_collapse_chained_wheres.das
new file mode 100644
index 0000000000..9c17c22772
--- /dev/null
+++ b/tests/linq/test_linq_fold_collapse_chained_wheres.das
@@ -0,0 +1,147 @@
+options gen2
+
+require daslib/linq
+require daslib/linq_boost
+require daslib/linq_fold
+require dastest/testing_boost public
+
+// KR-1 regression coverage — PR B collapse_chained_wheres pre-pass.
+//
+// Before PR B, chains like `..._where(p1)._where(p2).reverse()...` no longer spliced through
+// `plan_reverse` / `plan_distinct`: the pattern row has a single optional `where_` slot, whereas
+// master's imperative code accepted and merged N wheres. The cascade fallback still ran, so
+// results stayed correct, but such (uncommon) chains regressed in performance. The decs mirror
+// `plan_decs_reverse` always composed via `merge_where_cond` in a loop, so this restores parity.
+//
+// These tests verify the COMPOSED predicate produces the same result as the manual
+// `_where(p1 && p2)` form. Splice-firing itself isn't asserted from runtime (would need AST
+// inspection); we rely on the per-archetype test files to exercise the emit paths and these
+// tests to assert composition correctness end-to-end.
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 1. plan_reverse surface
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_chained_wheres_reverse_n2(t : T?) {
+    t |> run("chained wheres N=2 + reverse + to_array: composed pred matches manual && form") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        let chained <- _fold(each(arr)._where(_ > 2)._where(_ < 8).reverse() |> to_array())
+        let merged <- _fold(each(arr)._where(_ > 2 && _ < 8).reverse() |> to_array())
+        equal(tc, length(chained), length(merged))
+        equal(tc, length(chained), 5)        // survivors 3..7, emitted in reverse order
+        for (idx in 0 .. length(chained)) {
+            equal(tc, chained[idx], merged[idx])
+        }
+        equal(tc, chained[0], 7)
+        equal(tc, chained[4], 3)
+    }
+}
+
+[test]
+def test_chained_wheres_reverse_n3(t : T?) {
+    t |> run("chained wheres N=3 + reverse + to_array: all three compose into single && chain") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        let chained <- _fold(each(arr)._where(_ > 1)._where(_ < 9)._where(_ % 2 == 0).reverse() |> to_array())
+        let merged <- _fold(each(arr)._where(_ > 1 && _ < 9 && _ % 2 == 0).reverse() |> to_array())
+        // the three predicates together keep [2, 4, 6, 8]; after reverse that is [8, 6, 4, 2]
+        equal(tc, length(chained), length(merged))
+        equal(tc, length(chained), 4)
+        for (idx in 0 .. length(chained)) {
+            equal(tc, chained[idx], merged[idx])
+        }
+        equal(tc, chained[0], 8)
+        equal(tc, chained[3], 2)
+    }
+}
+
+[test]
+def test_chained_wheres_reverse_first(t : T?) {
+    t |> run("chained wheres + reverse + first: scalar terminator splice (Rb archetype) sees composed pred") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        let head = _fold(each(arr)._where(_ > 3)._where(_ < 9).reverse().first())
+        // survivors are [4,5,6,7,8]; reversed they start with 8, which is what first() must yield
+        equal(tc, head, 8)
+    }
+}
+
+[test]
+def test_chained_wheres_reverse_count(t : T?) {
+    t |> run("chained wheres + reverse + count: counter terminator (Ra archetype) — reverse is identity for count") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        let matches = _fold(each(arr)._where(_ > 2)._where(_ < 8).reverse().count())
+        equal(tc, matches, 5)   // 3,4,5,6,7
+    }
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 2. plan_distinct surface
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_chained_wheres_distinct_n2(t : T?) {
+    t |> run("chained wheres N=2 + distinct + to_array: composed pred matches manual && form") @(tc : T?) {
+        let arr <- [1, 2, 1, 3, 2, 4, 3, 5, 4, 6]
+        let chained <- _fold(each(arr)._where(_ > 1)._where(_ < 6) |> distinct() |> to_array())
+        let merged <- _fold(each(arr)._where(_ > 1 && _ < 6) |> distinct() |> to_array())
+        equal(tc, length(chained), length(merged))
+        equal(tc, length(chained), 4)        // unique survivors in first-occurrence order: 2,3,4,5
+        for (idx in 0 .. length(chained)) {
+            equal(tc, chained[idx], merged[idx])
+        }
+    }
+}
+
+[test]
+def test_chained_wheres_distinct_n3(t : T?) {
+    t |> run("chained wheres N=3 + distinct + count: all three compose; count fast-path") @(tc : T?) {
+        let arr <- [1, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8]
+        let uniques = _fold(each(arr)._where(_ > 1)._where(_ < 8)._where(_ != 4) |> distinct() |> count())
+        // the unique values passing all three predicates are 2,3,5,6,7
+        equal(tc, uniques, 5)
+    }
+}
+
+[test]
+def test_chained_wheres_distinct_take(t : T?) {
+    t |> run("chained wheres + distinct + take(N): bounded splice path with composed pred") @(tc : T?) {
+        let arr <- [1, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8]
+        let firstThree <- _fold(each(arr)._where(_ > 1)._where(_ < 8) |> distinct() |> take(3) |> to_array())
+        // the first three distinct values in (1, 8) are 2, 3, 4
+        equal(tc, length(firstThree), 3)
+        equal(tc, firstThree[0], 2)
+        equal(tc, firstThree[1], 3)
+        equal(tc, firstThree[2], 4)
+    }
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 3. Edge cases: no collapse when call between, single where unaffected
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_single_where_unchanged(t : T?) {
+    t |> run("single where unchanged: collapse pre-pass is a no-op on chains without adjacent wheres") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let out <- _fold(each(arr)._where(_ > 2).reverse() |> to_array())
+        equal(tc, length(out), 3)   // 3,4,5 reversed
+        equal(tc, out[0], 5)
+        equal(tc, out[2], 3)
+    }
+}
+
+[test]
+def test_wheres_split_by_select_no_collapse(t : T?) {
+    t |> run("wheres with select between: NOT collapsed (collapse only fires on ADJACENT wheres)") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        // Neither pre-pass rewrites this chain: there are no chained selects, and the two wheres are
+        // separated by a select, so it stays [where, select, where, reverse, to_array].
+        // plan_reverse's pattern rows allow only one optional `where_` slot before `reverse`, which cannot
+        // absorb the where that follows the select. The cascade handles it; the result must still be right.
+        let out <- _fold(each(arr)._where(_ > 2)._select(_ * 10)._where(_ < 80).reverse() |> to_array())
+        // 3..10 scaled by 10 is 30..100; keeping values below 80 leaves 30..70; reversed: 70,60,50,40,30
+        equal(tc, length(out), 5)
+        equal(tc, out[0], 70)
+        equal(tc, out[4], 30)
+    }
+}
diff --git a/tests/linq/test_linq_fold_iterator_wrap.das b/tests/linq/test_linq_fold_iterator_wrap.das
new file mode 100644
index 0000000000..24f0167131
--- /dev/null
+++ b/tests/linq/test_linq_fold_iterator_wrap.das
@@ -0,0 +1,266 @@
+options gen2
+
+require daslib/linq
+require daslib/linq_boost
+require daslib/linq_fold
+require dastest/testing_boost public
+
+// PR A R3c — Source × terminator matrix for buffer-emitting splice emit fns.
+//
+// For each chain shape that goes through a buffer-emitting emit fn, the splice must produce a result
+// whose static type matches the un-spliced chain. The full matrix is:
+//
+//                     │ no terminator (chain ends bare)  │ explicit `.to_array()` terminator
+//   ──────────────────┼──────────────────────────────────┼──────────────────────────────────
+//   iter source       │ iterator<T>                       │ array<T>
+//   (`.to_sequence()`)│                                   │
+//   array source      │ iterator<T>                       │ array<T>
+//   (`each(arr)`)     │                                   │
+//
+// The `iter source → iterator<T>` and `array source → iterator<T>` rows are the regression coverage
+// for R1/R2/R3 — `buffer_return(name, ctx.expr_is_iterator)`. The two array<T> corners protect against
+// future over-correction (always wrapping with `to_sequence_move`). `typeinfo typename(got)` is the
+// load-bearing assertion. Array-bound tests use `let got` (immutable, no consuming for-loop) and the
+// assertion strings reflect the resulting `array<int> const` binding type.
+//
+// Shapes covered per matrix:
+//   1. `reverse()` only             → emit_reverse_buffer_inplace (catch-all R1-R4)
+//   2. `distinct()` / `distinct_by` → emit_hashtable_dedup
+//   3. `reverse() + distinct()`     → emit_reverse_backward_walk_dset_gate (array path, R-2a)
+//                                  /  emit_hashtable_dedup (iter path)
+//   4. `reverse() + take(N)`        → emit_reverse_backward_index_walk (array path, R6)
+//                                  /  emit_reverse_buffer_inplace (iter path)
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 1. reverse() — matrix
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_matrix_reverse_iter_to_iter(t : T?) {
+    t |> run("reverse(): iter src, no to_array → iterator<int>") @(tc : T?) {
+        var res <- _fold([1, 2, 3, 4, 5].to_sequence() |> reverse())
+        equal(tc, typeinfo typename(res), "iterator<int>")
+        var running = 0
+        for (item in res) {
+            running += item
+        }
+        equal(tc, running, 15)   // order-independent checksum of 1..5
+    }
+}
+
+[test]
+def test_matrix_reverse_iter_to_array(t : T?) {
+    t |> run("reverse(): iter src + to_array() → array<int> const") @(tc : T?) {
+        let res <- _fold([1, 2, 3, 4, 5].to_sequence() |> reverse() |> to_array())
+        equal(tc, typeinfo typename(res), "array<int> const")
+        equal(tc, length(res), 5)
+        equal(tc, res[0], 5)
+        equal(tc, res[4], 1)
+    }
+}
+
+[test]
+def test_matrix_reverse_array_to_iter(t : T?) {
+    t |> run("reverse(): array src, no to_array → iterator<int>") @(tc : T?) {
+        let arr = [1, 2, 3, 4, 5]
+        var res <- _fold(each(arr) |> reverse())
+        equal(tc, typeinfo typename(res), "iterator<int>")
+        var running = 0
+        for (item in res) {
+            running += item
+        }
+        equal(tc, running, 15)   // order-independent checksum of 1..5
+    }
+}
+
+[test]
+def test_matrix_reverse_array_to_array(t : T?) {
+    t |> run("reverse(): array src + to_array() → array<int> const") @(tc : T?) {
+        let arr = [1, 2, 3, 4, 5]
+        let res <- _fold(each(arr) |> reverse() |> to_array())
+        equal(tc, typeinfo typename(res), "array<int> const")
+        equal(tc, length(res), 5)
+        equal(tc, res[0], 5)
+        equal(tc, res[4], 1)
+    }
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 2. distinct() / distinct_by — matrix
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_matrix_distinct_iter_to_iter(t : T?) {
+    t |> run("distinct(): iter src, no to_array → iterator<int>") @(tc : T?) {
+        var res <- _fold([1, 2, 1, 3, 2, 3, 1].to_sequence() |> distinct())
+        equal(tc, typeinfo typename(res), "iterator<int>")
+        var uniq : array<int>
+        for (item in res) {
+            push(uniq, item)
+        }
+        equal(tc, length(uniq), 3)   // unique values are 1, 2, 3
+    }
+}
+
+[test]
+def test_matrix_distinct_iter_to_array(t : T?) {
+    t |> run("distinct(): iter src + to_array() → array<int> const") @(tc : T?) {
+        let res <- _fold([1, 2, 1, 3, 2, 3, 1].to_sequence() |> distinct() |> to_array())
+        equal(tc, typeinfo typename(res), "array<int> const")
+        equal(tc, length(res), 3)   // unique values are 1, 2, 3
+    }
+}
+
+[test]
+def test_matrix_distinct_array_to_iter(t : T?) {
+    t |> run("distinct(): array src, no to_array → iterator<int>") @(tc : T?) {
+        let arr = [1, 2, 1, 3, 2, 3, 1]
+        var res <- _fold(each(arr) |> distinct())
+        equal(tc, typeinfo typename(res), "iterator<int>")
+        var uniq : array<int>
+        for (item in res) {
+            push(uniq, item)
+        }
+        equal(tc, length(uniq), 3)   // unique values are 1, 2, 3
+    }
+}
+
+[test]
+def test_matrix_distinct_array_to_array(t : T?) {
+    t |> run("distinct(): array src + to_array() → array<int> const") @(tc : T?) {
+        let arr = [1, 2, 1, 3, 2, 3, 1]
+        let res <- _fold(each(arr) |> distinct() |> to_array())
+        equal(tc, typeinfo typename(res), "array<int> const")
+        equal(tc, length(res), 3)   // unique values are 1, 2, 3
+    }
+}
+
+[test]
+def test_matrix_distinct_by_iter_to_iter(t : T?) {
+    t |> run("distinct_by(_): iter src, no to_array → iterator<int>") @(tc : T?) {
+        var res <- _fold([10, 21, 32, 11, 23, 30, 13].to_sequence()._distinct_by(_ % 10))
+        equal(tc, typeinfo typename(res), "iterator<int>")
+        var hits = 0
+        for (_item in res) {
+            hits ++
+        }
+        equal(tc, hits, 4)   // keys (value % 10) are 0,1,2,1,3,0,3 — four distinct keys, first seen at 10,21,32,23
+    }
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 3. reverse() + distinct() — matrix
+//   Array path goes through emit_reverse_backward_walk_dset_gate (R-2a, single backward walk).
+//   Iter path falls through to emit_hashtable_dedup with an upstream reverse iterator.
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_matrix_reverse_distinct_iter_to_iter(t : T?) {
+    t |> run("reverse() + distinct(): iter src, no to_array → iterator<int>") @(tc : T?) {
+        var res <- _fold([3, 1, 2, 1, 3, 2].to_sequence() |> reverse() |> distinct())
+        equal(tc, typeinfo typename(res), "iterator<int>")
+        var items : array<int>
+        for (item in res) {
+            push(items, item)
+        }
+        // reversed input is [2,3,1,2,1,3]; keeping first occurrences gives [2,3,1]
+        equal(tc, length(items), 3)
+        equal(tc, items[0], 2)
+    }
+}
+
+[test]
+def test_matrix_reverse_distinct_iter_to_array(t : T?) {
+    t |> run("reverse() + distinct(): iter src + to_array() → array<int> const") @(tc : T?) {
+        let res <- _fold([3, 1, 2, 1, 3, 2].to_sequence() |> reverse() |> distinct() |> to_array())
+        equal(tc, typeinfo typename(res), "array<int> const")
+        equal(tc, length(res), 3)
+        equal(tc, res[0], 2)   // the last source element leads after reverse
+    }
+}
+
+[test]
+def test_matrix_reverse_distinct_array_to_iter(t : T?) {
+    t |> run("reverse() + distinct(): array src, no to_array → iterator<int>") @(tc : T?) {
+        let arr = [3, 1, 2, 1, 3, 2]
+        var res <- _fold(each(arr) |> reverse() |> distinct())
+        equal(tc, typeinfo typename(res), "iterator<int>")
+        var items : array<int>
+        for (item in res) {
+            push(items, item)
+        }
+        equal(tc, length(items), 3)
+        equal(tc, items[0], 2)   // the last source element leads after reverse
+    }
+}
+
+[test]
+def test_matrix_reverse_distinct_array_to_array(t : T?) {
+    t |> run("reverse() + distinct(): array src + to_array() → array<int> const") @(tc : T?) {
+        let arr = [3, 1, 2, 1, 3, 2]
+        let res <- _fold(each(arr) |> reverse() |> distinct() |> to_array())
+        equal(tc, typeinfo typename(res), "array<int> const")
+        equal(tc, length(res), 3)
+        equal(tc, res[0], 2)   // the last source element leads after reverse
+    }
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 4. reverse() + take(N) — matrix
+//   Array path goes through emit_reverse_backward_index_walk (R6, visits only last N indices).
+//   Iter path falls through to emit_reverse_buffer_inplace (full buffer + reverse_inplace + resize).
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_matrix_reverse_take_iter_to_iter(t : T?) {
+    t |> run("reverse() + take(N): iter src, no to_array → iterator<int>") @(tc : T?) {
+        var res <- _fold([10, 20, 30, 40, 50].to_sequence() |> reverse() |> take(2))
+        equal(tc, typeinfo typename(res), "iterator<int>")
+        var items : array<int>
+        for (item in res) {
+            push(items, item)
+        }
+        equal(tc, length(items), 2)   // the last two source elements, newest first
+        equal(tc, items[0], 50)
+        equal(tc, items[1], 40)
+    }
+}
+
+[test]
+def test_matrix_reverse_take_iter_to_array(t : T?) {
+    t |> run("reverse() + take(N): iter src + to_array() → array<int> const") @(tc : T?) {
+        let res <- _fold([10, 20, 30, 40, 50].to_sequence() |> reverse() |> take(2) |> to_array())
+        equal(tc, typeinfo typename(res), "array<int> const")
+        equal(tc, length(res), 2)
+        equal(tc, res[0], 50)
+        equal(tc, res[1], 40)
+    }
+}
+
+[test]
+def test_matrix_reverse_take_array_to_iter(t : T?) {
+    t |> run("reverse() + take(N): array src, no to_array → iterator<int>") @(tc : T?) {
+        let arr = [10, 20, 30, 40, 50]
+        var res <- _fold(each(arr) |> reverse() |> take(2))
+        equal(tc, typeinfo typename(res), "iterator<int>")
+        var items : array<int>
+        for (item in res) {
+            push(items, item)
+        }
+        equal(tc, length(items), 2)   // the last two source elements, newest first
+        equal(tc, items[0], 50)
+        equal(tc, items[1], 40)
+    }
+}
+
+[test]
+def test_matrix_reverse_take_array_to_array(t : T?) {
+    t |> run("reverse() + take(N): array src + to_array() → array<int> const") @(tc : T?) {
+        let arr = [10, 20, 30, 40, 50]
+        let res <- _fold(each(arr) |> reverse() |> take(2) |> to_array())
+        equal(tc, typeinfo typename(res), "array<int> const")
+        equal(tc, length(res), 2)
+        equal(tc, res[0], 50)
+        equal(tc, res[1], 40)
+    }
+}
diff --git a/tests/linq/test_linq_fold_loop_or_count.das b/tests/linq/test_linq_fold_loop_or_count.das
new file mode 100644
index 0000000000..9b388d8c86
--- /dev/null
+++ b/tests/linq/test_linq_fold_loop_or_count.das
@@ -0,0 +1,146 @@
+options gen2
+
+require daslib/linq
+require daslib/linq_boost
+require daslib/linq_fold
+require dastest/testing_boost public
+
+// PR B1 regression coverage — plan_loop_or_count migrated to pattern-table.
+//
+// The new architecture: `slot_chain_of(["where_", "select"], "head")` greedy-consumes the
+// pre-range head; canonical-order positional slots (skip / skip_while / take_while / take /
+// post_take_where / term) carry the rest. The emit fn (emit_loop_or_count_lane) walks
+// c.many["head"] applying the same where-after-select / chained-select / AND-merge logic the
+// imperative loop did, then dispatches to the same lane emit fns.
+//
+// These tests assert end-to-end correctness across the lane × head-shape matrix.
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 1. Canonical chains — one lane per terminator family
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_counter_lane(t : T?) {
+    t |> run("counter lane: where + count") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        let matches = _fold(each(arr)._where(_ > 3).count())
+        equal(tc, matches, 7)   // 4..10
+    }
+}
+
+[test]
+def test_array_lane(t : T?) {
+    t |> run("array lane: select + explicit to_array") @(tt : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let buf <- _fold(each(arr)._select(_ * 10) |> to_array())   // chain ends in an explicit to_array() terminator
+        tt |> equal(length(buf), 5)
+        tt |> equal(buf[0], 10)
+        tt |> equal(buf[4], 50)
+    }
+}
+
+[test]
+def test_accumulator_lane(t : T?) {
+    t |> run("accumulator lane: where + select + sum") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        let doubledSum = _fold(each(arr)._where(_ > 2)._select(_ * 2).sum())
+        // the kept values 3..10 add up to 52; doubling each gives 104
+        equal(tc, doubledSum, 104)
+    }
+}
+
+[test]
+def test_early_exit_lane(t : T?) {
+    t |> run("early-exit lane: where + any") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let hit = _fold(each(arr)._where(_ > 3).any())      // 4 and 5 qualify
+        let miss = _fold(each(arr)._where(_ > 100).any())   // nothing qualifies
+        equal(tc, hit, true)
+        equal(tc, miss, false)
+    }
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 2. Where-after-select rebinding (single & multiple)
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_where_after_select(t : T?) {
+    t |> run("where(p2) after select(f) sees PROJECTED value, not raw source") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        // first where keeps [2,3,4,5]; select scales to [20,30,40,50]; second where (on projected values) keeps [20,40]
+        let matches = _fold(each(arr)._where(_ > 1)._select(_ * 10)._where(_ % 20 == 0).count())
+        equal(tc, matches, 2)
+    }
+}
+
+[test]
+def test_multiple_wheres_post_select(t : T?) {
+    t |> run("two wheres after a select: collapse_chained_wheres composes them") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        // doubled values are 2..20 (even); above 5 leaves 6..20; multiples of 4 among those: 8,12,16,20
+        let matches = _fold(each(arr)._select(_ * 2)._where(_ > 5)._where(_ % 4 == 0).count())
+        equal(tc, matches, 4)
+    }
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 3. Range op chains (skip / take / take_while)
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_skip_take_count(t : T?) {
+    t |> run("where + skip(N) + take(M) + count") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        // the where passes everything; skipping 3 leaves 4..10; taking 4 leaves [4,5,6,7]
+        let matches = _fold(each(arr)._where(_ > 0).skip(3).take(4).count())
+        equal(tc, matches, 4)
+    }
+}
+
+[test]
+def test_take_while_sum(t : T?) {
+    t |> run("take_while + sum: streams until predicate fails") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6]
+        // the predicate first fails at 4, so only [1,2,3] are summed
+        let prefixSum = _fold(each(arr)._take_while(_ < 4).sum())
+        equal(tc, prefixSum, 6)
+    }
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 4. Post-take where (Theme 2 5c — gates contribution; take cap still ticks)
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_post_take_where(t : T?) {
+    t |> run("take(N)._where(p): take fires unconditionally, where gates per-element acc") @(tc : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        // the first five elements are 1..5; of those only 2 and 4 are even
+        let matches = _fold(each(arr).take(5)._where(_ % 2 == 0).count())
+        equal(tc, matches, 2)
+    }
+}
+
+// ═════════════════════════════════════════════════════════════════════════════
+// 5. Fast paths — length shortcut + any-empty shortcut on length-bearing source
+// ═════════════════════════════════════════════════════════════════════════════
+
+[test]
+def test_length_shortcut(t : T?) {
+    t |> run("count() on bare length-bearing source: emit_length_shortcut") @(tc : T?) {
+        let arr <- [10, 20, 30, 40, 50]
+        let total = _fold(each(arr).count())
+        equal(tc, total, 5)   // same as length(arr)
+    }
+}
+
+[test]
+def test_any_empty_shortcut(t : T?) {
+    t |> run("any() no-pred on bare length-bearing source: emit_any_empty_shortcut") @(tc : T?) {
+        let arr <- [1, 2, 3]
+        var empty_arr : array<int>
+        equal(tc, _fold(each(arr).any()), true)          // non-empty source
+        equal(tc, _fold(each(empty_arr).any()), false)   // default-initialized, empty source
+    }
+}
diff --git a/tests/linq/test_linq_fold_pattern_walker.das b/tests/linq/test_linq_fold_pattern_walker.das
new file mode 100644
index 0000000000..99cfa03772
--- /dev/null
+++ b/tests/linq/test_linq_fold_pattern_walker.das
@@ -0,0 +1,170 @@
+options gen2
+
+require daslib/ast_boost
+require daslib/linq_fold
+require dastest/testing_boost public
+
+// PR A — pattern-table refactor: walker + prefix-conflict lint tests.
+//
+// The per-plan pattern tables (`plan_reverse_patterns`, `plan_distinct_patterns`) are populated by
+// `[_macro]` functions at MACRO time, not at runtime — the function pointers in the rows reference
+// `[macro_function]` emit fns whose bodies the LLVM JIT can't lower (quote() nodes). So at runtime
+// these tables read as empty; the lint helpers below are exercised on synthetic tables instead.
+// End-to-end pattern selection is covered by the existing test_linq_fold_*.das suite (each user
+// chain exercises the emit fns through `_fold`).
+
+[test]
+def test_alias_table_resolves(t : T?) {
+    t |> run("alias_table: PR A aliases populated (runtime-initialized via literal)") @(tt : T?) {
+        equal(tt, key_exists(alias_table, "distinct_family"), true)              // presence of each alias first
+        equal(tt, key_exists(alias_table, "first_family"), true)
+        equal(tt, key_exists(alias_table, "count_family"), true)
+        equal(tt, key_exists(alias_table, "distinct_terminator_family"), true)
+        equal(tt, length(alias_table["distinct_family"]), 2)                     // then the member counts
+        equal(tt, length(alias_table["first_family"]), 2)
+        equal(tt, length(alias_table["distinct_terminator_family"]), 3)   // count / long_count / sum — see linq_fold.das alias_table
+    }
+}
+
+// ─── Prefix-conflict helper: positive case (synthetic patterns we KNOW shadow) ───
+
+[test]
+def test_chain_prefix_of_positive(t : T?) {
+    t |> run("chain_prefix_of: A is a strict prefix of B when A's slots structurally match B's first N") @(tt : T?) {
+        let head_only : array<Slot> <- [
+            Slot(matcher = m_literal("reverse"), cardinality = c_one())
+        ]
+        let head_then_take : array<Slot> <- [
+            Slot(matcher = m_literal("reverse"), cardinality = c_one()),
+            Slot(matcher = m_literal("take"), cardinality = c_one(), capture_name = "take")
+        ]
+        equal(tt, chain_prefix_of(head_only, head_then_take), true)    // the one-slot chain equals slot 0 of the two-slot chain
+        equal(tt, chain_prefix_of(head_then_take, head_only), false)   // the two-slot chain cannot prefix the one-slot chain
+    }
+}
+
+[test]
+def test_chain_prefix_of_negative(t : T?) {
+    t |> run("chain_prefix_of: different matchers at any position break the prefix") @(tt : T?) {
+        let rev_count : array<Slot> <- [
+            Slot(matcher = m_literal("reverse"), cardinality = c_one()),
+            Slot(matcher = m_literal("count"), cardinality = c_one())
+        ]
+        let rev_take_first : array<Slot> <- [
+            Slot(matcher = m_literal("reverse"), cardinality = c_one()),
+            Slot(matcher = m_literal("take"), cardinality = c_one()),
+            Slot(matcher = m_literal("first"), cardinality = c_one())
+        ]
+        equal(tt, chain_prefix_of(rev_count, rev_take_first), false)   // slot 0 agrees; slot 1 is "count" vs "take"
+    }
+}
+
+[test]
+def test_chain_prefix_of_alias_equality(t : T?) {
+    t |> run("chain_prefix_of: alias matchers compare by alias name") @(tt : T?) {
+        let distinct_only : array<Slot> <- [
+            Slot(matcher = m_alias("distinct_family"), cardinality = c_one())
+        ]
+        let distinct_then_take : array<Slot> <- [
+            Slot(matcher = m_alias("distinct_family"), cardinality = c_one()),
+            Slot(matcher = m_literal("take"), cardinality = c_opt())
+        ]
+        let first_then_take : array<Slot> <- [
+            Slot(matcher = m_alias("first_family"), cardinality = c_one()),
+            Slot(matcher = m_literal("take"), cardinality = c_opt())
+        ]
+        equal(tt, chain_prefix_of(distinct_only, distinct_then_take), true)   // same alias name at slot 0
+        equal(tt, chain_prefix_of(distinct_only, first_then_take), false)     // different alias name at slot 0
+    }
+}
+
+def null_emit(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? {
+    return null  // stub: ignores every argument; this file only uses it as the `emit` field of synthetic SplicePattern rows
+}
+
+// ─── c_chain cardinality (PR B) — constructor + lint-helper structural tests ───
+// Semantic coverage of c_chain (empty / all-match / prefix-then-stop / arity gate) is end-to-end
+// via tests/linq/test_linq_fold_loop_or_count.das — the walker stays private and is exercised
+// through plan_loop_or_count's pattern table just like plan_reverse / plan_distinct in PR A.
+
+[test]
+def test_slot_chain_of_constructs_expected_shape(t : T?) {
+    t |> run("slot_chain_of: produces c_chain cardinality + one_of matcher + capture_name") @(tt : T?) {
+        let built = slot_chain_of(["where_", "select"], "head")
+        equal(tt, built.cardinality is chain, true)       // cardinality is the chain variant and neither of the other two
+        equal(tt, built.cardinality is one, false)
+        equal(tt, built.cardinality is optional, false)
+        equal(tt, built.matcher is one_of, true)          // matcher is the one_of variant holding the accepted names
+        let accepted & = unsafe(built.matcher as one_of)
+        equal(tt, length(accepted), 2)
+        equal(tt, accepted[0], "where_")
+        equal(tt, accepted[1], "select")
+        equal(tt, built.capture_name, "head")
+        equal(tt, built.arity, -1)
+    }
+}
+
+[test]
+def test_slots_structurally_match_distinguishes_c_chain(t : T?) {
+    t |> run("slots_structurally_match: c_chain vs c_one vs c_opt at same matcher are distinct shapes") @(tt : T?) {
+        // Every slot uses the one_of ["x"] matcher and capture name "h"; the chains below differ in cardinality.
+        let chainHead : array<Slot> <- [slot_chain_of(["x"], "h")]
+        let oneThenOpt : array<Slot> <- [
+            Slot(matcher = SlotMatcher(one_of <- ["x"]), cardinality = c_one(), capture_name = "h"),
+            Slot(matcher = SlotMatcher(one_of <- ["x"]), cardinality = c_opt(), capture_name = "h")
+        ]
+        let chainThenOpt : array<Slot> <- [
+            slot_chain_of(["x"], "h"),
+            Slot(matcher = SlotMatcher(one_of <- ["x"]), cardinality = c_opt(), capture_name = "h")
+        ]
+        equal(tt, chain_prefix_of(chainHead, oneThenOpt), false)    // slot 0: c_chain vs c_one. NOTE(review): c_opt only sits at slot 1, so c_chain vs c_opt is presumably never compared here — confirm
+        equal(tt, chain_prefix_of(chainHead, chainThenOpt), true)   // slot 0: c_chain on both sides
+    }
+}
+
+[test]
+def test_check_pattern_table_reachable_accepts_c_chain(t : T?) {
+    t |> run("check_pattern_table_reachable: c_chain head + distinct trailers stay reachable (no shadow)") @(tt : T?) {
+        var rows : array<SplicePattern>
+        rows |> emplace <| SplicePattern(
+            name = "chain_then_take",
+            chain <- [
+                slot_chain_of(["where_", "select"], "head"),
+                Slot(matcher = m_literal("take"), cardinality = c_opt(), capture_name = "take")
+            ],
+            emit = @@ < EmitFn > null_emit
+        )
+        rows |> emplace <| SplicePattern(
+            name = "chain_then_count",
+            chain <- [
+                slot_chain_of(["where_", "select"], "head"),
+                Slot(matcher = m_literal("count"), cardinality = c_opt(), capture_name = "term")
+            ],
+            emit = @@ < EmitFn > null_emit
+        )
+        equal(tt, check_pattern_table_reachable("synthetic-c_chain-trailers", rows), true)   // identical heads, trailers differ ("take" vs "count")
+    }
+}
+
+[test]
+def test_check_pattern_table_reachable_catches_shadow(t : T?) {
+    t |> run("check_pattern_table_reachable: returns false for a table with a strict-prefix shadow") @(tt : T?) {
+        var rows : array<SplicePattern>
+        rows |> emplace <| SplicePattern(
+            name = "short",
+            chain <- [
+                Slot(matcher = m_literal("reverse"), cardinality = c_one())
+            ],
+            emit = @@ < EmitFn > null_emit
+        )
+        rows |> emplace <| SplicePattern(
+            name = "long_shadowed",
+            chain <- [
+                Slot(matcher = m_literal("reverse"), cardinality = c_one()),
+                Slot(matcher = m_literal("take"), cardinality = c_opt())
+            ],
+            emit = @@ < EmitFn > null_emit
+        )
+        equal(tt, check_pattern_table_reachable("synthetic-shadowed", rows), false)   // "short" is listed first and is a prefix of "long_shadowed"
+    }
+}
diff --git a/tests/linq/test_linq_fold_terminal_select.das b/tests/linq/test_linq_fold_terminal_select.das
index 0b735732bc..381cf8fddf 100644
--- a/tests/linq/test_linq_fold_terminal_select.das
+++ b/tests/linq/test_linq_fold_terminal_select.das
@@ -128,6 +128,74 @@ def test_reverse_select_first_array(t : T?) {
     }
 }
 
+// PR A extension: chains with BOTH a pre-reverse `_select(f)` AND a post-reverse `_select(g)` were
+// previously rejected by the imperative plan_reverse (it had a `!seenSelect` guard before letting any
+// terminal _select fire). The new pattern-table forms (R1-R4 + Rb) accept the combined shape because
+// emit composes `pushExpr` from the pre-projection and the terminal-select pass from the post-projection;
+// there's no semantic conflict (it's plain function composition over the reversed survivors).
+
+[test]
+def test_reverse_pre_and_post_select_array(t : T?) {
+    t |> run("plan_reverse R1-R4: where + select(f) + reverse + take + select(g) — both selects compose") @(tt : T?) {
+        let src <- make_sounds()
+        unsafe {
+            // after _select(_.id * 10) : [10, 20, 30, 40, 50]
+            // after reverse()          : [50, 40, 30, 20, 10]
+            // after take(3)            : [50, 40, 30]
+            // after _select(_ + 1)     : [51, 41, 31]
+            let projected <- _fold(each(src)._where(_.rank > 0)._select(_.id * 10).reverse().take(3)._select(_ + 1).to_array())
+            equal(tt, length(projected), 3)
+            equal(tt, projected[0], 51)
+            equal(tt, projected[1], 41)
+            equal(tt, projected[2], 31)
+        }
+    }
+}
+
+[test]
+def test_reverse_pre_and_post_select_first(t : T?) {
+    t |> run("plan_reverse Rb: where + select(f) + reverse + select(g) + first — both selects compose") @(tt : T?) {
+        let src <- make_sounds()
+        unsafe {
+            // after _select(_.id * 10) : [10, 20, 30, 40, 50]
+            // after reverse()          : [50, 40, 30, 20, 10]
+            // after _select(_ + 1)     : [51, 41, 31, 21, 11]
+            // first()                  : 51
+            let head = _fold(each(src)._where(_.rank > 0)._select(_.id * 10).reverse()._select(_ + 1).first())
+            equal(tt, head, 51)
+        }
+    }
+}
+
+// Regression for the bug Copilot caught in R6: emit must NOT re-project the user's `first_or_default` default
+// through the post-reverse `_select(g)`. The default is already typed at the post-select element type, so
+// applying termsel(d) double-applies and miscomputes when the chain hits the empty branch.
+
+[test]
+def test_reverse_pre_and_post_select_first_or_default_nonempty(t : T?) {
+    t |> run("plan_reverse Rb: where + select(f) + reverse + select(g) + first_or_default — nonempty hits found branch") @(tt : T?) {
+        let src <- make_sounds()
+        unsafe {
+            // Elements survive the filter, so the found branch applies the post-select: 50 + 1 = 51.
+            let head = _fold(each(src)._where(_.rank > 0)._select(_.id * 10).reverse()._select(_ + 1).first_or_default(-99))
+            equal(tt, head, 51)
+        }
+    }
+}
+
+[test]
+def test_reverse_pre_and_post_select_first_or_default_empty(t : T?) {
+    t |> run("plan_reverse Rb: where + select(f) + reverse + select(g) + first_or_default — empty seq returns RAW default, not termsel(default)") @(tt : T?) {
+        let src <- make_sounds()
+        unsafe {
+            // `_.rank > 9999` rejects every element, so the default branch is taken.
+            // The default must come back untouched (-99); pushing it through `_ + 1` would give -98.
+            let fallback = _fold(each(src)._where(_.rank > 9999)._select(_.id * 10).reverse()._select(_ + 1).first_or_default(-99))
+            equal(tt, fallback, -99)
+        }
+    }
+}
+
 [test]
 def test_reverse_take_select_decs(t : T?) {
     t |> run("plan_decs_reverse: reverse + take + terminal _select") @(tt : T?) {
diff --git a/tests/with_boost/failed_with_arity_mismatch.das b/tests/with_boost/failed_with_arity_mismatch.das
new file mode 100644
index 0000000000..e9ab5d4a2d
--- /dev/null
+++ b/tests/with_boost/failed_with_arity_mismatch.das
@@ -0,0 +1,22 @@
+options gen2
+expect 50503
+
+// Explicitly-typed block param so pre-macro typing succeeds; the macro
+// then catches the arity mismatch.
+
+require daslib/with_boost
+
+struct A {
+    f1 : int
+}
+
+[export]
+def main {  // compile-fail fixture: see `expect 50503` at the top of this file
+    var a <- [A(f1=1)]
+    var b <- [A(f1=2)]
+    with_(a[0], b[0]) $(var x : A) {  // must be refused: two container args (a[0], b[0]) but the block declares only one param
+        print("{x.f1}")
+    }
+    delete a
+    delete b
+}
diff --git a/tests/with_boost/failed_with_array_literal.das b/tests/with_boost/failed_with_array_literal.das
new file mode 100644
index 0000000000..df65b6fd17
--- /dev/null
+++ b/tests/with_boost/failed_with_array_literal.das
@@ -0,0 +1,17 @@
+options gen2
+expect 50503
+
+require daslib/with_boost
+
+struct A {
+    f1 : int
+}
+
+[export]
+def main {  // compile-fail fixture: see `expect 50503` at the top of this file
+    // The container is an array literal, i.e. a temporary whose lifetime ends
+    // with the expression, so it can't be ref-bound. Refused at macro time.
+    with_([A(f1 = 1), A(f1 = 2)][0]) {
+        print("body")
+    }
+}
diff --git a/tests/with_boost/failed_with_function_call.das b/tests/with_boost/failed_with_function_call.das
new file mode 100644
index 0000000000..ad3529612c
--- /dev/null
+++ b/tests/with_boost/failed_with_function_call.das
@@ -0,0 +1,19 @@
+options gen2
+expect 50503
+
+require daslib/with_boost
+
+struct A {
+    f1 : int
+}
+
+def get_struct() : A {
+    return A(f1 = 42)  // returns a fresh `A` by value; `main` passes this call straight to with_
+}
+
+[export]
+def main {  // compile-fail fixture: see `expect 50503` at the top of this file
+    with_(get_struct()) {  // refuse: argument is a function call, not an index expression (ExprAt)
+        print("body")
+    }
+}
diff --git a/tests/with_boost/failed_with_local.das b/tests/with_boost/failed_with_local.das
new file mode 100644
index 0000000000..33ce54fdba
--- /dev/null
+++ b/tests/with_boost/failed_with_local.das
@@ -0,0 +1,16 @@
+options gen2
+expect 50503
+
+require daslib/with_boost
+
+struct A {
+    f1 : int
+}
+
+[export]
+def main {  // compile-fail fixture: see `expect 50503` at the top of this file
+    var local = A(f1 = 1)
+    with_(local) {  // refuse: argument is a plain local variable, not an index expression (ExprAt)
+        print("body")
+    }
+}
diff --git a/tests/with_boost/failed_with_nested_array.das b/tests/with_boost/failed_with_nested_array.das
new file mode 100644
index 0000000000..b84c0eda33
--- /dev/null
+++ b/tests/with_boost/failed_with_nested_array.das
@@ -0,0 +1,20 @@
+options gen2
+expect 50503
+
+require daslib/with_boost
+
+struct Outer {
+    inner : array<int>
+}
+
+[export]
+def main {  // compile-fail fixture: see `expect 50503` at the top of this file
+    var arr : array<Outer>
+    // Refused at macro time: the indexed container `arr[0].inner` itself
+    // contains an index expression (ExprAt `arr[0]`). The macro would only
+    // lock `inner`, leaving `arr` mutable — a push to `arr` inside the body
+    // would reallocate it and invalidate the `inner` ref.
+    with_(arr[0].inner[0]) {
+        print("body")
+    }
+}
diff --git a/tests/with_boost/failed_with_two_tables.das b/tests/with_boost/failed_with_two_tables.das
new file mode 100644
index 0000000000..2829045cc5
--- /dev/null
+++ b/tests/with_boost/failed_with_two_tables.das
@@ -0,0 +1,20 @@
+options gen2
+expect 50503
+
+// Explicitly-typed block params so daslang's pre-macro block typing
+// succeeds; the macro then reaches its single-table-only rule.
+
+require daslib/with_boost
+
+struct A {
+    f1 : int
+}
+
+[export]
+def main {  // compile-fail fixture: see `expect 50503` at the top of this file
+    var t1 : table<string; A>
+    var t2 : table<string; A>
+    with_(t1["k"], t2["q"]) $(var a : A; var b : A) {  // refuse: both args are table-keyed; at most one table-keyed arg is allowed per call
+        print("{a.f1} {b.f1}")
+    }
+}
diff --git a/tests/with_boost/test_with_array.das b/tests/with_boost/test_with_array.das
new file mode 100644
index 0000000000..9c7483841b
--- /dev/null
+++ b/tests/with_boost/test_with_array.das
@@ -0,0 +1,107 @@
+options gen2
+options indenting = 4
+
+require dastest/testing_boost public
+require daslib/with_boost
+
+struct A {
+    f1 : int
+    f2 : int
+}
+
+[test]
+def test_single_arg_default_name(t : T?) {
+    //! Block without params: the element is reachable as `_`, and a write
+    //! through `_` must show up in the array element afterwards.
+    var items <- [A(f1=1, f2=2), A(f1=3, f2=4)]
+    with_(items[0]) {
+        _.f1 = 99
+    }
+    equal(t, items[0].f1, 99, "default _ mutation persists in arr[0]")
+    equal(t, items[0].f2, 2, "unrelated field untouched")
+    equal(t, items[1].f1, 3, "other element untouched")
+    delete items
+}
+
+[test]
+def test_single_arg_named_binding(t : T?) {
+    //! Block with one user-named param: writes through that name must
+    //! reach the bound array element (both fields are checked).
+    var items <- [A(f1=10, f2=20)]
+    with_(items[0]) $(bound) {
+        bound.f1 = 111
+        bound.f2 = 222
+    }
+    equal(t, items[0].f1, 111, "named binding mutation persists")
+    equal(t, items[0].f2, 222, "second-field mutation persists")
+    delete items
+}
+
+[test]
+def test_two_arg_array(t : T?) {
+    //! Two containers, two block params bound in argument order; values
+    //! read from the second are written into the first.
+    var source <- [A(f1=100, f2=200)]
+    var target <- [A(f1=0, f2=0)]
+    with_(target[0], source[0]) $(outp, inp) {
+        outp.f1 = inp.f1 + 1
+        outp.f2 = inp.f2 + 2
+    }
+    equal(t, target[0].f1, 101, "dst.f1 = src.f1 + 1")
+    equal(t, target[0].f2, 202, "dst.f2 = src.f2 + 2")
+    delete source
+    delete target
+}
+
+[test]
+def test_three_arg_array(t : T?) {
+    //! Three containers and three block params in a single with_ call.
+    var acc <- [A(f1=0, f2=0)]
+    var lhs <- [A(f1=10, f2=20)]
+    var rhs <- [A(f1=100, f2=200)]
+    with_(acc[0], lhs[0], rhs[0]) $(sum, left, right) {
+        sum.f1 = left.f1 + right.f1
+        sum.f2 = left.f2 + right.f2
+    }
+    equal(t, acc[0].f1, 110, "sum across three arrays in f1")
+    equal(t, acc[0].f2, 220, "sum across three arrays in f2")
+    delete acc
+    delete lhs
+    delete rhs
+}
+
+struct Owner {
+    children : array<A>
+}
+
+[test]
+def test_field_chain_container(t : T?) {
+    //! The container is reached through a field: `owner.children[i]`.
+    //! The indexed expression is the ExprField `owner.children`, an lvalue
+    //! chain rooted in the variable `owner`, so the macro accepts it.
+    var owner : Owner
+    push(owner.children, A(f1 = 5, f2 = 6))
+    push(owner.children, A(f1 = 7, f2 = 8))
+    with_(owner.children[1]) $(child) {
+        child.f1 = 555
+    }
+    equal(t, owner.children[1].f1, 555, "second-element mutation persists")
+    equal(t, owner.children[0].f1, 5, "first element untouched")
+    delete owner.children
+}
+
+def helper_set_f1(var aRef : A&; v : int) {
+    aRef.f1 = v  // writes through the mutable reference, so the caller's `A` is modified
+}
+
+[test]
+def test_pass_binding_to_function(t : T?) {
+    //! The bound element is handed to a function taking a mutable `A&`;
+    //! the write done inside that function must reach the array.
+    var items <- [A(f1=1, f2=2)]
+    with_(items[0]) $(bound) {
+        helper_set_f1(bound, 777)
+    }
+    equal(t, items[0].f1, 777, "mutation through helper-fn ref persists")
+    delete items
+}
diff --git a/tests/with_boost/test_with_lock_panics.das b/tests/with_boost/test_with_lock_panics.das
new file mode 100644
index 0000000000..d768e2584f
--- /dev/null
+++ b/tests/with_boost/test_with_lock_panics.das
@@ -0,0 +1,116 @@
+options gen2
+options indenting = 4
+
+require dastest/testing_boost public
+require daslib/with_boost
+
+//! When the with_ body panics, the lock leaks (daslang issue #2532).
+//! These tests deliberately don't try to ``delete`` the leaked-locked
+//! containers afterwards — that would compound the failure. We only
+//! verify the panic is caught at the with_ boundary.
+
+struct A {
+    f1 : int
+}
+
+[test]
+def test_push_inside_panics(t : T?) {
+    var items <- [A(f1=1), A(f1=2)]
+    var trapped = false
+    try {
+        with_(items[0]) $(held) {
+            push(items, A(f1 = 99))  // grows the array that with_ is currently holding
+        }
+    } recover {
+        trapped = true
+    }
+    success(t, trapped, "push during with_ body must panic")
+}
+
+[test]
+def test_erase_inside_panics(t : T?) {
+    var items <- [A(f1=1), A(f1=2), A(f1=3)]
+    var trapped = false
+    try {
+        with_(items[1]) $(held) {
+            erase(items, 0)  // removes an element while items[1] is bound
+        }
+    } recover {
+        trapped = true
+    }
+    success(t, trapped, "erase during with_ body must panic")
+}
+
+[test]
+def test_resize_inside_panics(t : T?) {
+    var items <- [A(f1=1)]
+    var trapped = false
+    try {
+        with_(items[0]) $(held) {
+            resize(items, 100)  // resizes the array while items[0] is bound
+        }
+    } recover {
+        trapped = true
+    }
+    success(t, trapped, "resize during with_ body must panic")
+}
+
+[test]
+def test_clear_inside_panics(t : T?) {
+    var items <- [A(f1=1)]
+    var trapped = false
+    try {
+        with_(items[0]) $(held) {
+            clear(items)  // empties the array while items[0] is bound
+        }
+    } recover {
+        trapped = true
+    }
+    success(t, trapped, "clear during with_ body must panic")
+}
+
+[test]
+def test_table_insert_inside_panics(t : T?) {
+    var lookup : table<string; A>
+    insert(lookup, "a", A(f1 = 1))
+    var trapped = false
+    try {
+        with_(lookup["a"]) $(held) {
+            insert(lookup, "b", A(f1 = 2))  // adds a key while lookup["a"] is bound
+        }
+    } recover {
+        trapped = true
+    }
+    success(t, trapped, "table insert during with_ body must panic")
+}
+
+[test]
+def test_table_erase_inside_panics(t : T?) {
+    var lookup : table<string; A>
+    insert(lookup, "a", A(f1 = 1))
+    insert(lookup, "b", A(f1 = 2))
+    var trapped = false
+    try {
+        with_(lookup["a"]) $(held) {
+            erase(lookup, "b")  // removes a different key while lookup["a"] is bound
+        }
+    } recover {
+        trapped = true
+    }
+    success(t, trapped, "table erase during with_ body must panic")
+}
+
+[test]
+def test_multi_arg_inner_push_panics(t : T?) {
+    var first <- [A(f1=1)]
+    var second <- [A(f1=2)]
+    var trapped = false
+    try {
+        with_(first[0], second[0]) $(held1, held2) {
+            push(second, A(f1 = 99))  // grows the second of the two bound arrays
+        }
+    } recover {
+        trapped = true
+    }
+    success(t, trapped, "multi-arg: mutation to either locked array must panic")
+}
diff --git a/tests/with_boost/test_with_n_arg.das b/tests/with_boost/test_with_n_arg.das
new file mode 100644
index 0000000000..8f2b3c380f
--- /dev/null
+++ b/tests/with_boost/test_with_n_arg.das
@@ -0,0 +1,59 @@
+options gen2
+options indenting = 4
+
+require dastest/testing_boost public
+require daslib/with_boost
+
+struct A {
+    f1 : int
+}
+
+[test]
+def test_five_arrays(t : T?) {
+    //! Five array containers bound in one with_ call — more than the
+    //! limit of three that the old per-arity helpers had.
+    var a1 <- [A(f1 = 1)]
+    var a2 <- [A(f1 = 2)]
+    var a3 <- [A(f1 = 3)]
+    var a4 <- [A(f1 = 4)]
+    var a5 <- [A(f1 = 5)]
+    with_(a1[0], a2[0], a3[0], a4[0], a5[0]) $(e1, e2, e3, e4, e5) {
+        e1.f1 += e2.f1 + e3.f1 + e4.f1 + e5.f1
+    }
+    equal(t, a1[0].f1, 15, "1 + 2 + 3 + 4 + 5 = 15 written to a1[0]")
+    equal(t, a2[0].f1, 2, "a2 untouched")
+    equal(t, a5[0].f1, 5, "a5 untouched")
+}
+
+[test]
+def test_mixed_4_arrays_1_table(t : T?) {
+    //! Four array args plus a single table-keyed arg, with the table in the middle position.
+    var arr_w <- [0]
+    var arr_x <- [10]
+    var arr_y <- [100]
+    var arr_z <- [1000]
+    var lookup : table<string; int>
+    insert(lookup, "k", 50)
+    with_(arr_w[0], arr_x[0], lookup["k"], arr_y[0], arr_z[0]) $(w, x, entry, y, z) {
+        w = x + entry + y + z
+        entry = 999
+    }
+    equal(t, arr_w[0], 10 + 50 + 100 + 1000, "sum across mixed array+table args")
+    equal(t, lookup["k"], 999, "table entry mutated")
+}
+
+[test]
+def test_seven_arrays(t : T?) {
+    //! Seven containers in one call: the sum of the last six is stored into the first.
+    var a <- [1]
+    var b <- [2]
+    var c <- [3]
+    var d <- [4]
+    var e <- [5]
+    var f <- [6]
+    var g <- [7]
+    with_(a[0], b[0], c[0], d[0], e[0], f[0], g[0]) $(dst, s1, s2, s3, s4, s5, s6) {
+        dst = s1 + s2 + s3 + s4 + s5 + s6
+    }
+    equal(t, a[0], 27, "2+3+4+5+6+7 = 27")
+}
diff --git a/tests/with_boost/test_with_table.das b/tests/with_boost/test_with_table.das
new file mode 100644
index 0000000000..614a9a50eb
--- /dev/null
+++ b/tests/with_boost/test_with_table.das
@@ -0,0 +1,60 @@
+options gen2
+options indenting = 4
+
+require dastest/testing_boost public
+require daslib/with_boost
+
+struct A {
+    f1 : int
+    f2 : int
+}
+
+[test]
+def test_existing_key(t : T?) {
+    //! Writes through the binding of an already-present key must be visible in the table.
+    var lookup : table<string; A>
+    insert(lookup, "k", A(f1 = 1, f2 = 2))
+    with_(lookup["k"]) $(entry) {
+        entry.f1 = 99
+        entry.f2 = 100
+    }
+    equal(t, lookup["k"].f1, 99, "key mutation persists")
+    equal(t, lookup["k"].f2, 100, "second field mutation persists")
+}
+
+[test]
+def test_missing_key_creates_default(t : T?) {
+    //! `tab[k]` upserts: binding an absent key creates the entry, and the
+    //! body's write lands on that new entry.
+    var lookup : table<string; A>
+    with_(lookup["new"]) $(entry) {
+        entry.f1 = 42
+    }
+    success(t, lookup |> key_exists("new"), "missing key was created")
+    equal(t, lookup["new"].f1, 42, "mutation on new entry persists")
+}
+
+[test]
+def test_table_default_name(t : T?) {
+    //! The param-less block form (`_`) applied to a table entry.
+    var lookup : table<string; A>
+    insert(lookup, "x", A(f1 = 5, f2 = 6))
+    with_(lookup["x"]) {
+        _.f1 = 77
+    }
+    equal(t, lookup["x"].f1, 77, "default _ binding mutation persists")
+}
+
+[test]
+def test_table_workhorse_value(t : T?) {
+    //! Table whose values are plain `int`: assigning to the binding must
+    //! update the stored value and leave the other key alone.
+    var lookup : table<string; int>
+    insert(lookup, "alpha", 10)
+    insert(lookup, "beta", 20)
+    with_(lookup["alpha"]) $(entry) {
+        entry = 200
+    }
+    equal(t, lookup["alpha"], 200, "workhorse value mutation persists")
+    equal(t, lookup["beta"], 20, "other key untouched")
+}
diff --git a/tests/with_boost/test_with_workhorse.das b/tests/with_boost/test_with_workhorse.das
new file mode 100644
index 0000000000..5120fd0fd0
--- /dev/null
+++ b/tests/with_boost/test_with_workhorse.das
@@ -0,0 +1,71 @@
+options gen2
+options indenting = 4
+
+require dastest/testing_boost public
+require daslib/with_boost
+
+struct A {
+    f1 : int
+}
+
+[test]
+def test_int_array_default_name(t : T?) {
+    //! Param-less block on an `int` array: assigning to `_` must overwrite
+    //! the bound element and nothing else.
+    var nums <- [10, 20, 30]
+    with_(nums[1]) {
+        _ = 222
+    }
+    equal(t, nums[0], 10, "untouched element stays")
+    equal(t, nums[1], 222, "workhorse mutation persists")
+    equal(t, nums[2], 30, "untouched element stays")
+    delete nums
+}
+
+[test]
+def test_int_array_named_binding(t : T?) {
+    //! Named binding `$(x)` on an `int` array: assigning to the untyped
+    //! block param must write through to the bound element.
+    var nums <- [1, 2, 3]
+    with_(nums[0]) $(slot0) {
+        slot0 = 999
+    }
+    equal(t, nums[0], 999, "named workhorse binding mutation persists")
+    delete nums
+}
+
+[test]
+def test_float_array_named(t : T?) {
+    var reals <- [1.0f, 2.0f, 3.0f]
+    with_(reals[2]) $(last) {
+        last = 99.5f  // same write-through check as the int tests, with a float element
+    }
+    equal(t, reals[2], 99.5f, "float workhorse mutation persists")
+    delete reals
+}
+
+[test]
+def test_multi_arg_workhorse(t : T?) {
+    //! Two `int` arrays bound in one call; the second is only read.
+    var target <- [10, 20, 30]
+    var source <- [100, 200, 300]
+    with_(target[0], source[1]) $(dst, src) {
+        dst = src + 1
+    }
+    equal(t, target[0], 201, "x = y + 1 with workhorse types")
+    equal(t, source[1], 200, "source unchanged")
+    delete target
+    delete source
+}
+
+[test]
+def test_mixed_workhorse_struct(t : T?) {
+    var nums <- [10, 20]
+    var records <- [A(f1 = 100)]
+    with_(nums[0], records[0]) $(num, rec) {
+        num = rec.f1 + 1  // an int element and a struct element bound in the same call
+    }
+    equal(t, nums[0], 101, "workhorse + struct multi-arg")
+    delete nums
+    delete records
+}
diff --git a/tutorials/macros/18_with_boost.das b/tutorials/macros/18_with_boost.das
new file mode 100644
index 0000000000..c57023efce
--- /dev/null
+++ b/tutorials/macros/18_with_boost.das
@@ -0,0 +1,131 @@
+// Macro Tutorial 18: with_ macro from daslib/with_boost
+//
+// The `with_` macro binds an array or table element by reference inside
+// a block, with an automatic lock held around the body, so push/erase/resize
+// on the underlying container panics at runtime instead of silently
+// leaving the reference dangling.
+//
+// Covers:
+//   - Why `var a : A& = arr[0]` is rejected by daslang's typer
+//   - Single-arg form with default `_` binding
+//   - Named binding via $(name) on the block
+//   - Multi-arg form (positional) for cross-container work
+//   - Workhorse element types (int, float, ...) — bound by reference
+//   - Table containers (one table-keyed arg per call)
+//   - Runtime lock: mutation of the container inside the body panics
+//
+// Run: daslang.exe tutorials/macros/18_with_boost.das
+
+options gen2
+
+require daslib/with_boost
+
+struct A { // element type stored in the arrays and the table used by main
+    f1 : int
+    f2 : int
+}
+
+[export]
+def main {
+    // Entry point: walks the sections below in order; sections 2-7 each print one line.
+    // ──────────────────────────────────────────────────────────────────
+    // Section 1 — Why with_ exists
+    // ──────────────────────────────────────────────────────────────────
+    //
+    // Plain `var a : A& = arr[0]` is rejected by daslang's typer:
+    //
+    //     error[31300]: local reference to non-local expression is unsafe
+    //
+    // The reason: between binding the ref and using it, code could push
+    // or resize the array, leaving `a` dangling. with_ solves this by
+    // locking the array around a block — push/resize inside the block
+    // panic instead of corrupting memory.
+
+    // ──────────────────────────────────────────────────────────────────
+    // Section 2 — Single-arg, default `_` binding
+    // ──────────────────────────────────────────────────────────────────
+
+    var arr <- [A(f1 = 1, f2 = 2), A(f1 = 3, f2 = 4)]   // `<-` moves the array literal into the local
+    with_(arr[0]) {
+        _.f1 = 99
+        _.f2 = 100
+    }
+    print("section 2: arr[0] = {arr[0].f1}, {arr[0].f2}\n")
+
+    // ──────────────────────────────────────────────────────────────────
+    // Section 3 — Named binding via $(name)
+    // ──────────────────────────────────────────────────────────────────
+    //
+    // The block-param keyword (`var`/`let`) is optional; the macro
+    // strips constness so mutations always persist.
+
+    with_(arr[1]) $(elem) {
+        elem.f1 = 555
+    }
+    print("section 3: arr[1].f1 = {arr[1].f1}\n")
+
+    // ──────────────────────────────────────────────────────────────────
+    // Section 4 — Multi-arg positional form
+    // ──────────────────────────────────────────────────────────────────
+    //
+    // Pass multiple containers; the block params are positional.
+    // Each container gets its own lock.
+
+    var dst <- [A(f1 = 0, f2 = 0)]
+    var src <- [A(f1 = 10, f2 = 20)]
+    with_(dst[0], src[0]) $(d, s) {
+        d.f1 = s.f1 + 1
+        d.f2 = s.f2 + 2
+    }
+    print("section 4: dst[0] = {dst[0].f1}, {dst[0].f2}\n")
+
+    // ──────────────────────────────────────────────────────────────────
+    // Section 5 — Workhorse element types (int, float, ...)
+    // ──────────────────────────────────────────────────────────────────
+    //
+    // The block-arg is bound by reference, so `_ = X` (or named `x = X`)
+    // mutates the underlying slot. The macro relies on the helper's
+    // `block<(var x : TT&) : void>` signature to drive daslang's
+    // inference; no special-case in the macro itself.
+
+    var ints <- [1, 2, 3]
+    with_(ints[1]) {
+        _ = 222
+    }
+    print("section 5: ints = {ints}\n")
+
+    // ──────────────────────────────────────────────────────────────────
+    // Section 6 — Tables
+    // ──────────────────────────────────────────────────────────────────
+    //
+    // Tables work the same way; `tab[key]` upserts (creates a default
+    // entry if missing). At most ONE table-keyed arg per call (any
+    // second insert into the same table would rehash and invalidate
+    // the pinned entry).
+
+    var tab : table<string; A>
+    tab |> insert("k", A(f1 = 11, f2 = 22))
+    with_(tab["k"]) $(v) {
+        v.f1 = 777
+    }
+    let kf1 = tab["k"].f1
+    print("section 6: tab[k].f1 = {kf1}\n")
+
+    // ──────────────────────────────────────────────────────────────────
+    // Section 7 — Lock is real (would-panic example, commented out)
+    // ──────────────────────────────────────────────────────────────────
+    //
+    // Trying to mutate the container from inside the body panics at
+    // runtime — the whole point of the lock. Daslang panic is fatal
+    // (not a C++/JS exception), so we don't demonstrate it live in a
+    // tutorial. The line below is what would panic:
+    //
+    //     with_(arr[0]) $(a) {
+    //         arr |> push(A(f1 = 1000, f2 = 2000))   // panic: array is locked
+    //     }
+    //
+    // See `tests/with_boost/test_with_lock_panics.das` for the
+    // full set of runtime-locked behaviours.
+
+    print("section 7: see comment for the lock-panic shape\n")
+}