diff --git a/CLAUDE.md b/CLAUDE.md index 72333e97f7..83bcfa27dc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -137,7 +137,7 @@ All code MUST use gen2 syntax (add `options gen2` at the top of every file). Key - Structs/arrays/tables always pass by reference — no `&` needed. - Only **workhorse types** (`int`, `float`, `bool`, `string`, …, `isWorkhorseType` on the C++ side) pass by value. - **AST pointers (gc_node) pass by value** — copying the pointer, no refcount, no allocation. `def foo(p : ExpressionPtr)` shares the node; `var p` lets you reassign locally; `var p : ExpressionPtr&` propagates reassignment back. For mutable field access, take the param as `var`. -- **Lambdas pass by value (copy aliases the capture frame).** A `lambda<…>` is a fat pointer to a heap-allocated capture frame, so `=` copies the pointer (creates an alias) and pass-by-value is free. **`delete lam` requires `unsafe`** since other aliases may still be live — same rule as raw pointer / class `delete`. The rule cascades: `array<lambda<…>>`, structs with a lambda field, tuple/variant containing a lambda — all inherit the unsafe-delete requirement. +- **Lambdas are copyable.** A `lambda<…>` is a fat pointer to a heap-allocated capture frame; `=` and pass-by-value copy the pointer (creates an alias), and `push`/array storage works without `push_clone`. **`delete lam` requires `unsafe`** since other aliases may still be live — same rule as raw pointer / class `delete`. The unsafe-delete rule cascades: `array<lambda<…>>`, structs with a lambda field, tuple/variant containing a lambda — all inherit the unsafe-delete requirement. - **Strings:** `var s : string` is a writable local copy (no propagation). `var s : string&` propagates. `:=` clones into current context's heap (required across contexts); plain `=` copies the pointer. - **Residual `smart_ptr` types** (`ProgramPtr`, `ContextPtr`, `FileAccessPtr`, `DebugAgentPtr`, `VisitorAdapterPtr`) still use refcount semantics — variables holding them need `var inscope`. 
AST types do NOT — see below. @@ -178,6 +178,7 @@ Full migration table (when reading older docs that say `var inscope` or `<-` for - `panic("message")`, `assert(condition)`, `verify(condition)` (stays in release) - **Postfix conditional:** `return expr if (cond)`, `break if (cond)`, `continue if (cond)` — early-exit guard on one line - **Braceless early-exit:** prefer `if (cond) return X` (or postfix `return X if (cond)`) over `if (cond) { return X }` — STYLE005 flags the braced single-terminator form as noise +- **Panic is fatal, not an exception.** daslang has no C++/JS-style exception model. A `panic` (or failed `assert` / `verify`) means the program is broken — the only correct response is to print diagnostics and exit. `try/recover` exists to capture the message before exit so you can log it nicely, NOT to recover-and-continue. Do not write code that relies on continuing after `recover`; do not design APIs around panic-as-control-flow. Corollary: `{ body } finally { cleanup }` deliberately skips `cleanup` on panic (the cleanup can't run safely on a broken program); this is not a bug. Don't try to "fix" it; don't use `finally` for cleanup that needs to run on panic. If you need post-statements that run after a block in the normal path, just put them after the block — panic skips everything, and that's the design. ### Generic function dispatch @@ -232,7 +233,7 @@ Full migration table (when reading older docs that say `var inscope` or `<-` for - `require foo public` — re-exports `foo` transitively - `[export] def main()` defaults to returning `void`, but you can declare it as `def main() : int { ... return rc }` when you need to surface a non-zero process exit code (e.g. CLI tools where callers — MCP wrappers, CI, parent shells — branch on exit). See `dastest/dastest.das` for the canonical pattern. Don't reach for `panic` just to force a non-zero exit; declare `: int` and `return rc` instead. 
- `push` copies (fails for non-copyable types), `emplace` moves (zeros source), `push_clone` clones (preserves source) -- Non-copyable types (`array`, `table`, lambdas): use `:=`, `push_clone`, or `<-` +- Non-copyable types (`array`, `table`): use `:=`, `push_clone`, or `<-`. (Lambdas are copyable — see above.) - Blocks cannot be stored/returned/captured — use lambdas or function pointers - Class methods: `def const`, `def abstract const`, `def static`; call syntax `obj.method()`, `obj->method()`, `obj |> method()` - **`is`/`as` on handled types checks EXACT type**, not C++ inheritance — `expr is ExprField` is `false` when `expr` is `ExprSafeField`. `as` on wrong type crashes. Must handle each concrete type explicitly. diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das index c5b36c26c9..4d9d403852 100644 --- a/daslib/linq_fold.das +++ b/daslib/linq_fold.das @@ -35,6 +35,7 @@ require daslib/templates_boost require daslib/macro_boost require strings +// LinqCall — registry record describing an operator name (where_/select/distinct/...) and its fold disposition. struct private LinqCall { name : string moduleName : string = "linq" @@ -44,6 +45,289 @@ struct private LinqCall { recursive : array // indices of arguments to apply fold_linq_default on } +// ===== Pattern-table refactor kernel ===== +// See daslib/linq_fold.md for masterplan. + +variant private SourceAdapter { + Array : tuple // (top, srcName) — PR C widens with Decs/DecsFind/Zip/DecsJoin +} + +variant SlotMatcher { + literal : string // exact name match + one_of : array // any-of name set + alias : string // looked up in alias_table +} + +variant SlotCardinality { + one : void? // required, exactly 1 + optional : void? // 0 or 1 + chain : void? 
// 0 or more (greedy); captures as array via Captures.many +} + +struct Slot { + matcher : SlotMatcher + cardinality : SlotCardinality + capture_name : string = "" // "" = don't capture + arity : int = -1 // -1 = any; positive = require N args on the matched call +} + +// Captures bundle: `single` for c_one / c_opt slots; `many` for c_chain (a contiguous run of calls captured in chain order). +struct Captures { + single : table + many : table> +} + +variant private MatchResult { + no_match : void? // daslang-idiomatic Option + matched : Captures +} + +typedef private RequiresPredicate = function<(var c : Captures; var top : Expression?) : bool> + +// Fold-time context passed to every emit archetype. Carries the peeled source expression, source adapter, and the outer +// `_fold(...)` expression's iterator-ness (drives `buffer_return` wrap so iterator-typed contexts wrap survivors with +// `.to_sequence_move()`; array-typed contexts return the buffer directly). +struct EmitCtx { + top : Expression? // peel_each'd; stubs pre-clone per invoke + src : SourceAdapter + expr_is_iterator : bool +} + +typedef EmitFn = function<(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression?> + +struct SplicePattern { + name : string // debug / lint diagnostics + chain : array + requires : array + emit : EmitFn +} + +// Slot construction helpers (variant arms need named-field syntax; helpers keep pattern rows compact) +def m_literal(s : string) : SlotMatcher { + return SlotMatcher(literal = s) +} +def m_alias(s : string) : SlotMatcher { + return SlotMatcher(alias = s) +} +def c_one() : SlotCardinality { + return SlotCardinality(one = null) +} +def c_opt() : SlotCardinality { + return SlotCardinality(optional = null) +} +def c_chain() : SlotCardinality { + return SlotCardinality(chain = null) +} + +// Convenience: c_chain + m_one_of always pair (head shape "0+ contiguous calls whose name is in `names`, captured as `cap`"). `names` is consumed (moved into the slot matcher). 
+def slot_chain_of(var names : array; cap : string) : Slot { + return Slot(matcher = SlotMatcher(one_of <- names), cardinality = c_chain(), capture_name = cap) +} + +// Prefix-conflict lint: pattern A shadows pattern B (B unreachable) when A's chain is a structural prefix of B's chain. +// PR A scope: simple structural check (matcher + cardinality + arity equality, capture_name ignored). Doesn't catch all subsumption cases (e.g. opt slots interacting with required slots) — exhaustive subsumption deferred to a later PR. +def private slot_matchers_equal(a : SlotMatcher; b : SlotMatcher) : bool { + if (a is literal && b is literal) return (a as literal) == (b as literal) + if (a is alias && b is alias) return (a as alias) == (b as alias) + if (a is one_of && b is one_of) { + // `one_of` is set-membership — equality is order-insensitive (same length + every element in a is in b). + let la & = unsafe(a as one_of) + let lb & = unsafe(b as one_of) + if (length(la) != length(lb)) return false + for (name in la) { + var found = false + for (other in lb) { + if (other == name) { found = true; break; } + } + if (!found) return false + } + return true + } + return false +} + +def private slots_structurally_match(a : Slot; b : Slot) : bool { + return (slot_matchers_equal(a.matcher, b.matcher) + && (a.cardinality is one) == (b.cardinality is one) + && (a.cardinality is optional) == (b.cardinality is optional) + && (a.cardinality is chain) == (b.cardinality is chain) + && a.arity == b.arity) +} + +// Strict prefix: a must be SHORTER than b. Two patterns with identical chains but different `requires` +// predicates are both legitimately reachable (the walker falls through to the second when the first's +// requires fail), so equal-length is not a shadowing relation. +def chain_prefix_of(a : array; b : array) : bool { + if (length(a) >= length(b)) return false + for (i in 0 .. 
length(a)) { + if (!slots_structurally_match(a[i], b[i])) return false + } + return true +} + +// Walk a pattern table; return true iff every pattern is reachable (no earlier pattern is its strict prefix). +// Silent — return value carries the verdict. Tests assert on the bool; future compile-time hook can add diagnostics. +def check_pattern_table_reachable(name : string; patterns : array) : bool { + for (i in 0 .. length(patterns)) { + for (j in i + 1 .. length(patterns)) { + if (chain_prefix_of(patterns[i].chain, patterns[j].chain)) return false + } + } + return true +} + +// Normalize an ExprCall's name through linqCalls (e.g. "distinct_by_to_array" → "distinct_by"). +// Walks func.fromGeneric chain — ExprCall.name itself is the mangled generic-instance name (`__::linq\`distinct_by\``); the original user-facing name lives at the root of the fromGeneric chain. +def private call_norm_name(call : ExprCall?) : string { + var topFunc = call.func + while (topFunc.fromGeneric != null) { + topFunc = topFunc.fromGeneric + } + let raw = string(topFunc.name) + if (linqCalls |> key_exists(raw)) return linqCalls[raw].name + return raw +} + +var alias_table : table> <- { + "distinct_family" => ["distinct", "distinct_by"], + "first_family" => ["first", "first_or_default"], + "count_family" => ["count", "long_count"], + // Narrow to the terminators emit_hashtable_dedup actually handles — first[_or_default] would be over-matched then cascade. + "distinct_terminator_family" => ["count", "long_count", "sum"], + // PR B additions: + "order_family" => ["order", "order_descending", "order_by", "order_by_descending"], + "range_op_family" => ["skip", "skip_while", "take_while", "take"], + // accum_family: all reducing terminators routed via emit_accumulator_lane in plan_loop_or_count. 
+ "accum_family" => ["sum", "min", "max", "average", "aggregate", + "min_by", "max_by", "min_max", "min_max_by", + "min_max_average", "min_max_average_by", "long_count"], + "early_exit_family" => ["any", "all", "contains", "first", "first_or_default"], + // loop_terminator_family: union used by plan_loop_or_count's single pattern row's optional terminator slot. + "loop_terminator_family" => ["count", "long_count", "sum", "min", "max", "average", "aggregate", + "min_by", "max_by", "min_max", "min_max_by", + "min_max_average", "min_max_average_by", + "any", "all", "contains", "first", "first_or_default", + "last", "last_or_default", "single", "single_or_default", + "element_at", "element_at_or_default"] +} + +// Walker — see daslib/linq_fold.md Step 3 contract. Returns MatchResult by move; +// caller binds with `var r <- match_pattern(...)` and reads via `if (r is matched) { let c & = r as matched; ... }`. + +def private slot_matches_call(slot : Slot; call : ExprCall?; name : string; pattern_name : string) : bool { + var in_set = false + if (slot.matcher is literal) { + in_set = (slot.matcher as literal) == name + } elif (slot.matcher is one_of) { + for (n in slot.matcher as one_of) { + if (n == name) { in_set = true; break; } + } + } elif (slot.matcher is alias) { + let key = slot.matcher as alias + if (alias_table |> key_exists(key)) { + for (n in alias_table[key]) { + if (n == name) { in_set = true; break; } + } + } else { + panic("match_pattern: unknown alias '{key}' in pattern '{pattern_name}'") + } + } + return in_set && (slot.arity == -1 || length(call.arguments) == slot.arity) +} + +def private match_pattern(p : SplicePattern; + var calls : array>; + var top : Expression?) : MatchResult { + var captures : Captures + var slot_i = 0 + var call_i = 0 + while (slot_i < length(p.chain)) { + let slot & = unsafe(p.chain[slot_i]) + if (slot.cardinality is chain) { + // c_chain: greedy match-while-in-set. 
Always succeeds (0+); empty match still creates a Captures.many entry so emit fns can rely on the key existing. + var captured : array + while (call_i < length(calls) && slot_matches_call(slot, calls[call_i]._0, calls[call_i]._1.name, p.name)) { + captured |> push(calls[call_i]._0) + call_i ++ + } + if (slot.capture_name != "") { + captures.many[slot.capture_name] <- captured + } + slot_i ++ + continue + } + var matched_here = false + if (call_i < length(calls)) { + let cur & = unsafe(calls[call_i]) + matched_here = slot_matches_call(slot, cur._0, cur._1.name, p.name) + } + if (matched_here) { + if (slot.capture_name != "") { + captures.single |> insert(slot.capture_name, calls[call_i]._0) + } + slot_i ++ + call_i ++ + } elif (slot.cardinality is one) { + return MatchResult(no_match = null) + } else { + slot_i ++ + } + } + if (call_i < length(calls)) return MatchResult(no_match = null) + for (pred in p.requires) { + if (!invoke(pred, captures, top)) return MatchResult(no_match = null) + } + return MatchResult(matched <- captures) +} + +// Predicate library — named module-level functions (named so JIT can take their address; inline `@@(...)` lambdas +// produce `_localfunction_*` symbols the JIT pass can't resolve). Pattern rows wrap each with `@@`. +// Starts minimal; grows with use per masterplan. + +def private array_source(var c : Captures; var top : Expression?) : bool { + // top is already peel_each'd by the calling stub; verify it carries an array type (indexed iteration safe). + return top != null && top._type != null && (top._type.isGoodArrayType || top._type.isArray) +} + +def private take_arg_is_int(var c : Captures; var top : Expression?) 
: bool { + if (!(c.single |> key_exists("take"))) return true // vacuous: no take to constrain + let take = c.single["take"] + return (take != null && length(take.arguments) >= 2 && take.arguments[1] != null + && take.arguments[1]._type != null && take.arguments[1]._type.baseType == Type.tInt) +} + +// `no_terminator` — chain ends bare (no `count` / `sum` / `first[_or_default]` / `to_array` captured). +// Return shape (array vs iterator) is decided by `ctx.expr_is_iterator` in the emit fn, not this predicate. +def private no_terminator(var c : Captures; var top : Expression?) : bool { + return !(c.single |> key_exists("term")) +} + +// True only for `order_by[_descending]` with an inline-splice-able key lambda. `order` / `order_descending` route elsewhere. +def private inline_cmp_available(var c : Captures; var top : Expression?) : bool { + if (!(c.single |> key_exists("order"))) return false + let orderCall = c.single["order"] + if (orderCall == null || orderCall._type == null || orderCall._type.firstType == null + || (orderCall.arguments |> length) < 2) return false + let orderName = call_norm_name(orderCall) + if (orderName != "order_by" && orderName != "order_by_descending") return false + return try_make_inline_cmp(orderCall.arguments[1], orderName, orderCall._type.firstType, orderCall.at) != null +} + +// `has_where_or_distinct` — used by order_fused_prefilter row to distinguish from bare buffer_helper_dispatch. +// At least one prefilter source must be present for the fused-loop path to make sense. +def private has_where_or_distinct(var c : Captures; var top : Expression?) : bool { + return (c.single |> key_exists("where")) || (c.single |> key_exists("distinct")) +} + +// Per-plan pattern tables — collapsed into splice_patterns in PR D. 
+ +var private plan_reverse_patterns : array +var private plan_distinct_patterns : array +var private plan_loop_or_count_patterns : array +var private splice_patterns : array // populated in PR D when per-plan tables collapse + +// ===== End of pattern-table kernel ===== + var private linqCalls = { // filtering data "where_" => LinqCall(name = "where_"), @@ -274,6 +558,63 @@ def private collapse_chained_selects(var calls : array>) { + // Mirror of collapse_chained_selects shape (find adjacent same-op pairs, rename to fresh param, rewire backlink, erase inner, stay at i) but composes via `pred1 && pred2` instead of function compose. Both predicates take the source element, so the composed body uses ONE fresh param shared by both halves. No has_sideeffects bail needed — composition doesn't duplicate either body (each runs at most once per element, same as the imperative chain with short-circuit `&&`). + var i = 0 + while (i + 1 < length(calls)) { + if (calls[i]._1.name != "where_" || calls[i + 1]._1.name != "where_") { + i ++ + continue + } + var innerLam = calls[i]._0.arguments[1] + var outerLam = calls[i + 1]._0.arguments[1] + if (innerLam == null || outerLam == null + || !(innerLam is ExprMakeBlock) || !(outerLam is ExprMakeBlock)) { + i ++ + continue + } + var innerMblk = innerLam as ExprMakeBlock + var innerBlk = innerMblk._block as ExprBlock + var outerMblk = outerLam as ExprMakeBlock + var outerBlk = outerMblk._block as ExprBlock + if (innerBlk == null || outerBlk == null + || innerBlk.arguments |> length != 1 || outerBlk.arguments |> length != 1 + || innerBlk.list |> length != 1 || outerBlk.list |> length != 1 + || !(innerBlk.list[0] is ExprReturn) || !(outerBlk.list[0] is ExprReturn)) { + i ++ + continue + } + var innerRet = innerBlk.list[0] as ExprReturn + var outerRet = outerBlk.list[0] as ExprReturn + if (innerRet.subexpr == null || outerRet.subexpr == null) { + i ++ + continue + } + // Rename both predicate params to the same fresh name — composed body 
references one shared bind. + let freshName = qn("cw", innerLam.at) + var innerBodyFresh = peel_lambda_rename_var(innerLam, freshName) + var outerBodyFresh = peel_lambda_rename_var(outerLam, freshName) + if (innerBodyFresh == null || outerBodyFresh == null) { + i ++ + continue + } + var newLam = clone_expression(innerLam) + var newMblk = newLam as ExprMakeBlock + var newBlk = newMblk._block as ExprBlock + if (newBlk == null || newBlk.list |> length != 1 || !(newBlk.list[0] is ExprReturn)) { + i ++ + continue + } + var newRet = newBlk.list[0] as ExprReturn + newBlk.arguments[0].name := freshName + newRet.subexpr = merge_where_cond(innerBodyFresh, outerBodyFresh) + calls[i + 1]._0.arguments[1] = newLam + calls[i + 1]._0.arguments[0] = calls[i]._0.arguments[0] + calls |> erase(i) + } +} + [macro_function] def private flatten_linq(var expr : Expression?) { var top = expr @@ -707,12 +1048,11 @@ def private emit_counter_lane(var top : Expression?; srcName, accName, itName : } [clone(top), macro_function] -def private emit_array_lane(var top : Expression?; var expr : Expression?; var loopBody : Expression?; var elementType : TypeDeclPtr; +def private emit_array_lane(var top : Expression?; isIter : bool; var loopBody : Expression?; var elementType : TypeDeclPtr; srcName, accName, itName : string; names : RangeStateNames; var skipExpr, takeExpr, skipWhileCond : Expression?; at : LineInfo) : Expression? { // Array lane: `[skip/take init]; var acc : array; [reserve]; for (it in src) { $loopBody }; return <- acc` - let isIter = expr._type.isIterator let sourceHasLength = type_has_length(top._type) var topExpr = clone_expression(top) topExpr.genFlags.alwaysSafe = true @@ -1885,53 +2225,40 @@ def private plan_order_family(var expr : Expression?) : Expression? { return finalize_invoke(emission, at) } +// ===== plan_loop_or_count migration (PR B) ===== +// Single emit + single pattern row. c_chain captures where_/select head; canonical-order slots carry the rest. 
+ [macro_function] -def private plan_loop_or_count(var expr : Expression?) : Expression? { - // Phase-2C loop planner. Recognizes chains of shape `[where_*][select*][skip?][take?]` - var (top, calls) = flatten_linq(expr) - if (empty(calls)) return null - top = peel_each(top) - let lastName = calls.back()._1.name - let lane = classify_terminator(lastName) - // Marker: future PRs add BufferTopN / BufferDistinct / etc. for `is_buffer_required_op` - if (lane == LinqLane.UNKNOWN) return null - let counterLane = lane == LinqLane.COUNTER - let hasTerminator = lane != LinqLane.ARRAY - let intermediateCount = hasTerminator ? length(calls) - 1 : length(calls) - let at = calls[0]._0.at - let srcName = qn("source", at) - let itName = qn("it", at) +def private emit_loop_or_count_lane(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? { + var top = ctx.top + let srcName = (ctx.src as Array)._1 + let itName = qn("it", at) let accName = qn("acc", at) let names <- make_range_names(at) + let hasTerminator = c.single |> key_exists("term") + var lastName : string + if (hasTerminator) { + lastName = call_norm_name(c.single["term"]) + } + let lane = hasTerminator ? classify_terminator(lastName) : LinqLane.ARRAY + if (lane == LinqLane.UNKNOWN) return null + let counterLane = lane == LinqLane.COUNTER + if (top._type == null || top._type.firstType == null) return null var whereCond : Expression? - // postTakeWhereCond — Theme 2 5c: gates per-element contribution AFTER the take cap fires. Distinct from whereCond (which wraps the entire take/skip body); this preserves take.where semantics ("first N elements, then filter") that auto-rewriting can't reproduce. var postTakeWhereCond : Expression? var projection : Expression? var intermediateBinds : array - // preConditionStmts evaluate UNCONDITIONALLY per element, BEFORE the where filter — var preCondStmts : array - var skipExpr : Expression? - var takeExpr : Expression? - // skip_while / take_while: predicate-driven ranges. 
Both peel with itName (source elem); seenSelect bails to tier 2. - var skipWhileCond : Expression? - var takeWhileCond : Expression? var seenSelect = false - var seenSkip = false - var seenSkipWhile = false - var seenTakeWhile = false - var seenTake = false var allProjectionsPure = true var elementType = clone_type(top._type.firstType) var lastBindName = itName - for (i in 0 .. intermediateCount) { - var cll & = unsafe(calls[i]) - let opName = cll._1.name + for (call in c.many["head"]) { + let opName = call_norm_name(call) if (opName == "where_") { - // Theme 2 5c — `take(N)._where(p)` allowed (routed to postTakeWhereCond, gates contribution only); other prior range ops still bail; single post-take where in v1. - if (seenSkip || seenSkipWhile || seenTakeWhile || (seenTake && postTakeWhereCond != null)) return null var predicate : Expression? if (seenSelect) { - // Phase 3d / single-eval: where-after-select. Bind the current projection + // where-after-select: bind the running projection to a typed local so the if (has_sideeffects(projection)) return null if (lane != LinqLane.COUNTER) { let wbName = "`vw`{at.line}`{at.column}`{length(preCondStmts)}" @@ -1939,26 +2266,21 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? 
{ preCondStmts |> push <| qmacro_expr() { var $i(wbName) : $t(projType) := $e(projection) } - // Replace projection with a typed ExprVar so downstream typer passes var pvar = new ExprVar(at = at, name := wbName) pvar._type = clone_type(elementType) pvar._type.flags.ref = true projection = pvar } - predicate = peel_lambda_replace_var(cll._0.arguments[1], projection) + predicate = peel_lambda_replace_var(call.arguments[1], projection) } else { - predicate = peel_lambda_rename_var(cll._0.arguments[1], itName) + predicate = peel_lambda_rename_var(call.arguments[1], itName) } - if (seenTake) { - postTakeWhereCond = predicate - } elif (whereCond == null) { + if (whereCond == null) { whereCond = predicate } else { whereCond = qmacro($e(whereCond) && $e(predicate)) } } elif (opName == "select") { - if (seenSkip || seenSkipWhile || seenTakeWhile || seenTake) return null - // Chained selects: bind the previous projection to a fresh local now so the next if (projection != null) { if (has_sideeffects(projection)) { allProjectionsPure = false @@ -1969,41 +2291,9 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { } lastBindName = bindName } - projection = peel_lambda_rename_var(cll._0.arguments[1], lastBindName) - elementType = clone_type(cll._0._type.firstType) + projection = peel_lambda_rename_var(call.arguments[1], lastBindName) + elementType = clone_type(call._type.firstType) seenSelect = true - } elif (opName == "skip") { - // Canonical chain: at most one skip, before any skip_while/take_while/take. - if (seenSkip || seenSkipWhile || seenTakeWhile || seenTake) return null - var skipArg = cll._0.arguments[1] - if (skipArg == null || skipArg._type == null || skipArg._type.baseType != Type.tInt) return null - skipExpr = clone_expression(skipArg) - seenSkip = true - } elif (opName == "skip_while") { - // pred uses itName; seenSelect bails (chained-bind peel is a follow-up). Canonical: after skip, before take_while/take. 
- if (seenSelect || seenSkipWhile || seenTakeWhile || seenTake) return null - var swArg = cll._0.arguments[1] - if (swArg == null) return null - skipWhileCond = peel_lambda_rename_var(swArg, itName) - if (skipWhileCond == null) return null - seenSkipWhile = true - } elif (opName == "take_while") { - // take_while pred sees source element (itName). Same select-cascade rule as skip_while. - if (seenSelect || seenTakeWhile || seenTake) return null - var twArg = cll._0.arguments[1] - if (twArg == null) return null - takeWhileCond = peel_lambda_rename_var(twArg, itName) - if (takeWhileCond == null) return null - seenTakeWhile = true - } elif (opName == "take") { - if (seenTake) return null - var takeArg = cll._0.arguments[1] - if (takeArg == null || takeArg._type == null || takeArg._type.baseType != Type.tInt) return null - takeExpr = clone_expression(takeArg) - seenTake = true - } elif (is_buffer_required_op(opName)) { // nolint:LINT009 - // TODO Phase 2X: BufferTopN (order_by + take/skip), BufferDistinct (distinct/_by), - return null } else { return null } @@ -2011,14 +2301,61 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { if (projection != null && has_sideeffects(projection)) { allProjectionsPure = false } + var skipExpr : Expression? + var skipWhileCond : Expression? + var takeWhileCond : Expression? + var takeExpr : Expression? 
+ if (c.single |> key_exists("skip")) { + var skipArg = c.single["skip"].arguments[1] + if (skipArg == null || skipArg._type == null || skipArg._type.baseType != Type.tInt) return null + skipExpr = clone_expression(skipArg) + } + if (c.single |> key_exists("skip_while")) { + if (seenSelect) return null + var swArg = c.single["skip_while"].arguments[1] + if (swArg == null) return null + skipWhileCond = peel_lambda_rename_var(swArg, itName) + if (skipWhileCond == null) return null + } + if (c.single |> key_exists("take_while")) { + if (seenSelect) return null + var twArg = c.single["take_while"].arguments[1] + if (twArg == null) return null + takeWhileCond = peel_lambda_rename_var(twArg, itName) + if (takeWhileCond == null) return null + } + if (c.single |> key_exists("take")) { + var takeArg = c.single["take"].arguments[1] + if (takeArg == null || takeArg._type == null || takeArg._type.baseType != Type.tInt) return null + takeExpr = clone_expression(takeArg) + } + if (c.single |> key_exists("post_take_where")) { + var ptwArg = c.single["post_take_where"].arguments[1] + if (ptwArg == null) return null + if (seenSelect) { + if (has_sideeffects(projection)) return null + if (lane != LinqLane.COUNTER) { + let wbName = "`vw`{at.line}`{at.column}`{length(preCondStmts)}" + var projType = clone_type(elementType) + preCondStmts |> push <| qmacro_expr() { + var $i(wbName) : $t(projType) := $e(projection) + } + var pvar = new ExprVar(at = at, name := wbName) + pvar._type = clone_type(elementType) + pvar._type.flags.ref = true + projection = pvar + } + postTakeWhereCond = peel_lambda_replace_var(ptwArg, projection) + } else { + postTakeWhereCond = peel_lambda_rename_var(ptwArg, itName) + } + } let noLimits = skipExpr == null && takeExpr == null && skipWhileCond == null && takeWhileCond == null - // Count-shaped shortcut: when terminator is `count` (→ int) or `long_count` (→ int64), let isCountShaped = (lane == LinqLane.COUNTER || (lane == LinqLane.ACCUMULATOR && lastName == 
"long_count")) if (isCountShaped && whereCond == null && allProjectionsPure && noLimits && type_has_length(top._type)) return emit_length_shortcut(lastName, top, srcName, at) - // Ring 1: accumulator lane builds its own per-op loop body (typed accumulator, optional if (lane == LinqLane.ACCUMULATOR) { var laneTops : array laneTops |> push(top) @@ -2028,9 +2365,8 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { intermediateBinds, preCondStmts, elementType, laneSrcs, accName, itName, names, skipExpr, takeExpr, skipWhileCond, takeWhileCond, at) } - // Ring 2: early-exit lane — `any` no-pred + no upstream work + no limits + length-bearing if (lane == LinqLane.EARLY_EXIT) { - let terminatorCall = calls.back()._0 + let terminatorCall = c.single["term"] let isAnyNoPred = lastName == "any" && length(terminatorCall.arguments) == 1 if (isAnyNoPred && whereCond == null && allProjectionsPure && noLimits && type_has_length(top._type)) @@ -2043,10 +2379,8 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { intermediateBinds, preCondStmts, elementType, terminatorCall, laneSrcs, itName, names, skipExpr, takeExpr, skipWhileCond, takeWhileCond, at) } - // Build the per-element loop body for COUNTER / ARRAY. Both lanes follow the same shape: var loopBody : Expression? if (counterLane) { - // Counter lane must evaluate the projection (and any chained intermediates) per var stmts : array if (projection != null && has_sideeffects(projection)) { let finalBindName = qn("vfinal", at) @@ -2054,7 +2388,6 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { var $i(finalBindName) = $e(projection) } } - // Theme 2 5c: when postTakeWhereCond is set, gate JUST the acc++ — the take cap still ticks unconditionally above. var incExpr = qmacro_expr() { $i(accName) ++ } @@ -2063,7 +2396,6 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? 
{ wrap_with_ranges(stmts, skipExpr, takeExpr, skipWhileCond, takeWhileCond, names) loopBody = prepend_precond(wrap_with_condition(stmts_to_expr(stmts), whereCond), preCondStmts) } else { - // Array lane. `push_clone` is the safe append everywhere: for workhorse types it's a var stmts : array var pushExpr : Expression? if (projection != null) { @@ -2072,15 +2404,12 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { } } elif (whereCond != null || postTakeWhereCond != null || skipExpr != null || takeExpr != null || skipWhileCond != null || takeWhileCond != null) { - // Identity push: `it` aliases the source element. Reached when chain is bare pushExpr = qmacro_expr() { $i(accName) |> push_clone($i(itName)) } } else { - // identity chain — nothing to fuse; let the caller fall through. return null } - // Theme 2 5c: postTakeWhereCond gates JUST the push — same shape as counter lane. stmts |> push(wrap_with_condition(pushExpr, postTakeWhereCond)) prepend_binds(stmts, intermediateBinds) wrap_with_ranges(stmts, skipExpr, takeExpr, skipWhileCond, takeWhileCond, names) @@ -2090,290 +2419,365 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { return emit_counter_lane(top, srcName, accName, itName, names, skipExpr, takeExpr, skipWhileCond, loopBody, at) } else { - return emit_array_lane(top, expr, loopBody, elementType, srcName, accName, itName, + return emit_array_lane(top, ctx.expr_is_iterator, loopBody, elementType, srcName, accName, itName, names, skipExpr, takeExpr, skipWhileCond, at) } } +// Stub — table-driven dispatch into plan_loop_or_count_patterns. [macro_function] -def private plan_reverse(var expr : Expression?) : Expression? { +def private plan_loop_or_count(var expr : Expression?) : Expression? 
{ var (top, calls) = flatten_linq(expr) if (empty(calls)) return null - normalize_order_reverse(calls) collapse_chained_selects(calls) + collapse_chained_wheres(calls) top = peel_each(top) - var terminatorName : string = "" - var terminatorCall : ExprCall? - { - let lastName = calls.back()._1.name - if (lastName == "count" || lastName == "first" || lastName == "first_or_default") { - terminatorName = lastName - terminatorCall = calls.back()._0 - calls |> pop + let at = calls[0]._0.at + let exprIsIter = expr._type != null && expr._type.isIterator + let srcName = qn("source", at) + for (p in plan_loop_or_count_patterns) { + var r <- match_pattern(p, calls, top) + if (r is matched) { + var topClone = clone_expression(top) + var ctx = EmitCtx(top = topClone, src = SourceAdapter(Array = (topClone, srcName)), expr_is_iterator = exprIsIter) + var result = invoke(p.emit, r as matched, ctx, at) + if (result != null) return result } } - if (empty(calls)) return null + return null +} + +[_macro] +def private populate_plan_loop_or_count_patterns { + if (!is_compiling_macros_in_module("linq_fold") || !empty(plan_loop_or_count_patterns)) return + // Single row — c_chain head matches the where_/select run (0+ contiguous calls), then canonical-order range ops, optional post-take where, optional terminator from loop_terminator_family. 
+ plan_loop_or_count_patterns |> emplace <| SplicePattern( + name = "loop_or_count_general", + chain <- [ + slot_chain_of(["where_", "select"], "head"), + Slot(matcher = m_literal("skip"), cardinality = c_opt(), capture_name = "skip"), + Slot(matcher = m_literal("skip_while"), cardinality = c_opt(), capture_name = "skip_while"), + Slot(matcher = m_literal("take_while"), cardinality = c_opt(), capture_name = "take_while"), + Slot(matcher = m_literal("take"), cardinality = c_opt(), capture_name = "take"), + Slot(matcher = m_literal("where_"), cardinality = c_opt(), capture_name = "post_take_where"), + Slot(matcher = m_alias("loop_terminator_family"), cardinality = c_opt(), capture_name = "term") + ], + requires <- array(), + emit = @@ < EmitFn > emit_loop_or_count_lane + ) +} + +// ===== plan_reverse migration (PR A — pattern-table refactor) ===== +// 5 emit archetypes + 5 pattern rows + stub. Each archetype lifted verbatim from +// the prior imperative plan_reverse branches (see git history before PR A). + +// Ra — counter (reverse is identity for count). Side-effecting projection still fires per match. +[macro_function] +def private emit_reverse_counter(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? { + var top = ctx.top + let srcName = (ctx.src as Array)._1 + let itName = qn("it", at) + let cntName = qn("cnt", at) var whereCond : Expression? + if (c.single |> key_exists("where")) { + whereCond = peel_lambda_rename_var(c.single["where"].arguments[1], itName) + } var projection : Expression? - var hasReverse = false - var seenSelect = false - var takeExpr : Expression? - var terminalSelectLam : Expression? - var terminalSelectElemType : TypeDeclPtr - // Theme 8 (audit 2a): trailing `distinct[_by]` after reverse. Single backward source walk with set-gated push — saves the cascade's reverse_to_array + distinct_by_inplace second walk. v1 limited to array source + implicit to_array terminator. 
- var distinctName : string - var distinctKey : Expression? - let at = calls[0]._0.at - let srcName = qn("source", at) - let itName = qn("it", at) - let bufName = qn("buf", at) + if (c.single |> key_exists("proj")) { + projection = peel_lambda_rename_var(c.single["proj"].arguments[1], itName) + } + var perElement : Expression? + if (projection != null && has_sideeffects(projection)) { + let vfinalName = qn("vfinal", at) + perElement = qmacro_block() { + var $i(vfinalName) = $e(projection) + $i(cntName) ++ + } + } else { + perElement = qmacro_expr() { + $i(cntName) ++ + } + } + perElement = wrap_with_condition(perElement, whereCond) + var body : Expression? = qmacro_block() { + var $i(cntName) = 0 + for ($i(itName) in $i(srcName)) { + $e(perElement) + } + return $i(cntName) + } + var bodyStmts : array + bodyStmts |> push_block_list(body) + return finalize_emission_stmts(top, srcName, at, bodyStmts) +} + +// Rb — walk + overwrite-last scalar (terminator: first / first_or_default). +// "first of reversed" = LAST surviving element; walk source, overwrite `last` on each match. No buffer. +[macro_function] +def private emit_reverse_walk_overwrite_scalar(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? { + var top = ctx.top + let srcName = (ctx.src as Array)._1 + let itName = qn("it", at) let foundName = qn("found", at) let lastName = qn("last", at) - let cntName = qn("cnt", at) let dBindName = qn("d", at) - let dkeyName = qn("rev_dkey", at) - let dsetName = qn("rev_dset", at) - let idxName = qn("rev_k", at) - let rlenName = qn("rev_len", at) - var reverseCall : ExprCall? - for (i in 0 .. 
length(calls)) { - var cll & = unsafe(calls[i]) - let name = cll._1.name - if (name == "where_") { - if (hasReverse || seenSelect) return null - whereCond = merge_where_cond(whereCond, peel_lambda_rename_var(cll._0.arguments[1], itName)) - } elif (name == "select") { - if (!hasReverse && !seenSelect) { - // Pre-reverse select: existing path (buffer holds projected values). - seenSelect = true - projection = peel_lambda_rename_var(cll._0.arguments[1], itName) - } elif (hasReverse && !seenSelect && terminalSelectLam == null && i == length(calls) - 1) { - // Terminal post-reverse select: project at return (R1-R4 buf or first scalar). - terminalSelectLam = cll._0.arguments[1] - if (terminalSelectLam == null - || cll._0._type == null || cll._0._type.firstType == null) return null - terminalSelectElemType = clone_type(cll._0._type.firstType) - } else { - return null - } - } elif (name == "reverse") { - if (hasReverse) return null - hasReverse = true - reverseCall = cll._0 - } elif (name == "take") { - if (!hasReverse || takeExpr != null) return null - var arg = cll._0.arguments[1] - if (arg == null || arg._type == null || arg._type.baseType != Type.tInt) return null - takeExpr = clone_expression(arg) - } elif (name == "distinct" || name == "distinct_by") { - // Theme 8 (audit 2a): trailing distinct[_by] after reverse, no other chain ops, implicit to_array terminator, array source. Walks source backward with set-gated push. - if (!hasReverse || distinctName != "" || i != length(calls) - 1) return null - distinctName = name - if (name == "distinct_by") { - if ((cll._0.arguments |> length) < 2) return null - distinctKey = clone_expression(cll._0.arguments[1]) - } - } else { - return null + var whereCond : Expression? + if (c.single |> key_exists("where")) { + whereCond = peel_lambda_rename_var(c.single["where"].arguments[1], itName) + } + var projection : Expression? 
+ if (c.single |> key_exists("proj")) { + projection = peel_lambda_rename_var(c.single["proj"].arguments[1], itName) + } + var terminalSelectLam : Expression? + if (c.single |> key_exists("termsel")) { + terminalSelectLam = c.single["termsel"].arguments[1] + } + if (!(c.single |> key_exists("term"))) return null + let terminatorCall = c.single["term"] + let terminatorName = call_norm_name(terminatorCall) + // Bail to cascade if upstream typing is incomplete — `lastType` reads `projection._type` / `top._type.firstType`, either can be null on partially-typed exprs. + if (projection != null) { + if (projection._type == null) return null + } else { + if (top._type == null || top._type.firstType == null) return null + } + var lastType = strip_const_ref(clone_type(projection != null ? projection._type : top._type.firstType)) + var valueExpr : Expression? + if (projection != null) { + valueExpr = clone_expression(projection) + } else { + valueExpr = qmacro_expr() { + $i(itName) } } - // Bail conditions: no reverse; take+terminator (take only with implicit to_array); count+terminal-select (would drop side effects); Theme 8 (audit 2a) distinct path constrained to array source + implicit to_array + no other chain ops. - if (!hasReverse || (takeExpr != null && terminatorName != "") - || (terminalSelectLam != null && terminatorName == "count") - || (distinctName != "" && (terminatorName != "" || takeExpr != null || projection != null - || whereCond != null || terminalSelectLam != null - || !(top._type.isGoodArrayType || top._type.isArray)))) return null + var matchBlock : Expression? = qmacro_block() { + $i(lastName) := $e(valueExpr) + $i(foundName) = true + } + var perElement = wrap_with_condition(matchBlock, whereCond) + var lastRetExpr : Expression? 
+ if (terminalSelectLam != null) { + lastRetExpr = peel_lambda_replace_var(terminalSelectLam, qmacro($i(lastName))) + } else { + lastRetExpr = qmacro($i(lastName)) + } + // first_or_default's user default is already at post-termsel type — re-projecting through `termsel` double-applies. + var dRetExpr : Expression? = qmacro($i(dBindName)) var body : Expression? - if (terminatorName == "count") { - // Reverse is identity for count — counter loop, no buffer. Side-effecting projection still fires per match. - var perElement : Expression? - if (projection != null && has_sideeffects(projection)) { - let vfinalName = qn("vfinal", at) - perElement = qmacro_block() { - var $i(vfinalName) = $e(projection) - $i(cntName) ++ + if (terminatorName == "first") { + body = qmacro_block() { + var $i(foundName) = false + var $i(lastName) : $t(lastType) = default<$t(lastType)> + for ($i(itName) in $i(srcName)) { + $e(perElement) } - } else { - perElement = qmacro_expr() { - $i(cntName) ++ + if (!$i(foundName)) { + panic("sequence contains no elements") } + return $e(lastRetExpr) } - perElement = wrap_with_condition(perElement, whereCond) + } else { body = qmacro_block() { - var $i(cntName) = 0 + let $i(dBindName) = $e(terminatorCall.arguments[1]) + var $i(foundName) = false + var $i(lastName) : $t(lastType) = default<$t(lastType)> for ($i(itName) in $i(srcName)) { $e(perElement) } - return $i(cntName) + return $i(foundName) ? $e(lastRetExpr) : $e(dRetExpr) } - } elif (terminatorName == "first" || terminatorName == "first_or_default") { - // "first of reversed" = LAST surviving element. Walk source, overwrite `last` on each match. No buffer. - var lastType = strip_const_ref(clone_type(projection != null ? projection._type : top._type.firstType)) - var valueExpr : Expression? 
- if (projection != null) { - valueExpr = clone_expression(projection) - } else { - valueExpr = qmacro_expr() { - $i(itName) - } + } + var bodyStmts : array + bodyStmts |> push_block_list(body) + return finalize_emission_stmts(top, srcName, at, bodyStmts) +} + +// R6 — backward index walk (bare reverse + take(N) + implicit to_array on array source). +// Visits only the last takeN indices — skips full-source push + O(length) reverse_inplace. +[macro_function] +def private emit_reverse_backward_index_walk(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? { + var top = ctx.top + let srcName = (ctx.src as Array)._1 + if (!(c.single |> key_exists("take")) || top._type == null || top._type.firstType == null) return null + var bufElemType = strip_const_ref(clone_type(top._type.firstType)) + let bufName = qn("buf", at) + let lenName = qn("rlen", at) + let takeLimName = qn("rtakeLim", at) + let takeNName = qn("rtn", at) + let kName = qn("rk", at) + var returnExpr = buffer_return(bufName, ctx.expr_is_iterator) + // Bind `take` arg once — user expression may have side effects; matches normal call semantics. + var body : Expression? = qmacro_block() { + let $i(lenName) = length($i(srcName)) + let $i(takeLimName) = $e(c.single["take"].arguments[1]) + let $i(takeNName) = $i(takeLimName) <= 0 ? 0 : ($i(takeLimName) < $i(lenName) ? $i(takeLimName) : $i(lenName)) + var $i(bufName) : array<$t(bufElemType)> + $i(bufName) |> reserve($i(takeNName)) + for ($i(kName) in 0 .. $i(takeNName)) { + $i(bufName) |> push_clone($i(srcName)[$i(lenName) - 1 - $i(kName)]) } - var matchBlock : Expression? = qmacro_block() { - $i(lastName) := $e(valueExpr) - $i(foundName) = true + $e(returnExpr) + } + var bodyStmts : array + bodyStmts |> push_block_list(body) + return finalize_emission_stmts(top, srcName, at, bodyStmts) +} + +// R-2a — backward walk + dset gate (Theme 8 / audit 2a: reverse + distinct[_by] + implicit to_array). 
+// Single backward source walk with set-gated push — saves the cascade's reverse_to_array + distinct_by_inplace second walk. +[macro_function] +def private emit_reverse_backward_walk_dset_gate(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? { + var top = ctx.top + let srcName = (ctx.src as Array)._1 + if (!(c.single |> key_exists("dist"))) return null + let distinctCall = c.single["dist"] + let distinctName = call_norm_name(distinctCall) + var distinctKey : Expression? + if (distinctName == "distinct_by") { + if ((distinctCall.arguments |> length) < 2) return null + distinctKey = clone_expression(distinctCall.arguments[1]) + } + if (top._type == null || top._type.firstType == null) return null + var bufElemType = strip_const_ref(clone_type(top._type.firstType)) + let itName = qn("it", at) + let bufName = qn("buf", at) + let dkeyName = qn("rev_dkey", at) + let dsetName = qn("rev_dset", at) + let idxName = qn("rev_k", at) + let rlenName = qn("rev_len", at) + var dkeyExpr : Expression? + if (distinctName == "distinct_by") { + dkeyExpr = peel_lambda_rename_var(distinctKey, itName) + if (dkeyExpr == null) return null + } else { + dkeyExpr = qmacro($i(itName)) + } + var dsetDecl : Expression? + if (distinctName == "distinct_by") { + dsetDecl = qmacro_expr() { + var inscope $i(dsetName) : table)))> } - var perElement = wrap_with_condition(matchBlock, whereCond) - // Terminal _select: `last` stays source-typed; project (and the default) at return. - var lastRetExpr : Expression? - var dRetExpr : Expression? 
- if (terminalSelectLam != null) { - lastRetExpr = peel_lambda_replace_var(terminalSelectLam, qmacro($i(lastName))) - dRetExpr = peel_lambda_replace_var(terminalSelectLam, qmacro($i(dBindName))) - } else { - lastRetExpr = qmacro($i(lastName)) - dRetExpr = qmacro($i(dBindName)) + } else { + dsetDecl = qmacro_expr() { + var inscope $i(dsetName) : table))> } - if (terminatorName == "first") { - body = qmacro_block() { - var $i(foundName) = false - var $i(lastName) : $t(lastType) = default<$t(lastType)> - for ($i(itName) in $i(srcName)) { - $e(perElement) - } - if (!$i(foundName)) { - panic("sequence contains no elements") - } - return $e(lastRetExpr) - } - } else { - body = qmacro_block() { - let $i(dBindName) = $e(terminatorCall.arguments[1]) - var $i(foundName) = false - var $i(lastName) : $t(lastType) = default<$t(lastType)> - for ($i(itName) in $i(srcName)) { - $e(perElement) - } - return $i(foundName) ? $e(lastRetExpr) : $e(dRetExpr) + } + var returnExpr = buffer_return(bufName, ctx.expr_is_iterator) + var body : Expression? = qmacro_block() { + let $i(rlenName) = length($i(srcName)) + var $i(bufName) : array<$t(bufElemType)> + $e(dsetDecl) + for ($i(idxName) in 0 .. $i(rlenName)) { + let $i(itName) = $i(srcName)[$i(rlenName) - 1 - $i(idxName)] + let $i(dkeyName) = _::unique_key($e(dkeyExpr)) + if (!$i(dsetName) |> key_exists($i(dkeyName))) { + $i(dsetName) |> insert($i(dkeyName)) + $i(bufName) |> push_clone($i(itName)) } } + $e(returnExpr) + } + var bodyStmts : array + bodyStmts |> push_block_list(body) + return finalize_emission_stmts(top, srcName, at, bodyStmts) +} + +// R1-R4 — catch-all buffer + reverse_inplace + optional resize + optional terminal _select. +[macro_function] +def private emit_reverse_buffer_inplace(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? 
{ + var top = ctx.top + let srcName = (ctx.src as Array)._1 + let itName = qn("it", at) + let bufName = qn("buf", at) + let outBufName = qn("rev_proj_buf", at) + let elemName = qn("rev_proj_e", at) + var whereCond : Expression? + if (c.single |> key_exists("where")) { + whereCond = peel_lambda_rename_var(c.single["where"].arguments[1], itName) + } + var projection : Expression? + if (c.single |> key_exists("preproj")) { + projection = peel_lambda_rename_var(c.single["preproj"].arguments[1], itName) + } + var takeExpr : Expression? + if (c.single |> key_exists("take")) { + takeExpr = clone_expression(c.single["take"].arguments[1]) + } + var terminalSelectLam : Expression? + var terminalSelectElemType : TypeDeclPtr + if (c.single |> key_exists("termsel")) { + terminalSelectLam = c.single["termsel"].arguments[1] + if (terminalSelectLam == null || c.single["termsel"]._type == null || c.single["termsel"]._type.firstType == null) return null + terminalSelectElemType = clone_type(c.single["termsel"]._type.firstType) + } + if (top._type == null || top._type.firstType == null) return null + // Buffer element type: post-select if a pre-reverse projection narrowed the element shape; otherwise source type. + var bufElemType : TypeDeclPtr + if (projection != null) { + bufElemType = strip_const_ref(clone_type(projection._type)) } else { - // R1-R4 path: buffer + reverse_inplace + optional resize + return buffer. - let needIterWrap = expr._type.isIterator - var bufElemType = strip_const_ref(clone_type(reverseCall._type.firstType)) - // Terminal _select projects buffer survivors at return (after resize trims to take(N)). - let outBufName = qn("rev_proj_buf", at) - let elemName = qn("rev_proj_e", at) - var projBody : Expression? - if (terminalSelectLam != null) { - projBody = peel_lambda_replace_var(terminalSelectLam, qmacro($i(elemName))) + bufElemType = strip_const_ref(clone_type(top._type.firstType)) + } + var projBody : Expression? 
+ if (terminalSelectLam != null) { + projBody = peel_lambda_replace_var(terminalSelectLam, qmacro($i(elemName))) + } + var pushExpr : Expression? + if (projection != null) { + pushExpr = qmacro_expr() { + $i(bufName) |> push_clone($e(projection)) } - // Theme 8 (audit 2a): reverse + distinct[_by] + to_array on array source — single backward walk with set-gated push. Mirrors canBackwardIndex below but gates push by dedup key instead of indexing into the last takeN slots. - if (distinctName != "") { - var dkeyExpr : Expression? - if (distinctName == "distinct_by") { - dkeyExpr = peel_lambda_rename_var(distinctKey, itName) - if (dkeyExpr == null) return null - } else { - dkeyExpr = qmacro($i(itName)) - } - var dsetDecl : Expression? - if (distinctName == "distinct_by") { - dsetDecl = qmacro_expr() { - var inscope $i(dsetName) : table)))> - } - } else { - dsetDecl = qmacro_expr() { - var inscope $i(dsetName) : table))> - } - } - var returnExpr = buffer_return(bufName, needIterWrap) - body = qmacro_block() { - let $i(rlenName) = length($i(srcName)) - var $i(bufName) : array<$t(bufElemType)> - $e(dsetDecl) - for ($i(idxName) in 0 .. $i(rlenName)) { - let $i(itName) = $i(srcName)[$i(rlenName) - 1 - $i(idxName)] - let $i(dkeyName) = _::unique_key($e(dkeyExpr)) - if (!$i(dsetName) |> key_exists($i(dkeyName))) { - $i(dsetName) |> insert($i(dkeyName)) - $i(bufName) |> push_clone($i(itName)) - } - } - $e(returnExpr) - } - } elif (takeExpr != null && projection == null && whereCond == null - && terminalSelectLam == null - && (top._type.isGoodArrayType || top._type.isArray)) { - // R6: visit only the last takeN indices — skips full-source push + O(length) reverse_inplace. - let lenName = qn("rlen", at) - let takeNName = qn("rtn", at) - let kName = qn("rk", at) - var returnExpr = buffer_return(bufName, needIterWrap) - body = qmacro_block() { - let $i(lenName) = length($i(srcName)) - let $i(takeNName) = $e(takeExpr) <= 0 ? 0 : ($e(takeExpr) < $i(lenName) ? 
$e(takeExpr) : $i(lenName)) - var $i(bufName) : array<$t(bufElemType)> - $i(bufName) |> reserve($i(takeNName)) - for ($i(kName) in 0 .. $i(takeNName)) { - $i(bufName) |> push_clone($i(srcName)[$i(lenName) - 1 - $i(kName)]) - } - $e(returnExpr) - } - } else { - var pushExpr : Expression? - if (projection != null) { - pushExpr = qmacro_expr() { - $i(bufName) |> push_clone($e(projection)) - } - } else { - pushExpr = qmacro_expr() { - $i(bufName) |> push_clone($i(itName)) - } - } - pushExpr = wrap_with_condition(pushExpr, whereCond) - var reserveStmts : array - if (type_has_length(top._type) && whereCond == null) { - reserveStmts |> push <| qmacro_expr() { - $i(bufName) |> reserve(length($i(srcName))) - } + } else { + pushExpr = qmacro_expr() { + $i(bufName) |> push_clone($i(itName)) + } + } + pushExpr = wrap_with_condition(pushExpr, whereCond) + var reserveStmts : array + if (type_has_length(top._type) && whereCond == null) { + reserveStmts |> push <| qmacro_expr() { + $i(bufName) |> reserve(length($i(srcName))) + } + } + var resizeStmts : array + if (takeExpr != null) { + // Bind `take` arg once — user expression may have side effects; matches normal call semantics. + let takeLimName = qn("rev_takeLim", at) + resizeStmts |> push_from <| qmacro_block_to_array() { + let $i(takeLimName) = $e(takeExpr) + $i(bufName) |> resize($i(takeLimName) <= 0 ? 0 : ($i(takeLimName) < length($i(bufName)) ? $i(takeLimName) : length($i(bufName)))) + } + } + var body : Expression? + if (terminalSelectLam != null) { + var returnExpr = buffer_return(outBufName, ctx.expr_is_iterator) + body = qmacro_block() { + var $i(bufName) : array<$t(bufElemType)> + $b(reserveStmts) + for ($i(itName) in $i(srcName)) { + $e(pushExpr) } - var resizeStmts : array - if (takeExpr != null) { - // take(N) of reversed-buffer = last N of source reversed. Three clones since no math::min import. - resizeStmts |> push <| qmacro_expr() { - $i(bufName) |> resize($e(takeExpr) <= 0 ? 
0 : ($e(takeExpr) < length($i(bufName)) ? $e(takeExpr) : length($i(bufName)))) - } + _::reverse_inplace($i(bufName)) + $b(resizeStmts) + var $i(outBufName) : array<$t(terminalSelectElemType)> + $i(outBufName) |> reserve(length($i(bufName))) + for ($i(elemName) in $i(bufName)) { + $i(outBufName) |> push_clone($e(projBody)) } - if (terminalSelectLam != null) { - // Post-reverse projection: outBuf returned in place of bufName. - var returnExpr = buffer_return(outBufName, needIterWrap) - body = qmacro_block() { - var $i(bufName) : array<$t(bufElemType)> - $b(reserveStmts) - for ($i(itName) in $i(srcName)) { - $e(pushExpr) - } - _::reverse_inplace($i(bufName)) - $b(resizeStmts) - var $i(outBufName) : array<$t(terminalSelectElemType)> - $i(outBufName) |> reserve(length($i(bufName))) - for ($i(elemName) in $i(bufName)) { - $i(outBufName) |> push_clone($e(projBody)) - } - $e(returnExpr) - } - } else { - var returnExpr = buffer_return(bufName, needIterWrap) - body = qmacro_block() { - var $i(bufName) : array<$t(bufElemType)> - $b(reserveStmts) - for ($i(itName) in $i(srcName)) { - $e(pushExpr) - } - _::reverse_inplace($i(bufName)) - $b(resizeStmts) - $e(returnExpr) - } + $e(returnExpr) + } + } else { + var returnExpr = buffer_return(bufName, ctx.expr_is_iterator) + body = qmacro_block() { + var $i(bufName) : array<$t(bufElemType)> + $b(reserveStmts) + for ($i(itName) in $i(srcName)) { + $e(pushExpr) } + _::reverse_inplace($i(bufName)) + $b(resizeStmts) + $e(returnExpr) } } var bodyStmts : array @@ -2381,72 +2785,150 @@ def private plan_reverse(var expr : Expression?) : Expression? { return finalize_emission_stmts(top, srcName, at, bodyStmts) } +// Stub — table-driven dispatch into plan_reverse_patterns. Populated by populate_plan_reverse_patterns [_macro]. [macro_function] -def private plan_distinct(var expr : Expression?) : Expression? { +def private plan_reverse(var expr : Expression?) : Expression? 
{ var (top, calls) = flatten_linq(expr) if (empty(calls)) return null + normalize_order_reverse(calls) collapse_chained_selects(calls) + collapse_chained_wheres(calls) top = peel_each(top) - var terminatorName : string = "" - var countPred : Expression? - { - let lastName = calls.back()._1.name - let lastArgs = calls.back()._0.arguments |> length - // `sum` has no selector overload that interacts cleanly with distinct buffering — keep 1-arg only. count/long_count(predicate) (Theme 4): dedup runs UNCONDITIONALLY (distinct_by keeps the FIRST occurrence per key); a separate `acc` counter increments only when the predicate matches that first occurrence. Wrapping dedup in `if(P)` would diverge from tier-2 when a later duplicate matches but the first didn't. - if ((lastName == "count" || lastName == "sum" || lastName == "long_count") && lastArgs == 1) { - terminatorName = lastName - calls |> pop - } elif ((lastName == "count" || lastName == "long_count") && lastArgs == 2) { - countPred = calls.back()._0.arguments[1] - if (countPred == null) return null - terminatorName = lastName - calls |> pop - } - } - if (empty(calls)) return null - var whereCond : Expression? - var projection : Expression? - var hasDistinct = false - var seenSelect = false - var isDistinctBy = false - var distinctKeyBlock : Expression? - var takeExpr : Expression? 
let at = calls[0]._0.at + let exprIsIter = expr._type != null && expr._type.isIterator let srcName = qn("source", at) - let itName = qn("it", at) + for (p in plan_reverse_patterns) { + var r <- match_pattern(p, calls, top) + if (r is matched) { + var topClone = clone_expression(top) + var ctx = EmitCtx(top = topClone, src = SourceAdapter(Array = (topClone, srcName)), expr_is_iterator = exprIsIter) + var result = invoke(p.emit, r as matched, ctx, at) + if (result != null) return result + } + } + return null +} + +// [_macro] runs at macro-compile time and stays off the JIT runtime graph — the rows carry @@ +// addresses of [macro_function] emit fns whose quote() bodies the LLVM JIT can't lower. +[_macro] +def private populate_plan_reverse_patterns { + if (!is_compiling_macros_in_module("linq_fold") || !empty(plan_reverse_patterns)) return + // R-2a (most specific) — Theme 8 / audit 2a — backward walk + dset gate + plan_reverse_patterns |> emplace <| SplicePattern( + name = "reverse_distinct_backward_walk", + chain <- [ + Slot(matcher = m_literal("reverse"), cardinality = c_one()), + Slot(matcher = m_alias("distinct_family"), cardinality = c_one(), capture_name = "dist") + ], + requires <- [@@ < RequiresPredicate > array_source, @@ < RequiresPredicate > no_terminator], + emit = @@ < EmitFn > emit_reverse_backward_walk_dset_gate + ) + // R6 — bare reverse + take + to_array, backward index walk + plan_reverse_patterns |> emplace <| SplicePattern( + name = "reverse_take_backward_index", + chain <- [ + Slot(matcher = m_literal("reverse"), cardinality = c_one()), + Slot(matcher = m_literal("take"), cardinality = c_one(), capture_name = "take") + ], + requires <- [@@ < RequiresPredicate > array_source, @@ < RequiresPredicate > take_arg_is_int, @@ < RequiresPredicate > no_terminator], + emit = @@ < EmitFn > emit_reverse_backward_index_walk + ) + // Ra — reverse + count (reverse is identity for count) + plan_reverse_patterns |> emplace <| SplicePattern( + name = 
"reverse_counter", + chain <- [ + Slot(matcher = m_literal("where_"), cardinality = c_opt(), capture_name = "where"), + Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "proj"), + Slot(matcher = m_literal("reverse"), cardinality = c_one()), + Slot(matcher = m_literal("count"), cardinality = c_one(), capture_name = "term", arity = 1) + ], + requires <- array(), // Ra works on iterator and array sources (for-loop body, no indexed access) + emit = @@ < EmitFn > emit_reverse_counter + ) + // Rb — reverse + first[_or_default], walk-and-overwrite-last scalar; optional terminal _select + plan_reverse_patterns |> emplace <| SplicePattern( + name = "reverse_walk_overwrite", + chain <- [ + Slot(matcher = m_literal("where_"), cardinality = c_opt(), capture_name = "where"), + Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "proj"), + Slot(matcher = m_literal("reverse"), cardinality = c_one()), + Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "termsel"), + Slot(matcher = m_alias("first_family"), cardinality = c_one(), capture_name = "term") + ], + requires <- array(), + emit = @@ < EmitFn > emit_reverse_walk_overwrite_scalar + ) + // R1-R4 — catch-all buffer + reverse_inplace + optional resize + optional terminal _select + plan_reverse_patterns |> emplace <| SplicePattern( + name = "reverse_buffer_inplace", + chain <- [ + Slot(matcher = m_literal("where_"), cardinality = c_opt(), capture_name = "where"), + Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "preproj"), + Slot(matcher = m_literal("reverse"), cardinality = c_one()), + Slot(matcher = m_literal("take"), cardinality = c_opt(), capture_name = "take"), + Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "termsel") + ], + requires <- [@@ < RequiresPredicate > take_arg_is_int], + emit = @@ < EmitFn > emit_reverse_buffer_inplace + ) +} + +// ===== plan_distinct migration (PR A — pattern-table 
refactor) ===== +// 1 emit archetype with internal terminator-shape dispatch + 2 pattern rows + stub. + +// Hashtable dedup + per-fresh-key consume; terminator picks return wiring (count/long_count/sum/implicit to_array). +// take(N) bounds outer loop with cross-iteration break for true O(N)-source streaming exit. +[macro_function] +def private emit_hashtable_dedup(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? { + var top = ctx.top + let srcName = (ctx.src as Array)._1 + let itName = qn("it", at) let bufName = qn("buf", at) let seenName = qn("seen", at) let keyName = qn("k", at) let takenName = qn("taken", at) let accName = qn("acc", at) - for (i in 0 .. length(calls)) { - var cll & = unsafe(calls[i]) - let name = cll._1.name - if (name == "where_") { - if (hasDistinct || seenSelect) return null - whereCond = merge_where_cond(whereCond, peel_lambda_rename_var(cll._0.arguments[1], itName)) - } elif (name == "select") { - if (hasDistinct || seenSelect) return null - seenSelect = true - projection = peel_lambda_rename_var(cll._0.arguments[1], itName) - } elif (name == "distinct") { - if (hasDistinct) return null - hasDistinct = true - } elif (name == "distinct_by") { - if (hasDistinct) return null - hasDistinct = true - isDistinctBy = true - distinctKeyBlock = clone_expression(cll._0.arguments[1]) - } elif (name == "take") { - if (!hasDistinct || takeExpr != null) return null - var arg = cll._0.arguments[1] - if (arg == null || arg._type == null || arg._type.baseType != Type.tInt) return null - takeExpr = clone_expression(arg) + let takeLimName = qn("takeLim", at) + let pvName = qn("pv", at) + var whereCond : Expression? + if (c.single |> key_exists("where")) { + whereCond = peel_lambda_rename_var(c.single["where"].arguments[1], itName) + } + var projection : Expression? 
+ if (c.single |> key_exists("proj")) { + projection = peel_lambda_rename_var(c.single["proj"].arguments[1], itName) + } + if (!(c.single |> key_exists("dist"))) return null + let distinctCall = c.single["dist"] + let isDistinctBy = call_norm_name(distinctCall) == "distinct_by" + var distinctKeyBlock : Expression? + if (isDistinctBy) { + if ((distinctCall.arguments |> length) < 2) return null + distinctKeyBlock = clone_expression(distinctCall.arguments[1]) + } + var takeExpr : Expression? + if (c.single |> key_exists("take")) { + takeExpr = clone_expression(c.single["take"].arguments[1]) + } + var terminatorName : string = "" + var countPred : Expression? + if (c.single |> key_exists("term")) { + let terminatorCall = c.single["term"] + let termName = call_norm_name(terminatorCall) + let termArgs = terminatorCall.arguments |> length + // `sum` has no selector overload that interacts cleanly with distinct buffering — keep 1-arg only. count/long_count(predicate) (Theme 4): dedup runs UNCONDITIONALLY (distinct_by keeps the FIRST occurrence per key); a separate `acc` counter increments only when the predicate matches that first occurrence. + if ((termName == "count" || termName == "sum" || termName == "long_count") && termArgs == 1) { + terminatorName = termName + } elif ((termName == "count" || termName == "long_count") && termArgs == 2) { + if (terminatorCall.arguments[1] == null) return null + countPred = clone_expression(terminatorCall.arguments[1]) + terminatorName = termName } else { return null } } - if (!hasDistinct || (takeExpr != null && terminatorName != "")) return null + if ((takeExpr != null && terminatorName != "") || top._type == null || top._type.firstType == null) return null var elemType = strip_const_ref(clone_type(projection != null ? projection._type : top._type.firstType)) var stmts : array if (isDistinctBy) { @@ -2478,16 +2960,12 @@ def private plan_distinct(var expr : Expression?) : Expression? 
{ var $i(accName) = 0l } } - // Bind take(N) limit once at outer scope so a side-effecting arg fires once, not on every fresh-key check. - let takeLimName = qn("takeLim", at) if (takeExpr != null) { stmts |> push_from <| qmacro_block_to_array() { let $i(takeLimName) = $e(takeExpr) var $i(takenName) = 0 } } - // Bind side-effecting projection once per element; key + buffer/acc share the bind (matches original LINQ's one-eval per source elem). - let pvName = qn("pv", at) let bindProjection = projection != null && has_sideeffects(projection) var pushExpr : Expression? if (projection != null) { @@ -2509,12 +2987,10 @@ def private plan_distinct(var expr : Expression?) : Expression? { } else { keyExpr = clone_expression(pushExpr) } - // count(p) / long_count(p): peel against pushExpr so the predicate sees the post-projection element (`_select(_.brand)._distinct().count(b => b > 0)` → b binds to the projected brand, not the source tuple). if (countPred != null) { countPred = peel_lambda_replace_var(countPred, pushExpr) if (countPred == null) return null } - // Per-match consume: only inside the fresh-key branch; take-break is the outer-loop guard below. var consumeStmts : array if (takeExpr != null) { consumeStmts |> push <| qmacro_expr() { @@ -2533,7 +3009,6 @@ def private plan_distinct(var expr : Expression?) : Expression? { $i(accName) += $e(pushExpr) } } - // count(p) / long_count(p): dedup happens unconditionally so `distinct_by` semantics stay correct (FIRST occurrence per key); the predicate gates only the matched counter. Without this split the chain `distinct_by(k).count(p)` would diverge from tier-2 when a later duplicate matches p but the first one didn't. if (countPred != null && isCountTerminator) { consumeStmts |> push <| qmacro_expr() { if ($e(countPred)) { @@ -2559,7 +3034,6 @@ def private plan_distinct(var expr : Expression?) : Expression? 
{ perMatchStmts |> push(ifNew) var perElement = stmts_to_expr(perMatchStmts) perElement = wrap_with_condition(perElement, whereCond) - // Outer-loop take guard: top-of-loop break for true O(N)-source streaming exit (skips duplicates after Nth distinct). if (takeExpr != null) { stmts |> push <| qmacro_expr() { for ($i(itName) in $i(srcName)) { @@ -2601,11 +3075,64 @@ def private plan_distinct(var expr : Expression?) : Expression? { return $i(accName) } } else { - stmts |> push(buffer_return(bufName, expr._type.isIterator)) + stmts |> push(buffer_return(bufName, ctx.expr_is_iterator)) } return finalize_emission_stmts(top, srcName, at, stmts) } +// Stub — table-driven dispatch into plan_distinct_patterns. +[macro_function] +def private plan_distinct(var expr : Expression?) : Expression? { + var (top, calls) = flatten_linq(expr) + if (empty(calls)) return null + collapse_chained_selects(calls) + collapse_chained_wheres(calls) + top = peel_each(top) + let at = calls[0]._0.at + let exprIsIter = expr._type != null && expr._type.isIterator + let srcName = qn("source", at) + for (p in plan_distinct_patterns) { + var r <- match_pattern(p, calls, top) + if (r is matched) { + var topClone = clone_expression(top) + var ctx = EmitCtx(top = topClone, src = SourceAdapter(Array = (topClone, srcName)), expr_is_iterator = exprIsIter) + var result = invoke(p.emit, r as matched, ctx, at) + if (result != null) return result + } + } + return null +} + +[_macro] +def private populate_plan_distinct_patterns { + if (!is_compiling_macros_in_module("linq_fold") || !empty(plan_distinct_patterns)) return + // distinct_take — distinct[_by] |> take(N) [|> terminator]; inner-loop break on take cap + plan_distinct_patterns |> emplace <| SplicePattern( + name = "distinct_take", + chain <- [ + Slot(matcher = m_literal("where_"), cardinality = c_opt(), capture_name = "where"), + Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "proj"), + Slot(matcher = 
m_alias("distinct_family"), cardinality = c_one(), capture_name = "dist"), + Slot(matcher = m_literal("take"), cardinality = c_one(), capture_name = "take"), + Slot(matcher = m_alias("distinct_terminator_family"), cardinality = c_opt(), capture_name = "term") + ], + requires <- [@@ < RequiresPredicate > take_arg_is_int], + emit = @@ < EmitFn > emit_hashtable_dedup + ) + // distinct_main — distinct[_by]; optional terminator (count / long_count / sum / implicit to_array) + plan_distinct_patterns |> emplace <| SplicePattern( + name = "distinct_main", + chain <- [ + Slot(matcher = m_literal("where_"), cardinality = c_opt(), capture_name = "where"), + Slot(matcher = m_literal("select"), cardinality = c_opt(), capture_name = "proj"), + Slot(matcher = m_alias("distinct_family"), cardinality = c_one(), capture_name = "dist"), + Slot(matcher = m_alias("distinct_terminator_family"), cardinality = c_opt(), capture_name = "term") + ], + requires <- array(), + emit = @@ < EmitFn > emit_hashtable_dedup + ) +} + // ── Group-by helpers ────────────────────────────────────────────────── [macro_function] diff --git a/daslib/linq_fold.md b/daslib/linq_fold.md new file mode 100644 index 0000000000..c58eca4a61 --- /dev/null +++ b/daslib/linq_fold.md @@ -0,0 +1,466 @@ +# linq_fold.das refactor — masterplan + +Living document. Update **Status** + **Decision log** as phases ship. 
+ +## Status + +- [x] **PR A** — Foundation + first migrations (plan_reverse, plan_distinct) — branch `bbatkin/linq-fold-patterns-foundation` +- [x] **PR B1** — KR-1 closure (`collapse_chained_wheres`) + `c_chain` cardinality + `Captures` wrapper struct + `plan_loop_or_count` migration — branch `bbatkin/linq-fold-pattern-table-prb` +- [ ] **PR B2** — `plan_order_family` migration (5 emit archetypes + 5 rows) — deferred follow-up; foundation (aliases / predicates / c_chain) shipped in B1 +- [ ] **PR C** — SourceAdapter + decs mirrors (plan_decs_reverse / _distinct / _order_family / _unroll) +- [ ] **PR D** — Group-by + special cases (plan_group_by family, plan_zip, plan_decs_join, reducer-spec data table) + +## Goal + +Split `_fold` splice machinery into two layers: + +- **Pattern recognition** — declared via a data table (`splice_patterns`). Each row names a chain shape, a list of requires-predicates, and a target emit fn. +- **Code generation** — reusable emit archetypes parameterized by a `SourceAdapter` (array / decs / decs-find, plus future zip / decs-join variants). + +Today's 13 `plan_*` functions (~3300 LOC across the file) re-implement the same boilerplate: `flatten_linq → declare 8-30 tracking flags → giant for-loop with if/elif on op name → ad-hoc co-occurrence guards → giant final bail → emit dispatch`. 70-80% of plan_* code is recognition state + co-occurrence checking, not codegen. Adding a splice arm means patching 1-3 plan_* cores at 5-8 sites each, hunting through if/elif walls. + +End state: adding an arm = adding a row to `splice_patterns`. Coverage gaps surface as missing rows, not as negative bails buried at the bottom of a function. + +Estimated savings: **~-1750 LOC** across the 4 PRs (~-25% of the file). + +## Today's situation (input to the refactor) + +13 `plan_*` functions cover all splice cases. 
From the census: + +| Plan | LOC | Anchor | Emit shapes | +|---|---|---|---| +| plan_order_family | 543 | `order*` | 11 (3 archetypes × variants) | +| plan_loop_or_count | 208 | terminator dispatch | 6 (already lane-factored) | +| plan_reverse | 284 | `reverse` | 5 | +| plan_distinct | 223 | `distinct*` | 1 archetype × 7 returns | +| plan_group_by_core | 364 | (pre-stripped contract) | 8 + 12-arm reducer table | +| plan_group_by | 62 | `group_by_lazy` | delegate | +| plan_decs_unroll | 61 | decs + terminator | 7 (already dispatcher) | +| plan_decs_group_by | 103 | `group_by_lazy` + decs | delegate | +| plan_decs_order_family | 384 | `order*` + decs | ~12 (near-mirror of array sibling) | +| plan_decs_reverse | 288 | `reverse` + decs | 4 (near-mirror) | +| plan_decs_distinct | 254 | `distinct*` + decs | 1 × 4 × 2 (near-mirror) | +| plan_decs_join | 189 | `join` + 2 decs bridges | 1 × 4 (no array sibling) | +| plan_zip | 352 | `zip` | 8 | + +Decs-side plans are near-mirrors of their array-side siblings modulo source-loop wrap. `GroupBySourceAdapter` (existing) is the proof-case: `plan_group_by_core` is fully source-agnostic, with the array-side and decs-side wrappers each ~60-100 LOC. + +## Grammar kernel (lives inline at top of linq_fold.das) + +Visibility follows the rule in **Tests / exports philosophy** below: default `private`, public only what walker tests must name (`Slot` / `SlotMatcher` / `SlotCardinality` + their `m_*`/`c_*` constructors, `SplicePattern`, `Captures` / `EmitCtx` / `EmitFn` typedefs, `chain_prefix_of`, `check_pattern_table_reachable`, `alias_table`). The snippet below omits the `private` keyword for readability; the implementation in `linq_fold.das` carries it on everything not in that public list. + +```das +variant SourceAdapter { + Array : tuple // (top, srcName) — PR A scope + // PR C widens: Decs(DecsBridgeShape), DecsFind(DecsBridgeShape) + // PR D widens: Zip(...), DecsJoin(...) 
+} + +variant SlotMatcher { + literal : string // exact name match + one_of : array<string> // any-of name set + alias : string // named group looked up in alias_table +} + +variant SlotCardinality { + one : void? // required, exactly 1 + optional : void? // 0 or 1 + chain : void? // 0 or more (greedy); captures as array via Captures.many — PR B1 +} + +struct Slot { + matcher : SlotMatcher + cardinality : SlotCardinality + capture_name : string = "" // "" = don't capture + arity : int = -1 // -1 = any; positive = require N args +} + +// PR B1 — Captures is a wrapper struct: `single` for c_one/c_opt slots, `many` for c_chain slots. +struct Captures { + single : table<string; ExprCall?> + many : table<string; array<ExprCall?>> +} + +variant MatchResult { + no_match : void? // daslang-idiomatic Option + matched : Captures +} + +typedef RequiresPredicate = function<(var c : Captures; var top : Expression?) : bool> + +// Fold-time context passed to every emit archetype. Carries the peeled source expression, source adapter, +// and the outer `_fold(...)` expression's iterator-ness (drives `buffer_return` wrap). +struct EmitCtx { + top : Expression? // peel_each'd; stubs pre-clone per invoke + src : SourceAdapter + expr_is_iterator : bool +} + +typedef EmitFn = function<(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression?> + +struct SplicePattern { + name : string // for debug / lint diagnostics + chain : array<Slot> + requires : array<RequiresPredicate> // all must hold + emit : EmitFn +} + +var plan_reverse_patterns : array<SplicePattern> +var plan_distinct_patterns : array<SplicePattern> +var plan_loop_or_count_patterns : array<SplicePattern> // PR B1 +var splice_patterns : array<SplicePattern> // PR D: collapsed from per-plan tables; first match wins +``` + +Predicates and emit archetypes are NAMED module-level `def` functions wrapped at use sites with `@@<RequiresPredicate>` / `@@<EmitFn>` (anonymous `@@(...)` lambdas produce `_localfunction_*` symbols that the LLVM JIT pass can't resolve — named functions take a stable address). 
+ +### Walker contract + +```das +def match_pattern(p : SplicePattern; + var calls : array>; + var top : Expression?) : MatchResult +``` + +Walks `calls` left-to-right. For each slot: + +- `one` — current call must match (name + arity if specified); both cursors advance. +- `optional` — if current call matches, both cursors advance; otherwise the slot is skipped without consuming. +- `chain` (PR B1) — greedy match-while-in-set. Captured as `array` into `captures.many[capture_name]`. Always succeeds (0+); empty match still creates the `many` entry so emit fns can rely on `c.many |> key_exists("…")`. Pairs with `m_one_of` via the `slot_chain_of(names, cap)` convenience constructor. + +After all slots, no unconsumed calls remain. If any of the above fails → `MatchResult(no_match = null)`. + +Then each `RequiresPredicate` in `p.requires` is evaluated against the populated `Captures` and the peeled `top`. All must return true. If any fails → `MatchResult(no_match = null)`. + +Returns `MatchResult(matched <- captures)` on full success (move semantics — `Captures` is a table). Caller binds `var r <- match_pattern(...)` and reads via `if (r is matched) { let c & = r as matched; … }`. + +### Alias table (named op-name groups) + +The snippet below is the projected end-state at PR D. The authoritative live list is the `alias_table` literal in [daslib/linq_fold.das](linq_fold.das). Status reflects what's populated through PR B1. 
+ +```das +// projected end-state at PR D +var alias_table : table> <- { + "order_family" => ["order", "order_descending", "order_by", "order_by_descending"], // PR B1 ✓ + "distinct_family" => ["distinct", "distinct_by"], // PR A ✓ + "first_family" => ["first", "first_or_default"], // PR A ✓ + "count_family" => ["count", "long_count"], // PR A ✓ + "accum_family" => ["sum", "min", "max", "average", "aggregate", // PR B1 ✓ + "min_by", "max_by", "min_max", "min_max_by", + "min_max_average", "min_max_average_by", "long_count"], + "early_exit_family" => ["any", "all", "contains", "first", "first_or_default"], // PR B1 ✓ + "range_op_family" => ["skip", "skip_while", "take_while", "take"], // PR B1 ✓ + "loop_terminator_family" => union of count + accum + early_exit + last/single/element_at, // PR B1 ✓ (loop_or_count terminator slot) + "distinct_terminator_family" => ["count", "long_count", "sum"] // PR A ✓ — narrow to terminators emit_hashtable_dedup actually handles +} +``` + +### Predicate library + +Module-level named `RequiresPredicate` constants for reuse across patterns. As with `alias_table`, this table shows the projected end-state — see [daslib/linq_fold.das](linq_fold.das) for what's actually defined today (PR A: `array_source`, `take_arg_is_int`, `no_terminator`). + +| Name | Status | Meaning | +|---|---|---| +| `array_source` | PR A ✓ | `top._type.isGoodArrayType \|\| top._type.isArray` (after `peel_each`) | +| `array_random_access` | planned | `array_source && top._type.isGoodArrayType` | +| `decs_source` | planned | `extract_decs_bridge(top) != null` | +| `inline_cmp_available` | PR B1 ✓ | `try_make_inline_cmp` succeeds on `c.single["order"]` (only `order_by[_descending]` with inline-splice-able key). Hard-wired to `"order"` capture key; promote to factory on second use | +| `has_where_or_distinct` | PR B1 ✓ | `c.single \|> key_exists("where") \|\| c.single \|> key_exists("distinct")`. 
For `order_fused_prefilter` row to distinguish from bare `buffer_helper_dispatch` | +| `take_arg_is_int` | PR A ✓ | captured `take`'s 2nd arg `_type.baseType == Type.tInt`; vacuous if no `take` slot | +| `arity_eq(cap, n)` / `arity_ge(cap, n)` | planned (factory) | structural checks on captured calls | +| `no_terminator` | PR A ✓ | no terminator captured (final optional slot empty); return shape decided by `ctx.expr_is_iterator` in the emit fn | +| `is_primitive_key(cap, argIdx)` | planned (factory) | `is_primitive_join_key_type` on captured arg | + +PR A's `take_arg_is_int` is hard-wired to the `"take"` capture key (not a factory) because every PR A consumer uses that name. Promote to a `make_arity_eq(cap, n)` / `make_arg_type_is(cap, idx, type)` factory shape on second use, per the masterplan rule of thumb. + +Inline closures (`@@(c, top) => …`) acceptable for one-off pattern-specific checks. **Rule of thumb:** promote to named predicate (or factory) on second use. + +## Migration phases + +| PR | Phase | Scope | Status | +|---|---|---|---| +| **A** | 0 — Foundation | Kernel types + walker + alias_table + predicate library + per-archetype unit tests. `splice_patterns` empty initially (safe state — all cascades unchanged). | complete | +| **A** | 1 — First migrations | `plan_reverse` (5 rows: Ra/Rb/R6/R-2a/R1-R4), `plan_distinct` (2 rows + return-shape switch in emit). Archetypes: `emit_counter_array`, `emit_walk_overwrite_scalar`, `emit_backward_walk`, `emit_buffer_reverse_inplace`, `emit_hashtable_dedup`. **Hard-delete imperative bodies.** | complete | +| **B1** | 2a — Array core (`plan_loop_or_count`) | `c_chain` cardinality + `Captures` wrapper struct (`single` / `many`) + `slot_chain_of(names, cap)` constructor. `collapse_chained_wheres` pre-pass (KR-1 fix). `plan_loop_or_count` migration (1 row + lane dispatch — preserves existing factoring; head c_chain matches `["where_", "select"]` greedy). 
| complete | +| **B2** | 2b — Array core (`plan_order_family`) | `plan_order_family` (5 rows: streaming-min / bounded-heap / fused-prefilter / buffer-helper-dispatch / order_then_plain_distinct). Archetypes: `emit_streaming_min`, `emit_bounded_heap`, `emit_fused_prefilter`, `emit_buffer_helper_dispatch`, shared `emit_terminal_select_project`. **Hard-delete imperative body.** | not started | +| **C** | 3 — SourceAdapter + decs mirrors | Widen `SourceAdapter` to multi-variant + methods. Migrate `plan_decs_reverse / _distinct / _order_family / _unroll` — **reuse array-side rows + emit fns** modulo adapter swap. **Hard-delete decs imperative bodies.** | not started | +| **D** | 4 — Group-by + special cases | Reconcile `GroupBySourceAdapter` with `SourceAdapter`. `plan_group_by` + `plan_decs_group_by` → thin pattern rows delegating to existing `plan_group_by_core` (which stays as a sub-codegen). `plan_zip` (1-2 rows, possibly `SourceAdapter::Zip`). `plan_decs_join` (1 row, `SourceAdapter::DecsJoin` or special-case emit). Migrate `emit_reducer_branches` 12-arm if/elif into a `ReducerSpec` data table. | not started | + +**Net at end: ~-1750 LOC.** + +## Migration mechanics + +During migration, today's planner cascade at lines 6302-6337 stays unchanged. Each migrated `plan_X` function body becomes a thin stub that walks its per-plan table (the `flatten_linq` / collapse / `peel_each` pre-passes and the `EmitCtx` construction are elided below): + +```das +def private plan_reverse(var expr : Expression?) : Expression? { + for (p in plan_reverse_patterns) { // subset filtered by owner_plan_id + var r <- match_pattern(p, calls, top) // MatchResult — see Walker contract + if (r is matched) { + var result = invoke(p.emit, r as matched, ctx, at) + if (result != null) return result // an emit fn may still bail; try the next row + } + } + return null +} +``` + +After PR D, collapse all stubs + cascade into one flat walk over `splice_patterns` (no `owner_plan_id` filtering needed once all are rows). 
+ +## What we KEEP from today's code + +All shared helpers stay as building blocks for emit archetypes: + +- `flatten_linq`, `peel_each`, `extract_decs_bridge` +- `normalize_order_reverse`, `collapse_chained_selects` +- `peel_lambda_rename_var`, `peel_lambda_replace_var`, `peel_lambda_rename_2vars` +- `merge_where_cond`, `wrap_with_condition`, `wrap_with_ranges` +- `try_make_inline_cmp`, `make_inline_less_call` +- `buffer_return`, `finalize_emission_stmts`, `finalize_decs_emission` +- `qn`, `clone_expression`, `clone_type`, `strip_const_ref` +- `has_sideeffects`, `type_has_length`, `classify_terminator` +- `plan_group_by_core` (stays as sub-codegen; only its wrapper plans migrate) +- `GroupBySourceAdapter` (PR D folds it into the new `SourceAdapter`) + +## Tests / exports philosophy + +Default private; promote ONLY what a synthetic-input test must name. PR A's actual public surface is narrow: + +- Slot construction: `Slot`, `SlotMatcher`, `SlotCardinality`, `m_literal`, `m_alias`, `c_one`, `c_opt`, `c_chain`, `slot_chain_of` +- Pattern row: `SplicePattern` +- Struct/typedefs used in test fn signatures: `Captures` (struct), `EmitCtx`, `EmitFn` +- Lint helpers tests assert on: `chain_prefix_of`, `check_pattern_table_reachable` +- `alias_table` (so tests can read which aliases populated) + +Everything else stays private: the walker (`match_pattern`), the per-plan tables, the predicate library, all `emit_*` archetypes, all populate fns, `SourceAdapter` / `MatchResult` / `RequiresPredicate` / `LinqCall`. They're only called inside this module — bare names compose without cross-module visibility. + +Per-archetype unit testing via direct calls is impractical anyway: emit fns are `[macro_function]` whose bodies contain `quote()` nodes the runtime can't lower (LLVM JIT bail). End-to-end behavioral tests carry the per-archetype coverage in `tests/linq/test_linq_fold_*` (each user chain exercises one or more archetypes through the splice). 
+ +## Naming (decided) + +| Name | Role | +|---|---| +| `splice_patterns` | The master table — `array<SplicePattern>` | +| `SplicePattern` | Per-row struct | +| `Slot` | Chain slot | +| `SlotMatcher`, `SlotCardinality` | Variant types | +| `Captures` | Struct `{ single : table<string; ExprCall?>; many : table<string; array<ExprCall?>> }`. `single` for c_one/c_opt slots, `many` for c_chain slots. The `LinqCall` record is accessible separately via `linqCalls[call_norm_name(c)]` | +| `MatchResult` | Variant `no_match : void? \| matched : Captures` — walker return type | +| `c_chain` / `slot_chain_of(names, cap)` | Greedy run cardinality + (matcher = m_one_of(names), cardinality = c_chain()) convenience constructor — PR B1 | +| `RequiresPredicate`, `EmitFn` | Function-typedef types — see kernel snippet for current signatures | +| `EmitCtx` | Struct `{ top; src; expr_is_iterator }` passed to every emit archetype | +| `SourceAdapter` | Source-loop abstraction variant | +| `alias_table` | Named op-name groups | +| `match_pattern(...)` | Walker function | +| `plan_<X>_patterns` | Per-plan filtered subset (only during migration; deleted in PR D) | + +## PR B1 (shipped) + PR B2 (planned) sketch + +### PR B1 — shipped + +**Branch:** `bbatkin/linq-fold-pattern-table-prb` + +**Scope (delivered):** KR-1 closure (`collapse_chained_wheres`) + `c_chain` cardinality + `Captures` wrapper struct (`single` / `many`) + `slot_chain_of(names, cap)` constructor + `plan_loop_or_count` migration (1 row, replaces 210 LOC imperative). PR A's 6 emit fns + 5 predicates mechanically migrated to `c.single[…]` (~47 sites). + +### PR B2 — planned + +**Scope:** `plan_order_family` migration (5 rows). All foundation (aliases / predicates / `c_chain`) shipped in B1; B2 is row + emit-archetype work only. 
+ +### Pre-pass (PR B1 ✓) + +- `collapse_chained_wheres(calls)` — mirrors `collapse_chained_selects` modulo composition: clone inner where lambda, rename param to fresh name, build composed body via `merge_where_cond(innerBodyFresh, outerBodyFresh)` (which is `inner && outer`), rewire chain backlink, erase inner from `calls`. **No `has_sideeffects` bail** — composition uses cloned ASTs and AND-merge preserves left-to-right evaluation order with short-circuit semantics identical to the imperative cascade. Called from `plan_reverse`, `plan_distinct`, `plan_loop_or_count` stubs. **KR-1 fix; load-bearing for plan_loop_or_count row.** + +### Pattern row shipped (PR B1) + +**`plan_loop_or_count`** — 1 row using the new `c_chain` cardinality for the head: + +```das +SplicePattern( + name = "loop_or_count_general", + chain = [ + slot_chain_of(["where_", "select"], "head"), // c_chain — 0+ contiguous where/select + Slot(m_literal("skip"), c_opt(), "skip"), + Slot(m_literal("skip_while"), c_opt(), "skip_while"), + Slot(m_literal("take_while"), c_opt(), "take_while"), + Slot(m_literal("take"), c_opt(), "take"), + Slot(m_literal("where_"), c_opt(), "post_take_where"), // Theme 2 5c + Slot(m_alias("loop_terminator_family"), c_opt(), "term") + ], + requires = [], // intrinsic — chain shape carries the constraints + emit = @@ emit_loop_or_count_lane) +``` + +`emit_loop_or_count_lane` walks `c.many["head"]` left-to-right applying the same where_/select arms (AND-merge, chained-select rebinding, where-after-select projection-replace) the imperative loop did. Range ops + post-take-where + terminator come from `c.single[…]`. Pre-dispatch fast paths: `emit_length_shortcut`, `emit_any_empty_shortcut`. Lane dispatch: `classify_terminator(call_norm_name(c.single["term"]))` → `emit_counter_lane` / `emit_array_lane` / `emit_accumulator_lane` / `emit_early_exit_lane`. 
`emit_array_lane` refactored to take `isIter : bool` directly (was `expr : Expression?` just to read `.isIterator`) so the new emit fn can pass `ctx.expr_is_iterator` cleanly. + +### Predicates added (PR B1 ✓) + +- `inline_cmp_available(c, top)` — `try_make_inline_cmp(c.single["order"].arguments[1], …)`. For PR B2's `order_streaming_min` + `order_bounded_heap` rows. +- `has_where_or_distinct(c, top)` — `c.single |> key_exists("where") || c.single |> key_exists("distinct")`. For PR B2's `order_fused_prefilter` row. + +### PR B2 — planned rows + +**`plan_order_family`** — 5 rows, priority order 1 → 5: + +```das +// Row 1 — streaming_min: inline-cmp + first[_or_default] +SplicePattern( + name = "order_streaming_min", + chain = [ + Slot(m_literal("where_"), c_opt(), "where"), + Slot(m_alias("distinct_family"), c_opt(), "distinct"), + Slot(m_alias("order_family"), c_one(), "order"), + Slot(m_alias("first_family"), c_one(), "term"), + Slot(m_literal("select"), c_opt(), "termsel"), + ], + requires = [@@ inline_cmp_available], + emit = @@ emit_streaming_min) + +// Row 2 — bounded_heap: inline-cmp + take(N) +SplicePattern( + name = "order_bounded_heap", + chain = [ + Slot(m_literal("where_"), c_opt(), "where"), + Slot(m_alias("distinct_family"), c_opt(), "distinct"), + Slot(m_alias("order_family"), c_one(), "order"), + Slot(m_literal("take"), c_one(), "take"), + Slot(m_literal("select"), c_opt(), "termsel"), + ], + requires = [@@ inline_cmp_available, @@ take_arg_is_int], + emit = @@ emit_bounded_heap) + +// Row 3 — fused_prefilter: where or distinct present, no inline-cmp shortcut +SplicePattern( + name = "order_fused_prefilter", + chain = [ + Slot(m_literal("where_"), c_opt(), "where"), + Slot(m_alias("distinct_family"), c_opt(), "distinct"), + Slot(m_alias("order_family"), c_one(), "order"), + Slot(m_literal("take"), c_opt(), "take"), + Slot(m_alias("first_family"), c_opt(), "term"), + Slot(m_literal("select"), c_opt(), "termsel"), + ], + requires = [@@ 
has_where_or_distinct, @@ take_arg_is_int], + emit = @@ emit_fused_prefilter) + +// Row 4 — buffer_helper_dispatch: bare order, direct call to daslib helpers +SplicePattern( + name = "order_buffer_helper_dispatch", + chain = [ + Slot(m_alias("order_family"), c_one(), "order"), + Slot(m_literal("take"), c_opt(), "take"), + Slot(m_alias("first_family"), c_opt(), "term"), + ], + requires = [@@ take_arg_is_int], + emit = @@ emit_buffer_helper_dispatch) + +// Row 5 — order_then_plain_distinct: `order + distinct (plain)` accepted by master imperative +// (whole-tuple equality is position-invariant). distinct_by AFTER order_by would NOT be safe +// (distinct_by picks an arbitrary K1 representative regardless of sort order). distinct is +// literal "distinct" only (no alias) to forbid distinct_by here. +SplicePattern( + name = "order_then_plain_distinct", + chain = [ + Slot(m_alias("order_family"), c_one(), "order"), + Slot(m_literal("distinct"), c_one(), "distinct_after"), + Slot(m_literal("take"), c_opt(), "take"), + Slot(m_alias("first_family"), c_opt(), "term"), + Slot(m_literal("select"), c_opt(), "termsel"), + ], + requires = [@@ take_arg_is_int], + emit = @@ emit_fused_prefilter) // reuses emit_fused_prefilter with distinct_after capture +``` + +### Emit archetypes + +| Name | Source lines | LOC | Notes | +|---|---|---|---| +| `emit_streaming_min` | 1662-1727 | ~65 | Single-best state, per-element less-test | +| `emit_bounded_heap` | 1729-1855 | ~125 | Size-N heap during walk; distinct gate variant (Theme 3 Phase 3); terminal `_select` variant | +| `emit_fused_prefilter` | 1928-2107 | ~180 | Walk into buffer with where/distinct gate; sort/min/top_n on buffer; terminal `_select` variant; internal dispatch on take/first/bare | +| `emit_buffer_helper_dispatch` | 1857-1927 | ~70 | Direct call to `order` / `top_n*` / `min_max` helpers; 4 sub-paths | +| `emit_loop_or_count_lane` | 2243-2317 + recognition state | ~150 | Single row with internal `classify_terminator` 
dispatch into 4 existing lane emit fns | +| `emit_terminal_select_project` | NEW shared helper | ~30 | Used by `emit_bounded_heap` + `emit_fused_prefilter` for `outBuf` projection-from-`buf` | + +### Co-occurrence audit (to verify during implementation) + +The imperative code has a few subtle co-occurrence rules that may not map cleanly onto the pattern table: + +- **`order + distinct (plain)`**: imperative `plan_order_family` accepts `distinct` (not `distinct_by`) AFTER `order_by` because whole-tuple equality is position-invariant. **Decision (2026-05-26)**: add row 5 `order_then_plain_distinct` so PR B has byte-equivalent splice coverage to master. The row's `distinct_after` slot is `m_literal("distinct")` (not the alias), structurally forbidding `distinct_by`. Emit reuses `emit_fused_prefilter` with the new capture name. +- **`select` mid-chain in plan_loop_or_count**: chained selects need `intermediateBinds` for side-effect ordering. Already handled by emit-fn-internal recognition; pattern row captures via single `select` slot post-collapse. +- **`where` after `select` in plan_loop_or_count**: imperative does `peel_lambda_replace_var(predicate, projection)` to rebind the where pred to the projection result. Critical correctness — emit fn must replicate (the recognition state in `emit_loop_or_count_lane` covers this). 
+ +### LOC budget + +| Component | Delta | +|---|---| +| New: `collapse_chained_wheres` | +30 | +| New: 5 emit archetypes (lifted from imperative) | +610 | +| New: shared `emit_terminal_select_project` | +30 | +| New: 5 pattern rows | +60 | +| New: 2 populate `[_macro]` fns | +30 | +| New: 2 predicates + 5 aliases | +25 | +| Delete: imperative `plan_loop_or_count` body | -208 | +| Delete: imperative `plan_order_family` body | -543 | +| New: stubs + KR-1 wiring | +30 | +| New: tests (`collapse_chained_wheres` + per-archetype + regression) | +200 | +| **Net** | **~+264 LOC** (refactor, code redistributed; tests dominate) | + +### Test plan (additions to existing per-archetype + walker integrity) + +- `test_linq_fold_collapse_chained_wheres.das` — N=2, N=3 chains; side-effect bail; on plan_reverse + plan_distinct + plan_loop_or_count + plan_order_family +- `test_linq_fold_order_streaming_min.das` — inline-cmp + first / first_or_default; with/without where; with/without distinct; with terminal `_select` +- `test_linq_fold_order_bounded_heap.das` — inline-cmp + take(N); distinct gate; terminal `_select` +- `test_linq_fold_order_fused_prefilter.das` — where + order + take/first; distinct + order + take/first; terminal `_select` +- `test_linq_fold_order_buffer_helper.das` — bare order; order + take; order + first +- `test_linq_fold_loop_or_count_terminators.das` — all 4 lanes (counter / accumulator / early_exit / array); fast paths; range ops; chained wheres post-collapse + +## Known regressions to address in follow-ups + +| # | Surface | Symptom | Severity | Owner PR | +|---|---|---|---|---| +| KR-1 | `plan_reverse` + `plan_distinct` pattern rows allow a single optional `where_` slot; pre-PR-A imperative `plan_*` accepted N consecutive `where_` calls and `&&`-merged via `merge_where_cond`. | `..._where(p1)._where(p2).reverse()...` and `..._where(p1)._where(p2)._distinct()...` no longer spliced; fell back to cascade. 
| medium | **CLOSED in PR B1** — `collapse_chained_wheres` pre-pass mirroring `collapse_chained_selects` (~50 LOC + 18 sub-runs). Called from `plan_reverse` / `plan_distinct` / `plan_loop_or_count` stubs; will be called from `plan_order_family` in PR B2. | + +## Risks + +1. **Pattern ordering hazard.** Pattern A's chain being a strict prefix of B's means A wins. Discipline: more-specific patterns declared first; add a lint pass that walks the table at module-init time and flags prefix conflicts. +2. **Hidden cross-plan helpers.** Some emit branches consume helpers used by other plans. These stay as-is — emit archetypes call them like the imperative code did. +3. **Mid-flight `SourceAdapter` redesign.** PR C is the highest-uncertainty phase. If `wrap_per_element` doesn't generalize cleanly to `DecsFind`, we either widen the interface or special-case. **Mid-flight redesign approved.** +4. **Bench refresh per [feedback-living-results-md]:** each PR re-runs INTERP+JIT for any bench shape it touches and refreshes `results.md`. Goal is **byte-identical or strictly faster** at each phase (refactor, not perf change). +5. **RST refresh per [feedback-living-linq-fold-patterns-rst]:** each promoted arm's row in `doc/source/reference/linq_fold_patterns.rst` gets touched — phrasing changes from "plan_X handles …" to "pattern `<name>` (archetype `<emit_fn>`) handles …". + +## Decision log + +- **2026-05-25** — Hybrid (declared patterns table + reusable archetypes) over pure EDSL or pure data-table. +- **2026-05-25** — Inline kernel in `linq_fold.das` (not a separate module file). +- **2026-05-25** — Bundle PR A foundation with first migrations (`plan_reverse`, `plan_distinct`) — foundation-only PR is grounded in nothing. +- **2026-05-25** — Stop and align between phases; mid-flight redesign approved. +- **2026-05-25** — Hard-cutover (no legacy imperative alongside the table). 
+- **2026-05-25** — `Captures` as `table`; emit fns reach the `LinqCall` registry record on demand via `linqCalls[call_norm_name(c)]` (avoids carrying the per-call pair in every capture entry). Initial sketch carried `tuple` but it bought nothing — emit fns mostly read terminator/call arg shape from the `ExprCall`, and `call_norm_name` is the canonical way to derive the registry key anyway. +- **2026-05-25** — `SourceAdapter` stub (Array-only) in PR A; widened in PR C. +- **2026-05-25** — Per-plan stubs during migration; flat walk after PR D. +- **2026-05-25** — `arity` lives on `Slot` (structural check), not requires-predicate. +- **2026-05-25** — Named predicates over inline closures by default; inline acceptable for one-off, promote on second use. +- **2026-05-25 (PR A impl)** — `let` is const, `var` is non-const. Walker stub binds `var result = invoke(p.emit, …)` to receive non-const Expression?. Lint LINT005 misreports the required reinterpret as redundant — known asymmetry; tracked via this entry instead of suppressing in code. +- **2026-05-25 (PR A impl)** — `ExprCall.name` is the mangled generic-instance name (e.g. `__::linq\`distinct_by\``); the user-facing name lives at the root of the `func.fromGeneric` chain. Helper `call_norm_name(ExprCall?)` walks the chain and normalizes through `linqCalls`. +- **2026-05-25 (PR A impl)** — Variant construction in gen2 uses the named-field constructor form (`SlotMatcher(literal = "x")`, `MatchResult(matched <- captures)`), same syntax as struct init. Helpers `m_literal`/`m_alias`/`c_one`/`c_opt` keep pattern rows compact. +- **2026-05-25 (PR A impl)** — Empty typed array literal: `array()`. The bare `[]` lacks the element-type inference base. +- **2026-05-25 (PR A impl)** — `array_source` predicate gates only patterns that need indexed access (R-2a backward-walk, R6 backward-index). Patterns using `for (it in src)` body (Ra / Rb / R1-R4 / distinct_*) work on iterator sources too and have no source-shape gate. 
+- **2026-05-25 (PR A impl)** — `take_arg_is_int` predicate is vacuously true when no `take` capture is present (so it's safe on patterns with optional take). Same pattern applies for any future capture-conditional predicate. +- **2026-05-25 (PR A impl)** — PR A is a pure refactor (no arm add/extend/tighten). Per `[[feedback-living-linq-fold-patterns-rst]]` and `[[feedback-living-results-md]]`, RST and bench refresh are skipped — both are arm-shape-tracking docs, not implementation-tracking docs. +- **2026-05-26 (PR A R3)** — Intentional extension over master in `plan_reverse`: chains with BOTH a pre-reverse `_select(f)` AND a post-reverse `_select(g)` now splice (R1-R4 + Rb patterns) where master's imperative code had a `!seenSelect` guard that bailed to cascade. The two selects compose cleanly — pre-projection feeds `pushExpr`, post-projection projects the reversed survivors at return. Strictly faster, semantics preserved. Covered by `test_reverse_pre_and_post_select_array` / `_first` in `test_linq_fold_terminal_select.das`. +- **2026-05-26 (PR B1)** — Split PR B into B1 (KR-1 + `c_chain` + `plan_loop_or_count`) and B2 (`plan_order_family`). The c_chain cardinality is a kernel extension that's load-bearing for plan_loop_or_count's variable-shape head (`[where_*][select*]` interleaved); without it the row would explode into N positional optional slots and still not cover everything. Bundling kernel + first user of kernel in one PR keeps the kernel grounded. +- **2026-05-26 (PR B1)** — `Captures` migrated from `typedef Captures = table` to `struct Captures { single : table; many : table> }`. Alternatives considered: (a) overload one table with sentinel encoding — ugly and type-unsafe; (b) store all captures as `array` and index `[0]` for c_one/c_opt — fixed-shape callsites pay an awkward bracket tax. The split struct is mechanical for emit fns (`c["x"]` → `c.single["x"]`, ~47 sites swept) and leaves room for future cardinality types (`c_repeat_n`, etc.) 
to land in their own table. +- **2026-05-26 (PR B1)** — `c_chain` walker rule: empty match still creates an entry in `captures.many[name]` (empty array). Emit fns can rely on `c.many |> key_exists("…")` instead of branching on the array's length being > 0. Mirrors how `c_opt` slots that miss still leave `c.single |> key_exists` returning false — predictable existence semantics for emit-fn reads. +- **2026-05-26 (PR B1)** — `slot_chain_of(names, cap)` convenience constructor takes `var names : array` and moves it into the SlotMatcher via `<-`. `array` is non-copyable; pass-by-value-and-copy would require an explicit clone. Move-consume is the more honest signature. +- **2026-05-26 (PR B1)** — `collapse_chained_wheres` does NOT gate on `has_sideeffects` (whereas `collapse_chained_selects` does for one specific case). Reason: AND-composing two `where_` predicates preserves left-to-right short-circuit semantics — `inner(x) && outer(x)` evaluates `inner` first and short-circuits, identical to the imperative `if(inner) { if(outer) { … } }` cascade. Side effects in `inner` always fire (per element); side effects in `outer` fire only when `inner` returns true. Cascade and composition match exactly. +- **2026-05-26 (PR B1)** — `loop_terminator_family` alias must include ALL terminators `classify_terminator` returns non-UNKNOWN for. First B1 cut missed `last`/`single`/`element_at` × `_or_default` (6 EARLY_EXIT terminators); matrix run caught it via `test_linq_fold_ast` "expected 1 for-loop, got 0" failures (terminator wasn't matching the alias → planner cascaded to tier-2 imperative which emits 2 loops). Single-line fix: extend the alias. Lesson: any new alias for a c_opt terminator slot needs an audit against `classify_terminator`'s domain. +- **2026-05-26 (PR B1)** — `emit_array_lane` signature refactored: `var expr : Expression?` → `isIter : bool`. The only thing the original `expr` parameter was used for was reading `expr._type.isIterator`. 
The new `EmitCtx.expr_is_iterator` already carries that bool, so the refactor flows cleanly. Single callsite update (imperative caller computed `expr._type != null && expr._type.isIterator` inline before the call). + +## Open questions + +- **Prefix-conflict lint pass** — in PR A scope or deferred? Lean PR A so it grows with the table. +- **`plan_zip` / `plan_decs_join` SourceAdapter shape** — defer until PR D scoping. They feel special-case. +- **Reducer-spec data table** — exact shape (miss/hit template per row) — design during PR D. +- **`SourceAdapter` method surface** — `wrap_per_element(body, allows_early_exit)` is the minimal contract. Whether `finalize(stmts, retType)` belongs on the adapter or stays as a separate `finalize_*_emission` family — decide during PR C. + +## See also + +- `doc/source/reference/linq_fold_patterns.rst` — user-facing splice-pattern reference (refreshed per arm-touching PR). +- `benchmarks/sql/linq_fold_chain_audit.md` — closed-out audit that drove Themes 1-8 (PRs #2851 / #2852 / #2857 / #2861 / #2862 / #2865 / #2866 / #2874 / #2875). +- `benchmarks/sql/results.md` — INTERP+JIT matrix refreshed per splice-touching PR. diff --git a/daslib/with_boost.das b/daslib/with_boost.das new file mode 100644 index 0000000000..b8052de8cb --- /dev/null +++ b/daslib/with_boost.das @@ -0,0 +1,221 @@ +options gen2 +options indenting = 4 +options no_unused_block_arguments = false +options no_unused_function_arguments = false + +options strict_smart_pointers + +module with_boost shared private + +//! ``with_`` call macro: bind one or more array / table element references +//! inside a block, with an automatic ``lock`` around the body so +//! push/erase/resize/clear panic at runtime instead of silently dangling. +//! The macro emits the full lock / invoke / unlock sequence inline — no +//! helper functions — so any arity and any mix of array / table args works +//! uniformly. See ``tutorials/macros/18_with_boost.das`` for examples. +//! +//! 
Restrictions: +//! +//! * Each container arg must be an ``ExprAt`` (``arr[i]`` / ``tab[k]``). Plain +//! locals, struct fields on locals, and function-call results are refused — +//! use built-in ``with`` for those. +//! * At most one table-keyed arg per call (a second insert/erase would +//! rehash and invalidate the pinned entry). +//! * Block bodies are void (no ``return`` value); ``with_`` is for in-place +//! mutation. To compute a value, write to a local: +//! ``var v : T; with_(arr[0]) { v = _.f }``. + +require daslib/ast +require daslib/ast_boost +require daslib/templates_boost +require daslib/macro_boost + +// ────────────────────────────────────────────────────────────────────────────── +// Call macro +// ────────────────────────────────────────────────────────────────────────────── + +def private container_kind(atNode : ExprAt?) : string { + //! Returns "array", "table", or "" if neither. + let subT = atNode.subexpr._type + return "" if (subT == null) + return "array" if (subT.baseType == Type.tArray) + return "table" if (subT.baseType == Type.tTable) + return "" +} + +def private is_lvalue_chain(e : ExpressionPtr) : bool { + //! True if e is reachable from an ExprVar root through ExprField + //! hops only — i.e. binding `var x & = e` is safe (after + //! `unsafe(...)`) because e refers to addressable storage outside + //! this expression AND the chain doesn't contain a nested container + //! that the macro can't lock. ExprAt / ExprSafeAt are deliberately + //! NOT followed: `outer[i].innerArr[j]` would lock only `innerArr` + //! while `outer` stayed mutable, leaving the inner ref vulnerable + //! to outer-array reallocation from inside the body. + return false if (e == null) + return true if (e is ExprVar) + return is_lvalue_chain((e as ExprField).value) if (e is ExprField) + return is_lvalue_chain((e as ExprSafeField).value) if (e is ExprSafeField) + return false +} + +[call_macro(name="with_")] +class private WithMacro : AstCallMacro { + //!
Implements the ``with_`` call macro. Each container arg must be an + //! ``ExprAt`` (``arr[i]`` / ``tab[k]``); at most one table-keyed arg + //! per call. Emits a pre-bind / lock / invoke / unlock sequence inline + //! per container, with block-param types pinned to ``T &`` (element + //! ref) so workhorse and struct elements mutate uniformly. + def override visit(prog : ProgramPtr; mod : Module?; var call : ExprCallMacro?) : ExpressionPtr { + // Validates the args, rewrites the trailing block so its params are + // element refs, and returns a void block holding the pre-bind / lock / + // invoke / unlock statements. Every refusal goes through macro_verify. + macro_verify(length(call.arguments) >= 2, prog, call.at, + "with_ needs at least one container arg plus a block") + let totalArgs = length(call.arguments) + let containerCount = totalArgs - 1 + let blockArg = call.arguments[totalArgs - 1] + macro_verify(blockArg is ExprMakeBlock, prog, call.at, + "with_ last argument must be a block, got {describe(blockArg)}") + // Validate every container arg + count tables in one pass. + var tableCount = 0 + for (i in range(containerCount)) { + let ai = call.arguments[i] + macro_verify(ai is ExprAt, prog, call.at, + "with_ arg #{i} must be `arr[i]` or `tab[k]` — use built-in `with` for locals; got {describe(ai)}") + let aiNode = ai as ExprAt + macro_verify(aiNode.subexpr._type != null, prog, call.at, + "with_ arg #{i} container type not inferred yet — got null _type on {describe(aiNode.subexpr)}") + // The element type is cloned and dereferenced below when the block + // params are pinned, so it must be known here as well. 
+ macro_verify(aiNode._type != null, prog, call.at, + "with_ arg #{i} element type not inferred yet — got null _type on {describe(ai)}") + let ki = container_kind(aiNode) + macro_verify(ki != "", prog, call.at, + "with_ arg #{i} container must be array or table, got {describe(aiNode.subexpr._type)}") + macro_verify(is_lvalue_chain(aiNode.subexpr), prog, call.at, + "with_ arg #{i} container must be a named variable (or a field chain rooted in one). Nested index chains like `outer[i].inner[j]` are refused because only the innermost container would be locked; array literals and function-call results have temp lifetime and can't be safely ref-bound.
Got {describe(aiNode.subexpr)}") + if (ki == "table") { + tableCount ++ + } + } + macro_verify(tableCount <= 1, prog, call.at, + "with_ allows at most one table-keyed arg per call (got {tableCount}); a 2nd table lookup could rehash and invalidate the first pinned entry") + // Build the rewritten block. Block-param types are NOT left to + // inference: each param is pinned below to the element type of + // its container arg (non-const ref), for struct and workhorse + // elements alike. + let mblk = blockArg as ExprMakeBlock + let userBlock = mblk._block as ExprBlock + let userArgCount = length(userBlock.arguments) + var rewrittenBlock : ExprMakeBlock? + var rewrittenInner : ExprBlock? + // Per-param type: clone the container's element type (already a + // ref after typing of `arr[i]` / `tab[k]`), strip const, force ref. + // Inline ExprInvoke doesn't drive inference from arg → block-param, + // so the macro pins each param to the concrete element-ref type + // directly. Workhorse types pick up the explicit `&` here too.
+ if (userArgCount == 0) { + macro_verify(containerCount == 1, prog, call.at, + "with_ multi-arg form requires explicit block params: $(a, b, ...)") + var pT = clone_type(call.arguments[0]._type) + pT.flags.ref = true + pT.flags.constant = false + pT.flags.removeConstant = true + var injected = new Variable(at = call.at, name := "_", _type = pT) + var newBlock = new ExprBlock(at = userBlock.at, + returnType = new TypeDecl(at = userBlock.at, baseType = Type.tVoid), + blockFlags = userBlock.blockFlags | ExprBlockFlags.isClosure) + newBlock.arguments |> emplace_new(injected) + for (st in userBlock.list) { + newBlock.list |> emplace_new(clone_expression(st)) + } + rewrittenBlock = new ExprMakeBlock(at = call.at, _block = newBlock) + rewrittenInner = newBlock + } else { + macro_verify(userArgCount == containerCount, prog, call.at, + "with_ block-param count must match container-arg count: {containerCount} containers, {userArgCount} block params") + rewrittenBlock = clone_expression(mblk) as ExprMakeBlock + rewrittenInner = rewrittenBlock._block as ExprBlock + for (i in range(containerCount)) { + var arg = rewrittenInner.arguments[i] + arg._type = clone_type(call.arguments[i]._type) + arg._type.flags.ref = true + arg._type.flags.constant = false + arg._type.flags.removeConstant = true + } + } + // ── Inline-emission ──────────────────────────────────────────── + // For each container arg, pre-bind the subexpr to a local ref so + // every downstream lock / invoke / unlock references the SAME + // materialised value. Without this, splicing $e(subexpr) three + // times (lock + invoke + unlock) would re-evaluate the subexpr + // three times — fine for an ExprVar, fatal for a literal or call + // (three different temp arrays → unlock hits stale memory).
+ // + // Per-container emission ({i} = container arg index): + // var __with_c_{i} & = unsafe({subexpr}) // always + // __builtin_array_lock_mutable(__with_c_{i}) // or __builtin_table_lock_mutable + // for tables: var __with_tref_{i} & = unsafe(__with_c_{i}[{key}]) // pre-bind stage, emitted before the locks + // invoke arg: __with_c_{i}[{index}] (array) or __with_tref_{i} (table) + // __builtin_array_unlock_mutable(__with_c_{i}) // or the table variant; reversed at end + var preStmts : array<ExpressionPtr> + var lockStmts : array<ExpressionPtr> + var unlockStmts : array<ExpressionPtr> + var argRefs : array<ExpressionPtr> + preStmts |> reserve(containerCount * 2) + lockStmts |> reserve(containerCount) + unlockStmts |> reserve(containerCount) + argRefs |> reserve(containerCount) + for (i in range(containerCount)) { + let ai = call.arguments[i] as ExprAt + let ki = container_kind(ai) + let cName = "__with_c_{i}" + preStmts |> push <| qmacro_expr() { + var $i(cName) & = unsafe($e(ai.subexpr)) + } + if (ki == "array") { + lockStmts |> push <| qmacro_expr() { + __builtin_array_lock_mutable($i(cName)) + } + unlockStmts |> push <| qmacro_expr() { + __builtin_array_unlock_mutable($i(cName)) + } + argRefs |> push <| qmacro($i(cName)[$e(ai.index)]) + } else { // table + let trefName = "__with_tref_{i}" + preStmts |> push <| qmacro_expr() { + var $i(trefName) & = unsafe($i(cName)[$e(ai.index)]) + } + lockStmts |> push <| qmacro_expr() { + __builtin_table_lock_mutable($i(cName)) + } + unlockStmts |> push <| qmacro_expr() { + __builtin_table_unlock_mutable($i(cName)) + } + argRefs |> push <| qmacro($i(trefName)) + } + } + // Reverse unlock order so cleanup mirrors acquisition. + let unlockN = length(unlockStmts) + var unlocksReversed <- [for (i in range(unlockN)); unlockStmts[unlockN - 1 - i]] + // Build the invoke statement via ExprInvoke (the dedicated AST node + // for lambda/block calls — ExprCall("invoke") doesn't unify the + // block's auto-typed params against the actual refs). 
+ var invokeCall = new ExprInvoke(at = call.at, name := "invoke") + invokeCall.arguments |> emplace_new(rewrittenBlock) + for (r in argRefs) { + invokeCall.arguments |> emplace_new(r) + } + // Assemble the block: pre-resolves, locks, invoke, then unlocks + // (reversed).
No ``finally`` — daslang panic is fatal, not a C++ + // exception; if the invoke panics the process is exiting anyway + // and skipped unlocks don't matter. + var blk = new ExprBlock(at = call.at, + returnType = new TypeDecl(at = call.at, baseType = Type.tVoid)) + for (s in preStmts) { + blk.list |> emplace_new(s) + } + for (s in lockStmts) { + blk.list |> emplace_new(s) + } + blk.list |> emplace_new(invokeCall) + for (s in unlocksReversed) { + blk.list |> emplace_new(s) + } + return <- blk + } +} diff --git a/doc/reflections/das2rst.das b/doc/reflections/das2rst.das index 5725f75d6c..2765dcda1a 100644 --- a/doc/reflections/das2rst.das +++ b/doc/reflections/das2rst.das @@ -26,6 +26,7 @@ require daslib/jobque_boost require daslib/apply_in_context require daslib/contracts require daslib/defer +require daslib/with_boost require daslib/delegate require daslib/option require daslib/result @@ -587,6 +588,14 @@ def document_module_defer(root : string) { document("defer and defer_delete macros", mod, "defer.rst", groups) } +def document_module_with_boost(root : string) { + var mod = find_module("with_boost") + var groups <- array( + group_by_regex("Locked element binding helpers", mod, %regex~(_with_locked_.*)$%%) + ) + document("``with_`` macro: locked array/table element binding", mod, "with_boost.rst", groups) +} + def document_module_if_not_null(root : string) { var mod = find_module("if_not_null") var groups <- array( @@ -1644,6 +1653,7 @@ def main { document_module_decs_boost(root) document_module_decs_state(root) document_module_defer(root) + document_module_with_boost(root) document_module_delegate(root) document_module_dynamic_cast_rtti(root) document_module_enum_trait(root) diff --git a/doc/source/reference/tutorials.rst b/doc/source/reference/tutorials.rst index 0ee3609d36..554c2f9890 100644 --- a/doc/source/reference/tutorials.rst +++ b/doc/source/reference/tutorials.rst @@ -204,6 +204,7 @@ Run any tutorial from the project root::
tutorials/macros/15_type_macro.rst tutorials/macros/16_template_type_macro.rst tutorials/macros/17_qmacro.rst + tutorials/macros/18_with_boost.rst .. _tutorials_dashv: diff --git a/doc/source/reference/tutorials/macros/18_with_boost.rst b/doc/source/reference/tutorials/macros/18_with_boost.rst new file mode 100644 index 0000000000..08c903bbe4 --- /dev/null +++ b/doc/source/reference/tutorials/macros/18_with_boost.rst @@ -0,0 +1,193 @@ +.. _tutorial_macro_with_boost: + +.. index:: + single: Tutorial; Macros; with_ + single: Tutorial; Macros; with_boost + single: Tutorial; Macros; array lock + single: Tutorial; Macros; table lock + +================================================================== +Macro Tutorial 18: ``with_`` — locked binding of container slots +================================================================== + +``daslib/with_boost`` adds a ``with_`` call-macro that solves a recurring +ergonomics problem: binding a local reference to an array or table +element so that its fields can be updated in place. The naive form is +rejected by daslang's typer: + +.. code-block:: das + + var arr = [A(f1 = 1, f2 = 2)] + var a : A& = arr[0] // error[31300]: local reference to non-local expression is unsafe + a.f1 = 99 + +Between binding ``a`` and writing through it, code could push/resize/erase +``arr``, leaving ``a`` dangling. ``with_`` solves this by: + +1. Binding the element inside a block, named ``_`` by default; +2. Wrapping the block in an **automatic lock** on the container, so + push/erase/resize/clear inside the body panic at runtime instead of + silently corrupting memory. + +The single-arg form is a 1:1 replacement for the rejected pattern above: + +..
code-block:: das + + require daslib/with_boost + + var arr = [A(f1 = 1, f2 = 2)] + with_(arr[0]) { + _.f1 = 99 // mutation persists in arr[0] + } + + +Section 1 — The single-arg form +================================ + +Default-name ``_`` binding works for both struct-element and +workhorse-element arrays (workhorse coverage in Section 3): + +.. code-block:: das + + var arr = [A(f1 = 1, f2 = 2), A(f1 = 3, f2 = 4)] + with_(arr[0]) { + _.f1 = 99 + _.f2 = 100 + } + // arr[0] is now A(f1 = 99, f2 = 100) + + +Named binding via ``$(name)`` is identical in effect — the macro strips +constness so mutations always persist: + +.. code-block:: das + + with_(arr[1]) $(elem) { + elem.f1 = 555 + } + + +Section 2 — Multi-arg positional form +====================================== + +Passing multiple containers locks each independently. The block params +are positional (no ``=``-named args; the macro reads them in order): + +.. code-block:: das + + var dst = [A(f1 = 0, f2 = 0)] + var src = [A(f1 = 10, f2 = 20)] + with_(dst[0], src[0]) $(d, s) { + d.f1 = s.f1 + 1 + d.f2 = s.f2 + 2 + } + +Any arity works — the macro emits the full lock / invoke / unlock sequence inline, with one lock per container, so a call like ``with_(a[0], b[1], c[2], d[3], e[4]) $(va, vb, vc, vd, ve) { ... }`` scales naturally. Mix arrays and tables freely, subject to the single-table-arg rule (next section). + + +Section 3 — Workhorse element types (int, float, ...) +====================================================== + +The block-arg is bound by reference, so workhorse-element containers +work the same as struct-element ones — mutation through ``_ = X`` (or +the named ``x = X``) propagates back to the underlying slot: + +.. 
code-block:: das + + var ints = [1, 2, 3] + with_(ints[1]) { + _ = 222 + } + // ints == [1, 222, 3] + +The macro emits each block parameter pinned to the container's element +type with the ref flag set, so daslang resolves the binding as ``int&`` +(or whichever workhorse type the element happens to be). No special-case +in the macro for struct vs workhorse — the same pinning path covers both. + + +Section 4 — Tables +=================== + +Tables work the same way; ``tab[key]`` upserts (creates a default entry +if the key is missing). Only **one** table-keyed arg per call — a +second ``tab[key]`` lookup could insert, rehash the table and invalidate +the entry pinned first, so the macro refuses anything past the first: + +.. code-block:: das + + var tab : table<string; A> + tab |> insert("k", A(f1 = 11, f2 = 22)) + with_(tab["k"]) $(v) { + v.f1 = 777 + } + + +Section 5 — Lock is real +========================= + +Mutation of the container inside the body panics at runtime — exactly +the failure mode the typer was trying to prevent at compile time: + +.. code-block:: das + + var arr = [A(f1 = 1, f2 = 2)] + with_(arr[0]) $(a) { + arr |> push(A(f1 = 1000, f2 = 2000)) // panics: "can't push into locked array" + } + +daslang panic is fatal (not a C++/JS-style exception) — the program +prints the diagnostic and exits. ``try/recover`` exists to capture the +message before exit for nicer logging, NOT to recover-and-continue. + + +Section 6 — Refused container shapes +===================================== + +``with_`` is intentionally narrow: + +* **Non-``ExprAt`` args** (plain locals, struct fields on locals) are + refused — use built-in ``with`` for those. The container being + indexed must itself be an ExprVar-rooted field chain (a variable or + ``obj.field``): the macro needs to ref-bind the container to a local, + and function-call results and array literals have no stable + addressable storage outside the expression. 
Nested index chains such + as ``outer[i].inner[j]`` are refused too, because only the innermost + container would be locked. For literal-or-call containers, hoist to + a ``var`` first.
+ +* **More than one table-keyed arg** is refused per the rehash hazard + noted above. + +* **Bodies that ``return`` a value** are refused at typecheck time — + the synthesized invoke target declares a ``: void`` block return. + ``with_`` is for in-place mutation; compute values via a local: + ``var v : T; with_(arr[0]) { v = _.f }``. + +All refusals fire at macro-expansion time with the macro-error code +``50503`` and a message describing the failing arg. + + +Running the tutorial +===================== + +:: + + daslang.exe tutorials/macros/18_with_boost.das + +Expected output:: + + section 2: arr[0] = 99, 100 + section 3: arr[1].f1 = 555 + section 4: dst[0] = 11, 22 + section 5: ints = [ 1, 222, 3] + section 6: tab[k].f1 = 777 + section 7: see comment for the lock-panic shape + + +.. seealso:: + + Full source: + :download:`18_with_boost.das <../../../../../tutorials/macros/18_with_boost.das>` + + Previous tutorial: :ref:`tutorial_macro_qmacro` + + Standard library: ``daslib/with_boost.das`` + + Language reference: :ref:`Macros ` — full macro system documentation diff --git a/doc/source/stdlib/handmade/module-with_boost.rst b/doc/source/stdlib/handmade/module-with_boost.rst new file mode 100644 index 0000000000..fea23268aa --- /dev/null +++ b/doc/source/stdlib/handmade/module-with_boost.rst @@ -0,0 +1,43 @@ +The WITH_BOOST module provides the ``with_`` call macro: bind one or more +array / table element references inside a block, with an automatic +container lock around the body so push / erase / resize / clear inside +the body panic at runtime instead of silently dangling. The macro +emits the lock / invoke / unlock sequence fully inline, so any arity +and any mix of array / table args work uniformly with a single +``require``. + +All functions and symbols are in "with_boost" module, use require to get access to it. + +.. code-block:: das + + require daslib/with_boost + +Example: + +.. 
code-block:: das + + require daslib/with_boost + + struct A { + f1 : int + f2 : int + } + + [export] + def main { + var arr = [A(f1=1, f2=2), A(f1=3, f2=4)] + + // single-arg, default `_` binding + with_(arr[0]) { + _.f1 = 99 + } + + // multi-arg positional, struct + workhorse + var ints = [10, 20, 30] + with_(arr[1], ints[0]) $(s, n) { + s.f1 = n + 100 + } + + print("arr[0]={arr[0].f1}, arr[1]={arr[1].f1}\n") + // output: arr[0]=99, arr[1]=110 + } diff --git a/doc/source/stdlib/sec_annotations.rst b/doc/source/stdlib/sec_annotations.rst index fa533d6f70..ae346316fe 100644 --- a/doc/source/stdlib/sec_annotations.rst +++ b/doc/source/stdlib/sec_annotations.rst @@ -12,6 +12,7 @@ and other compile-time utilities. generated/contracts.rst generated/apply.rst generated/defer.rst + generated/with_boost.rst generated/if_not_null.rst generated/is_local.rst generated/safe_addr.rst diff --git a/examples/audio/hrtf/main.das b/examples/audio/hrtf/main.das index 3c34ecd6ce..46506a6bc3 100644 --- a/examples/audio/hrtf/main.das +++ b/examples/audio/hrtf/main.das @@ -12,7 +12,9 @@ require daslib/defer require daslib/fio require daslib/safe_addr require daslib/math_boost +require daslib/jobque_boost require math +require strings // -- Shaders -- @@ -97,6 +99,11 @@ let SOURCE_COLORS = fixed_array( float3(1.0, 0.3, 1.0) // magenta ) +// HRTF/simulated routing budget — cycle on B key. 
+let HRTF_BUDGETS = fixed_array(32, 0, 999) +let HRTF_BUDGET_LABELS = fixed_array("mixed top-32", "all simulated", "all HRTF") +var hrtf_budget_idx = 0 + var sound_data : array var sound_channels = 1 var sound_rate = MA_SAMPLE_RATE @@ -167,9 +174,7 @@ def gl_to_audio(p : float3) : float3 { def add_sound_source(pos : float3; volume : float = 1.0) { let idx = length(sources) % 5 - var src : SoundSource - src.position = pos - src.color = SOURCE_COLORS[idx] + var src = SoundSource(position = pos, color = SOURCE_COLORS[idx]) var samples <- clone(sound_data) src.sid = play_3d_sound_loop_from_pcm(gl_to_audio(pos), linear_attenuation(10.0), sound_rate, sound_channels, samples) src.sid |> set_volume(volume) @@ -178,8 +183,21 @@ def add_sound_source(pos : float3; volume : float = 1.0) { // -- Main -- +// Parse --max-frames N from argv. Useful for headless repro and shutdown-leak debugging. +def parse_max_frames { + var maxFrames = 0 + let args <- get_command_line_arguments() + for (i in range(length(args))) { + if (args[i] == "--max-frames" && i + 1 < length(args)) { + maxFrames = to_int(args[i + 1]) + } + } + return maxFrames +} + [export] def main { + let maxFrames = parse_max_frames() // GLFW init if (glfwInit() == 0) { panic("can't init glfw") @@ -239,27 +257,38 @@ def main { // Load sound load_sound_data() + // Stats LockBox for per-second utilization + HRTF/simulated split readout. + // Lifecycle wraps the audio system: the audio thread releases its share-ref during + // audio_system_finalize (inside with_audio_system below), THEN this outer defer + // calls lock_box_remove to do the final delete. `release` alone never deletes. 
+ var stats_box <- lock_box_create() + defer() { + unsafe(lock_box_remove(stats_box)) + } + // Audio system with_audio_system() { // Start with one source in front add_sound_source(float3(0.0, 0.0, -3.0)) + set_audio_stats_box(stats_box) + print("HRTF 3D Audio Demo\n") print(" Click to capture mouse, click again to release\n") print(" WASD to move, mouse to look\n") - print(" N to add a sound source, ESC to quit\n") + print(" N to add a sound source, M to add 30, ESC to quit\n") + print(" B to cycle HRTF budget (mixed top-32 / all simulated / all HRTF)\n") var last_time = glfwGetTime() var n_was_pressed = false var m_was_pressed = false + var b_was_pressed = false + var since_stats_print = 0.0 + var frameCount = 0 eval_main_loop() { - if (glfwWindowShouldClose(window) != 0) { - return false - } - if (glfwGetKey(window, int(GLFW_KEY_ESCAPE)) == int(GLFW_PRESS)) { - return false - } + if (glfwWindowShouldClose(window) != 0 || glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS || (maxFrames > 0 && frameCount >= maxFrames)) return false + frameCount ++ glfwPollEvents() // Delta time @@ -271,21 +300,21 @@ def main { let speed = 5.0 * dt let fwd = camera_forward() let rgt = camera_right() - if (glfwGetKey(window, int(GLFW_KEY_W)) == int(GLFW_PRESS)) { + if (glfwGetKey(window, GLFW_KEY_W) == GLFW_PRESS) { camera_pos += fwd * speed } - if (glfwGetKey(window, int(GLFW_KEY_S)) == int(GLFW_PRESS)) { + if (glfwGetKey(window, GLFW_KEY_S) == GLFW_PRESS) { camera_pos -= fwd * speed } - if (glfwGetKey(window, int(GLFW_KEY_A)) == int(GLFW_PRESS)) { + if (glfwGetKey(window, GLFW_KEY_A) == GLFW_PRESS) { camera_pos -= rgt * speed } - if (glfwGetKey(window, int(GLFW_KEY_D)) == int(GLFW_PRESS)) { + if (glfwGetKey(window, GLFW_KEY_D) == GLFW_PRESS) { camera_pos += rgt * speed } // Add source on N press - let n_pressed = glfwGetKey(window, int(GLFW_KEY_N)) == int(GLFW_PRESS) + let n_pressed = glfwGetKey(window, GLFW_KEY_N) == GLFW_PRESS if (n_pressed && !n_was_pressed) { let spawn_pos = 
camera_pos + fwd * 3.0 add_sound_source(spawn_pos) @@ -294,7 +323,7 @@ def main { n_was_pressed = n_pressed // Add 30 sources around camera on M press - let m_pressed = glfwGetKey(window, int(GLFW_KEY_M)) == int(GLFW_PRESS) + let m_pressed = glfwGetKey(window, GLFW_KEY_M) == GLFW_PRESS if (m_pressed && !m_was_pressed) { for (i in range(30)) { let angle = 2.0 * PI * float(i) / 30.0 @@ -306,6 +335,25 @@ def main { } m_was_pressed = m_pressed + // Cycle HRTF budget on B press + let b_pressed = glfwGetKey(window, GLFW_KEY_B) == GLFW_PRESS + if (b_pressed && !b_was_pressed) { + hrtf_budget_idx = (hrtf_budget_idx + 1) % 3 + let newBudget = HRTF_BUDGETS[hrtf_budget_idx] + set_hrtf_budget(newBudget) + print("HRTF budget: {newBudget} ({HRTF_BUDGET_LABELS[hrtf_budget_idx]})\n") + } + b_was_pressed = b_pressed + + // Per-second audio-system stats line. `get` is read-only — keeps the box alive across reads. + // `grab` would consume the notification and break the periodic publish/poll loop. + since_stats_print += dt + if (since_stats_print >= 1.0) { + stats_box |> get() $(s : AudioSystemStats#) { + print("audio: util={s.utilization_pct}%, hrtf={s.hrtf_count}/{s.total_3d} (budget={HRTF_BUDGETS[hrtf_budget_idx]})\n") + } + since_stats_print = 0.0 + } // Update audio listener set_head_position(gl_to_audio(camera_pos), gl_to_audio(fwd)) diff --git a/modules/dasAudio/audio/audio_boost.das b/modules/dasAudio/audio/audio_boost.das index 55197002ae..4974171f0e 100644 --- a/modules/dasAudio/audio/audio_boost.das +++ b/modules/dasAudio/audio/audio_boost.das @@ -1,5 +1,7 @@ options gen2 options indenting = 4 +options persistent_heap +options gc options no_global_variables = false options no_unused_block_arguments = false options no_unused_function_arguments = false @@ -76,6 +78,16 @@ struct public AudioChannelStatus { //! Number of pending PCM chunks in the stream queue. } +//! 
Snapshot of the audio system's recent CPU utilization and HRTF/simulated routing split, published to a caller-provided LockBox. +struct public AudioSystemStats { + utilization_pct : float + //! Mixer CPU utilization over the last ~1 second, in percent (0..100+). + hrtf_count : int + //! Number of channels currently routed through HRTF. + total_3d : int + //! Total active is3D channels (denominator of the HRTF/simulated split). +} + //! Distance attenuation coefficients for 3D audio. //! Use the helper functions (inverse_distance_attenuation, linear_attenuation, etc.) to create instances. [safe_when_uninitialized] @@ -147,12 +159,15 @@ class AudioChannel { paused : bool = false pause_fade : float = 1.0 // current fade level (0=silent, 1=full) pause_fade_target : float = 1.0 // 0=fading to pause, 1=fading to play + pause_fade_step : float = 1.0 / (0.002 * float(MA_SAMPLE_RATE)) // ~2ms ramp at the configured sample rate + ignoreGlobalVolume : bool = false // if true, g_volume does not multiply this channel stop : bool = false pitch : float = 1. volume : float = 1. source : AudioSource? resampler : ma_resampler - channel_converter : ma_channel_converter + channel_converter : ma_channel_converter // HRTF mode for is3D: 2 -> MA_CHANNELS. Non-3D: source.channels -> MA_CHANNELS + channel_converter_sim : ma_channel_converter // simulated-3D mode: source.channels -> MA_CHANNELS (built in set3D, used when !is_hrtf) volume_mixer : ma_volume_mixer playback_position : uint64 = 0ul position3d : float3 @@ -160,6 +175,8 @@ class AudioChannel { doppler : float = 1. attenuation : Attenuation = default_attenuation() is3D : bool = false + is_hrtf : bool = true // runtime per-channel mode; rewritten each frame by update_hrtf's budget + setup3D : bool = true @do_not_delete status : LockBox? = null @do_not_delete reverb : I3DL2Reverb? @do_not_delete chorus : ma_chorus? 
@@ -191,9 +208,20 @@ class AudioChannel { } def set3D { is3D = true + // simulated-3D converter: source.channels -> MA_CHANNELS (mono->stereo upmix for mono sources) + var sim_config <- ma_channel_converter_config_init( + ma_format.ma_format_f32, + uint(source.channels), + null, + uint(MA_CHANNELS), + null, + ma_channel_mix_mode.ma_channel_mix_mode_default + ) + ma_channel_converter_init(unsafe(addr(sim_config)), unsafe(addr(channel_converter_sim))) if (MA_HRTF) { ma_hrtf_init(unsafe(addr(hrtf)), uint(MA_SAMPLE_RATE)) ma_channel_converter_uninit(unsafe(addr(channel_converter))) + // HRTF always emits stereo, so the converter's input is 2ch regardless of source.channels var channel_converter_config <- ma_channel_converter_config_init( ma_format.ma_format_f32, 2u, @@ -211,6 +239,9 @@ class AudioChannel { } ma_volume_mixer_uninit(unsafe(addr(volume_mixer))) ma_channel_converter_uninit(unsafe(addr(channel_converter))) + if (is3D) { + ma_channel_converter_uninit(unsafe(addr(channel_converter_sim))) + } ma_resampler_uninit(unsafe(addr(resampler))) if (status != null) { status |> notify_and_release @@ -271,9 +302,9 @@ class AudioChannel { delete samples samples <- temp } - // hrtf + // hrtf — only when this channel is routed through HRTF (top-N by distance per update_hrtf budget) var nSoundChannels = source.channels - if (is3D && MA_HRTF) { + if (is3D && MA_HRTF && is_hrtf) { // void ma_hrtf_process_frames(ma_hrtf * hrtf, float * pOut, const float * pIn, ma_uint32 nChannels, ma_uint32 frameCount) var temp : array temp |> resize(int(outputFrames) * 2) @@ -308,10 +339,15 @@ class AudioChannel { delete samples samples <- temp } - // convert channels + // convert channels — for is3D channels, pick the converter sized to the upstream stage: + // HRTF mode -> samples are 2ch, use `channel_converter` (2 -> MA_CHANNELS) + // simulated 3D -> samples are source.channels, use `channel_converter_sim` (handles the mono->stereo upmix) var channel_data : array channel_data |> 
resize(data |> length) - ma_channel_converter_process_pcm_frames(unsafe(addr(channel_converter)), + let converter_to_use = (is3D && !is_hrtf + ? unsafe(addr(channel_converter_sim)) + : unsafe(addr(channel_converter))) + ma_channel_converter_process_pcm_frames(converter_to_use, unsafe(addr(channel_data[0])), unsafe(addr(samples[0])), outputFrames) @@ -326,9 +362,9 @@ class AudioChannel { unsafe(addr(data[0])), outputFrames) delete channel_data - // apply pause fade (2ms ramp to avoid clicks) + // apply pause fade (per-channel ramp speed, default ~2ms to avoid clicks) if (pause_fade != pause_fade_target) { - let fade_step = 1.0 / 96.0 // ~2ms at 48kHz + let fade_step = pause_fade_step for (f in range(int(outputFrames))) { let fade = pause_fade for (c in range(channels)) { @@ -582,27 +618,90 @@ var g_head_position : float3 var g_head_direction : float3 = float3(0., 1., 0.) var g_head_velocity : float3 +//! HRTF routing budget — at most this many is3D channels (closest to head) run through HRTF each frame; the rest run simulated-3D (pan + attenuation). +var g_hrtf_budget : int = 32 + +//! Calibrated normalizer applied to simulated-3D channels so their perceived loudness matches HRTF channels at the same position. Measured once in initialize_mixer. +var g_hrtf_frontal_gain : float = 1.0 + +// Scratch array reused each update_hrtf frame to avoid per-frame allocation; tuple of (distance² to head, g_channels index). +var g_hrtf_scratch : array> + +def public hrtf_budget_classify(rank, budget : int; wasHrtf : bool) : bool { + //! Decide whether a 3D channel at the given closest-to-head ``rank`` should run HRTF or simulated 3D, + //! given the current ``budget`` and whether the channel was HRTF on the previous frame. + //! Applies a sticky-rank margin to prevent flapping when two channels swap rank between frames, + //! while clamping to 0 when budget is 0 so "all simulated" actually clears in-flight HRTF channels. + let stickyTop = budget > 0 ? 
budget + max(2, budget / 10) : 0 + return wasHrtf ? rank < stickyTop : rank < budget +} + def update_hrtf(dt : float; nFrames : uint64) { + // Pass 1: rank 3D channels by distance² and assign is_hrtf with a sticky-margin rule. + // The sticky margin prevents thrashing when two channels swap rank near the budget boundary. + g_hrtf_scratch |> clear() + for (i in range(length(g_channels))) { + var ch = g_channels[i] + if (ch.is3D && !ch.stop) { + let delta = ch.position3d - g_head_position + g_hrtf_scratch |> push((dist2 = dot(delta, delta), idx = i)) + } + } + g_hrtf_scratch |> sort() $(a, b) => a.dist2 < b.dist2 + let total3D = length(g_hrtf_scratch) + let budget = g_hrtf_budget + var hrtfCount = 0 + for (rank in range(total3D)) { + let chIdx = g_hrtf_scratch[rank].idx + var ch = g_channels[chIdx] + let wasHrtf = ch.is_hrtf + let newHrtf = hrtf_budget_classify(rank, budget, wasHrtf) + if (newHrtf != wasHrtf) { + // linear_pan=true for HRTF (input is stereo from HRTF, unity at center); =false for simulated (constant-power panning of mono) + ma_volume_mixer_set_linear_pan(unsafe(addr(ch.volume_mixer)), newHrtf) + ch.is_hrtf = newHrtf + } + if (newHrtf) { + hrtfCount ++ + } + } + g_stats_hrtf_count = hrtfCount + g_stats_3d_count = total3D + + // Pass 2: per-channel position/pan/volume — routing decision already made above. for (ch in g_channels) { if (ch.is3D && !ch.stop) { var rxy = ch.position3d.xy - g_head_position.xy rxy = float2(rxy.x * g_head_direction.x + rxy.y * g_head_direction.y, -rxy.x * g_head_direction.y + rxy.y * g_head_direction.x) let nrxy = normalize(rxy) - static_if (MA_HRTF) { - let asimuth = atan2(nrxy.y, nrxy.x) - let elevation = atan2(ch.position3d.z - g_head_position.z, length(rxy)) - let iasimuth = int(asimuth * 180. / PI) - let ielevation = int(elevation * 180. 
/ PI) - ma_hrtf_set_direction(unsafe(addr(ch.hrtf)), iasimuth, ielevation) + if (ch.is_hrtf) { + static_if (MA_HRTF) { + let asimuth = atan2(nrxy.y, nrxy.x) + let elevation = atan2(ch.position3d.z - g_head_position.z, length(rxy)) + let iasimuth = int(asimuth * 180. / PI) + let ielevation = int(elevation * 180. / PI) + ma_hrtf_set_direction(unsafe(addr(ch.hrtf)), iasimuth, ielevation) + } + // HRTF carries spatial cues; keep the volume_mixer's pan centered + ma_volume_mixer_set_pan(unsafe(addr(ch.volume_mixer)), 0.0) } else { - // panning + // simulated 3D — constant-power pan via the volume_mixer ma_volume_mixer_set_pan(unsafe(addr(ch.volume_mixer)), nrxy.y) } - // volume attenuation — ramp over frame to avoid clicks + // volume attenuation — ramp over frame to avoid clicks. Simulated channels are scaled by the + // calibrated frontal-gain normalizer so they match HRTF channels at the same position. let distance = length(ch.position3d - g_head_position) let attn = compute_attenuation(ch.attenuation, distance) - ma_volume_mixer_set_volume_over_time(unsafe(addr(ch.volume_mixer)), ch.volume * attn, nFrames) + let g = ch.ignoreGlobalVolume ? 1.0 : g_volume + let modeGain = ch.is_hrtf ? 1.0 : g_hrtf_frontal_gain + let target = ch.volume * attn * g * modeGain + if (ch.setup3D) { + ma_volume_mixer_set_volume(unsafe(addr(ch.volume_mixer)), target) + ch.setup3D = false + } else { + ma_volume_mixer_set_volume_over_time(unsafe(addr(ch.volume_mixer)), target, nFrames) + } // doppler let vrel = g_head_velocity - ch.velocity3d let r = normalize(ch.position3d - g_head_position) @@ -686,7 +785,7 @@ variant AudioCommand { add_pcm_stream_3d : AudioCommandAddPCMStream3D append_pcm : tuple> append_box_pcm : tuple //! opaque LockBox? 
(archive-safe) - pause : tuple + pause : tuple volume : tuple pan : tuple pitch : tuple @@ -699,6 +798,10 @@ variant AudioCommand { chorus_cmd : tuple set_playback_position : tuple global_pause : bool + global_volume : float + ignore_global_volume : tuple + hrtf_budget : int + system_stats_box : uint64 //! opaque LockBox? (archive-safe) } var g_command_stream : Stream? @@ -715,6 +818,8 @@ def add_channel(sid : SID; var channel : AudioChannel?) { channel.sid = sid g_sid_2_channel |> insert(sid, channel) } + let g = channel.ignoreGlobalVolume ? 1.0 : g_volume + ma_volume_mixer_set_volume(unsafe(addr(channel.volume_mixer)), channel.volume * g) } def add_channel_3d(sid : SID; position : float3; attenuation : Attenuation; var channel : AudioChannel?) { @@ -740,6 +845,12 @@ def command_processor { delete g_channels } g_sid_2_channel |> clear() + if (g_stats_box != null) { + // stats box is a read-only long-lived publication target, not a one-shot; + // never `notify_and_release` (the caller's `get()` doesn't consume notifications). + g_stats_box |> release + g_stats_box = null + } g_command_stream |> release } elif (cmd is add_decoder) { assume dcmd = cmd as add_decoder @@ -793,6 +904,8 @@ def command_processor { } elif (cmd is pause) { let pcmd = cmd as pause g_sid_2_channel |> get(pcmd.sid) $(var ch : AudioChannel?&) { + let nFrames = max(1.0, pcmd.time * float(MA_SAMPLE_RATE)) + ch.pause_fade_step = 1.0 / nFrames if (pcmd.paused) { ch.pause_fade_target = 0.0 // fade out to pause } else { @@ -804,11 +917,13 @@ def command_processor { let vcmd = cmd as volume g_sid_2_channel |> get(vcmd.sid) $(var ch : AudioChannel?&) { ch.volume = vcmd.volume + let g = ch.ignoreGlobalVolume ? 1.0 : g_volume + let effective = vcmd.volume * g if (vcmd.time > 0.) 
{ let nFrames = uint64(vcmd.time * float(MA_SAMPLE_RATE)) - ma_volume_mixer_set_volume_over_time(unsafe(addr(ch.volume_mixer)), vcmd.volume, nFrames) + ma_volume_mixer_set_volume_over_time(unsafe(addr(ch.volume_mixer)), effective, nFrames) } else { - ma_volume_mixer_set_volume(unsafe(addr(ch.volume_mixer)), vcmd.volume) + ma_volume_mixer_set_volume(unsafe(addr(ch.volume_mixer)), effective) } } } elif (cmd is pan) { @@ -825,6 +940,35 @@ def command_processor { g_pitch = cmd as global_pitch } elif (cmd is global_pause) { g_pause = cmd as global_pause // todo: envelope? + } elif (cmd is global_volume) { + g_volume = cmd as global_volume + let nFrames = uint64(0.025 * float(MA_SAMPLE_RATE)) + for (it in values(g_sid_2_channel)) { + // 3D channels are handled in update_hrtf which reads g_volume each callback + if (!it.stop && !it.ignoreGlobalVolume && !it.is3D) { + ma_volume_mixer_set_volume_over_time(unsafe(addr(it.volume_mixer)), it.volume * g_volume, nFrames) + } + } + } elif (cmd is ignore_global_volume) { + let icmd = cmd as ignore_global_volume + g_sid_2_channel |> get(icmd.sid) $(var ch : AudioChannel?&) { + ch.ignoreGlobalVolume = icmd.value + let effective = ch.volume * (ch.ignoreGlobalVolume ? 1.0 : g_volume) + ma_volume_mixer_set_volume(unsafe(addr(ch.volume_mixer)), effective) + } + } elif (cmd is hrtf_budget) { + g_hrtf_budget = max(0, cmd as hrtf_budget) + } elif (cmd is system_stats_box) { + let sbox = cmd as system_stats_box + if (g_stats_box != null) { + g_stats_box |> release + g_stats_box = null + } + // Reset the rolling window so a re-registered box starts with a clean accumulator + // instead of inheriting partial data from the previous registration. 
+ g_stats_window_time_ms = 0.0lf + g_stats_window_samples = 0ul + g_stats_box = unsafe(reinterpret(sbox)) } elif (cmd is stop) { let scmd = cmd as stop g_sid_2_channel |> get(scmd.sid) $(var ch : AudioChannel?&) { @@ -900,6 +1044,15 @@ def command_processor { var g_limiter : ma_limiter var g_mix_buffer : array var g_pause : bool = false +var g_volume : float = 1. //! global master volume multiplier (1.0 = full, 0.0 = mute) + +// AudioSystemStats publication state (audio-context-side; main side holds the LockBox handle and read-grabs it). +var g_stats_box : LockBox? = null +var g_stats_window_time_ms = 0.0lf // accumulated mixer wall time over the current window +var g_stats_window_samples = 0ul // accumulated audio frames over the current window (flush gate: realTimeMs >= 1000) +var g_stats_recent_util_pct : float = 0.0 +var g_stats_3d_count : int = 0 +var g_stats_hrtf_count : int = 0 [export] def mixer(var data : array#; channels, rate : int; dt : float) { @@ -935,8 +1088,32 @@ def mixer(var data : array#; channels, rate : int; dt : float) { uint64(output_samples)) g_mix_buffer |> erase(0, output_samples * channels) } - g_mixer_total_time += double(get_time_usec(t0)) / 1000.lf + let dt_ms = double(get_time_usec(t0)) / 1000.lf + g_mixer_total_time += dt_ms g_mixer_total_samples += uint64(length(data) / channels) + publish_audio_stats(dt_ms, uint64(length(data) / channels)) +} + +// Accumulate per-callback time + samples into a 1-second rolling window. On window flush, compute +// utilization (= mixer time / real time) and update the caller's stats LockBox so main thread can read it. 
+def private publish_audio_stats(dt_ms : double; frames : uint64) { + return if (g_stats_box == null) + g_stats_window_time_ms += dt_ms + g_stats_window_samples += frames + let realTimeMs = double(g_stats_window_samples) * 1000.lf / double(MA_SAMPLE_RATE) + if (realTimeMs < 1000.lf) return + let util = g_stats_window_time_ms / realTimeMs * 100.lf + g_stats_recent_util_pct = float(util) + g_stats_window_time_ms = 0.0lf + g_stats_window_samples = 0ul + let snap = g_stats_recent_util_pct + let hrtfCount = g_stats_hrtf_count + let total3D = g_stats_3d_count + g_stats_box |> update() $(var s : AudioSystemStats#) { + s.utilization_pct = snap + s.hrtf_count = hrtfCount + s.total_3d = total3D + } } [init] @@ -949,7 +1126,68 @@ def initialize_mixer { MA_LIMITER_RELEASE_TIME, float(MA_SAMPLE_RATE), uint(MA_CHANNELS)) + static_if (MA_HRTF) { + calibrate_hrtf_frontal_gain() + } + } +} + +// Measure the HRTF's broadband gain at azimuth=0, elevation=0 with a 1 kHz sine probe and +// compute the loudness-matching normalizer for the simulated-3D path. +// +// Derivation (per-channel amplitude at the volume_mixer output, head-on source): +// * HRTF path: linear pan, identity at pan=0 -> per-channel = HRIR_gain * input +// * Simulated path: constant-power pan, 1/sqrt(2) -> per-channel = 0.707 * input * normalizer +// Equality at center => normalizer = HRIR_gain / 0.707 = HRIR_gain * sqrt(2) +// +// HRIR_gain is the sine-input amplitude ratio (≈ |H(1 kHz)|, the magnitude of the HRIR's transfer +// function at the probe frequency). DC (a constant signal) is the worst probe because impulse- +// response filters attenuate DC strongly; sine at ~1 kHz gives a representative broadband estimate. 
+def private calibrate_hrtf_frontal_gain { + let probeFrames = 2048 + let measureStart = 512 // skip the HRTF crossfade region; measure the steady-state tail + var probe_hrtf : ma_hrtf + ma_hrtf_init(unsafe(addr(probe_hrtf)), uint(MA_SAMPLE_RATE)) + // Per hrtf.h: first set_direction triggers a 256-sample crossfade against the zero filter; + // the second call leaves needs_crossfade=0 so the steady-state output is a clean measurement. + ma_hrtf_set_direction(unsafe(addr(probe_hrtf)), 0, 0) + ma_hrtf_set_direction(unsafe(addr(probe_hrtf)), 0, 0) + var input : array + input |> resize(probeFrames) + let probeFreq = 1000.0 + let phaseStep = 2.0 * PI * probeFreq / float(MA_SAMPLE_RATE) + for (i in range(probeFrames)) { + input[i] = sin(phaseStep * float(i)) + } + var output : array + output |> resize(probeFrames * 2) + ma_hrtf_process_frames(unsafe(addr(probe_hrtf)), + unsafe(addr(output[0])), + unsafe(addr(input[0])), + 1u, + uint(probeFrames)) + // Per-channel RMS over the steady-state tail; the frontal source is symmetric so average L+R. 
+ var sumSq = 0.0 + let nSamples = probeFrames - measureStart + for (i in range(measureStart, probeFrames)) { + let l = output[i * 2] + let r = output[i * 2 + 1] + sumSq += l * l + r * r + } + let perChannelRms = sqrt(sumSq / float(2 * nSamples)) + let inputRms = sqrt(0.5) // sine of amplitude 1.0 + let hrirGain = perChannelRms / inputRms // ≈ |H(1 kHz)| of the frontal HRIR + let normalizer = hrirGain * sqrt(2.0) // compensates the -3 dB const-power center pan + if (normalizer > 0.25 && normalizer < 4.0) { + g_hrtf_frontal_gain = normalizer + } else { + to_log(LOG_WARNING, "HRTF calibration produced out-of-range normalizer={normalizer} (hrirGain={hrirGain}); using 1.414 fallback\n") + g_hrtf_frontal_gain = sqrt(2.0) } + ma_hrtf_uninit(unsafe(addr(probe_hrtf))) + delete input + delete output + to_log(LOG_INFO, "HRTF frontal-gain calibration: perChannelRms={perChannelRms}, hrirGain={hrirGain}, normalizer={g_hrtf_frontal_gain}\n") } /* ma_limiter_init_linear(unsafe(addr(g_limiter)), @@ -1022,8 +1260,9 @@ def make_decoder(filename : string; rate, channels : int) : ma_decoder? { // then pushed atomically as one Stream batch. var global_batch : array>? +[deprecated(message="use `batch(cb)` instead")] def public begin_batch() { - //! Begin batching audio commands. All commands until end_batch are sent atomically. + //! Deprecated. Use ``batch() { ... }`` instead. if (global_batch != null) { panic("nested batch") } @@ -1031,8 +1270,9 @@ def public begin_batch() { global_batch = tempBatch } +[deprecated(message="use `batch(cb)` instead")] def public end_batch() { - //! End batching and send all batched commands atomically. + //! Deprecated. Use ``batch() { ... }`` instead. if (global_batch == null) { panic("no batch") } @@ -1072,69 +1312,70 @@ def push_cmd(var cmd : AudioCommand) { } } -def public play_sound_from_file(filename : string; rate, channels : int) { - //! 
plays sound from file +def public play_sound_from_file(filename : string; rate, channels : int; sid : SID = INVALID_SID) { + //! plays sound from file. If sid is INVALID_SID, a new SID is generated. //! note - this function is blocking for the duration of the decoder creation var decoder = make_decoder(filename, rate, channels) if (decoder == null) return INVALID_SID - let sid = generate_sound_sid() - push_cmd(AudioCommand(add_decoder = AudioCommandAddDecoder(sid = sid, decoder = intptr(decoder), rate = rate, channels = channels))) - return sid + let useSid = sid != INVALID_SID ? sid : generate_sound_sid() + push_cmd(AudioCommand(add_decoder = AudioCommandAddDecoder(sid = useSid, decoder = intptr(decoder), rate = rate, channels = channels))) + return useSid } -def public play_3d_sound_from_file(filename : string; position : float3; attenuation : Attenuation; rate, channels : int) { - //! plays 3D sound from file +def public play_3d_sound_from_file(filename : string; position : float3; attenuation : Attenuation; rate, channels : int; sid : SID = INVALID_SID) { + //! plays 3D sound from file. If sid is INVALID_SID, a new SID is generated. //! note - this function is blocking for the duration of the decoder creation var decoder = make_decoder(filename, rate, channels) if (decoder == null) return INVALID_SID - let sid = generate_sound_sid() + let useSid = sid != INVALID_SID ? sid : generate_sound_sid() push_cmd(AudioCommand(add_decoder_3d = - AudioCommandAddDecoder3D(sid = sid, decoder = intptr(decoder), rate = rate, channels = channels, position = position, attenuation = attenuation))) - return sid + AudioCommandAddDecoder3D(sid = useSid, decoder = intptr(decoder), rate = rate, channels = channels, position = position, attenuation = attenuation))) + return useSid } -def public play_sound_from_pcm_stream(rate, channels : int; sid : SID = generate_sound_sid()) { - //! Create a PCM streaming channel. Feed it samples with append_to_pcm. 
- push_cmd(AudioCommand(add_pcm_stream <- AudioCommandAddPCMStream(sid = sid, rate = rate, channels = channels))) - return sid +def public play_sound_from_pcm_stream(rate, channels : int; sid : SID = INVALID_SID) { + //! Create a PCM streaming channel. Feed it samples with append_to_pcm. If sid is INVALID_SID, a new SID is generated. + let useSid = sid != INVALID_SID ? sid : generate_sound_sid() + push_cmd(AudioCommand(add_pcm_stream <- AudioCommandAddPCMStream(sid = useSid, rate = rate, channels = channels))) + return useSid } -def public play_3d_sound_from_pcm_stream(position : float3; attenuation : Attenuation; rate, channels : int) { - //! Create a 3D PCM streaming channel. Feed it samples with append_to_pcm. - let sid = generate_sound_sid() +def public play_3d_sound_from_pcm_stream(position : float3; attenuation : Attenuation; rate, channels : int; sid : SID = INVALID_SID) { + //! Create a 3D PCM streaming channel. Feed it samples with append_to_pcm. If sid is INVALID_SID, a new SID is generated. + let useSid = sid != INVALID_SID ? sid : generate_sound_sid() push_cmd(AudioCommand(add_pcm_stream_3d <- - AudioCommandAddPCMStream3D(sid = sid, rate = rate, channels = channels, position = position, attenuation = attenuation))) - return sid + AudioCommandAddPCMStream3D(sid = useSid, rate = rate, channels = channels, position = position, attenuation = attenuation))) + return useSid } -def public play_sound_from_pcm(rate, channels : int; var samples : array) { - //! plays sound from PCM data - let sid = generate_sound_sid() - push_cmd(AudioCommand(add_pcm <- AudioCommandAddPCM(sid = sid, rate = rate, channels = channels, samples <- samples, loop = false))) - return sid +def public play_sound_from_pcm(rate, channels : int; var samples : array; sid : SID = INVALID_SID) { + //! plays sound from PCM data. If sid is INVALID_SID, a new SID is generated. + let useSid = sid != INVALID_SID ? 
sid : generate_sound_sid() + push_cmd(AudioCommand(add_pcm <- AudioCommandAddPCM(sid = useSid, rate = rate, channels = channels, samples <- samples, loop = false))) + return useSid } -def public play_sound_loop_from_pcm(rate, channels : int; var samples : array) { - //! plays looping sound from PCM data - let sid = generate_sound_sid() - push_cmd(AudioCommand(add_pcm <- AudioCommandAddPCM(sid = sid, rate = rate, channels = channels, samples <- samples, loop = true))) - return sid +def public play_sound_loop_from_pcm(rate, channels : int; var samples : array; sid : SID = INVALID_SID) { + //! plays looping sound from PCM data. If sid is INVALID_SID, a new SID is generated. + let useSid = sid != INVALID_SID ? sid : generate_sound_sid() + push_cmd(AudioCommand(add_pcm <- AudioCommandAddPCM(sid = useSid, rate = rate, channels = channels, samples <- samples, loop = true))) + return useSid } -def public play_3d_sound_from_pcm(position : float3; attenuation : Attenuation; rate, channels : int; var samples : array) { - //! plays 3D sound from PCM data - let sid = generate_sound_sid() +def public play_3d_sound_from_pcm(position : float3; attenuation : Attenuation; rate, channels : int; var samples : array; sid : SID = INVALID_SID) { + //! plays 3D sound from PCM data. If sid is INVALID_SID, a new SID is generated. + let useSid = sid != INVALID_SID ? sid : generate_sound_sid() push_cmd(AudioCommand(add_pcm_3d <- - AudioCommandAddPCM3D(sid = sid, rate = rate, channels = channels, samples <- samples, loop = false, position = position, attenuation = attenuation))) - return sid + AudioCommandAddPCM3D(sid = useSid, rate = rate, channels = channels, samples <- samples, loop = false, position = position, attenuation = attenuation))) + return useSid } -def public play_3d_sound_loop_from_pcm(position : float3; attenuation : Attenuation; rate, channels : int; var samples : array) { - //! 
plays 3D looping sound from PCM data - let sid = generate_sound_sid() +def public play_3d_sound_loop_from_pcm(position : float3; attenuation : Attenuation; rate, channels : int; var samples : array; sid : SID = INVALID_SID) { + //! plays 3D looping sound from PCM data. If sid is INVALID_SID, a new SID is generated. + let useSid = sid != INVALID_SID ? sid : generate_sound_sid() push_cmd(AudioCommand(add_pcm_3d <- - AudioCommandAddPCM3D(sid = sid, rate = rate, channels = channels, samples <- samples, loop = true, position = position, attenuation = attenuation))) - return sid + AudioCommandAddPCM3D(sid = useSid, rate = rate, channels = channels, samples <- samples, loop = true, position = position, attenuation = attenuation))) + return useSid } def public append_to_pcm(sid : SID; var samples : array) { @@ -1151,9 +1392,9 @@ def public append_box_to_pcm(sid : SID; box : LockBox?; var samples : array get() $(var s : AudioSystemStats) { ... }`` (read-only — keeps the box alive + //! across multiple reads). Pass null to clear. Do NOT use ``grab()``: that's a one-shot consume. + if (box == null) { + push_cmd(AudioCommand(system_stats_box = 0ul)) + return + } + box |> add_ref + box |> set() <| new AudioSystemStats(utilization_pct = 0.0, hrtf_count = 0, total_3d = 0) + push_cmd(AudioCommand(system_stats_box = intptr(box))) +} + def public stop(sid : SID; time : float = 0.0f) { //! 
stop sound push_cmd(AudioCommand(stop = (sid = sid, time = time))) diff --git a/modules/dasAudio/src/dasAudio.cpp b/modules/dasAudio/src/dasAudio.cpp index 05378f2f8a..2a4e287db7 100644 --- a/modules/dasAudio/src/dasAudio.cpp +++ b/modules/dasAudio/src/dasAudio.cpp @@ -299,6 +299,7 @@ bool dasAudio_init ( TFunc>,int32_t,int32_t,float> return false; } g_mixer_context.reset(get_clone_context(&context,uint32_t(ContextCategory::audio_context))); + g_mixer_context->verySafeContext = false; g_mixer_function = mixer; g_mixer_env = daScriptEnvironment::getBound(); if ( ma_device_start(&g_device) != MA_SUCCESS ) { diff --git a/modules/dasAudio/strudel/strudel_midi_player.das b/modules/dasAudio/strudel/strudel_midi_player.das index 3b14fe5983..3cee68862b 100644 --- a/modules/dasAudio/strudel/strudel_midi_player.das +++ b/modules/dasAudio/strudel/strudel_midi_player.das @@ -1,5 +1,7 @@ options gen2 options indenting = 4 +options persistent_heap +options gc options no_unused_block_arguments = false options no_unused_function_arguments = false @@ -553,7 +555,7 @@ def midi_tick(var state : MidiPlaybackState; chunk_seconds : float) : array= 0) { var voice = state.voices[i] @@ -828,9 +830,9 @@ def private midi_thread_main(sid : uint64; var cmd_stream : Stream?; var done_st let TARGET_CHUNKS = 4 let LOW_WATERMARK = 2 var tracks : array - var status_box <- unsafe(lock_box_create()) + var status_box <- lock_box_create() set_status_update(sid, status_box) - var pcm_box <- unsafe(lock_box_create()) + var pcm_box <- lock_box_create() // initialize reverb (convolution: 2s decay, 15kHz→1kHz lowpass sweep) g_reverb = new ConvolutionReverb conv_reverb_init(g_reverb, SAMPLE_RATE, 2.0, 15000.0, 1000.0, 0.01) @@ -862,7 +864,7 @@ def private midi_thread_main(sid : uint64; var cmd_stream : Stream?; var done_st looping = tc.looping )) } elif (cmd is remove_track) { - let name = string(cmd as remove_track) + let name = cmd as remove_track var k = length(tracks) - 1 while (k >= 0) { if 
(tracks[k].name == name) { @@ -1024,7 +1026,7 @@ def public midi_init() { g_midi_sid = play_sound_from_pcm_stream(MA_SAMPLE_RATE, MA_CHANNELS) } g_midi_cmd_stream = unsafe(stream_create()) - g_midi_done_status = unsafe(job_status_create()) + g_midi_done_status = job_status_create() g_midi_done_status |> append(1) // expect one notification on worker exit // @capture auto-bumps refcount for Stream and JobStatus — no manual add_ref needed let sid = g_midi_sid diff --git a/modules/dasAudio/strudel/strudel_player.das b/modules/dasAudio/strudel/strudel_player.das index 37431d8a10..4e3aa023ed 100644 --- a/modules/dasAudio/strudel/strudel_player.das +++ b/modules/dasAudio/strudel/strudel_player.das @@ -1,5 +1,7 @@ options gen2 options indenting = 4 +options persistent_heap +options gc options no_unused_block_arguments = false options no_unused_function_arguments = false @@ -355,7 +357,7 @@ def private strudel_process_commands(cmd_fn : function<(cmd : string) : void>) : } elif (cmd is set_cps) { g_cps = cmd as set_cps } elif (cmd is user_cmd) { - let ucmd = string(cmd as user_cmd) + let ucmd = cmd as user_cmd invoke(cmd_fn, ucmd) } } @@ -368,10 +370,10 @@ def public strudel_create_channel() { //! Create the PCM stream for main-thread playback. Call once after audio_system_create() and before strudel_tick. 
if (g_sid == 0ul) { g_sid = play_sound_from_pcm_stream(MA_SAMPLE_RATE, MA_CHANNELS) - g_tick_status_box = unsafe(lock_box_create()) + g_tick_status_box = lock_box_create() g_tick_status_box |> add_ref() set_status_update(g_sid, g_tick_status_box) - g_tick_pcm_box = unsafe(lock_box_create()) + g_tick_pcm_box = lock_box_create() // memory tracking baseline g_mem_heap_baseline = heap_bytes_allocated() g_mem_str_baseline = string_heap_bytes_allocated() @@ -396,7 +398,7 @@ def public strudel_tick() { } let t0 = ref_time_ticks() let CHUNK_SEC = g_chunk_seconds - let chunkSamples = int(float(CHUNK_SEC) * float(SAMPLE_RATE)) * 2 + let chunkSamples = int(CHUNK_SEC * float(SAMPLE_RATE)) * 2 // prepare master output g_master_pcm |> resize(chunkSamples) for (s in g_master_pcm) { @@ -414,7 +416,7 @@ def public strudel_tick() { track.sched.lastQueryEnd = g_wall_time * g_cps track.sched.cps = g_cps } - tick(track.sched, track.pat, g_bank, g_wall_time, float(CHUNK_SEC), g_look_ahead) + tick(track.sched, track.pat, g_bank, g_wall_time, CHUNK_SEC, g_look_ahead) // mix scheduler output into master with track gain if (!empty(track.sched.output) && track.gain > 0.001) { // copy to per-track PCM for visualizer access @@ -435,9 +437,9 @@ def public strudel_tick() { // update fade envelope if (track.fade_speed > 0.0) { if (track.gain < track.target_gain) { - track.gain = min(track.gain + track.fade_speed * float(CHUNK_SEC), track.target_gain) + track.gain = min(track.gain + track.fade_speed * CHUNK_SEC, track.target_gain) } elif (track.gain > track.target_gain) { - track.gain = max(track.gain - track.fade_speed * float(CHUNK_SEC), track.target_gain) + track.gain = max(track.gain - track.fade_speed * CHUNK_SEC, track.target_gain) } if (abs(track.gain - track.target_gain) < 0.001) { track.gain = track.target_gain @@ -455,7 +457,7 @@ def public strudel_tick() { if (!empty(g_master_pcm)) { append_box_to_pcm(g_sid, g_tick_pcm_box, g_master_pcm) } - let chunkFrames = int(float(CHUNK_SEC) * 
float(SAMPLE_RATE)) + let chunkFrames = int(CHUNK_SEC * float(SAMPLE_RATE)) g_wall_samples += int64(chunkFrames) g_wall_time = double(g_wall_samples) / double(SAMPLE_RATE) ma_volume_mixer_uninit(unsafe(addr(mixer))) @@ -481,7 +483,7 @@ def public strudel_tick_offline() { //! Renders into g_master_pcm and advances g_wall_time; call in a tight loop for offline WAV rendering. let t0 = ref_time_ticks() let CHUNK_SEC = g_chunk_seconds - let chunkSamples = int(float(CHUNK_SEC) * float(SAMPLE_RATE)) * 2 + let chunkSamples = int(CHUNK_SEC * float(SAMPLE_RATE)) * 2 // prepare master output g_master_pcm |> resize(chunkSamples) for (s in g_master_pcm) { @@ -499,7 +501,7 @@ def public strudel_tick_offline() { track.sched.lastQueryEnd = g_wall_time * g_cps track.sched.cps = g_cps } - tick(track.sched, track.pat, g_bank, g_wall_time, float(CHUNK_SEC), g_look_ahead) + tick(track.sched, track.pat, g_bank, g_wall_time, CHUNK_SEC, g_look_ahead) // mix scheduler output into master with track gain if (!empty(track.sched.output) && track.gain > 0.001) { // copy to per-track PCM for visualizer access @@ -520,9 +522,9 @@ def public strudel_tick_offline() { // update fade envelope if (track.fade_speed > 0.0) { if (track.gain < track.target_gain) { - track.gain = min(track.gain + track.fade_speed * float(CHUNK_SEC), track.target_gain) + track.gain = min(track.gain + track.fade_speed * CHUNK_SEC, track.target_gain) } elif (track.gain > track.target_gain) { - track.gain = max(track.gain - track.fade_speed * float(CHUNK_SEC), track.target_gain) + track.gain = max(track.gain - track.fade_speed * CHUNK_SEC, track.target_gain) } if (abs(track.gain - track.target_gain) < 0.001) { track.gain = track.target_gain @@ -536,7 +538,7 @@ def public strudel_tick_offline() { g_tracks[i] = null } } - let chunkFrames = int(float(CHUNK_SEC) * float(SAMPLE_RATE)) + let chunkFrames = int(CHUNK_SEC * float(SAMPLE_RATE)) g_wall_samples += int64(chunkFrames) g_wall_time = double(g_wall_samples) / 
double(SAMPLE_RATE) ma_volume_mixer_uninit(unsafe(addr(mixer))) @@ -568,12 +570,12 @@ def public strudel_play(cmd_fn : function<(cmd : string) : void> = @@strudel_noo var total_time = 0.lf var total_samples = 0ul var max_chunk_usec = 0.0lf - var status_box <- unsafe(lock_box_create()) + var status_box <- lock_box_create() set_status_update(g_sid, status_box) - var pcm_box <- unsafe(lock_box_create()) + var pcm_box <- lock_box_create() var mixer : ma_volume_mixer ma_volume_mixer_init(unsafe(addr(mixer)), 2u) - let chunkSamples = int(float(CHUNK_SEC) * float(SAMPLE_RATE)) * 2 + let chunkSamples = int(CHUNK_SEC * float(SAMPLE_RATE)) * 2 var output : array output |> resize(chunkSamples) // memory tracking @@ -603,7 +605,7 @@ def public strudel_play(cmd_fn : function<(cmd : string) : void> = @@strudel_noo track.sched.lastQueryEnd = g_wall_time * g_cps track.sched.cps = g_cps } - tick(track.sched, track.pat, *g_bank_ptr, g_wall_time, float(CHUNK_SEC), g_look_ahead) + tick(track.sched, track.pat, *g_bank_ptr, g_wall_time, CHUNK_SEC, g_look_ahead) // mix into output with track gain if (!empty(track.sched.output) && track.gain > 0.001) { ma_volume_mixer_set_volume(unsafe(addr(mixer)), track.gain) @@ -620,9 +622,9 @@ def public strudel_play(cmd_fn : function<(cmd : string) : void> = @@strudel_noo // update fade envelope if (track.fade_speed > 0.0) { if (track.gain < track.target_gain) { - track.gain = min(track.gain + track.fade_speed * float(CHUNK_SEC), track.target_gain) + track.gain = min(track.gain + track.fade_speed * CHUNK_SEC, track.target_gain) } elif (track.gain > track.target_gain) { - track.gain = max(track.gain - track.fade_speed * float(CHUNK_SEC), track.target_gain) + track.gain = max(track.gain - track.fade_speed * CHUNK_SEC, track.target_gain) } if (abs(track.gain - track.target_gain) < 0.001) { track.gain = track.target_gain @@ -721,12 +723,12 @@ def public strudel_init(fn : function<() : void>; cmd_fn : function<(cmd : strin g_cmd_fn = cmd_fn // create 
playback time lockbox if (g_playback_box == null) { - g_playback_box = unsafe(lock_box_create()) + g_playback_box = lock_box_create() } // create command stream and done status (thread-exit signal, wait group of 1). // @capture auto-bumps refcount for Stream and JobStatus — no manual add_ref needed. g_cmd_stream = unsafe(stream_create()) - g_done_status = unsafe(job_status_create()) + g_done_status = job_status_create() g_done_status |> append(1) // create PCM stream on main thread (audio globals are initialized here) let thread_sid = play_sound_from_pcm_stream(MA_SAMPLE_RATE, MA_CHANNELS) diff --git a/tests/aot/CMakeLists.txt b/tests/aot/CMakeLists.txt index acd517b48a..61a542f1b7 100644 --- a/tests/aot/CMakeLists.txt +++ b/tests/aot/CMakeLists.txt @@ -197,6 +197,7 @@ IF(NOT DAS_AUDIO_DISABLED) tests/strudel/test_signals.das tests/strudel/test_synthesis.das tests/strudel/test_vowel.das + tests/audio/test_hrtf_budget.das ) ENDIF() @@ -290,6 +291,11 @@ FILE(GLOB AOT_MACRO_BOOST_FILES RELATIVE ${PROJECT_SOURCE_DIR} CONFIGURE_DEPENDS # by the actual test file. list(FILTER AOT_MACRO_BOOST_FILES EXCLUDE REGEX "/_") +# AOT for with_boost test files +FILE(GLOB AOT_WITH_BOOST_FILES RELATIVE ${PROJECT_SOURCE_DIR} CONFIGURE_DEPENDS "tests/with_boost/*.das") +# Exclude failed_* expected-failure compile tests — they're not AOT-able. 
+list(FILTER AOT_WITH_BOOST_FILES EXCLUDE REGEX "/failed_") + # Macro_boost test module files (probe call_macro required transitively by tests) SET(AOT_MACRO_BOOST_MODULE_FILES tests/macro_boost/_has_sideeffects_probe.das @@ -591,6 +597,10 @@ add_custom_target(test_aot_macro_boost) SET(MACRO_BOOST_AOT_GENERATED_SRC) DAS_AOT("${AOT_MACRO_BOOST_FILES}" MACRO_BOOST_AOT_GENERATED_SRC test_aot_macro_boost daslang) +add_custom_target(test_aot_with_boost) +SET(WITH_BOOST_AOT_GENERATED_SRC) +DAS_AOT("${AOT_WITH_BOOST_FILES}" WITH_BOOST_AOT_GENERATED_SRC test_aot_with_boost daslang) + add_custom_target(test_aot_macro_boost_modules) SET(MACRO_BOOST_MODULES_AOT_GENERATED_SRC) DAS_AOT_LIB("${AOT_MACRO_BOOST_MODULE_FILES}" MACRO_BOOST_MODULES_AOT_GENERATED_SRC test_aot_macro_boost_modules daslang) @@ -735,6 +745,7 @@ SOURCE_GROUP_FILES("aot generated" LINQ_MODULES_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" MACRO_CALL_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" MACRO_BOOST_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" MACRO_BOOST_MODULES_AOT_GENERATED_SRC) +SOURCE_GROUP_FILES("aot generated" WITH_BOOST_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" MATCH_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" MATH_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" MATH_MODULES_AOT_GENERATED_SRC) @@ -806,6 +817,7 @@ add_executable(test_aot ${DAS_DASCRIPT_MAIN_SRC} ${MACRO_CALL_AOT_GENERATED_SRC} ${MACRO_BOOST_AOT_GENERATED_SRC} ${MACRO_BOOST_MODULES_AOT_GENERATED_SRC} + ${WITH_BOOST_AOT_GENERATED_SRC} ${MATCH_AOT_GENERATED_SRC} ${MATH_AOT_GENERATED_SRC} ${MATH_MODULES_AOT_GENERATED_SRC} @@ -859,6 +871,7 @@ ADD_DEPENDENCIES(test_aot libDaScriptAot test_aot_linq test_aot_linq_modules test_aot_macro_call test_aot_macro_boost test_aot_macro_boost_modules + test_aot_with_boost test_aot_match test_aot_math test_aot_math_modules test_aot_module_tests test_aot_option diff --git a/tests/audio/test_hrtf_budget.das b/tests/audio/test_hrtf_budget.das new 
file mode 100644 index 0000000000..ae368c8b45 --- /dev/null +++ b/tests/audio/test_hrtf_budget.das @@ -0,0 +1,72 @@ +options gen2 +options indenting = 4 + +require dastest/testing_boost +require audio/audio_boost + +// Unit tests for the HRTF/simulated-3D budget classifier. The classifier is the pure rank-vs-budget +// decision update_hrtf consults each frame to decide whether a channel runs HRTF (expensive, top-N +// closest) or simulated 3D (constant-power pan + attenuation, cheaper). Sticky margin in rank space +// prevents flapping when channels swap rank between frames, but must NOT preserve HRTF status when +// the budget is 0 (otherwise "all simulated" doesn't clear in-flight HRTF channels). + +[test] +def test_full_hrtf_budget_keeps_everything_hrtf(t : T?) { + t |> run("budget=999: every rank routed to HRTF regardless of prior state") @(t : T?) { + t |> success(hrtf_budget_classify(0, 999, true), "rank 0, wasHrtf -> HRTF") + t |> success(hrtf_budget_classify(0, 999, false), "rank 0, !wasHrtf -> HRTF (flip in)") + t |> success(hrtf_budget_classify(31, 999, true), "rank 31, wasHrtf -> HRTF") + t |> success(hrtf_budget_classify(998, 999, false), "rank below budget flips in") + t |> success(!hrtf_budget_classify(999, 999, false), "rank == budget for !wasHrtf is NOT under-budget") + } +} + +[test] +def test_zero_budget_clears_all_hrtf(t : T?) { + t |> run("budget=0: sticky margin must NOT preserve HRTF — all channels go simulated") @(t : T?) { + // This is the regression case for the sticky-margin bug: stickyTop is clamped to 0 when budget=0, + // so previously-HRTF channels don't get to keep HRTF status. 
+ t |> success(!hrtf_budget_classify(0, 0, true), "rank 0, wasHrtf -> SIM (clears in-flight)") + t |> success(!hrtf_budget_classify(0, 0, false), "rank 0, !wasHrtf -> SIM") + t |> success(!hrtf_budget_classify(5, 0, true), "rank 5, wasHrtf -> SIM") + t |> success(!hrtf_budget_classify(99, 0, false), "rank 99, !wasHrtf -> SIM") + } +} + +[test] +def test_mixed_budget_routes_top_n(t : T?) { + t |> run("budget=32: top-32 ranks HRTF, rest simulated (no sticky effect for fresh entries)") @(t : T?) { + t |> success(hrtf_budget_classify(0, 32, false), "rank 0 below budget -> HRTF (flip in)") + t |> success(hrtf_budget_classify(31, 32, false), "rank 31 below budget -> HRTF") + t |> success(!hrtf_budget_classify(32, 32, false), "rank 32 == budget for !wasHrtf -> SIM") + t |> success(!hrtf_budget_classify(100, 32, false), "rank 100 -> SIM") + } +} + +[test] +def test_sticky_margin_in_rank_space(t : T?) { + t |> run("HRTF channels keep HRTF status across the budget+10% margin to avoid rank-swap flapping") @(t : T?) { + // For budget=32: stickyTop = 32 + max(2, 32/10) = 32 + 3 = 35 + t |> success(hrtf_budget_classify(32, 32, true), "rank 32, wasHrtf -> stays HRTF (sticky)") + t |> success(hrtf_budget_classify(34, 32, true), "rank 34, wasHrtf -> stays HRTF (sticky)") + t |> success(!hrtf_budget_classify(35, 32, true), "rank 35, wasHrtf -> SIM (past stickyTop)") + t |> success(!hrtf_budget_classify(36, 32, true), "rank 36, wasHrtf -> SIM") + // Note: a channel at rank 33 that was simulated must NOT flip to HRTF — the budget is 32. + t |> success(!hrtf_budget_classify(33, 32, false), "rank 33, !wasHrtf -> SIM (budget unchanged)") + } +} + +[test] +def test_sticky_margin_floor_at_small_budgets(t : T?) { + t |> run("small budgets enforce a minimum margin of 2 so single-channel swaps don't immediately flip") @(t : T?) 
{ + // For budget=10: stickyTop = 10 + max(2, 1) = 10 + 2 = 12 + t |> success(hrtf_budget_classify(10, 10, true), "rank 10, wasHrtf -> stays HRTF") + t |> success(hrtf_budget_classify(11, 10, true), "rank 11, wasHrtf -> stays HRTF") + t |> success(!hrtf_budget_classify(12, 10, true), "rank 12 -> SIM (past min-margin top)") + + // For budget=1: stickyTop = 1 + max(2, 0) = 3 + t |> success(hrtf_budget_classify(0, 1, true), "budget=1: rank 0 stays HRTF") + t |> success(hrtf_budget_classify(2, 1, true), "budget=1: rank 2 still sticky") + t |> success(!hrtf_budget_classify(3, 1, true), "budget=1: rank 3 falls off") + } +} diff --git a/tests/linq/test_linq_fold_collapse_chained_wheres.das b/tests/linq/test_linq_fold_collapse_chained_wheres.das new file mode 100644 index 0000000000..9c17c22772 --- /dev/null +++ b/tests/linq/test_linq_fold_collapse_chained_wheres.das @@ -0,0 +1,147 @@ +options gen2 + +require daslib/linq +require daslib/linq_boost +require daslib/linq_fold +require dastest/testing_boost public + +// KR-1 regression coverage — PR B collapse_chained_wheres pre-pass. +// +// Before PR B, chains like `..._where(p1)._where(p2).reverse()...` no longer spliced through +// `plan_reverse` / `plan_distinct` (single optional `where_` slot in the pattern row vs N-fold +// accept-and-merge in master's imperative code). Cascade still worked — correctness was OK, +// but a perf regression on uncommon chains. The decs mirror `plan_decs_reverse` always +// composed via `merge_where_cond` in a loop, so this is a parity restoration. +// +// These tests verify the COMPOSED predicate produces the same result as the manual +// `_where(p1 && p2)` form. Splice-firing itself isn't asserted from runtime (would need AST +// inspection); we rely on the per-archetype test files to exercise the emit paths and these +// tests to assert composition correctness end-to-end. + +// ═════════════════════════════════════════════════════════════════════════════ +// 1. 
plan_reverse surface +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_chained_wheres_reverse_n2(t : T?) { + t |> run("chained wheres N=2 + reverse + to_array: composed pred matches manual && form") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + let split <- _fold(each(arr)._where(_ > 2)._where(_ < 8).reverse() |> to_array()) + let combined <- _fold(each(arr)._where(_ > 2 && _ < 8).reverse() |> to_array()) + tt |> equal(length(split), length(combined)) + tt |> equal(length(split), 5) // 3,4,5,6,7 reversed + for (i in 0 .. length(split)) { + tt |> equal(split[i], combined[i]) + } + tt |> equal(split[0], 7) + tt |> equal(split[4], 3) + } +} + +[test] +def test_chained_wheres_reverse_n3(t : T?) { + t |> run("chained wheres N=3 + reverse + to_array: all three compose into single && chain") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + let split <- _fold(each(arr)._where(_ > 1)._where(_ < 9)._where(_ % 2 == 0).reverse() |> to_array()) + let combined <- _fold(each(arr)._where(_ > 1 && _ < 9 && _ % 2 == 0).reverse() |> to_array()) + // _ > 1 && _ < 9 && _ % 2 == 0 → [2, 4, 6, 8]; reversed → [8, 6, 4, 2] + tt |> equal(length(split), length(combined)) + tt |> equal(length(split), 4) + for (i in 0 .. length(split)) { + tt |> equal(split[i], combined[i]) + } + tt |> equal(split[0], 8) + tt |> equal(split[3], 2) + } +} + +[test] +def test_chained_wheres_reverse_first(t : T?) { + t |> run("chained wheres + reverse + first: scalar terminator splice (Rb archetype) sees composed pred") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + let v = _fold(each(arr)._where(_ > 3)._where(_ < 9).reverse().first()) + // _ > 3 && _ < 9 → [4,5,6,7,8]; reversed: [8,7,6,5,4]; first = 8 + tt |> equal(v, 8) + } +} + +[test] +def test_chained_wheres_reverse_count(t : T?) { + t |> run("chained wheres + reverse + count: counter terminator (Ra archetype) — reverse is identity for count") @(tt : T?) 
{ + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + let cnt = _fold(each(arr)._where(_ > 2)._where(_ < 8).reverse().count()) + tt |> equal(cnt, 5) + } +} + +// ═════════════════════════════════════════════════════════════════════════════ +// 2. plan_distinct surface +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_chained_wheres_distinct_n2(t : T?) { + t |> run("chained wheres N=2 + distinct + to_array: composed pred matches manual && form") @(tt : T?) { + let arr <- [1, 2, 1, 3, 2, 4, 3, 5, 4, 6] + let split <- _fold(each(arr)._where(_ > 1)._where(_ < 6) |> distinct() |> to_array()) + let combined <- _fold(each(arr)._where(_ > 1 && _ < 6) |> distinct() |> to_array()) + tt |> equal(length(split), length(combined)) + tt |> equal(length(split), 4) // 2,3,4,5 first-occurrence + for (i in 0 .. length(split)) { + tt |> equal(split[i], combined[i]) + } + } +} + +[test] +def test_chained_wheres_distinct_n3(t : T?) { + t |> run("chained wheres N=3 + distinct + count: all three compose; count fast-path") @(tt : T?) { + let arr <- [1, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8] + let cnt = _fold(each(arr)._where(_ > 1)._where(_ < 8)._where(_ != 4) |> distinct() |> count()) + // _ > 1 && _ < 8 && _ != 4 → 2,3,5,6,7 distinct → 5 + tt |> equal(cnt, 5) + } +} + +[test] +def test_chained_wheres_distinct_take(t : T?) { + t |> run("chained wheres + distinct + take(N): bounded splice path with composed pred") @(tt : T?) { + let arr <- [1, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8] + let buf <- _fold(each(arr)._where(_ > 1)._where(_ < 8) |> distinct() |> take(3) |> to_array()) + // _ > 1 && _ < 8 distinct first-N=3 → [2,3,4] + tt |> equal(length(buf), 3) + tt |> equal(buf[0], 2) + tt |> equal(buf[1], 3) + tt |> equal(buf[2], 4) + } +} + +// ═════════════════════════════════════════════════════════════════════════════ +// 3. 
Edge cases: no collapse when call between, single where unaffected +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_single_where_unchanged(t : T?) { + t |> run("single where unchanged: collapse pre-pass is a no-op on chains without adjacent wheres") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5] + let buf <- _fold(each(arr)._where(_ > 2).reverse() |> to_array()) + tt |> equal(length(buf), 3) + tt |> equal(buf[0], 5) + tt |> equal(buf[2], 3) + } +} + +[test] +def test_wheres_split_by_select_no_collapse(t : T?) { + t |> run("wheres with select between: NOT collapsed (collapse only fires on ADJACENT wheres)") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + // After collapse_chained_selects (no chained selects here) + collapse_chained_wheres + // (wheres not adjacent — select sits between): chain stays as [where, select, where, reverse, to_array]. + // plan_reverse's pattern rows expect a single optional `where_` slot before `reverse` — this chain + // has a where AFTER select that the pattern can't match. Cascade fires; correctness preserved. + let buf <- _fold(each(arr)._where(_ > 2)._select(_ * 10)._where(_ < 80).reverse() |> to_array()) + // _ > 2: 3..10 (×10: 30..100); _ < 80: 30..70; reversed: 70,60,50,40,30 + tt |> equal(length(buf), 5) + tt |> equal(buf[0], 70) + tt |> equal(buf[4], 30) + } +} diff --git a/tests/linq/test_linq_fold_iterator_wrap.das b/tests/linq/test_linq_fold_iterator_wrap.das new file mode 100644 index 0000000000..24f0167131 --- /dev/null +++ b/tests/linq/test_linq_fold_iterator_wrap.das @@ -0,0 +1,266 @@ +options gen2 + +require daslib/linq +require daslib/linq_boost +require daslib/linq_fold +require dastest/testing_boost public + +// PR A R3c — Source × terminator matrix for buffer-emitting splice emit fns. +// +// For each chain shape that goes through a buffer-emitting emit fn, the splice must produce a result +// whose static type matches the un-spliced chain. 
The full matrix is: +// +// │ no terminator (chain ends bare) │ explicit `.to_array()` terminator +// ──────────────────┼──────────────────────────────────┼────────────────────────────────── +// iter source │ iterator │ array +// (`.to_sequence()`)│ │ +// array source │ iterator │ array +// (`each(arr)`) │ │ +// +// The `iter source → iterator` and `array source → iterator` rows are the regression coverage +// for R1/R2/R3 — `buffer_return(name, ctx.expr_is_iterator)`. The two array corners protect against +// future over-correction (always wrapping with `to_sequence_move`). `typeinfo typename(got)` is the +// load-bearing assertion. Array-bound tests use `let got` (immutable, no consuming for-loop) and the +// assertion strings reflect the resulting `array const` binding type. +// +// Shapes covered per matrix: +// 1. `reverse()` only → emit_reverse_buffer_inplace (catch-all R1-R4) +// 2. `distinct()` / `distinct_by` → emit_hashtable_dedup +// 3. `reverse() + distinct()` → emit_reverse_backward_walk_dset_gate (array path, R-2a) +// / emit_hashtable_dedup (iter path) +// 4. `reverse() + take(N)` → emit_reverse_backward_index_walk (array path, R6) +// / emit_reverse_buffer_inplace (iter path) + +// ═════════════════════════════════════════════════════════════════════════════ +// 1. reverse() — matrix +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_matrix_reverse_iter_to_iter(t : T?) { + t |> run("reverse(): iter src, no to_array → iterator") @(tt : T?) { + var got <- _fold([1, 2, 3, 4, 5].to_sequence() |> reverse()) + tt |> equal(typeinfo typename(got), "iterator") + var total = 0 + for (v in got) { + total += v + } + tt |> equal(total, 15) + } +} + +[test] +def test_matrix_reverse_iter_to_array(t : T?) { + t |> run("reverse(): iter src + to_array() → array const") @(tt : T?) 
{ + let got <- _fold([1, 2, 3, 4, 5].to_sequence() |> reverse() |> to_array()) + tt |> equal(typeinfo typename(got), "array const") + tt |> equal(length(got), 5) + tt |> equal(got[0], 5) + tt |> equal(got[4], 1) + } +} + +[test] +def test_matrix_reverse_array_to_iter(t : T?) { + t |> run("reverse(): array src, no to_array → iterator") @(tt : T?) { + let arr = [1, 2, 3, 4, 5] + var got <- _fold(each(arr) |> reverse()) + tt |> equal(typeinfo typename(got), "iterator") + var total = 0 + for (v in got) { + total += v + } + tt |> equal(total, 15) + } +} + +[test] +def test_matrix_reverse_array_to_array(t : T?) { + t |> run("reverse(): array src + to_array() → array const") @(tt : T?) { + let arr = [1, 2, 3, 4, 5] + let got <- _fold(each(arr) |> reverse() |> to_array()) + tt |> equal(typeinfo typename(got), "array const") + tt |> equal(length(got), 5) + tt |> equal(got[0], 5) + tt |> equal(got[4], 1) + } +} + +// ═════════════════════════════════════════════════════════════════════════════ +// 2. distinct() / distinct_by — matrix +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_matrix_distinct_iter_to_iter(t : T?) { + t |> run("distinct(): iter src, no to_array → iterator") @(tt : T?) { + var got <- _fold([1, 2, 1, 3, 2, 3, 1].to_sequence() |> distinct()) + tt |> equal(typeinfo typename(got), "iterator") + var seen : array + for (v in got) { + seen |> push(v) + } + tt |> equal(length(seen), 3) + } +} + +[test] +def test_matrix_distinct_iter_to_array(t : T?) { + t |> run("distinct(): iter src + to_array() → array const") @(tt : T?) { + let got <- _fold([1, 2, 1, 3, 2, 3, 1].to_sequence() |> distinct() |> to_array()) + tt |> equal(typeinfo typename(got), "array const") + tt |> equal(length(got), 3) + } +} + +[test] +def test_matrix_distinct_array_to_iter(t : T?) { + t |> run("distinct(): array src, no to_array → iterator") @(tt : T?) 
{ + let arr = [1, 2, 1, 3, 2, 3, 1] + var got <- _fold(each(arr) |> distinct()) + tt |> equal(typeinfo typename(got), "iterator") + var seen : array + for (v in got) { + seen |> push(v) + } + tt |> equal(length(seen), 3) + } +} + +[test] +def test_matrix_distinct_array_to_array(t : T?) { + t |> run("distinct(): array src + to_array() → array const") @(tt : T?) { + let arr = [1, 2, 1, 3, 2, 3, 1] + let got <- _fold(each(arr) |> distinct() |> to_array()) + tt |> equal(typeinfo typename(got), "array const") + tt |> equal(length(got), 3) + } +} + +[test] +def test_matrix_distinct_by_iter_to_iter(t : T?) { + t |> run("distinct_by(_): iter src, no to_array → iterator") @(tt : T?) { + var got <- _fold([10, 21, 32, 11, 23, 30, 13].to_sequence()._distinct_by(_ % 10)) + tt |> equal(typeinfo typename(got), "iterator") + var count = 0 + for (_v in got) { + count ++ + } + tt |> equal(count, 4) // mod-10 keys: {0,1,2,3} from 10,21,32,_,23,_,_ + } +} + +// ═════════════════════════════════════════════════════════════════════════════ +// 3. reverse() + distinct() — matrix +// Array path goes through emit_reverse_backward_walk_dset_gate (R-2a, single backward walk). +// Iter path falls through to emit_hashtable_dedup with an upstream reverse iterator. +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_matrix_reverse_distinct_iter_to_iter(t : T?) { + t |> run("reverse() + distinct(): iter src, no to_array → iterator") @(tt : T?) { + var got <- _fold([3, 1, 2, 1, 3, 2].to_sequence() |> reverse() |> distinct()) + tt |> equal(typeinfo typename(got), "iterator") + var collected : array + for (v in got) { + collected |> push(v) + } + // reverse → [2,3,1,2,1,3]; distinct first-seen → [2,3,1] + tt |> equal(length(collected), 3) + tt |> equal(collected[0], 2) + } +} + +[test] +def test_matrix_reverse_distinct_iter_to_array(t : T?) { + t |> run("reverse() + distinct(): iter src + to_array() → array const") @(tt : T?) 
{ + let got <- _fold([3, 1, 2, 1, 3, 2].to_sequence() |> reverse() |> distinct() |> to_array()) + tt |> equal(typeinfo typename(got), "array const") + tt |> equal(length(got), 3) + tt |> equal(got[0], 2) + } +} + +[test] +def test_matrix_reverse_distinct_array_to_iter(t : T?) { + t |> run("reverse() + distinct(): array src, no to_array → iterator") @(tt : T?) { + let arr = [3, 1, 2, 1, 3, 2] + var got <- _fold(each(arr) |> reverse() |> distinct()) + tt |> equal(typeinfo typename(got), "iterator") + var collected : array + for (v in got) { + collected |> push(v) + } + tt |> equal(length(collected), 3) + tt |> equal(collected[0], 2) + } +} + +[test] +def test_matrix_reverse_distinct_array_to_array(t : T?) { + t |> run("reverse() + distinct(): array src + to_array() → array const") @(tt : T?) { + let arr = [3, 1, 2, 1, 3, 2] + let got <- _fold(each(arr) |> reverse() |> distinct() |> to_array()) + tt |> equal(typeinfo typename(got), "array const") + tt |> equal(length(got), 3) + tt |> equal(got[0], 2) + } +} + +// ═════════════════════════════════════════════════════════════════════════════ +// 4. reverse() + take(N) — matrix +// Array path goes through emit_reverse_backward_index_walk (R6, visits only last N indices). +// Iter path falls through to emit_reverse_buffer_inplace (full buffer + reverse_inplace + resize). +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_matrix_reverse_take_iter_to_iter(t : T?) { + t |> run("reverse() + take(N): iter src, no to_array → iterator") @(tt : T?) { + var got <- _fold([10, 20, 30, 40, 50].to_sequence() |> reverse() |> take(2)) + tt |> equal(typeinfo typename(got), "iterator") + var collected : array + for (v in got) { + collected |> push(v) + } + tt |> equal(length(collected), 2) + tt |> equal(collected[0], 50) + tt |> equal(collected[1], 40) + } +} + +[test] +def test_matrix_reverse_take_iter_to_array(t : T?) 
{ + t |> run("reverse() + take(N): iter src + to_array() → array const") @(tt : T?) { + let got <- _fold([10, 20, 30, 40, 50].to_sequence() |> reverse() |> take(2) |> to_array()) + tt |> equal(typeinfo typename(got), "array const") + tt |> equal(length(got), 2) + tt |> equal(got[0], 50) + tt |> equal(got[1], 40) + } +} + +[test] +def test_matrix_reverse_take_array_to_iter(t : T?) { + t |> run("reverse() + take(N): array src, no to_array → iterator") @(tt : T?) { + let arr = [10, 20, 30, 40, 50] + var got <- _fold(each(arr) |> reverse() |> take(2)) + tt |> equal(typeinfo typename(got), "iterator") + var collected : array + for (v in got) { + collected |> push(v) + } + tt |> equal(length(collected), 2) + tt |> equal(collected[0], 50) + tt |> equal(collected[1], 40) + } +} + +[test] +def test_matrix_reverse_take_array_to_array(t : T?) { + t |> run("reverse() + take(N): array src + to_array() → array const") @(tt : T?) { + let arr = [10, 20, 30, 40, 50] + let got <- _fold(each(arr) |> reverse() |> take(2) |> to_array()) + tt |> equal(typeinfo typename(got), "array const") + tt |> equal(length(got), 2) + tt |> equal(got[0], 50) + tt |> equal(got[1], 40) + } +} diff --git a/tests/linq/test_linq_fold_loop_or_count.das b/tests/linq/test_linq_fold_loop_or_count.das new file mode 100644 index 0000000000..9b388d8c86 --- /dev/null +++ b/tests/linq/test_linq_fold_loop_or_count.das @@ -0,0 +1,146 @@ +options gen2 + +require daslib/linq +require daslib/linq_boost +require daslib/linq_fold +require dastest/testing_boost public + +// PR B1 regression coverage — plan_loop_or_count migrated to pattern-table. +// +// The new architecture: `slot_chain_of(["where_", "select"], "head")` greedy-consumes the +// pre-range head; canonical-order positional slots (skip / skip_while / take_while / take / +// post_take_where / term) carry the rest. 
The emit fn (emit_loop_or_count_lane) walks +// c.many["head"] applying the same where-after-select / chained-select / AND-merge logic the +// imperative loop did, then dispatches to the same lane emit fns. +// +// These tests assert end-to-end correctness across the lane × head-shape matrix. + +// ═════════════════════════════════════════════════════════════════════════════ +// 1. Canonical chains — one lane per terminator family +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_counter_lane(t : T?) { + t |> run("counter lane: where + count") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + let n = _fold(each(arr)._where(_ > 3).count()) + tt |> equal(n, 7) + } +} + +[test] +def test_array_lane(t : T?) { + t |> run("array lane: select + implicit to_array") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5] + let buf <- _fold(each(arr)._select(_ * 10) |> to_array()) + tt |> equal(length(buf), 5) + tt |> equal(buf[0], 10) + tt |> equal(buf[4], 50) + } +} + +[test] +def test_accumulator_lane(t : T?) { + t |> run("accumulator lane: where + select + sum") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + let s = _fold(each(arr)._where(_ > 2)._select(_ * 2).sum()) + // 3+4+5+6+7+8+9+10 = 52; ×2 = 104 + tt |> equal(s, 104) + } +} + +[test] +def test_early_exit_lane(t : T?) { + t |> run("early-exit lane: where + any") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5] + let yes = _fold(each(arr)._where(_ > 3).any()) + let no = _fold(each(arr)._where(_ > 100).any()) + tt |> equal(yes, true) + tt |> equal(no, false) + } +} + +// ═════════════════════════════════════════════════════════════════════════════ +// 2. Where-after-select rebinding (single & multiple) +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_where_after_select(t : T?) { + t |> run("where(p2) after select(f) sees PROJECTED value, not raw source") @(tt : T?) 
{ + let arr <- [1, 2, 3, 4, 5] + // _ > 1 keeps [2,3,4,5]; project ×10 → [20,30,40,50]; then keep _%20==0 → [20,40]; count=2. + let n = _fold(each(arr)._where(_ > 1)._select(_ * 10)._where(_ % 20 == 0).count()) + tt |> equal(n, 2) + } +} + +[test] +def test_multiple_wheres_post_select(t : T?) { + t |> run("two wheres after a select: collapse_chained_wheres composes them") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + // ×2 → [2..20], _>5 keeps [6,8,10,12,14,16,18,20], _%4==0 keeps [8,12,16,20] = 4. + let n = _fold(each(arr)._select(_ * 2)._where(_ > 5)._where(_ % 4 == 0).count()) + tt |> equal(n, 4) + } +} + +// ═════════════════════════════════════════════════════════════════════════════ +// 3. Range op chains (skip / take / take_while) +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_skip_take_count(t : T?) { + t |> run("where + skip(N) + take(M) + count") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + // _ > 0 keeps all; skip 3 → [4..10]; take 4 → [4,5,6,7]; count=4. + let n = _fold(each(arr)._where(_ > 0).skip(3).take(4).count()) + tt |> equal(n, 4) + } +} + +[test] +def test_take_while_sum(t : T?) { + t |> run("take_while + sum: streams until predicate fails") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5, 6] + // _take_while(_<4): [1,2,3]; sum=6. + let s = _fold(each(arr)._take_while(_ < 4).sum()) + tt |> equal(s, 6) + } +} + +// ═════════════════════════════════════════════════════════════════════════════ +// 4. Post-take where (Theme 2 5c — gates contribution; take cap still ticks) +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_post_take_where(t : T?) { + t |> run("take(N)._where(p): take fires unconditionally, where gates per-element acc") @(tt : T?) { + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + // take(5) → [1..5]; _%2==0 keeps [2,4]; count=2. 
+ let n = _fold(each(arr).take(5)._where(_ % 2 == 0).count()) + tt |> equal(n, 2) + } +} + +// ═════════════════════════════════════════════════════════════════════════════ +// 5. Fast paths — length shortcut + any-empty shortcut on length-bearing source +// ═════════════════════════════════════════════════════════════════════════════ + +[test] +def test_length_shortcut(t : T?) { + t |> run("count() on bare length-bearing source: emit_length_shortcut") @(tt : T?) { + let arr <- [10, 20, 30, 40, 50] + let n = _fold(each(arr).count()) + tt |> equal(n, 5) + } +} + +[test] +def test_any_empty_shortcut(t : T?) { + t |> run("any() no-pred on bare length-bearing source: emit_any_empty_shortcut") @(tt : T?) { + let arr <- [1, 2, 3] + var empty_arr : array + tt |> equal(_fold(each(arr).any()), true) + tt |> equal(_fold(each(empty_arr).any()), false) + } +} diff --git a/tests/linq/test_linq_fold_pattern_walker.das b/tests/linq/test_linq_fold_pattern_walker.das new file mode 100644 index 0000000000..99cfa03772 --- /dev/null +++ b/tests/linq/test_linq_fold_pattern_walker.das @@ -0,0 +1,170 @@ +options gen2 + +require daslib/ast_boost +require daslib/linq_fold +require dastest/testing_boost public + +// PR A — pattern-table refactor: walker + prefix-conflict lint tests. +// +// The per-plan pattern tables (`plan_reverse_patterns`, `plan_distinct_patterns`) are populated by +// `[_macro]` functions at MACRO time, not at runtime — the function pointers in the rows reference +// `[macro_function]` emit fns whose bodies the LLVM JIT can't lower (quote() nodes). So at runtime +// these tables read as empty; the lint helpers below are exercised on synthetic tables instead. +// End-to-end pattern selection is covered by the existing test_linq_fold_*.das suite (each user +// chain exercises the emit fns through `_fold`). + +[test] +def test_alias_table_resolves(t : T?) { + t |> run("alias_table: PR A aliases populated (runtime-initialized via literal)") @(tt : T?) 
{ + tt |> equal(alias_table |> key_exists("distinct_family"), true) + tt |> equal(alias_table |> key_exists("first_family"), true) + tt |> equal(alias_table |> key_exists("count_family"), true) + tt |> equal(alias_table |> key_exists("distinct_terminator_family"), true) + tt |> equal(length(alias_table["distinct_family"]), 2) + tt |> equal(length(alias_table["first_family"]), 2) + tt |> equal(length(alias_table["distinct_terminator_family"]), 3) // count / long_count / sum — see linq_fold.das alias_table + } +} + +// ─── Prefix-conflict helper: positive case (synthetic patterns we KNOW shadow) ─── + +[test] +def test_chain_prefix_of_positive(t : T?) { + t |> run("chain_prefix_of: A is a strict prefix of B when A's slots structurally match B's first N") @(tt : T?) { + let prefix : array <- [ + Slot(matcher = m_literal("reverse"), cardinality = c_one()) + ] + let longer : array <- [ + Slot(matcher = m_literal("reverse"), cardinality = c_one()), + Slot(matcher = m_literal("take"), cardinality = c_one(), capture_name = "take") + ] + tt |> equal(chain_prefix_of(prefix, longer), true) + tt |> equal(chain_prefix_of(longer, prefix), false) // longer is not a prefix of shorter + } +} + +[test] +def test_chain_prefix_of_negative(t : T?) { + t |> run("chain_prefix_of: different matchers at any position break the prefix") @(tt : T?) { + let a : array <- [ + Slot(matcher = m_literal("reverse"), cardinality = c_one()), + Slot(matcher = m_literal("count"), cardinality = c_one()) + ] + let b : array <- [ + Slot(matcher = m_literal("reverse"), cardinality = c_one()), + Slot(matcher = m_literal("take"), cardinality = c_one()), + Slot(matcher = m_literal("first"), cardinality = c_one()) + ] + tt |> equal(chain_prefix_of(a, b), false) + } +} + +[test] +def test_chain_prefix_of_alias_equality(t : T?) { + t |> run("chain_prefix_of: alias matchers compare by alias name") @(tt : T?) 
{ + let a : array <- [ + Slot(matcher = m_alias("distinct_family"), cardinality = c_one()) + ] + let b : array <- [ + Slot(matcher = m_alias("distinct_family"), cardinality = c_one()), + Slot(matcher = m_literal("take"), cardinality = c_opt()) + ] + let c : array <- [ + Slot(matcher = m_alias("first_family"), cardinality = c_one()), + Slot(matcher = m_literal("take"), cardinality = c_opt()) + ] + tt |> equal(chain_prefix_of(a, b), true) + tt |> equal(chain_prefix_of(a, c), false) + } +} + +def null_emit(var c : Captures; var ctx : EmitCtx; at : LineInfo) : Expression? { + return null +} + +// ─── c_chain cardinality (PR B) — constructor + lint-helper structural tests ─── +// Semantic coverage of c_chain (empty / all-match / prefix-then-stop / arity gate) is end-to-end +// via tests/linq/test_linq_fold_loop_or_count.das — the walker stays private and is exercised +// through plan_loop_or_count's pattern table just like plan_reverse / plan_distinct in PR A. + +[test] +def test_slot_chain_of_constructs_expected_shape(t : T?) { + t |> run("slot_chain_of: produces c_chain cardinality + one_of matcher + capture_name") @(tt : T?) { + let s = slot_chain_of(["where_", "select"], "head") + tt |> equal(s.cardinality is chain, true) + tt |> equal(s.cardinality is one, false) + tt |> equal(s.cardinality is optional, false) + tt |> equal(s.matcher is one_of, true) + let names & = unsafe(s.matcher as one_of) + tt |> equal(length(names), 2) + tt |> equal(names[0], "where_") + tt |> equal(names[1], "select") + tt |> equal(s.capture_name, "head") + tt |> equal(s.arity, -1) + } +} + +[test] +def test_slots_structurally_match_distinguishes_c_chain(t : T?) { + t |> run("slots_structurally_match: c_chain vs c_one vs c_opt at same matcher are distinct shapes") @(tt : T?) { + // c_chain ≢ c_one ≢ c_opt at the structural level — chain_prefix_of must reject mixed-cardinality matches. 
+ let chainOnly : array<Slot> <- [slot_chain_of(["x"], "h")] + let oneOpt : array<Slot> <- [ + Slot(matcher = SlotMatcher(one_of <- ["x"]), cardinality = c_one(), capture_name = "h"), + Slot(matcher = SlotMatcher(one_of <- ["x"]), cardinality = c_opt(), capture_name = "h") + ] + let chainPair : array<Slot> <- [ + slot_chain_of(["x"], "h"), + Slot(matcher = SlotMatcher(one_of <- ["x"]), cardinality = c_opt(), capture_name = "h") + ] + tt |> equal(chain_prefix_of(chainOnly, oneOpt), false) + tt |> equal(chain_prefix_of(chainOnly, chainPair), true) + } +} + +[test] +def test_check_pattern_table_reachable_accepts_c_chain(t : T?) { + t |> run("check_pattern_table_reachable: c_chain head + distinct trailers stay reachable (no shadow)") @(tt : T?) { + var patterns : array<SplicePattern> + patterns |> emplace <| SplicePattern( + name = "chain_then_take", + chain <- [ + slot_chain_of(["where_", "select"], "head"), + Slot(matcher = m_literal("take"), cardinality = c_opt(), capture_name = "take") + ], + emit = @@ < EmitFn > null_emit + ) + patterns |> emplace <| SplicePattern( + name = "chain_then_count", + chain <- [ + slot_chain_of(["where_", "select"], "head"), + Slot(matcher = m_literal("count"), cardinality = c_opt(), capture_name = "term") + ], + emit = @@ < EmitFn > null_emit + ) + tt |> equal(check_pattern_table_reachable("synthetic-c_chain-trailers", patterns), true) + } +} + +[test] +def test_check_pattern_table_reachable_catches_shadow(t : T?) { + t |> run("check_pattern_table_reachable: returns false for a table with a strict-prefix shadow") @(tt : T?) 
{ + var bad : array<SplicePattern> + bad |> emplace <| SplicePattern( + name = "short", + chain <- [ + Slot(matcher = m_literal("reverse"), cardinality = c_one()) + ], + emit = @@ < EmitFn > null_emit + ) + bad |> emplace <| SplicePattern( + name = "long_shadowed", + chain <- [ + Slot(matcher = m_literal("reverse"), cardinality = c_one()), + Slot(matcher = m_literal("take"), cardinality = c_opt()) + ], + emit = @@ < EmitFn > null_emit + ) + tt |> equal(check_pattern_table_reachable("synthetic-shadowed", bad), false) + } +} diff --git a/tests/linq/test_linq_fold_terminal_select.das b/tests/linq/test_linq_fold_terminal_select.das index 0b735732bc..381cf8fddf 100644 --- a/tests/linq/test_linq_fold_terminal_select.das +++ b/tests/linq/test_linq_fold_terminal_select.das @@ -128,6 +128,74 @@ def test_reverse_select_first_array(t : T?) { } } +// PR A extension: chains with BOTH a pre-reverse `_select(f)` AND a post-reverse `_select(g)` were +// previously rejected by the imperative plan_reverse (it had a `!seenSelect` guard before letting any +// terminal _select fire). The new pattern-table forms (R1-R4 + Rb) accept the combined shape because +// emit composes `pushExpr` from the pre-projection and the terminal-select pass from the post-projection; +// there's no semantic conflict (it's plain function composition over the reversed survivors). + +[test] +def test_reverse_pre_and_post_select_array(t : T?) { + t |> run("plan_reverse R1-R4: where + select(f) + reverse + take + select(g) — both selects compose") @(tt : T?) 
{ + let sounds <- make_sounds() + unsafe { + // pre-select: id*10 → [10, 20, 30, 40, 50] + // reverse: → [50, 40, 30, 20, 10] + // take 3: → [50, 40, 30] + // post-select: +1 → [51, 41, 31] + let out <- _fold(each(sounds)._where(_.rank > 0)._select(_.id * 10).reverse().take(3)._select(_ + 1).to_array()) + tt |> equal(length(out), 3) + tt |> equal(out[0], 51) + tt |> equal(out[1], 41) + tt |> equal(out[2], 31) + } + } +} + +[test] +def test_reverse_pre_and_post_select_first(t : T?) { + t |> run("plan_reverse Rb: where + select(f) + reverse + select(g) + first — both selects compose") @(tt : T?) { + let sounds <- make_sounds() + unsafe { + // pre-select: id*10 → [10, 20, 30, 40, 50] + // reverse: → [50, 40, 30, 20, 10] + // post-select: +1 → [51, 41, 31, 21, 11] + // first: → 51 + let v = _fold(each(sounds)._where(_.rank > 0)._select(_.id * 10).reverse()._select(_ + 1).first()) + tt |> equal(v, 51) + } + } +} + +// Regression for the bug Copilot caught in R6: emit must NOT re-project the user's `first_or_default` default +// through the post-reverse `_select(g)`. The default is already typed at the post-select element type, so +// applying termsel(d) double-applies and miscomputes when the chain hits the empty branch. + +[test] +def test_reverse_pre_and_post_select_first_or_default_nonempty(t : T?) { + t |> run("plan_reverse Rb: where + select(f) + reverse + select(g) + first_or_default — nonempty hits found branch") @(tt : T?) { + let sounds <- make_sounds() + unsafe { + // sequence non-empty: termsel(lastName) = (50)+1 = 51 + let v = _fold(each(sounds)._where(_.rank > 0)._select(_.id * 10).reverse()._select(_ + 1).first_or_default(-99)) + tt |> equal(v, 51) + } + } +} + +[test] +def test_reverse_pre_and_post_select_first_or_default_empty(t : T?) { + t |> run("plan_reverse Rb: where + select(f) + reverse + select(g) + first_or_default — empty seq returns RAW default, not termsel(default)") @(tt : T?) 
{ + let sounds <- make_sounds() + unsafe { + // where filters everything out → empty sequence → default branch fires. + // If emit re-projects, result would be (-99)+1 = -98 (wrong). Must be -99. + let v = _fold(each(sounds)._where(_.rank > 9999)._select(_.id * 10).reverse()._select(_ + 1).first_or_default(-99)) + tt |> equal(v, -99) + } + } +} + [test] def test_reverse_take_select_decs(t : T?) { t |> run("plan_decs_reverse: reverse + take + terminal _select") @(tt : T?) { diff --git a/tests/with_boost/failed_with_arity_mismatch.das b/tests/with_boost/failed_with_arity_mismatch.das new file mode 100644 index 0000000000..e9ab5d4a2d --- /dev/null +++ b/tests/with_boost/failed_with_arity_mismatch.das @@ -0,0 +1,22 @@ +options gen2 +expect 50503 + +// Explicitly-typed block param so pre-macro typing succeeds; the macro +// then catches the arity mismatch. + +require daslib/with_boost + +struct A { + f1 : int +} + +[export] +def main { + var a <- [A(f1=1)] + var b <- [A(f1=2)] + with_(a[0], b[0]) $(var x : A) { // refuse: 2 containers but only 1 block param + print("{x.f1}") + } + delete a + delete b +} diff --git a/tests/with_boost/failed_with_array_literal.das b/tests/with_boost/failed_with_array_literal.das new file mode 100644 index 0000000000..df65b6fd17 --- /dev/null +++ b/tests/with_boost/failed_with_array_literal.das @@ -0,0 +1,17 @@ +options gen2 +expect 50503 + +require daslib/with_boost + +struct A { + f1 : int +} + +[export] +def main { + // Array literal as container — temp lifetime ends with the + // expression, can't be ref-bound. Refused at macro time. 
+ with_([A(f1 = 1), A(f1 = 2)][0]) { + print("body") + } +} diff --git a/tests/with_boost/failed_with_function_call.das b/tests/with_boost/failed_with_function_call.das new file mode 100644 index 0000000000..ad3529612c --- /dev/null +++ b/tests/with_boost/failed_with_function_call.das @@ -0,0 +1,19 @@ +options gen2 +expect 50503 + +require daslib/with_boost + +struct A { + f1 : int +} + +def get_struct() : A { + return A(f1 = 42) +} + +[export] +def main { + with_(get_struct()) { // refuse: not ExprAt + print("body") + } +} diff --git a/tests/with_boost/failed_with_local.das b/tests/with_boost/failed_with_local.das new file mode 100644 index 0000000000..33ce54fdba --- /dev/null +++ b/tests/with_boost/failed_with_local.das @@ -0,0 +1,16 @@ +options gen2 +expect 50503 + +require daslib/with_boost + +struct A { + f1 : int +} + +[export] +def main { + var local = A(f1 = 1) + with_(local) { // refuse: not ExprAt + print("body") + } +} diff --git a/tests/with_boost/failed_with_nested_array.das b/tests/with_boost/failed_with_nested_array.das new file mode 100644 index 0000000000..b84c0eda33 --- /dev/null +++ b/tests/with_boost/failed_with_nested_array.das @@ -0,0 +1,20 @@ +options gen2 +expect 50503 + +require daslib/with_boost + +struct Outer { + inner : array<int> +} + +[export] +def main { + var arr : array<Outer> + // Refused at macro time: subexpr `arr[0].inner` contains a nested + // ExprAt (`arr[0]`). The macro would only lock `inner`, leaving + // `arr` mutable — pushing to `arr` from inside the body would + // reallocate it and invalidate the `inner` ref. 
+ with_(arr[0].inner[0]) { + print("body") + } +} diff --git a/tests/with_boost/failed_with_two_tables.das b/tests/with_boost/failed_with_two_tables.das new file mode 100644 index 0000000000..2829045cc5 --- /dev/null +++ b/tests/with_boost/failed_with_two_tables.das @@ -0,0 +1,20 @@ +options gen2 +expect 50503 + +// Explicitly-typed block params so daslang's pre-macro block typing +// succeeds; the macro then reaches its single-table-only rule. + +require daslib/with_boost + +struct A { + f1 : int +} + +[export] +def main { + var t1 : table<string; A> + var t2 : table<string; A> + with_(t1["k"], t2["q"]) $(var a : A; var b : A) { // refuse: max one table-keyed arg per call + print("{a.f1} {b.f1}") + } +} diff --git a/tests/with_boost/test_with_array.das b/tests/with_boost/test_with_array.das new file mode 100644 index 0000000000..9c7483841b --- /dev/null +++ b/tests/with_boost/test_with_array.das @@ -0,0 +1,107 @@ +options gen2 +options indenting = 4 + +require dastest/testing_boost public +require daslib/with_boost + +struct A { + f1 : int + f2 : int +} + +[test] +def test_single_arg_default_name(t : T?) { + //! `with_(arr[i]) { _.field = ... }` — no block params, `_` injected, + //! mutation persists in the underlying array. + var arr <- [A(f1=1, f2=2), A(f1=3, f2=4)] + with_(arr[0]) { + _.f1 = 99 + } + t |> equal(arr[0].f1, 99, "default _ mutation persists in arr[0]") + t |> equal(arr[0].f2, 2, "unrelated field untouched") + t |> equal(arr[1].f1, 3, "other element untouched") + delete arr +} + +[test] +def test_single_arg_named_binding(t : T?) { + //! `with_(arr[i]) $(myA) { myA.field = ... }` — user-named param, + //! constness stripped so mutation persists. + var arr <- [A(f1=10, f2=20)] + with_(arr[0]) $(myA) { + myA.f1 = 111 + myA.f2 = 222 + } + t |> equal(arr[0].f1, 111, "named binding mutation persists") + t |> equal(arr[0].f2, 222, "second-field mutation persists") + delete arr +} + +[test] +def test_two_arg_array(t : T?) { + //! 
Multi-arg form: two arrays bound to two block params, + //! mutations persist in both. + var src <- [A(f1=100, f2=200)] + var dst <- [A(f1=0, f2=0)] + with_(dst[0], src[0]) $(d, s) { + d.f1 = s.f1 + 1 + d.f2 = s.f2 + 2 + } + t |> equal(dst[0].f1, 101, "dst.f1 = src.f1 + 1") + t |> equal(dst[0].f2, 202, "dst.f2 = src.f2 + 2") + delete src + delete dst +} + +[test] +def test_three_arg_array(t : T?) { + //! Arity 3: three arrays, three block params. + var a <- [A(f1=0, f2=0)] + var b <- [A(f1=10, f2=20)] + var c <- [A(f1=100, f2=200)] + with_(a[0], b[0], c[0]) $(va, vb, vc) { + va.f1 = vb.f1 + vc.f1 + va.f2 = vb.f2 + vc.f2 + } + t |> equal(a[0].f1, 110, "sum across three arrays in f1") + t |> equal(a[0].f2, 220, "sum across three arrays in f2") + delete a + delete b + delete c +} + +struct Owner { + children : array<A> +} + +[test] +def test_field_chain_container(t : T?) { + //! Container is `obj.field[i]` — subexpr is the ExprField + //! `obj.children`, an lvalue chain rooted in `obj` (ExprVar). + //! The lvalue-chain check passes and pre-bind works. + var obj : Owner + obj.children |> push(A(f1 = 5, f2 = 6)) + obj.children |> push(A(f1 = 7, f2 = 8)) + with_(obj.children[1]) $(elem) { + elem.f1 = 555 + } + t |> equal(obj.children[1].f1, 555, "second-element mutation persists") + t |> equal(obj.children[0].f1, 5, "first element untouched") + delete obj.children +} + +def helper_set_f1(var aRef : A&; v : int) { + aRef.f1 = v +} + +[test] +def test_pass_binding_to_function(t : T?) { + //! The block-arg can be passed to another function expecting a + //! mutable ref; mutations through that path also persist. 
+ var arr <- [A(f1=1, f2=2)] + with_(arr[0]) $(a) { + helper_set_f1(a, 777) + } + t |> equal(arr[0].f1, 777, "mutation through helper-fn ref persists") + delete arr +} diff --git a/tests/with_boost/test_with_lock_panics.das b/tests/with_boost/test_with_lock_panics.das new file mode 100644 index 0000000000..d768e2584f --- /dev/null +++ b/tests/with_boost/test_with_lock_panics.das @@ -0,0 +1,116 @@ +options gen2 +options indenting = 4 + +require dastest/testing_boost public +require daslib/with_boost + +//! When the with_ body panics, the lock leaks (daslang issue #2532). +//! These tests deliberately don't try to ``delete`` the leaked-locked +//! containers afterwards — that would compound the failure. We only +//! verify the panic is caught at the with_ boundary. + +struct A { + f1 : int +} + +[test] +def test_push_inside_panics(t : T?) { + var arr <- [A(f1=1), A(f1=2)] + var panicked = false + try { + with_(arr[0]) $(a) { + arr |> push(A(f1 = 99)) + } + } recover { + panicked = true + } + t |> success(panicked, "push during with_ body must panic") +} + +[test] +def test_erase_inside_panics(t : T?) { + var arr <- [A(f1=1), A(f1=2), A(f1=3)] + var panicked = false + try { + with_(arr[1]) $(a) { + arr |> erase(0) + } + } recover { + panicked = true + } + t |> success(panicked, "erase during with_ body must panic") +} + +[test] +def test_resize_inside_panics(t : T?) { + var arr <- [A(f1=1)] + var panicked = false + try { + with_(arr[0]) $(a) { + arr |> resize(100) + } + } recover { + panicked = true + } + t |> success(panicked, "resize during with_ body must panic") +} + +[test] +def test_clear_inside_panics(t : T?) { + var arr <- [A(f1=1)] + var panicked = false + try { + with_(arr[0]) $(a) { + arr |> clear + } + } recover { + panicked = true + } + t |> success(panicked, "clear during with_ body must panic") +} + +[test] +def test_table_insert_inside_panics(t : T?) 
{ + var tab : table<string; A> + tab |> insert("a", A(f1 = 1)) + var panicked = false + try { + with_(tab["a"]) $(v) { + tab |> insert("b", A(f1 = 2)) + } + } recover { + panicked = true + } + t |> success(panicked, "table insert during with_ body must panic") +} + +[test] +def test_table_erase_inside_panics(t : T?) { + var tab : table<string; A> + tab |> insert("a", A(f1 = 1)) + tab |> insert("b", A(f1 = 2)) + var panicked = false + try { + with_(tab["a"]) $(v) { + tab |> erase("b") + } + } recover { + panicked = true + } + t |> success(panicked, "table erase during with_ body must panic") +} + +[test] +def test_multi_arg_inner_push_panics(t : T?) { + var a <- [A(f1=1)] + var b <- [A(f1=2)] + var panicked = false + try { + with_(a[0], b[0]) $(va, vb) { + b |> push(A(f1 = 99)) + } + } recover { + panicked = true + } + t |> success(panicked, "multi-arg: mutation to either locked array must panic") +} diff --git a/tests/with_boost/test_with_n_arg.das b/tests/with_boost/test_with_n_arg.das new file mode 100644 index 0000000000..8f2b3c380f --- /dev/null +++ b/tests/with_boost/test_with_n_arg.das @@ -0,0 +1,59 @@ +options gen2 +options indenting = 4 + +require dastest/testing_boost public +require daslib/with_boost + +struct A { + f1 : int +} + +[test] +def test_five_arrays(t : T?) { + //! 5 array containers, one block, all-array. Proves the inline + //! emission scales past the old per-arity-helper limit (3). + var a1 <- [A(f1 = 1)] + var a2 <- [A(f1 = 2)] + var a3 <- [A(f1 = 3)] + var a4 <- [A(f1 = 4)] + var a5 <- [A(f1 = 5)] + with_(a1[0], a2[0], a3[0], a4[0], a5[0]) $(x1, x2, x3, x4, x5) { + x1.f1 = x1.f1 + x2.f1 + x3.f1 + x4.f1 + x5.f1 + } + t |> equal(a1[0].f1, 15, "1 + 2 + 3 + 4 + 5 = 15 written to a1[0]") + t |> equal(a2[0].f1, 2, "a2 untouched") + t |> equal(a5[0].f1, 5, "a5 untouched") +} + +[test] +def test_mixed_4_arrays_1_table(t : T?) { + //! 4 arrays + 1 table — single table allowed, mixed positionally. 
+ var arr_w <- [0] + var arr_x <- [10] + var arr_y <- [100] + var arr_z <- [1000] + var tab : table<string; int> + tab |> insert("k", 50) + with_(arr_w[0], arr_x[0], tab["k"], arr_y[0], arr_z[0]) $(w, x, t_k, y, z) { + w = x + t_k + y + z + t_k = 999 + } + t |> equal(arr_w[0], 10 + 50 + 100 + 1000, "sum across mixed array+table args") + t |> equal(tab["k"], 999, "table entry mutated") +} + +[test] +def test_seven_arrays(t : T?) { + //! Even larger N — verifies macro really doesn't have an arity cap. + var a <- [1] + var b <- [2] + var c <- [3] + var d <- [4] + var e <- [5] + var f <- [6] + var g <- [7] + with_(a[0], b[0], c[0], d[0], e[0], f[0], g[0]) $(va, vb, vc, vd, ve, vf, vg) { + va = vb + vc + vd + ve + vf + vg + } + t |> equal(a[0], 27, "2+3+4+5+6+7 = 27") +} diff --git a/tests/with_boost/test_with_table.das b/tests/with_boost/test_with_table.das new file mode 100644 index 0000000000..614a9a50eb --- /dev/null +++ b/tests/with_boost/test_with_table.das @@ -0,0 +1,60 @@ +options gen2 +options indenting = 4 + +require dastest/testing_boost public +require daslib/with_boost + +struct A { + f1 : int + f2 : int +} + +[test] +def test_existing_key(t : T?) { + //! Mutation on an existing key persists in the table. + var tab : table<string; A> + tab |> insert("k", A(f1 = 1, f2 = 2)) + with_(tab["k"]) $(v) { + v.f1 = 99 + v.f2 = 100 + } + t |> equal(tab["k"].f1, 99, "key mutation persists") + t |> equal(tab["k"].f2, 100, "second field mutation persists") +} + +[test] +def test_missing_key_creates_default(t : T?) { + //! tab[k] is upsert: missing key gets a default entry, then the + //! body's mutations persist on that newly-created entry. + var tab : table<string; A> + with_(tab["new"]) $(v) { + v.f1 = 42 + } + t |> success(key_exists(tab, "new"), "missing key was created") + t |> equal(tab["new"].f1, 42, "mutation on new entry persists") +} + +[test] +def test_table_default_name(t : T?) { + //! Default `_` binding works for tables too. 
+ var tab : table<string; A> + tab |> insert("x", A(f1 = 5, f2 = 6)) + with_(tab["x"]) { + _.f1 = 77 + } + t |> equal(tab["x"].f1, 77, "default _ binding mutation persists") +} + +[test] +def test_table_workhorse_value(t : T?) { + //! Table with workhorse value type — block-arg is `int&`, mutation + //! persists. + var tab : table<string; int> + tab |> insert("alpha", 10) + tab |> insert("beta", 20) + with_(tab["alpha"]) $(v) { + v = 200 + } + t |> equal(tab["alpha"], 200, "workhorse value mutation persists") + t |> equal(tab["beta"], 20, "other key untouched") +} diff --git a/tests/with_boost/test_with_workhorse.das b/tests/with_boost/test_with_workhorse.das new file mode 100644 index 0000000000..5120fd0fd0 --- /dev/null +++ b/tests/with_boost/test_with_workhorse.das @@ -0,0 +1,71 @@ +options gen2 +options indenting = 4 + +require dastest/testing_boost public +require daslib/with_boost + +struct A { + f1 : int +} + +[test] +def test_int_array_default_name(t : T?) { + //! `with_(arr[i]) { _ = X }` on a workhorse-element array — block + //! param `_` is `int&`, mutation persists in the array. + var arr <- [10, 20, 30] + with_(arr[1]) { + _ = 222 + } + t |> equal(arr[0], 10, "untouched element stays") + t |> equal(arr[1], 222, "workhorse mutation persists") + t |> equal(arr[2], 30, "untouched element stays") + delete arr +} + +[test] +def test_int_array_named_binding(t : T?) { + //! Named binding `$(x)` — same propagation, daslang infers `x : int&` + //! from the helper sig. + var arr <- [1, 2, 3] + with_(arr[0]) $(x) { + x = 999 + } + t |> equal(arr[0], 999, "named workhorse binding mutation persists") + delete arr +} + +[test] +def test_float_array_named(t : T?) { + var arr <- [1.0f, 2.0f, 3.0f] + with_(arr[2]) $(x) { + x = 99.5f + } + t |> equal(arr[2], 99.5f, "float workhorse mutation persists") + delete arr +} + +[test] +def test_multi_arg_workhorse(t : T?) { + //! Multi-arg form across two workhorse arrays. 
+ var a <- [10, 20, 30] + var b <- [100, 200, 300] + with_(a[0], b[1]) $(x, y) { + x = y + 1 + } + t |> equal(a[0], 201, "x = y + 1 with workhorse types") + t |> equal(b[1], 200, "source unchanged") + delete a + delete b +} + +[test] +def test_mixed_workhorse_struct(t : T?) { + var ints <- [10, 20] + var structs <- [A(f1 = 100)] + with_(ints[0], structs[0]) $(i, s) { + i = s.f1 + 1 + } + t |> equal(ints[0], 101, "workhorse + struct multi-arg") + delete ints + delete structs +} diff --git a/tutorials/macros/18_with_boost.das b/tutorials/macros/18_with_boost.das new file mode 100644 index 0000000000..c57023efce --- /dev/null +++ b/tutorials/macros/18_with_boost.das @@ -0,0 +1,131 @@ +// Macro Tutorial 18: with_ macro from daslib/with_boost +// +// The `with_` macro binds an array or table element by reference inside +// a block, with an automatic LOCK around the body so push/erase/resize +// on the underlying container panic at runtime instead of silently +// dangling the reference. +// +// Covers: +// - Why `var a : A& = arr[0]` is rejected by daslang's typer +// - Single-arg form with default `_` binding +// - Named binding via $(name) on the block +// - Multi-arg form (positional) for cross-container work +// - Workhorse element types (int, float, ...) 
— bound by reference +// - Table containers (one table-keyed arg per call) +// - Runtime lock: mutation of the container inside the body panics +// +// Run: daslang.exe tutorials/macros/18_with_boost.das + +options gen2 + +require daslib/with_boost + +struct A { + f1 : int + f2 : int +} + +[export] +def main { + + // ────────────────────────────────────────────────────────────────── + // Section 1 — Why with_ exists + // ────────────────────────────────────────────────────────────────── + // + // Plain `var a : A& = arr[0]` is rejected by daslang's typer: + // + // error[31300]: local reference to non-local expression is unsafe + // + // The reason: between binding the ref and using it, code could push + // or resize the array, leaving `a` dangling. with_ solves this by + // locking the array around a block — push/resize inside the block + // panic instead of corrupting memory. + + // ────────────────────────────────────────────────────────────────── + // Section 2 — Single-arg, default `_` binding + // ────────────────────────────────────────────────────────────────── + + var arr = [A(f1 = 1, f2 = 2), A(f1 = 3, f2 = 4)] + with_(arr[0]) { + _.f1 = 99 + _.f2 = 100 + } + print("section 2: arr[0] = {arr[0].f1}, {arr[0].f2}\n") + + // ────────────────────────────────────────────────────────────────── + // Section 3 — Named binding via $(name) + // ────────────────────────────────────────────────────────────────── + // + // The block-param keyword (`var`/`let`) is optional; the macro + // strips constness so mutations always persist. + + with_(arr[1]) $(elem) { + elem.f1 = 555 + } + print("section 3: arr[1].f1 = {arr[1].f1}\n") + + // ────────────────────────────────────────────────────────────────── + // Section 4 — Multi-arg positional form + // ────────────────────────────────────────────────────────────────── + // + // Pass multiple containers; the block params are positional. + // Each container gets its own lock. 
+ + var dst = [A(f1 = 0, f2 = 0)] + var src = [A(f1 = 10, f2 = 20)] + with_(dst[0], src[0]) $(d, s) { + d.f1 = s.f1 + 1 + d.f2 = s.f2 + 2 + } + print("section 4: dst[0] = {dst[0].f1}, {dst[0].f2}\n") + + // ────────────────────────────────────────────────────────────────── + // Section 5 — Workhorse element types (int, float, ...) + // ────────────────────────────────────────────────────────────────── + // + // The block-arg is bound by reference, so `_ = X` (or named `x = X`) + // mutates the underlying slot. The macro relies on the helper's + // `block<(var x : TT&) : void>` signature to drive daslang's + // inference; no special-case in the macro itself. + + var ints = [1, 2, 3] + with_(ints[1]) { + _ = 222 + } + print("section 5: ints = {ints}\n") + + // ────────────────────────────────────────────────────────────────── + // Section 6 — Tables + // ────────────────────────────────────────────────────────────────── + // + // Tables work the same way; `tab[key]` upserts (creates a default + // entry if missing). At most ONE table-keyed arg per call (any + // second insert into the same table would rehash and invalidate + // the pinned entry). + + var tab : table<string; A> + tab |> insert("k", A(f1 = 11, f2 = 22)) + with_(tab["k"]) $(v) { + v.f1 = 777 + } + let kf1 = tab["k"].f1 + print("section 6: tab[k].f1 = {kf1}\n") + + // ────────────────────────────────────────────────────────────────── + // Section 7 — Lock is real (would-panic example, commented out) + // ────────────────────────────────────────────────────────────────── + // + // Trying to mutate the container from inside the body panics at + // runtime — the whole point of the lock. Daslang panic is fatal + // (not a C++/JS exception), so we don't demonstrate it live in a + // tutorial. 
The line below is what would panic: + // + // with_(arr[0]) $(a) { + // arr |> push(A(f1 = 1000, f2 = 2000)) // panic: array is locked + // } + // + // See `tests/with_boost/test_with_lock_panics.das` for the + // full set of runtime-locked behaviours. + + print("section 7: see comment for the lock-panic shape\n") +}