diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8e0d75996..4d591c0e6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -202,6 +202,17 @@ jobs: - name: "Install CMake and Ninja" uses: lukka/get-cmake@latest + - if: runner.os == 'Windows' + uses: ilammy/setup-nasm@v1 # need nasm for openssl + + - name: "Install openssl windows" + if: runner.os == 'Windows' + run: | + git clone https://github.com/microsoft/vcpkg && ./vcpkg/bootstrap-vcpkg.sh + ./vcpkg/vcpkg install openssl:${{ matrix.architecture == 32 && 'x86' || 'x64' }}-windows --binarycaching + echo "VCPKG_ROOT=$(pwd)/vcpkg" >> $GITHUB_ENV + echo "CMAKE_TOOLCHAIN_FILE=$(pwd)/vcpkg/scripts/buildsystems/vcpkg.cmake" >> $GITHUB_ENV + - name: "Install: Required Dev Packages" run: | set -eux @@ -259,11 +270,13 @@ jobs: ninja ;; windows32) - cmake --no-warn-unused-cli -B./build -G "${{ matrix.cmake_generator }}" -T host=x64 -A ${{ matrix.architecture_string }} + export PATH="/c/Strawberry/perl/bin:$PATH" # prepend Strawberry perl to path, so openssl will use it. + cmake --no-warn-unused-cli -B./build -G "${{ matrix.cmake_generator }}" -T host=x64 -A ${{ matrix.architecture_string }} -DCMAKE_TOOLCHAIN_FILE="$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake" cmake --build ./build --config ${{ matrix.cmake_preset }} --parallel ;; windows64) - cmake --no-warn-unused-cli -B./build -G "${{ matrix.cmake_generator }}" -T host=x64 -A ${{ matrix.architecture_string }} -DDAS_LLVM_DISABLED=${{ env.das_llvm_disabled }} + export PATH="/c/Strawberry/perl/bin:$PATH" # prepend Strawberry perl to path, so openssl will use it. 
+ cmake --no-warn-unused-cli -B./build -G "${{ matrix.cmake_generator }}" -T host=x64 -A ${{ matrix.architecture_string }} -DDAS_LLVM_DISABLED=${{ env.das_llvm_disabled }} -DCMAKE_TOOLCHAIN_FILE="$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake" cmake --build ./build --config ${{ matrix.cmake_preset }} --parallel ;; linux_arm*) diff --git a/CMakeLists.txt b/CMakeLists.txt index 08dbe5aed..07e219872 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) option(DAS_FLEX_BISON_DISABLED "Disable FLEX/BISON stage" OFF) option(DAS_CLANG_BIND_DISABLED "Disable dasClangBind (libclang bindings, C/C++ parsing)" ON) option(DAS_LLVM_DISABLED "Disable dasLLVM (llvm bindings)" ON) -option(DAS_HV_DISABLED "Disable dasHV (websokets,http server and client)" ON) +option(DAS_HV_DISABLED "Disable dasHV (websokets,http server and client)" OFF) option(DAS_GLFW_DISABLED "Disable dasGLFW (GLFW window for graphics apps)" OFF) option(DAS_AUDIO_DISABLED "Disable dasAudio (Miniaudio sound library)" OFF) option(DAS_STDDLG_DISABLED "Disable dasStdDlg (File new,open,save etc dialogs)" OFF) diff --git a/benchmarks/sql/LINQ.md b/benchmarks/sql/LINQ.md index 7a84d4b20..cfc9ba295 100644 --- a/benchmarks/sql/LINQ.md +++ b/benchmarks/sql/LINQ.md @@ -280,6 +280,26 @@ The order+take rows (`order_take_desc`, `sort_take`, `select_where_order_take`) `select_where_count` is the first **select+where** splice landing: previously rejected by the planner (the typer-inserted `ExprRef2Value` wrapper around `it` orphaned during substitution → `30921: can only dereference a reference`). The new `replaceVariablePeeling` helper in `templates_boost.das` peels the wrapper as part of the substitution, mirroring `ast_match`'s `qm_peel_ref2value`. All four terminator lanes (array / counter / accumulator / early-exit) covered. +### Headline benchmarks (100K rows, INTERP, single-eval splice PR) + +Follow-up to the order-family + select+where landing. 
Closes two single-eval gaps the previous PR documented as `KNOWN PERF GAP`: + +**Gap 1 — comparator key double-call inside `partial_sort`.** Before this PR, `top_n_by(arr, K, key)` ran `_::less(key(v1), key(v2))` per comparison → 2 indirect lambda dispatches per `cmp`. For pure single-expression keys (the common case, e.g. `$(_) => _.price`), the planner now inlines the key body twice into a comparator block and dispatches to the new `top_n_by_with_cmp(arr, K) <| cmp` library entry — zero per-comparison lambda dispatch. Descending direction is encoded by flipping the comparator arg order (`less(body[v2], body[v1])`), eliminating the secondary wrapper-lambda the `_descending` family used. Falls back to keyed `top_n_by` when the key has side effects or isn't a single-expression lambda. + +**Gap 2 — projection double-eval in `select + where + terminator`.** Phase 3d inlined `projection` into `predicate` via peel-substitution; lane emitters then *also* cloned `projection` into `valueExpr` → projection evaluated twice per element on ARRAY / ACCUMULATOR / EARLY_EXIT lanes (COUNTER unaffected — no body use). Fix: the where-after-select arm now binds `projection` to a fresh local via a new `preCondStmts` slot (evaluated per-element, OUTSIDE the if-wrap), then rewrites `projection` to reference that bind. Both predicate (via peel) and valueExpr (via clone) share the single eval. Side-effecty projections still bail to tier 2 (moving them outside the if would visibly fire side effects on filter-rejected elements). COUNTER lane is explicitly excluded — the dedup has no benefit there and the bind decl would regress the single-stmt fast path. 
+ +| Benchmark | Shape | m1 (sql) | m3 (linq) | m3f (prev PR) | m3f (this PR) | Win | +|---|---|---:|---:|---:|---:|---:| +| sort_take | `order_by → take(K)` | 38 | 710 | 56 | **27** | **2.1× over prev / 26× over m3 / faster than m1 SQL** | +| order_take_desc | `order_by_desc → take(K)` | 38 | 704 | 56 | **27** | **2.1× over prev / 26× over m3 / faster than m1 SQL** | +| select_where_order_take | `where → order_by → take(K)` | 36 | 356 | 39 | **24** | **1.6× over prev / 15× over m3 / faster than m1 SQL** | +| select_where_sum (NEW Gap 2) | `select → where → sum` | 37 | 59 | — | **7** | **8.4× over m3 / 5.3× over m1 SQL** | +| select_where_count (regression check) | `select → where → count` (COUNTER, dedup off) | 32 | 58 | 5 | **5** | unchanged (correctly excluded from dedup) | + +The sort/order rows now BEAT `m1` SQLite by ~30%. PR #2707 closed the comparator-throughput gap vs SQL; this PR's inline-key splice closes the per-iteration lambda dispatch gap. + +**Parser bonus:** the multi-arg `$($i(a) : T, $i(b) : T) { ... }` qmacro form failed parse with `30701: block argument is already declared MACRO``TAG` because the parser stamped every `$i(...)` in block-arg position with the literal placeholder name and dup-checked them before macro tag resolution. Fixed in `src/parser/parser_impl.cpp:885` by skipping the dup check when `name_at.tag != nullptr` (genuine post-resolution dups surface as ordinary local-lookup conflicts during type inference). General-purpose fix — usable by any macro that needs to emit a typed block with N tagged-name args. + ## Operator-coverage checklist (parity tests) The 24 benchmarks above cover the most common shapes. The end-game target is one benchmark per `_fold`-applicable scenario in the broader `tests/linq/` operator suite. Tracking the long-tail coverage below; PRs that add splice support for new operators should add a benchmark here if not already present. 
diff --git a/benchmarks/sql/select_where_sum.das b/benchmarks/sql/select_where_sum.das new file mode 100644 index 000000000..0f693c9b2 --- /dev/null +++ b/benchmarks/sql/select_where_sum.das @@ -0,0 +1,69 @@ +options gen2 +options persistent_heap + +require _common public + +let THRESHOLD = 1000 + +// _select(_.price * 2) |> _where(_ > T) |> sum — where-after-select pattern with the +// sum terminator. SQL: SELECT SUM(price * 2) FROM Cars WHERE price * 2 > T. +// +// m3 (plain LINQ) materializes a projection iterator, then a filter array, then sums. +// m3f (spliced via Phase 3d single-eval) binds the projection to a local once per +// element OUTSIDE the if-wrap, then the rewritten predicate reads the bind AND the +// sum's valueExpr reads the bind — projection (`_.price * 2`) evaluates exactly once +// per element across the splice. Without the dedup it evaluates twice (once in the +// inlined predicate, once in valueExpr) for ARRAY/ACCUMULATOR/EARLY_EXIT lanes. +// +// m1 uses ``query_scalar`` with raw SQL because ``_sql``'s ``_select`` clause only +// accepts a single column or named-tuple shape — arbitrary scalar expressions like +// ``_.price * 2`` aren't representable in the typed DSL. The engine still folds the +// projection into both the WHERE filter and the SUM, so per-row work is identical to +// what the typed form would emit. 
+ +def run_m1(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m1_sql/{n}", n) { + let s = db |> query_scalar("SELECT SUM(price * 2) FROM Cars WHERE price * 2 > {THRESHOLD}", type) + if (s == 0) { + b->failNow() + } + } + } +} + +def run_m3(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3_array/{n}", n) { + let s = arr |> _select(_.price * 2) |> _where(_ > THRESHOLD) |> sum + if (s == 0) { + b->failNow() + } + } +} + +def run_m3f(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3f_array_fold/{n}", n) { + let s = _fold(each(arr)._select(_.price * 2)._where(_ > THRESHOLD).sum()) + if (s == 0) { + b->failNow() + } + } +} + +[benchmark] +def select_where_sum_m1(b : B?) { + run_m1(b, 100000) +} + +[benchmark] +def select_where_sum_m3(b : B?) { + run_m3(b, 100000) +} + +[benchmark] +def select_where_sum_m3f(b : B?) { + run_m3f(b, 100000) +} diff --git a/daslib/linq.das b/daslib/linq.das index 65cb38b0b..4b27fd72c 100644 --- a/daslib/linq.das +++ b/daslib/linq.das @@ -565,6 +565,51 @@ def top_n_descending(var a : iterator; n : int) : array return <- top_n_by_descending(a, n, $(v : TT -&) => v) } +// ============================================================================ +// top_n_by_with_cmp — take a comparator block directly instead of a key +// lambda. The splice planner uses this when an order-by key body is pure and +// inlineable: the body is spliced into the comparator twice (once per side), +// eliminating both the per-comparison lambda dispatch (`cmp(v1, v2)`) AND the +// per-side `key(v)` dispatch nested inside the standard comparator. +// +// Direction is encoded in the comparator: asc emits `less(body[v1], body[v2])`, +// desc emits `less(body[v2], body[v1])`. The output is sorted ascending by the +// comparator, so a reversed comparator yields a descending natural-key order. 
+// ============================================================================ + +def top_n_by_with_cmp(arr : array; n : int; cmp : block<(v1 : TT -&, v2 : TT -&) : bool>) : array { + //! Returns the ``n`` smallest-per-``cmp`` elements of ``arr``. + //! Pass a reversed comparator to extract the ``n`` largest instead. + var buf : array + if (n <= 0 || empty(arr)) return <- buf + let take_count = min(n, length(arr)) + buf |> reserve(length(arr)) + for (it in arr) { + buf |> push_clone(it) + } + sort_boost::partial_sort(buf, take_count, cmp) + buf |> resize(take_count) + return <- buf +} + +def top_n_by_with_cmp(var a : iterator; n : int; cmp : block<(v1 : TT -&, v2 : TT -&) : bool>) : array { + //! Iterator variant — bounded heap of size ``n`` maintained during scan. + var buf : array + if (n <= 0) return <- buf + for (it in a) { + if (length(buf) < n) { + buf |> push_clone(it) + sort_boost::push_heap(buf, cmp) + } elif (cmp(it, buf[0])) { + sort_boost::pop_heap(buf, cmp) + buf[length(buf) - 1] := it + sort_boost::push_heap(buf, cmp) + } + } + sort(buf, cmp) + return <- buf +} + def unique_key(a) { //! generates unique key of workhorse type for the value static_if (typeinfo is_workhorse(a)) { diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das index ffaa806a8..9b6b4dada 100644 --- a/daslib/linq_fold.das +++ b/daslib/linq_fold.das @@ -133,6 +133,7 @@ var private linqCalls = { "top_n_by" => LinqCall(name = "top_n_by", noToArrayVariant = true), "top_n_descending" => LinqCall(name = "top_n_descending", noToArrayVariant = true), "top_n_by_descending" => LinqCall(name = "top_n_by_descending", noToArrayVariant = true), + "top_n_by_with_cmp" => LinqCall(name = "top_n_by_with_cmp", noToArrayVariant = true), // aggregate "count" => LinqCall(name = "count"), "long_count" => LinqCall(name = "long_count"), @@ -524,6 +525,23 @@ def private wrap_with_condition(var body : Expression?; var cond : Expression?) 
} } +[macro_function] +def private prepend_precond(var body : Expression?; var preCondStmts : array) : Expression? { + // Splice per-element unconditional binds BEFORE the body. Used by the select+where + // splice arm to bind a pure projection once per element so the rewritten predicate + // (peel-substituted to reference the bind var) and the terminator's valueExpr (cloned + // from `projection`) share that single eval. intermediateBinds, by contrast, go INSIDE + // the if-wrap — correct for chained-select chains with no where. + if (empty(preCondStmts)) return body + var stmts : array + stmts |> reserve(length(preCondStmts) + 1) + for (s in preCondStmts) { + stmts |> push(s) + } + stmts |> push(body) + return stmts_to_expr(stmts) +} + [macro_function] def private append_skip_take_prelude(var preludeStmts : array; var skipExpr : Expression?; var takeExpr : Expression?; skipName, takeCountName : string) { @@ -697,6 +715,7 @@ def private emit_accumulator_lane( var projection : Expression?; var whereCond : Expression?; var intermediateBinds : array; + var preCondStmts : array; var elementType : TypeDeclPtr; srcName, accName, itName, skipName, takeCountName : string; var skipExpr : Expression?; var takeExpr : Expression?; @@ -797,7 +816,7 @@ def private emit_accumulator_lane( } prepend_binds(perMatchStmts, intermediateBinds) wrap_with_skip_take(perMatchStmts, skipExpr, takeExpr, skipName, takeCountName) - var loopBody = wrap_with_condition(stmts_to_expr(perMatchStmts), whereCond) + var loopBody = prepend_precond(wrap_with_condition(stmts_to_expr(perMatchStmts), whereCond), preCondStmts) // Collect all body statements into one list so they share scope when spliced via $b. 
// Splitting decls / for / return into separate splice tags would put each in its own // sub-block, hiding the accumulator from later statements (caught by AST dump under @@ -847,6 +866,7 @@ def private emit_early_exit_lane( var projection : Expression?; var whereCond : Expression?; var intermediateBinds : array; + var preCondStmts : array; var elementType : TypeDeclPtr; terminatorCall : ExprCall?; srcName, itName, skipName, takeCountName : string; @@ -956,7 +976,7 @@ def private emit_early_exit_lane( } prepend_binds(perMatchStmts, intermediateBinds) wrap_with_skip_take(perMatchStmts, skipExpr, takeExpr, skipName, takeCountName) - var loopBody = wrap_with_condition(stmts_to_expr(perMatchStmts), whereCond) + var loopBody = prepend_precond(wrap_with_condition(stmts_to_expr(perMatchStmts), whereCond), preCondStmts) // Single-$b body so all stmts (skip/take counters + prelude + for + tail) share scope // under one wrapping block. var bodyStmts : array @@ -992,6 +1012,66 @@ def private order_top_n_call_name(orderName : string) : string { return "" } +[macro_function] +def private try_make_inline_cmp(orderKey : Expression?; orderName : string; + elemType : TypeDeclPtr; at : LineInfo) : Expression? { + //! When ``orderKey`` is a single-arg, single-return-statement lambda whose body has + //! no side effects, return a comparator block that inlines the body twice with the + //! lambda argument renamed to ``v1`` and ``v2``. Direction is encoded by emit order: + //! asc → ``_::less(body[v1], body[v2])``; desc → ``_::less(body[v2], body[v1])``. + //! + //! Returns null when the key is multi-statement, has side effects, or isn't a + //! recognizable lambda shape — caller falls through to the lambda-passing emission. + //! + //! What this saves: indirect-dispatch overhead. The original comparator does two + //! `key(v)` indirect lambda calls per comparison (one per side); the inlined version + //! evaluates the body twice as direct expressions (one per side). 
Net: 2 fewer lambda + //! dispatches per comparison. For trivial keys (`$(_) => _.price` — a field load) + //! that dispatch IS the dominant cost. For expensive keys both forms still evaluate + //! the body twice per comparison, so the relative win shrinks. + //! + //! `has_sideeffects` is a **semantic** gate, not a perf gate: side-effecting bodies + //! can't be safely re-substituted into a fresh syntactic position because the typer- + //! inserted ExprRef2Value wrappers and ordering guarantees aren't preserved across + //! `replaceVariable`. (Schwartzian-style precompute-once is an orthogonal optimization + //! that would help expensive keys; not in scope here.) + if (orderKey == null || !(orderKey is ExprMakeBlock)) return null + var mblk = orderKey as ExprMakeBlock + var blk = mblk._block as ExprBlock + if (blk.arguments |> length != 1 || blk.list |> length != 1 + || !(blk.list[0] is ExprReturn)) return null + var ret = blk.list[0] as ExprReturn + if (ret.subexpr == null || has_sideeffects(ret.subexpr)) return null + let argName = string(blk.arguments[0].name) + var b1 = clone_expression(ret.subexpr) + var b2 = clone_expression(ret.subexpr) + var r1 : Template + r1 |> renameVariable(argName, "v1") + var r2 : Template + r2 |> renameVariable(argName, "v2") + apply_template(r1, b1.at, b1) + apply_template(r2, b2.at, b2) + var cmpExpr : Expression? + if (orderName == "order_by_descending") { + cmpExpr = qmacro(_::less($e(b2), $e(b1))) + } else { + cmpExpr = qmacro(_::less($e(b1), $e(b2))) + } + // Emit untyped block args (`$(v1, v2) { ... }`). The typer infers v1/v2 types from + // the dispatch's block-arg signature (`block<(v1 : TT -&, v2 : TT -&) : bool>` in + // `order(arr, block)` / `top_n_by_with_cmp(arr, n, block)`), and renders them as the + // const-ref shape (`TT const -&`) at the call site — which is what the candidate + // signature matches. 
Typed-explicitly emission via `$($i(a) : $t(T) -&, ...)` is + // possible after the parser_impl.cpp tagged-block-arg fix (error 30701), but the + // type-flag bookkeeping to make the emitted args match the candidate `TT const -&` + // exactly is fragile (must propagate clone_type, removeConstant, and ref flags + // through a parser path that flattens them differently per modifier order). Untyped + // sidesteps that entirely. + return qmacro($(v1, v2) { + return $e(cmpExpr) + }) +} + [macro_function] def private plan_order_family(var expr : Expression?) : Expression? { //! Phase 3 splice planner for chains containing an order-family operator. @@ -1051,6 +1131,15 @@ def private plan_order_family(var expr : Expression?) : Expression? { let needIterWrap = expr._type.isIterator let topNName = order_top_n_call_name(orderName) let inplaceName = "{orderName}_inplace" + // Inline-key path: when the key is a pure single-expression lambda, splice the body + // twice into a comparator block (asc: `_::less(body[v1], body[v2])`; desc: flip + // arg order). Eliminates the per-comparison `key(v)` indirect dispatch — dominant + // cost on trivial keys like `$(_) => _.field`. Null when key is multi-statement / + // side-effecting / not a recognizable lambda — caller falls through. + var inlineCmp : Expression? + if (hasKey) { + inlineCmp = try_make_inline_cmp(orderKey, orderName, orderElemType, at) + } if (whereCond == null) { // No prefilter — direct call to daslib helper. var topExpr = clone_expression(top) @@ -1059,14 +1148,20 @@ def private plan_order_family(var expr : Expression?) : Expression? { if (takeExpr == null) { // Bare order family — emit the direct call. Same shape as plain LINQ, but via // splice so `_fold` doesn't fall through to tier 2. - if (hasKey) { + if (inlineCmp != null) { + // Inlined comparator dispatches to the asc `order(src, block)` overload — + // direction encoded in the comparator (flip embedded for descending). 
+ emission = qmacro(_::order($e(topExpr), $e(inlineCmp))) + } elif (hasKey) { emission = qmacro($c(orderName)($e(topExpr), $e(orderKey))) } else { emission = qmacro($c(orderName)($e(topExpr))) } } else { // order + take → top_n* dispatch. - if (hasKey) { + if (inlineCmp != null) { + emission = qmacro(_::top_n_by_with_cmp($e(topExpr), $e(takeExpr), $e(inlineCmp))) + } elif (hasKey) { emission = qmacro($c(topNName)($e(topExpr), $e(takeExpr), $e(orderKey))) } else { emission = qmacro($c(topNName)($e(topExpr), $e(takeExpr))) @@ -1116,7 +1211,10 @@ def private plan_order_family(var expr : Expression?) : Expression? { // Sort the prefilter buffer in place and return it. order*_inplace is void // (mutates the buffer in place), so we move the buffer out for the final result. var sortCall : Expression? - if (hasKey) { + if (inlineCmp != null) { + // Asc `order_inplace(buf, block)` overload + embedded-flip cmp for descending. + sortCall = qmacro(_::order_inplace($i(bufName), $e(inlineCmp))) + } elif (hasKey) { sortCall = qmacro($c(inplaceName)($i(bufName), $e(orderKey))) } else { sortCall = qmacro($c(inplaceName)($i(bufName))) @@ -1128,7 +1226,9 @@ def private plan_order_family(var expr : Expression?) : Expression? { } else { // top_n* on the prefilter buffer. var topNCall : Expression? - if (hasKey) { + if (inlineCmp != null) { + topNCall = qmacro(_::top_n_by_with_cmp($i(bufName), $e(takeExpr), $e(inlineCmp))) + } elif (hasKey) { topNCall = qmacro($c(topNName)($i(bufName), $e(takeExpr), $e(orderKey))) } else { topNCall = qmacro($c(topNName)($i(bufName), $e(takeExpr))) @@ -1184,6 +1284,13 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { var whereCond : Expression? var projection : Expression? 
var intermediateBinds : array + // preConditionStmts evaluate UNCONDITIONALLY per element, BEFORE the where filter — + // used by the select+where splice arm to bind a pure projection once per element so + // both the predicate (which is rewritten to read the bind var via peel-substitution) + // AND the terminator's valueExpr (which clones the projection) share that single + // eval. intermediateBinds, by contrast, prepend INSIDE the if(whereCond) wrap, so + // they evaluate only on matching elements — correct for chained-select bind chains. + var preCondStmts : array var skipExpr : Expression? var takeExpr : Expression? var seenSelect = false @@ -1201,20 +1308,36 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { if (seenSkip || seenTake) return null var predicate : Expression? if (seenSelect) { - // Phase 3d: where-after-select. Substitute the predicate's bound variable - // with the current projection via peel-aware substitution. The substitution - // inlines the projection into the predicate, which would re-evaluate any - // side effects (since the terminator also references projection) — bail to - // tier 2 cascade on side-effecty projections. + // Phase 3d / single-eval: where-after-select. Bind the current projection + // to a fresh local in `preCondStmts` (evaluated per-element, OUTSIDE the + // if-wrap), then rewrite `projection` to reference that local — peel- + // substituted into the predicate AND cloned into the terminator's + // valueExpr. Both reference the bind ⇒ projection evaluates exactly once + // per element. Side-effecty projections still bail to tier 2: moving them + // outside the if would visibly fire side effects on filter-rejected + // elements. // - // KNOWN PERF GAP (deferred to splice-with-cmp follow-up PR): pure projections - // currently re-evaluate per element for ARRAY/ACCUMULATOR/EARLY_EXIT lanes — - // once in the inlined predicate and once in valueExpr. COUNTER lane is unaffected - // (no body use). 
Fix shape: emit a pre-condition `var v := projection` bind in - // the loop body (outside the if-wrap) and rewrite both predicate and valueExpr - // to reference `v`. Bundled with the `_with_cmp` inline-key follow-up since - // both share the "single-eval splice" theme. + // COUNTER lane (count) is excluded: it never reads valueExpr, so the + // dedup brings no benefit, and the extra per-element bind would regress + // the lane's single-stmt fast path (predicate already inlines projection, + // result is just discarded). if (has_sideeffects(projection)) return null + if (lane != LinqLane.COUNTER) { + let wbName = "`vw`{at.line}`{at.column}`{length(preCondStmts)}" + var projType = clone_type(elementType) + preCondStmts |> push <| qmacro_expr() { + var $i(wbName) : $t(projType) := $e(projection) + } + // Replace projection with a typed ExprVar so downstream typer passes + // can resolve the reference without re-walking the loop body's local + // decls. Untyped ExprVars here propagate `auto` into push_clone / + // accumulator paths and surface as "cannot infer push_clone return + // type" at the typer. + var pvar = new ExprVar(at = at, name := wbName) + pvar._type = clone_type(elementType) + pvar._type.flags.ref = true + projection = pvar + } predicate = fold_linq_cond_peel(cll._0.arguments[1], projection) } else { predicate = fold_linq_cond(cll._0.arguments[1], itName) @@ -1290,7 +1413,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { // COUNTER/ARRAY loopBody construction. 
if (lane == LinqLane.ACCUMULATOR) return emit_accumulator_lane(lastName, top, projection, whereCond, - intermediateBinds, elementType, srcName, accName, itName, skipName, takeCountName, + intermediateBinds, preCondStmts, elementType, srcName, accName, itName, skipName, takeCountName, skipExpr, takeExpr, at) // Ring 2: early-exit lane — `any` no-pred + no upstream work + no limits + length-bearing // source gets the empty-shortcut; everything else dispatches to the loop emitter. @@ -1301,7 +1424,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { && type_has_length(top._type)) return emit_any_empty_shortcut(top, srcName, at) return emit_early_exit_lane(lastName, top, projection, whereCond, - intermediateBinds, elementType, terminatorCall, srcName, itName, skipName, + intermediateBinds, preCondStmts, elementType, terminatorCall, srcName, itName, skipName, takeCountName, skipExpr, takeExpr, at) } // Build the per-element loop body for COUNTER / ARRAY. Both lanes follow the same shape: @@ -1328,7 +1451,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { } prepend_binds(stmts, intermediateBinds) wrap_with_skip_take(stmts, skipExpr, takeExpr, skipName, takeCountName) - loopBody = wrap_with_condition(stmts_to_expr(stmts), whereCond) + loopBody = prepend_precond(wrap_with_condition(stmts_to_expr(stmts), whereCond), preCondStmts) } else { // Array lane. `push_clone` is the safe append everywhere: for workhorse types it's a // byte copy (same cost as `push`); for non-workhorse it deep-clones, avoiding the @@ -1354,7 +1477,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? 
{ } prepend_binds(stmts, intermediateBinds) wrap_with_skip_take(stmts, skipExpr, takeExpr, skipName, takeCountName) - loopBody = wrap_with_condition(stmts_to_expr(stmts), whereCond) + loopBody = prepend_precond(wrap_with_condition(stmts_to_expr(stmts), whereCond), preCondStmts) } if (counterLane) { return emit_counter_lane(top, srcName, accName, itName, skipName, takeCountName, diff --git a/modules/dasHV/CMakeLists.txt b/modules/dasHV/CMakeLists.txt index b7dab946a..cbb93744c 100644 --- a/modules/dasHV/CMakeLists.txt +++ b/modules/dasHV/CMakeLists.txt @@ -42,7 +42,9 @@ IF ((NOT DAS_HV_INCLUDED) AND ((NOT ${DAS_HV_DISABLED}) OR (NOT DEFINED DAS_HV_D SET(HV_LIBRARIES ${DAS_HV_DIR}/hv/$/lib/hv_static.lib) ELSE() find_package(OpenSSL REQUIRED) - SET(HV_LIBRARIES ${DAS_HV_DIR}/hv/$/lib/libhv_static.a) + # libhv ≥ master renames hv_static's OUTPUT_NAME to "hv" on POSIX, + # so the installed file is libhv.a (was libhv_static.a in v1.3.4). + SET(HV_LIBRARIES ${DAS_HV_DIR}/hv/$/lib/libhv.a) SET(OPENSSL_LIBRARIES_FILES OpenSSL::Crypto OpenSSL::SSL) ENDIF() @@ -61,15 +63,22 @@ IF ((NOT DAS_HV_INCLUDED) AND ((NOT ${DAS_HV_DISABLED}) OR (NOT DEFINED DAS_HV_D -DOPENSSL_LIBRARIES="${OPENSSL_CRYPTO_LIBRARY};${OPENSSL_SSL_LIBRARY}" ) IF(DAS_USE_SANITIZER STREQUAL "address" OR DAS_USE_SANITIZER STREQUAL "asan") + IF(MSVC) + SET(_HV_ASAN_FLAG "/fsanitize=address") + ELSE() + SET(_HV_ASAN_FLAG "-fsanitize=address") + ENDIF() LIST(APPEND HV_CMAKE_FLAGS - "-DCMAKE_C_FLAGS=/fsanitize=address" - "-DCMAKE_CXX_FLAGS=/fsanitize=address" + "-DCMAKE_C_FLAGS=${_HV_ASAN_FLAG}" + "-DCMAKE_CXX_FLAGS=${_HV_ASAN_FLAG}" ) ENDIF() + # TODO: switch back to ithewei/libhv upstream tag once + # https://github.com/ithewei/libhv/pull/835 is merged. 
ExternalProject_Add( LIBHV - URL https://github.com/ithewei/libhv/archive/refs/tags/v1.3.4.tar.gz - URL_HASH SHA256=f0a9a197f90da55cc3ff104f9c7a27cc927f117b6c18613c3292726068588e10 + URL https://github.com/aleksisch/libhv/archive/343437b72fbd0f348abb168d61fe7f1c6c4f4d20.tar.gz + URL_HASH SHA256=1b809d55dc1a637aafecb25e717c4ab302a6733390f43b32244976d6583cd866 DOWNLOAD_EXTRACT_TIMESTAMP TRUE PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libhv CMAKE_ARGS ${HV_CMAKE_FLAGS} diff --git a/mouse-data/docs/how-do-i-write-an-ast-shape-test-that-distinguishes-tier-1-splice-from-tier-2-cascade-in-fold-exprinvoke-matches-both.md b/mouse-data/docs/how-do-i-write-an-ast-shape-test-that-distinguishes-tier-1-splice-from-tier-2-cascade-in-fold-exprinvoke-matches-both.md new file mode 100644 index 000000000..7d7e82eec --- /dev/null +++ b/mouse-data/docs/how-do-i-write-an-ast-shape-test-that-distinguishes-tier-1-splice-from-tier-2-cascade-in-fold-exprinvoke-matches-both.md @@ -0,0 +1,37 @@ +--- +slug: how-do-i-write-an-ast-shape-test-that-distinguishes-tier-1-splice-from-tier-2-cascade-in-fold-exprinvoke-matches-both +title: How do I write an AST-shape test that distinguishes tier 1 splice from tier 2 cascade in `_fold`? `ExprInvoke` matches both. +created: 2026-05-18 +last_verified: 2026-05-18 +links: [] +--- + +**`body_expr is ExprInvoke` is the WEAKEST possible assertion** — both tier-1 splice emissions (in `plan_loop_or_count` / `plan_order_family` / `emit_*_lane`) AND tier-2 `fold_linq_default` emissions wrap their result in `invoke($block, $src)` form. Asserting just `ExprInvoke` passes for both tiers and tells you nothing. 
+ +**Strong-form distinguishers** (use 2-3 of these together): + +``` +t |> equal(1, count_inner_for_loops(body_expr), "single fused for-loop") +t |> equal(0, count_call(body_expr, "where_"), "where_ should be inlined, not called") +t |> equal(0, count_call(body_expr, "select"), "select should be inlined, not called") +``` + +Tier 1 splice emits a single `for` loop with the predicate/projection inlined into the body — zero residual `where_(...)` / `select(...)` calls. Tier 2 `fold_linq_default` emits a multi-statement block: + +``` +var pass_0 = where_to_array(src, pred) +var pass_1 = select_inplace(pass_0, proj) +delete pass_0 +return <- pass_1 +``` + +Multiple `var` decls, `_to_array` / `_inplace` suffixed calls, explicit `delete` between stages. The pattern is distinguishable by counting calls that survive vs. inline. + +**Canonical template:** [tests/linq/test_linq_fold_ast.das:1377 (`test_select_where_count_emits_fused_loop`)](tests/linq/test_linq_fold_ast.das#L1377). Use as the model when adding new tier-1 AST-shape assertions. + +**Why this matters:** Phase 3d (PR #2712) added the select+where splice arm. The pre-Phase-3d tests asserted `body_expr is ExprInvoke` expecting tier-2 fallback, but those chains now splice tier 1. The tests kept passing (both tiers wrap in invoke) but were stale-named and meaningless. Copilot review caught it; the fix was deleting the redundant tests and replacing with strong-form assertions. + +## Questions +- How do I write an AST-shape test that distinguishes tier 1 splice from tier 2 cascade in `_fold`? `ExprInvoke` matches both. +- Why does my `body_expr is ExprInvoke` assertion pass for both tier 1 splice and tier 2 cascade in `_fold` AST tests? +- What's the canonical splice-emission shape I should assert in `test_linq_fold_ast.das` for a new splice arm? 
diff --git a/mouse-data/docs/my-macro-substitutes-it-for-a-projection-expression-via-template-replacevariable-it-proj-apply-template-but-the-result-fails-to.md b/mouse-data/docs/my-macro-substitutes-it-for-a-projection-expression-via-template-replacevariable-it-proj-apply-template-but-the-result-fails-to.md index 9652ba343..f55e42132 100644 --- a/mouse-data/docs/my-macro-substitutes-it-for-a-projection-expression-via-template-replacevariable-it-proj-apply-template-but-the-result-fails-to.md +++ b/mouse-data/docs/my-macro-substitutes-it-for-a-projection-expression-via-template-replacevariable-it-proj-apply-template-but-the-result-fails-to.md @@ -2,7 +2,7 @@ slug: my-macro-substitutes-it-for-a-projection-expression-via-template-replacevariable-it-proj-apply-template-but-the-result-fails-to title: My macro substitutes `it` for a projection expression via `Template.replaceVariable("it", proj) + apply_template`, but the result fails to compile with "can only dereference a reference". What's going wrong? created: 2026-05-16 -last_verified: 2026-05-16 +last_verified: 2026-05-18 links: [] --- @@ -18,7 +18,14 @@ Two fixes for substitution: Concrete repro: daslang `linq_fold`'s Phase 2A planner tried to fuse chained `_select|_select` via `substitute_it_for(proj2, "it", proj1)`. proj1 was `it * 2` (where `it` is the typed-and-wrapped loop var), proj2 was `it + 1`. Substituting via Template replaced the inner ExprVar in proj2 but left `ExprRef2Value(it * 2) + 1` — type error. The fix was deferred (chained-select falls through unfolded in Phase 2A) but Phase 2B needs option 2. -See `skills/das_macros.md` "Peel ExprRef2Value before qmatch" for the matcher-side analog. The substitution side has no in-tree helper yet. +See `skills/das_macros.md` "Peel ExprRef2Value before qmatch" for the matcher-side analog. + +**Update 2026-05-18 (PR #2712):** The substitution-side helper has landed as `replaceVariablePeeling` in `daslib/templates_boost.das`. 
Same signature as `replaceVariable` — populates a new `var2exprPeeling : table` field on `Template`. The `TemplateVisitor` gets a `visitExprRef2Value` override that detects `ExprRef2Value(ExprVar(name))` for any peeling-registered name and returns `clone_expression(replacement)` directly (Option 2 above). First user is `fold_linq_cond_peel` in `daslib/linq_fold.das` for the `_select |> _where |> terminator` splice arm; bails to tier-2 cascade when `has_sideeffects(projection)` to avoid double-evaluation. + +When to choose `replaceVariablePeeling` over `replaceVariable`: any time you substitute into already-typed AST. The typer's `ExprRef2Value` wrappers are invisible in the IDE outline but real in the AST. ## Questions - My macro substitutes `it` for a projection expression via `Template.replaceVariable("it", proj) + apply_template`, but the result fails to compile with "can only dereference a reference". What's going wrong? +- ExprRef2Value blocker — `_select|_where` (where-after-select) in `_fold` splice — macro substitutes `it` for projection via `Template::replaceVariable`, compile error "can only dereference a reference" +- How do I peel `ExprRef2Value` during typed-AST substitution? When should I use `replaceVariablePeeling` over `replaceVariable`? +- Why does my `apply_template` substitution leave a `30921` error around the substituted expression? 
diff --git a/mouse-data/docs/when-does-daslib-s-order-by-return-iterator-vs-array-and-why-is-to-sequence-move-dangerous-to-wrap-blindly-around-the-result.md b/mouse-data/docs/when-does-daslib-s-order-by-return-iterator-vs-array-and-why-is-to-sequence-move-dangerous-to-wrap-blindly-around-the-result.md new file mode 100644 index 000000000..35b587ea1 --- /dev/null +++ b/mouse-data/docs/when-does-daslib-s-order-by-return-iterator-vs-array-and-why-is-to-sequence-move-dangerous-to-wrap-blindly-around-the-result.md @@ -0,0 +1,40 @@ +--- +slug: when-does-daslib-s-order-by-return-iterator-vs-array-and-why-is-to-sequence-move-dangerous-to-wrap-blindly-around-the-result +title: When does daslib's `order_by` return iterator vs array? And why is `to_sequence_move()` dangerous to wrap blindly around the result? +created: 2026-05-18 +last_verified: 2026-05-18 +links: [] +--- + +**`order_by` has 2 overloads in daslib/linq.das (and same for `order` / `order_descending` / `order_by_descending`):** +- `def order_by(var a : iterator; key) : iterator` ([daslib/linq.das:405](daslib/linq.das#L405)) — iterator in → **iterator out** +- `def order_by(a : array; key) : array` ([daslib/linq.das:412](daslib/linq.das#L412)) — array in → **array out** + +Return-type-mirrors-source-type. Caller picks the overload by what they pass in. + +Contrast with **`top_n_by` / `top_n*` (PR #2707): both overloads return `array` regardless of input shape** — iterator source still returns array. No `top_n_by_iterator` form. + +**Practical implication for splice planners:** `to_sequence_move()` is array-only — it crashes on iterator. 
So if you have a planner that emits a chain call and then wraps with `to_sequence_move()` when the outer expression was iterator-typed, gate the wrap on the *emission's* output type, not the outer-chain type: + +``` +let emissionIsArray = takeExpr != null /* top_n* always array */ || top._type.isGoodArrayType +if (needIterWrap && emissionIsArray) { + emission = qmacro($e(emission).to_sequence_move()) +} +``` + +Without the gate, a bare `order_by(iter, key)` emission gets `.to_sequence_move()` glued on its iterator result → compile error. + +This was a latent bug in `plan_order_family` (PR #2712 round 1) caught by Copilot review; my tests all used `each(arr)` sources where `peel_each` succeeded and yielded array, masking the iterator-source path. + +## Questions +- When does daslib's `order_by` return iterator vs array? And why is `to_sequence_move()` dangerous to wrap blindly around the result? +- What's the difference between `order_by` and `top_n_by` return types for iterator vs array source? +- Why does my splice planner's `.to_sequence_move()` wrap compile-fail on iterator sources? +- What's the array-only constraint on `to_sequence_move` in linq? + +## See also +- `daslib-order-family-and-top-n-overload-shapes` — companion table + +## Questions +- When does daslib's `order_by` return iterator vs array? And why is `to_sequence_move()` dangerous to wrap blindly around the result? 
diff --git a/mouse-data/docs/when-does-peel-each-in-daslib-linq-fold-das-unwrap-each-x-to-x-and-what-s-the-design-rationale.md b/mouse-data/docs/when-does-peel-each-in-daslib-linq-fold-das-unwrap-each-x-to-x-and-what-s-the-design-rationale.md new file mode 100644 index 000000000..f6453cf5a --- /dev/null +++ b/mouse-data/docs/when-does-peel-each-in-daslib-linq-fold-das-unwrap-each-x-to-x-and-what-s-the-design-rationale.md @@ -0,0 +1,35 @@ +--- +slug: when-does-peel-each-in-daslib-linq-fold-das-unwrap-each-x-to-x-and-what-s-the-design-rationale +title: When does `peel_each` in daslib/linq_fold.das unwrap `each(<x>)` to `<x>`, and what's the design rationale? +created: 2026-05-18 +last_verified: 2026-05-18 +links: [] +--- + +**`peel_each` only unwraps when `x` is a true array (`isGoodArrayType` or `isArray`).** For iterator-typed `x` (e.g., `each(range(N))`, `each(generator())`), it returns `top` unchanged — the `each(...)` wrapper stays. + +Definition: [daslib/linq_fold.das:431-445](daslib/linq_fold.das#L431-L445). + +``` +def private peel_each(var top : Expression?) : Expression? { + if (!(top is ExprCall)) return top + var topCall = top as ExprCall + if (!is_each_call(topCall) || topCall.arguments |> length != 1) return top + let argExpr = topCall.arguments[0] + if ((argExpr == null || argExpr._type == null) + || (!argExpr._type.isGoodArrayType && !argExpr._type.isArray)) return top + return clone_expression(argExpr) +} +``` + +**Why gate on array-ness?** Downstream emitters call `length(src)` for buffer-reserve hints and rely on indexable / random-access semantics (e.g., chunked iteration, slice). An iterator behind the `each` wrapper has neither. Peeling would put the bare iterator in the emission and break those assumptions silently. + +**Implication for splice planner authors:** when you call `top = peel_each(top)`, `top._type` may still be iterator-typed afterward.
That changes which library overload your emission dispatches to (`order_by(iter)` vs `order_by(arr)`, etc. — see [[when-does-daslib-s-order-by-return-iterator-vs-array-and-why-is-to-sequence-move-dangerous-to-wrap-blindly-around-the-result]]). Don't assume `peel_each` always gives you an array. + +## Questions +- When does `peel_each` in daslib/linq_fold.das unwrap `each(<x>)` to `<x>`, and what's the design rationale? +- Why doesn't `peel_each` unwrap `each(range(N))` or `each(some_generator())`? +- After `peel_each`, can `top._type.isIterator` still be true? + +## Questions +- When does `peel_each` in daslib/linq_fold.das unwrap `each(<x>)` to `<x>`, and what's the design rationale? diff --git a/mouse-data/docs/why-does-order-by-v-v-compile-fail-with-function-function-while-order-by-works.md b/mouse-data/docs/why-does-order-by-v-v-compile-fail-with-function-function-while-order-by-works.md new file mode 100644 index 000000000..397679fca --- /dev/null +++ b/mouse-data/docs/why-does-order-by-v-v-compile-fail-with-function-function-while-order-by-works.md @@ -0,0 +1,27 @@ +--- +slug: why-does-order-by-v-v-compile-fail-with-function-function-while-order-by-works +title: Why does `_order_by(@@(v) => -v)` compile-fail with "_::<(function, function)" while `_order_by(-_)` works? +created: 2026-05-18 +last_verified: 2026-05-18 +links: [] +--- + +**`_order_by(EXPR)` is a daslib comprehension macro — `EXPR` is a key-extraction expression using the `_` placeholder for the element, NOT a lambda or function pointer.** + +The macro expands `_order_by(EXPR)` to roughly `order_by(iter, $($) => EXPR)` — it wraps EXPR into a block that produces the key per element. So: + +- ✅ `_order_by(-_)` → emits `order_by(iter, $($) => -$)` → key = `-element` +- ✅ `_order_by(_.price)` → emits `order_by(iter, $($) => $.price)` → key = field access +- ✅ `_order_by(_ * 2)` → emits `order_by(iter, $($) => $ * 2)` +- ❌ `_order_by(@@(v) => -v)` → emits `order_by(iter, $($) => @@(v) => -v)` — the inner block returns a FUNCTION POINTER, not a value.
Then `_::less(key(v1), key(v2))` tries to compare two function pointers → error 30341. + +Same rule for `_order_by_descending`, `_select`, `_where`, and every other comprehension form in `daslib/linq.das` that takes a `_`-placeholder expression. + +The `top_n_by(arr, K, key)` direct call is different — `key` IS a function/lambda parameter. There you pass `@@(v : int) => -v` or `$(v : int) => -v`. The two forms only collide when you write `_order_by(@@...)` (comprehension form taking a key expression but getting a function literal). + +**How to spot the diagnostic:** error `30341: no matching functions or generics: _::<(function<...>, function<...>)` with the call stack pointing into `linq.das` `order_by` instantiation. The "comparing two functions" is the tell. + +## Questions +- Why does `_order_by(@@(v) => -v)` compile-fail with "_::<(function, function)" while `_order_by(-_)` works? +- What's the difference between `_order_by(EXPR)` (comprehension form) and `order_by(arr, key)` (direct call) for the key argument? +- What does the `_` placeholder mean in daslib comprehension forms like `_select` / `_where` / `_order_by`? diff --git a/src/parser/parser_impl.cpp b/src/parser/parser_impl.cpp index 9ff906718..dd872223a 100644 --- a/src/parser/parser_impl.cpp +++ b/src/parser/parser_impl.cpp @@ -882,7 +882,14 @@ namespace das { for ( auto pDecl : *list ) { if ( pDecl->pTypeDecl ) { for ( const auto & name_at : *pDecl->pNameList ) { - if ( !closure->findArgument(name_at.name) ) { + // Macro-tagged names (`$i(expr)` in block-arg position) all parse to the + // literal placeholder "``MACRO``TAG``"; the actual name is resolved later + // when the macro processor substitutes the tag expression. Skip the dup + // check for tagged names so multi-arg lists like + // `$($i(a) : T, $i(b) : T) { ... }` aren't false-positive at parse time. + // After resolution, duplicate names surface as ordinary local-lookup + // conflicts during type inference. 
+ if ( name_at.tag || !closure->findArgument(name_at.name) ) { VariablePtr pVar = new Variable(); pVar->name = name_at.name; pVar->aka = name_at.aka; diff --git a/tests/linq/test_linq_fold_ast.das b/tests/linq/test_linq_fold_ast.das index 1533d5e39..20fe25858 100644 --- a/tests/linq/test_linq_fold_ast.das +++ b/tests/linq/test_linq_fold_ast.das @@ -734,6 +734,69 @@ def count_op1(expr : Expression?; op : string) : int { return n } +// Count ExprConstInt(value) occurrences anywhere in the expression tree. Used by the +// select+where dedup tests to prove that a pure projection's signature constant +// (`_ * 7919`) appears exactly once in the emission (in the projection-bind statement), +// rather than twice (inlined into predicate + cloned into valueExpr). +def count_const_int(expr : Expression?; value : int) : int { + if (expr == null) return 0 + var n = 0 + if (expr is ExprConstInt && (expr as ExprConstInt).value == value) { + n ++ + } + if (expr is ExprBlock) { + let b = expr as ExprBlock + for (s in b.list) { + n += count_const_int(s, value) + } + for (s in b.finalList) { + n += count_const_int(s, value) + } + } elif (expr is ExprFor) { + let f = expr as ExprFor + for (s in f.sources) { + n += count_const_int(s, value) + } + n += count_const_int(f.body, value) + } elif (expr is ExprIfThenElse) { + let i = expr as ExprIfThenElse + n += count_const_int(i.cond, value) + n += count_const_int(i.if_true, value) + n += count_const_int(i.if_false, value) + } elif (expr is ExprOp2) { + let o = expr as ExprOp2 + n += count_const_int(o.left, value) + n += count_const_int(o.right, value) + } elif (expr is ExprOp1) { + let o = expr as ExprOp1 + n += count_const_int(o.subexpr, value) + } elif (expr is ExprCall) { + let c = expr as ExprCall + for (a in c.arguments) { + n += count_const_int(a, value) + } + } elif (expr is ExprMakeBlock) { + let mb = expr as ExprMakeBlock + n += count_const_int(mb._block, value) + } elif (expr is ExprInvoke) { + let inv = expr as ExprInvoke + for 
(a in inv.arguments) { + n += count_const_int(a, value) + } + } elif (expr is ExprReturn) { + let r = expr as ExprReturn + n += count_const_int(r.subexpr, value) + } elif (expr is ExprLet) { + let l = expr as ExprLet + for (v in l.variables) { + if (v != null && v.init != null) { + n += count_const_int(v.init, value) + } + } + } + return n +} + // Counts top-level `var` declarations in the outer block of an invoke wrapper. // sum/long_count emit one accumulator; min/max emit two (first flag + best); average // emits two (sum acc + count). @@ -1160,9 +1223,10 @@ def target_where_order_by_take_splices_fold() : array { } [test] -def test_order_by_take_emits_top_n_by(t : T?) { - // `order_by |> take(K)` splices via plan_order_family to a direct top_n_by(src, K, key) call. - // No invoke wrapper, no order_by call in the emission. +def test_order_by_take_emits_top_n_by_with_cmp(t : T?) { + // `order_by(key) |> take(K)` with an inlineable key body now splices to + // top_n_by_with_cmp(src, K, $(v1, v2) => _::less(body[v1], body[v2])) — the comparator + // block embeds the key body twice, eliminating the per-comparison key() dispatch. ast_gc_guard() { var func = find_module_function_via_rtti(compiling_module(), @@target_order_by_take_splices_fold) if (func == null) return @@ -1171,14 +1235,18 @@ def test_order_by_take_emits_top_n_by(t : T?) { return <- $e(body_expr) } t |> success(r.matched, "should have return expression") - t |> success(count_call(body_expr, "top_n_by") >= 1, "should emit a top_n_by call") + t |> success(count_call(body_expr, "top_n_by_with_cmp") >= 1, "should emit a top_n_by_with_cmp call") + t |> equal(0, count_call(body_expr, "top_n_by"), "should not emit non-cmp top_n_by") t |> equal(0, count_call(body_expr, "order_by"), "should not emit order_by") t |> equal(0, count_call(body_expr, "take"), "should not emit take") } } [test] -def test_order_by_descending_take_emits_top_n_by_descending(t : T?) 
{ +def test_order_by_descending_take_emits_top_n_by_with_cmp(t : T?) { + // `order_by_descending(key) |> take(K)` with an inlineable key dispatches to the same + // top_n_by_with_cmp entry as ascending — direction is encoded by flipping the + // comparator argument order (`_::less(body[v2], body[v1])`), no `_descending` helper. ast_gc_guard() { var func = find_module_function_via_rtti(compiling_module(), @@target_order_by_descending_take_splices_fold) @@ -1188,8 +1256,10 @@ def test_order_by_descending_take_emits_top_n_by_descending(t : T?) { return <- $e(body_expr) } t |> success(r.matched, "should have return expression") - t |> success(count_call(body_expr, "top_n_by_descending") >= 1, - "should emit a top_n_by_descending call") + t |> success(count_call(body_expr, "top_n_by_with_cmp") >= 1, + "should emit a top_n_by_with_cmp call (flipped comparator embeds direction)") + t |> equal(0, count_call(body_expr, "top_n_by_descending"), + "should not emit the key-taking top_n_by_descending") t |> equal(0, count_call(body_expr, "order_by_descending"), "should not emit order_by_descending") } @@ -1197,7 +1267,8 @@ def test_order_by_descending_take_emits_top_n_by_descending(t : T?) { [test] def test_bare_order_by_emits_direct_call(t : T?) { - // Bare `order_by(key)` splices to a direct order_by call (no invoke wrapper, no top_n). + // Bare `order_by(key)` with an inlineable key dispatches to the asc `order(src, cmp)` + // block-taking overload — the key body is spliced into the comparator twice. ast_gc_guard() { var func = find_module_function_via_rtti(compiling_module(), @@target_bare_order_by_splices_fold) if (func == null) return @@ -1206,14 +1277,16 @@ def test_bare_order_by_emits_direct_call(t : T?) 
{ return <- $e(body_expr) } t |> success(r.matched, "should have return expression") - t |> success(count_call(body_expr, "order_by") >= 1, "should emit an order_by call") + t |> success(count_call(body_expr, "order") >= 1, "should emit an order(src, cmp) call") + t |> equal(0, count_call(body_expr, "order_by"), "should not emit the key-taking order_by") t |> equal(0, count_call(body_expr, "top_n_by"), "should not emit top_n_by") } } [test] -def test_where_order_by_emits_fused_loop(t : T?) { - // `where |> order_by` splices into a fused prefilter loop + order_by_inplace on the buffer. +def test_where_order_by_emits_fused_loop_with_inline_cmp(t : T?) { + // `where |> order_by(key)` splices into a fused prefilter loop + order_inplace(buf, cmp) + // when the key is inlineable — the comparator embeds the key body twice. ast_gc_guard() { var func = find_module_function_via_rtti(compiling_module(), @@target_where_order_by_splices_fold) if (func == null) return @@ -1223,14 +1296,16 @@ def test_where_order_by_emits_fused_loop(t : T?) { } t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper for fused emission") t |> equal(1, count_inner_for_loops(body_expr), "single fused prefilter loop") - t |> success(count_call(body_expr, "order_by_inplace") >= 1, "should call order_by_inplace on the buffer") + t |> success(count_call(body_expr, "order_inplace") >= 1, "should call order_inplace(buf, cmp)") + t |> equal(0, count_call(body_expr, "order_by_inplace"), "should not emit the key-taking order_by_inplace") t |> success(count_call(body_expr, "push_clone") >= 1, "should push_clone into the buffer") } } [test] -def test_where_order_by_take_emits_fused_top_n(t : T?) { - // `where |> order_by |> take(K)` splices into a fused prefilter loop + top_n_by on the buffer. +def test_where_order_by_take_emits_fused_top_n_with_cmp(t : T?) 
{ + // `where |> order_by(key) |> take(K)` splices into a fused prefilter loop + + // top_n_by_with_cmp(buf, K, cmp) when the key is inlineable. ast_gc_guard() { var func = find_module_function_via_rtti(compiling_module(), @@target_where_order_by_take_splices_fold) @@ -1241,7 +1316,8 @@ def test_where_order_by_take_emits_fused_top_n(t : T?) { } t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper for fused emission") t |> equal(1, count_inner_for_loops(body_expr), "single fused prefilter loop") - t |> success(count_call(body_expr, "top_n_by") >= 1, "should call top_n_by on the buffer") + t |> success(count_call(body_expr, "top_n_by_with_cmp") >= 1, "should call top_n_by_with_cmp on the buffer") + t |> equal(0, count_call(body_expr, "top_n_by"), "should not emit non-cmp top_n_by") t |> success(count_call(body_expr, "push_clone") >= 1, "should push_clone into the buffer") } } @@ -1366,3 +1442,113 @@ def test_select_where_to_array_correct_result(t : T?) { } } +// ── Gap 2 — select + where projection dedup (single-eval splice) ─────── + +[export, marker(no_coverage)] +def target_select_where_sum_dedup() : int { + // Pure projection with a distinctive constant (7919, prime). After the splice the + // constant appears EXACTLY ONCE in the body — in the projection-bind statement. + // Without the dedup the constant would appear twice: once inlined into the predicate + // via peel-substitution, and once cloned into the sum's valueExpr. + return _fold(each([1, 2, 3, 4, 5])._select(_ * 7919)._where(_ > 0).sum()) +} + +[test] +def test_select_where_projection_dedup(t : T?) { + // Single-eval splice: the projection constant 7919 must appear exactly once in the + // body — proves the projection is bound to a local once per element rather than + // inlined twice (once in predicate, once in valueExpr). 
+ ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_select_where_sum_dedup) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched, "should have return expression") + t |> equal(1, count_const_int(body_expr, 7919), + "projection constant should appear exactly once (single-eval dedup)") + t |> equal(1, count_inner_for_loops(body_expr), "single fused for-loop") + t |> equal(0, count_call(body_expr, "select"), "select should be inlined") + t |> equal(0, count_call(body_expr, "where_"), "where_ should be inlined") + } +} + +[test] +def test_select_where_dedup_correct_result(t : T?) { + t |> run("select+where dedup preserves arithmetic") @(t : T?) { + // [1,2,3,4,5] * 7919 = [7919, 15838, 23757, 31676, 39595], all > 0, + // sum = 7919 * (1+2+3+4+5) = 7919 * 15 = 118785 + t |> equal(118785, target_select_where_sum_dedup()) + } +} + +[export, marker(no_coverage)] +def target_select_where_impure_falls_to_tier2() : int { + // Impure projection (side-effecting function call) must NOT be moved outside the + // where if-wrap — that would visibly fire side effects on filter-rejected elements. + // The splice arm bails to tier 2 cascade in this case. + return _fold(each([1, 2, 3, 4, 5])._select(side_effect_select_proj(_))._where(_ > 5).count()) +} + +[test] +def test_select_where_impure_correctness(t : T?) { + t |> run("impure projection in select+where preserves per-element call count") @(t : T?) { + // Plain LINQ semantics: select runs the projection for EVERY element before the + // where filter sees the result. 5 input elements → 5 projection invocations, + // regardless of how many survive the filter. 
+ g_select_count_proj_hits = 0 + let n = target_select_where_impure_falls_to_tier2() + // projected: [2,4,6,8,10]; filtered >5: [6,8,10]; count = 3 + t |> equal(3, n) + t |> equal(5, g_select_count_proj_hits, "projection must fire once per source element") + } +} + +// ── Gap 1 — inlineable key fallback when key is side-effecting ─────────── + +var g_order_key_hits = 0 + +def side_effect_order_key(x : int) : int { + g_order_key_hits ++ + return -x +} + +[export, marker(no_coverage)] +def target_order_by_take_side_effecting_key() : array { + // Side-effecting key — try_make_inline_cmp's has_sideeffects gate must bail; the + // splice falls back to the keyed top_n_by entry point (not _with_cmp). + return <- _fold(each([3, 1, 4, 1, 5, 9, 2, 6])._order_by(side_effect_order_key(_)).take(3).to_array()) +} + +[test] +def test_order_by_take_side_effecting_key_falls_back(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), + @@target_order_by_take_side_effecting_key) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return <- $e(body_expr) + } + t |> success(r.matched, "should have return expression") + t |> success(count_call(body_expr, "top_n_by") >= 1, + "side-effecting key should fall back to keyed top_n_by") + t |> equal(0, count_call(body_expr, "top_n_by_with_cmp"), + "side-effecting key must NOT use the inline-cmp variant") + } +} + +[test] +def test_order_by_take_side_effecting_key_correct_result(t : T?) { + t |> run("side-effecting order_by key produces correct top-N by -key") @(t : T?) { + let got <- target_order_by_take_side_effecting_key() + // -key sorts ascending by -x ⇒ descending by x; take 3 largest. + let expected = [9, 6, 5] + t |> equal(length(got), length(expected)) + for (v, e in got, expected) { + t |> equal(v, e) + } + } +} +