diff --git a/benchmarks/sql/LINQ.md b/benchmarks/sql/LINQ.md index 8868addf0..54506d59c 100644 --- a/benchmarks/sql/LINQ.md +++ b/benchmarks/sql/LINQ.md @@ -84,10 +84,10 @@ Notation: `—` means the variant is not applicable for this benchmark (operator | Benchmark | Shape | m3f_old | m3f (Phase 2A) | Delta | |---|---|---:|---:|---| -| count_aggregate | `where → count` | 5 | 5 | parity (same counter loop) | -| chained_where | `where → where → count` | 17 | 8 | **2.1× faster** (fuses chained wheres into single `&&` predicate) | -| select_count | `select → count` | 15 | 2 | **7.5× faster** (counter lane evaluates projection per iteration to preserve side effects; optimizer DCEs pure projections, no array materialization) | -| to_array_filter | `where → select → to_array` | 11 | 11 | parity (after `each()` peel + reserve + workhorse `push`) | +| count_aggregate | `where → count` | 5 | 4 | parity-ish (1ns improvement from `each()` peel) | +| chained_where | `where → where → count` | 17 | 6 | **2.8× faster** (fuses chained wheres into single `&&` predicate; small gain from peel + const-ref param) | +| select_count | `select → count` | 15 | 0 | **∞ faster** — when the projection is pure (`has_sideeffects == false`) and the source has length, the counter lane shortcuts to `length(src)` and elides the loop entirely. See [macro_boost::has_sideeffects](../../daslib/macro_boost.das) and `linq_fold.das:plan_loop_or_count` | +| to_array_filter | `where → select → to_array` | 11 | 10 | parity (after `each()` peel + reserve + workhorse `push`) | Shapes outside Phase 2A scope now compile to plain linq (`m3f ≈ m3`). This is an intentional regression vs the historical `_old_fold` numbers — Boris's call ("we let it fall through unfolded, and we see performance issues. im ok being slower until we fix") as the forcing function for Phase 2B+. 
The previous "m3f = m3f_old (identical by construction)" baseline assumed `_fold` would dispatch to `_old_fold` on the unmatched path; Phase 2A drops that dispatch. diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das index d4eaa83e6..39ebbde3f 100644 --- a/daslib/linq_fold.das +++ b/daslib/linq_fold.das @@ -534,18 +534,30 @@ def private type_has_length(t : TypeDecl?) : bool { } [macro_function] -def private peel_each_length_source(var top : Expression?) : Expression? { - // If `top` is `each()` and `` has a length-supporting type, return `` so - // the emitted loop iterates the underlying container directly — lets the array-lane - // reserve fire and avoids the iterator wrapper. Iteration semantics are preserved - // (`for (it in each(arr))` and `for (it in arr)` yield the same element refs). - // Restricted to length-supporting types to keep `reserve(length(src))` valid. +def private is_each_call(call : ExprCall?) : bool { + //! `each` in daslib/builtin.das is generic, so the resolved `func.name` on a typed + //! call is the mangled instance name (e.g. `builtin\`each\`30908...`). The generic's + //! original name lives in `func.fromGeneric.name`. Match either. + if (call == null || call.func == null) return false + return (call.func.name == "each" + || (call.func.fromGeneric != null && call.func.fromGeneric.name == "each")) +} + +[macro_function] +def private peel_each(var top : Expression?) : Expression? { + // Unwrap `each(<src>)` to `<src>` when `<src>` is a true array (or fixed-size array). + // Iteration semantics are preserved: `for it in <src>` implicitly re-wraps via the + // same `each` overload. We gate on array-ness because peeling an iterator-typed + // argument (e.g. `each(range(10))`, `each(generator())`) would put the iterator in + // place — the downstream length shortcut and reserve-by-length hints assume an + // indexable source. Only peel when we can prove that's true. 
if (!(top is ExprCall)) return top var topCall = top as ExprCall - if (topCall.func == null || topCall.func.name != "each" - || topCall.arguments |> length != 1 - || !type_has_length(topCall.arguments[0]._type)) return top - return clone_expression(topCall.arguments[0]) + if (!is_each_call(topCall) || topCall.arguments |> length != 1) return top + let argExpr = topCall.arguments[0] + if ((argExpr == null || argExpr._type == null) + || (!argExpr._type.isGoodArrayType && !argExpr._type.isArray)) return top + return clone_expression(argExpr) } [macro_function] @@ -556,7 +568,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { // with a plain for-loop. Returns null for anything else — caller falls through unfolded. var (top, calls) = flatten_linq(expr) if (empty(calls)) return null - top = peel_each_length_source(top) + top = peel_each(top) let lastName = calls.back()._1.name if (lastName != "count" && lastName != "where_" && lastName != "select") return null let counterLane = lastName == "count" @@ -569,6 +581,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { var projection : Expression? var intermediateBinds : array var seenSelect = false + var allProjectionsPure = true var elementType = clone_type(top._type.firstType) var lastBindName = itName for (i in 0 .. intermediateCount) { @@ -593,6 +606,9 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { if (projection != null) { let prevWorkhorse = projection._type != null && projection._type.isWorkhorseType if (!prevWorkhorse) return null // chained non-workhorse selects — Phase 2B + if (has_sideeffects(projection)) { + allProjectionsPure = false + } let bindName = "`v`{at.line}`{at.column}`{length(intermediateBinds)}" intermediateBinds |> push <| qmacro_expr() { var $i(bindName) = $e(projection) @@ -606,6 +622,26 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? 
{ return null } } + if (projection != null && has_sideeffects(projection)) { + allProjectionsPure = false + } + // Counter-lane shortcut: when there's no filter and every projection in the chain is + // pure, the count is simply `length(source)`. Skip the loop entirely — no per-element + // increments, no per-element side-effect evaluation. Gated on `type_has_length` so we + // only emit `length(src)` when it's statically resolvable. + if (counterLane && whereCond == null && allProjectionsPure + && type_has_length(top._type)) { + var topExpr = clone_expression(top) + topExpr.genFlags.alwaysSafe = true + var res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) { + return length($i(srcName)) + }, $e(topExpr))) + res.force_at(at) + res.force_generated(true) + let blk = (res as ExprInvoke).arguments[0] as ExprMakeBlock + (blk._block as ExprBlock).arguments[0].flags.can_shadow = true + return res + } // Build the per-element loop body. var loopBody : Expression? if (counterLane) { @@ -618,7 +654,10 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { for (b in intermediateBinds) { sideEffectStmts |> push(b) } - if (projection != null) { + // Bind the final projection only when it might have side effects. Pure projections + // (the common case — `_._field * 2`) can be elided entirely; no need to rely on + // the optimizer to DCE a dead store afterwards. + if (projection != null && has_sideeffects(projection)) { let finalBindName = "`vfinal`{at.line}`{at.column}" sideEffectStmts |> push <| qmacro_expr() { var $i(finalBindName) = $e(projection) @@ -713,14 +752,31 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { var topExpr = clone_expression(top) topExpr.genFlags.alwaysSafe = true var res : Expression? + // Pick the block-parameter typedecl modifier by source shape: + // - iterator (rvalue, e.g. `each(range(10))`) — strip `-const` so the body can + // consume the iterator. 
Without the strip, daslang's typer reports + // "can't iterate over const iterator". + // - container with length (array/table/string/range/fixed-array) — keep modifiers + // so a `const&` source (e.g. `let arr <-`) matches the param exactly. + let topIsIter = top._type != null && top._type.isIterator if (counterLane) { - res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) { - var $i(accName) = 0 - for ($i(itName) in $i(srcName)) { - $e(loopBody) - } - return $i(accName) - }, $e(topExpr))) + if (topIsIter) { + res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) { + var $i(accName) = 0 + for ($i(itName) in $i(srcName)) { + $e(loopBody) + } + return $i(accName) + }, $e(topExpr))) + } else { + res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) { + var $i(accName) = 0 + for ($i(itName) in $i(srcName)) { + $e(loopBody) + } + return $i(accName) + }, $e(topExpr))) + } } else { let isIter = expr._type.isIterator // Pre-reserve the accumulator to the source's length when the source has a known @@ -736,7 +792,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { return <- $i(accName).to_sequence_move() }, $e(topExpr))) } elif (sourceHasLength) { - res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) { + res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) { var $i(accName) : array<$t(elementType)> $i(accName) |> reserve(length($i(srcName))) for ($i(itName) in $i(srcName)) { diff --git a/daslib/macro_boost.das b/daslib/macro_boost.das index 02cb2923b..bac8e2c2f 100644 --- a/daslib/macro_boost.das +++ b/daslib/macro_boost.das @@ -149,3 +149,166 @@ def public collect_labels(expr : ExpressionPtr) { return <- res } +[macro_function] +def public has_sideeffects(expr : Expression?) : bool { + //! Conservative side-effect detection. Returns true when the expression has — or + //! might have — side effects. Returns false ONLY when provably pure (no function + //! 
calls, no heap allocation, no container mutation). + //! + //! Intended for macro-time elision of discardable evaluations. + //! Callers treat false as a promise; true is the safe default — when in doubt, true. + // null / compiler-tagged-pure / variable reads / constant literals — leaf, safe. + if (expr == null || expr.flags.noSideEffects + || expr is ExprVar + || expr is ExprConstInt || expr is ExprConstInt8 || expr is ExprConstInt16 + || expr is ExprConstInt64 || expr is ExprConstUInt || expr is ExprConstUInt8 + || expr is ExprConstUInt16 || expr is ExprConstUInt64 || expr is ExprConstFloat + || expr is ExprConstDouble || expr is ExprConstBool || expr is ExprConstString + || expr is ExprConstPtr || expr is ExprConstRange || expr is ExprConstURange + || expr is ExprConstRange64 || expr is ExprConstURange64 + || expr is ExprConstEnumeration || expr is ExprConstBitfield) return false + // Member access — recurse into operand. + if (expr is ExprField) return has_sideeffects((expr as ExprField).value) + if (expr is ExprSafeField) return has_sideeffects((expr as ExprSafeField).value) + if (expr is ExprSwizzle) return has_sideeffects((expr as ExprSwizzle).value) + // Pointer / reference artifacts. + if (expr is ExprRef2Value) return has_sideeffects((expr as ExprRef2Value).subexpr) + if (expr is ExprRef2Ptr) return has_sideeffects((expr as ExprRef2Ptr).subexpr) + if (expr is ExprPtr2Ref) return has_sideeffects((expr as ExprPtr2Ref).subexpr) + if (expr is ExprAddr) return false + // Type / variant checks. + if (expr is ExprIs) return has_sideeffects((expr as ExprIs).subexpr) + if (expr is ExprIsVariant) return has_sideeffects((expr as ExprIsVariant).value) + if (expr is ExprAsVariant) return has_sideeffects((expr as ExprAsVariant).value) + if (expr is ExprSafeAsVariant) return has_sideeffects((expr as ExprSafeAsVariant).value) + // Cast — recurse. + if (expr is ExprCast) return has_sideeffects((expr as ExprCast).subexpr) + // Compile-time meta. 
+ if (expr is ExprTypeInfo || expr is ExprTypeDecl || expr is ExprTag) return false + // Subscripts. + if (expr is ExprAt) { + let at_e = expr as ExprAt + // tables auto-insert on missing key — unsafe; arrays/strings safe (read-only). + if (at_e.subexpr == null || at_e.subexpr._type == null + || at_e.subexpr._type.isGoodTableType) return true + return has_sideeffects(at_e.subexpr) || has_sideeffects(at_e.index) + } + if (expr is ExprSafeAt) { + let sat = expr as ExprSafeAt + return has_sideeffects(sat.subexpr) || has_sideeffects(sat.index) + } + // Null coalescing. + if (expr is ExprNullCoalescing) { + let nc = expr as ExprNullCoalescing + return has_sideeffects(nc.subexpr) || has_sideeffects(nc.defaultValue) + } + // String builder — string heap allocation is no-op by compiler; recurse into operands. + if (expr is ExprStringBuilder) { + let sb = expr as ExprStringBuilder + for (e in sb.elements) { + if (has_sideeffects(e)) return true + } + return false + } + // key_exists is a pure container read. + if (expr is ExprKeyExists) { + let ke = expr as ExprKeyExists + for (a in ke.arguments) { + if (has_sideeffects(a)) return true + } + return false + } + // Function-call-shaped expressions: ExprCall (regular call) and ExprOp1/ExprOp2/ExprOp3 + // (operators, which also resolve to a function). Two-layer check: + // + // 1. Mutation ops (`++`, `--`, `+=`, `-=`, …) are unconditionally unsafe — + // blacklisted up front, regardless of how the resolved builtin happens to be + // flagged. Catches builtins that the C++ side forgot to mark with + // `knownSideEffects`/`unsafeOperation`. + // 2. Trust `func.flags` when `func != null` — covers user-defined operator + // overloads (e.g. `struct Foo { def operator +(...) }`), which fall through + // `func_has_sideeffects` as non-builtin → unsafe. Fall back to the op-name + // allowlist only when `func == null` (typer left it unresolved, e.g. after + // partial constant folding). 
`/` and `%` stay UNSAFE (div-by-zero panic; + // design decision). + // + // `is`/`as` on handled types is EXACT-rtti (see CLAUDE.md), so each shape needs its + // own branch — can't cast ExprOp2 to ExprCallFunc even though the C++ class inherits. + if (expr is ExprOp1) { + let e1 = expr as ExprOp1 + // func != null → trust func flags (catches user overloads); func == null → fall + // back to op-name allowlist (handles partial-folding artifacts). Mutation ops + // are unconditionally unsafe (in case a C++ builtin missed the side-effect flag). + if (is_mutation_op1(e1.op) + || (e1.func != null && func_has_sideeffects(e1.func)) + || (e1.func == null && !is_safe_op1(e1.op))) return true + return has_sideeffects(e1.subexpr) + } + if (expr is ExprOp2) { + let e2 = expr as ExprOp2 + if (is_mutation_op2(e2.op) || e2.op == "/" || e2.op == "%" + || (e2.func != null && func_has_sideeffects(e2.func)) + || (e2.func == null && !is_safe_op2(e2.op))) return true + return has_sideeffects(e2.left) || has_sideeffects(e2.right) + } + if (expr is ExprOp3) { + let e3 = expr as ExprOp3 + // ExprOp3 is the only ternary `?:` in daslang — pure if operands pure. + return has_sideeffects(e3.subexpr) || has_sideeffects(e3.left) || has_sideeffects(e3.right) + } + if (expr is ExprCall) { + let ec = expr as ExprCall + if (func_has_sideeffects(ec.func)) return true + for (a in ec.arguments) { + if (has_sideeffects(a)) return true + } + return false + } + // Default: unknown → unsafe. + return true +} + +[macro_function] +def private func_has_sideeffects(f : Function?) : bool { + //! True when calling `f` may have side effects. Allowlists builtins + //! (`flags.builtIn`) without `knownSideEffects` or `unsafeOperation`. + return (f == null || !f.flags.builtIn + || f.flags.knownSideEffects || f.flags.unsafeOperation) +} + +[macro_function] +def private is_safe_op1(op : das_string) : bool { + //! Unary operators that are pure on workhorse types — no overflow trap, no mutation. + //! 
Excludes `++` / `--` (handled by is_mutation_op1). + return op == "-" || op == "!" || op == "~" || op == "+" +} + +[macro_function] +def private is_safe_op2(op : das_string) : bool { + //! Binary operators that are pure on workhorse types. Excludes `/`, `%` (div-by-zero + //! panic — design decision) and all compound-assignment ops (handled by is_mutation_op2). + return (op == "+" || op == "-" || op == "*" + || op == "==" || op == "!=" || op == "<" || op == "<=" || op == ">" || op == ">=" + || op == "&" || op == "|" || op == "^" || op == "<<" || op == ">>" + || op == "&&" || op == "||") +} + +[macro_function] +def private is_mutation_op1(op : das_string) : bool { + //! Unary operators that mutate their operand. Unconditionally unsafe — bypasses any + //! flag check on the resolved builtin (in case the C++ side forgot to mark it). + //! `++` / `--` are prefix; `+++` / `---` are the daslang AST op-strings for postfix + //! increment/decrement (the trailing-plus / trailing-minus naming). + return op == "++" || op == "--" || op == "+++" || op == "---" +} + +[macro_function] +def private is_mutation_op2(op : das_string) : bool { + //! Compound-assignment operators (mutate the left operand). Same unconditional-unsafe + //! treatment as is_mutation_op1. + return (op == "+=" || op == "-=" || op == "*=" || op == "/=" || op == "%=" + || op == "&=" || op == "|=" || op == "^=" + || op == "<<=" || op == ">>=" + || op == "&&=" || op == "||=" || op == "^^=") +} + diff --git a/tests/aot/CMakeLists.txt b/tests/aot/CMakeLists.txt index 3d08dcdbc..b19f27d84 100644 --- a/tests/aot/CMakeLists.txt +++ b/tests/aot/CMakeLists.txt @@ -270,6 +270,17 @@ FILE(GLOB AOT_MACRO_CALL_FILES RELATIVE ${PROJECT_SOURCE_DIR} CONFIGURE_DEPENDS # by the actual test files and don't need standalone AOT entries. 
list(FILTER AOT_MACRO_CALL_FILES EXCLUDE REGEX "/_") +# AOT for macro_boost test files +FILE(GLOB AOT_MACRO_BOOST_FILES RELATIVE ${PROJECT_SOURCE_DIR} CONFIGURE_DEPENDS "tests/macro_boost/*.das") +# Exclude the call_macro probe helper (prefixed with `_`); it's required transitively +# by the actual test file. +list(FILTER AOT_MACRO_BOOST_FILES EXCLUDE REGEX "/_") + +# Macro_boost test module files (probe call_macro required transitively by tests) +SET(AOT_MACRO_BOOST_MODULE_FILES + tests/macro_boost/_has_sideeffects_probe.das +) + # AOT for match test files FILE(GLOB AOT_MATCH_FILES RELATIVE ${PROJECT_SOURCE_DIR} CONFIGURE_DEPENDS "tests/match/*.das") @@ -547,6 +558,14 @@ add_custom_target(test_aot_macro_call) SET(MACRO_CALL_AOT_GENERATED_SRC) DAS_AOT("${AOT_MACRO_CALL_FILES}" MACRO_CALL_AOT_GENERATED_SRC test_aot_macro_call daslang) +add_custom_target(test_aot_macro_boost) +SET(MACRO_BOOST_AOT_GENERATED_SRC) +DAS_AOT("${AOT_MACRO_BOOST_FILES}" MACRO_BOOST_AOT_GENERATED_SRC test_aot_macro_boost daslang) + +add_custom_target(test_aot_macro_boost_modules) +SET(MACRO_BOOST_MODULES_AOT_GENERATED_SRC) +DAS_AOT_LIB("${AOT_MACRO_BOOST_MODULE_FILES}" MACRO_BOOST_MODULES_AOT_GENERATED_SRC test_aot_macro_boost_modules daslang) + add_custom_target(test_aot_match) SET(MATCH_AOT_GENERATED_SRC) DAS_AOT("${AOT_MATCH_FILES}" MATCH_AOT_GENERATED_SRC test_aot_match daslang) @@ -680,6 +699,8 @@ SOURCE_GROUP_FILES("aot generated" JSON_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" LINQ_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" LINQ_MODULES_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" MACRO_CALL_AOT_GENERATED_SRC) +SOURCE_GROUP_FILES("aot generated" MACRO_BOOST_AOT_GENERATED_SRC) +SOURCE_GROUP_FILES("aot generated" MACRO_BOOST_MODULES_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" MATCH_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" MATH_AOT_GENERATED_SRC) SOURCE_GROUP_FILES("aot generated" MATH_MODULES_AOT_GENERATED_SRC) @@ -748,6 +769,8 @@ 
add_executable(test_aot ${DAS_DASCRIPT_MAIN_SRC} ${LINQ_AOT_GENERATED_SRC} ${LINQ_MODULES_AOT_GENERATED_SRC} ${MACRO_CALL_AOT_GENERATED_SRC} + ${MACRO_BOOST_AOT_GENERATED_SRC} + ${MACRO_BOOST_MODULES_AOT_GENERATED_SRC} ${MATCH_AOT_GENERATED_SRC} ${MATH_AOT_GENERATED_SRC} ${MATH_MODULES_AOT_GENERATED_SRC} @@ -799,6 +822,7 @@ ADD_DEPENDENCIES(test_aot libDaScriptAot test_aot_jobque test_aot_json test_aot_jsonrpc test_aot_linq test_aot_linq_modules test_aot_macro_call + test_aot_macro_boost test_aot_macro_boost_modules test_aot_match test_aot_math test_aot_math_modules test_aot_module_tests test_aot_option diff --git a/tests/linq/test_linq_fold_ast.das b/tests/linq/test_linq_fold_ast.das index e47cf2e78..78a70051c 100644 --- a/tests/linq/test_linq_fold_ast.das +++ b/tests/linq/test_linq_fold_ast.das @@ -430,6 +430,26 @@ def target_select_count_fold() : int { return _fold(each([1, 2, 3, 4, 5])._select(_ * 2).count()) } +var g_select_count_proj_hits = 0 + +def side_effect_select_proj(x : int) : int { + g_select_count_proj_hits ++ + return x * 2 +} + +[export, marker(no_coverage)] +def target_select_count_fold_impure() : int { + return _fold(each([1, 2, 3, 4, 5])._select(side_effect_select_proj(_)).count()) +} + +// `each(range(...))` — argument is a `range`, not an array. peel_each must NOT fire +// here; we'd otherwise replace the iterator-yielding each call with the raw range +// and downstream length-shortcut / reserve-by-length would silently misbehave. +[export, marker(no_coverage)] +def target_each_range_count() : int { + return _fold(each(range(10))._where(_ > 5).count()) +} + // ── Tests: `_fold` Phase-2A loop emission ────────────────────────────── // Phase-2A `_fold` emits explicit for-loops inside an `invoke($block, $src)` wrapper // (no `ExprArrayComprehension` nodes). Each test asserts the invoke wrapper exists @@ -616,3 +636,171 @@ def test_select_count_fold_result(t : T?) 
{ t |> equal(target_select_count_fold(), 5) } } + +// ── Counter-lane projection elision (has_sideeffects integration) ────── +// For pure counter chains (`_select(_ * 2).count()`, bare `.count()`, etc.) on +// length-supporting sources, the planner emits a `length(source)` shortcut and +// the for-loop is dropped entirely. For impure projections (function call w/ +// side effects), the per-element loop is preserved with the discardable bind. + +// Returns the number of top-level ExprFor statements in the counter-lane invoke's +// inner block (-1 if the shape is unexpected). Pure shortcut: `[var src, return length(src)]` → 0 for-loops. +// Impure loop: `[var src, var acc=0, for {...}, return acc]` → 1 for-loop. +def count_inner_for_loops(body_expr : Expression?) : int { + if (!(body_expr is ExprInvoke)) return -1 + let inv = body_expr as ExprInvoke + if (empty(inv.arguments) || !(inv.arguments[0] is ExprMakeBlock)) return -1 + let mb = inv.arguments[0] as ExprMakeBlock + let outer = mb._block as ExprBlock + if (outer == null) return -1 + var n = 0 + for (stmt in outer.list) { + if (stmt is ExprFor) { + n ++ + } + } + return n +} + +// Returns the number of stmts in the for-body, or -1 if no for loop exists. +def count_for_body_stmts(body_expr : Expression?) : int { + if (!(body_expr is ExprInvoke)) return -1 + let inv = body_expr as ExprInvoke + if (empty(inv.arguments) || !(inv.arguments[0] is ExprMakeBlock)) return -1 + let mb = inv.arguments[0] as ExprMakeBlock + let outer = mb._block as ExprBlock + if (outer == null) return -1 + for (stmt in outer.list) { + if (stmt is ExprFor) { + let fe = stmt as ExprFor + let fbody = fe.body as ExprBlock + if (fbody == null) return -1 + return length(fbody.list) + } + } + return -1 +} + +[test] +def test_pure_projection_uses_length_shortcut(t : T?) { + // `_select(_ * 2).count()` on a length-supporting source should collapse to + // `length(source)` — no for-loop emitted at all. 
+ ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_select_count_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper") + let n = count_inner_for_loops(body_expr) + t |> equal(n, 0, "pure select-count must emit length() shortcut (no for loop)") + } +} + +[test] +def test_bare_count_uses_length_shortcut(t : T?) { + // Bare `.count()` on an array source should also use the length shortcut. + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_count_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper") + let n = count_inner_for_loops(body_expr) + t |> equal(n, 0, "bare count on length-supporting source must use length() shortcut") + } +} + +[test] +def test_impure_projection_keeps_bind(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_select_count_fold_impure) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected counter-lane invoke wrapper") + let n = count_for_body_stmts(body_expr) + t |> equal(n, 2, "impure projection should preserve vfinal bind (for-body has bind + ++acc)") + } +} + +// ── peel_each invariant: each(<array>) must always be peeled ────────── +// The planner's `peel_each` helper unwraps `each(x)` only when x is an array (or +// fixed-size array), so the emitted block sees the container directly. Without this, the +// length() shortcut would never fire (each returns an iterator, which has no +// length) and array-lane reserve would emit against the iterator wrapper. 
+ +// Returns the second arg of the invoke (the source expression passed in). If +// it's still an ExprCall to `each`, peel didn't run. +def invoke_source_is_each_wrapped(body_expr : Expression?) : bool { + if (!(body_expr is ExprInvoke)) return false + let inv = body_expr as ExprInvoke + if (length(inv.arguments) < 2 || !(inv.arguments[1] is ExprCall)) return false + let src_call = inv.arguments[1] as ExprCall + if (src_call.func == null) return false + return (src_call.func.name == "each" + || (src_call.func.fromGeneric != null && src_call.func.fromGeneric.name == "each")) +} + +[test] +def test_peel_each_on_array_source(t : T?) { + // Sanity: target_select_count_fold uses `each([1,2,3,4,5])`. After peel, the + // invoke wrapper must NOT receive an each-wrapped source. + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_select_count_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper") + t |> success(!invoke_source_is_each_wrapped(body_expr), + "peel_each must unwrap each(array) at macro time") + } +} + +[test] +def test_peel_each_on_bare_count(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_count_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper") + t |> success(!invoke_source_is_each_wrapped(body_expr), + "peel_each must unwrap each(array) at macro time") + } +} + +// Negative case: `each(range(...))` argument is an iterator-yielding range, not an +// array. peel_each must NOT fire — peeling would drop the each call and put the raw +// range in source position; the downstream length-shortcut and reserve hints would +// then misbehave on a non-indexable source. 
+[test] +def test_peel_each_skips_non_array_source(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_each_range_count) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper") + t |> success(invoke_source_is_each_wrapped(body_expr), + "peel_each must keep each(range) wrapper — only arrays may be peeled") + } +} + +[test] +def test_target_each_range_count_runs(t : T?) { + // Behavioral: ensure the iterator-source chain still compiles and produces the + // expected count. range(10) → [0,1,2,3,4,5,6,7,8,9]; filter > 5 → 4 elements. + t |> equal(target_each_range_count(), 4) +} diff --git a/tests/macro_boost/_has_sideeffects_probe.das b/tests/macro_boost/_has_sideeffects_probe.das new file mode 100644 index 000000000..4b7dad96b --- /dev/null +++ b/tests/macro_boost/_has_sideeffects_probe.das @@ -0,0 +1,32 @@ +// Helper module for tests/macro_boost/test_has_sideeffects.das. +// +// Provides ``_test_has_sideeffects(expr)`` — a [call_macro] that invokes +// ``macro_boost::has_sideeffects`` on its argument at macro time and replaces +// the call with an ``ExprConstBool`` of the result. Lets test functions +// assert side-effect classification by writing ``t |> equal(_test_has_sideeffects(...), false)``. +// +// Lives in a separate ``.das`` with a leading underscore so dastest's file +// discovery skips it as a test. +options gen2 +options indenting = 4 + +module _has_sideeffects_probe public + +require daslib/ast public +require daslib/ast_boost +require daslib/macro_boost public + +[call_macro(name = "_test_has_sideeffects")] +class private TestHasSideeffects : AstCallMacro { + def override visit(prog : ProgramPtr; mod : Module?; var call : ExprCallMacro?) : Expression? 
{ + if (call.arguments |> length != 1) { + macro_error(prog, call.at, "expecting _test_has_sideeffects(expression)") + return null + } + let b = has_sideeffects(call.arguments[0]) + var res : Expression? = new ExprConstBool(at = call.at, value = b) + res.force_at(call.at) + res.force_generated(true) + return res + } +} diff --git a/tests/macro_boost/test_has_sideeffects.das b/tests/macro_boost/test_has_sideeffects.das new file mode 100644 index 000000000..68dfefed7 --- /dev/null +++ b/tests/macro_boost/test_has_sideeffects.das @@ -0,0 +1,224 @@ +options gen2 +require dastest/testing_boost public +require _has_sideeffects_probe public + +// ── Side-effect-bearing helpers (used as test sources) ──────────────────── + +var g_proj_hits = 0 + +def side_effect_fn(_x : int) : int { + g_proj_hits ++ + return _x * 2 +} + +struct Foo { + a : int + b : int +} + +// ── SAFE cases — has_sideeffects must return false ─────────────────────── +// +// Note: `let _x = 5` lets the compiler fold expressions using `_x` into constants +// before the call_macro runs (so the macro sees ExprConstInt, not the original +// ExprOp2). To exercise the operator paths explicitly, tests below use `var`. + +[test] +def test_const_int(t : T?) { + t |> equal(_test_has_sideeffects(42), false) +} + +[test] +def test_const_string(t : T?) { + t |> equal(_test_has_sideeffects("hello"), false) +} + +[test] +def test_const_bool(t : T?) { + t |> equal(_test_has_sideeffects(true), false) +} + +[test] +def test_var_read(t : T?) { + var _x = 5 + t |> equal(_test_has_sideeffects(_x), false) +} + +[test] +def test_arith_pure(t : T?) { + var _x = 5 + t |> equal(_test_has_sideeffects(_x + 1), false) +} + +[test] +def test_arith_nested(t : T?) { + var _x = 5 + var _y = 3 + t |> equal(_test_has_sideeffects(_x * 2 + _y - 3), false) +} + +[test] +def test_field_access(t : T?) { + var _s = Foo(a = 1, b = 2) + t |> equal(_test_has_sideeffects(_s.a), false) +} + +[test] +def test_array_index(t : T?) 
{ + var _arr = [1, 2, 3, 4, 5] + t |> equal(_test_has_sideeffects(_arr[0]), false) +} + +[test] +def test_safe_table_lookup(t : T?) { + var tab : table + tab |> insert("k", 1) + t |> equal(_test_has_sideeffects(tab?["k"]), false) +} + +[test] +def test_comparison(t : T?) { + var _x = 5 + t |> equal(_test_has_sideeffects(_x == 0), false) +} + +[test] +def test_ternary(t : T?) { + var _x = 5 + t |> equal(_test_has_sideeffects(_x > 0 ? 1 : 2), false) +} + +[test] +def test_null_coalescing(t : T?) { + var _p : int? = null + t |> equal(_test_has_sideeffects(_p ?? 0), false) +} + +[test] +def test_logical_and(t : T?) { + var _x = 5 + var _y = 10 + t |> equal(_test_has_sideeffects(_x > 0 && _y < 100), false) +} + +[test] +def test_unary_neg(t : T?) { + var _x = 5 + t |> equal(_test_has_sideeffects(-_x), false) +} + +[test] +def test_string_builder_safe(t : T?) { + var _x = 5 + t |> equal(_test_has_sideeffects("hello {_x}"), false) +} + +// ── UNSAFE cases — has_sideeffects must return true ────────────────────── + +[test] +def test_user_call_unsafe(t : T?) { + var _x = 5 + t |> equal(_test_has_sideeffects(side_effect_fn(_x)), true) +} + +[test] +def test_table_insert_subscript(t : T?) { + var _tab : table + // _tab[k] auto-inserts a default value if k is missing — side effect. + t |> equal(_test_has_sideeffects(_tab["k"]), true) +} + +[test] +def test_division_unsafe(t : T?) { + var _x = 10 + var _y = 2 + // `/` can panic on div-by-zero — kept on the unsafe side by explicit blacklist. + t |> equal(_test_has_sideeffects(_x / _y), true) +} + +[test] +def test_modulo_unsafe(t : T?) { + var _x = 10 + var _y = 3 + t |> equal(_test_has_sideeffects(_x % _y), true) +} + +[test] +def test_array_literal_alloc(t : T?) { + t |> equal(_test_has_sideeffects([1, 2, 3]), true) +} + +[test] +def test_struct_construct_alloc(t : T?) { + t |> equal(_test_has_sideeffects(Foo(a = 1, b = 2)), true) +} + +[test] +def test_string_builder_unsafe_part(t : T?) 
{ + var _x = 5 + // The string interpolation itself is safe, but a side-effecting operand propagates. + t |> equal(_test_has_sideeffects("hello {side_effect_fn(_x)}"), true) +} + +// ── Mutation operators — must be unsafe regardless of resolved-builtin flags ── +// +// `++`/`--` (ExprOp1) and compound-assignment ops (`+=`/`-=`/… ExprOp2) mutate +// their operand. has_sideeffects blacklists these up front so a builtin +// mistakenly missing `knownSideEffects`/`unsafeOperation` can't classify them +// as pure. Covers Copilot review concern from PR #2691. + +[test] +def test_postfix_increment_unsafe(t : T?) { + var _x = 5 + t |> equal(_test_has_sideeffects(_x ++), true) +} + +[test] +def test_postfix_decrement_unsafe(t : T?) { + var _x = 5 + t |> equal(_test_has_sideeffects(_x --), true) +} + +// ── User-defined operator overload — must be unsafe (non-builtin func) ─── +// +// Overloads of `+`, `*`, etc. on user types can carry arbitrary side effects. +// The op-name allowlist must NOT bypass the func-flag check when func != null +// — otherwise a custom `def operator +` slips through as pure. Covers Copilot +// review concern from PR #2691. + +var g_op_overload_hits = 0 + +struct private SideEffectingNumber { + v : int +} + +def operator +(a : SideEffectingNumber; b : int) : SideEffectingNumber { + g_op_overload_hits ++ + return SideEffectingNumber(v = a.v + b) +} + +[test] +def test_user_op_overload_unsafe(t : T?) { + var _c = SideEffectingNumber(v = 5) + t |> equal(_test_has_sideeffects(_c + 1), true) +} + +// ── Conservative-unsafe cases — daslang-generic helpers fall through ───── +// +// `length`, `key_exists`, etc. are defined as daslang generics in builtin.das +// (`def length(a : auto | #) ...`). The compile-time func resolution doesn't +// always reach a `flags.builtIn=true` C++ overload before the call_macro runs, +// so the conservative classifier rejects them. 
A future Function-level +// `[no_side_effects]` annotation could let user-defined helpers opt in. + +[test] +def test_generic_length_unresolved(t : T?) { + var _arr = [1, 2, 3] + t |> equal(_test_has_sideeffects(length(_arr)), true) +} + +[test] +def test_key_exists_unresolved(t : T?) { + var tab : table + tab |> insert("k", 1) + t |> equal(_test_has_sideeffects(key_exists(tab, "k")), true) +}