From 7e9027c2b7296852e74da63f9f10a6fcc364cb32 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Tue, 19 May 2026 02:42:32 -0700 Subject: [PATCH 1/8] lambda: copyable (alias), delete requires unsafe Flip two predicates on tLambda in TypeDecl: - canCopy() false -> true: lambda is a fat pointer (one char* to a heap capture frame), so '=' aliases cheaply; pass-by-value is free. - isSafeToDelete() true -> false: aliases mean 'delete lam' must be gated by unsafe, matching raw-pointer and class delete. This cascades through composites: array<lambda<...>>, structs with a lambda field, tuples/variants containing a lambda all inherit the unsafe-delete rule (Structure::isSafeToDelete walks fields, and the existing tArray/tTable branches recurse into element types). Side-effect (intended UX win): capturing a lambda from inside another lambda no longer needs an explicit @capture(<- inner) annotation - default capture_any now picks copy (pointer alias) instead of forced move. Existing @capture(<- ...) / @capture(& ...) annotations stay valid. 
Sites that newly require unsafe-delete: - daslib/archive.das: 4 generic 'serialize' overloads (struct/tuple/ variant/table) - daslib/decs.das: restart / after_gc / commit - daslib/heartbeat.das: set_heartbeat - tests: aot/test_lambdas, language/lambda_capture, lambda_capture_modes, jit_tests/invoke New positive coverage (interpreter + AOT mirror): - tests/language/lambda_copy.das, tests/aot/test_lambda_copy.das - '=' aliases the capture frame (proven by alternating invocations) - pass-by-value (independent local vars per invoke) - struct copy aliases lambda field - implicit capture-of-lambda inside another lambda Doc updates: - doc/source/reference/language/lambdas.rst - doc/source/reference/language/move_copy_clone.rst (type table row) - doc/source/reference/tutorials/14_lambdas.rst - doc/source/reference/tutorials/20_lifetime.rst (delete-needs-unsafe) - tutorials/language/14_lambdas.das (drop "lambdas can't be copied" comment, add copy/alias note) - CLAUDE.md (Pass-by-value section) Build infra (bundled - pre-existing limitations that surfaced): - CMakeLists.txt DAS_AOT_EXT: chunk AOT batches (60 files each) to stay under Windows' 32K command-line limit when worktree/CI paths are deep (absolute paths blow it; chunked add_custom_command runs COMMAND-per-batch sequentially). - tests/aot/CMakeLists.txt: gate test_aot_live_host* AOT under IF(NOT DAS_HV_DISABLED), skip daslib/just_in_time.das from the daslib AOT batch when DAS_LLVM_DISABLED=ON. Both AOT-compile by requiring external modules (dashv, LLVM.dll) and fail unconditionally without them. Verification: ctest 35/35; interpreter 7804/7810 (6 platform-skip); AOT (--use-aot) 7193/7199 (6 platform-skip). HV enabled, LLVM off. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 1 + CMakeLists.txt | 32 ++++++- daslib/archive.das | 8 +- daslib/decs.das | 10 ++- daslib/heartbeat.das | 2 +- doc/source/reference/language/lambdas.rst | 33 ++++--- .../reference/language/move_copy_clone.rst | 8 +- doc/source/reference/tutorials/14_lambdas.rst | 21 +++-- .../reference/tutorials/20_lifetime.rst | 24 ++++-- src/ast/ast_typedecl.cpp | 4 +- tests/aot/CMakeLists.txt | 40 ++++++--- tests/aot/test_lambda_copy.das | 79 +++++++++++++++++ tests/aot/test_lambdas.das | 9 +- tests/jit_tests/invoke.das | 2 +- tests/language/lambda_capture.das | 2 +- tests/language/lambda_capture_modes.das | 6 +- tests/language/lambda_copy.das | 85 +++++++++++++++++++ tutorials/language/14_lambdas.das | 14 ++- 18 files changed, 316 insertions(+), 64 deletions(-) create mode 100644 tests/aot/test_lambda_copy.das create mode 100644 tests/language/lambda_copy.das diff --git a/CLAUDE.md b/CLAUDE.md index 086dc62d6e..2b3ff1d18e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -150,6 +150,7 @@ All code MUST use gen2 syntax (add `options gen2` at the top of every file). Key - Structs/arrays/tables always pass by reference — no `&` needed. - Only **workhorse types** (`int`, `float`, `bool`, `string`, …, `isWorkhorseType` on the C++ side) pass by value. - **AST pointers (gc_node) pass by value** — copying the pointer, no refcount, no allocation. `def foo(p : ExpressionPtr)` shares the node; `var p` lets you reassign locally; `var p : ExpressionPtr&` propagates reassignment back. For mutable field access, take the param as `var`. +- **Lambdas pass by value (copy aliases the capture frame).** A `lambda<…>` is a fat pointer to a heap-allocated capture frame, so `=` copies the pointer (creates an alias) and pass-by-value is free. **`delete lam` requires `unsafe`** since other aliases may still be live — same rule as raw pointer / class `delete`. 
The rule cascades: `array>`, structs with a lambda field, tuple/variant containing a lambda — all inherit the unsafe-delete requirement. - **Strings:** `var s : string` is a writable local copy (no propagation). `var s : string&` propagates. `:=` clones into current context's heap (required across contexts); plain `=` copies the pointer. - **Residual `smart_ptr` types** (`ProgramPtr`, `ContextPtr`, `FileAccessPtr`, `DebugAgentPtr`, `VisitorAdapterPtr`) still use refcount semantics — variables holding them need `var inscope`. AST types do NOT — see below. diff --git a/CMakeLists.txt b/CMakeLists.txt index 07e219872d..fae362b5d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -116,7 +116,13 @@ FILE(GLOB DAS_AOT_DASLIB_DEPENDS CONFIGURE_DEPENDS "${PROJECT_SOURCE_DIR}/daslib MACRO(DAS_AOT_EXT input_files genList mainTarget dasAotTool dasAotToolArg) set(all_depends ${dasAotTool} ${PROJECT_SOURCE_DIR}/utils/aot/main.das ${DAS_AOT_DASLIB_DEPENDS}) set(all_outputs "") - set(command_args "") + # Per-batch arg buffer; chunked to keep each invocation under Windows' + # 32K command-line limit when worktree paths are long. + set(_DAS_AOT_BATCH_SIZE 60) + set(_batches "") + set(_batch_idx 0) + set(_batch_count 0) + set(_current_batch "") list(LENGTH input_files num_files) foreach(input ${input_files}) @@ -140,18 +146,36 @@ MACRO(DAS_AOT_EXT input_files genList mainTarget dasAotTool dasAotToolArg) list(APPEND all_outputs ${out_src}) list(APPEND ${genList} ${out_src}) - # Build command arguments: tool_arg input1 output1 tool_arg input2 output2 ... 
- list(APPEND command_args ${dasAotToolArg} ${input_src} ${out_arg}) + # Build command arguments for the current batch + list(APPEND _current_batch ${dasAotToolArg} ${input_src} ${out_arg}) file(MAKE_DIRECTORY ${out_dir}) set_source_files_properties(${out_src} PROPERTIES GENERATED TRUE) + + math(EXPR _batch_count "${_batch_count} + 1") + if(_batch_count GREATER_EQUAL ${_DAS_AOT_BATCH_SIZE}) + set(_batches_${_batch_idx} "${_current_batch}") + list(APPEND _batches ${_batch_idx}) + math(EXPR _batch_idx "${_batch_idx} + 1") + set(_current_batch "") + set(_batch_count 0) + endif() + endforeach() + if(_batch_count GREATER 0) + set(_batches_${_batch_idx} "${_current_batch}") + list(APPEND _batches ${_batch_idx}) + endif() + + set(_commands "") + foreach(i ${_batches}) + list(APPEND _commands COMMAND ${dasAotTool} ${PROJECT_SOURCE_DIR}/utils/aot/main.das -- ${_batches_${i}} ${CROSS_PLATFORM}) endforeach() ADD_CUSTOM_COMMAND( OUTPUT ${all_outputs} DEPENDS ${all_depends} COMMENT "AOT compiling files..." - COMMAND ${dasAotTool} ${PROJECT_SOURCE_DIR}/utils/aot/main.das -- ${command_args} ${CROSS_PLATFORM} + ${_commands} ) set(custom_name ${mainTarget}_genaot) ADD_CUSTOM_TARGET(${custom_name} DEPENDS ${all_outputs}) diff --git a/daslib/archive.das b/daslib/archive.das index 2707f7d830..e838ad890a 100644 --- a/daslib/archive.das +++ b/daslib/archive.das @@ -158,7 +158,7 @@ def public serialize(var arch : Archive; var value : float4x4) { def public serialize(var arch : Archive; var value : auto(TT)&) { //! Serializes struct by serializing each field. if (arch.reading) { - delete value + unsafe { delete value; } } apply(value) $ [unused_argument(name)] (name : string; field) { arch |> _::serialize(field) @@ -169,7 +169,7 @@ def public serialize(var arch : Archive; var value : auto(TT)&) { def public serialize(var arch : Archive; var value : auto(TT)&) { //! Serializes tuple by serializing each field. 
if (arch.reading) { - delete value + unsafe { delete value; } } apply(value) $ [unused_argument(name)] (name : string; field) { arch |> _::serialize(field) @@ -180,7 +180,7 @@ def public serialize(var arch : Archive; var value : auto(TT)&) { def public serialize(var arch : Archive; var value : auto(TT)&) { //! Serializes variant by serializing the index and the active field. if (arch.reading) { - delete value + unsafe { delete value; } var index : int arch |> read_raw(index) unsafe(value |> set_variant_index(index)) @@ -358,7 +358,7 @@ def public serialize(var arch : Archive; var value : table) if (arch.reading) { var len : int arch |> read_raw(len) - delete value + unsafe { delete value; } for (_ in range(len)) { var k : KT -const -& -# arch |> _::serialize(k) diff --git a/daslib/decs.das b/daslib/decs.das index 614ec3671a..4edfb3b64b 100644 --- a/daslib/decs.das +++ b/daslib/decs.das @@ -398,8 +398,10 @@ def public restart { if (insideQuery != 0) { panic("decs: can't call `restart` from inside query") } - delete deferActions - delete decsState + unsafe { + delete deferActions + delete decsState + } } def private new_entity_id() { @@ -443,7 +445,7 @@ def public after_gc { if (arch.size > 0) { for (comp in arch.components) { if (comp.gc_dummy != null) { - delete comp.gc_dummy + unsafe { delete comp.gc_dummy; } } } } @@ -1043,7 +1045,7 @@ def public commit { for (da in actions) { da.action |> invoke(da) } - delete actions + unsafe { delete actions; } } def public is_alive(eid : EntityId) : bool { diff --git a/daslib/heartbeat.das b/daslib/heartbeat.das index f3ca569bae..9cf974237f 100644 --- a/daslib/heartbeat.das +++ b/daslib/heartbeat.das @@ -23,7 +23,7 @@ var g_inHB : bool = false def public set_heartbeat(var cb : lambda<() : void>) { //! Sets the heartbeat callback lambda, replacing any previously set callback. 
- delete g_hbCallback + unsafe { delete g_hbCallback; } g_hbCallback <- cb } diff --git a/doc/source/reference/language/lambdas.rst b/doc/source/reference/language/lambdas.rst index 23fce644d9..5b0f281af8 100644 --- a/doc/source/reference/language/lambdas.rst +++ b/doc/source/reference/language/lambdas.rst @@ -23,17 +23,23 @@ The ``->`` operator can be used instead of ``:`` for the return type: If no type signature is specified, ``lambda`` alone represents a lambda that takes no arguments and returns nothing. -Lambdas can be local or global variables, and can be passed as an argument by reference. -Lambdas can be moved, but can't be copied or cloned: +Lambdas can be local or global variables, and can be passed by value or by reference. +A lambda value is a fat pointer to a heap-allocated capture frame, so assignment +``=`` aliases the same capture (cheap, no allocation); ``<-`` is the explicit move +form. Cloning is not supported. .. code-block:: das def foo ( x : lambda < (arg1:int;arg2:float&):bool > ) { ... - var y <- x + var y = x // copy — y and x now alias the same capture frame + var z <- x // move — alternative when x is no longer needed ... } +Because copies alias the capture frame, ``delete lam`` requires ``unsafe`` — +otherwise sibling copies would be left dangling. See :ref:`finalizer notes below <lambdas_finalizer>`. + Lambdas can be invoked via ``invoke`` or call-like syntax: .. code-block:: das @@ -46,7 +52,8 @@ Lambdas can be invoked via ``invoke`` or call-like syntax: return x(14) } -Lambdas are typically declared via move syntax: +Lambdas are typically declared via move syntax (since ``@(...) { ... }`` produces a +fresh capture frame that the binding owns initially): .. code-block:: das @@ -94,11 +101,13 @@ By default, capture by copy is used. If copy is not available, the ``unsafe`` ke .. 
_lambdas_finalizer: -Lambdas can be deleted, which causes finalizers to be called on all captured data (see :ref:`Finalizers `): +Lambdas can be deleted, which causes finalizers to be called on all captured data (see :ref:`Finalizers `). +Because copies alias the same capture frame, ``delete`` requires ``unsafe`` — +the caller is asserting no other live copy exists: .. code-block:: das - delete lam + unsafe { delete lam; } Lambdas can specify a custom finalizer which is invoked before the default finalizer: @@ -111,7 +120,7 @@ Lambdas can specify a custom finalizer which is invoked before the default final print("CNT = {CNT}\n") } var x = invoke(counter,13) - delete counter // this is when the finalizer is called + unsafe { delete counter; } // this is when the finalizer is called .. _lambdas_iterator: @@ -207,12 +216,16 @@ The C++ Lambda class contains single void pointer for the capture data: ... }; -The rationale behind passing lambdas by reference is that when ``delete`` is called: +When ``delete`` is called on a lambda: 1. the finalizer is invoked for the capture data - 2. the capture is replaced with null + 2. the capture pointer is replaced with null in the local variable -The lack of copy or move semantics ensures that multiple pointers to a single instance of captured data cannot exist. +A lambda value is one machine word — the ``char *capture`` pointer above — so +copy is a cheap pointer alias and pass-by-value is free. Because copies +share the capture frame, ``delete lam`` requires ``unsafe`` (mirroring how raw +pointer ``delete`` is gated). Under the default GC the capture frame is freed +automatically once no copy is reachable. .. 
seealso:: diff --git a/doc/source/reference/language/move_copy_clone.rst b/doc/source/reference/language/move_copy_clone.rst index 880755ebd7..d897f855b2 100644 --- a/doc/source/reference/language/move_copy_clone.rst +++ b/doc/source/reference/language/move_copy_clone.rst @@ -45,8 +45,10 @@ The source value is not modified: Copy works for all POD types (``int``, ``float``, ``bool``, ``string``, pointers, etc.) and for structs whose fields are all copyable. -Types that manage owned resources — ``array``, ``table``, ``lambda``, and ``iterator`` — cannot -be copied. Attempting to copy them produces: +Types that manage owned resources — ``array``, ``table``, and ``iterator`` — cannot +be copied. ``lambda`` *is* copyable (a copy aliases the capture frame, the same way a +raw pointer copy aliases its target), but ``delete lam`` then requires ``unsafe``. +Attempting to copy a non-copyable type produces: .. code-block:: das @@ -233,7 +235,7 @@ The following table summarizes which operators work with which types: - ✓ - ✓ * - ``lambda`` - - ✗ + - ✓ (alias) - ✓ - ✗ * - ``block`` diff --git a/doc/source/reference/tutorials/14_lambdas.rst b/doc/source/reference/tutorials/14_lambdas.rst index b1f4081a15..d7aea322c1 100644 --- a/doc/source/reference/tutorials/14_lambdas.rst +++ b/doc/source/reference/tutorials/14_lambdas.rst @@ -70,14 +70,22 @@ Use ``capture()`` for other modes: Storing lambdas =============== -Lambdas must be **moved** with ``<-``, not copied:: +A lambda value is a fat pointer to a heap-allocated capture frame. 
+``=`` copies the pointer (both bindings now alias the same capture frame), +``<-`` moves (source becomes null):: var a <- @() { print("hello\n") } - var b <- a // a is now empty - b() + var b = a // b and a now alias the same lambda + var c <- a // c takes ownership, a becomes null + c() -Lambdas cannot be copied, but they **can** be stored in arrays using -``emplace``, which moves the lambda into the container:: +Because copies share the capture frame, ``delete lam`` requires +``unsafe { ... }`` — the caller asserts no other live copy exists. Under +the default GC the capture frame is freed automatically when no copy +remains reachable. + +Lambdas can be stored in arrays using ``emplace``, which moves the +lambda into the container:: var callbacks : array> var greet <- @() { print("hello from callback\n") } @@ -87,6 +95,7 @@ Lambdas cannot be copied, but they **can** be stored in arrays using for (cb in callbacks) { invoke(cb) } + unsafe { delete callbacks; } // array> inherits the unsafe-delete rule Blocks **cannot** be stored in arrays or variables — they live on the stack and are only valid as function arguments. @@ -165,7 +174,7 @@ Lambda vs block vs function pointer - By reference * - Storable - Yes - - Yes (move only) + - Yes (copy aliases capture) - No * - Returnable - Yes diff --git a/doc/source/reference/tutorials/20_lifetime.rst b/doc/source/reference/tutorials/20_lifetime.rst index dcd0da6f6c..aee3b767fb 100644 --- a/doc/source/reference/tutorials/20_lifetime.rst +++ b/doc/source/reference/tutorials/20_lifetime.rst @@ -97,14 +97,27 @@ finalizes its own scoped variables before the next one begins:: // delete f runs here, at the end of each iteration } -Heap pointers -============= +Heap pointers and lambdas +========================== For class instances created with ``new``, ``delete`` requires ``unsafe``:: var p = new MyClass() // ... use p ... 
- unsafe { delete p } + unsafe { delete p; } + +Lambdas follow the same rule: a lambda value is a fat pointer to a +heap-allocated capture frame, and ``=`` copies it (creating an alias). +``delete lam`` therefore requires ``unsafe`` — the caller asserts no +other live copy exists:: + + var fn <- @(x : int) : int { return x * 2 } + // ... use fn ... + unsafe { delete fn; } + +This rule cascades: ``array<lambda<...>>``, structures with a lambda +field, and any composite containing a lambda all inherit the +unsafe-delete requirement. Or use ``var inscope`` for automatic cleanup:: @@ -133,8 +146,9 @@ When to use what .. note:: - For heap pointers (from ``new``), ``delete`` requires ``unsafe``. - For local variables, ``delete`` is safe. + For heap pointers (from ``new``) and lambdas, ``delete`` requires + ``unsafe``. For local container variables (``array``, ``table``, + ``string``) without lambda fields, ``delete`` is safe. .. seealso:: diff --git a/src/ast/ast_typedecl.cpp b/src/ast/ast_typedecl.cpp index 2414f5df9b..c0636872cf 100644 --- a/src/ast/ast_typedecl.cpp +++ b/src/ast/ast_typedecl.cpp @@ -1153,7 +1153,7 @@ namespace das } else if (baseType == Type::tPointer) { return !smartPtr; } else if ( baseType == Type::tLambda ) { - return false; + return true; } else if ( baseType == Type::tString ) { return tempMatters ? 
false : true; } else { @@ -1520,6 +1520,8 @@ namespace das return true; } else if ( baseType==Type::tBlock ) { return false; + } else if ( baseType==Type::tLambda ) { + return false; } else if ( baseType==Type::tTable ) { if ( secondType && !secondType->isSafeToDelete(dep) ) return false; } else if ( baseType==Type::tArray ) { diff --git a/tests/aot/CMakeLists.txt b/tests/aot/CMakeLists.txt index 02c855c4d9..73bbbe8e93 100644 --- a/tests/aot/CMakeLists.txt +++ b/tests/aot/CMakeLists.txt @@ -33,6 +33,11 @@ FILE(GLOB AOT_DASLIB_MODULE_FILES RELATIVE ${PROJECT_SOURCE_DIR} CONFIGURE_DEPEN # that subclasses `vector` in C++) which # has no `das_iterator` AOT specialization list(FILTER AOT_DASLIB_MODULE_FILES EXCLUDE REGEX "daslib/(aot_macro|debugger|profiler|profiler_boost|ast_debug|decs_state|aot_cpp|aot_standalone|network|style_lint)\\.das$") +# just_in_time bootstraps the LLVM-backed JIT module via [extern](library=LLVM.dll). +# Without DAS_LLVM_DISABLED=OFF, the [extern] macro fails at compile time. +IF(DAS_LLVM_DISABLED) + list(FILTER AOT_DASLIB_MODULE_FILES EXCLUDE REGEX "daslib/just_in_time\\.das$") +ENDIF() add_custom_target(test_aot_daslib_modules) SET(DASLIB_MODULES_AOT_GENERATED_SRC) @@ -99,17 +104,22 @@ SET(AOT_DECS_MODULE_FILES tests/decs/test_stages_extra.das ) -# AOT for live_host test files -FILE(GLOB AOT_LIVE_HOST_FILES RELATIVE ${PROJECT_SOURCE_DIR} CONFIGURE_DEPENDS "tests/live_host/*.das") - -# Live host module files (libraries required by live_host tests) -SET(AOT_LIVE_HOST_MODULE_FILES - modules/dasLiveHost/live/live_commands.das - modules/dasLiveHost/live/live_vars.das - modules/dasLiveHost/live/live_api.das - modules/dasLiveHost/live/live_api_builtins.das - modules/dasLiveHost/live/live_api_stdio.das -) +# AOT for live_host test files — dasLiveHost transitively requires dashv, +# so AOT them only when HV is enabled. 
+SET(AOT_LIVE_HOST_FILES) +SET(AOT_LIVE_HOST_MODULE_FILES) +IF(NOT DAS_HV_DISABLED) + FILE(GLOB AOT_LIVE_HOST_FILES RELATIVE ${PROJECT_SOURCE_DIR} CONFIGURE_DEPENDS "tests/live_host/*.das") + + # Live host module files (libraries required by live_host tests) + SET(AOT_LIVE_HOST_MODULE_FILES + modules/dasLiveHost/live/live_commands.das + modules/dasLiveHost/live/live_vars.das + modules/dasLiveHost/live/live_api.das + modules/dasLiveHost/live/live_api_builtins.das + modules/dasLiveHost/live/live_api_stdio.das + ) +ENDIF() # AOT for dasSQLITE module library files (required by dasSQLITE tests) SET(AOT_DASSQLITE_MODULE_FILES) @@ -449,11 +459,15 @@ DAS_AOT_LIB("${AOT_DECS_MODULE_FILES}" DECS_MODULES_AOT_GENERATED_SRC test_aot_d add_custom_target(test_aot_live_host) SET(LIVE_HOST_AOT_GENERATED_SRC) -DAS_AOT("${AOT_LIVE_HOST_FILES}" LIVE_HOST_AOT_GENERATED_SRC test_aot_live_host daslang) +IF(NOT DAS_HV_DISABLED) + DAS_AOT("${AOT_LIVE_HOST_FILES}" LIVE_HOST_AOT_GENERATED_SRC test_aot_live_host daslang) +ENDIF() add_custom_target(test_aot_live_host_modules) SET(LIVE_HOST_MODULES_AOT_GENERATED_SRC) -DAS_AOT_LIB("${AOT_LIVE_HOST_MODULE_FILES}" LIVE_HOST_MODULES_AOT_GENERATED_SRC test_aot_live_host_modules daslang) +IF(NOT DAS_HV_DISABLED) + DAS_AOT_LIB("${AOT_LIVE_HOST_MODULE_FILES}" LIVE_HOST_MODULES_AOT_GENERATED_SRC test_aot_live_host_modules daslang) +ENDIF() add_custom_target(test_aot_strudel_modules) SET(STRUDEL_MODULES_AOT_GENERATED_SRC) diff --git a/tests/aot/test_lambda_copy.das b/tests/aot/test_lambda_copy.das new file mode 100644 index 0000000000..cc87fa20c6 --- /dev/null +++ b/tests/aot/test_lambda_copy.das @@ -0,0 +1,79 @@ +options gen2 +require dastest/testing_boost public + +// AOT mirror of tests/language/lambda_copy.das. +// Exercises the same lambda copy + unsafe-delete coverage through the AOT codegen path. 
+ +typedef Counter = lambda<() : int> + +def invoke_twice(g : Counter) : int { + return invoke(g) + invoke(g) +} + +[test] +def test_aot_lambda_copy_aliases_capture(t : T?) { + t |> run("aot copy assignment shares capture frame") @(t : T?) { + var counter = 0 // nolint:LINT003 -- captured by lambda, mutated inside body + var inc <- @() : int { + counter += 1 + return counter + } + let inc2 = inc + t |> equal(invoke(inc), 1) + t |> equal(invoke(inc2), 2) + t |> equal(invoke(inc), 3) + unsafe { delete inc; } + } +} + +[test] +def test_aot_lambda_pass_by_value(t : T?) { + t |> run("aot lambda passes by value (one capture frame, two refs)") @(t : T?) { + var lam <- @() : int { + var x = 0 + x += 1 + return x + } + let sum = invoke_twice(lam) + t |> equal(sum, 2) + unsafe { delete lam; } + } +} + +struct Holder { + action : Counter +} + +[test] +def test_aot_lambda_struct_field_copy(t : T?) { + t |> run("aot struct copy aliases lambda field") @(t : T?) { + var s1 = Holder(action <- @() : int { + var n = 0 + n += 1 + return n + }) + let s2 = s1 + t |> equal(invoke(s1.action), 1) + t |> equal(invoke(s2.action), 1) + unsafe { delete s1.action; } + } +} + +[test] +def test_aot_lambda_implicit_capture(t : T?) { + t |> run("aot inner lambda captured implicitly inside another lambda") @(t : T?) { + var inner <- @() : int { + var n = 0 + n += 1 + return n + } + var outer <- @() : int { + return invoke(inner) + invoke(inner) + } + t |> equal(invoke(outer), 2) + unsafe { + delete inner + delete outer + } + } +} diff --git a/tests/aot/test_lambdas.das b/tests/aot/test_lambdas.das index 5a856b0bdd..365ba7237a 100644 --- a/tests/aot/test_lambdas.das +++ b/tests/aot/test_lambdas.das @@ -9,7 +9,7 @@ def test_basic_lambda(t : T?) { } t |> equal(invoke(lam, 5), 10) t |> equal(invoke(lam, -3), -6) - delete lam + unsafe { delete lam; } } } @@ -21,12 +21,13 @@ def test_lambda_capture(t : T?) 
{ return x * factor } t |> equal(invoke(lam, 5), 15) - delete lam + unsafe { delete lam; } } } def apply_transform(values : array; transform : function<(x : int) : int>) : array { var result : array + result |> reserve(length(values)) for (v in values) { result |> push(invoke(transform, v)) } @@ -40,8 +41,8 @@ def fn_double(x : int) : int { [test] def test_higher_order(t : T?) { t |> run("apply function pointer to array") @(t : T?) { - var arr <- [1, 2, 3, 4, 5] - var result <- apply_transform(arr, @@fn_double) + let arr <- [1, 2, 3, 4, 5] + let result <- apply_transform(arr, @@fn_double) t |> equal(length(result), 5) t |> equal(result[0], 2) t |> equal(result[1], 4) diff --git a/tests/jit_tests/invoke.das b/tests/jit_tests/invoke.das index 2348877c45..db419f67f8 100644 --- a/tests/jit_tests/invoke.das +++ b/tests/jit_tests/invoke.das @@ -35,7 +35,7 @@ def test_invoke_lambda(lmb : lambda<(a : int; b : float) : int>) { def test_invoke_and_delete_lambda(var lmb : lambda<(a : int; b : float) : int>) { let res = invoke(lmb, 1, 2.0) - delete lmb + unsafe { delete lmb; } return res } diff --git a/tests/language/lambda_capture.das b/tests/language/lambda_capture.das index ddcb8b7402..3d73dec956 100644 --- a/tests/language/lambda_capture.das +++ b/tests/language/lambda_capture.das @@ -16,7 +16,7 @@ def test_lambda_capture_const(var t : T?) { t |> equal(lenF, 0) } invoke(l) - delete l + unsafe { delete l; } } diff --git a/tests/language/lambda_capture_modes.das b/tests/language/lambda_capture_modes.das index 77fcc8b3ca..0a54304f73 100644 --- a/tests/language/lambda_capture_modes.das +++ b/tests/language/lambda_capture_modes.das @@ -19,7 +19,7 @@ def test_lambda_capture_modes(t : T?) { var a2 <- array(1, 2) var a3 <- array(1, 2) unsafe { - var lam <- @ capture(& a1, <- a2, := a3) { + let lam <- @ capture(& a1, <- a2, := a3) { push(a1, 1) push(a2, 1) push(a3, 1) @@ -34,7 +34,7 @@ def test_lambda_capture_modes(t : T?) 
{ g_lcm_counter = 0 var pA = new LcmFoo var pB = new LcmFoo - var C = LcmFoo() + var C = LcmFoo() // nolint:LINT003 -- captured by clone; const-let changes capture semantics var lam <- @ capture(<- pB, := pC) { assert(pA.dummy == 0) assert(pB.dummy == 0) @@ -42,7 +42,7 @@ def test_lambda_capture_modes(t : T?) { } t |> equal(pB == null, true) invoke(lam) - delete lam + unsafe { delete lam; } t |> equal(g_lcm_counter, 1) } } diff --git a/tests/language/lambda_copy.das b/tests/language/lambda_copy.das new file mode 100644 index 0000000000..3f9ac2e4f2 --- /dev/null +++ b/tests/language/lambda_copy.das @@ -0,0 +1,85 @@ +options gen2 +require dastest/testing_boost public + +// Coverage for lambda being a copyable, pointer-shaped value. +// canCopy(lambda) is now true: '=' aliases the capture frame. +// isSafeToDelete(lambda) is now false: 'delete lam' requires unsafe. + +typedef Counter = lambda<() : int> + +def invoke_twice(g : Counter) : int { + return invoke(g) + invoke(g) +} + +[test] +def test_lambda_copy_aliases_capture(t : T?) { + t |> run("copy assignment shares capture frame") @(t : T?) { + var counter = 0 // nolint:LINT003 -- captured by lambda, mutated inside body + var inc <- @() : int { + counter += 1 // mutates the lambda's own captured counter + return counter + } + let inc2 = inc // copy — aliases the same capture frame + t |> equal(invoke(inc), 1) + t |> equal(invoke(inc2), 2) // sees inc's mutation through the alias + t |> equal(invoke(inc), 3) // and vice versa + unsafe { delete inc; } + } +} + +[test] +def test_lambda_pass_by_value(t : T?) { + t |> run("lambda passes by value (one capture frame, two refs)") @(t : T?) 
{ + var lam <- @() : int { + var x = 0 + x += 1 + return x + } + // pass-by-value: the parameter is a separate variable holding the same pointer + let sum = invoke_twice(lam) + t |> equal(sum, 2) // each invoke returns 1 (independent x), sum = 2 + unsafe { delete lam; } + } +} + +struct Holder { + action : Counter +} + +[test] +def test_lambda_struct_field_copy(t : T?) { + t |> run("struct copy aliases lambda field") @(t : T?) { + var s1 = Holder(action <- @() : int { + var n = 0 + n += 1 + return n + }) + let s2 = s1 // copies the struct; lambda field is aliased + t |> equal(invoke(s1.action), 1) + t |> equal(invoke(s2.action), 1) // independent n per invoke + unsafe { delete s1.action; } + } +} + +[test] +def test_lambda_implicit_capture(t : T?) { + t |> run("inner lambda captured implicitly inside another lambda") @(t : T?) { + // Pre-flip this required an explicit `@capture(<- inner)` annotation + // (lambda was canCopy=false, so default `capture_any` forced move and + // implicit move-capture is an `unsafe` operation). Post-flip the + // lambda value is a pointer-alias, copy-capture is fine implicitly. + var inner <- @() : int { + var n = 0 + n += 1 + return n + } + var outer <- @() : int { + return invoke(inner) + invoke(inner) + } + t |> equal(invoke(outer), 2) + unsafe { + delete inner + delete outer + } + } +} diff --git a/tutorials/language/14_lambdas.das b/tutorials/language/14_lambdas.das index 0f5180b69f..1bc6ff25a5 100644 --- a/tutorials/language/14_lambdas.das +++ b/tutorials/language/14_lambdas.das @@ -46,9 +46,13 @@ def main { print("invoke: {invoke(doubler, 3)}\n") // === Storing lambdas === - // Lambdas MUST be moved with <- (they cannot be copied). - // var copy = doubler // ERROR: lambdas can't be copied - // var moved <- doubler // OK but doubler becomes null + // A lambda value is a fat pointer to a capture frame. + // '=' copies the pointer (both bindings now alias the same capture), + // '<-' moves (source becomes null), '@ ...' 
creates a fresh capture. + // var alias = doubler // OK — alias shares doubler's capture frame + // var moved <- doubler // OK and doubler becomes null + // 'delete' on a lambda requires 'unsafe' since copies may alias it: + // unsafe { delete doubler; } // === Storing lambdas in arrays === // Lambdas can be stored in arrays using emplace (which moves them in). @@ -66,7 +70,9 @@ def main { invoke(cb) } print("stored {length(callbacks)} lambdas in array\n") - delete callbacks + // 'array>' inherits the lambda's unsafe-delete rule, since + // finalizing the array would delete every element lambda. + unsafe { delete callbacks; } // === Passing lambdas to functions === // Functions that take lambda<> accept only @ lambdas (not @@ functions). From e7022e3d0d53846d2d6bef27e0a36474647e0a99 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Tue, 19 May 2026 03:41:27 -0700 Subject: [PATCH 2/8] linq_fold: take_while/skip_while predicate-driven ranges (PR-G) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the `_while` row in the test_linq_partition.das coverage checklist. take_while(pred) breaks the loop on the first false-pred element; skip_while(pred) flips a one-way `skipping` flag on the first false then emits everything after. How it lands: 1. wrap_with_skip_take → wrap_with_ranges, append_skip_take_prelude → append_ranges_prelude. Both helpers gain skipWhileCond / takeWhileCond / skippingName args. All 4 lane builders thread the new state through their signatures — predicate-driven ranges are now first-class alongside count-driven. 2. Per-match prefix order in the loop body: (1) take-guard, (2) skip counter, (3) skip_while flip (NEW), (4) take_while break (NEW), (5) takeCount++, body. takeCount++ moved below the predicate gates so skip_while-skipped and take_while-rejected elements don't eat the take(N) budget. Pure skip/take chains are unchanged (the new prefixes are no-ops when both predicates are null). 3. 
classify_terminator extension: take_while and skip_while join where_/ select/take/skip in the ARRAY-trailing arm so `_take_while(p).to_array()` splices into the array lane instead of cascading. 4. Canonical chain order enforced by plan_loop_or_count: [where_/select]* → skip? → skip_while? → take_while? → take? → terminator. Each later op rejects predecessors that violate this order; out-of-order chains cascade to tier 2 (correct, just no splice). Bail cases (cascade to tier 2): - `_select(...)._take_while(...)` or `_select(...)._skip_while(...)` — predicate currently peels with itName; lifting to chained-bind names is a follow-up that requires moving select binds above the predicate gates. - Multiple take_while or multiple skip_while in one chain. - skip / take appearing AFTER skip_while / take_while. Headline (100K rows, INTERP): | Benchmark | m1 sql | m3 linq | m3f this PR | Win | |--- |-------:|--------:|------------:|----:| | take_while_match | 7 | 23 | **2** | 11.5× over m3 / 3.5× over m1 SQL | | skip_while_match | 3 | 20 | **5** | 4× over m3 (sqlite COUNT-WHERE index-scan still 1.67× faster) | THRESHOLD = 50000 against n = 100000, so take_while breaks halfway and skip_while flips halfway. take_while_match is the standout: splice exits the for-loop on the first false-pred element (~50k of 100k source rows), while m3 still pays for the full take_while_impl array allocation + the subsequent count length read. Test plan: - Parity tests (tests/linq/test_linq_fold.das): 24 new subtests across 2 [test] functions covering bare/where/select/skip-take/empty/all-true/ all-false/cascade-bails for both operators and each lane. - AST-shape tests (tests/linq/test_linq_fold_ast.das): 8 new tests + 9 target_* helpers covering splice + cascade fingerprints. - Interpreter: 369/369 fold + 133/133 ast + wider tests/linq/*.das sweep (15 files, all green). - AOT (test_aot -use-aot): 369/369 fold + 133/133 ast green. - Lint + format: clean across all touched files. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- benchmarks/sql/LINQ.md | 61 +++++++++- benchmarks/sql/skip_while_match.das | 62 ++++++++++ benchmarks/sql/take_while_match.das | 61 ++++++++++ daslib/linq_fold.das | 124 ++++++++++++++------ tests/linq/test_linq_fold.das | 150 +++++++++++++++++++++++- tests/linq/test_linq_fold_ast.das | 174 ++++++++++++++++++++++++++++ 6 files changed, 592 insertions(+), 40 deletions(-) create mode 100644 benchmarks/sql/skip_while_match.das create mode 100644 benchmarks/sql/take_while_match.das diff --git a/benchmarks/sql/LINQ.md b/benchmarks/sql/LINQ.md index 2d9170dc32..4879f3eecd 100644 --- a/benchmarks/sql/LINQ.md +++ b/benchmarks/sql/LINQ.md @@ -673,6 +673,65 @@ Closes the `test_linq_element.das` (last / last_or_default / single / single_or_ - Aggregate with non-peelable block body cascades to tier 2 (correct but slower) — a `return $b(stmts)` pattern recognizer would let multi-statement blocks splice too. - Skip-family (`skip_last` / `take_last`) — these need buffer state similar to PR-C's reverse_take; separate follow-up. +## Phase 3+ predicate-driven ranges: take_while / skip_while (PR-G) + +Closes the `_while` row in the `test_linq_partition.das` coverage checklist. `take_while(pred)` breaks the loop on the first element where `pred` returns false; `skip_while(pred)` flips a one-way `skipping` flag on the first false, emitting that element and everything after. Both fit alongside the existing skip/take counters in the per-element wrap. + +**How it lands:** + +1. **Helper rename + unification.** `wrap_with_skip_take` → `wrap_with_ranges`, `append_skip_take_prelude` → `append_ranges_prelude`. The new helpers gain three args: `skipWhileCond`, `takeWhileCond`, `skippingName`. All four lane builders (`emit_counter_lane`, `emit_array_lane`, `emit_accumulator_lane`, `emit_early_exit_lane`) thread the new state through their signatures — predicate-driven ranges are now first-class alongside count-driven ranges. + +2. 
**Per-element prefix ordering** (in `wrap_with_ranges`, final emission order): + ``` + if (takeCount >= takeLimit) break # (1) take-guard + if (skip > 0) { skip--; continue } # (2) skip counter + if (skipping) { # (3) skip_while flip — NEW + if (sw_pred) continue + skipping = false + } + if (!tw_pred) break # (4) take_while break — NEW + takeCount ++ # (5) take-inc — MOVED below (3)(4) + + ``` + takeCount++ moved from "right after skip" to "after all gates" so skip_while-skipped and take_while-rejected elements don't eat the `take(N)` budget. Pure `skip(N).take(M)` chains keep their existing behavior — steps (3)(4) are no-ops when both predicates are null. + +3. **Lane classification extension.** `take_while` / `skip_while` join `where_` / `select` / `take` / `skip` in the ARRAY-trailing arm of `classify_terminator`. This makes `_take_while(p).to_array()` (and the bare `_take_while_to_array` variant) splice into the array lane instead of cascading. + +4. **Canonical chain order** (intermediate ops walked by `plan_loop_or_count`): + ``` + [where_/select]* → skip? → skip_while? → take_while? → take? → terminator + ``` + Each op type rejects any successor that violates this order — `seenSkipWhile` blocks subsequent skip/skip_while/where/select; `seenTakeWhile` blocks subsequent skip/skip_while/take_while/where/select; etc. Out-of-order chains cascade to tier 2 (still correct, just no splice). + +**Bail cases (cascade to tier 2):** +- `_select(...) ._take_while(...)` or `_select(...) ._skip_while(...)` — predicate currently peels with `itName` (source element). Lifting to chained-bind names is a small follow-up that requires moving select binds above the predicate gates in the wrap. +- Multiple `take_while` or multiple `skip_while` in one chain. +- `skip` appearing AFTER `skip_while` / `take_while`, or `skip_while` / `take_while` appearing AFTER `take` (the skip/take counters would be ordering-sensitive). +- Any chain that previously cascaded (buffer-required upstream, unrecognized ops) — unchanged. 
+ +**Edge cases worth noting:** +- `take_while` with all-true predicate is a no-op gate — the loop iterates to completion just like a chain without `take_while`. Verified by parity test `take_while pred-always-true emits whole source`. +- `take_while` with first-element-false predicate breaks immediately — `count` returns 0, `first_or_default(d)` returns `d` (the splice's bind variable was never written). Pinned by `take_while first_or_default with no survivor`. +- `skip_while` with all-true predicate skips the entire source — flag never flips, so `count` returns 0. Pinned by `skip_while pred-always-true skips whole source`. +- `skip_while` with first-element-false predicate emits everything — flag flips on element 0, no element is gated. Equivalent to no `skip_while` at all. +- Chained `skip(N).skip_while(p)`: skip absorbs first N source elements, then skip_while gates the next prefix. Reversed order (`skip_while(p).skip(N)`) cascades to tier 2 because the takeCount-style accumulator can't represent "skip N of skip_while-survivors" without an extra counter (deferred). + +### Headline (PR-G) + +| Benchmark | Shape | m1 (sql) | m3 (linq) | m3f (this PR) | Win | +|---|---|---:|---:|---:|---:| +| take_while_match | `each(arr) → take_while(id < THRESHOLD) → count` | 7 | 23 | **2** | **11.5× over m3 / 3.5× over m1 SQL** | +| skip_while_match | `each(arr) → skip_while(id < THRESHOLD) → count` | 3 | 20 | **5** | **4× over m3** (m1 SQL still 1.67× faster — index-scan COUNT* is hard to beat) | + +THRESHOLD = 50000 against n = 100000, so take_while breaks halfway and skip_while flips halfway. + +`take_while_match` is the standout: the splice exits the for-loop on the first false-pred element (~50k of 100k source rows), while m3 still pays for the full `take_while_impl` array allocation + the subsequent `count` length read. 
`skip_while_match` shows the buffer-elision win — splice fuses the gate into the counter lane (no allocation), while m3 materializes the survivor tail as an `array` then `count`s its length. SQL pulls ahead on `skip_while_match` because SQLite's planner uses the primary-key index to skip the head without touching rows, an optimization the in-process splice can't match. + +### Deferred for follow-ups (PR-G) + +- `select(proj)._take_while(p)` / `select(proj)._skip_while(p)` — lift predicate peeling to chained-bind names + move select-side binds above the predicate gates in the wrap. +- `skip_while(p).skip(N)` (or `.take(N)`) reverse order — requires an extra counter for "N of skip_while-survivors". + ## Operator-coverage checklist (parity tests) The benchmarks above cover the most common shapes. The end-game target is one benchmark per `_fold`-applicable scenario in the broader `tests/linq/` operator suite. Tracking the long-tail coverage below; PRs that add splice support for new operators should add a benchmark here if not already present. @@ -686,7 +745,7 @@ The benchmarks above cover the most common shapes. 
The end-game target is one be | `test_linq_sorting.das` | order/order_by/reverse | sort_first, sort_take, select_where_order_take, reverse_take | ✅ ascending + `order_descending` (Phase 3); ✅ `reverse` (Phase 3+); ✅ `reverse \|> take(N)` backward index loop on array sources (PR-C — closes the prior regression) | | `test_linq_group_by.das` | group_by/group_by_lazy/having | groupby_count, groupby_sum, groupby_min, groupby_max, groupby_first, groupby_multi_reducer, groupby_where_count, groupby_where_sum, groupby_select_sum, groupby_having_count, groupby_having_hidden_sum, groupby_average | ✅ count/long_count/sum + inner-select-sum (PR-A1); ✅ min/max/first + inner-select-{min,max,first} + multi-reducer (PR-A2); ✅ upstream where_/select* fusion (PR-B); ✅ `having_` with matching slot (PR-D); ✅ `average` + inner-select-average + multi-reducer-with-average (PR-A2 follow-up); ✅ hidden-slot reducer-in-having on named-tuple select shape (PR-E) | | `test_linq_join.das` | join/left_join/right_join/full_outer/cross | join_count | ✅ inner; outer joins + cross ⏳ | -| `test_linq_partition.das` | take/skip/take_while/skip_while/chunk | take_count, skip_take, take_sum_aggregate, take_count_filtered | ✅ take/skip in splice lanes; `_while` + `chunk` ⏳ | +| `test_linq_partition.das` | take/skip/take_while/skip_while/chunk | take_count, skip_take, take_sum_aggregate, take_count_filtered, take_while_match, skip_while_match | ✅ take/skip in splice lanes; ✅ `take_while` / `skip_while` (PR-G); `chunk` ⏳ | | `test_linq_set.das` | distinct/union/except/intersect/unique | distinct_count, distinct_take | ✅ distinct + distinct_by (streaming dedup, this PR); union/except/intersect/unique ⏳ | | `test_linq_element.das` | first/last/single/element_at + _or_default | first_match, first_or_default_match, last_match, single_match, element_at_match | ✅ first/first_or_default; ✅ last/last_or_default/single/single_or_default/element_at/element_at_or_default (PR-F terminal-walk lane) | | 
`test_linq_concat.das` | concat/prepend/append | — | ⏳ | diff --git a/benchmarks/sql/skip_while_match.das b/benchmarks/sql/skip_while_match.das new file mode 100644 index 0000000000..32ebd1bd30 --- /dev/null +++ b/benchmarks/sql/skip_while_match.das @@ -0,0 +1,62 @@ +options gen2 +options persistent_heap + +require _common public + +// Fixture car ids run 1..n in source order, so `id < THRESHOLD` is true for the leading +// THRESHOLD-1 elements then false from THRESHOLD on. skip_while flips on the first false +// and emits everything that follows; both reference and splice walk the entire source. +// The splice gain is buffer elision — reference `skip_while_impl` materializes an +// `array` of survivors before count; splice fuses into the counter lane (in-loop ++). +// +// SQL: `SELECT COUNT(*) FROM Cars WHERE id >= THRESHOLD` — index scan + aggregate. +// m3 materializes the survivor tail as `array` then `|> count` reads its length. +// m3f splice: single pass, no allocation, in-loop counter `++`. + +let THRESHOLD = 50000 + +def run_m1(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m1_sql/{n}", n) { + let total = _sql(db |> select_from(type) |> _where(_.id >= THRESHOLD) |> count()) + if (total == 0) { + b->failNow() + } + } + } +} + +def run_m3(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3_array/{n}", n) { + let total = arr |> _skip_while(_.id < THRESHOLD) |> count() + if (total == 0) { + b->failNow() + } + } +} +def run_m3f(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3f_array_fold/{n}", n) { + let total = _fold(each(arr)._skip_while(_.id < THRESHOLD).count()) + if (total == 0) { + b->failNow() + } + } +} + +[benchmark] +def skip_while_match_m1(b : B?) { + run_m1(b, 100000) +} + +[benchmark] +def skip_while_match_m3(b : B?) { + run_m3(b, 100000) +} + +[benchmark] +def skip_while_match_m3f(b : B?) 
{ + run_m3f(b, 100000) +} diff --git a/benchmarks/sql/take_while_match.das b/benchmarks/sql/take_while_match.das new file mode 100644 index 0000000000..b6e7da9925 --- /dev/null +++ b/benchmarks/sql/take_while_match.das @@ -0,0 +1,61 @@ +options gen2 +options persistent_heap + +require _common public + +// Fixture car ids run 1..n in source order, so `id < THRESHOLD` is true for the leading +// THRESHOLD-1 elements then false from THRESHOLD on. take_while breaks on the first false, +// so splice visits only the prefix. +// +// SQL: `SELECT COUNT(*) FROM Cars WHERE id < THRESHOLD` — query planner uses pk index. +// m3 materializes the prefix as `array` then `|> count` reads its length. +// m3f splice walks the prefix, breaks on the first false, counts via in-loop accumulator +// (no buffer allocation). + +let THRESHOLD = 50000 + +def run_m1(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m1_sql/{n}", n) { + let total = _sql(db |> select_from(type) |> _where(_.id < THRESHOLD) |> count()) + if (total == 0) { + b->failNow() + } + } + } +} + +def run_m3(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3_array/{n}", n) { + let total = arr |> _take_while(_.id < THRESHOLD) |> count() + if (total == 0) { + b->failNow() + } + } +} +def run_m3f(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3f_array_fold/{n}", n) { + let total = _fold(each(arr)._take_while(_.id < THRESHOLD).count()) + if (total == 0) { + b->failNow() + } + } +} + +[benchmark] +def take_while_match_m1(b : B?) { + run_m1(b, 100000) +} + +[benchmark] +def take_while_match_m3(b : B?) { + run_m3(b, 100000) +} + +[benchmark] +def take_while_match_m3f(b : B?) 
{ + run_m3f(b, 100000) +} diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das index c94c3516ec..3378600581 100644 --- a/daslib/linq_fold.das +++ b/daslib/linq_fold.das @@ -377,8 +377,9 @@ enum private LinqLane { [macro_function] def private classify_terminator(name : string) : LinqLane { if (name == "count") return LinqLane.COUNTER - // take/skip trailing (after to_array strip) → ARRAY lane with implicit materialization. - if (name == "where_" || name == "select" || name == "take" || name == "skip") return LinqLane.ARRAY + // take/skip/take_while/skip_while trailing (after to_array strip) → ARRAY lane with implicit materialization. + if (name == "where_" || name == "select" || name == "take" || name == "skip" + || name == "take_while" || name == "skip_while") return LinqLane.ARRAY if (name == "sum" || name == "min" || name == "max" || name == "average" || name == "long_count") return LinqLane.ACCUMULATOR // EARLY_EXIT is also the dispatch lane for full-walk single-return terminators (last/single/element_at/aggregate) — same emit_early_exit_lane shape, different per-op state. if (name == "first" || name == "first_or_default" || name == "any" || name == "all" || name == "contains" @@ -494,9 +495,9 @@ def private prepend_precond(var body : Expression?; var preCondStmts : array; var skipExpr : Expression?; var takeExpr : Expression?; - skipName, takeCountName : string) { - // Loop-level counters for skip/take. Both live in the outer invoke block, alongside any +def private append_ranges_prelude(var preludeStmts : array; var skipExpr, takeExpr, skipWhileCond : Expression?; + skipName, takeCountName, skippingName : string) { + // Loop-level state for skip/take counters + skip_while flag (one-way, flips on first false-pred). 
if (skipExpr != null) { var skipInit = clone_expression(skipExpr) preludeStmts |> push <| qmacro_expr() { @@ -508,15 +509,20 @@ def private append_skip_take_prelude(var preludeStmts : array; var var $i(takeCountName) = 0 } } + if (skipWhileCond != null) { + preludeStmts |> push <| qmacro_expr() { + var $i(skippingName) = true + } + } } [macro_function] -def private wrap_with_skip_take(var stmts : array; var skipExpr : Expression?; var takeExpr : Expression?; - skipName, takeCountName : string) { - // Per-match-block wrapping. Order at the head of the block is: - if (skipExpr == null && takeExpr == null) return +def private wrap_with_ranges(var stmts : array; var skipExpr, takeExpr, skipWhileCond, takeWhileCond : Expression?; + skipName, takeCountName, skippingName : string) { + // Per-match prefix order: take-guard → skip counter → skip_while flip → take_while break → takeCount++ → body. takeCount++ sits AFTER skip_while/take_while so gated-out elements don't eat the take(N) budget. + if (skipExpr == null && takeExpr == null && skipWhileCond == null && takeWhileCond == null) return var prefixed : array - prefixed |> reserve(length(stmts) + 3) + prefixed |> reserve(length(stmts) + 5) if (takeExpr != null) { var takeLimit = clone_expression(takeExpr) // `>=` (not `==`) so non-positive N short-circuits on the first iteration — @@ -534,6 +540,25 @@ def private wrap_with_skip_take(var stmts : array; var skipExpr : E } } } + if (skipWhileCond != null) { + var swPred = clone_expression(skipWhileCond) + prefixed |> push <| qmacro_expr() { + if ($i(skippingName)) { + if ($e(swPred)) { + continue + } + $i(skippingName) = false + } + } + } + if (takeWhileCond != null) { + var twPred = clone_expression(takeWhileCond) + prefixed |> push <| qmacro_expr() { + if (!($e(twPred))) { + break + } + } + } if (takeExpr != null) { prefixed |> push <| qmacro_expr() { $i(takeCountName) ++ @@ -555,15 +580,15 @@ def private min_max_compare(workhorse : bool; opName : string; valName, accName 
} [macro_function] -def private emit_counter_lane(var top : Expression?; srcName, accName, itName, skipName, takeCountName : string; - var skipExpr : Expression?; var takeExpr : Expression?; +def private emit_counter_lane(var top : Expression?; srcName, accName, itName, skipName, takeCountName, skippingName : string; + var skipExpr, takeExpr, skipWhileCond : Expression?; var loopBody : Expression?; at : LineInfo) : Expression? { // Counter lane: `[skip/take init]; var acc = 0; for (it in src) { $loopBody }; return acc` var topExpr = clone_expression(top) topExpr.genFlags.alwaysSafe = true var srcParamType = invoke_src_param_type(top) var bodyStmts : array - append_skip_take_prelude(bodyStmts, skipExpr, takeExpr, skipName, takeCountName) + append_ranges_prelude(bodyStmts, skipExpr, takeExpr, skipWhileCond, skipName, takeCountName, skippingName) bodyStmts |> push <| qmacro_expr() { var $i(accName) = 0 } @@ -583,8 +608,8 @@ def private emit_counter_lane(var top : Expression?; srcName, accName, itName, s [macro_function] def private emit_array_lane(var top : Expression?; var expr : Expression?; var loopBody : Expression?; var elementType : TypeDeclPtr; - srcName, accName, itName, skipName, takeCountName : string; - var skipExpr : Expression?; var takeExpr : Expression?; + srcName, accName, itName, skipName, takeCountName, skippingName : string; + var skipExpr, takeExpr, skipWhileCond : Expression?; at : LineInfo) : Expression? 
{ // Array lane: `[skip/take init]; var acc : array; [reserve]; for (it in src) { $loopBody }; return <- acc` let isIter = expr._type.isIterator @@ -593,7 +618,7 @@ def private emit_array_lane(var top : Expression?; var expr : Expression?; var l topExpr.genFlags.alwaysSafe = true var srcParamType = invoke_src_param_type(top) var bodyStmts : array - append_skip_take_prelude(bodyStmts, skipExpr, takeExpr, skipName, takeCountName) + append_ranges_prelude(bodyStmts, skipExpr, takeExpr, skipWhileCond, skipName, takeCountName, skippingName) bodyStmts |> push <| qmacro_expr() { var $i(accName) : array<$t(elementType)> } @@ -640,8 +665,8 @@ def private emit_accumulator_lane( var intermediateBinds : array; var preCondStmts : array; var elementType : TypeDeclPtr; - srcName, accName, itName, skipName, takeCountName : string; - var skipExpr : Expression?; var takeExpr : Expression?; + srcName, accName, itName, skipName, takeCountName, skippingName : string; + var skipExpr, takeExpr, skipWhileCond, takeWhileCond : Expression?; at : LineInfo ) : Expression? { // Ring 1 single-pass accumulator lane: sum / min / max / average / long_count. @@ -733,12 +758,12 @@ def private emit_accumulator_lane( return null } prepend_binds(perMatchStmts, intermediateBinds) - wrap_with_skip_take(perMatchStmts, skipExpr, takeExpr, skipName, takeCountName) + wrap_with_ranges(perMatchStmts, skipExpr, takeExpr, skipWhileCond, takeWhileCond, skipName, takeCountName, skippingName) var loopBody = prepend_precond(wrap_with_condition(stmts_to_expr(perMatchStmts), whereCond), preCondStmts) // Collect all body statements into one list so they share scope when spliced via $b. 
var bodyStmts : array bodyStmts |> reserve(length(preludeStmts) + 4) - append_skip_take_prelude(bodyStmts, skipExpr, takeExpr, skipName, takeCountName) + append_ranges_prelude(bodyStmts, skipExpr, takeExpr, skipWhileCond, skipName, takeCountName, skippingName) for (s in preludeStmts) { bodyStmts |> push(s) } @@ -780,8 +805,8 @@ def private emit_early_exit_lane( var preCondStmts : array; var elementType : TypeDeclPtr; terminatorCall : ExprCall?; - srcName, itName, skipName, takeCountName : string; - var skipExpr : Expression?; var takeExpr : Expression?; + srcName, itName, skipName, takeCountName, skippingName : string; + var skipExpr, takeExpr, skipWhileCond, takeWhileCond : Expression?; at : LineInfo ) : Expression? { // Ring 2 early-exit lane: first / first_or_default / any / all / contains. @@ -1089,12 +1114,12 @@ def private emit_early_exit_lane( return null } prepend_binds(perMatchStmts, intermediateBinds) - wrap_with_skip_take(perMatchStmts, skipExpr, takeExpr, skipName, takeCountName) + wrap_with_ranges(perMatchStmts, skipExpr, takeExpr, skipWhileCond, takeWhileCond, skipName, takeCountName, skippingName) var loopBody = prepend_precond(wrap_with_condition(stmts_to_expr(perMatchStmts), whereCond), preCondStmts) // Single-$b body so all stmts (skip/take counters + prelude + for + tail) share scope var bodyStmts : array bodyStmts |> reserve(length(preludeStmts) + length(tailStmts) + 3) - append_skip_take_prelude(bodyStmts, skipExpr, takeExpr, skipName, takeCountName) + append_ranges_prelude(bodyStmts, skipExpr, takeExpr, skipWhileCond, skipName, takeCountName, skippingName) for (s in preludeStmts) { bodyStmts |> push(s) } @@ -1374,6 +1399,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { let accName = "`acc`{at.line}`{at.column}" let skipName = "`skip`{at.line}`{at.column}" let takeCountName = "`tc`{at.line}`{at.column}" + let skippingName = "`skipping`{at.line}`{at.column}" var whereCond : Expression? var projection : Expression? 
var intermediateBinds : array @@ -1381,8 +1407,13 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { var preCondStmts : array var skipExpr : Expression? var takeExpr : Expression? + // skip_while / take_while: predicate-driven ranges. Both peel with itName (source elem); seenSelect bails to tier 2. + var skipWhileCond : Expression? + var takeWhileCond : Expression? var seenSelect = false var seenSkip = false + var seenSkipWhile = false + var seenTakeWhile = false var seenTake = false var allProjectionsPure = true var elementType = clone_type(top._type.firstType) @@ -1391,8 +1422,8 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { var cll & = unsafe(calls[i]) let opName = cll._1.name if (opName == "where_") { - // skip/take-after-where is rejected — canonical chain order is - if (seenSkip || seenTake) return null + // skip/take/skip_while/take_while-after-where is rejected — canonical chain order is + if (seenSkip || seenSkipWhile || seenTakeWhile || seenTake) return null var predicate : Expression? if (seenSelect) { // Phase 3d / single-eval: where-after-select. Bind the current projection @@ -1419,7 +1450,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { whereCond = qmacro($e(whereCond) && $e(predicate)) } } elif (opName == "select") { - if (seenSkip || seenTake) return null + if (seenSkip || seenSkipWhile || seenTakeWhile || seenTake) return null // Chained selects: bind the previous projection to a fresh local now so the next if (projection != null) { if (has_sideeffects(projection)) { @@ -1435,12 +1466,28 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { elementType = clone_type(cll._0._type.firstType) seenSelect = true } elif (opName == "skip") { - // Canonical chain: at most one skip, before any take. Multiple skips / skip-after- - if (seenSkip || seenTake) return null + // Canonical chain: at most one skip, before any skip_while/take_while/take. 
+ if (seenSkip || seenSkipWhile || seenTakeWhile || seenTake) return null var skipArg = cll._0.arguments[1] if (skipArg == null || skipArg._type == null || skipArg._type.baseType != Type.tInt) return null skipExpr = clone_expression(skipArg) seenSkip = true + } elif (opName == "skip_while") { + // pred uses itName; seenSelect bails (chained-bind peel is a follow-up). Canonical: after skip, before take_while/take. + if (seenSelect || seenSkipWhile || seenTakeWhile || seenTake) return null + var swArg = cll._0.arguments[1] + if (swArg == null) return null + skipWhileCond = fold_linq_cond(swArg, itName) + if (skipWhileCond == null) return null + seenSkipWhile = true + } elif (opName == "take_while") { + // take_while pred sees source element (itName). Same select-cascade rule as skip_while. + if (seenSelect || seenTakeWhile || seenTake) return null + var twArg = cll._0.arguments[1] + if (twArg == null) return null + takeWhileCond = fold_linq_cond(twArg, itName) + if (takeWhileCond == null) return null + seenTakeWhile = true } elif (opName == "take") { if (seenTake) return null var takeArg = cll._0.arguments[1] @@ -1457,7 +1504,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { if (projection != null && has_sideeffects(projection)) { allProjectionsPure = false } - let noLimits = skipExpr == null && takeExpr == null + let noLimits = skipExpr == null && takeExpr == null && skipWhileCond == null && takeWhileCond == null // Count-shaped shortcut: when terminator is `count` (→ int) or `long_count` (→ int64), let isCountShaped = (lane == LinqLane.COUNTER || (lane == LinqLane.ACCUMULATOR && lastName == "long_count")) @@ -1468,7 +1515,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? 
{ if (lane == LinqLane.ACCUMULATOR) return emit_accumulator_lane(lastName, top, projection, whereCond, intermediateBinds, preCondStmts, elementType, srcName, accName, itName, skipName, takeCountName, - skipExpr, takeExpr, at) + skippingName, skipExpr, takeExpr, skipWhileCond, takeWhileCond, at) // Ring 2: early-exit lane — `any` no-pred + no upstream work + no limits + length-bearing if (lane == LinqLane.EARLY_EXIT) { let terminatorCall = calls.back()._0 @@ -1478,7 +1525,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { return emit_any_empty_shortcut(top, srcName, at) return emit_early_exit_lane(lastName, top, projection, whereCond, intermediateBinds, preCondStmts, elementType, terminatorCall, srcName, itName, skipName, - takeCountName, skipExpr, takeExpr, at) + takeCountName, skippingName, skipExpr, takeExpr, skipWhileCond, takeWhileCond, at) } // Build the per-element loop body for COUNTER / ARRAY. Both lanes follow the same shape: var loopBody : Expression? @@ -1495,7 +1542,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { $i(accName) ++ } prepend_binds(stmts, intermediateBinds) - wrap_with_skip_take(stmts, skipExpr, takeExpr, skipName, takeCountName) + wrap_with_ranges(stmts, skipExpr, takeExpr, skipWhileCond, takeWhileCond, skipName, takeCountName, skippingName) loopBody = prepend_precond(wrap_with_condition(stmts_to_expr(stmts), whereCond), preCondStmts) } else { // Array lane. `push_clone` is the safe append everywhere: for workhorse types it's a @@ -1504,7 +1551,8 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { stmts |> push <| qmacro_expr() { $i(accName) |> push_clone($e(projection)) } - } elif (whereCond != null || skipExpr != null || takeExpr != null) { + } elif (whereCond != null || skipExpr != null || takeExpr != null + || skipWhileCond != null || takeWhileCond != null) { // Identity push: `it` aliases the source element. 
Reached when chain is bare stmts |> push <| qmacro_expr() { $i(accName) |> push_clone($i(itName)) @@ -1514,15 +1562,15 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? { return null } prepend_binds(stmts, intermediateBinds) - wrap_with_skip_take(stmts, skipExpr, takeExpr, skipName, takeCountName) + wrap_with_ranges(stmts, skipExpr, takeExpr, skipWhileCond, takeWhileCond, skipName, takeCountName, skippingName) loopBody = prepend_precond(wrap_with_condition(stmts_to_expr(stmts), whereCond), preCondStmts) } if (counterLane) { - return emit_counter_lane(top, srcName, accName, itName, skipName, takeCountName, - skipExpr, takeExpr, loopBody, at) + return emit_counter_lane(top, srcName, accName, itName, skipName, takeCountName, skippingName, + skipExpr, takeExpr, skipWhileCond, loopBody, at) } else { return emit_array_lane(top, expr, loopBody, elementType, srcName, accName, itName, - skipName, takeCountName, skipExpr, takeExpr, at) + skipName, takeCountName, skippingName, skipExpr, takeExpr, skipWhileCond, at) } } diff --git a/tests/linq/test_linq_fold.das b/tests/linq/test_linq_fold.das index 410a218dba..07a18b8fcb 100644 --- a/tests/linq/test_linq_fold.das +++ b/tests/linq/test_linq_fold.das @@ -1209,7 +1209,7 @@ def test_aggregate_terminal_walk(t : T?) { let a = _fold(each(arr).aggregate(42, $(acc : int, x : int) => acc + x)) t |> equal(42, a) } - t |> run("aggregate: sum of doubles") @(t : T?) { + t |> run("aggregate: sum with multiplier") @(t : T?) { let arr <- [1, 2, 3, 4, 5] let a = _fold(each(arr).aggregate(0, $(acc : int, x : int) => acc + x * 2)) t |> equal(30, a) @@ -2628,3 +2628,151 @@ def test_top_n_mid_chain_iterator_source(t : T?) { } } } + +[test] +def test_take_while_skip_while_splice_parity(t : T?) { + t |> run("take_while array lane") @(tt : T?) { + let got <- _fold(each([5, 3, 8, 1, 4])._take_while(_ < 8).to_array()) + let expected = [5, 3] + tt |> equal(2, length(got)) + for (v, i in got, 0 .. 
2) { + tt |> equal(expected[i], v) + } + } + t |> run("take_while count lane") @(tt : T?) { + let n = _fold(each([5, 3, 8, 1, 4])._take_while(_ < 8).count()) + tt |> equal(2, n) + } + t |> run("take_while sum (accumulator)") @(tt : T?) { + let s = _fold(each([5, 3, 8, 1, 4])._take_while(_ < 8).sum()) + tt |> equal(8, s) + } + t |> run("take_while max (accumulator)") @(tt : T?) { + let m = _fold(each([5, 3, 8, 1, 4])._take_while(_ < 8).max()) + tt |> equal(5, m) + } + t |> run("take_while first (early-exit)") @(tt : T?) { + let f = _fold(each([5, 3, 8, 1, 4])._take_while(_ < 8).first()) + tt |> equal(5, f) + } + t |> run("take_while first_or_default with no survivor") @(tt : T?) { + // First source element fails pred → take_while breaks before any emit → first_or_default(99). + let f = _fold(each([10, 5, 3])._take_while(_ < 8).first_or_default(99)) + tt |> equal(99, f) + } + t |> run("take_while any") @(tt : T?) { + let a = _fold(each([5, 3, 8])._take_while(_ < 8).any()) + tt |> equal(true, a) + let b = _fold(each([10, 5, 3])._take_while(_ < 8).any()) + tt |> equal(false, b) + } + t |> run("take_while pred-always-true emits whole source") @(tt : T?) { + let n = _fold(each([1, 2, 3])._take_while(_ < 100).count()) + tt |> equal(3, n) + } + t |> run("take_while pred-always-false emits nothing") @(tt : T?) { + let n = _fold(each([1, 2, 3])._take_while(_ > 100).count()) + tt |> equal(0, n) + } + t |> run("take_while empty source") @(tt : T?) { + var empty : array + let n = _fold(each(empty)._take_while(_ < 100).count()) + tt |> equal(0, n) + } + t |> run("skip_while array lane") @(tt : T?) { + let got <- _fold(each([5, 3, 8, 1, 4])._skip_while(_ > 3).to_array()) + let expected = [3, 8, 1, 4] + tt |> equal(4, length(got)) + for (v, i in got, 0 .. 4) { + tt |> equal(expected[i], v) + } + } + t |> run("skip_while count lane") @(tt : T?) 
{ + let n = _fold(each([5, 3, 8, 1, 4])._skip_while(_ > 3).count()) + tt |> equal(4, n) + } + t |> run("skip_while min (accumulator)") @(tt : T?) { + let m = _fold(each([5, 3, 8, 1, 4])._skip_while(_ > 3).min()) + tt |> equal(1, m) + } + t |> run("skip_while pred-always-true skips whole source") @(tt : T?) { + let n = _fold(each([1, 2, 3])._skip_while(_ < 100).count()) + tt |> equal(0, n) + } + t |> run("skip_while pred-always-false emits everything") @(tt : T?) { + let n = _fold(each([1, 2, 3])._skip_while(_ > 100).count()) + tt |> equal(3, n) + } + t |> run("skip_while empty source") @(tt : T?) { + var empty : array + let n = _fold(each(empty)._skip_while(_ < 100).count()) + tt |> equal(0, n) + } + t |> run("skip_while + take_while compose") @(tt : T?) { + // Skip leading >3 (skip 5), then take while <8 (emit 3, break on 8). + let got <- _fold(each([5, 3, 8, 1, 4])._skip_while(_ > 3)._take_while(_ < 8).to_array()) + tt |> equal(1, length(got)) + tt |> equal(3, got[0]) + } + t |> run("where + take_while compose") @(tt : T?) { + let got <- _fold(each([5, 3, 8, 1, 4])._where(_ != 3)._take_while(_ < 8).to_array()) + tt |> equal(1, length(got)) + tt |> equal(5, got[0]) + } + t |> run("skip + take_while compose") @(tt : T?) { + let got <- _fold(each([5, 3, 8, 1, 4]).skip(1)._take_while(_ < 8).to_array()) + tt |> equal(1, length(got)) + tt |> equal(3, got[0]) + } + t |> run("take_while + take compose — take_while breaks first") @(tt : T?) { + // Source: [5, 3, 1, 4, 8]; take_while<8 keeps [5, 3, 1, 4]; take(2) limits to [5, 3]. + let got <- _fold(each([5, 3, 1, 4, 8])._take_while(_ < 8).take(2).to_array()) + tt |> equal(2, length(got)) + tt |> equal(5, got[0]) + tt |> equal(3, got[1]) + } + t |> run("take_while + take compose — take(N) hits first") @(tt : T?) { + // Source: [5, 3, 1, 4, 8]; take_while<8 keeps [5, 3, 1, 4]; take(10) is wider → output = take_while output. 
+ let got <- _fold(each([5, 3, 1, 4, 8])._take_while(_ < 8).take(10).to_array()) + tt |> equal(4, length(got)) + } + t |> run("skip_while + skip compose") @(tt : T?) { + // Source: [5, 5, 3, 1, 4]. Written as skip_while-then-skip this would yield [1, 4] (skip_while>3 drops [5, 5], skip(1) drops 3). + // Canonical order is skip then skip_while, so the chain below puts skip BEFORE skip_while and expects [3, 1, 4]. + let got <- _fold(each([5, 5, 3, 1, 4]).skip(1)._skip_while(_ > 3).to_array()) + // skip(1) drops first 5 → [5, 3, 1, 4]; skip_while>3 skips [5] then emits [3, 1, 4]. + tt |> equal(3, length(got)) + tt |> equal(3, got[0]) + tt |> equal(1, got[1]) + tt |> equal(4, got[2]) + } +} + +[test] +def test_take_while_skip_while_cascade_bails(t : T?) { + // Cascades to tier 2 (still semantically correct, just no splice). These confirm the + // bail conditions don't break observable behavior. + t |> run("select before take_while cascades — values still match reference") @(tt : T?) { + let got <- _fold(each([5, 3, 8, 1])._select(_ * 2)._take_while(_ < 16).to_array()) + let expected = [10, 6] + tt |> equal(2, length(got)) + for (v, i in got, 0 .. 2) { + tt |> equal(expected[i], v) + } + } + t |> run("select before skip_while cascades — values still match reference") @(tt : T?) { + let got <- _fold(each([5, 3, 8, 1])._select(_ * 2)._skip_while(_ > 6).to_array()) + let expected = [6, 16, 2] + tt |> equal(3, length(got)) + for (v, i in got, 0 .. 3) { + tt |> equal(expected[i], v) + } + } + t |> run("multiple take_while cascades — only first one spliced semantics") @(tt : T?) { + // Reference would AND the two breaks; splice rejects (multiple take_while). Values: + // take_while<8 keeps [5, 3, 1] (breaks on 8); take_while<4 of those keeps [] because + // the very first element fails: 5<4 is false → break → [].
+ let got <- _fold(each([5, 3, 1, 8])._take_while(_ < 8)._take_while(_ < 4).to_array()) + tt |> equal(0, length(got)) + } +} diff --git a/tests/linq/test_linq_fold_ast.das b/tests/linq/test_linq_fold_ast.das index d56e195deb..0752139755 100644 --- a/tests/linq/test_linq_fold_ast.das +++ b/tests/linq/test_linq_fold_ast.das @@ -2869,3 +2869,177 @@ def test_aggregate_with_where_fuses(t : T?) { } } +// ── take_while / skip_while splice targets (PR-G) ────────────────────── + +[export, marker(no_coverage)] +def target_take_while_array_fold() : array { + return <- _fold(each([5, 3, 8, 1, 4])._take_while(_ < 8).to_array()) +} + +[export, marker(no_coverage)] +def target_take_while_count_fold() : int { + return _fold(each([5, 3, 8, 1, 4])._take_while(_ < 8).count()) +} + +[export, marker(no_coverage)] +def target_take_while_sum_fold() : int { + return _fold(each([5, 3, 8, 1, 4])._take_while(_ < 8).sum()) +} + +[export, marker(no_coverage)] +def target_take_while_first_fold() : int { + return _fold(each([5, 3, 8, 1, 4])._take_while(_ < 8).first()) +} + +[export, marker(no_coverage)] +def target_skip_while_array_fold() : array { + return <- _fold(each([5, 3, 8, 1, 4])._skip_while(_ > 3).to_array()) +} + +[export, marker(no_coverage)] +def target_skip_while_count_fold() : int { + return _fold(each([5, 3, 8, 1, 4])._skip_while(_ > 3).count()) +} + +[export, marker(no_coverage)] +def target_skip_while_then_take_while_fold() : int { + return _fold(each([5, 3, 8, 1, 4])._skip_while(_ > 3)._take_while(_ < 8).count()) +} + +[export, marker(no_coverage)] +def target_select_then_take_while_cascades_fold() : array { + // Cascade fingerprint: select precedes take_while → splice bails, tier-2 cascade runs. + return <- _fold(each([5, 3, 8, 1])._select(_ * 2)._take_while(_ < 16).to_array()) +} + +[test] +def test_take_while_splices(t : T?) 
{ + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_take_while_array_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "take_while: expected invoke wrapper") + t |> equal(1, count_inner_for_loops(body_expr), "take_while: one for-loop") + // take_while is spliced inline as `if (!pred) break` — the library `take_while_impl` + // helper must NOT appear as a runtime call. + t |> equal(0, count_call(body_expr, "take_while"), "take_while: helper inlined") + t |> equal(0, count_call(body_expr, "take_while_impl"), "take_while: impl inlined") + t |> equal(0, count_call(body_expr, "take_while_impl_const"), "take_while: impl_const inlined") + } +} + +[test] +def test_take_while_count_splices(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_take_while_count_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "take_while count: invoke wrapper") + t |> equal(1, count_inner_for_loops(body_expr), "take_while count: one for-loop") + t |> equal(0, count_call(body_expr, "take_while"), "take_while count: helper inlined") + } +} + +[test] +def test_take_while_sum_splices(t : T?) 
{ + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_take_while_sum_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "take_while sum: invoke wrapper") + t |> equal(1, count_inner_for_loops(body_expr), "take_while sum: one for-loop (accumulator lane)") + t |> equal(0, count_call(body_expr, "take_while"), "take_while sum: helper inlined") + t |> equal(0, count_call(body_expr, "sum"), "take_while sum: sum_impl inlined") + } +} + +[test] +def test_take_while_first_splices(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_take_while_first_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "take_while first: invoke wrapper") + t |> equal(1, count_inner_for_loops(body_expr), "take_while first: one for-loop (early-exit lane)") + t |> equal(0, count_call(body_expr, "take_while"), "take_while first: helper inlined") + t |> equal(0, count_call(body_expr, "first"), "take_while first: first inlined") + } +} + +[test] +def test_skip_while_splices(t : T?) 
{ + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_skip_while_array_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "skip_while: invoke wrapper") + t |> equal(1, count_inner_for_loops(body_expr), "skip_while: one for-loop") + t |> equal(0, count_call(body_expr, "skip_while"), "skip_while: helper inlined") + t |> equal(0, count_call(body_expr, "skip_while_impl"), "skip_while: impl inlined") + t |> equal(0, count_call(body_expr, "skip_while_impl_const"), "skip_while: impl_const inlined") + } +} + +[test] +def test_skip_while_count_splices(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_skip_while_count_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "skip_while count: invoke wrapper") + t |> equal(1, count_inner_for_loops(body_expr), "skip_while count: one for-loop") + t |> equal(0, count_call(body_expr, "skip_while"), "skip_while count: helper inlined") + } +} + +[test] +def test_skip_while_then_take_while_compose_splices(t : T?) 
{ + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_skip_while_then_take_while_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "skip_while+take_while: invoke wrapper") + t |> equal(1, count_inner_for_loops(body_expr), "skip_while+take_while: one for-loop") + t |> equal(0, count_call(body_expr, "skip_while"), "skip_while+take_while: skip_while inlined") + t |> equal(0, count_call(body_expr, "take_while"), "skip_while+take_while: take_while inlined") + } +} + +[test] +def test_select_then_take_while_cascades_to_tier2(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_select_then_take_while_cascades_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched, "select-then-take_while: target found") + // Cascade tier-2 fingerprint: multiple outer let-vars from fold_linq_default's + // intermediate-pass pipeline. Distinct from the single-invoke splice shape. + t |> success(count_outer_let_vars(body_expr) >= 2, + "select-then-take_while: tier-2 cascade fingerprint (>=2 outer lets)") + } +} + From 3985217ddca49608b37c6979674c2aa07f730b37 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Tue, 19 May 2026 03:41:41 -0700 Subject: [PATCH 3/8] linq_fold: address Copilot review on PR #2732 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bundled cosmetic fixes from PR #2732's Copilot review (PR-F is queued and will land before this PR-G stack): - benchmarks/sql/single_match.das:9 — comment claimed `LIMIT 2` but the SQL used `_first()` (LIMIT 1, fixture-side id uniqueness already asserts exactly-one). Reworded to match the code. 
- benchmarks/sql/aggregate_match.das:9 — comment said "max(price - min)" but the reducer is `sum(price)`. Reworded to "sum of prices". The 3rd Copilot suggestion — renaming the test label "aggregate: sum of doubles" → "aggregate: sum with multiplier" — lands in the PR-G commit ahead of this one since that file is already touched there. Co-Authored-By: Claude Opus 4.7 (1M context) --- benchmarks/sql/aggregate_match.das | 8 ++++---- benchmarks/sql/single_match.das | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/benchmarks/sql/aggregate_match.das b/benchmarks/sql/aggregate_match.das index eb4b65170c..f21df6cd36 100644 --- a/benchmarks/sql/aggregate_match.das +++ b/benchmarks/sql/aggregate_match.das @@ -3,10 +3,10 @@ options persistent_heap require _common public -// User-supplied binary reducer: max(price - min) over a where-filtered slice. SQL -// can express this as `SELECT (MAX(price) - MIN(price))`. m3 traverses the array -// with the user-block invoked per element; m3f peels the block body and inlines -// alongside the where predicate — single pass with no per-element block invoke. +// User-supplied binary reducer: sum of prices over a where-filtered slice. SQL +// can express this as `SELECT SUM(price)`. m3 traverses the array with the +// user-block invoked per element; m3f peels the block body and inlines alongside +// the where predicate — single pass with no per-element block invoke. let THRESHOLD = 200 diff --git a/benchmarks/sql/single_match.das b/benchmarks/sql/single_match.das index 37bab83947..863e71adad 100644 --- a/benchmarks/sql/single_match.das +++ b/benchmarks/sql/single_match.das @@ -6,7 +6,7 @@ require _common public // Fixture car ids run 1..n so id=42 is unique. `single` walks the full source // (semantically — exactly-one-match), but the splice still fuses upstream where. // -// SQL: SELECT * FROM Cars WHERE id = 42 LIMIT 2 — read at most 2 to assert uniqueness. 
+// SQL: SELECT * FROM Cars WHERE id = 42 — pk lookup returns the single row. // m3/m3f traverse the array filtering and asserting one survivor. let TARGET_ID = 42 From d23350aa9a0e12c9cd369e8811c86a3d9e2d8627 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Tue, 19 May 2026 08:44:52 -0700 Subject: [PATCH 4/8] mouse-data: 8 dasImgui + 1 daslang cards from PR-D / PR-E work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dasImgui cards (PR-D + table rail + popup/loop ID patterns): - aot-header-hand-maintained-not-regenerated: aot_dasIMGUI.h must carry hand-written DAS_MOD_API decls when new C++ helpers are bound; not regenerated from anything. - ffi-const-ref-return-and-decltype-name-collision: the failure modes of `addExtern` — runtime "missing WrapType implementation" on const-ref returns and compile-time `decltype incorrect argument` on namespace/identifier collision. - popup-id-stack-mismatch-stateful-overload: open_popup / open_popup_on_item_click with a string id outside the popup container body fails because the trigger and BeginPopup hash against different ID-stack depths. - registry-path-brackets-vs-slash-indexed-vs-container: indexed widget paths use IDENT[N], container paths use IDENT/N; mixing them (test/record wait targets with /0) silently no-ops. - sort-specs-block-arg-helper-for-sortable-data-table: the TableSortSpec struct + `sort_specs() $(specs) { ... }` pattern that wraps the cpp ImGuiTableSortSpecs pointer drain. - synced-table-instances-shared-boost-ident-in-a-for-loop: sharing the same boost IDENT across iterations of a `for` loop in dasImgui binds BeginTable to one table instance — synced layout is the intended behavior. - with-id-for-per-iter-id-namespacing-inside-loops: `with_id(i) { ... }` is the daslang shape for ImGui's PushID/PopID per-iter namespacing when widget IDENTs are literal strings. 
- imgui-angled-headers-row-plus-normal-headers-row-both-intentional: cpp imgui_demo calls TableAngledHeadersRow AND TableHeadersRow — the angled row has no column context menu, so both rows are intentional. daslang card: - table-safe-index-safe-only-with-null-coalescing: `table?[key]` is safe ONLY when immediately consumed by `??`. Bare `tab?[k]` / stored pointer escapes the table's storage and the compiler enforces `unsafe()`. Mirrors the rule just landed in CLAUDE.md (commit 760b69fac). Co-Authored-By: Claude Opus 4.7 (1M context) --- ...-header-hand-maintained-not-regenerated.md | 33 +++++++++ ...-ref-return-and-decltype-name-collision.md | 73 +++++++++++++++++++ ...pup-id-stack-mismatch-stateful-overload.md | 46 ++++++++++++ ...-brackets-vs-slash-indexed-vs-container.md | 34 +++++++++ ...ortspec-struct-sort-specs-specs-pattern.md | 51 +++++++++++++ ...ame-id-shared-boost-ident-in-a-for-loop.md | 46 ++++++++++++ ...where-widget-idents-are-literal-strings.md | 43 +++++++++++ ...fe-index-safe-only-with-null-coalescing.md | 35 +++++++++ ...lus-normal-headers-row-both-intentional.md | 26 +++++++ 9 files changed, 387 insertions(+) create mode 100644 mouse-data/docs/dasimgui-aot-header-hand-maintained-not-regenerated.md create mode 100644 mouse-data/docs/dasimgui-ffi-const-ref-return-and-decltype-name-collision.md create mode 100644 mouse-data/docs/dasimgui-popup-id-stack-mismatch-stateful-overload.md create mode 100644 mouse-data/docs/dasimgui-registry-path-brackets-vs-slash-indexed-vs-container.md create mode 100644 mouse-data/docs/dasimgui-sort-specs-block-arg-helper-for-sortable-data-table-tablesortspec-struct-sort-specs-specs-pattern.md create mode 100644 mouse-data/docs/dasimgui-synced-table-instances-multiple-begintable-with-same-id-shared-boost-ident-in-a-for-loop.md create mode 100644 mouse-data/docs/dasimgui-with-id-for-per-iter-id-namespacing-inside-loops-where-widget-idents-are-literal-strings.md create mode 100644 
mouse-data/docs/daslang-table-safe-index-safe-only-with-null-coalescing.md create mode 100644 mouse-data/docs/imgui-angled-headers-row-plus-normal-headers-row-both-intentional.md diff --git a/mouse-data/docs/dasimgui-aot-header-hand-maintained-not-regenerated.md b/mouse-data/docs/dasimgui-aot-header-hand-maintained-not-regenerated.md new file mode 100644 index 0000000000..9c57574a70 --- /dev/null +++ b/mouse-data/docs/dasimgui-aot-header-hand-maintained-not-regenerated.md @@ -0,0 +1,33 @@ +--- +slug: dasimgui-aot-header-hand-maintained-not-regenerated +title: dasImgui aot_dasIMGUI.h hand-maintained — adding new DAS_MOD_API decls when binding new C++ helpers +created: 2026-05-19 +last_verified: 2026-05-19 +links: [] +--- + +**`src/aot_dasIMGUI.h` is hand-maintained, NOT regenerated by `gen_bind.das`** — when you add a new helper to `src/dasIMGUI.main.cpp` that scripts will call from AOT-compiled code, you must also append the matching `DAS_MOD_API` declaration to `aot_dasIMGUI.h` by hand. There's no autogen banner at the top of the file and no CMake target re-runs gen_bind for it. + +**Test for whether you need to update it**: does the new helper appear as a callable function in user `.das` code (i.e. registered as a builtin via `addExtern<...>` in the module's main.cpp)? If yes, AOT will emit `das::<helper>(args...)` in generated C++ and need a prototype. + +**The shape**: each line mirrors an `addExtern` registration in `dasIMGUI.main.cpp` — same name, same C++ signature, no body. Example from this session (commit `56b5111`, dasImgui PR #52 sort_specs rail): + +```cpp +namespace das { + // ... + DAS_MOD_API ImGuiSortDirection_ GetColumnSortDirection ( const ImGuiTableColumnSortSpecs * specs ); + DAS_MOD_API const ImGuiTableColumnSortSpecs * GetSortSpec ( ImGuiTableSortSpecs * specs, int idx ); + // ... +} +``` + +**Failure mode if forgotten**: interpreter-only smoke / lint passes (CI's das matrix without `-use-aot`).
The break surfaces in: +- `test_aot.exe -use-aot dastest/dastest.das -- --use-aot --test ` — `error[50101]: AOT link failed` or "undefined symbol" at link time +- `cmake --build test_aot` — link errors on AOT'd test sources that call the new helper + +Default 50101 reflex is "stale exe" ([aot-local-stale-exe](feedback-aot-local-stale-exe.md)) or daslib-closure cache bust ([feedback-aot-dependency-cache-bust](feedback-aot-dependency-cache-bust.md)) — but if you added a C++ helper to dasIMGUI this PR, the third suspect is missing-from-aot_dasIMGUI.h. Grep the new symbol in `src/aot_dasIMGUI.h`; if absent, add it. + +**Why hand-maintained**: bind regen is opinionated and would regenerate a lot more than just the FFI prototypes. The header is small (~50 lines) and the friction of an occasional missed entry is acceptable vs. the friction of regen-noise on every PR. Verified by reading the file header (no autogen banner, manual `#pragma once`, plain `namespace das { ... }` block) and the existing pattern of one `DAS_MOD_API` line per `dasIMGUI.main.cpp` `addExtern` registration. + +## Questions +- dasImgui aot_dasIMGUI.h hand-maintained — adding new DAS_MOD_API decls when binding new C++ helpers diff --git a/mouse-data/docs/dasimgui-ffi-const-ref-return-and-decltype-name-collision.md b/mouse-data/docs/dasimgui-ffi-const-ref-return-and-decltype-name-collision.md new file mode 100644 index 0000000000..1f10ab46a6 --- /dev/null +++ b/mouse-data/docs/dasimgui-ffi-const-ref-return-and-decltype-name-collision.md @@ -0,0 +1,73 @@ +--- +slug: dasimgui-ffi-const-ref-return-and-decltype-name-collision +title: When binding a C++ helper to daslang via `addExtern`, what trips the runtime "missing WrapType implementation" error and the compile-time `decltype incorrect argument` error? +created: 2026-05-19 +last_verified: 2026-05-19 +links: [] +--- + +**Two FFI gotchas that bit on dasImgui PR-D (`GetSortSpec` / `GetColumnSortDirection` helpers):** + +## 1. 
`const T&` return type fails at RUNTIME with "missing WrapType implementation" + +A bound C++ helper returning `const SomeStruct&` (reference to a non-workhorse type) compiles fine and links fine, but when invoked from daslang at runtime throws: + +``` +EXCEPTION: internal integration error, missing WrapType implementation or it's not included +``` + +The check is in `include/daScript/simulate/interop.h:146-152`: + +```cpp +} else if constexpr ( !is_workhorse_type::value && is_same::value ) { + // if the WrapType is the same as Result, we are missing WrapType implementation, or its not included + context.throw_error("internal integration error, missing WrapType implementation or it's not included"); +``` + +**Fix:** Return `const T*` (pointer) instead of `const T&` (reference). Pointers are always workhorse-handled, so no specialization needed. + +```cpp +// BAD: runtime "missing WrapType implementation" +const ImGuiTableColumnSortSpecs & GetSortSpec ( ImGuiTableSortSpecs * specs, int idx ) { + return specs->Specs[idx]; +} + +// GOOD: pointer return, runtime-safe +const ImGuiTableColumnSortSpecs * GetSortSpec ( ImGuiTableSortSpecs * specs, int idx ) { + return &specs->Specs[idx]; +} +``` + +Daslang side then uses `let s = GetSortSpec(p, i)` and gets `T const?`. Field access via `.` auto-derefs (`s.ColumnIndex` works fine). + +## 2. `decltype(&das::Foo)` fails when `Foo` shadows a struct member name + +`DAS_BIND_FUN` expands to `decltype(&fn), &fn`. If you name a free helper the same as a struct's member field, MSVC's `decltype(&das::SortDirection)` chokes with: + +``` +error C3556: 'das::SortDirection': incorrect argument to 'decltype' +error C2672: 'addExtern': no matching overloaded function found +``` + +This bit even though the helper was in `das::` namespace and the conflict was with `ImGuiTableColumnSortSpecs::SortDirection` (a different scope). MSVC's qualified-name lookup for `decltype` is more aggressive than expected. 
+ +**Fix:** Rename the helper to something that doesn't collide: + +```cpp +// BAD: clashes with ImGuiTableColumnSortSpecs::SortDirection +ImGuiSortDirection_ SortDirection ( const ImGuiTableColumnSortSpecs * specs ) { ... } +addExtern(*this, lib, "SortDirection", ...); + +// GOOD: unambiguous name +ImGuiSortDirection_ GetColumnSortDirection ( const ImGuiTableColumnSortSpecs * specs ) { ... } +addExtern(*this, lib, "GetColumnSortDirection", ...); +``` + +**Why this collides only sometimes:** The original `SortDirection(const T&)` binding existed in dasImgui master without conflict, but flipping the param to `const T*` triggered the decltype path differently. MSVC's overload-resolution heuristics for `decltype(&qualified_name)` are touchy around any name that exists as both a free symbol AND a struct member, even if scopes are disjoint. Renaming is the safe move; trying to qualify harder (`das::SortDirection`, `&das::SortDirection`, etc.) doesn't help — MSVC has already decided the name is ambiguous. + +## Bonus rule consistency + +If your bound free helper takes a struct, also prefer pointer-arg over ref-arg for daslang interop consistency — `TableGetSortSpecs() : ImGuiTableSortSpecs*` returns a pointer, so making your helpers take `T*` lets daslang call them directly without `*p` deref or `let s & = unsafe(...)` ref-binding gymnastics. Match the binding-side type to what daslang naturally hands to you. + +## Questions +- When binding a C++ helper to daslang via `addExtern`, what trips the runtime "missing WrapType implementation" error and the compile-time `decltype incorrect argument` error? 
diff --git a/mouse-data/docs/dasimgui-popup-id-stack-mismatch-stateful-overload.md b/mouse-data/docs/dasimgui-popup-id-stack-mismatch-stateful-overload.md new file mode 100644 index 0000000000..c64f40a879 --- /dev/null +++ b/mouse-data/docs/dasimgui-popup-id-stack-mismatch-stateful-overload.md @@ -0,0 +1,46 @@ +--- +slug: dasimgui-popup-id-stack-mismatch-stateful-overload +title: Why doesn't my dasImgui popup open when I call open_popup or open_popup_on_item_click with a string id outside the popup/popup_modal container body? The trigger fires but the container's BeginPopup returns false. +created: 2026-05-19 +last_verified: 2026-05-19 +links: [] +--- + +**Root cause:** ID-stack mismatch between the trigger and the container. + +`open_popup(str_id)` and `open_popup_on_item_click(str_id, flags)` compute `GetID(str_id)` at the **call site's** ID stack — usually outside any `[container]`. But the `popup`/`popup_modal` containers run their `BeginPopup(text)` / `BeginPopupModal(text)` **INSIDE** the internal `PushID(IDENT)` frame emitted by the `[container]` macro's `widget_prelude`. The two IDs hash differently — the trigger opens one popup slot, the container reads a different slot, the popup never displays. + +Two real bugs of this shape shipped (round-1/round-2 of dasImgui PR #51): +1. `app_custom_rendering` canvas: `open_popup_on_item_click("context", RMB)` outside `popup(CR_CV_CTX_MENU)` — right-click never opened the ctx menu. +2. `app_documents` close-confirm: `open_popup("Save?")` outside `popup_modal(DOCS_CLOSE_MODAL)` — dirty-doc close requests queued forever. + +**Fix:** Use the **stateful overloads** that defer the OpenPopup into the container's PushID scope. They live in `widgets/imgui_containers_builtin.das`: + +```daslang +// Imperative open (set pending_open; container's next visit calls OpenPopup INSIDE its PushID frame). +open_popup(state : PopupState) + +// Right-click-on-previously-submitted-item open (detects IsMouseReleased + IsItemHovered). 
+open_popup_on_item_click(var state : PopupState; flags : ImGuiPopupFlags = MouseButtonRight) +``` + +Call sites: + +```daslang +popup(MY_POPUP, (text = "ctx")) { + text("...") +} +if (button(OPEN_BTN, (text="Open"))) { + open_popup(MY_POPUP) // stateful +} +// or, for right-click on the previous item: +some_item_widget(...) +open_popup_on_item_click(MY_POPUP, ImGuiPopupFlags.MouseButtonRight) // stateful +``` + +The string-id overloads (`open_popup(str_id, flags)` / `open_popup_on_item_click(str_id, flags)`) are still useful for popups that are opened from inside the same ID-stack frame as the BeginPopup (e.g. when the trigger and the container both live inside `with_id` / a deeper container) — but for the typical "trigger outside the container body" pattern, ALWAYS use the stateful state-arg form. Their docstrings cross-reference and call this out. + +Mnemonic: if your popup is registered as `/MY_POPUP` in snapshots (so the container's PushID landed) but the trigger doesn't open it, the trigger's GetID hashed at a stack frame ABOVE that PushID. Switch to the state-arg overload. + +## Questions +- Why doesn't my dasImgui popup open when I call open_popup or open_popup_on_item_click with a string id outside the popup/popup_modal container body? The trigger fires but the container's BeginPopup returns false. 
diff --git a/mouse-data/docs/dasimgui-registry-path-brackets-vs-slash-indexed-vs-container.md b/mouse-data/docs/dasimgui-registry-path-brackets-vs-slash-indexed-vs-container.md new file mode 100644 index 0000000000..bed9b3283b --- /dev/null +++ b/mouse-data/docs/dasimgui-registry-path-brackets-vs-slash-indexed-vs-container.md @@ -0,0 +1,34 @@ +--- +slug: dasimgui-registry-path-brackets-vs-slash-indexed-vs-container +title: dasImgui registry path convention — indexed widgets use IDENT[N] (brackets) vs containers IDENT/N (slash) — why test/record wait targets fail with /0 +created: 2026-05-19 +last_verified: 2026-05-19 +links: [] +--- + +**Registry paths use two different separators depending on what's indexed**: + +| Shape | Path format | Example | +|---|---|---| +| Indexed leaf/text widget (`text(DT_NAME[i], ...)`) | `IDENT[N]` — square brackets | `DT_WIN/DT_TABLE/DT_NAME[0]` | +| Container hierarchy nesting | `IDENT/N` — forward slash | `DT_WIN/DT_TABLE` | +| Mixed | brackets at leaf, slashes between containers | `DT_WIN/DT_TABLE/DT_NAME[0]` | + +The brackets-vs-slash split is set by two different code paths: +- **Indexed widget wrapper** (per `[widget]` / `[container]` annotation, emitted lazily per IDENT) builds the bare key as `"{IDENT}[{k}]"` — see `imgui_boost_runtime.widget_path_key` and the per-IDENT macro-emitted wrapper in [how-do-indexed-table-keyed-dasimgui-widgets-work-slider-float-channel-vol-i](how-do-indexed-table-keyed-dasimgui-widgets-work-slider-float-channel-vol-i.md). +- **Container path stack** maintains `g_container_path_str` as `"/"`-joined chain of currently-open container idents — see [dasimgui-container-annotation-and-hierarchical-path-keys](dasimgui-container-annotation-and-hierarchical-path-keys.md). + +**The gotcha that bit me**: writing `wait_for_widget(d, "SORT_WIN/SORT_TABLE/SORT_ROW_ID/0", 15.0f)` for an indexed text widget. The widget never registers under that path — the actual path is `SORT_ROW_ID[0]`. 
The wait times out at 15s, looks like a CI flake or layout bug, actually a 1-char typo. Caught on dasImgui PR #52 (CI failure `bfcb48b`, fixed in `57ade70`). + +**Authoritative example** of both forms in one driver: `tests/integration/record_data_table.das:37-39`: + +```daslang +let T_TABLE = "DT_WIN/DT_TABLE" // container/container — slashes +let T_NAME0 = "DT_WIN/DT_TABLE/DT_NAME[0]" // ../indexed-leaf — brackets at leaf +let T_VAL5 = "DT_WIN/DT_TABLE/DT_VAL[5]" +``` + +Reach for this whenever writing `wait_for_widget` / `wait_for_render` / snapshot lookups in a record_*.das / test_*.das driver. If the target is a `text(IDENT[i], ...)` / `slider_float(STATE[k], ...)` etc. — it's brackets at that segment. If it's a `window(W, ...)` / `data_table(T, ...)` / `child(C, ...)` — it's slash between containers. + +## Questions +- dasImgui registry path convention — indexed widgets use IDENT[N] (brackets) vs containers IDENT/N (slash) — why test/record wait targets fail with /0 diff --git a/mouse-data/docs/dasimgui-sort-specs-block-arg-helper-for-sortable-data-table-tablesortspec-struct-sort-specs-specs-pattern.md b/mouse-data/docs/dasimgui-sort-specs-block-arg-helper-for-sortable-data-table-tablesortspec-struct-sort-specs-specs-pattern.md new file mode 100644 index 0000000000..9ce2e4bce4 --- /dev/null +++ b/mouse-data/docs/dasimgui-sort-specs-block-arg-helper-for-sortable-data-table-tablesortspec-struct-sort-specs-specs-pattern.md @@ -0,0 +1,51 @@ +--- +slug: dasimgui-sort-specs-block-arg-helper-for-sortable-data-table-tablesortspec-struct-sort-specs-specs-pattern +title: dasImgui sort_specs block-arg helper for sortable data_table — TableSortSpec struct + sort_specs() $(specs) { ... } pattern +created: 2026-05-19 +last_verified: 2026-05-19 +last_updated: 2026-05-19 +links: [] +--- + +**`sort_specs() $(specs) { ... 
}`** is a block-arg helper in `widgets/imgui_table_builtin.das` that captures ImGui's `TableGetSortSpecs()`, converts each `ImGuiTableColumnSortSpecs` entry into a daslang-friendly `TableSortSpec`, invokes the block with the array, and auto-clears `SpecsDirty` on return. + +Call it inside a `data_table(...)` body BEFORE emitting rows. Header-row placement is flexible — call `sort_specs()` either BEFORE or AFTER `table_headers_row()`. Sort state is finalized at `BeginTable` time (input events drain at frame start during `NewFrame`), so the header-row call doesn't gate when specs become visible. cpp `ShowDemoWindowTablesAdvanced` calls before headers. Pass `ImGuiTableFlags.Sortable` (and optionally `SortMulti` for multi-column) on the table flags, and tag each column with a stable `user_id` so the comparator dispatches by identity rather than column_index (robust under Reorderable): + +```daslang +data_table(MY_TABLE, (text = "...", columns = 3, + flags = ImGuiTableFlags.Sortable | ImGuiTableFlags.SortMulti | ..., + outer_size = float2(0,0), inner_width = 0.0f)) { + table_setup_column("ID", ImGuiTableColumnFlags.DefaultSort, 0.0f, COL_ID) + table_setup_column("Name", ImGuiTableColumnFlags.None, 0.0f, COL_NAME) + table_setup_column("Qty", ImGuiTableColumnFlags.None, 0.0f, COL_QTY) + table_headers_row() + + sort_specs() $(specs) { + // Multi-key comparator: walk specs in priority order. + MY_ITEMS |> sort() $(a, b) { + for (s in specs) { + let ord = compare_by_user_id(s, a, b) + if (ord != 0) return ord < 0 + } + return a.id < b.id // tiebreak on a unique field + } + } + + for (i in range(length(MY_ITEMS))) { ... } +} +``` + +`TableSortSpec` fields (daslang-friendly mirror of `ImGuiTableColumnSortSpecs`): +- `column_index : int` — 0-based position +- `column_user_id : uint` — the `user_id` you passed to `table_setup_column` (or 0) +- `sort_order : int` — 0 = primary, 1 = secondary, ... 
(under SortMulti) +- `sort_direction : ImGuiSortDirection` — Ascending / Descending / None + +**Block fires on SpecsDirty, including the empty case.** `sort_specs()` returns `bool` — true iff the body ran (specs non-null AND SpecsDirty). When `SpecsCount == 0` the block STILL fires, called with an empty `specs : array<TableSortSpec>` — this is the `ImGuiTableFlags.SortTristate` "user cleared all sort columns" event (imgui.h:1871, `TableGetSortSpecs() may return specs where SpecsCount == 0`). The comparator should handle the empty case (typically "restore stable insertion order" or "no-op"). When dirty=false, reuse the most recent sort order; ImGui keeps the headers visually in sync without re-firing dirty until the user re-clicks. So the comparator cost is paid once per sort change, not every frame. + +**Final tiebreak matters** — daslang's `sort()` is qsort, not stable ([daslang-sort-qsort-not-stable](daslang-sort-qsort-not-stable.md)), so a final tiebreak on a unique field keeps the order total. Without it, equal-key elements can shuffle between frames. + +**Wired in dasImgui PR #52** (PR-D/1A, commit ad15368). Wrapper: `widgets/imgui_table_builtin.das sort_specs`. Backing C++ helpers in `src/dasIMGUI.main.cpp` — `GetSortSpec` returns a pointer (not reference) because non-workhorse `const T&` returns trip the WrapType FFI; `GetColumnSortDirection` is renamed from natural `SortDirection` to dodge an MSVC `decltype` member/free-function ambiguity (see [dasimgui-ffi-const-ref-return-and-decltype-name-collision](dasimgui-ffi-const-ref-return-and-decltype-name-collision.md)). Examples: `examples/features/sort_specs.das` (standalone inventory) + `examples/tutorial/data_table.das` (extended sortable tutorial) + `examples/imgui_demo/tables.das show_sorting()` (section 23 of the imgui_demo port). + +## Questions +- dasImgui sort_specs block-arg helper for sortable data_table — TableSortSpec struct + sort_specs() $(specs) { ...
} pattern diff --git a/mouse-data/docs/dasimgui-synced-table-instances-multiple-begintable-with-same-id-shared-boost-ident-in-a-for-loop.md b/mouse-data/docs/dasimgui-synced-table-instances-multiple-begintable-with-same-id-shared-boost-ident-in-a-for-loop.md new file mode 100644 index 0000000000..f3127a129d --- /dev/null +++ b/mouse-data/docs/dasimgui-synced-table-instances-multiple-begintable-with-same-id-shared-boost-ident-in-a-for-loop.md @@ -0,0 +1,46 @@ +--- +slug: dasimgui-synced-table-instances-multiple-begintable-with-same-id-shared-boost-ident-in-a-for-loop +title: dasImgui synced table instances — multiple BeginTable with same id, shared boost IDENT in a for-loop +created: 2026-05-19 +last_verified: 2026-05-19 +links: [] +--- + +**Cpp synced-instances pattern** (`for n = 0; n < 3; n++ ImGui::BeginTable("Table", ...)` — 3x `BeginTable` with the same string id) triggers ImGui's "synced" behavior: column width/visibility/order changes on one instance propagate to all visible instances. Useful when a table needs to render in multiple places (e.g. a list view + a detail panel) with identical column layout. + +**Das equivalent:** use ONE shared boost `data_table(IDENT, ...)` IDENT inside a `for n in range(N)` loop. The boost `[container]` macro auto-PushIDs the IDENT in `widget_prelude` (see `widgets/imgui_boost_runtime.das`), so each iter sees the same prefix, and the inner `BeginTable("Table", ...)` string ID is also shared. Net effect: 3 BeginTable calls hash to the same ImGui ID → synced behavior. + +```daslang +var private TABLES_SYNCED_FLAGS : int = int(ImGuiTableFlags.Resizable | + ImGuiTableFlags.Reorderable | + ImGuiTableFlags.Hideable | + ImGuiTableFlags.Borders) +// Per-iter CollapsingHeader needs INDEPENDENT state (open/close per header), +// so that one uses an indexed `table`. 
+var private TABLES_SYNCED_HEADER : table + +for (n in range(3)) { + collapsing_header(TABLES_SYNCED_HEADER[n], + (text = "Synced Table {n}", + flags = ImGuiTreeNodeFlags.DefaultOpen)) { + // ONE shared IDENT — auto-PushID is identical per iter, BeginTable + // string id is also shared, so all 3 hash to the same ImGui ID. + data_table(TABLES_SYNCED_T, (text = "Table", columns = 3, + flags = unsafe(reinterpret(TABLES_SYNCED_FLAGS)), + outer_size = float2(0,0), + inner_width = 0.0f)) { + // body — column width/visibility/order changes here propagate + // to all 3 visible instances. + } + } +} +``` + +**Key:** `TABLES_SYNCED_T` is intentionally a single shared IDENT, NOT indexed per-iter. Do NOT write `TABLES_SYNCED_T[n]` — that would give each iter its own ImGui ID via the indexed-state pattern, and the synced behavior breaks (changes wouldn't propagate). The per-iter independent state (here the CollapsingHeader's open/close + the per-iter visible row count) lives in a SEPARATE indexed state table. + +**Per-iter cell state still needs disambiguation** — if you write per-cell text inside the body, key the cell-state table by `n * MAX_CELLS_PER_ITER + cell` (or similar) so each iter's cells stay in their own state slot. The synced-instances trick is purely about sharing the TABLE-level ID; per-cell registry paths can and should still be unique per iter. + +Landed dasImgui PR #52 (PR-D/2 commit c791118, section 22 Synced instances). Source: `examples/imgui_demo/tables.das show_synced_instances()`. Mirrors `imgui_demo.cpp:5594-5624`. 
+ +## Questions +- dasImgui synced table instances — multiple BeginTable with same id, shared boost IDENT in a for-loop diff --git a/mouse-data/docs/dasimgui-with-id-for-per-iter-id-namespacing-inside-loops-where-widget-idents-are-literal-strings.md b/mouse-data/docs/dasimgui-with-id-for-per-iter-id-namespacing-inside-loops-where-widget-idents-are-literal-strings.md new file mode 100644 index 0000000000..7409b44463 --- /dev/null +++ b/mouse-data/docs/dasimgui-with-id-for-per-iter-id-namespacing-inside-loops-where-widget-idents-are-literal-strings.md @@ -0,0 +1,43 @@ +--- +slug: dasimgui-with-id-for-per-iter-id-namespacing-inside-loops-where-widget-idents-are-literal-strings +title: dasImgui with_id() for per-iter ID namespacing inside loops where widget IDENTs are literal strings +created: 2026-05-19 +last_verified: 2026-05-19 +links: [] +--- + +**`with_id("_{i}") { ... }`** is the standard pattern for per-iter ID namespacing when a loop body contains widget calls with literal-string IDENTs or `id =` args that can't be interpolated per iteration. + +The trap: boost macros like `edit_checkbox_flags(safe_addr(...), (id = "F_RESIZABLE", ...))` require the `id =` argument to be a string LITERAL — it's compile-time identifier scaffolding, not a runtime value. So a loop body that emits N identical-shape widgets can't write `id = "F_RESIZABLE_{i}"` to disambiguate each iter. Without disambiguation, ImGui's ID hash collides across iters and you get one shared widget instead of N. + +**Fix:** wrap the loop body in `with_id("scope_{n}") { ... }`. 
ImGui's `PushID(string)` mixes the prefix into every IDENT hash inside the block, so per-iter instances stay disjoint: + +```daslang +for (column in range(3)) { + with_id("colflags_col_{column}") { + text(LABEL, (text = "Column {column}")) + edit_checkbox_flags(safe_addr(g_input[column]), + (id = "F_DEFAULT_SORT", text = "DefaultSort", + flags_value = int(ImGuiTableColumnFlags.DefaultSort))) + edit_checkbox_flags(safe_addr(g_input[column]), + (id = "F_NO_HIDE", text = "NoHide", + flags_value = int(ImGuiTableColumnFlags.NoHide))) + // ... N more fixed-IDENT toggles ... + } +} +``` + +All N+ toggles inside share the same hard-coded `id = "..."` literals across all iters, but each iter's `with_id` prefix gives them distinct ImGui IDs. Found at `widgets/imgui_id_builtin.das:with_id`. + +**When NOT to use with_id:** if the inner widget IDENT itself is indexed (`text(TABLES_RHH_T1_CELL[row * 3 + col], ...)`), no with_id is needed — the table-indexed state IS the unique key per cell. `with_id` is specifically for the case where the inner widget call has a fixed literal-string IDENT or a literal `id=` arg. + +**Cross-iter PushID hint:** `with_id("name_{i}")` interpolates fine (it's a regular string, not a literal-required compile-time arg). So the prefix CAN be runtime — only the inner widget IDs are the literal-string trap. + +Used heavily across imgui_demo ports: PR #52 sections 8 (sizing per-table loop), 11 (columns flags per-column), 18 (per-row sliders), 19 (per-column header cells), 20 (per-row + per-col checkboxes), 21 (per-cell ".." buttons), 23 (per-item rows). See `widgets.das` and `tables.das` for concrete examples. + +Related: +- [how-do-indexed-table-keyed-dasimgui-widgets-work-slider-float-channel-vol-i](how-do-indexed-table-keyed-dasimgui-widgets-work-slider-float-channel-vol-i.md) — indexed-form widget state pattern (use this when the IDENT itself is indexed, INSTEAD of with_id). 
+- [dasimgui-widget-no-ident-form](dasimgui-widget-no-ident-form.md) — when IDENT can be dropped entirely (single-call sites). + +## Questions +- dasImgui with_id() for per-iter ID namespacing inside loops where widget IDENTs are literal strings diff --git a/mouse-data/docs/daslang-table-safe-index-safe-only-with-null-coalescing.md b/mouse-data/docs/daslang-table-safe-index-safe-only-with-null-coalescing.md new file mode 100644 index 0000000000..5bc86be1b5 --- /dev/null +++ b/mouse-data/docs/daslang-table-safe-index-safe-only-with-null-coalescing.md @@ -0,0 +1,35 @@ +--- +slug: daslang-table-safe-index-safe-only-with-null-coalescing +title: Is `table?[key]` safe in daslang — do I need `unsafe(...)` around it, and when is the safe-index form actually safe? +created: 2026-05-19 +last_verified: 2026-05-19 +links: [] +--- + +**Rule:** `tab?[k]` on a `table<>` is SAFE only when immediately consumed by null-coalescing `?? default`. Bare `tab?[k]` (or `let x = tab?[k]` followed by a manual null-check) is UNSAFE and the compiler enforces it with: + +``` +error[31034]: safe-index of table<> must be inside the 'unsafe' block +``` + +Why: `?[k]` returns a pointer into the table's storage. If the table mutates while you hold the pointer (e.g. another `insert` rehashes), the pointer dangles. `?? default` consumes the pointer immediately (extracts the value or substitutes the default), so the danger window is gone. Manual `if (p == null) return; ... p.field` still holds the pointer across the check, so the compiler keeps it unsafe. + +**Safe form:** +```daslang +let v = tab?[k] ?? default_value // OK — value extracted, no escaping pointer +let n = tab?[k] ?? 
0 +``` + +**Unsafe form (must wrap):** +```daslang +let meta = unsafe(g_widgets?[path]) // pointer escapes; manually null-check next +if (meta == null) return +return invoke(meta.state_addr_getter) +``` + +**STYLE024 false-positive caveat (as of 2026-05-19):** `daslib/style_lint.das` STYLE024 fires on `unsafe(tab?[k])` claiming the wrap is redundant. It is NOT — the compiler error above proves it. STYLE024's "inherently-unsafe shapes" allowlist needs to include `ExprSafeAt` of `table<>` (the `?[k]` form on a table type). Until that fix lands, this remains a known false positive on consumer files that hold the pointer past the safe-coalesce window. + +Distinct from `tab[k]` (regular access): `tab[k]` is auto-insert on tables and IS safe (no `unsafe` wrap needed for either read or assign). The pointer issue only arises with the safe-index `?[k]` form. + +## Questions +- Is `table?[key]` safe in daslang — do I need `unsafe(...)` around it, and when is the safe-index form actually safe? diff --git a/mouse-data/docs/imgui-angled-headers-row-plus-normal-headers-row-both-intentional.md b/mouse-data/docs/imgui-angled-headers-row-plus-normal-headers-row-both-intentional.md new file mode 100644 index 0000000000..adcef5c47d --- /dev/null +++ b/mouse-data/docs/imgui-angled-headers-row-plus-normal-headers-row-both-intentional.md @@ -0,0 +1,26 @@ +--- +slug: imgui-angled-headers-row-plus-normal-headers-row-both-intentional +title: ImGui table headers — both TableAngledHeadersRow and TableHeadersRow called intentional in cpp imgui_demo; angled has no column context menu +created: 2026-05-19 +last_verified: 2026-05-19 +links: [] +--- + +**cpp `imgui_demo.cpp` intentionally calls BOTH `TableAngledHeadersRow()` AND `TableHeadersRow()`** in the same `BeginTable`/`EndTable` block when angled headers are in use. 
This is NOT a redundant double-emit — it's a deliberate UX pattern: + +- `TableAngledHeadersRow()` draws the rotated column labels (the visual "angled" effect at the top of each column). It does NOT expose the per-column context menu (right-click on header → Sort / Hide / Resize / etc.). +- `TableHeadersRow()` draws an additional flat header row below. This is what provides the column context menu (per `ImGuiTableFlags_ContextMenuInBody` note in imgui.h, the context menu surface is attached to the standard header row, not the angled one). + +Authoritative sources (cpp imgui_demo.cpp): +- `imgui_demo.cpp:5455-5456` — `TableAngledHeadersRow(); TableHeadersRow();` // comment: "Draw remaining headers and allow access to context-menu and other functions." +- `imgui_demo.cpp:4911-4913` — same pattern in `ShowDemoWindowTablesAdvanced` AngledHeaders demo +- `imgui_demo.cpp:5919-5922` — same pattern in `ShowDemoWindowTables` Sorting demo + +**Implication for the dasImgui port**: when porting any tables.das section that calls `table_angled_headers_row()`, also keep the `table_headers_row()` call immediately after it. Don't "clean up" what looks like a duplicate — you'd silently delete column context-menu access. + +**Surfaced during dasImgui PR #52 review** (R5 round-2 Copilot comment): Copilot suggested dropping the `table_headers_row()` call after `table_angled_headers_row()` as "redundant". REJECTED — verified against cpp parity. The redundancy is intentional UX scaffolding; both calls stay. + +Reach for this whenever a code reviewer (Copilot or human) flags duplicate-looking header-row calls in a tables port. Verify against `imgui/imgui_demo.cpp` lines above before accepting any removal. 
+ +## Questions +- ImGui table headers — both TableAngledHeadersRow and TableHeadersRow called intentional in cpp imgui_demo; angled has no column context menu From e19d8ab2f9c49e2e9c149b34310a931aa87e8771 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Tue, 19 May 2026 09:42:06 -0700 Subject: [PATCH 5/8] allow C++ keywords as daslang identifiers (vars, structs, fields, enums) The lint pass rejected C++ reserved words as variable/struct/class/field/ enum/enum-value names because the AOT C++ emitter would otherwise produce invalid C++ (struct char {}, for (auto& register : ...), ...). Function names already had this restriction dropped in 2026-05-14 by extending aotSuffixNameEx to detect keywords and prepend _Func. This continues that pattern across the remaining sites: ast_lint.cpp drops 8 isCppKeyword(...) guards: - enum / enum-value / structure / structure-field names - global-let / for-iter / let / type-arg variable names - function-argument / block-argument names Module-name guard stays (modules emit raw into namespace X { ... }). daslib/aot_cpp.das extends the mangling: - aotFieldName adds _f_ prefix for keyword names (already did this for DELETE to avoid Windows SDK winnt.h macro collision). - aotStructName passes "_S" suffix instead of "" - aotSuffixNameEx prepends it only when the name is a keyword, so non-keyword structs emit unchanged. - New helpers aotEnumName (_E) and aotEnumValueName (_V) mirror the same shape for enums. - Replaces 5 raw enumType.name / structType.name emission sites with the helpers (type-decl emission, enum class decl/value decl, enum value access expressions, struct rtti field offsetof site). - aotSuffixNameEx gains a string overload that forwards to the das_string one so callers can pass either. 
Tests: - tests/language/cpp_keywords_as_names.das (new): positive coverage for every now-allowed shape - for-iter, let, global var, struct fields, struct name, class name, enum name, enum values, typedef alias, function arg, function-type arg name, lambda arg. Uses C++ keywords that aren't daslang reserved (char, register, short, extern, mutable, volatile, signed, long, union). - tests/aot/test_cpp_keyword_names.das extended to mirror every shape through AOT codegen alongside the existing function-name coverage. - tests/language/failed_reserved_names.das deleted - supplanted. Interpreter: 7920 tests, 7914 passed, 6 skipped, 0 failed. AOT: 7309 tests, 7303 passed, 6 skipped, 0 failed. Lint + das-fmt --verify pass on all 3 changed .das files. Co-Authored-By: Claude Opus 4.7 (1M context) --- daslib/aot_cpp.das | 45 ++++-- src/ast/ast_lint.cpp | 68 --------- tests/aot/test_cpp_keyword_names.das | 160 +++++++++++++++++++ tests/language/cpp_keywords_as_names.das | 187 +++++++++++++++++++++++ tests/language/failed_reserved_names.das | 48 ------ 5 files changed, 380 insertions(+), 128 deletions(-) create mode 100644 tests/language/cpp_keywords_as_names.das delete mode 100644 tests/language/failed_reserved_names.das diff --git a/daslib/aot_cpp.das b/daslib/aot_cpp.das index 142391ffb2..e35a4d6e44 100644 --- a/daslib/aot_cpp.das +++ b/daslib/aot_cpp.das @@ -19,8 +19,9 @@ options unsafe_table_lookup = false def aotFieldName(name : string) : string { var result = replace(name, "`", "__"); - // Escape names that clash with Windows SDK macros (e.g. DELETE from winnt.h) - if (result == "DELETE") { + // Escape names that clash with C++ keywords (e.g. `char`, `register`, `extern`) + // or with Windows SDK macros (e.g. `DELETE` from winnt.h). 
+ if (is_cpp_keyword(result) || result == "DELETE") { result = "_f_" + result; } return result; @@ -221,10 +222,14 @@ def hex_char(var Ch : int) { } def aotSuffixNameEx(funcName : das_string; suffix : string) { - var prefix = is_cpp_keyword(string(funcName)); + return aotSuffixNameEx(string(funcName), suffix); +} + +def aotSuffixNameEx(funcName : string; suffix : string) { + var prefix = is_cpp_keyword(funcName); let name = build_string() $(var writer) { - for (ch in string(funcName)) { + for (ch in funcName) { if (is_alnum(ch) || ch == '_') { writer |> write_char(ch); } else { @@ -267,7 +272,21 @@ def aotSuffixNameEx(funcName : das_string; suffix : string) { } def aotStructName(st : Structure?) { - return aotSuffixNameEx(st.name, ""); + // Suffix "_S" is prepended only when `st.name` is a C++ keyword (or contains + // a non-alnum char, which the parser disallows for struct names — so in + // practice: keyword names only). Non-keyword struct names emit unchanged. + return aotSuffixNameEx(st.name, "_S"); +} + +def aotEnumName(enu : Enumeration?) { + // Same shape as aotStructName but for enums. + return aotSuffixNameEx(enu.name, "_E"); +} + +def aotEnumValueName(name : das_string) { + // Used for enum value identifiers inside `enum class { ... }` body and at access sites + // (`EnumName::ValueName`). Prepended `_V` only for C++ keyword names. 
+ return aotSuffixNameEx(name, "_V"); } @@ -373,9 +392,9 @@ def describeCppTypeEx(typeDecl : TypeDeclPtr; if (typeDecl.enumType.external) { write(writer, "DAS_COMMENT(bound_enum) {typeDecl.enumType.cppName}"); } elif (typeDecl.enumType._module.name.empty()) { - write(writer, "DAS_COMMENT(enum) {typeDecl.enumType.name}"); + write(writer, "DAS_COMMENT(enum) {aotEnumName(typeDecl.enumType)}"); } else { - write(writer, "DAS_COMMENT(enum) {aotModuleName(typeDecl.enumType._module)}::{typeDecl.enumType.name}") + write(writer, "DAS_COMMENT(enum) {aotModuleName(typeDecl.enumType._module)}::{aotEnumName(typeDecl.enumType)}") } } else { write(writer, "DAS_COMMENT(unspecified enumeration)") @@ -743,7 +762,9 @@ class public AotDebugInfoHelper { writeArgTypes(writer, fld, suffix); writeArgNames(writer, fld, suffix); let prefix = (info.module_name |> !empty(info.module_name)) ? "{info.module_name}::" : ""; - write(*writer, "VarInfo {structInfoName(info)}_field_{fi} = \{ {describeCppVarInfo(prefix + info.name, fld,suffix)} \};\n"); + // info.name is the struct's daslang name; mangle if C++ keyword so offsetof() + // in describeCppVarInfo sees the same identifier the struct decl emitted. 
+ write(*writer, "VarInfo {structInfoName(info)}_field_{fi} = \{ {describeCppVarInfo(prefix + aotSuffixNameEx(info.name, "_S"), fld,suffix)} \};\n"); if (fld.annotation_arguments != null) { if (length(*fld.annotation_arguments) > 0) { let annArgs = build_string() $(var sb) { @@ -1197,10 +1218,10 @@ class public CppAot : AstVisitor { write(*ss, "#if 0 // external enum\n"); } write(*ss, "namespace {aotModuleName(enu._module)} \{\n\n"); - write(*ss, "enum class {enu.name} : {das_to_cppString(enu.baseType)} \{\n"); + write(*ss, "enum class {aotEnumName(enu)} : {das_to_cppString(enu.baseType)} \{\n"); } def override preVisitEnumerationValue(enu : EnumerationPtr; name : das_string; value : ExpressionPtr; last : bool) { - write(*ss, " {name} = {das_to_cppString(enu.baseType)}("); + write(*ss, " {aotEnumValueName(name)} = {das_to_cppString(enu.baseType)}("); } def override visitEnumerationValue(enu : EnumerationPtr; name : das_string; value : ExpressionPtr; last : bool) { write(*ss, ")"); @@ -2098,7 +2119,7 @@ class public CppAot : AstVisitor { write(*ss, ", {vtype.get_variant_field_offset(field.fieldIndex)}, {field.fieldIndex}>::get("); } else { let mod_name = (vtype.structType._module.name.empty() ? "" : string(vtype.structType._module.name) + "::"); - write(*ss, ",&{mod_name}{vtype.structType.name}::{aotFieldName(string(field.name))}>::get("); + write(*ss, ",&{mod_name}{aotStructName(vtype.structType)}::{aotFieldName(string(field.name))}>::get("); } } def override visitExprSafeField(var field : ExprSafeField?) 
: ExpressionPtr { @@ -2281,7 +2302,7 @@ class public CppAot : AstVisitor { let cfg = DescribeConfig(skip_ref = true, skip_const = true, redundant_const = false, cross_platform = cross_platform); let type_str = describeCppType(c._type, cfg); write(*ss, type_str); - var ctext = string(c.value); + var ctext = aotEnumValueName(c.value); for (ee in c.enumType.list) { if (ee.name == c.value) { if (!ee.cppName.empty()) { diff --git a/src/ast/ast_lint.cpp b/src/ast/ast_lint.cpp index 01d86212c3..69c794d224 100644 --- a/src/ast/ast_lint.cpp +++ b/src/ast/ast_lint.cpp @@ -259,19 +259,7 @@ namespace das { LineInfo(), CompilationError::invalid_module_name ); } } - bool isValidEnumName(const string & str) const { - return !isCppKeyword(str.c_str()); - } - bool isValidEnumValueName(const string & str) const { - return !isCppKeyword(str.c_str()); - } void lintType ( TypeDecl * td ) { - for ( auto & name : td->argNames ) { - if (!isValidVarName(name)) { - program->error("invalid type argument name '" + name + "'", "", "", - td->at, CompilationError::invalid_argument_name ); - } - } if ( td->firstType ) lintType(td->firstType); if ( td->secondType ) lintType(td->secondType); for ( auto & arg : td->argTypes ) lintType(arg); @@ -289,32 +277,11 @@ namespace das { } } } - virtual void preVisit ( Enumeration * enu ) override { - Visitor::preVisit(enu); - if (!isValidEnumName(enu->name)) { - program->error("invalid enumeration name '" + enu->name + "'", "", "", - enu->at, CompilationError::invalid_enumeration_name ); - } - } - virtual void preVisitEnumerationValue ( Enumeration * enu, const string & name, Expression * value, bool last ) override { - Visitor::preVisitEnumerationValue(enu,name,value,last); - if (!isValidEnumValueName(name)) { - program->error("invalid enumeration value name '" + name + "'", "", "", - enu->at, CompilationError::invalid_enumerator_name ); - } - } - bool isValidStructureName(const string & str) const { - return !isCppKeyword(str.c_str()); - } virtual bool 
canVisitStructure ( Structure * st ) override { return !st->isTemplate; // not a thing with templates } virtual void preVisit ( Structure * var ) override { Visitor::preVisit(var); - if (!isValidStructureName(var->name)) { - program->error("invalid structure name '" + var->name + "'", "", "", - var->at, CompilationError::invalid_structure_name ); - } if ( var->getSizeOf64()>0x7fffffff ) { program->error("structure '" + var->name + "' is too big", "", "", var->at, CompilationError::exceeds_structure ); @@ -345,15 +312,8 @@ namespace das { } } } - bool isValidVarName(const string & str) const { - return !isCppKeyword(str.c_str()); - } virtual void preVisitStructureField ( Structure * var, Structure::FieldDeclaration & decl, bool last ) override { Visitor::preVisitStructureField(var, decl, last); - if (!isValidVarName(decl.name)) { - program->error("invalid structure field name " + decl.name, "", "", - decl.at, CompilationError::invalid_field_name ); - } if ( noLocalClassMembers ) { if ( !decl.type->ref && decl.type->hasClasses() ) { program->error("class can't contain local class declarations", decl.name + ": " + decl.type->describe(), "", @@ -363,10 +323,6 @@ namespace das { } virtual void preVisitGlobalLet ( const VariablePtr & var ) override { Visitor::preVisitGlobalLet(var); - if (!isValidVarName(var->name)) { - program->error("invalid variable name '" + var->name + "'", "", "", - var->at, CompilationError::invalid_variable_name ); - } if ( checkNoGlobalVariables && !var->generated ) { if ( checkNoGlobalVariablesAtAll ) { program->error("variable '" + var->name + "' is disabled via option no_global_variables_at_all", "", "", @@ -441,18 +397,6 @@ namespace das { } } - virtual void preVisit(ExprFor * expr) override { - Visitor::preVisit(expr); - // macro generated invisible variable - // DAS_ASSERT(expr->visibility.line); - for ( size_t i=0, sz=expr->iteratorVariables.size(); i<sz; ++i ) { - auto & var = expr->iteratorVariables[i]; - if (!isValidVarName(var->name)) { - program->error("invalid 
variable name '" + var->name + "'", "", "", - var->at, CompilationError::invalid_variable_name ); - } - } - } virtual void preVisit(ExprDelete * expr) override { Visitor::preVisit(expr); if ( needAvoidNullPtr(expr->subexpr->type,true) ) { @@ -466,10 +410,6 @@ namespace das { // macro genearted invisible variable // DAS_ASSERT(expr->visibility.line); for (const auto & var : expr->variables) { - if (!isValidVarName(var->name)) { - program->error("invalid variable name " + var->name, "", "", - var->at, CompilationError::invalid_variable_name ); - } if ( !var->init ) { if ( needAvoidNullPtr(var->type,true) ) { program->error("local variable of type " + var->type->describe() + " needs to be initialized to avoid null pointer", "", "", @@ -925,10 +865,6 @@ namespace das { } virtual void preVisitArgument ( Function * fn, const VariablePtr & var, bool lastArg ) override { Visitor::preVisitArgument(fn, var, lastArg); - if (!isValidVarName(var->name)) { - program->error("invalid argument variable name " + var->name, "", "", - var->at, CompilationError::invalid_argument_name ); - } if ( checkUnusedArgument ) { if ( !var->marked_used && var->isAccessUnused() ) { program->error("unused function argument " + var->name, "", @@ -958,10 +894,6 @@ namespace das { } virtual void preVisitBlockArgument ( ExprBlock * block, const VariablePtr & var, bool lastArg ) override { Visitor::preVisitBlockArgument(block, var, lastArg); - if (!isValidVarName(var->name)) { - program->error("invalid block argument variable name " + var->name, "", "", - var->at, CompilationError::invalid_argument_name ); - } if ( checkUnusedBlockArgument ) { if ( !var->marked_used && var->isAccessUnused() ) { program->error("unused block argument " + var->name, "", diff --git a/tests/aot/test_cpp_keyword_names.das b/tests/aot/test_cpp_keyword_names.das index bcbc322a68..01b3aad7ec 100644 --- a/tests/aot/test_cpp_keyword_names.das +++ b/tests/aot/test_cpp_keyword_names.das @@ -1,6 +1,10 @@ options gen2 require 
dastest/testing_boost public +// Coverage: AOT codegen for daslang names that collide with C++ keywords. +// Includes function names (the 2026-05-14 baseline) plus the broader sites +// (struct/class/enum/field/variable) opened by the lint-drop in the same PR. + struct Foo { a : int } @@ -37,3 +41,159 @@ def test_cpp_keyword_function_names(t : T?) { t |> equal(do(Foo(a = 4)), 40) } } + +// --- struct fields named after C++ keywords --- +struct StFields { + char : int + register : float + extern : bool + mutable : string +} + +// --- struct named after a C++ keyword --- +struct register { + value : int +} + +// --- class named after a C++ keyword --- +class extern { + payload : int = 99 + def get_payload() : int { + return payload + } +} + +// --- enum named after a C++ keyword --- +enum signed { + plus + minus +} + +// --- enum values named after C++ keywords --- +enum Width { + short + long + union +} + +// --- type alias named after a C++ keyword --- +typedef short = int + +// --- function arg named after a C++ keyword --- +def double_it(char : int) : int { + return char * 2 +} + +// --- function type with a keyword arg name --- +def call_with_int(f : function<(char : int) : int>; v : int) : int { + return invoke(f, v) +} + +// --- global var named after a C++ keyword --- +var g_char = 100 + +[test] +def test_for_loop_iter(t : T?) { + t |> run("aot for-loop iter named char") @(t : T?) { + var sum = 0 + for (char in [1, 2, 3]) { + sum += char + } + t |> equal(sum, 6) + } +} + +[test] +def test_let_var(t : T?) { + t |> run("aot let var named char") @(t : T?) { + let char = 42 + t |> equal(char, 42) + } + t |> run("aot let with var named volatile") @(t : T?) { + var volatile = 0 + volatile += 5 + volatile += 7 + t |> equal(volatile, 12) + } +} + +[test] +def test_global_var(t : T?) { + t |> run("aot global var named g_char") @(t : T?) { + t |> equal(g_char, 100) + } +} + +[test] +def test_struct_fields(t : T?) 
{ + t |> run("aot struct fields named after C++ keywords") @(t : T?) { + let s = StFields(char = 5, register = 3.14, extern = true, mutable = "ok") + t |> equal(s.char, 5) + t |> equal(int(s.register * 100.0), 314) + t |> equal(s.extern, true) + t |> equal(s.mutable, "ok") + } +} + +[test] +def test_struct_name(t : T?) { + t |> run("aot struct name 'register'") @(t : T?) { + let r = register(value = 17) + t |> equal(r.value, 17) + } +} + +[test] +def test_class_name(t : T?) { + t |> run("aot class name 'extern'") @(t : T?) { + var c = new extern() + t |> equal(c->get_payload(), 99) + unsafe { delete c; } + } +} + +[test] +def test_enum_name(t : T?) { + t |> run("aot enum name 'signed'") @(t : T?) { + let s = signed.plus + t |> equal(s == signed.plus, true) + t |> equal(s == signed.minus, false) + } +} + +[test] +def test_enum_values(t : T?) { + t |> run("aot enum values short/long/union") @(t : T?) { + let w = Width.short + t |> equal(w == Width.short, true) + t |> equal(w == Width.long, false) + t |> equal(w == Width.union, false) + } +} + +[test] +def test_typedef_alias(t : T?) { + t |> run("aot typedef alias named short") @(t : T?) { + let x : short = 5 + t |> equal(x, 5) + } +} + +[test] +def test_function_arg(t : T?) { + t |> run("aot function arg 'char'") @(t : T?) { + t |> equal(double_it(5), 10) + } + t |> run("aot function type with keyword arg name") @(t : T?) { + t |> equal(call_with_int(@@double_it, 7), 14) + } +} + +[test] +def test_lambda_arg(t : T?) { + t |> run("aot lambda arg 'char'") @(t : T?) 
{ + var f <- @(char : int) => char * 3 + t |> equal(invoke(f, 4), 12) + unsafe { delete f; } + } +} diff --git a/tests/language/cpp_keywords_as_names.das b/tests/language/cpp_keywords_as_names.das new file mode 100644 index 0000000000..53022ccac2 --- /dev/null +++ b/tests/language/cpp_keywords_as_names.das @@ -0,0 +1,187 @@ +options gen2 +require dastest/testing_boost public + +// Coverage: daslang accepts C++ keywords as variable/struct/field/enum/class names. +// Only C++ keywords that are NOT also daslang reserved words can be tested here +// (the daslang parser rejects e.g. `for`, `if`, `class`, `new`, `delete`, ...). +// Safe choices: char, register, short, long, extern, friend, mutable, volatile, +// signed, unsigned, union, inline, virtual, namespace, do, catch, throw, ... + +// --- struct fields named after C++ keywords --- +struct StFields { + char : int + register : float + extern : bool + mutable : string +} + +// --- struct named after a C++ keyword --- +struct register { + value : int +} + +// --- class named after a C++ keyword --- +class extern { + payload : int = 99 + def get_payload() : int { + return payload + } +} + +// --- enum named after a C++ keyword --- +enum signed { + plus + minus +} + +// --- enum values named after C++ keywords --- +enum Width { + short + long + union +} + +// --- type alias named after a C++ keyword --- +typedef short = int +typedef extern_ptr = int? + +// --- function arg named after a C++ keyword --- +def double_it(char : int) : int { + return char * 2 +} + +// --- function type with a keyword arg name --- +def call_with_int(f : function<(char : int) : int>; v : int) : int { + return invoke(f, v) +} + +// --- global var named after a C++ keyword --- +var g_char = 100 +var register_g = 7 + +[test] +def test_for_loop_iter(t : T?) { + t |> run("for-loop iter named char (user's bug repro)") @(t : T?) 
{ + var sum = 0 + for (char in [1, 2, 3]) { + sum += char + } + t |> equal(sum, 6) + } + t |> run("for-loop iter named register") @(t : T?) { + var product = 1 + for (register in [2, 3, 4]) { + product *= register + } + t |> equal(product, 24) + } +} + +[test] +def test_let_var(t : T?) { + t |> run("let var named char") @(t : T?) { + let char = 42 + t |> equal(char, 42) + } + t |> run("let var named short with arithmetic") @(t : T?) { + let short = 99 + let long = short * 2 + t |> equal(long, 198) + } + t |> run("var (mutable) named volatile") @(t : T?) { + var volatile = 0 + volatile += 5 + volatile += 7 + t |> equal(volatile, 12) + } +} + +[test] +def test_global_var(t : T?) { + t |> run("global var named g_char") @(t : T?) { + t |> equal(g_char, 100) + } + t |> run("global var named register_g") @(t : T?) { + t |> equal(register_g, 7) + } +} + +[test] +def test_struct_fields(t : T?) { + t |> run("struct fields named after C++ keywords") @(t : T?) { + let s = StFields(char = 5, register = 3.14, extern = true, mutable = "ok") + t |> equal(s.char, 5) + t |> equal(int(s.register * 100.0), 314) + t |> equal(s.extern, true) + t |> equal(s.mutable, "ok") + } +} + +[test] +def test_struct_name(t : T?) { + t |> run("struct name 'register'") @(t : T?) { + let r = register(value = 17) + t |> equal(r.value, 17) + } +} + +[test] +def test_class_name(t : T?) { + t |> run("class name 'extern'") @(t : T?) { + var c = new extern() + t |> equal(c->get_payload(), 99) + unsafe { delete c; } + } +} + +[test] +def test_enum_name(t : T?) { + t |> run("enum name 'signed'") @(t : T?) { + let s = signed.plus + t |> equal(s == signed.plus, true) + t |> equal(s == signed.minus, false) + } +} + +[test] +def test_enum_values(t : T?) { + t |> run("enum values short/long/union") @(t : T?) { + let w = Width.short + t |> equal(w == Width.short, true) + t |> equal(w == Width.long, false) + t |> equal(w == Width.union, false) + } +} + +[test] +def test_typedef_alias(t : T?) 
{ + t |> run("typedef alias named short") @(t : T?) { + let x : short = 5 + t |> equal(x, 5) + } +} + +[test] +def test_function_arg(t : T?) { + t |> run("function arg 'char'") @(t : T?) { + t |> equal(double_it(5), 10) + } + t |> run("function type with keyword arg name") @(t : T?) { + t |> equal(call_with_int(@@double_it, 7), 14) + } +} + +[test] +def test_lambda_arg(t : T?) { + t |> run("lambda arg 'char'") @(t : T?) { + var f <- @(char : int) => char * 3 + t |> equal(invoke(f, 4), 12) + unsafe { delete f; } + } + t |> run("lambda arg 'register' with capture") @(t : T?) { + let bias = 100 + var g <- @(register : int) => register + bias + t |> equal(invoke(g, 5), 105) + unsafe { delete g; } + } +} diff --git a/tests/language/failed_reserved_names.das b/tests/language/failed_reserved_names.das deleted file mode 100644 index 956b9e9014..0000000000 --- a/tests/language/failed_reserved_names.das +++ /dev/null @@ -1,48 +0,0 @@ -// reserved identifier names -// verifies compiler rejects using reserved words (like 'register', 'do') -// as enum names, enum values, struct names, struct fields, -// function args, local variables, and lambda parameters -options gen2 -expect 30106:3, 30146, 30148, 30152, 30240, 30282 - -enum do {// 30116: invalid name - foo - bar -} - -enum foo { - register = 5 // 30116: invalid name - noregister -} - -struct Foo { - register : string // 30116: invalid name -} - -struct register {// 30116: invalid name - foo : int -} - -def foo(register : int) {// 30116: invalid name - return register -} - -def bar(a : int) { - let register = a // 30116: invalid name - return register -} - -def any(x) { - invoke(x, 12) -} - -def lbm(a : int) { - any() <| $(register : int) {// 30116: invalid name - return a + register - } -} - -[export] -def test { - return true -} From e6b80de4236bf3715312220e86399367cdf678e5 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Tue, 19 May 2026 10:05:33 -0700 Subject: [PATCH 6/8] ast: add _abstract and inherited bits to 
FieldDeclaration Two new bit flags on Structure::FieldDeclaration that make class-method abstractness and parent-inheritance state inspectable from AST macros and tooling: - _abstract (0x400): set when the method is declared `def abstract` (init=null, body must come from a derived class). Cleared on override. - inherited (0x800): set on fields cloned from a parent class that are not (re)declared in this class. Cleared when the field is declared or overridden locally. parentType keeps its existing narrow semantic (type pending from parent auto-resolution; transient, cleared during inference). A new explicit "inherited" bit avoids overloading it. Both bits are exposed via fld.flags._abstract / fld.flags.inherited in the FieldDeclarationFlags bitfield binding. AstSerializer bumped to 85. Tests under tests/language/ verify the three flag combinations (declared-here-abstract, inherited-from-parent, overridden) at compile time via a [structure_macro] helper, plus a runtime override-dispatch sanity check. --- .../typedef-ast-FieldDeclarationFlags.rst | 2 + include/daScript/ast/ast.h | 2 + src/builtin/module_builtin_ast_flags.cpp | 2 +- src/builtin/module_builtin_ast_serialize.cpp | 2 +- src/parser/parser_impl.cpp | 7 +++ src/parser/parser_impl.h | 1 + tests/language/_field_decl_flags_helper.das | 42 ++++++++++++++++++ tests/language/test_field_decl_flags.das | 43 +++++++++++++++++++ 8 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 tests/language/_field_decl_flags_helper.das create mode 100644 tests/language/test_field_decl_flags.das diff --git a/doc/source/stdlib/handmade/typedef-ast-FieldDeclarationFlags.rst b/doc/source/stdlib/handmade/typedef-ast-FieldDeclarationFlags.rst index ceff70d5b4..e8af61a65e 100644 --- a/doc/source/stdlib/handmade/typedef-ast-FieldDeclarationFlags.rst +++ b/doc/source/stdlib/handmade/typedef-ast-FieldDeclarationFlags.rst @@ -9,3 +9,5 @@ This field is private. The field is sealed. It cannot be overridden in derived types. 
Already implemented. This field is a class method. +The method is declared abstract - body must be provided by a derived class. +This field is inherited from a parent class and is not (re)declared in this class. diff --git a/include/daScript/ast/ast.h b/include/daScript/ast/ast.h index 8473b9a50d..f3f0c4d171 100644 --- a/include/daScript/ast/ast.h +++ b/include/daScript/ast/ast.h @@ -221,6 +221,8 @@ namespace das bool sealed : 1; bool implemented : 1; bool classMethod : 1; + bool _abstract : 1; + bool inherited : 1; }; uint32_t flags = 0; }; diff --git a/src/builtin/module_builtin_ast_flags.cpp b/src/builtin/module_builtin_ast_flags.cpp index 1d7cb7ff1d..38683b8562 100644 --- a/src/builtin/module_builtin_ast_flags.cpp +++ b/src/builtin/module_builtin_ast_flags.cpp @@ -166,7 +166,7 @@ namespace das { ft->alias = "FieldDeclarationFlags"; ft->argNames = { "moveSemantics", "parentType", "capturedConstant", "generated", "capturedRef", "doNotDelete", "privateField", "_sealed", - "implemented", "classMethod" }; + "implemented", "classMethod", "_abstract", "inherited" }; return ft; } diff --git a/src/builtin/module_builtin_ast_serialize.cpp b/src/builtin/module_builtin_ast_serialize.cpp index 0b3d0a9903..ff3abe7104 100644 --- a/src/builtin/module_builtin_ast_serialize.cpp +++ b/src/builtin/module_builtin_ast_serialize.cpp @@ -2698,7 +2698,7 @@ namespace das { } uint32_t AstSerializer::getVersion () { - static constexpr uint32_t currentVersion = 84; + static constexpr uint32_t currentVersion = 85; return currentVersion; } diff --git a/src/parser/parser_impl.cpp b/src/parser/parser_impl.cpp index dd872223a2..af60439fe0 100644 --- a/src/parser/parser_impl.cpp +++ b/src/parser/parser_impl.cpp @@ -325,8 +325,10 @@ namespace das { virtfin->at, CompilationError::internal_function); } } + const bool hasParent = (pStruct->parent != nullptr); for ( auto & ffd : pStruct->fields ) { ffd.implemented = false; + ffd.inherited = hasParent; } for ( auto pDecl : *list ) { for ( const auto & 
name_at : *pDecl->pNameList ) { @@ -386,6 +388,8 @@ namespace das { ffd.sealed = pDecl->sealed; ffd.implemented = true; ffd.classMethod = pDecl->isClassMethod; + ffd._abstract = pDecl->isAbstract; + ffd.inherited = false; } } } else { @@ -410,6 +414,8 @@ namespace das { oldFd->sealed = pDecl->sealed; oldFd->implemented = true; oldFd->classMethod = pDecl->isClassMethod; + oldFd->_abstract = false; + oldFd->inherited = false; } else { das_yyerror(scanner,"structure field is already declared "+name_at.name +", use override to replace initial value instead",name_at.at, @@ -673,6 +679,7 @@ namespace das { ); decl->isPrivate = isPrivate; decl->isClassMethod = true; + decl->isAbstract = true; list->push_back(decl); } } diff --git a/src/parser/parser_impl.h b/src/parser/parser_impl.h index 565229ad3b..d62e6d473d 100644 --- a/src/parser/parser_impl.h +++ b/src/parser/parser_impl.h @@ -62,6 +62,7 @@ namespace das { bool isStatic = false; bool isTupleExpansion = false; bool isClassMethod = false; + bool isAbstract = false; AnnotationArgumentList *annotation = nullptr; }; diff --git a/tests/language/_field_decl_flags_helper.das b/tests/language/_field_decl_flags_helper.das new file mode 100644 index 0000000000..9f4518991d --- /dev/null +++ b/tests/language/_field_decl_flags_helper.das @@ -0,0 +1,42 @@ +// Helper module for tests/language/test_field_decl_flags.das. +// +// Declares [check_add_field_flags(is_abstract=..., is_inherited=...)] — a structure_macro +// that walks st.fields looking for "add", and macro_errors if its _abstract / +// inherited bits don't match the annotation arguments. The compile succeeding +// is the test: any mismatch turns into a compile-time error. 
+options gen2 +options indenting = 4 + +module _field_decl_flags_helper public + +require daslib/ast public +require daslib/ast_boost + +[structure_macro(name = "check_add_field_flags")] +class private CheckAddFieldFlagsMacro : AstStructureAnnotation { + def override apply(var st : StructurePtr; var group : ModuleGroup; + args : AnnotationArgumentList; var errors : das_string) : bool { + let expected_abstract = find_arg(args, "is_abstract") ?as tBool ?? false + let expected_inherited = find_arg(args, "is_inherited") ?as tBool ?? false + var found = false + for (fld in st.fields) { + if (fld.name != "add") continue + found = true + let got_abstract = fld.flags._abstract + let got_inherited = fld.flags.inherited + if (got_abstract != expected_abstract) { + errors := "[check_add_field_flags] {st.name}.add._abstract = {got_abstract}, expected {expected_abstract}" + return false + } + if (got_inherited != expected_inherited) { + errors := "[check_add_field_flags] {st.name}.add.inherited = {got_inherited}, expected {expected_inherited}" + return false + } + } + if (!found) { + errors := "[check_add_field_flags] {st.name} has no 'add' field" + return false + } + return true + } +} diff --git a/tests/language/test_field_decl_flags.das b/tests/language/test_field_decl_flags.das new file mode 100644 index 0000000000..92597d6f8b --- /dev/null +++ b/tests/language/test_field_decl_flags.das @@ -0,0 +1,43 @@ +// Verifies _abstract and inherited bits on Structure::FieldDeclaration for +// abstract class methods through clone/override. +// +// - Foo.add: declared `def abstract` here -> _abstract=true, inherited=false +// - Bar.add: cloned from Foo, no redeclare -> _abstract=true, inherited=true +// - Far.add: overridden with a body -> _abstract=false, inherited=false +// +// The [check_add_field_flags] structure_macro asserts the bits at compile time. +// A bit mismatch becomes a macro_error and fails the compile, so successful +// compilation = passing test. 
The runtime [test] confirms the override chain +// dispatches correctly. +options gen2 +options indenting = 4 +options no_unused_block_arguments = false +options no_unused_function_arguments = false + +require dastest/testing_boost public +require _field_decl_flags_helper + +[check_add_field_flags(is_abstract = true, is_inherited = false)] +class Foo { + def abstract add(a, b : int) : int +} + +[check_add_field_flags(is_abstract = true, is_inherited = true)] +class Bar : Foo { +} + +[check_add_field_flags(is_abstract = false, is_inherited = false)] +class Far : Bar { + def override add(a, b : int) : int { + return a + b + } +} + +[test] +def test_far_overrides_abstract(t : T?) { + var f = new Far() + t |> equal(f.add(2, 3), 5) + unsafe { + delete f + } +} From b03cae1e50e6cfd487a903623a733eca9bb1f90e Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Tue, 19 May 2026 10:46:13 -0700 Subject: [PATCH 7/8] linq: N-ary zip family extended to arities 4-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 of the linq zip extension plan (LINQ_TO_DECS.md). Extends the existing 2-ary and 3-ary `zip` family to arities 4..8 with full overload coverage per arity: - lockstep iter / array / zip_to_array (no selector) - result-selector iter / array / zip_to_array (block<...>) - private `zipN_impl` / `zipN_impl_const` helpers per arity Also adds a `static_if` reserve in `select_many_impl` mirroring the existing zip pattern — closes a pre-existing PERF006 hint on length-bearing sources. Tests in `tests/linq/test_linq_transform.das` cover all arities and overloads (72 tests for zip; 1047 total linq tests including AOT mode green). LINT003/002 on pre-existing test functions also fixed per the "fix all lint" rule. 
`benchmarks/sql/LINQ_TO_DECS.md` documents the multi-phase plan: this PR is Phase 1; future phases add `plan_zip` splice in linq_fold (Phase 2), `from_decs_template(type)` macro (Phase 3), and `plan_from_decs_template` splice for SoA-without-buffer decs iteration (Phase 4). Mouse card documents an intermittent daslang bug observed during this work — a 4-ary block call with lambda params `p, q, r, s` returning stale first-param values. Could not produce reliable minimal repro; workaround documented. Co-Authored-By: Claude Opus 4.7 --- benchmarks/sql/LINQ_TO_DECS.md | 302 +++++++++ daslib/linq.das | 355 +++++++++++ doc/source/reference/tutorials/28_linq.rst | 15 +- ...hen-the-lambda-uses-param-names-p-q-r-s.md | 31 + tests/linq/test_linq_transform.das | 582 +++++++++++++++++- 5 files changed, 1273 insertions(+), 12 deletions(-) create mode 100644 benchmarks/sql/LINQ_TO_DECS.md create mode 100644 mouse-data/docs/why-does-my-4-ary-block-call-return-stale-first-param-values-when-the-lambda-uses-param-names-p-q-r-s.md diff --git a/benchmarks/sql/LINQ_TO_DECS.md b/benchmarks/sql/LINQ_TO_DECS.md new file mode 100644 index 0000000000..59df7e7968 --- /dev/null +++ b/benchmarks/sql/LINQ_TO_DECS.md @@ -0,0 +1,302 @@ +# LINQ → DECS — design notes + +Sibling of [LINQ.md](LINQ.md). Captures the design horizon for stitching linq's +`zip` family together with decs archetype iteration, so the upcoming +**BufferZip** PR is shaped with the decs angle in mind even though it doesn't +have to ship the decs bits. + +Status: **discussion draft** — no PR open, no plan committed. Edited in-place +as the design firms up. + +## What this is + +Today decs has two iteration surfaces: + +1. **`query() $(...){body}`** — the canonical "act on each entity matching a + component set" form. Lowered by [`DecsQueryMacro`](../../daslib/decs_boost.das#L315) + to `for_each_archetype($(arch) { for (a, b, c in get_ro(arch,"a",...), ...) 
body })` + — a multi-iterator for-loop, ONE pass per archetype, zero buffering. +2. **`from_decs($(...){})`** — pull form returning an iterator. Lowered by + [`FromDecsMacro`](../../daslib/decs_boost.das#L633) to + `invoke($() { var res : array; query(...){res |> push(...)}; return res.to_sequence() })`. + **Materializes** the entire matched set into a heap array, then yields. Plays + nicely with linq but loses the SoA-without-buffer win. + +Linq has `zip(a, b)` and `zip(a, b, c)` (lockstep + result-selector forms, +iterator + array overloads — [linq.das:3028-3157](../../daslib/linq.das#L3028)). +Both arities cascade to tier 2 in `_fold` today — `zip` is listed in +[`is_buffer_required_op`](../../daslib/linq_fold.das#L401) as a marker arm. + +The bridge insight: **`zip` over decs `get_ro(arch, "name", type)` calls IS +the SoA per-archetype prolog**. If linq_fold's zip planner emits a multi-source +for-loop (rather than buffering a tuple stream), `from_decs_template(type) +|> linq_chain |> terminator` can splice straight into the +`for_each_archetype` body — same shape `query` already builds, no intermediate +`array`. + +## North star + +```das +[decs_template(prefix="particle")] +struct Particle { + pos, vel : float3 +} + +// Authoring form (does not yet exist): +let total = ( + from_decs_template(type) + ._where(p => length(p.vel) > 0.0f) + ._select(p => length(p.pos)) + ._sum() +) + +// What we want the splice to lower to (same as query() would emit): +var total = 0.0f +for_each_archetype(req, $(arch) { + for (pos in get_ro(arch, "particle_pos", type), + vel in get_ro(arch, "particle_vel", type)) { + if (length(vel) > 0.0f) { + total += length(pos) + } + } +}) +``` + +No `array` materialization. No tuple stream. Same body shape as a +hand-written `query` — composable via the linq operator catalogue, which the +query macro alone doesn't give you. + +## Layered work plan + +Five separable pieces; each can ship independently. 
Numbered in build order, not +priority order. **Only piece 1 is in the BufferZip PR scope** — the rest are +later PRs. + +### 1. N-ary `zip` in `daslib/linq.das` (BufferZip PR) + +Extend the existing 2-ary / 3-ary `zip` family to 4..8 arity. Each arity gets +four overloads (lockstep iterator, lockstep array, result-selector iterator, +result-selector array) plus a `zip_to_array` companion — matches the established +pattern. Naming: `zip4_impl`, `zip5_impl`, … (current convention; `zip3_impl` +exists). + +Open question: 8 arity is what was suggested as "should be good." Could go +higher if there's a decs-driven reason (component-rich templates with 10+ +fields). Easier to ship 8 and extend later than to overshoot. + +**Tests:** functional parity vs hand-rolled `for (a, b, ..., h in srcA, srcB, ..., srcH)` +for each arity + iterator/array shape. AST tests not needed at this stage +(linq, not linq_fold). + +**Docs:** `daslib/linq.rst` (or wherever zip is currently documented). + +### 2. `plan_zip` splice arm in `daslib/linq_fold.das` + +Recognize `zip(srcA, srcB[, srcC, ...]) [|> chain]* |> terminator` and emit: + +```das +// Pseudo-skeleton (lockstep form, to_array terminator): +var inscope buf : array<...> +{ + var inscope itA <- each_or_pass(srcA) + var inscope itB <- each_or_pass(srcB) + // ... per source ... + for (a, b, ... in itA, itB, ...) { + // fused downstream: where_/select*/take/etc. + buf |> push_clone((a, b, ...)) + } +} +return <- buf +``` + +Multi-iterator for-loop is the natural lowering — the compiler already iterates +the shortest source's length. **NO buffered tuple stream.** Selects and wheres +downstream fuse the same way as the existing planners. 
+ +Bail-list (cascade to tier 2): +- Mixed array+iterator sources (uniform only — matches the design constraint) +- Sources that aren't `each(arr)` / iterator-form / direct array (no nested + fold-spliceable chain inside zip arg — that's a PR-3 extension) +- Result-selector zip (the `block<...>` arg) — defer; needs block-body splice +- Anything that lands `zip` past a `where_`/`select` in the chain (canonical + order requires `zip` first, like the existing source slot) + +Trim `zip` from `is_buffer_required_op` after this lands. + +Remove the marker (line 401) and the corresponding cascade path at the same time. + +**Tests:** functional parity + AST shape (the anti-cascade gate from +`tests/linq/test_linq_fold_ast.das` — assert no `var pass_0` tier-2 shape). + +**Benchmark:** refresh `benchmarks/sql/zip_dot_product.das`; add `zip3_*` +shapes if the perf delta is interesting. + +### 3. `from_decs_template(type)` macro + +New `[call_macro]` in `daslib/decs_boost.das`. Single argument: `type` +where `Foo` is annotated `[decs_template]`. Returns an `iterator` (or a +tuple-of-refs — see "Decisions (locked-in design)" below, which settles on the tuple form). + +**Two-stage implementation**: + +- **Stage A (eager bridge)** — produces the same shape as `from_decs(...)` does + today: materializes via `query`, returns `to_sequence()` of the buffer. + Composes with all existing linq ops. ~50 LOC. Lands in the same PR as + piece 2 or shortly after. +- **Stage B (splice-able marker)** — emits a recognizable AST shape that + `linq_fold`'s planner peels and inlines into the + `for_each_archetype` body. The whole point of the design. Depends on + piece 4 below. + +Macro reads `type`'s `structType.fields` (same path the +`DecsTemplate.apply` method uses to enumerate fields — see +[decs_boost.das:68](../../daslib/decs_boost.das#L68)). Build an `EcsRequest` +the same way `build_req_from_args` does ([decs_boost.das:169](../../daslib/decs_boost.das#L169)) +— one `req.req |> push("{prefix}{field.name}")` per field. 
+ +**Tests:** smoke parity test (Stage A) — `from_decs_template(type) == +materialized-query`. Splice-shape test (Stage B) gets added when piece 4 lands. + +### 4. `from_decs_template` recognizer in `linq_fold` planner + +New `plan_from_decs_template` planner that detects the +`from_decs_template(type) |> chain` shape and emits the full +`for_each_archetype($(arch) { for (a, b in get_ro(arch, "a", ...), ...) body +})` template — body fused from the downstream linq chain. + +Critical reuse: the **same per-field iterator-emit logic** as the `query` macro +uses ([decs_boost.das:239 `append_iterator`](../../daslib/decs_boost.das#L239)). +Two options: + +- **Option A — extract a shared helper** in `decs_boost.das` and call it from + the linq_fold planner. Cleanest. Cross-module dependency: `linq_fold.das` + starts depending on `decs_boost.das` (one-way; decs_boost already requires + linq). +- **Option B — duplicate the emit logic.** Faster to ship but creates a drift + hazard. + +Likely Option A — but worth a discussion point. The cross-module dependency is +the only awkward bit (linq_fold becoming "aware of" decs). One way to avoid the +direct dep: invert it — `decs_boost` exports the planner extension via a +registration hook (`add_linq_planner`). More plumbing, but linq_fold stays +ECS-unaware. + +This piece is where the **N-ary zip planner from piece 2 pays its decs dividend**. +The per-archetype emission is a zip — and the linq chain that follows is +exactly the same chain `plan_zip` already fuses. + +### 5. sqlite_linq N-ary zip recognition + +Currently `daslib/sqlite_linq.das` has **zero** zip references. The mapping for +`zip` over SQL sources is not obvious: +- N-ary lockstep over SQL tables isn't a standard relational operation. +- Closest analogue: `INNER JOIN` on synthetic row-number columns + (`ROW_NUMBER() OVER ()` in SQLite). 
+- Or — for the common case where one side is a `_sql` source and the other is + an in-memory array — generate an `IN (...)` clause from the array. But that's + a different operator semantically. + +**Likely outcome**: defer until there's a concrete authoring shape we want to +support. Stub in `sqlite_linq` (cascade to tier 2 — call `from_sql` to +materialize, then run the linq zip in-memory). + +Note: this is the **only** piece where the user mentioned "we'll need to expand +support for that there as well" — keep on the radar but not necessarily in the +critical path. + +## Decisions (locked-in design) + +These are settled — captured here so future-self doesn't relitigate. + +1. **`from_decs_template` element type = tuple** of refs to component fields. + `tuple` for `[decs_template] struct Particle{ + pos, vel : float3 }`. Zero-copy. Matches the SoA primitive already used by + `query`'s multi-iterator for-loop body. Composes with linq operators + through the existing tuple-element path. +2. **N-ary zip ceiling = 8.** Expand if a real use case lands that needs more. +3. **`linq_fold` does NOT require `decs_boost`.** The splice emits **names** + (`for_each_archetype`, `get_ro`) that resolve in the consuming scope — the + user's code already requires `decs_boost` if they're calling + `from_decs_template`. linq_fold stays ECS-unaware at the dependency level; + it just knows the canonical decs splice template by name. (This is the same + pattern already used elsewhere — e.g., the planner emits `_::unique_key` + and `_::less` without `require`-ing the modules that define them.) 
+ +## Splice complexity — where the work actually is + +Per the user's framing: **per-archetype is "one set of arrays" (easy); the +outer multi-archetype loop is the tricky part.** Both `from_decs($(args){})` +and `from_decs_template(type)` walk multiple archetypes via +`for_each_archetype` — the inner body is a multi-iterator for-loop in lockstep +over per-component arrays (= zip), but most linq operators need state hoisted +*above* the archetype loop. + +Operator-by-operator hoist plan: + +| Operator | Hoist | Per-archetype body | Cross-archetype concern | +|---|---|---|---| +| `count` | `var cnt = 0` above outer | `cnt += archLen` (length shortcut) or `cnt++` per element | trivial accumulator | +| `sum`/`min`/`max`/`average`/`long_count` | accumulator above outer (incl. avg's 2-slot) | inline reduce | trivial accumulator | +| `to_array` | `var buf : array` above outer | `buf \|> push_clone(...)` per element | trivial; reserve sum-of-archetype-sizes upfront if cheap | +| `first` / `first_or_default(d)` | n/a — direct return | first match → `return val` | early-exit across outer loop: needs flag-or-return — a bare `return` inside the block is not enough, because the `for_each_archetype` block is callee-invoked: a return statement inside the block exits the *block*, not the outer function. Needs either (a) wrap whole emission in `invoke($() { ... })` and use `return`, or (b) flag + `break` from `for_each_archetype` (no built-in break — would need `for_each_archetype_find`'s `bool` return shape) | +| `any` / `contains` | n/a — direct return | matching element → `return true` | same early-exit pattern; `for_each_archetype_find` is the natural fit (returns `bool`) | +| `all` | n/a — direct return | non-matching → `return false` | symmetric to any | +| `take(N)` | shared counter above outer | `cnt++; break if (cnt >= N)` (inner break) + outer-loop early-exit when N reached | needs *both*: inner break to stop archetype walk, plus signal to outer loop to stop. 
Same `for_each_archetype_find` pattern works (return `true` when N reached) | +| `skip(K)` | shared counter above outer | `if (remaining > 0) { remaining--; continue }` | counter survives archetype boundary; no early-exit | +| `distinct` / `distinct_by` | `var inscope seen : table<...>` above outer | streaming dedup per element | shared dedup table — exactly the same shape as the existing `plan_distinct`, just one level deeper | +| `group_by` | `var inscope tab : table<...>` above outer | per-element table update (the `tab?[uk] ?? dummy` pattern) | shared table — same shape as existing `plan_group_by` | +| `reverse` | `var buf : array` above outer | per-element push | `reverse_inplace(buf)` after both loops complete | +| `order_by [+ take(N)]` | `var buf : array` (or `top_n` heap) above outer | per-element push | sort / `top_n_select` after both loops | + +The pattern that recurs: **hoist state above `for_each_archetype`, mutate per +element in the inner loop, finalize after both loops**. The existing planners +(`plan_distinct`, `plan_group_by`, `plan_order_family`, `plan_reverse`) already +have this two-level structure (init / per-element / finalize); the +decs-splice variant just adds one more outer loop level. Bulk of the splice +infrastructure carries over. + +**Early-exit terminators** (`first`/`first_or_default`/`any`/`all`/`contains`/`take(N)`) +are the genuine new complexity. Lowering to `for_each_archetype_find` (returns +`bool`, callee can early-exit by returning `true`) is the cleanest path — the +non-find `for_each_archetype` doesn't have a break channel. This means the +splice-emission has to choose between `for_each_archetype` and +`for_each_archetype_find` based on the terminator class, mirroring the +existing per-lane planner split. + +## Lingering open questions + +1. **sqlite_linq zip semantics** — defer until there's an authoring shape with + a real use case. Don't speculate now. +2. 
**Result-selector zip splicing** — `zip(a, b, $(l, r) => ...)` block form. + Defer past piece 2; the unselectored form covers the decs case (multi-iter + for-loop yields the tuple directly). +3. **`from_decs_template` + REQUIRE / REQUIRE_NOT clauses** — the existing + `query` macro accepts `REQUIRE(c1, c2)` and `REQUIRE_NOT(c3)` annotations. + `from_decs_template(type)` derives the must-have set from `Foo`'s + fields, but there's no surface for the must-not-have set. Possible answer: + `from_decs_template(type, REQUIRE_NOT(deleted))` as a follow-up; v1 + ships without it. + +## What this doesn't change + +- The existing `query` / `find_query` / `from_decs` macros stay. They're the + canonical "act on / find / materialize" forms. `from_decs_template` is + additive — a fluent entry point for linq-style composition. +- The decs storage layout (Archetype + Component) is untouched. All the work + is at the macro / planner layer; per-component `array` reinterpret + via `get_ro` stays the SoA primitive. 
+ +## Pointers to current code + +- Zip overloads: [daslib/linq.das:3028-3157](../../daslib/linq.das#L3028) +- zip in linq_fold call table: [daslib/linq_fold.das:129](../../daslib/linq_fold.das#L129) +- zip marker arm: [daslib/linq_fold.das:401](../../daslib/linq_fold.das#L401) +- Archetype + Component storage: [daslib/decs.das:48-70](../../daslib/decs.das#L48) +- `get_ro` / `get` SoA accessors: [daslib/decs.das:778-825](../../daslib/decs.das#L778) +- `for_each_archetype`: [daslib/decs.das:644](../../daslib/decs.das#L644) +- `[decs_template]` structure_macro: [daslib/decs_boost.das:50-133](../../daslib/decs_boost.das#L50) +- `query` macro lowering: [daslib/decs_boost.das:315-516](../../daslib/decs_boost.das#L315) +- `append_iterator` (per-field for-loop source emit): [daslib/decs_boost.das:239](../../daslib/decs_boost.das#L239) +- `from_decs` macro (current eager form): [daslib/decs_boost.das:633-705](../../daslib/decs_boost.das#L633) +- Existing test: [tests/linq/test_linq_from_decs.das](../../tests/linq/test_linq_from_decs.das) +- sqlite_linq (lives in dasSQLITE module): [modules/dasSQLITE/daslib/sqlite_linq.das](../../modules/dasSQLITE/daslib/sqlite_linq.das) diff --git a/daslib/linq.das b/daslib/linq.das index f4635a65f0..33ecdaaa8c 100644 --- a/daslib/linq.das +++ b/daslib/linq.das @@ -2964,6 +2964,11 @@ def private iter_type(src : array) : TT -const -& { def private select_many_impl(var src; tt : auto(TT); result_selector) : array))) -const -&> { //! Projects each element of an iterator to an iterator and flattens the resulting iterators into one array var buffer : array))) -const -&> + // Lower-bound reserve when the outer source is length-bearing — flat-map produces ≥ length(src) elements + // for non-degenerate inner sequences; reduces realloc count even when the actual output is larger. 
+ static_if (typeinfo is_array(src) || typeinfo is_dim(src)) { + buffer |> reserve(length(src)) + } for (it in src) { for (innerIt in it) { buffer.push_clone(result_selector(innerIt)) @@ -3159,6 +3164,356 @@ def zip_to_array(var a : iterator; var b : iterator; var c : return <- zip3_impl(a, type, b, type, c, type, result_selector) } +// zip with 4 sources + +[unused_argument(tt, uu, ww, xx)] +def private zip4_impl(var a; tt : auto(TT); var b; uu : auto(UU); var c; ww : auto(WW); var d; xx : auto(XX)) : array> { + //! Merges four iterators into an array of tuples + var buffer : array> + static_if ((typeinfo is_array(a) || typeinfo is_dim(a)) && (typeinfo is_array(b) || typeinfo is_dim(b)) && (typeinfo is_array(c) || typeinfo is_dim(c)) && (typeinfo is_array(d) || typeinfo is_dim(d))) { + buffer |> reserve(min(length(a), min(length(b), min(length(c), length(d))))) + } + for (itA, itB, itC, itD in a, b, c, d) { + buffer.emplace((itA, itB, itC, itD)) + } + return <- buffer +} + +[unused_argument(tt, uu, ww, xx)] +def private zip4_impl_const(a : auto(ARGT); tt : auto(TT); b : auto(ARGTS); uu : auto(UU); c : auto(ARGC); ww : auto(WW); d : auto(ARGD); xx : auto(XX)) : array> { + return <- zip4_impl(unsafe(reinterpret(a)), type, unsafe(reinterpret(b)), type, unsafe(reinterpret(c)), type, unsafe(reinterpret(d)), type) +} + +def zip(var a : iterator; var b : iterator; var c : iterator; var d : iterator) : iterator> { + //! Merges four iterators into an iterator of tuples + return <- zip4_impl(a, type, b, type, c, type, d, type).to_sequence_move() +} + +def zip(a : array; b : array; c : array; d : array) : array> { + //! Merges four arrays into an array of tuples + return <- zip4_impl_const(a, type, b, type, c, type, d, type) +} + +def zip_to_array(var a : iterator; var b : iterator; var c : iterator; var d : iterator) : array> { + //! 
Merges four iterators into an array of tuples + return <- zip4_impl(a, type, b, type, c, type, d, type) +} + +// zip4 with result selector + +[unused_argument(tt, uu, ww, xx)] +def private zip4_impl(var a; tt : auto(TT); var b; uu : auto(UU); var c; ww : auto(WW); var d; xx : auto(XX); result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&) : auto>) : array, type, type, type)) -const -&> { + //! Merges four iterators into an array by applying a specified function + var buffer : array, type, type, type)) -const -&> + static_if ((typeinfo is_array(a) || typeinfo is_dim(a)) && (typeinfo is_array(b) || typeinfo is_dim(b)) && (typeinfo is_array(c) || typeinfo is_dim(c)) && (typeinfo is_array(d) || typeinfo is_dim(d))) { + buffer |> reserve(min(length(a), min(length(b), min(length(c), length(d))))) + } + for (itA, itB, itC, itD in a, b, c, d) { + buffer.push_clone(result_selector(itA, itB, itC, itD)) + } + return <- buffer +} + +[unused_argument(tt, uu, ww, xx)] +def private zip4_impl_const(a : auto(ARGT); tt : auto(TT); b : auto(ARGTS); uu : auto(UU); c : auto(ARGC); ww : auto(WW); d : auto(ARGD); xx : auto(XX); result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&) : auto>) : array, type, type, type)) -const -&> { + return <- zip4_impl(unsafe(reinterpret(a)), type, unsafe(reinterpret(b)), type, unsafe(reinterpret(c)), type, unsafe(reinterpret(d)), type, result_selector) +} + +def zip(var a : iterator; var b : iterator; var c : iterator; var d : iterator; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&) : auto>) : iterator, type, type, type)) -const -&> { + //! Merges four iterators into an iterator by applying a specified function + return <- zip4_impl(a, type, b, type, c, type, d, type, result_selector).to_sequence_move() +} + +def zip(a : array; b : array; c : array; d : array; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&) : auto>) : array, type, type, type)) -const -&> { + //! 
Merges four arrays into an array by applying a specified function + return <- zip4_impl_const(a, type, b, type, c, type, d, type, result_selector) +} + +def zip_to_array(var a : iterator; var b : iterator; var c : iterator; var d : iterator; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&) : auto>) : array, type, type, type)) -const -&> { + //! Merges four iterators into an array by applying a specified function + return <- zip4_impl(a, type, b, type, c, type, d, type, result_selector) +} + +// zip with 5 sources + +[unused_argument(tt, uu, ww, xx, yy)] +def private zip5_impl(var a; tt : auto(TT); var b; uu : auto(UU); var c; ww : auto(WW); var d; xx : auto(XX); var e; yy : auto(YY)) : array> { + //! Merges five iterators into an array of tuples + var buffer : array> + static_if ((typeinfo is_array(a) || typeinfo is_dim(a)) && (typeinfo is_array(b) || typeinfo is_dim(b)) && (typeinfo is_array(c) || typeinfo is_dim(c)) && (typeinfo is_array(d) || typeinfo is_dim(d)) && (typeinfo is_array(e) || typeinfo is_dim(e))) { + buffer |> reserve(min(length(a), min(length(b), min(length(c), min(length(d), length(e)))))) + } + for (itA, itB, itC, itD, itE in a, b, c, d, e) { + buffer.emplace((itA, itB, itC, itD, itE)) + } + return <- buffer +} + +[unused_argument(tt, uu, ww, xx, yy)] +def private zip5_impl_const(a : auto(ARGT); tt : auto(TT); b : auto(ARGTS); uu : auto(UU); c : auto(ARGC); ww : auto(WW); d : auto(ARGD); xx : auto(XX); e : auto(ARGE); yy : auto(YY)) : array> { + return <- zip5_impl(unsafe(reinterpret(a)), type, unsafe(reinterpret(b)), type, unsafe(reinterpret(c)), type, unsafe(reinterpret(d)), type, unsafe(reinterpret(e)), type) +} + +def zip(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator) : iterator> { + //! 
Merges five iterators into an iterator of tuples + return <- zip5_impl(a, type, b, type, c, type, d, type, e, type).to_sequence_move() +} + +def zip(a : array; b : array; c : array; d : array; e : array) : array> { + //! Merges five arrays into an array of tuples + return <- zip5_impl_const(a, type, b, type, c, type, d, type, e, type) +} + +def zip_to_array(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator) : array> { + //! Merges five iterators into an array of tuples + return <- zip5_impl(a, type, b, type, c, type, d, type, e, type) +} + +// zip5 with result selector + +[unused_argument(tt, uu, ww, xx, yy)] +def private zip5_impl(var a; tt : auto(TT); var b; uu : auto(UU); var c; ww : auto(WW); var d; xx : auto(XX); var e; yy : auto(YY); result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&) : auto>) : array, type, type, type, type)) -const -&> { + //! Merges five iterators into an array by applying a specified function + var buffer : array, type, type, type, type)) -const -&> + static_if ((typeinfo is_array(a) || typeinfo is_dim(a)) && (typeinfo is_array(b) || typeinfo is_dim(b)) && (typeinfo is_array(c) || typeinfo is_dim(c)) && (typeinfo is_array(d) || typeinfo is_dim(d)) && (typeinfo is_array(e) || typeinfo is_dim(e))) { + buffer |> reserve(min(length(a), min(length(b), min(length(c), min(length(d), length(e)))))) + } + for (itA, itB, itC, itD, itE in a, b, c, d, e) { + buffer.push_clone(result_selector(itA, itB, itC, itD, itE)) + } + return <- buffer +} + +[unused_argument(tt, uu, ww, xx, yy)] +def private zip5_impl_const(a : auto(ARGT); tt : auto(TT); b : auto(ARGTS); uu : auto(UU); c : auto(ARGC); ww : auto(WW); d : auto(ARGD); xx : auto(XX); e : auto(ARGE); yy : auto(YY); result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&) : auto>) : array, type, type, type, type)) -const -&> { + return <- zip5_impl(unsafe(reinterpret(a)), type, unsafe(reinterpret(b)), type, 
unsafe(reinterpret(c)), type, unsafe(reinterpret(d)), type, unsafe(reinterpret(e)), type, result_selector) +} + +def zip(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&) : auto>) : iterator, type, type, type, type)) -const -&> { + //! Merges five iterators into an iterator by applying a specified function + return <- zip5_impl(a, type, b, type, c, type, d, type, e, type, result_selector).to_sequence_move() +} + +def zip(a : array; b : array; c : array; d : array; e : array; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&) : auto>) : array, type, type, type, type)) -const -&> { + //! Merges five arrays into an array by applying a specified function + return <- zip5_impl_const(a, type, b, type, c, type, d, type, e, type, result_selector) +} + +def zip_to_array(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&) : auto>) : array, type, type, type, type)) -const -&> { + //! Merges five iterators into an array by applying a specified function + return <- zip5_impl(a, type, b, type, c, type, d, type, e, type, result_selector) +} + +// zip with 6 sources + +[unused_argument(tt, uu, ww, xx, yy, zz)] +def private zip6_impl(var a; tt : auto(TT); var b; uu : auto(UU); var c; ww : auto(WW); var d; xx : auto(XX); var e; yy : auto(YY); var f; zz : auto(ZZ)) : array> { + //! 
Merges six iterators into an array of tuples + var buffer : array> + static_if ((typeinfo is_array(a) || typeinfo is_dim(a)) && (typeinfo is_array(b) || typeinfo is_dim(b)) && (typeinfo is_array(c) || typeinfo is_dim(c)) && (typeinfo is_array(d) || typeinfo is_dim(d)) && (typeinfo is_array(e) || typeinfo is_dim(e)) && (typeinfo is_array(f) || typeinfo is_dim(f))) { + buffer |> reserve(min(length(a), min(length(b), min(length(c), min(length(d), min(length(e), length(f))))))) + } + for (itA, itB, itC, itD, itE, itF in a, b, c, d, e, f) { + buffer.emplace((itA, itB, itC, itD, itE, itF)) + } + return <- buffer +} + +[unused_argument(tt, uu, ww, xx, yy, zz)] +def private zip6_impl_const(a : auto(ARGT); tt : auto(TT); b : auto(ARGTS); uu : auto(UU); c : auto(ARGC); ww : auto(WW); d : auto(ARGD); xx : auto(XX); e : auto(ARGE); yy : auto(YY); f : auto(ARGF); zz : auto(ZZ)) : array> { + return <- zip6_impl(unsafe(reinterpret(a)), type, unsafe(reinterpret(b)), type, unsafe(reinterpret(c)), type, unsafe(reinterpret(d)), type, unsafe(reinterpret(e)), type, unsafe(reinterpret(f)), type) +} + +def zip(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator) : iterator> { + //! Merges six iterators into an iterator of tuples + return <- zip6_impl(a, type, b, type, c, type, d, type, e, type, f, type).to_sequence_move() +} + +def zip(a : array; b : array; c : array; d : array; e : array; f : array) : array> { + //! Merges six arrays into an array of tuples + return <- zip6_impl_const(a, type, b, type, c, type, d, type, e, type, f, type) +} + +def zip_to_array(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator) : array> { + //! 
Merges six iterators into an array of tuples + return <- zip6_impl(a, type, b, type, c, type, d, type, e, type, f, type) +} + +// zip6 with result selector + +[unused_argument(tt, uu, ww, xx, yy, zz)] +def private zip6_impl(var a; tt : auto(TT); var b; uu : auto(UU); var c; ww : auto(WW); var d; xx : auto(XX); var e; yy : auto(YY); var f; zz : auto(ZZ); result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&) : auto>) : array, type, type, type, type, type)) -const -&> { + //! Merges six iterators into an array by applying a specified function + var buffer : array, type, type, type, type, type)) -const -&> + static_if ((typeinfo is_array(a) || typeinfo is_dim(a)) && (typeinfo is_array(b) || typeinfo is_dim(b)) && (typeinfo is_array(c) || typeinfo is_dim(c)) && (typeinfo is_array(d) || typeinfo is_dim(d)) && (typeinfo is_array(e) || typeinfo is_dim(e)) && (typeinfo is_array(f) || typeinfo is_dim(f))) { + buffer |> reserve(min(length(a), min(length(b), min(length(c), min(length(d), min(length(e), length(f))))))) + } + for (itA, itB, itC, itD, itE, itF in a, b, c, d, e, f) { + buffer.push_clone(result_selector(itA, itB, itC, itD, itE, itF)) + } + return <- buffer +} + +[unused_argument(tt, uu, ww, xx, yy, zz)] +def private zip6_impl_const(a : auto(ARGT); tt : auto(TT); b : auto(ARGTS); uu : auto(UU); c : auto(ARGC); ww : auto(WW); d : auto(ARGD); xx : auto(XX); e : auto(ARGE); yy : auto(YY); f : auto(ARGF); zz : auto(ZZ); result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&) : auto>) : array, type, type, type, type, type)) -const -&> { + return <- zip6_impl(unsafe(reinterpret(a)), type, unsafe(reinterpret(b)), type, unsafe(reinterpret(c)), type, unsafe(reinterpret(d)), type, unsafe(reinterpret(e)), type, unsafe(reinterpret(f)), type, result_selector) +} + +def zip(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator; result_selector : block<(l : 
TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&) : auto>) : iterator, type, type, type, type, type)) -const -&> { + //! Merges six iterators into an iterator by applying a specified function + return <- zip6_impl(a, type, b, type, c, type, d, type, e, type, f, type, result_selector).to_sequence_move() +} + +def zip(a : array; b : array; c : array; d : array; e : array; f : array; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&) : auto>) : array, type, type, type, type, type)) -const -&> { + //! Merges six arrays into an array by applying a specified function + return <- zip6_impl_const(a, type, b, type, c, type, d, type, e, type, f, type, result_selector) +} + +def zip_to_array(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&) : auto>) : array, type, type, type, type, type)) -const -&> { + //! Merges six iterators into an array by applying a specified function + return <- zip6_impl(a, type, b, type, c, type, d, type, e, type, f, type, result_selector) +} + +// zip with 7 sources + +[unused_argument(tt, uu, ww, xx, yy, zz, rr)] +def private zip7_impl(var a; tt : auto(TT); var b; uu : auto(UU); var c; ww : auto(WW); var d; xx : auto(XX); var e; yy : auto(YY); var f; zz : auto(ZZ); var g; rr : auto(RR)) : array> { + //! 
Merges seven iterators into an array of tuples + var buffer : array> + static_if ((typeinfo is_array(a) || typeinfo is_dim(a)) && (typeinfo is_array(b) || typeinfo is_dim(b)) && (typeinfo is_array(c) || typeinfo is_dim(c)) && (typeinfo is_array(d) || typeinfo is_dim(d)) && (typeinfo is_array(e) || typeinfo is_dim(e)) && (typeinfo is_array(f) || typeinfo is_dim(f)) && (typeinfo is_array(g) || typeinfo is_dim(g))) { + buffer |> reserve(min(length(a), min(length(b), min(length(c), min(length(d), min(length(e), min(length(f), length(g)))))))) + } + for (itA, itB, itC, itD, itE, itF, itG in a, b, c, d, e, f, g) { + buffer.emplace((itA, itB, itC, itD, itE, itF, itG)) + } + return <- buffer +} + +[unused_argument(tt, uu, ww, xx, yy, zz, rr)] +def private zip7_impl_const(a : auto(ARGT); tt : auto(TT); b : auto(ARGTS); uu : auto(UU); c : auto(ARGC); ww : auto(WW); d : auto(ARGD); xx : auto(XX); e : auto(ARGE); yy : auto(YY); f : auto(ARGF); zz : auto(ZZ); g : auto(ARGG); rr : auto(RR)) : array> { + return <- zip7_impl(unsafe(reinterpret(a)), type, unsafe(reinterpret(b)), type, unsafe(reinterpret(c)), type, unsafe(reinterpret(d)), type, unsafe(reinterpret(e)), type, unsafe(reinterpret(f)), type, unsafe(reinterpret(g)), type) +} + +def zip(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator; var g : iterator) : iterator> { + //! Merges seven iterators into an iterator of tuples + return <- zip7_impl(a, type, b, type, c, type, d, type, e, type, f, type, g, type).to_sequence_move() +} + +def zip(a : array; b : array; c : array; d : array; e : array; f : array; g : array) : array> { + //! Merges seven arrays into an array of tuples + return <- zip7_impl_const(a, type, b, type, c, type, d, type, e, type, f, type, g, type) +} + +def zip_to_array(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator; var g : iterator) : array> { + //! 
Merges seven iterators into an array of tuples + return <- zip7_impl(a, type, b, type, c, type, d, type, e, type, f, type, g, type) +} + +// zip7 with result selector + +[unused_argument(tt, uu, ww, xx, yy, zz, rr)] +def private zip7_impl(var a; tt : auto(TT); var b; uu : auto(UU); var c; ww : auto(WW); var d; xx : auto(XX); var e; yy : auto(YY); var f; zz : auto(ZZ); var g; rr : auto(RR); result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&; m : RR -&) : auto>) : array, type, type, type, type, type, type)) -const -&> { + //! Merges seven iterators into an array by applying a specified function + var buffer : array, type, type, type, type, type, type)) -const -&> + static_if ((typeinfo is_array(a) || typeinfo is_dim(a)) && (typeinfo is_array(b) || typeinfo is_dim(b)) && (typeinfo is_array(c) || typeinfo is_dim(c)) && (typeinfo is_array(d) || typeinfo is_dim(d)) && (typeinfo is_array(e) || typeinfo is_dim(e)) && (typeinfo is_array(f) || typeinfo is_dim(f)) && (typeinfo is_array(g) || typeinfo is_dim(g))) { + buffer |> reserve(min(length(a), min(length(b), min(length(c), min(length(d), min(length(e), min(length(f), length(g)))))))) + } + for (itA, itB, itC, itD, itE, itF, itG in a, b, c, d, e, f, g) { + buffer.push_clone(result_selector(itA, itB, itC, itD, itE, itF, itG)) + } + return <- buffer +} + +[unused_argument(tt, uu, ww, xx, yy, zz, rr)] +def private zip7_impl_const(a : auto(ARGT); tt : auto(TT); b : auto(ARGTS); uu : auto(UU); c : auto(ARGC); ww : auto(WW); d : auto(ARGD); xx : auto(XX); e : auto(ARGE); yy : auto(YY); f : auto(ARGF); zz : auto(ZZ); g : auto(ARGG); rr : auto(RR); result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&; m : RR -&) : auto>) : array, type, type, type, type, type, type)) -const -&> { + return <- zip7_impl(unsafe(reinterpret(a)), type, unsafe(reinterpret(b)), type, unsafe(reinterpret(c)), type, unsafe(reinterpret(d)), type, unsafe(reinterpret(e)), type, 
unsafe(reinterpret(f)), type, unsafe(reinterpret(g)), type, result_selector) +} + +def zip(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator; var g : iterator; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&; m : RR -&) : auto>) : iterator, type, type, type, type, type, type)) -const -&> { + //! Merges seven iterators into an iterator by applying a specified function + return <- zip7_impl(a, type, b, type, c, type, d, type, e, type, f, type, g, type, result_selector).to_sequence_move() +} + +def zip(a : array; b : array; c : array; d : array; e : array; f : array; g : array; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&; m : RR -&) : auto>) : array, type, type, type, type, type, type)) -const -&> { + //! Merges seven arrays into an array by applying a specified function + return <- zip7_impl_const(a, type, b, type, c, type, d, type, e, type, f, type, g, type, result_selector) +} + +def zip_to_array(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator; var g : iterator; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&; m : RR -&) : auto>) : array, type, type, type, type, type, type)) -const -&> { + //! Merges seven iterators into an array by applying a specified function + return <- zip7_impl(a, type, b, type, c, type, d, type, e, type, f, type, g, type, result_selector) +} + +// zip with 8 sources + +[unused_argument(tt, uu, ww, xx, yy, zz, rr, ss)] +def private zip8_impl(var a; tt : auto(TT); var b; uu : auto(UU); var c; ww : auto(WW); var d; xx : auto(XX); var e; yy : auto(YY); var f; zz : auto(ZZ); var g; rr : auto(RR); var h; ss : auto(SS)) : array> { + //! 
Merges eight iterators into an array of tuples + var buffer : array> + static_if ((typeinfo is_array(a) || typeinfo is_dim(a)) && (typeinfo is_array(b) || typeinfo is_dim(b)) && (typeinfo is_array(c) || typeinfo is_dim(c)) && (typeinfo is_array(d) || typeinfo is_dim(d)) && (typeinfo is_array(e) || typeinfo is_dim(e)) && (typeinfo is_array(f) || typeinfo is_dim(f)) && (typeinfo is_array(g) || typeinfo is_dim(g)) && (typeinfo is_array(h) || typeinfo is_dim(h))) { + buffer |> reserve(min(length(a), min(length(b), min(length(c), min(length(d), min(length(e), min(length(f), min(length(g), length(h))))))))) + } + for (itA, itB, itC, itD, itE, itF, itG, itH in a, b, c, d, e, f, g, h) { + buffer.emplace((itA, itB, itC, itD, itE, itF, itG, itH)) + } + return <- buffer +} + +[unused_argument(tt, uu, ww, xx, yy, zz, rr, ss)] +def private zip8_impl_const(a : auto(ARGT); tt : auto(TT); b : auto(ARGTS); uu : auto(UU); c : auto(ARGC); ww : auto(WW); d : auto(ARGD); xx : auto(XX); e : auto(ARGE); yy : auto(YY); f : auto(ARGF); zz : auto(ZZ); g : auto(ARGG); rr : auto(RR); h : auto(ARGH); ss : auto(SS)) : array> { + return <- zip8_impl(unsafe(reinterpret(a)), type, unsafe(reinterpret(b)), type, unsafe(reinterpret(c)), type, unsafe(reinterpret(d)), type, unsafe(reinterpret(e)), type, unsafe(reinterpret(f)), type, unsafe(reinterpret(g)), type, unsafe(reinterpret(h)), type) +} + +def zip(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator; var g : iterator; var h : iterator) : iterator> { + //! Merges eight iterators into an iterator of tuples + return <- zip8_impl(a, type, b, type, c, type, d, type, e, type, f, type, g, type, h, type).to_sequence_move() +} + +def zip(a : array; b : array; c : array; d : array; e : array; f : array; g : array; h : array) : array> { + //! 
Merges eight arrays into an array of tuples + return <- zip8_impl_const(a, type, b, type, c, type, d, type, e, type, f, type, g, type, h, type) +} + +def zip_to_array(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator; var g : iterator; var h : iterator) : array> { + //! Merges eight iterators into an array of tuples + return <- zip8_impl(a, type, b, type, c, type, d, type, e, type, f, type, g, type, h, type) +} + +// zip8 with result selector + +[unused_argument(tt, uu, ww, xx, yy, zz, rr, ss)] +def private zip8_impl(var a; tt : auto(TT); var b; uu : auto(UU); var c; ww : auto(WW); var d; xx : auto(XX); var e; yy : auto(YY); var f; zz : auto(ZZ); var g; rr : auto(RR); var h; ss : auto(SS); result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&; m : RR -&; n : SS -&) : auto>) : array, type, type, type, type, type, type, type)) -const -&> { + //! Merges eight iterators into an array by applying a specified function + var buffer : array, type, type, type, type, type, type, type)) -const -&> + static_if ((typeinfo is_array(a) || typeinfo is_dim(a)) && (typeinfo is_array(b) || typeinfo is_dim(b)) && (typeinfo is_array(c) || typeinfo is_dim(c)) && (typeinfo is_array(d) || typeinfo is_dim(d)) && (typeinfo is_array(e) || typeinfo is_dim(e)) && (typeinfo is_array(f) || typeinfo is_dim(f)) && (typeinfo is_array(g) || typeinfo is_dim(g)) && (typeinfo is_array(h) || typeinfo is_dim(h))) { + buffer |> reserve(min(length(a), min(length(b), min(length(c), min(length(d), min(length(e), min(length(f), min(length(g), length(h))))))))) + } + for (itA, itB, itC, itD, itE, itF, itG, itH in a, b, c, d, e, f, g, h) { + buffer.push_clone(result_selector(itA, itB, itC, itD, itE, itF, itG, itH)) + } + return <- buffer +} + +[unused_argument(tt, uu, ww, xx, yy, zz, rr, ss)] +def private zip8_impl_const(a : auto(ARGT); tt : auto(TT); b : auto(ARGTS); uu : auto(UU); c : auto(ARGC); ww : auto(WW); d : 
auto(ARGD); xx : auto(XX); e : auto(ARGE); yy : auto(YY); f : auto(ARGF); zz : auto(ZZ); g : auto(ARGG); rr : auto(RR); h : auto(ARGH); ss : auto(SS); result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&; m : RR -&; n : SS -&) : auto>) : array, type, type, type, type, type, type, type)) -const -&> { + return <- zip8_impl(unsafe(reinterpret(a)), type, unsafe(reinterpret(b)), type, unsafe(reinterpret(c)), type, unsafe(reinterpret(d)), type, unsafe(reinterpret(e)), type, unsafe(reinterpret(f)), type, unsafe(reinterpret(g)), type, unsafe(reinterpret(h)), type, result_selector) +} + +def zip(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator; var g : iterator; var h : iterator; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&; m : RR -&; n : SS -&) : auto>) : iterator, type, type, type, type, type, type, type)) -const -&> { + //! Merges eight iterators into an iterator by applying a specified function + return <- zip8_impl(a, type, b, type, c, type, d, type, e, type, f, type, g, type, h, type, result_selector).to_sequence_move() +} + +def zip(a : array; b : array; c : array; d : array; e : array; f : array; g : array; h : array; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&; m : RR -&; n : SS -&) : auto>) : array, type, type, type, type, type, type, type)) -const -&> { + //! 
Merges eight arrays into an array by applying a specified function + return <- zip8_impl_const(a, type, b, type, c, type, d, type, e, type, f, type, g, type, h, type, result_selector) +} + +def zip_to_array(var a : iterator; var b : iterator; var c : iterator; var d : iterator; var e : iterator; var f : iterator; var g : iterator; var h : iterator; result_selector : block<(l : TT -&; r : UU -&; w : WW -&; x : XX -&; y : YY -&; z : ZZ -&; m : RR -&; n : SS -&) : auto>) : array, type, type, type, type, type, type, type)) -const -&> { + //! Merges eight iterators into an array by applying a specified function + return <- zip8_impl(a, type, b, type, c, type, d, type, e, type, f, type, g, type, h, type, result_selector) +} + def public order_unique_folded(var a : array) : array { //! sort and remove duplicate elements from an array var arr <- order(a) diff --git a/doc/source/reference/tutorials/28_linq.rst b/doc/source/reference/tutorials/28_linq.rst index 3a7a4cde52..f3e1726abe 100644 --- a/doc/source/reference/tutorials/28_linq.rst +++ b/doc/source/reference/tutorials/28_linq.rst @@ -159,7 +159,8 @@ Set operations Zip === -``zip`` merges two or three sequences into tuples:: +``zip`` merges 2 to 8 sequences into tuples; iteration stops at the shortest +source:: var names = ["Alice", "Bob", "Charlie"] var ages = [25, 35, 30] @@ -170,6 +171,18 @@ Zip var zipped3 = zip(names, ages, scores) // zipped3: [(Alice,25,95), (Bob,35,87), (Charlie,30,91)] +Each arity (2..8) has four overloads: lockstep and result-selector forms, each +on iterator and array sources. 
A result-selector block runs per element and +returns a custom projection (the tuple is replaced by whatever the block +returns):: + + var sums = zip(a, b, c, d, $(p, q, r, s : int) => p + q + r + s) + +Use ``zip_to_array`` to force materialization of an iterator-source zip into an +``array``:: + + var arr = zip_to_array(seqA, seqB, seqC) + Joining ======= diff --git a/mouse-data/docs/why-does-my-4-ary-block-call-return-stale-first-param-values-when-the-lambda-uses-param-names-p-q-r-s.md b/mouse-data/docs/why-does-my-4-ary-block-call-return-stale-first-param-values-when-the-lambda-uses-param-names-p-q-r-s.md new file mode 100644 index 0000000000..d1be6798a7 --- /dev/null +++ b/mouse-data/docs/why-does-my-4-ary-block-call-return-stale-first-param-values-when-the-lambda-uses-param-names-p-q-r-s.md @@ -0,0 +1,31 @@ +--- +slug: why-does-my-4-ary-block-call-return-stale-first-param-values-when-the-lambda-uses-param-names-p-q-r-s +title: Why does my 4-ary block call return stale first-param values when the lambda uses param names `p, q, r, s`? +created: 2026-05-19 +last_verified: 2026-05-19 +links: [] +--- + +**Status:** INTERMITTENT — observed 3x consistently during initial PR-1 work, but became non-reproducible after unrelated intermediate edits (notably adding a `static_if` reserve in `select_many_impl`). Standalone reproductions outside the test file with various complexity (multi-iter for + lambda directly, public-wrapper shape, nested in test-framework lambda) all returned correct values. Likely a daslang generic-instantiation order / codegen issue with preconditions that can't be pinned down from outside the failing context. + +**Symptom (when triggered):** Calling a `block<(l: TT-&; r: UU-&; w: WW-&; x: XX-&)>` with a lambda `$(p: int; q: int; r: int; s: int) => p + q + r + s` returns wrong sums — the first lambda param `p` evaluates to its initial value (0) on EVERY iteration even though the block call site passes it correctly each time. 
Probes of `=> p` alone return the correct sequence `[0, 1, 2]`; only the multi-param expression `p + q + r + s` exhibited the bug. + +**Repro:** Surfaced in `daslib/linq.das` 4-ary `zip` impl during PR-1 of the linq zip extension. Calling the result-selector form with `$(p: int; q: int; r: int; s: int) => p + q + r + s` over four iterators yielding `[0,1,2]`, `[10,11,12]`, `[100,101,102]`, `[1000,1001,1002]` returns `[1110, 1113, 1116]` instead of `[1110, 1114, 1118]`. Delta-per-row is 3 instead of 4 — consistent with first source stuck at 0. + +**Workaround:** Rename the lambda params away from `p, q, r, s`. Both `(l: int; r: int; w: int; x: int)` (identical-to-block-sig names) AND `(aa: int; bb: int; cc: int; dd: int)` (totally distinct names) work fine. The bug is SPECIFIC to the name set `p, q, r, s` — not a generic shadowing issue. + +**Root cause:** UNKNOWN. Confirmed it is NOT: +- Lambda multi-param shared-type `(p, q, r, s : int)` (explicit per-param annotations fail identically) +- Block-vs-lambda param name shadowing (3-ary with identical names works fine; 4-ary with identical-to-block names ALSO works fine) +- Block param name `x` colliding with `xx` witness or `[iterator for (x in ...)]` comprehension var (rename `x → m` doesn't fix it) +- Arity alone (3-ary works with any names; 4-ary breaks only with `p,q,r,s` and works with other names) +- Source iterator type (probes via `=> p` etc. return correct sequences per source) + +Likely some interaction between daslang's lambda body evaluation order, ref binding to block ref-params, and the specific identifier set `p/q/r/s`. Possibly related to closures, register allocation, or symbol lookup in the lambda body. Needs a minimal repro outside the linq layer + a daslang issue. + +**Test discipline:** Avoid `p, q, r, s` as lambda param names in tests for zip-family functions of arity ≥ 4 until the daslang bug is identified and fixed. Use `aa, bb, cc, dd` or descriptive names. Same caution likely applies to higher arities until investigated. 
+ +**Discovered:** 2026-05-19, PR-1 of linq zip extension (branch `bbatkin/linq-zip-n-ary`). + +## Questions +- Why does my 4-ary block call return stale first-param values when the lambda uses param names `p, q, r, s`? diff --git a/tests/linq/test_linq_transform.das b/tests/linq/test_linq_transform.das index 65a21eab31..3a1eee9225 100644 --- a/tests/linq/test_linq_transform.das +++ b/tests/linq/test_linq_transform.das @@ -12,12 +12,12 @@ require _common [test] def test_select_transform(t : T?) { t |> run("basic select_transform") @(t : T?) { - var chars = ["a", "b", "c", "d", "e"] + let chars = ["a", "b", "c", "d", "e"] var query = _select( chars.to_sequence(), (_, _ + _) ) - for (i, ch, q in 0..5, chars, query) { + for (ch, q in chars, query) { t |> equal(ch, q._0) t |> equal(ch + ch, q._1) } @@ -33,12 +33,12 @@ def test_select_transform(t : T?) { } } t |> run("select_transform to array") @(t : T?) { - var chars = ["a", "b", "c", "d", "e"] + let chars = ["a", "b", "c", "d", "e"] var query = _select_to_array( chars.to_sequence(), (_, _ + _) ) - for (i, ch, q in 0..5, chars, query) { + for (ch, q in chars, query) { t |> equal(ch, q._0) t |> equal(ch + ch, q._1) } @@ -84,7 +84,7 @@ def test_select(t : T?) { } } t |> run("select from array") @(t : T?) { - var nums = [10, 20, 30, 40, 50] + let nums = [10, 20, 30, 40, 50] var query = select(nums) for (i, q in 0..5, query) { t |> equal(q._0, i) @@ -92,7 +92,7 @@ def test_select(t : T?) { } } t |> run("select from array with result selector") @(t : T?) { - var nums = [10, 20, 30] + let nums = [10, 20, 30] var query = _select( nums, (_, _ * 2) @@ -184,7 +184,7 @@ def test_selectmany(t : T?) { } } t |> run("selectmany from array") @(t : T?) { - var seq_seq = [ + let seq_seq = [ ["a", "b", "c"], ["d", "e", "f"], ["g", "h", "i"] @@ -368,9 +368,9 @@ def test_zip3(t : T?) { t |> equal(idx, 3) } t |> run("zip 3 arrays") @(t : T?) 
{ - var a = [0, 1, 2] - var b = [10, 11, 12] - var c = [100, 101, 102] + let a = [0, 1, 2] + let b = [10, 11, 12] + let c = [100, 101, 102] var result = zip(a, b, c) t |> equal(3, length(result)) for (r, i in result, 0..3) { @@ -394,7 +394,7 @@ def test_zip3(t : T?) { } } t |> run("zip 3 with unequal lengths") @(t : T?) { - var result = zip_to_array( + let result = zip_to_array( [iterator for(x in 0..5); x], [iterator for(x in 10..13); x], [iterator for(x in 100..104); x] @@ -402,3 +402,563 @@ def test_zip3(t : T?) { t |> equal(3, length(result)) // stops at shortest } } + +[test] +def test_zip4(t : T?) { + t |> run("basic zip 4 iterators") @(t : T?) { + var query = zip( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x] + ) + var idx = 0 + for (c in query) { + t |> equal(c._0, idx) + t |> equal(c._1, idx + 10) + t |> equal(c._2, idx + 100) + t |> equal(c._3, idx + 1000) + idx ++ + } + t |> equal(idx, 3) + } + t |> run("zip 4 arrays") @(t : T?) { + let a = [0, 1, 2] + let b = [10, 11, 12] + let c = [100, 101, 102] + let d = [1000, 1001, 1002] + var result = zip(a, b, c, d) + t |> equal(3, length(result)) + for (r, i in result, 0..3) { + t |> equal(r._0, i) + t |> equal(r._1, i + 10) + t |> equal(r._2, i + 100) + t |> equal(r._3, i + 1000) + } + } + t |> run("zip 4 to array") @(t : T?) { + var result = zip_to_array( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x] + ) + static_assert(typeinfo is_array(result)) + t |> equal(3, length(result)) + for (r, i in result, 0..3) { + t |> equal(r._0, i) + t |> equal(r._1, i + 10) + t |> equal(r._2, i + 100) + t |> equal(r._3, i + 1000) + } + } + t |> run("zip 4 with unequal lengths") @(t : T?) 
{ + let result = zip_to_array( + [iterator for(x in 0..5); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..104); x], + [iterator for(x in 1000..1006); x] + ) + t |> equal(3, length(result)) // stops at shortest + } + t |> run("zip 4 with result selector") @(t : T?) { + var qcomplex = zip( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + $(p : int; q : int; r : int; s : int) => p + q + r + s + ) + let expected = [1110, 1114, 1118] + var idx = 0 + for (sum in qcomplex) { + t |> equal(sum, expected[idx]) + idx ++ + } + t |> equal(idx, 3) + } + t |> run("zip 4 arrays with result selector") @(t : T?) { + let a = [0, 1, 2] + let b = [10, 11, 12] + let c = [100, 101, 102] + let d = [1000, 1001, 1002] + let result = zip(a, b, c, d, $(aa : int; bb : int; cc : int; dd : int) => aa + bb + cc + dd) + t |> equal(3, length(result)) + t |> equal(result[0], 1110) + t |> equal(result[1], 1114) + t |> equal(result[2], 1118) + } + t |> run("zip 4 to array with result selector") @(t : T?) { + let result = zip_to_array( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + $(aa : int; bb : int; cc : int; dd : int) => aa + bb + cc + dd + ) + static_assert(typeinfo is_array(result)) + t |> equal(3, length(result)) + t |> equal(result[0], 1110) + t |> equal(result[1], 1114) + t |> equal(result[2], 1118) + } +} + +[test] +def test_zip5(t : T?) { + t |> run("basic zip 5 iterators") @(t : T?) 
{ + var query = zip( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x] + ) + var idx = 0 + for (c in query) { + t |> equal(c._0, idx) + t |> equal(c._1, idx + 10) + t |> equal(c._2, idx + 100) + t |> equal(c._3, idx + 1000) + t |> equal(c._4, idx + 10000) + idx ++ + } + t |> equal(idx, 3) + } + t |> run("zip 5 arrays") @(t : T?) { + let a = [0, 1, 2] + let b = [10, 11, 12] + let c = [100, 101, 102] + let d = [1000, 1001, 1002] + let e = [10000, 10001, 10002] + let result = zip(a, b, c, d, e) + t |> equal(3, length(result)) + for (r, i in result, 0..3) { + t |> equal(r._0, i) + t |> equal(r._1, i + 10) + t |> equal(r._2, i + 100) + t |> equal(r._3, i + 1000) + t |> equal(r._4, i + 10000) + } + } + t |> run("zip 5 to array") @(t : T?) { + let result = zip_to_array( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x] + ) + static_assert(typeinfo is_array(result)) + t |> equal(3, length(result)) + } + t |> run("zip 5 with unequal lengths") @(t : T?) { + let result = zip_to_array( + [iterator for(x in 0..5); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..104); x], + [iterator for(x in 1000..1006); x], + [iterator for(x in 10000..10003); x] + ) + t |> equal(3, length(result)) // stops at shortest + } + t |> run("zip 5 with result selector") @(t : T?) 
{ + var qcomplex = zip( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + $(aa : int; bb : int; cc : int; dd : int; ee : int) => aa + bb + cc + dd + ee + ) + let expected = [11110, 11115, 11120] + var idx = 0 + for (sum in qcomplex) { + t |> equal(sum, expected[idx]) + idx ++ + } + t |> equal(idx, 3) + } + t |> run("zip 5 arrays with result selector") @(t : T?) { + let a = [0, 1, 2] + let b = [10, 11, 12] + let c = [100, 101, 102] + let d = [1000, 1001, 1002] + let e = [10000, 10001, 10002] + let result = zip(a, b, c, d, e, $(aa : int; bb : int; cc : int; dd : int; ee : int) => aa + bb + cc + dd + ee) + t |> equal(3, length(result)) + t |> equal(result[0], 11110) + t |> equal(result[1], 11115) + t |> equal(result[2], 11120) + } + t |> run("zip 5 to array with result selector") @(t : T?) { + let result = zip_to_array( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + $(aa : int; bb : int; cc : int; dd : int; ee : int) => aa + bb + cc + dd + ee + ) + static_assert(typeinfo is_array(result)) + t |> equal(3, length(result)) + t |> equal(result[0], 11110) + t |> equal(result[1], 11115) + t |> equal(result[2], 11120) + } +} + +[test] +def test_zip6(t : T?) { + t |> run("basic zip 6 iterators") @(t : T?) 
{ + var query = zip( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x] + ) + var idx = 0 + for (c in query) { + t |> equal(c._0, idx) + t |> equal(c._1, idx + 10) + t |> equal(c._2, idx + 100) + t |> equal(c._3, idx + 1000) + t |> equal(c._4, idx + 10000) + t |> equal(c._5, idx + 100000) + idx ++ + } + t |> equal(idx, 3) + } + t |> run("zip 6 arrays") @(t : T?) { + let a = [0, 1, 2] + let b = [10, 11, 12] + let c = [100, 101, 102] + let d = [1000, 1001, 1002] + let e = [10000, 10001, 10002] + let f = [100000, 100001, 100002] + let result = zip(a, b, c, d, e, f) + t |> equal(3, length(result)) + for (r, i in result, 0..3) { + t |> equal(r._0, i) + t |> equal(r._1, i + 10) + t |> equal(r._2, i + 100) + t |> equal(r._3, i + 1000) + t |> equal(r._4, i + 10000) + t |> equal(r._5, i + 100000) + } + } + t |> run("zip 6 to array") @(t : T?) { + let result = zip_to_array( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x] + ) + static_assert(typeinfo is_array(result)) + t |> equal(3, length(result)) + } + t |> run("zip 6 with unequal lengths") @(t : T?) { + let result = zip_to_array( + [iterator for(x in 0..5); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..104); x], + [iterator for(x in 1000..1006); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100007); x] + ) + t |> equal(3, length(result)) // stops at shortest + } + t |> run("zip 6 with result selector") @(t : T?) 
{ + var qcomplex = zip( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x], + $(aa : int; bb : int; cc : int; dd : int; ee : int; ff : int) => aa + bb + cc + dd + ee + ff + ) + let expected = [111110, 111116, 111122] + var idx = 0 + for (sum in qcomplex) { + t |> equal(sum, expected[idx]) + idx ++ + } + t |> equal(idx, 3) + } + t |> run("zip 6 arrays with result selector") @(t : T?) { + let a = [0, 1, 2] + let b = [10, 11, 12] + let c = [100, 101, 102] + let d = [1000, 1001, 1002] + let e = [10000, 10001, 10002] + let f = [100000, 100001, 100002] + let result = zip(a, b, c, d, e, f, $(aa : int; bb : int; cc : int; dd : int; ee : int; ff : int) => aa + bb + cc + dd + ee + ff) + t |> equal(3, length(result)) + t |> equal(result[0], 111110) + t |> equal(result[1], 111116) + t |> equal(result[2], 111122) + } + t |> run("zip 6 to array with result selector") @(t : T?) { + let result = zip_to_array( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x], + $(aa : int; bb : int; cc : int; dd : int; ee : int; ff : int) => aa + bb + cc + dd + ee + ff + ) + static_assert(typeinfo is_array(result)) + t |> equal(3, length(result)) + t |> equal(result[0], 111110) + t |> equal(result[1], 111116) + t |> equal(result[2], 111122) + } +} + +[test] +def test_zip7(t : T?) { + t |> run("basic zip 7 iterators") @(t : T?) 
{ + var query = zip( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x], + [iterator for(x in 1000000..1000003); x] + ) + var idx = 0 + for (c in query) { + t |> equal(c._0, idx) + t |> equal(c._1, idx + 10) + t |> equal(c._2, idx + 100) + t |> equal(c._3, idx + 1000) + t |> equal(c._4, idx + 10000) + t |> equal(c._5, idx + 100000) + t |> equal(c._6, idx + 1000000) + idx ++ + } + t |> equal(idx, 3) + } + t |> run("zip 7 arrays") @(t : T?) { + let a = [0, 1, 2] + let b = [10, 11, 12] + let c = [100, 101, 102] + let d = [1000, 1001, 1002] + let e = [10000, 10001, 10002] + let f = [100000, 100001, 100002] + let g = [1000000, 1000001, 1000002] + let result = zip(a, b, c, d, e, f, g) + t |> equal(3, length(result)) + for (r, i in result, 0..3) { + t |> equal(r._0, i) + t |> equal(r._6, i + 1000000) + } + } + t |> run("zip 7 to array") @(t : T?) { + let result = zip_to_array( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x], + [iterator for(x in 1000000..1000003); x] + ) + static_assert(typeinfo is_array(result)) + t |> equal(3, length(result)) + } + t |> run("zip 7 with unequal lengths") @(t : T?) { + let result = zip_to_array( + [iterator for(x in 0..5); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..104); x], + [iterator for(x in 1000..1006); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100007); x], + [iterator for(x in 1000000..1000004); x] + ) + t |> equal(3, length(result)) // stops at shortest + } + t |> run("zip 7 with result selector") @(t : T?) 
{ + var qcomplex = zip( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x], + [iterator for(x in 1000000..1000003); x], + $(aa : int; bb : int; cc : int; dd : int; ee : int; ff : int; gg : int) => aa + bb + cc + dd + ee + ff + gg + ) + let expected = [1111110, 1111117, 1111124] + var idx = 0 + for (sum in qcomplex) { + t |> equal(sum, expected[idx]) + idx ++ + } + t |> equal(idx, 3) + } + t |> run("zip 7 arrays with result selector") @(t : T?) { + let a = [0, 1, 2] + let b = [10, 11, 12] + let c = [100, 101, 102] + let d = [1000, 1001, 1002] + let e = [10000, 10001, 10002] + let f = [100000, 100001, 100002] + let g = [1000000, 1000001, 1000002] + let result = zip(a, b, c, d, e, f, g, $(aa : int; bb : int; cc : int; dd : int; ee : int; ff : int; gg : int) => aa + bb + cc + dd + ee + ff + gg) + t |> equal(3, length(result)) + t |> equal(result[0], 1111110) + t |> equal(result[1], 1111117) + t |> equal(result[2], 1111124) + } + t |> run("zip 7 to array with result selector") @(t : T?) { + let result = zip_to_array( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x], + [iterator for(x in 1000000..1000003); x], + $(aa : int; bb : int; cc : int; dd : int; ee : int; ff : int; gg : int) => aa + bb + cc + dd + ee + ff + gg + ) + static_assert(typeinfo is_array(result)) + t |> equal(3, length(result)) + t |> equal(result[0], 1111110) + t |> equal(result[1], 1111117) + t |> equal(result[2], 1111124) + } +} + +[test] +def test_zip8(t : T?) { + t |> run("basic zip 8 iterators") @(t : T?) 
{ + var query = zip( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x], + [iterator for(x in 1000000..1000003); x], + [iterator for(x in 10000000..10000003); x] + ) + var idx = 0 + for (c in query) { + t |> equal(c._0, idx) + t |> equal(c._1, idx + 10) + t |> equal(c._2, idx + 100) + t |> equal(c._3, idx + 1000) + t |> equal(c._4, idx + 10000) + t |> equal(c._5, idx + 100000) + t |> equal(c._6, idx + 1000000) + t |> equal(c._7, idx + 10000000) + idx ++ + } + t |> equal(idx, 3) + } + t |> run("zip 8 arrays") @(t : T?) { + let a = [0, 1, 2] + let b = [10, 11, 12] + let c = [100, 101, 102] + let d = [1000, 1001, 1002] + let e = [10000, 10001, 10002] + let f = [100000, 100001, 100002] + let g = [1000000, 1000001, 1000002] + let h = [10000000, 10000001, 10000002] + let result = zip(a, b, c, d, e, f, g, h) + t |> equal(3, length(result)) + for (r, i in result, 0..3) { + t |> equal(r._0, i) + t |> equal(r._7, i + 10000000) + } + } + t |> run("zip 8 to array") @(t : T?) { + let result = zip_to_array( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x], + [iterator for(x in 1000000..1000003); x], + [iterator for(x in 10000000..10000003); x] + ) + static_assert(typeinfo is_array(result)) + t |> equal(3, length(result)) + } + t |> run("zip 8 with unequal lengths") @(t : T?) 
{ + let result = zip_to_array( + [iterator for(x in 0..5); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..104); x], + [iterator for(x in 1000..1006); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100007); x], + [iterator for(x in 1000000..1000004); x], + [iterator for(x in 10000000..10000005); x] + ) + t |> equal(3, length(result)) // stops at shortest + } + t |> run("zip 8 with result selector") @(t : T?) { + var qcomplex = zip( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x], + [iterator for(x in 1000000..1000003); x], + [iterator for(x in 10000000..10000003); x], + $(aa : int; bb : int; cc : int; dd : int; ee : int; ff : int; gg : int; hh : int) => aa + bb + cc + dd + ee + ff + gg + hh + ) + let expected = [11111110, 11111118, 11111126] + var idx = 0 + for (sum in qcomplex) { + t |> equal(sum, expected[idx]) + idx ++ + } + t |> equal(idx, 3) + } + t |> run("zip 8 arrays with result selector") @(t : T?) { + let a = [0, 1, 2] + let b = [10, 11, 12] + let c = [100, 101, 102] + let d = [1000, 1001, 1002] + let e = [10000, 10001, 10002] + let f = [100000, 100001, 100002] + let g = [1000000, 1000001, 1000002] + let h = [10000000, 10000001, 10000002] + let result = zip(a, b, c, d, e, f, g, h, $(aa : int; bb : int; cc : int; dd : int; ee : int; ff : int; gg : int; hh : int) => aa + bb + cc + dd + ee + ff + gg + hh) + t |> equal(3, length(result)) + t |> equal(result[0], 11111110) + t |> equal(result[1], 11111118) + t |> equal(result[2], 11111126) + } + t |> run("zip 8 to array with result selector") @(t : T?) 
{ + let result = zip_to_array( + [iterator for(x in 0..3); x], + [iterator for(x in 10..13); x], + [iterator for(x in 100..103); x], + [iterator for(x in 1000..1003); x], + [iterator for(x in 10000..10003); x], + [iterator for(x in 100000..100003); x], + [iterator for(x in 1000000..1000003); x], + [iterator for(x in 10000000..10000003); x], + $(aa : int; bb : int; cc : int; dd : int; ee : int; ff : int; gg : int; hh : int) => aa + bb + cc + dd + ee + ff + gg + hh + ) + static_assert(typeinfo is_array(result)) + t |> equal(3, length(result)) + t |> equal(result[0], 11111110) + t |> equal(result[1], 11111118) + t |> equal(result[2], 11111126) + } +} From 6eb331a8351c720d34dd81a532d9295e082c311b Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Tue, 19 May 2026 11:59:10 -0700 Subject: [PATCH 8/8] =?UTF-8?q?linq=5Ffold:=20plan=5Fzip=20splice=20arm=20?= =?UTF-8?q?(PR=20Phase=202A)=20=E2=80=94=20bare=20zip=20+=20count/long=5Fc?= =?UTF-8?q?ount?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds plan_zip planner ahead of plan_loop_or_count in the _fold cascade. Recognizes 2-ary zip(srcA, srcB) chains and splices them into a single multi-iterator for-loop with no intermediate tuple buffer. Z1 — no-terminator: emits invoke($($i(srcA), $i(srcB)) { var buf : array; [reserve when both have length]; for (itA, itB in srcA, srcB) { buf |> push_clone((itA, itB)); }; return <- buf [|> to_sequence_move] }, srcA, srcB). Covers zip(arr, arr), zip(iter, iter) |> to_array, and zip(iter, iter) → iterator. Shortest-source truncation comes free with daslang's multi-iter for. Z2 — count/long_count terminator: when both sources are length-bearing, emits the length-min shortcut directly (int / int64). Otherwise emits a counter loop (acc++, no buffer). No-pred only — predicated count deferred. Z7 — trims `zip` from is_buffer_required_op marker arm (now dead since plan_zip catches all zip-led chains before plan_loop_or_count). 
Z3 (fused chain ops: where_/select/take/skip between zip and terminator) and
Z4..Z6 (3..8-ary mechanical expansion) deferred to a stacked follow-up.

Verification: lint clean; 154/154 test_linq_fold_ast pass; full linq suite
green under interpret (test_linq* — 480+ tests); 1103/1103 under -use-aot.
---
 daslib/linq_fold.das              | 113 ++++++++++++++-
 tests/linq/test_linq_fold_ast.das | 202 ++++++++++++++++++++++++++++++
 2 files changed, 314 insertions(+), 1 deletion(-)

diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das
index 3378600581..4729ab1ff9 100644
--- a/daslib/linq_fold.das
+++ b/daslib/linq_fold.das
@@ -398,7 +398,6 @@ def private is_buffer_required_op(name : string) : bool {
             || name == "distinct" || name == "distinct_by"
             || name == "reverse"
             || name == "group_by" || name == "group_by_lazy"
-            || name == "zip"
             || name == "join" || name == "left_join" || name == "group_join")
 }
 
@@ -2874,6 +2873,116 @@ def private plan_group_by(var expr : Expression?) : Expression? {
     return finalize_emission_stmts(top, srcName, at, stmts)
 }
 
+[macro_function]
+def private plan_zip(var expr : Expression?) : Expression? {
+    // Phase 2 Z1+Z2: 2-ary lockstep zip splice + no-pred count/long_count + length shortcut.
+    // Returns null unless the flattened chain is a 2-argument `zip`, optionally followed by
+    // exactly one predicate-less `count` / `long_count`. Otherwise returns a generated
+    // `invoke` of a block that takes both (peeled) sources as arguments and whose body is:
+    //   count / long_count, both sources have length -> return the smaller of the two lengths
+    //   count / long_count otherwise                  -> lockstep for-loop bumping a counter
+    //   no terminator                                 -> lockstep for-loop pushing (a, b) tuples into
+    //                                                    an array, returned as-is or as an iterator
+    var (top, calls) = flatten_linq(expr)
+    if (empty(calls) || calls[0]._1.name != "zip") return null
+    var zipCall = calls[0]._0
+    let zipArgCount = zipCall.arguments |> length
+    if (zipArgCount != 2 || length(calls) > 2) return null
+    var lastName = ""
+    var lastCall : ExprCall?
+    if (length(calls) == 2) {
+        lastName = calls.back()._1.name
+        lastCall = calls.back()._0
+        // Z2 supports only no-pred count / long_count. Other terminators bail to tier-2.
+        if ((lastName != "count" && lastName != "long_count") || lastCall.arguments |> length != 1) return null
+    }
+    let at = zipCall.at
+    // Generated names embed the zip call's line/column.
+    let srcAName = "`srcA`{at.line}`{at.column}"
+    let srcBName = "`srcB`{at.line}`{at.column}"
+    let bufName = "`buf`{at.line}`{at.column}"
+    let accName = "`acc`{at.line}`{at.column}"
+    var srcAExpr = peel_each(clone_expression(zipCall.arguments[0]))
+    var srcBExpr = peel_each(clone_expression(zipCall.arguments[1]))
+    if (srcAExpr == null || srcAExpr._type == null || srcBExpr == null || srcBExpr._type == null) return null
+    // `zipCall._type` is dereferenced unconditionally below and `expr._type` in the
+    // no-terminator branch; bail out like the source checks above rather than
+    // dereferencing a null type.
+    if (zipCall._type == null || (lastName == "" && expr._type == null)) return null
+    srcAExpr.genFlags.alwaysSafe = true
+    srcBExpr.genFlags.alwaysSafe = true
+    var srcAType = invoke_src_param_type(srcAExpr)
+    var srcBType = invoke_src_param_type(srcBExpr)
+    var elementType = clone_type(zipCall._type.firstType)
+    let bothHaveLength = type_has_length(srcAExpr._type) && type_has_length(srcBExpr._type)
+    var bodyStmts : array<ExpressionPtr>
+    if (lastName == "count" && bothHaveLength) {
+        bodyStmts |> push <| qmacro_expr() {
+            return int(length($i(srcAName)) < length($i(srcBName)) ? length($i(srcAName)) : length($i(srcBName)))
+        }
+    } elif (lastName == "long_count" && bothHaveLength) {
+        bodyStmts |> push <| qmacro_expr() {
+            return int64(length($i(srcAName)) < length($i(srcBName)) ? length($i(srcAName)) : length($i(srcBName)))
+        }
+    } elif (lastName == "count") {
+        bodyStmts |> push <| qmacro_expr() {
+            var $i(accName) = 0
+        }
+        bodyStmts |> push <| qmacro_expr() {
+            for (_itA, _itB in $i(srcAName), $i(srcBName)) {
+                $i(accName) ++
+            }
+        }
+        bodyStmts |> push <| qmacro_expr() {
+            return $i(accName)
+        }
+    } elif (lastName == "long_count") {
+        bodyStmts |> push <| qmacro_expr() {
+            var $i(accName) : int64 = 0l
+        }
+        bodyStmts |> push <| qmacro_expr() {
+            for (_itA, _itB in $i(srcAName), $i(srcBName)) {
+                $i(accName) ++
+            }
+        }
+        bodyStmts |> push <| qmacro_expr() {
+            return $i(accName)
+        }
+    } else {
+        // No terminator — Z1 array/iterator output.
+        bodyStmts |> push <| qmacro_expr() {
+            var $i(bufName) : array<$t(elementType)>
+        }
+        if (bothHaveLength) {
+            bodyStmts |> push <| qmacro_expr() {
+                $i(bufName) |> reserve(length($i(srcAName)) < length($i(srcBName)) ? length($i(srcAName)) : length($i(srcBName)))
+            }
+        }
+        bodyStmts |> push <| qmacro_expr() {
+            for (itA, itB in $i(srcAName), $i(srcBName)) {
+                $i(bufName) |> push_clone((itA, itB))
+            }
+        }
+        if (expr._type.isIterator) {
+            bodyStmts |> push <| qmacro_expr() {
+                return <- $i(bufName).to_sequence_move()
+            }
+        } else {
+            bodyStmts |> push <| qmacro_expr() {
+                return <- $i(bufName)
+            }
+        }
+    }
+    var res = qmacro(invoke($($i(srcAName) : $t(srcAType), $i(srcBName) : $t(srcBType)) {
+        $b(bodyStmts)
+    }, $e(srcAExpr), $e(srcBExpr)))
+    res.force_at(at)
+    res.force_generated(true)
+    let blk = (res as ExprInvoke).arguments[0] as ExprMakeBlock
+    (blk._block as ExprBlock).arguments[0].flags.can_shadow = true
+    (blk._block as ExprBlock).arguments[1].flags.can_shadow = true
+    return res
+}
+
 [call_macro(name="_fold")]
 class private LinqFold : AstCallMacro {
     def override visit(prog : ProgramPtr; mod : Module?; var call : ExprCallMacro?) : Expression? {
@@ -2888,6 +2997,8 @@ class private LinqFold : AstCallMacro {
         if (res != null) return res
         res = plan_group_by(call.arguments[0])
         if (res != null) return res
+        res = plan_zip(call.arguments[0])
+        if (res != null) return res
         res = plan_loop_or_count(call.arguments[0])
         if (res != null) return res
         // Tier 2 — array-shape pipeline with `_inplace` reuse + explicit `delete`.
diff --git a/tests/linq/test_linq_fold_ast.das b/tests/linq/test_linq_fold_ast.das
index 0752139755..5d874e3676 100644
--- a/tests/linq/test_linq_fold_ast.das
+++ b/tests/linq/test_linq_fold_ast.das
@@ -39,6 +39,49 @@ def target_zip_fold() : array> {
     return <- [1, 2, 3]._select(_ * 2).zip([10, 20, 30]._select(_ + 1))._fold()
 }
 
+// Phase 2 Z1 targets — bare zip (no upstream select) hits plan_zip splice.
+[export, marker(no_coverage)] +def target_zip_bare_arr_fold() : array> { + return <- [1, 2, 3].zip([10, 20, 30])._fold() +} + +[export, marker(no_coverage)] +def target_zip_bare_iter_fold() : array> { + return <- zip([1, 2, 3].each(), [10, 20, 30].each()).to_array()._fold() +} + +[export, marker(no_coverage)] +def target_zip_bare_iter_seq_fold() : iterator> { + return <- zip([1, 2, 3].each(), [10, 20, 30].each())._fold() +} + +[export, marker(no_coverage)] +def target_zip_bare_unequal_fold() : array> { + return <- [1, 2, 3, 4, 5].zip([10, 20, 30])._fold() +} + +// Phase 2 Z2 targets — count / long_count terminators on bare zip. +[export, marker(no_coverage)] +def target_zip_count_fold() : int { + return _fold([1, 2, 3].zip([10, 20, 30]).count()) +} + +[export, marker(no_coverage)] +def target_zip_long_count_fold() : int64 { + return _fold([1, 2, 3].zip([10, 20, 30]).long_count()) +} + +[export, marker(no_coverage)] +def target_zip_count_iter_fold() : int { + // to_sequence_move yields a real iterator (no peel, no length) → counter loop fallback. + return _fold(zip([1, 2, 3, 4, 5].to_sequence_move(), [10, 20, 30].to_sequence_move()).count()) +} + +[export, marker(no_coverage)] +def target_zip_count_unequal_fold() : int { + return _fold([1, 2, 3, 4, 5].zip([10, 20, 30]).count()) +} + [export, marker(no_coverage)] def target_zip3_fold() : array> { return <- [1, 2, 3]._select(_ * 2).zip([10, 20, 30]._select(_ + 1), [100, 200, 300]._select(_ / 10))._fold() @@ -81,6 +124,165 @@ def test_zip3_predicate_fold_result(t : T?) { } } +// Phase 2 Z1 — behavioral + AST-shape coverage for bare zip splice. + +[test] +def test_zip_bare_arr_fold_result(t : T?) { + t |> run("bare zip(arr, arr) produces matched tuples") @(t : T?) 
{ + let result <- target_zip_bare_arr_fold() + t |> equal(length(result), 3) + t |> equal(result[0]._0, 1); t |> equal(result[0]._1, 10) + t |> equal(result[1]._0, 2); t |> equal(result[1]._1, 20) + t |> equal(result[2]._0, 3); t |> equal(result[2]._1, 30) + } +} + +[test] +def test_zip_bare_iter_fold_result(t : T?) { + t |> run("bare zip(iter, iter) |> to_array produces matched tuples") @(t : T?) { + let result <- target_zip_bare_iter_fold() + t |> equal(length(result), 3) + t |> equal(result[0]._0, 1); t |> equal(result[0]._1, 10) + t |> equal(result[2]._0, 3); t |> equal(result[2]._1, 30) + } +} + +[test] +def test_zip_bare_iter_seq_fold_result(t : T?) { + t |> run("bare zip(iter, iter) returns iterator that yields matched tuples") @(t : T?) { + var n = 0 + for (pair in target_zip_bare_iter_seq_fold()) { + t |> equal(pair._0, n + 1) + t |> equal(pair._1, (n + 1) * 10) + n ++ + } + t |> equal(n, 3) + } +} + +[test] +def test_zip_bare_unequal_fold_result(t : T?) { + t |> run("bare zip truncates to shorter source") @(t : T?) { + let result <- target_zip_bare_unequal_fold() + t |> equal(length(result), 3) + t |> equal(result[0]._0, 1); t |> equal(result[0]._1, 10) + t |> equal(result[2]._0, 3); t |> equal(result[2]._1, 30) + } +} + +[test] +def test_zip_bare_arr_emits_single_for(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_zip_bare_arr_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return <- $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected splice invoke wrapper") + let n = count_inner_for_loops(body_expr) + t |> equal(n, 1, "bare zip splice must emit exactly one (multi-iter) for-loop") + let zipCalls = count_call(body_expr, "zip") + t |> equal(zipCalls, 0, "zip call must be inlined into the for-loop, not preserved") + } +} + +[test] +def test_zip_bare_iter_emits_single_for(t : T?) 
{ + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_zip_bare_iter_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return <- $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected splice invoke wrapper") + let n = count_inner_for_loops(body_expr) + t |> equal(n, 1, "iter+iter zip splice must emit exactly one for-loop") + let zipCalls = count_call(body_expr, "zip") + t |> equal(zipCalls, 0, "zip call must be inlined, not preserved") + } +} + +// Phase 2 Z2 — behavioral + AST-shape for count/long_count terminators. + +[test] +def test_zip_count_fold_result(t : T?) { + t |> run("zip(arr, arr).count() returns matched length") @(t : T?) { + t |> equal(target_zip_count_fold(), 3) + } +} + +[test] +def test_zip_long_count_fold_result(t : T?) { + t |> run("zip(arr, arr).long_count() returns int64 matched length") @(t : T?) { + t |> equal(target_zip_long_count_fold(), 3l) + } +} + +[test] +def test_zip_count_iter_fold_result(t : T?) { + t |> run("zip(iter, iter).count() walks both and returns min length") @(t : T?) { + t |> equal(target_zip_count_iter_fold(), 3) + } +} + +[test] +def test_zip_count_unequal_fold_result(t : T?) { + t |> run("zip(arr, arr).count() truncates to shorter source") @(t : T?) { + t |> equal(target_zip_count_unequal_fold(), 3) + } +} + +[test] +def test_zip_count_uses_length_shortcut(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_zip_count_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected splice invoke wrapper") + let n = count_inner_for_loops(body_expr) + t |> equal(n, 0, "zip(arr,arr).count() must use length shortcut (no for-loop)") + } +} + +[test] +def test_zip_long_count_uses_length_shortcut(t : T?) 
{ + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_zip_long_count_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected splice invoke wrapper") + let n = count_inner_for_loops(body_expr) + t |> equal(n, 0, "zip(arr,arr).long_count() must use length shortcut (no for-loop)") + } +} + +[test] +def test_zip_count_iter_emits_counter_loop(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_zip_count_iter_fold) + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected splice invoke wrapper") + let n = count_inner_for_loops(body_expr) + t |> equal(n, 1, "iter+iter source: count must walk both via counter loop (1 for)") + let zipCalls = count_call(body_expr, "zip") + t |> equal(zipCalls, 0, "zip must be inlined") + let countCalls = count_call(body_expr, "count") + t |> equal(countCalls, 0, "count must be inlined into acc++") + } +} + // ── Targets for `_fold` Phase-2A loop planner ────────────────────────── [export, marker(no_coverage)]