From 875d45c9f4127ed8b8c75a44c3b3c34e301d2243 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Mon, 18 May 2026 19:44:40 -0700 Subject: [PATCH 01/22] das_hash_map: add insert-only variants, switch Module fields, add hash bench suite * New daslang_insert_only_hash_{map,set} in include/das_hash_map/das_hash_map.h: strict subset of the regular API (no erase, no tombstones, no rehash_same_capacity). reserve_slot drops the HASH_KILLED branch + insertI tracking; iterator skip is "== HASH_EMPTY" instead of "<= HASH_KILLED". Same layout, same load factor cap, same find_index. Same hashing. * Switch 8 Module class fields in include/daScript/ast/ast.h that are never erased anywhere in the codebase (audited via grep across src/ and modules/): handleTypes, callThis, typeInfoMacros, annotationData, requireModule, typeMacros, readMacros, options. Type signal that these tables are grow-only by design. * include/daScript/das_config.h: add das_insert_only_{hash_,}{map,set} aliases. Graceful std::unordered_* fallback under DAS_CUSTOM_HASH=0 (API superset). * include/daScript/misc/das_common.h: ordered() overloads for the new types. * include/daScript/ast/ast_serializer.h + .cpp: AstSerializer operator<< + serialize_hash_map overloads for the insert-only map. * examples/hash/: standalone bench suite (modeled on examples/sort/) with three executables - main matrix (std vs das vs absl, 5 key shapes, insert/churn/find x10, 270 cells); hash function vs table mechanics isolation (2x2 of {das_hash_map, absl::flat_hash_map} x {daslang_hash, absl::Hash}); insert-only vs regular comparison on find x10. Tests: 8422 dastest passes, 7811 AOT tests pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/hash/CMakeLists.txt | 81 +++ examples/hash/bench_hash_func.cpp | 224 +++++++ examples/hash/bench_hash_insert_only.cpp | 273 ++++++++ examples/hash/bench_hash_map.cpp | 650 +++++++++++++++++++ include/daScript/ast/ast.h | 16 +- include/daScript/ast/ast_serializer.h | 6 + include/daScript/das_config.h | 22 + include/daScript/misc/das_common.h | 25 + include/das_hash_map/das_hash_map.h | 502 ++++++++++++++ src/builtin/module_builtin_ast_serialize.cpp | 26 + 10 files changed, 1817 insertions(+), 8 deletions(-) create mode 100644 examples/hash/CMakeLists.txt create mode 100644 examples/hash/bench_hash_func.cpp create mode 100644 examples/hash/bench_hash_insert_only.cpp create mode 100644 examples/hash/bench_hash_map.cpp diff --git a/examples/hash/CMakeLists.txt b/examples/hash/CMakeLists.txt new file mode 100644 index 0000000000..28135c5f5c --- /dev/null +++ b/examples/hash/CMakeLists.txt @@ -0,0 +1,81 @@ +cmake_minimum_required(VERSION 3.16) +project(example_hash_bench CXX) + +# Standalone bench — compares three hash table implementations on typical +# daslang key shapes: +# * std::unordered_map / unordered_set (chained baseline) +# * das::daslang_hash_map / daslang_hash_set (in-tree, open addressing) +# * absl::flat_hash_map / flat_hash_set (Abseil SwissTable reference) +# +# Build: cmake -S examples/hash -B build/example_hash_bench -DCMAKE_BUILD_TYPE=Release +# cmake --build build/example_hash_bench -j +# Run: ./build/example_hash_bench/example_hash_bench +# +# First configure clones Abseil (~2-3 min, cached in _deps/ afterwards). 
+ +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE) +endif() + +# --- Abseil (flat_hash_map / flat_hash_set baseline) --------------------- +include(FetchContent) +set(ABSL_PROPAGATE_CXX_STD ON CACHE BOOL "" FORCE) +set(ABSL_ENABLE_INSTALL OFF CACHE BOOL "" FORCE) +set(BUILD_TESTING OFF CACHE BOOL "" FORCE) +FetchContent_Declare( + absl + GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git + GIT_TAG 20240722.0 + GIT_SHALLOW TRUE +) +FetchContent_MakeAvailable(absl) + +# --- bench executables --------------------------------------------------- +# Main bench: insert / churn / find x10 across {std, das, absl} x 5 key types. +add_executable(example_hash_bench bench_hash_map.cpp) +target_include_directories(example_hash_bench PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../../include) +target_link_libraries(example_hash_bench PRIVATE + absl::flat_hash_map + absl::flat_hash_set + absl::hash + absl::strings) + +# Hash-function isolation bench: find x10 only, on the 2x2 cross of +# {das_hash_map, absl::flat_hash_map} x {daslang_hash, absl::Hash}. +# Runs independently — `--target example_hash_func_bench` builds just this. +add_executable(example_hash_func_bench bench_hash_func.cpp) +target_include_directories(example_hash_func_bench PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../../include) +target_link_libraries(example_hash_func_bench PRIVATE + absl::flat_hash_map + absl::hash + absl::strings) + +# Insert-only isolation bench: find x10, daslang_hash_{map,set} vs +# daslang_insert_only_hash_{map,set}. Does NOT need Abseil — pure das (the target links no absl:: library; the configure step above still fetches Abseil unconditionally). +# Build just this: `--target example_hash_insert_only_bench`. 
+add_executable(example_hash_insert_only_bench bench_hash_insert_only.cpp) +target_include_directories(example_hash_insert_only_bench PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../../include) + +if(MSVC) + target_compile_options(example_hash_bench PRIVATE /O2 /W4) + target_compile_options(example_hash_func_bench PRIVATE /O2 /W4) + target_compile_options(example_hash_insert_only_bench PRIVATE /O2 /W4) + # absl pulls in <windows.h> indirectly via some platform headers; tame it. NOTE(review): /O2 above is added for every configuration; MSVC Debug flags normally include /RTC1, which rejects /O2 (error D8016) — confirm Debug builds are out of scope or gate /O2 per config. + target_compile_definitions(example_hash_bench PRIVATE + NOMINMAX WIN32_LEAN_AND_MEAN _CRT_SECURE_NO_WARNINGS) + target_compile_definitions(example_hash_func_bench PRIVATE + NOMINMAX WIN32_LEAN_AND_MEAN _CRT_SECURE_NO_WARNINGS) + target_compile_definitions(example_hash_insert_only_bench PRIVATE + NOMINMAX WIN32_LEAN_AND_MEAN _CRT_SECURE_NO_WARNINGS) +else() + target_compile_options(example_hash_bench PRIVATE -O3 -Wall -Wextra) + target_compile_options(example_hash_func_bench PRIVATE -O3 -Wall -Wextra) + target_compile_options(example_hash_insert_only_bench PRIVATE -O3 -Wall -Wextra) +endif() diff --git a/examples/hash/bench_hash_func.cpp b/examples/hash/bench_hash_func.cpp new file mode 100644 index 0000000000..ef72cfece1 --- /dev/null +++ b/examples/hash/bench_hash_func.cpp @@ -0,0 +1,224 @@ +// Find x10 only — isolates "hash function" from "table mechanics" by running +// every cell on the 2x2 cross of: +// tables: das::daslang_hash_map, absl::flat_hash_map +// hashes: das::daslang_hash, absl::Hash +// +// Read columns down for hash effect on same table; read rows across for +// table effect with same hash. +// +// Build: cmake --build build/example_hash_bench --config Release --target example_hash_func_bench +// Run: build/example_hash_bench/Release/example_hash_func_bench.exe +// +// Standalone — does not need example_hash_bench to be built. 
+ /* NOTE(review): this copy appears to have lost every angle-bracketed span (include targets, template parameter/argument lists); restore them from the original patch before building. */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include // corpus uniqueness only +#include +#include + +namespace das { void das_throw(const char * msg); } +/* Local definition of das::das_throw for this bench: prints msg to stderr, then aborts. */ void das::das_throw(const char * msg) { + std::fprintf(stderr, "%s\n", msg); std::abort(); +} + +namespace { + +constexpr uint32_t SEED = 0xC0FFEEu; +constexpr size_t SIZES[] = { 1000, 10000, 100000 }; +constexpr size_t MAX_N = 100000; +constexpr int FIND_MULT = 10; /* queries generated per key by make_find_queries */ + +volatile size_t g_sink = 0; /* presumably a sink that keeps measured results observable; its users are outside this view */ + +/* Returns 500 for n <= 1000, 50 for n <= 10000, otherwise 5. */ int pick_iters_find (size_t n) { + if (n <= 1000) return 500; + if (n <= 10000) return 50; + return 5; +} + +// ===== const char* helpers ===== +// For absl on const char*, wrap absl::Hash to hash CONTENT +// (absl::Hash would hash the pointer — useless here). Equality +// likewise needs strcmp not pointer compare. +struct CStrEq { + bool operator () (const char * a, const char * b) const noexcept { return std::strcmp(a, b) == 0; } +}; +struct AbslCStrContentHash { + size_t operator () (const char * s) const noexcept { + return absl::Hash{}(absl::string_view(s)); + } +}; + +// ===== Hash traits per K ===== +/* NOTE(review): template parameter and argument lists are missing in this copy; going by the const char* helpers above, the `template <>` specialization is presumably the const char* case — confirm against the original patch. */ template +struct HashTraits { + using DasH = das::daslang_hash; + using AbslH = absl::Hash; + using Eq = std::equal_to; +}; +template <> +struct HashTraits { + using DasH = das::daslang_hash; // FNV-64 on bytes + using AbslH = AbslCStrContentHash; // absl Hash on bytes + using Eq = CStrEq; +}; + +// ===== Corpora (same identifier-like generator as the main bench) ===== +/* strs owns the strings; cstrs[i] is strs[i].c_str() and hashes[i] is the daslang_hash of strs[i] (both filled by make_string_corpus). */ struct StringCorpus { + std::vector strs; + std::vector cstrs; + std::vector hashes; +}; +/* Generates n distinct random strings (length drawn from a normal distribution with mean 10 and stddev 4, clamped to [4, 20]; characters from alpha) using mt19937(seed), then fills cstrs and hashes from them. */ StringCorpus make_string_corpus (size_t n, uint32_t seed) { + static const char alpha[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; + constexpr int ALPHA_N = sizeof(alpha) - 1; + std::mt19937 rng(seed); + std::normal_distribution len_dist(10.0f, 4.0f); + StringCorpus c; + c.strs.reserve(n); c.cstrs.reserve(n); c.hashes.reserve(n); + std::unordered_set seen; 
seen.reserve(n); + while (c.strs.size() < n) { /* retry until n distinct strings are collected; duplicates are rejected via seen */ + int len = int(len_dist(rng)); + if (len < 4) len = 4; + if (len > 20) len = 20; + std::string s; s.resize(size_t(len)); + for (int i = 0; i < len; ++i) s[i] = alpha[rng() % ALPHA_N]; + if (seen.insert(s).second) c.strs.push_back(std::move(s)); + } + das::daslang_hash h{}; + for (const auto & s : c.strs) { + c.cstrs.push_back(s.c_str()); /* aliases storage owned by c.strs: a copy of the corpus would keep pointing at the source's strings */ + c.hashes.push_back(uint64_t(h(s))); + } + return c; +} + +/* Owns malloc'd blocks (freed in the destructor); copying is deleted, move-construction takes over the pointer vector. */ struct PtrPool { + std::vector ptrs; + size_t block_size = 0; + PtrPool () = default; + ~PtrPool () { for (auto p : ptrs) std::free(p); } + PtrPool (const PtrPool &) = delete; + PtrPool & operator = (const PtrPool &) = delete; + PtrPool (PtrPool && o) noexcept : ptrs(std::move(o.ptrs)), block_size(o.block_size) {} +}; +/* Allocates n blocks of block_size bytes each with std::malloc; results are stored unchecked, so a failed allocation would appear as a nullptr entry. */ PtrPool make_ptr_pool (size_t n, size_t block_size) { + PtrPool p; p.block_size = block_size; p.ptrs.reserve(n); + for (size_t i = 0; i < n; ++i) p.ptrs.push_back(std::malloc(block_size)); + return p; +} + +/* Builds keys.size() * FIND_MULT lookups drawn uniformly, with repetition, from keys using mt19937(seed). NOTE(review): for empty keys the upper bound keys.size() - 1 wraps around; the loop then runs zero times, but confirm the distribution's a <= b precondition still holds for its (stripped) integer type. */ template +std::vector make_find_queries (const std::vector & keys, uint32_t seed) { + std::mt19937 rng(seed); + std::vector q; q.reserve(keys.size() * size_t(FIND_MULT)); + std::uniform_int_distribution pick(0, keys.size() - 1); + for (size_t i = 0; i < keys.size() * size_t(FIND_MULT); ++i) q.push_back(keys[pick(rng)]); + return q; +} + +// ===== Generic find bench ===== +template