From 7703228e1a0cc9c35858c06be1ee5dac7f2ee8e7 Mon Sep 17 00:00:00 2001 From: ochafik Date: Wed, 29 Oct 2025 01:49:06 +0000 Subject: [PATCH 01/11] build & test w/ sanitizers --- .github/workflows/build.yml | 12 +++++++++--- CMakeLists.txt | 9 +++++++++ README.md | 20 ++++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5f7fbdb..fdbbdc4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,8 +44,14 @@ jobs: Release, Debug, ] + sanitizer: [ + none, + address, + thread, + undefined, + ] runs-on: ${{ matrix.setup.os }} - name: ${{ matrix.setup.os }}-${{ matrix.setup.build }}-${{ matrix.type }} + name: ${{ matrix.setup.os }}-${{ matrix.setup.build }}-${{ matrix.type }}-sanitizer-${{ matrix.sanitizer }} timeout-minutes: 30 steps: @@ -58,7 +64,7 @@ jobs: - name: ccache uses: hendrikmuhs/ccache-action@v1.2.11 with: - key: ${{ matrix.setup.os }}-${{ matrix.setup.build }}-${{ matrix.type }} + key: ${{ matrix.setup.os }}-${{ matrix.setup.build }}-${{ matrix.type }}-sanitizer-${{ matrix.sanitizer }}-ccache - name: Set up CMake uses: lukka/get-cmake@latest @@ -75,7 +81,7 @@ jobs: - name: Configure CMake env: HF_TOKEN: ${{ secrets.HF_TOKEN }} - run: cmake -B ${{github.workspace}}/build ${{ matrix.setup.defines }} -DCMAKE_BUILD_TYPE=${{ matrix.type }} + run: cmake -B ${{github.workspace}}/build ${{ matrix.setup.defines }} -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DMINJA_SANITIZER=${{ matrix.sanitizer }} - name: Build run: cmake --build ${{github.workspace}}/build --config ${{ matrix.type }} --parallel diff --git a/CMakeLists.txt b/CMakeLists.txt index 95dabe7..b92ae02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,15 @@ option(MINJA_EXAMPLE_ENABLED "minja: Build with example" option(MINJA_FUZZTEST_ENABLED "minja: fuzztests enabled" MINJA_FUZZTEST_ENABLED_DEFAULT) option(MINJA_FUZZTEST_FUZZING_MODE "minja: run fuzztests (if enabled) in fuzzing mode" OFF) option(MINJA_USE_VENV "minja: use Python venv for build" MINJA_USE_VENV_DEFAULT) +set(MINJA_SANITIZERS thread address undefined none) +set(MINJA_SANITIZER none CACHE STRING "minja: sanitizer to use") +set_property(CACHE MINJA_SANITIZER PROPERTY STRINGS ${MINJA_SANITIZERS}) + +if (NOT MSVC AND NOT MINJA_SANITIZER STREQUAL "none") + message(STATUS "Using -fsanitize=${MINJA_SANITIZER}") + add_compile_options("-fsanitize=${MINJA_SANITIZER}") + link_libraries ("-fsanitize=${MINJA_SANITIZER}") +endif() set(CMAKE_CXX_STANDARD 17) diff --git a/README.md b/README.md index 5981079..36a3291 100644 --- a/README.md +++ b/README.md @@ -212,6 +212,26 @@ Main limitations (non-exhaustive list): ./scripts/fuzzing_tests.sh ``` +- Sanitizer tests: + + ```bash + for sanitizer in ADDRESS THREAD UNDEFINED ; do + docker run --rm \ + -v "$PWD":/src:ro \ + -v "$PWD/build-sanitizer-${sanitizer}":/src/build \ + -w /src \ + "$(echo " + FROM ghcr.io/astral-sh/uv:debian-slim + RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev cmake clang-tidy + " | docker build . -q -f - )" \ + bash -c " + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DMINJA_SANITIZER=${sanitizer} && \ + cmake --build build -j --config Debug && \ + ctest --test-dir build -j -C Debug --output-on-failure + " + done + ``` + - If your model's template doesn't run fine, please consider the following before [opening a bug](https://github.com/googlestaging/minja/issues/new): - Is the template using any unsupported filter / test / method / global function, and which one(s)? From 8fb880f3cd29c361eef7851ba7c572445cf32e1a Mon Sep 17 00:00:00 2001 From: ochafik Date: Wed, 29 Oct 2025 01:51:50 +0000 Subject: [PATCH 02/11] Fix circular context reference docker run --rm \ -v "$PWD":/src:ro \ -v "$PWD/build-docker":/src/build \ -w /src \ "$(echo " FROM ghcr.io/astral-sh/uv:debian-slim RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev cmake clang-tidy " | docker build . -q -f - )" \ bash -c " cmake -B build -DCMAKE_BUILD_TYPE=Debug -DMINJA_SANITIZER=address && \ cmake --build build -j --config Debug && \ ctest --test-dir build -j -C Debug --output-on-failure " --- include/minja/minja.hpp | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index 5ed0556..4295e73 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -1060,11 +1060,18 @@ class MacroNode : public TemplateNode { } } } - void do_render(std::ostringstream &, const std::shared_ptr & macro_context) const override { + void do_render(std::ostringstream &, const std::shared_ptr & context) const override { if (!name) throw std::runtime_error("MacroNode.name is null"); if (!body) throw std::runtime_error("MacroNode.body is null"); - auto callable = Value::callable([this, macro_context](const std::shared_ptr & call_context, ArgumentsValue & args) { - auto execution_context = Context::make(Value::object(), macro_context); + + // Use init-capture to avoid dangling 'this' pointer and circular references + auto callable = Value::callable([weak_context = std::weak_ptr(context), + name = name, params = params, body = body, + named_param_positions = named_param_positions] + (const std::shared_ptr & call_context, ArgumentsValue & args) { + auto context_locked = weak_context.lock(); + if (!context_locked) throw std::runtime_error("Macro context no longer valid"); + auto execution_context = Context::make(Value::object(), context_locked); if (call_context->contains("caller")) { execution_context->set("caller", call_context->get("caller")); @@ -1640,13 +1647,17 @@ class CallNode : public TemplateNode { void do_render(std::ostringstream & out, const std::shared_ptr & context) const override { if (!expr) throw std::runtime_error("CallNode.expr is null"); if (!body) throw std::runtime_error("CallNode.body is null"); - - auto caller = Value::callable([this, context](const std::shared_ptr &, ArgumentsValue &) -> Value { - return Value(body->render(context)); + + // Use init-capture to avoid dangling 'this' pointer and circular references + auto caller = Value::callable([weak_context = std::weak_ptr(context), body=body] + (const std::shared_ptr &, ArgumentsValue &) -> Value { + auto context_locked = weak_context.lock(); + if (!context_locked) throw std::runtime_error("Caller context no longer valid"); + return Value(body->render(context_locked)); }); - + context->set("caller", caller); - + auto call_expr = dynamic_cast(expr.get()); if (!call_expr) { throw std::runtime_error("Invalid call block syntax - expected function call"); @@ -1657,7 +1668,7 @@ class CallNode : public TemplateNode { throw std::runtime_error("Call target must be callable: " + function.dump()); } ArgumentsValue args = call_expr->args.evaluate(context); - + Value result = function.call(context, args); out << result.to_str(); } @@ -2215,7 +2226,7 @@ class Parser { } } } - + if ((has_first_colon || has_second_colon)) { index = std::make_shared(slice_loc, std::move(start), std::move(end), std::move(step)); } else { From 3d5223c7adf8f9306d350ceeb2dd9e54d2ebcf20 Mon Sep 17 00:00:00 2001 From: ochafik Date: Wed, 29 Oct 2025 01:56:13 +0000 Subject: [PATCH 03/11] fix bad patch --- include/minja/minja.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index 4295e73..e861406 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -1101,7 +1101,7 @@ class MacroNode : public TemplateNode { } return body->render(execution_context); }); - macro_context->set(name->get_name(), callable); + context->set(name->get_name(), callable); } }; @@ -1271,7 +1271,7 @@ class SubscriptExpr : public Expression { } return result; - } else if (target_value.is_array()) { + } else if (target_value.is_array()) { auto result = Value::array(); for (int64_t i = start; step > 0 ? i < end : i > end; i += step) { result.push_back(target_value.at(i)); @@ -1320,7 +1320,7 @@ static bool in(const Value & value, const Value & container) { return (((container.is_array() || container.is_object()) && container.contains(value)) || (value.is_string() && container.is_string() && container.to_str().find(value.to_str()) != std::string::npos)); -}; +} class BinaryOpExpr : public Expression { public: From b56da72abbd96e812854c40d6ab0b56140f11436 Mon Sep 17 00:00:00 2001 From: ochafik Date: Wed, 29 Oct 2025 02:00:11 +0000 Subject: [PATCH 04/11] Add tiny reserves in value ctor (+ use emplace to avoid some copies) --- include/minja/minja.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index 5ed0556..dc2e134 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -158,12 +158,14 @@ class Value : public std::enable_shared_from_this { Value(const json & v) { if (v.is_object()) { auto object = std::make_shared(); + object->reserve(v.size()); for (auto it = v.begin(); it != v.end(); ++it) { - (*object)[it.key()] = it.value(); + object->emplace_back(it.key(), Value(it.value())); } object_ = std::move(object); } else if (v.is_array()) { auto array = std::make_shared(); + array->reserve(v.size()); for (const auto& item : v) { array->push_back(Value(item)); } From 00538f9dc58adeba9c8dc6cc69c398f4b751f6a6 Mon Sep 17 00:00:00 2001 From: ochafik Date: Wed, 29 Oct 2025 02:00:21 +0000 Subject: [PATCH 05/11] drop unused enable_shared_from_this --- include/minja/minja.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index dc2e134..129fe11 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -55,7 +55,7 @@ inline std::string normalize_newlines(const std::string & s) { } /* Values that behave roughly like in Python. */ -class Value : public std::enable_shared_from_this { +class Value { public: using CallableType = std::function &, ArgumentsValue &)>; using FilterType = std::function &, ArgumentsValue &)>; @@ -612,7 +612,7 @@ static std::string error_location_suffix(const std::string & source, size_t pos) return out.str(); } -class Context : public std::enable_shared_from_this { +class Context { protected: Value values_; std::shared_ptr parent_; @@ -852,12 +852,12 @@ struct LoopControlTemplateToken : public TemplateToken { struct CallTemplateToken : public TemplateToken { std::shared_ptr expr; - CallTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && e) + CallTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr && e) : TemplateToken(Type::Call, loc, pre, post), expr(std::move(e)) {} }; struct EndCallTemplateToken : public TemplateToken { - EndCallTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) + EndCallTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndCall, loc, pre, post) {} }; @@ -1077,7 +1077,7 @@ class MacroNode : public TemplateNode { auto & arg = args.args[i]; if (i >= params.size()) throw std::runtime_error("Too many positional arguments for macro " + name->get_name()); param_set[i] = true; - auto & param_name = params[i].first; + const auto & param_name = params[i].first; execution_context->set(param_name, arg); } for (auto & [arg_name, value] : args.kwargs) { From f7e04ebfcea3ed9df192b22b5f3aa4cdec0edf51 Mon Sep 17 00:00:00 2001 From: ochafik Date: Wed, 29 Oct 2025 02:10:13 +0000 Subject: [PATCH 06/11] Update minja.hpp --- include/minja/minja.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index 129fe11..6583e3f 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -33,6 +33,7 @@ using json = nlohmann::ordered_json; + namespace minja { class Context; From 924a26ffce49066874a7b5829b9588a0acfcf337 Mon Sep 17 00:00:00 2001 From: ochafik Date: Wed, 29 Oct 2025 02:18:01 +0000 Subject: [PATCH 07/11] Update minja.hpp --- include/minja/minja.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index e82ff0b..57b138a 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -33,7 +33,6 @@ using json = nlohmann::ordered_json; - namespace minja { class Context; From 7cf0e79b2d7ceea676482a74cdd96820d0f5185e Mon Sep 17 00:00:00 2001 From: ochafik Date: Wed, 29 Oct 2025 02:19:03 +0000 Subject: [PATCH 08/11] Update build.yml --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fdbbdc4..d3503e2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -64,7 +64,7 @@ jobs: - name: ccache uses: hendrikmuhs/ccache-action@v1.2.11 with: - key: ${{ matrix.setup.os }}-${{ matrix.setup.build }}-${{ matrix.type }}-sanitizer-${{ matrix.sanitizer }}-ccache + key: ${{ matrix.setup.os }}-${{ matrix.setup.build }}-${{ matrix.type }}-sanitizer-${{ matrix.sanitizer }} - name: Set up CMake uses: lukka/get-cmake@latest From ec271d7e9f1873e36f05c710b3f0a49781c847db Mon Sep 17 00:00:00 2001 From: ochafik Date: Sun, 2 Nov 2025 15:40:02 +0000 Subject: [PATCH 09/11] Fix parsing of values (chaining of identifier, function call, method call) --- include/minja/minja.hpp | 10 ++++------ tests/CMakeLists.txt | 1 + tests/test-syntax.cpp | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index 57b138a..c35ef62 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -2205,7 +2205,7 @@ class Parser { auto value = parseValue(); - while (it != end && consumeSpaces() && peekSymbols({ "[", "." })) { + while (it != end && consumeSpaces() && peekSymbols({ "[", ".", "(" })) { if (!consumeToken("[").empty()) { std::shared_ptr index; auto slice_loc = get_location(); @@ -2250,15 +2250,13 @@ class Parser { auto key = std::make_shared(identifier->location, Value(identifier->get_name())); value = std::make_shared(identifier->location, std::move(value), std::move(key)); } + } else if (peekSymbols({ "(" })) { + auto callParams = parseCallArgs(); + value = std::make_shared(get_location(), std::move(value), std::move(callParams)); } consumeSpaces(); } - if (peekSymbols({ "(" })) { - auto location = get_location(); - auto callParams = parseCallArgs(); - value = std::make_shared(location, std::move(value), std::move(callParams)); - } return value; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index db82c2d..4a446ac 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -324,6 +324,7 @@ set(MODEL_IDS Qwen/Qwen3-235B-A22B-Thinking-2507 Qwen/Qwen3-Coder-30B-A3B-Instruct Qwen/QwQ-32B + zai-org/GLM-4.6 # Broken, TODO: # ai21labs/AI21-Jamba-1.5-Large # https://github.com/google/minja/issues/8 diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp index 36bdaa3..e445f10 100644 --- a/tests/test-syntax.cpp +++ b/tests/test-syntax.cpp @@ -11,7 +11,6 @@ #include #include -#include #include static std::string render_python(const std::string & template_str, const json & bindings, const minja::Options & options) { @@ -373,6 +372,7 @@ TEST(SyntaxTest, SimpleCases) { {}, {} ) ); + EXPECT_EQ("False", render("{{ trim(' a ').endswith(' ') }}", {} , {})); // Test parsing of expression (chaining of identifier, function call, method call) } EXPECT_EQ( "[0, 1, 2][0, 2]", From fe177fe6ad7d3a6d6a0cd77d64a91235b89fb871 Mon Sep 17 00:00:00 2001 From: ochafik Date: Sun, 2 Nov 2025 16:15:22 +0000 Subject: [PATCH 10/11] fix tojson ensure_ascii --- include/minja/minja.hpp | 2 +- scripts/fetch_templates_and_goldens.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index c35ef62..873ece8 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -2736,7 +2736,7 @@ inline std::shared_ptr Context::builtins() { globals.set("raise_exception", simple_function("raise_exception", { "message" }, [](const std::shared_ptr &, Value & args) -> Value { throw std::runtime_error(args.at("message").get()); })); - globals.set("tojson", simple_function("tojson", { "value", "indent" }, [](const std::shared_ptr &, Value & args) { + globals.set("tojson", simple_function("tojson", { "value", "indent", "ensure_ascii" }, [](const std::shared_ptr &, Value & args) { return Value(args.at("value").dump(args.get("indent", -1), /* to_json= */ true)); })); globals.set("items", simple_function("items", { "object" }, [](const std::shared_ptr &, Value & args) { diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py index acaf969..f6c8fe8 100644 --- a/scripts/fetch_templates_and_goldens.py +++ b/scripts/fetch_templates_and_goldens.py @@ -50,6 +50,8 @@ def strftime_now(format): now = datetime.datetime.strptime(TEST_DATE, "%Y-%m-%d") return now.strftime(format) +def tojson(value, indent=None, ensure_ascii=False, sort_keys=False): + return json.dumps(value, indent=indent, ensure_ascii=ensure_ascii, sort_keys=sort_keys) def join_cmake_path(parent, child): ''' @@ -119,8 +121,11 @@ def __init__(self, template, env=None, filters=None, global_functions=None): env = jinja2.Environment( trim_blocks=True, lstrip_blocks=True, - extensions=[jinja2.ext.loopcontrols] + extensions=[jinja2.ext.loopcontrols], ) + # https://jinja.palletsprojects.com/en/stable/api/#policies + env.policies["json.dumps_function"] = tojson + env.filters['tojson'] = tojson if filters: for name, func in filters.items(): env.filters[name] = func From efd8e02c314dda3865122b58c3b3a4eef6a6f497 Mon Sep 17 00:00:00 2001 From: ochafik Date: Sun, 2 Nov 2025 16:15:32 +0000 Subject: [PATCH 11/11] fix glm 4.6 tool detection --- include/minja/chat-template.hpp | 4 ++-- scripts/fetch_templates_and_goldens.py | 4 ++-- tests/test-capabilities.cpp | 12 ++++++++++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/minja/chat-template.hpp b/include/minja/chat-template.hpp index d31fb90..f9580df 100644 --- a/include/minja/chat-template.hpp +++ b/include/minja/chat-template.hpp @@ -198,12 +198,12 @@ class chat_template { dummy_user_msg, make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})), }), {}, false); - auto tool_call_renders_str_arguments = contains(out, "") || contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); + auto tool_call_renders_str_arguments = contains(out, "") || contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':") || contains(out, ">argument_needle<"); out = try_raw_render(json::array({ dummy_user_msg, make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})), }), {}, false); - auto tool_call_renders_obj_arguments = contains(out, "") || contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); + auto tool_call_renders_obj_arguments = contains(out, "") || contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':") || contains(out, ">argument_needle<"); caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments; caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments; diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py index f6c8fe8..a9656d9 100644 --- a/scripts/fetch_templates_and_goldens.py +++ b/scripts/fetch_templates_and_goldens.py @@ -197,12 +197,12 @@ def make_tool_call(tool_name, arguments): dummy_user_msg, make_tool_calls_msg([make_tool_call("ipython", json.dumps(dummy_args_obj))]), ]) - tool_call_renders_str_arguments = "" in out or '"argument_needle":' in out or "'argument_needle':" in out + tool_call_renders_str_arguments = "" in out or '"argument_needle":' in out or "'argument_needle':" in out or ">argument_needle<" in out out = self.try_raw_render([ dummy_user_msg, make_tool_calls_msg([make_tool_call("ipython", dummy_args_obj)]), ]) - tool_call_renders_obj_arguments = "" in out or '"argument_needle":' in out or "'argument_needle':" in out + tool_call_renders_obj_arguments = "" in out or '"argument_needle":' in out or "'argument_needle':" in out or ">argument_needle<" in out caps.supports_tool_calls = tool_call_renders_str_arguments or tool_call_renders_obj_arguments caps.requires_object_arguments = not tool_call_renders_str_arguments and tool_call_renders_obj_arguments diff --git a/tests/test-capabilities.cpp b/tests/test-capabilities.cpp index 458f9b9..90b137a 100644 --- a/tests/test-capabilities.cpp +++ b/tests/test-capabilities.cpp @@ -257,3 +257,15 @@ TEST(CapabilitiesTest, CommandRPlusToolUse) { // EXPECT_TRUE(caps.requires_non_null_content); EXPECT_FALSE(caps.requires_typed_content); } + +TEST(CapabilitiesTest, GLM46) { + auto caps = get_caps("tests/zai-org-GLM-4.6.jinja"); + EXPECT_TRUE(caps.supports_system_role); + EXPECT_TRUE(caps.supports_tools); + EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_TRUE(caps.supports_tool_responses); + EXPECT_TRUE(caps.supports_parallel_tool_calls); + EXPECT_TRUE(caps.requires_object_arguments); + // EXPECT_TRUE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content); +}