diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5f7fbdb..6805ea9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,7 +3,7 @@ name: CI on: push: branches: - - master + - main paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/*.hpp', '**/*.cpp'] pull_request: types: [opened, synchronize, reopened] @@ -44,8 +44,29 @@ jobs: Release, Debug, ] + sanitizer: [ + none, + address, + thread, + undefined, + ] + exclude: + # Sanitizers not supported on Clang targeting MSVC (llvm-arm64) + - setup: { build: 'llvm-arm64' } + sanitizer: address + - setup: { build: 'llvm-arm64' } + sanitizer: thread + - setup: { build: 'llvm-arm64' } + sanitizer: undefined + # Sanitizers not supported on MSVC ARM64 + - setup: { build: 'msvc-arm64' } + sanitizer: address + - setup: { build: 'msvc-arm64' } + sanitizer: thread + - setup: { build: 'msvc-arm64' } + sanitizer: undefined runs-on: ${{ matrix.setup.os }} - name: ${{ matrix.setup.os }}-${{ matrix.setup.build }}-${{ matrix.type }} + name: ${{ matrix.setup.os }}-${{ matrix.setup.build }}-${{ matrix.type }}-sanitizer-${{ matrix.sanitizer }} timeout-minutes: 30 steps: @@ -58,7 +79,7 @@ jobs: - name: ccache uses: hendrikmuhs/ccache-action@v1.2.11 with: - key: ${{ matrix.setup.os }}-${{ matrix.setup.build }}-${{ matrix.type }} + key: ${{ matrix.setup.os }}-${{ matrix.setup.build }}-${{ matrix.type }}-sanitizer-${{ matrix.sanitizer }} - name: Set up CMake uses: lukka/get-cmake@latest @@ -75,11 +96,14 @@ jobs: - name: Configure CMake env: HF_TOKEN: ${{ secrets.HF_TOKEN }} - run: cmake -B ${{github.workspace}}/build ${{ matrix.setup.defines }} -DCMAKE_BUILD_TYPE=${{ matrix.type }} + PYTHONIOENCODING: utf-8 + run: cmake -B ${{github.workspace}}/build ${{ matrix.setup.defines }} -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DMINJA_SANITIZER=${{ matrix.sanitizer }} - name: Build run: cmake --build ${{github.workspace}}/build --config ${{ matrix.type }} --parallel - name: Test if: ${{ matrix.setup.test }} + env: + PYTHONIOENCODING: utf-8 run: ctest --test-dir build --output-on-failure --verbose -C ${{ matrix.type }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 95dabe7..6969da9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,9 +15,9 @@ add_library(minja INTERFACE) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -# Test if clang-tidy is available +# Test if clang-tidy is available (disabled for address sanitizer due to GCC false positives) find_program(CLANG_TIDY_EXE NAMES "clang-tidy") -if (CLANG_TIDY_EXE) +if (CLANG_TIDY_EXE AND NOT MINJA_SANITIZER STREQUAL "address") message(STATUS "clang-tidy found: ${CLANG_TIDY_EXE}") set(CMAKE_CXX_CLANG_TIDY clang-tidy; @@ -27,6 +27,8 @@ if (CLANG_TIDY_EXE) -checks=-*,clang-analyzer-*,clang-diagnostic-*,cppcoreguideline-*,bugprone-*,-bugprone-suspicious-include,-bugprone-assignment-in-if-condition,-bugprone-narrowing-conversions,-bugprone-easily-swappable-parameters,-bugprone-inc-dec-in-conditions,-bugprone-exception-escape,-clang-analyzer-cplusplus.StringChecker; -warnings-as-errors=*; ) +elseif(MINJA_SANITIZER STREQUAL "address") + message(STATUS "clang-tidy disabled for address sanitizer builds") else() message(STATUS "clang-tidy not found") endif() @@ -43,6 +45,15 @@ option(MINJA_EXAMPLE_ENABLED "minja: Build with example" option(MINJA_FUZZTEST_ENABLED "minja: fuzztests enabled" MINJA_FUZZTEST_ENABLED_DEFAULT) option(MINJA_FUZZTEST_FUZZING_MODE "minja: run fuzztests (if enabled) in fuzzing mode" OFF) option(MINJA_USE_VENV "minja: use Python venv for build" 
MINJA_USE_VENV_DEFAULT)
+set(MINJA_SANITIZERS thread address undefined none)
+set(MINJA_SANITIZER none CACHE STRING "minja: sanitizer to use")
+set_property(CACHE MINJA_SANITIZER PROPERTY STRINGS ${MINJA_SANITIZERS})
+
+if (NOT MSVC AND NOT CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC" AND NOT MINJA_SANITIZER STREQUAL "none")
+    message(STATUS "Using -fsanitize=${MINJA_SANITIZER}")
+    add_compile_options("-fsanitize=${MINJA_SANITIZER}")
+    link_libraries ("-fsanitize=${MINJA_SANITIZER}")
+endif()
 
 set(CMAKE_CXX_STANDARD 17)
 
@@ -50,6 +61,11 @@ set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>DLL")
 set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
 if (NOT MSVC)
     add_compile_options(-Wall -Wextra -pedantic -Werror)
+    # GCC 13+ has false-positive maybe-uninitialized warnings with address sanitizer
+    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105562
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND MINJA_SANITIZER STREQUAL "address")
+        add_compile_options(-Wno-maybe-uninitialized)
+    endif()
 endif()
 
 include(FetchContent)
@@ -108,10 +124,13 @@ if(MINJA_TEST_ENABLED)
     message(STATUS "Python executable: ${Python_EXECUTABLE}")
 endif()
 
-find_program(CPPCHECK cppcheck)
-if(CPPCHECK)
-    set(CMAKE_CXX_CPPCHECK "${CPPCHECK}" -i ${json_SOURCE_DIR}/include/nlohmann/json.hpp)
-    message(STATUS "cppcheck found: ${CPPCHECK}")
+# cppcheck has issues on Windows (missing std.cfg), so we only enable it on non-Windows
+if(NOT WIN32)
+    find_program(CPPCHECK cppcheck)
+    if(CPPCHECK)
+        set(CMAKE_CXX_CPPCHECK "${CPPCHECK}" -i ${json_SOURCE_DIR}/include/nlohmann/json.hpp)
+        message(STATUS "cppcheck found: ${CPPCHECK}")
+    endif()
 endif()
 
 include(GNUInstallDirs)
@@ -135,6 +154,8 @@ if(MINJA_EXAMPLE_ENABLED)
     add_subdirectory(examples)
 endif()
 
+add_subdirectory(tools)
+
 if(MINJA_TEST_ENABLED)
     enable_testing()
     include(GoogleTest)
diff --git a/README.md b/README.md
index 5981079..dcfbfb0 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # minja.hpp - A minimalistic C++ Jinja templating engine for LLM chat templates
 
-_**This is not an official Google product**_
+_**Used to be at https://github.com/google/minja, but I've left Google and I'll only maintain my fork from now on**_
 
 Minja is a minimalistic reimplementation of the [Jinja](https://github.com/pallets/jinja/) templating engine to integrate in/with C++ LLM projects (it's used in [llama.cpp](https://github.com/ggerganov/llama.cpp/pull/11016), [Jan](https://jan.ai/) (through [cortex.cpp](https://github.com/menloresearch/cortex.cpp/pull/1814)), [GPT4All](https://github.com/nomic-ai/gpt4all/pull/3433) and [Docker Model Runner](https://github.com/docker/model-runner)).
 
@@ -212,6 +212,26 @@ Main limitations (non-exhaustive list):
     ./scripts/fuzzing_tests.sh
     ```
 
+- Sanitizer tests (sanitizer names must be lowercase, since `MINJA_SANITIZER` is compared case-sensitively and passed straight to `-fsanitize=`):
+
+  ```bash
+  for sanitizer in address thread undefined ; do
+    docker run --rm \
+      -v "$PWD":/src:ro \
+      -v "$PWD/build-sanitizer-${sanitizer}":/src/build \
+      -w /src \
+      "$(echo "
+        FROM ghcr.io/astral-sh/uv:debian-slim
+        RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev cmake clang-tidy
+      " | docker build . 
-q -f - )" \ + bash -c " + cmake -B build -DCMAKE_BUILD_TYPE=Debug -DMINJA_SANITIZER=${sanitizer} && \ + cmake --build build -j --config Debug && \ + ctest --test-dir build -j -C Debug --output-on-failure + " + done + ``` + - If your model's template doesn't run fine, please consider the following before [opening a bug](https://github.com/googlestaging/minja/issues/new): - Is the template using any unsupported filter / test / method / global function, and which one(s)? diff --git a/include/minja/chat-template.hpp b/include/minja/chat-template.hpp index d31fb90..7c76c1b 100644 --- a/include/minja/chat-template.hpp +++ b/include/minja/chat-template.hpp @@ -28,6 +28,17 @@ using json = nlohmann::ordered_json; namespace minja { +// Format used by a template to represent reasoning/thinking content +enum class ReasoningFormat { + NONE, // Template doesn't support reasoning + REASONING_CONTENT_FIELD, // message.reasoning_content field (Qwen3, GLM-4.6/4.7) - canonical format + THINKING_CONTENT_BLOCK, // message.content[].type == "thinking" (Ministral, DeepSeek-R1) + THOUGHTS_CONTENT_BLOCK, // message.content[].type == "thoughts" (Apertus) + THOUGHT_FIELD, // message.thought field (MiniCPM3) + TOOL_PLAN_FIELD, // message.tool_plan field (Command-R7B) + THINKING_FIELD, // message.thinking field (GPT-OSS-120B) +}; + struct chat_template_caps { bool supports_tools = false; bool supports_tool_calls = false; @@ -40,8 +51,23 @@ struct chat_template_caps { bool requires_object_arguments = false; // CohereForAI/c4ai-command-r-plus simple variant bool requires_non_null_content = false; - // MiniMaxAI/MiniMax-Text-01 special - bool requires_typed_content = false; + // Template expects content as typed blocks: [{type: "text", text: ...}] instead of plain string + bool requires_typed_content_blocks = false; + + // Reasoning capabilities (extended thinking / chain-of-thought) + bool supports_reasoning = false; // Template supports some form of reasoning + ReasoningFormat reasoning_format = ReasoningFormat::NONE; + bool reasoning_requires_tools = false; // Reasoning only works when tool_calls present (Command-R7B) + bool reasoning_requires_suffix_position = false; // Reasoning hidden for last non-tool-call assistant (Kimi K2) + + // Reasoning behavior flags (computed via detection probes) + bool supports_reasoning_without_content = false; // Can emit reasoning with empty/null content + bool supports_reasoning_with_content = false; // Can emit both reasoning and content together + bool respects_enable_reasoning = false; // Template responds to enable_thinking=false + + // Whether template supports reasoning visibility control (GLM-4.7's clear_thinking flag) + // When clear_thinking=false, all reasoning is shown; when true/default, position-based visibility + bool supports_clear_thinking = false; }; struct chat_template_inputs { @@ -65,6 +91,8 @@ struct chat_template_options { bool polyfill_system_role = true; bool polyfill_object_arguments = true; bool polyfill_typed_content = true; + // Convert reasoning_content to template's native format (thought, thinking, tool_plan) + bool polyfill_reasoning = true; }; class chat_template { @@ -124,16 +152,17 @@ class chat_template { const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}}; const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}}; - caps_.requires_typed_content = + caps_.requires_typed_content_blocks = !contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), 
user_needle) && contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle); - const auto dummy_user_msg = caps_.requires_typed_content + const auto uses_blocks = caps_.requires_typed_content_blocks; + const auto dummy_user_msg = uses_blocks ? dummy_typed_user_msg : dummy_str_user_msg; const json needle_system_msg = { {"role", "system"}, - {"content", caps_.requires_typed_content ? json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)}, + {"content", uses_blocks ? json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)}, }; caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle); @@ -192,18 +221,24 @@ class chat_template { }; }; const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}}; + const auto contains_arg_needle = [&](const std::string & out_str) { + return contains(out_str, "") + || contains(out_str, "\"argument_needle\"") + || contains(out_str, "'argument_needle':") + || contains(out_str, ">argument_needle<"); + }; // Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want. out = try_raw_render(json::array({ dummy_user_msg, make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})), }), {}, false); - auto tool_call_renders_str_arguments = contains(out, "") || contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); + auto tool_call_renders_str_arguments = contains_arg_needle(out); out = try_raw_render(json::array({ dummy_user_msg, make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})), }), {}, false); - auto tool_call_renders_obj_arguments = contains(out, "") || contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); + auto tool_call_renders_obj_arguments = contains_arg_needle(out); caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments; caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments; @@ -232,6 +267,238 @@ class chat_template { caps_.supports_tool_call_id = contains(out, "call_911_"); } + // Detect thinking / reasoning capabilities + const std::string reasoning_needle = ""; + auto make_assistant_msg = [&](const json & extra_fields, const json & content = json()) { + json msg = {{"role", "assistant"}}; + for (auto & [key, val] : extra_fields.items()) { + msg[key] = val; + } + if (!content.is_null()) { + msg["content"] = content; + } else if (caps_.requires_non_null_content) { + msg["content"] = ""; + } + return msg; + }; + + // Pattern A: reasoning_content field (Qwen3, GLM-4.6/4.7) + // Test both with and without tool_calls to catch position-based templates like Kimi K2 + // that only show reasoning for certain message positions + out = try_raw_render(json::array({ + dummy_user_msg, + make_assistant_msg({{"reasoning_content", reasoning_needle}}), + }), {}, false); + bool supports_reasoning_content = contains(out, reasoning_needle); + bool reasoning_content_requires_tools = false; + // Also test with tool_calls for position-based templates (e.g., Kimi K2) + // that only show reasoning for messages with tool_calls + if (!supports_reasoning_content && caps_.supports_tool_calls) { + auto dummy_args = caps_.requires_object_arguments ? dummy_args_obj : json(dummy_args_obj.dump()); + json reasoning_with_tools_msg = { + {"role", "assistant"}, + {"content", caps_.requires_non_null_content ? 
"" : json()}, + {"reasoning_content", reasoning_needle}, + {"tool_calls", json::array({make_tool_call("test_tool", dummy_args)})}, + }; + out = try_raw_render(json::array({ + dummy_user_msg, + reasoning_with_tools_msg, + }), {}, false); + supports_reasoning_content = contains(out, reasoning_needle); + if (supports_reasoning_content) { + // Reasoning only works with tool_calls for this template (position-based visibility) + reasoning_content_requires_tools = true; + } + } + + // Pattern D: thought field (MiniCPM3) + out = try_raw_render(json::array({ + dummy_user_msg, + make_assistant_msg({{"thought", reasoning_needle}}, "response"), + }), {}, false); + bool supports_thought_field = contains(out, reasoning_needle); + + // Pattern F: thinking field (GPT-OSS-120B style) + out = try_raw_render(json::array({ + dummy_user_msg, + make_assistant_msg({{"thinking", reasoning_needle}}, "response"), + }), {}, false); + bool supports_thinking_field = contains(out, reasoning_needle); + + // Pattern B: content blocks with type="thinking" (Ministral) + // To detect stringification, we check if the output contains structural markers + // like '"type"' or "'type'" which would appear in serialized JSON/Python + json THINKING_CONTENT_BLOCK_msg = { + {"role", "assistant"}, + {"content", json::array({ + {{"type", "thinking"}, {"thinking", reasoning_needle}}, + {{"type", "text"}, {"text", "response"}} + })} + }; + out = try_raw_render(json::array({dummy_user_msg, THINKING_CONTENT_BLOCK_msg}), {}, false); + // Real support: needle appears but structural markers don't (template extracts content) + // Stringified: needle appears with structural markers (template just serializes the object) + bool supports_THINKING_CONTENT_BLOCK = contains(out, reasoning_needle) + && !contains(out, "\"type\"") && !contains(out, "'type'"); + + // Pattern C: content blocks with type="thoughts" (Apertus) + json THOUGHTS_CONTENT_BLOCK_msg = { + {"role", "assistant"}, + {"content", json::array({ + {{"type", "thoughts"}, {"text", reasoning_needle}}, + {{"type", "text"}, {"text", "response"}} + })} + }; + out = try_raw_render(json::array({dummy_user_msg, THOUGHTS_CONTENT_BLOCK_msg}), {}, false); + bool supports_THOUGHTS_CONTENT_BLOCK = contains(out, reasoning_needle) + && !contains(out, "\"type\"") && !contains(out, "'type'"); + + // Pattern E: tool_plan field (Command-R7B) - requires tool_calls + bool supports_tool_plan_field = false; + if (caps_.supports_tool_calls) { + auto dummy_args = caps_.requires_object_arguments ? dummy_args_obj : json(dummy_args_obj.dump()); + json tool_plan_msg = { + {"role", "assistant"}, + {"content", caps_.requires_non_null_content ? 
"" : json()}, + {"tool_plan", reasoning_needle}, + {"tool_calls", json::array({make_tool_call("test_tool", dummy_args)})}, + }; + out = try_raw_render(json::array({ + dummy_user_msg, + tool_plan_msg, + }), {}, false); + supports_tool_plan_field = contains(out, reasoning_needle); + } + + // Determine the primary reasoning format (in priority order) + // Field-based patterns are checked first as they are more specific + // Content block patterns are checked last as many templates just stringify unknown content + if (supports_reasoning_content) { + caps_.supports_reasoning = true; + caps_.reasoning_format = ReasoningFormat::REASONING_CONTENT_FIELD; + if (reasoning_content_requires_tools) { + // Position-based templates like Kimi K2 only show reasoning for messages with tool_calls + caps_.reasoning_requires_tools = true; + } + } else if (supports_thought_field) { + caps_.supports_reasoning = true; + caps_.reasoning_format = ReasoningFormat::THOUGHT_FIELD; + } else if (supports_thinking_field) { + caps_.supports_reasoning = true; + caps_.reasoning_format = ReasoningFormat::THINKING_FIELD; + } else if (supports_tool_plan_field) { + caps_.supports_reasoning = true; + caps_.reasoning_format = ReasoningFormat::TOOL_PLAN_FIELD; + caps_.reasoning_requires_tools = true; + } else if (supports_THINKING_CONTENT_BLOCK) { + caps_.supports_reasoning = true; + caps_.reasoning_format = ReasoningFormat::THINKING_CONTENT_BLOCK; + // Note: Don't override requires_typed_content_blocks - it's detected separately. + // Templates using content block reasoning may or may not require typed content for all messages. + } else if (supports_THOUGHTS_CONTENT_BLOCK) { + caps_.supports_reasoning = true; + caps_.reasoning_format = ReasoningFormat::THOUGHTS_CONTENT_BLOCK; + // Note: Don't override requires_typed_content_blocks - it's detected separately. 
+ } + + // Test reasoning visibility control (GLM-4.7's clear_thinking pattern) + // When clear_thinking=false is passed, template should show all reasoning + if (caps_.reasoning_format == ReasoningFormat::REASONING_CONTENT_FIELD) { + // Test with multiple assistant messages and clear_thinking=false + const std::string first_reasoning = ""; + const std::string second_reasoning = ""; + json extra_ctx = {{"clear_thinking", false}}; + out = try_raw_render(json::array({ + dummy_user_msg, + make_assistant_msg({{"reasoning_content", first_reasoning}}, "first"), + dummy_user_msg, + make_assistant_msg({{"reasoning_content", second_reasoning}}, "second"), + }), {}, false, extra_ctx); + // If both reasonings are visible with clear_thinking=false, template supports it + caps_.supports_clear_thinking = contains(out, first_reasoning) && contains(out, second_reasoning); + } + + // Test reasoning behavior flags for templates that support reasoning + if (caps_.supports_reasoning) { + const std::string reasoning_test = ""; + const std::string content_test = ""; + + // Helper to create assistant message with reasoning in the template's native format + auto make_reasoning_msg = [&](const std::string& reasoning, const std::string& content) -> json { + json msg = {{"role", "assistant"}}; + switch (caps_.reasoning_format) { + case ReasoningFormat::REASONING_CONTENT_FIELD: + msg["reasoning_content"] = reasoning; + msg["content"] = content; + break; + case ReasoningFormat::THOUGHT_FIELD: + msg["thought"] = reasoning; + msg["content"] = content; + break; + case ReasoningFormat::THINKING_FIELD: + msg["thinking"] = reasoning; + msg["content"] = content; + break; + case ReasoningFormat::TOOL_PLAN_FIELD: { + // tool_plan requires tool_calls to be present + auto dummy_args = caps_.requires_object_arguments ? dummy_args_obj : json(dummy_args_obj.dump()); + msg["content"] = caps_.requires_non_null_content ? "" : json(); + msg["tool_plan"] = reasoning; + msg["tool_calls"] = json::array({make_tool_call("test_tool", dummy_args)}); + break; + } + case ReasoningFormat::THINKING_CONTENT_BLOCK: + msg["content"] = json::array({ + {{"type", "thinking"}, {"thinking", reasoning}}, + {{"type", "text"}, {"text", content}} + }); + break; + case ReasoningFormat::THOUGHTS_CONTENT_BLOCK: + msg["content"] = json::array({ + {{"type", "thoughts"}, {"text", reasoning}}, + {{"type", "text"}, {"text", content}} + }); + break; + default: + break; + } + return msg; + }; + + // Test supports_reasoning_without_content: can template emit reasoning with empty content? + // Skip for TOOL_PLAN_FIELD since it requires tool_calls which have different semantics + if (caps_.reasoning_format != ReasoningFormat::TOOL_PLAN_FIELD) { + out = try_raw_render(json::array({ + dummy_user_msg, + make_reasoning_msg(reasoning_test, ""), + }), {}, false); + caps_.supports_reasoning_without_content = contains(out, reasoning_test); + } + + // Test supports_reasoning_with_content: can template emit both reasoning and content together? + // Skip for TOOL_PLAN_FIELD since tool calls don't have regular content + if (caps_.reasoning_format != ReasoningFormat::TOOL_PLAN_FIELD) { + out = try_raw_render(json::array({ + dummy_user_msg, + make_reasoning_msg(reasoning_test, content_test), + }), {}, false); + caps_.supports_reasoning_with_content = contains(out, reasoning_test) && contains(out, content_test); + } + + // Test respects_enable_reasoning: does template honor enable_thinking=false? 
+ // Only test for REASONING_CONTENT_FIELD format where this flag is commonly used (Qwen3) + if (caps_.reasoning_format == ReasoningFormat::REASONING_CONTENT_FIELD) { + json disable_ctx = {{"enable_thinking", false}}; + out = try_raw_render(json::array({ + dummy_user_msg, + make_reasoning_msg(reasoning_test, content_test), + }), {}, false, disable_ctx); + // If reasoning disappears but content remains when enable_thinking=false, template respects it + caps_.respects_enable_reasoning = !contains(out, reasoning_test) && contains(out, content_test); + } + } + try { if (!caps_.supports_tools) { const json user_msg { @@ -336,6 +603,7 @@ class chat_template { auto has_tool_calls = false; auto has_tool_responses = false; auto has_string_content = false; + auto has_reasoning_content = false; for (const auto & message : inputs.messages) { if (message.contains("tool_calls") && !message["tool_calls"].is_null()) { has_tool_calls = true; @@ -346,6 +614,9 @@ class chat_template { if (message.contains("content") && message["content"].is_string()) { has_string_content = true; } + if (message.contains("reasoning_content") && !message["reasoning_content"].is_null()) { + has_reasoning_content = true; + } } auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role; @@ -354,7 +625,12 @@ class chat_template { auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls; auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses; auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments; - auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content; + auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content_blocks; + // Polyfill reasoning_content to template's native format when template supports + // a different reasoning format than REASONING_CONTENT_FIELD (the canonical format) + auto polyfill_reasoning = opts.polyfill_reasoning && has_reasoning_content + && caps_.reasoning_format != ReasoningFormat::NONE + && caps_.reasoning_format != ReasoningFormat::REASONING_CONTENT_FIELD; auto needs_polyfills = opts.apply_polyfills && (false || polyfill_system_role @@ -363,20 +639,30 @@ class chat_template { || polyfill_tool_responses || polyfill_object_arguments || polyfill_typed_content + || polyfill_reasoning ); if (needs_polyfills) { actual_messages = json::array(); + // Helper to build typed content array from string or existing array + auto build_content_array = [](const json & content) -> json { + json content_blocks = json::array(); + if (content.is_string()) { + content_blocks.push_back({{"type", "text"}, {"text", content}}); + } else if (content.is_array()) { + for (const auto & block : content) { + content_blocks.push_back(block); + } + } + return content_blocks; + }; + auto add_message = [&](const json & msg) { if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) { - actual_messages.push_back({ - {"role", msg.at("role")}, - {"content", {{ - {"type", "text"}, - {"text", msg.at("content")}, - }}}, - }); + auto adjusted = msg; + adjusted["content"] = build_content_array(msg.at("content")); + actual_messages.push_back(adjusted); } else { actual_messages.push_back(msg); } @@ -470,6 +756,56 @@ class chat_template { message.erase("name"); } + // Polyfill 
reasoning_content to template's native format
+            if (polyfill_reasoning && message.contains("reasoning_content") && !message["reasoning_content"].is_null()) {
+                auto reasoning = message["reasoning_content"];
+                switch (caps_.reasoning_format) {
+                    case ReasoningFormat::THOUGHT_FIELD:
+                        // MiniCPM3 style: message.thought
+                        message["thought"] = reasoning;
+                        break;
+                    case ReasoningFormat::THINKING_FIELD:
+                        // GPT-OSS-120B style: message.thinking
+                        message["thinking"] = reasoning;
+                        break;
+                    case ReasoningFormat::TOOL_PLAN_FIELD:
+                        // Command-R7B style: message.tool_plan (only with tool_calls)
+                        if (message.contains("tool_calls")) {
+                            message["tool_plan"] = reasoning;
+                        }
+                        break;
+                    case ReasoningFormat::THINKING_CONTENT_BLOCK:
+                        // Ministral style: content blocks with type="thinking"
+                        {
+                            json content_blocks = json::array();
+                            content_blocks.push_back({{"type", "thinking"}, {"thinking", reasoning}});
+                            if (message.contains("content") && !message["content"].is_null()) {
+                                for (const auto & block : build_content_array(message["content"])) {
+                                    content_blocks.push_back(block);
+                                }
+                            }
+                            message["content"] = content_blocks;
+                        }
+                        break;
+                    case ReasoningFormat::THOUGHTS_CONTENT_BLOCK:
+                        // Apertus style: content blocks with type="thoughts"
+                        {
+                            json content_blocks = json::array();
+                            content_blocks.push_back({{"type", "thoughts"}, {"text", reasoning}});
+                            if (message.contains("content") && !message["content"].is_null()) {
+                                for (const auto & block : build_content_array(message["content"])) {
+                                    content_blocks.push_back(block);
+                                }
+                            }
+                            message["content"] = content_blocks;
+                        }
+                        break;
+                    default:
+                        break;
+                }
+                message.erase("reasoning_content");
+            }
+
             if (!message["content"].is_null() && polyfill_system_role) {
                 std::string content = message.at("content");
                 if (role == "system") {
diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp
index 5ed0556..f8cd8f7 100644
--- a/include/minja/minja.hpp
+++ b/include/minja/minja.hpp
@@ -55,7 +55,7 @@ inline std::string normalize_newlines(const std::string & s) {
 }
 
 /* Values that behave roughly like in Python. */
-class Value : public std::enable_shared_from_this<Value> {
+class Value {
 public:
   using CallableType = std::function<Value(const std::shared_ptr<Context> &, ArgumentsValue &)>;
   using FilterType = std::function<Value(const std::shared_ptr<Context> &, ArgumentsValue &)>;
@@ -95,7 +95,7 @@ class Value : public std::enable_shared_from_this<Value> {
     }
     out << string_quote;
   }
-  void dump(std::ostringstream & out, int indent = -1, int level = 0, bool to_json = false) const {
+  void dump(std::ostringstream & out, int indent, int level, bool to_json, const std::string & item_sep, const std::string & key_sep) const {
    auto print_indent = [&](int level) {
      if (indent > 0) {
        out << "\n";
@@ -103,9 +103,11 @@
      }
    };
    auto print_sub_sep = [&]() {
-      out << ',';
-      if (indent < 0) out << ' ';
-      else print_indent(level + 1);
+      if (indent < 0) out << item_sep;
+      else {
+        out << ',';
+        print_indent(level + 1);
+      }
    };
    auto string_quote = to_json ? 
'"' : '\'';
@@ -116,7 +118,7 @@ class Value : public std::enable_shared_from_this<Value> {
      print_indent(level + 1);
      for (size_t i = 0; i < array_->size(); ++i) {
        if (i) print_sub_sep();
-        (*array_)[i].dump(out, indent, level + 1, to_json);
+        (*array_)[i].dump(out, indent, level + 1, to_json, item_sep, key_sep);
      }
      print_indent(level);
      out << "]";
@@ -130,8 +132,8 @@ class Value : public std::enable_shared_from_this<Value> {
        } else {
          out << string_quote << it->first.dump() << string_quote;
        }
-        out << ": ";
-        it->second.dump(out, indent, level + 1, to_json);
+        out << key_sep;
+        it->second.dump(out, indent, level + 1, to_json, item_sep, key_sep);
      }
      print_indent(level);
      out << "}";
@@ -158,12 +160,14 @@
  Value(const json & v) {
    if (v.is_object()) {
      auto object = std::make_shared<ObjectType>();
+      object->reserve(v.size());
      for (auto it = v.begin(); it != v.end(); ++it) {
-        (*object)[it.key()] = it.value();
+        object->emplace_back(it.key(), Value(it.value()));
      }
      object_ = std::move(object);
    } else if (v.is_array()) {
      auto array = std::make_shared<ArrayType>();
+      array->reserve(v.size());
      for (const auto& item : v) {
        array->push_back(Value(item));
      }
@@ -445,9 +449,9 @@
    throw std::runtime_error("get not defined for this value type: " + dump());
  }

-  std::string dump(int indent=-1, bool to_json=false) const {
+  std::string dump(int indent=-1, bool to_json=false, const std::string & item_sep = ", ", const std::string & key_sep = ": ") const {
    std::ostringstream out;
-    dump(out, indent, 0, to_json);
+    dump(out, indent, 0, to_json, item_sep, key_sep);
    return out.str();
  }

@@ -610,7 +614,7 @@ static std::string error_location_suffix(const std::string & source, size_t pos)
  return out.str();
}

-class Context : public std::enable_shared_from_this<Context> {
+class Context {
  protected:
    Value values_;
    std::shared_ptr<Context> parent_;
@@ -850,12 +854,12 @@ struct LoopControlTemplateToken : public TemplateToken {

struct CallTemplateToken : public TemplateToken {
    std::shared_ptr<Expression> expr;
-    CallTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr<Expression> && e) 
+    CallTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post, std::shared_ptr<Expression> && e)
      : TemplateToken(Type::Call, loc, pre, post), expr(std::move(e)) {}
};

struct EndCallTemplateToken : public TemplateToken {
-    EndCallTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post) 
+    EndCallTemplateToken(const Location & loc, SpaceHandling pre, SpaceHandling post)
      : TemplateToken(Type::EndCall, loc, pre, post) {}
};

@@ -1060,11 +1064,18 @@ class MacroNode : public TemplateNode {
            }
        }
    }
-    void do_render(std::ostringstream &, const std::shared_ptr<Context> & macro_context) const override {
+    void do_render(std::ostringstream &, const std::shared_ptr<Context> & context) const override {
        if (!name) throw std::runtime_error("MacroNode.name is null");
        if (!body) throw std::runtime_error("MacroNode.body is null");
-        auto callable = Value::callable([this, macro_context](const std::shared_ptr<Context> & call_context, ArgumentsValue & args) {
-            auto execution_context = Context::make(Value::object(), macro_context);
+
+        // Use init-capture to avoid dangling 'this' pointer and circular references
+        auto callable = Value::callable([weak_context = std::weak_ptr<Context>(context),
+                                         name = name, params = params, body = body,
+                                         named_param_positions = named_param_positions]
+                                        (const std::shared_ptr<Context> & call_context, ArgumentsValue & args) {
+            auto context_locked = weak_context.lock();
+            if (!context_locked) 
throw std::runtime_error("Macro context no longer valid");
+            auto execution_context = Context::make(Value::object(), context_locked);

            if (call_context->contains("caller")) {
              execution_context->set("caller", call_context->get("caller"));
@@ -1075,7 +1086,7 @@ class MacroNode : public TemplateNode {
                auto & arg = args.args[i];
                if (i >= params.size()) throw std::runtime_error("Too many positional arguments for macro " + name->get_name());
                param_set[i] = true;
-                auto & param_name = params[i].first;
+                const auto & param_name = params[i].first;
                execution_context->set(param_name, arg);
            }
            for (auto & [arg_name, value] : args.kwargs) {
@@ -1094,7 +1105,7 @@
            }
            return body->render(execution_context);
        });
-        macro_context->set(name->get_name(), callable);
+        context->set(name->get_name(), callable);
    }
};

@@ -1264,7 +1275,7 @@ class SubscriptExpr : public Expression {
            }
            return result;
-        } else if (target_value.is_array()) { 
+        } else if (target_value.is_array()) {
            auto result = Value::array();
            for (int64_t i = start; step > 0 ? i < end : i > end; i += step) {
                result.push_back(target_value.at(i));
@@ -1313,7 +1324,7 @@ static bool in(const Value & value, const Value & container) {
    return (((container.is_array() || container.is_object()) && container.contains(value)) ||
        (value.is_string() && container.is_string() &&
            container.to_str().find(value.to_str()) != std::string::npos));
-};
+}

class BinaryOpExpr : public Expression {
  public:
@@ -1458,6 +1469,7 @@ static std::vector<std::string> split(const std::string & s, const std::string &
static std::string capitalize(const std::string & s) {
    if (s.empty()) return s;
    auto result = s;
+    std::transform(result.begin(), result.end(), result.begin(), ::tolower);
    result[0] = std::toupper(result[0]);
    return result;
}
@@ -1640,13 +1652,17 @@ class CallNode : public TemplateNode {
    void do_render(std::ostringstream & out, const std::shared_ptr<Context> & context) const override {
        if (!expr) throw std::runtime_error("CallNode.expr is null");
        if (!body) throw std::runtime_error("CallNode.body is null");
-        
-        auto caller = Value::callable([this, context](const std::shared_ptr<Context> &, ArgumentsValue &) -> Value {
-            return Value(body->render(context));
+
+        // Use init-capture to avoid dangling 'this' pointer and circular references
+        auto caller = Value::callable([weak_context = std::weak_ptr<Context>(context), body=body]
+                                      (const std::shared_ptr<Context> &, ArgumentsValue &) -> Value {
+            auto context_locked = weak_context.lock();
+            if (!context_locked) throw std::runtime_error("Caller context no longer valid");
+            return Value(body->render(context_locked));
        });
-        
+
        context->set("caller", caller);
-        
+
        auto call_expr = dynamic_cast<CallExpr*>(expr.get());
        if (!call_expr) {
            throw std::runtime_error("Invalid call block syntax - expected function call");
@@ -1657,7 +1673,7 @@
            throw std::runtime_error("Call target must be callable: " + function.dump());
        }
        ArgumentsValue args = call_expr->args.evaluate(context);
-        
+
        Value result = function.call(context, args);
        out << result.to_str();
    }
@@ -2192,7 +2208,7 @@ class Parser {

        auto value = parseValue();

-        while (it != end && consumeSpaces() && peekSymbols({ "[", "." 
})) {
+        while (it != end && consumeSpaces() && peekSymbols({ "[", ".", "(" })) {
            if (!consumeToken("[").empty()) {
                std::shared_ptr<Expression> index;
                auto slice_loc = get_location();
@@ -2215,7 +2231,7 @@
                    }
                  }
                }
-                
+
                if ((has_first_colon || has_second_colon)) {
                    index = std::make_shared<SliceExpr>(slice_loc, std::move(start), std::move(end), std::move(step));
                } else {
@@ -2237,15 +2253,13 @@
                auto key = std::make_shared<LiteralExpr>(identifier->location, Value(identifier->get_name()));
                value = std::make_shared<SubscriptExpr>(identifier->location, std::move(value), std::move(key));
            }
+          } else if (peekSymbols({ "(" })) {
+            auto callParams = parseCallArgs();
+            value = std::make_shared<CallExpr>(get_location(), std::move(value), std::move(callParams));
          }
          consumeSpaces();
        }

-        if (peekSymbols({ "(" })) {
-          auto location = get_location();
-          auto callParams = parseCallArgs();
-          value = std::make_shared<CallExpr>(location, std::move(value), std::move(callParams));
-        }
        return value;
    }

@@ -2589,8 +2603,8 @@
          auto text = text_token->text;

          if (post_space == SpaceHandling::Strip) {
-            static std::regex trailing_space_regex(R"(\s+$)");
-            text = std::regex_replace(text, trailing_space_regex, "");
+            auto pos = text.find_last_not_of(" \t\n\r\f\v");
+            text.resize(pos == std::string::npos ? 0 : pos + 1);
          } else if (options.lstrip_blocks && it != end) {
            auto i = text.size();
            while (i > 0 && (text[i - 1] == ' ' || text[i - 1] == '\t')) i--;
@@ -2599,8 +2613,7 @@
            }
          }
          if (pre_space == SpaceHandling::Strip) {
-            static std::regex leading_space_regex(R"(^\s+)");
-            text = std::regex_replace(text, leading_space_regex, "");
+            text.erase(0, text.find_first_not_of(" \t\n\r\f\v"));
          } else if (options.trim_blocks && (it - 1) != begin && !dynamic_cast<ExpressionTemplateToken*>((*(it - 2)).get())) {
            if (!text.empty() && text[0] == '\n') {
              text.erase(0, 1);
@@ -2725,8 +2738,17 @@ inline std::shared_ptr<Context> Context::builtins() {
  globals.set("raise_exception", simple_function("raise_exception", { "message" }, [](const std::shared_ptr<Context> &, Value & args) -> Value {
    throw std::runtime_error(args.at("message").get<std::string>());
  }));
-  globals.set("tojson", simple_function("tojson", { "value", "indent" }, [](const std::shared_ptr<Context> &, Value & args) {
-    return Value(args.at("value").dump(args.get<int64_t>("indent", -1), /* to_json= */ true));
+  globals.set("tojson", simple_function("tojson", { "value", "indent", "ensure_ascii", "separators" }, [](const std::shared_ptr<Context> &, Value & args) {
+    std::string item_sep = ", ";
+    std::string key_sep = ": ";
+    if (args.contains("separators")) {
+      const auto & sep = args.at("separators");
+      if (sep.is_array() && sep.size() == 2) {
+        item_sep = sep.at(0).get<std::string>();
+        key_sep = sep.at(1).get<std::string>();
+      }
+    }
+    return Value(args.at("value").dump(args.get<int64_t>("indent", -1), /* to_json= */ true, item_sep, key_sep));
  }));
  globals.set("items", simple_function("items", { "object" }, [](const std::shared_ptr<Context> &, Value & args) {
    auto items = Value::array();
@@ -2741,6 +2763,12 @@
    }
    return items;
  }));
+  globals.set("first", simple_function("first", { "items" }, [](const std::shared_ptr<Context> &, Value & args) {
+    auto items = args.at("items");
+    if (!items.is_array()) throw std::runtime_error("object is not a list");
+    if (items.empty()) return Value();
+    return items.at(0);
+  }));
  globals.set("last", simple_function("last", { "items" }, [](const std::shared_ptr<Context> &, Value & args) {
    auto items = args.at("items");
    if (!items.is_array()) throw std::runtime_error("object is not a list");
@@ -2751,6 +2779,10 @@
  globals.set("strip", simple_function("strip", { "text" }, [](const std::shared_ptr<Context> &, Value & args) { 
auto & text = args.at("text");
    return text.is_null() ? text : Value(strip(text.get<std::string>()));
  }));
+  globals.set("capitalize", simple_function("capitalize", { "text" }, [](const std::shared_ptr<Context> &, Value & args) {
+    auto & text = args.at("text");
+    return text.is_null() ? text : Value(capitalize(text.get<std::string>()));
+  }));
  auto char_transform_function = [](const std::string & name, const std::function<char(char)> & fn) {
    return simple_function(name, { "text" }, [=](const std::shared_ptr<Context> &, Value & args) {
      auto text = args.at("text");
diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py
index acaf969..1280111 100644
--- a/scripts/fetch_templates_and_goldens.py
+++ b/scripts/fetch_templates_and_goldens.py
@@ -50,6 +50,8 @@ def strftime_now(format):
    now = datetime.datetime.strptime(TEST_DATE, "%Y-%m-%d")
    return now.strftime(format)

+def tojson(value, indent=None, ensure_ascii=False, sort_keys=False, separators=None):
+    return json.dumps(value, indent=indent, ensure_ascii=ensure_ascii, sort_keys=sort_keys, separators=separators)

def join_cmake_path(parent, child):
    '''
@@ -71,6 +73,17 @@ def add_system(messages, system_prompt):
        "content": system_prompt,
    })

+from enum import Enum
+
+class ReasoningFormat(Enum):
+    NONE = "NONE"
+    REASONING_CONTENT = "REASONING_CONTENT"  # message.reasoning_content (Qwen3, GLM-4.6/4.7) - canonical format
+    CONTENT_BLOCK_THINKING = "CONTENT_BLOCK_THINKING"  # content[].type == "thinking" (Ministral)
+    CONTENT_BLOCK_THOUGHTS = "CONTENT_BLOCK_THOUGHTS"  # content[].type == "thoughts" (Apertus)
+    THOUGHT_FIELD = "THOUGHT_FIELD"  # message.thought (MiniCPM3)
+    TOOL_PLAN_FIELD = "TOOL_PLAN_FIELD"  # message.tool_plan (Command-R7B)
+    THINKING_FIELD = "THINKING_FIELD"  # message.thinking (GPT-OSS-120B)
+
# data class
@dataclass
class TemplateCaps:
@@ -82,7 +95,16 @@ class TemplateCaps:
    supports_tool_call_id: bool = False
    requires_object_arguments: bool = False
    requires_non_null_content: bool = False
-    requires_typed_content: bool = False
+    requires_typed_content_blocks: bool = False
+    # Reasoning capabilities (extended thinking / chain-of-thought)
+    supports_reasoning: bool = False
+    reasoning_format: ReasoningFormat = ReasoningFormat.NONE
+    reasoning_requires_tools: bool = False
+    # Reasoning behavior flags
+    supports_reasoning_without_content: bool = False
+    supports_reasoning_with_content: bool = False
+    respects_enable_reasoning: bool = False
+    supports_clear_thinking: bool = False

    def to_json(self):
        return json.dumps({
@@ -94,7 +116,7 @@ def to_json(self):
            "supports_tool_call_id": self.supports_tool_call_id,
            "requires_object_arguments": self.requires_object_arguments,
            # "requires_non_null_content": self.requires_non_null_content,
-            "requires_typed_content": self.requires_typed_content,
+            "requires_typed_content_blocks": self.requires_typed_content_blocks,
        }, indent=2)

@@ -119,8 +141,11 @@ def __init__(self, template, env=None, filters=None, global_functions=None):
            env = jinja2.Environment(
                trim_blocks=True,
                lstrip_blocks=True,
-                extensions=[jinja2.ext.loopcontrols]
+                extensions=[jinja2.ext.loopcontrols],
            )
+        # https://jinja.palletsprojects.com/en/stable/api/#policies
+        env.policies["json.dumps_function"] = tojson
+        env.filters['tojson'] = tojson
        if filters:
            for name, func in filters.items():
                env.filters[name] = func
@@ -137,12 +162,12 @@ def __init__(self, template, env=None, filters=None, global_functions=None):
        dummy_str_user_msg = {"role": "user", "content": user_needle }
        dummy_typed_user_msg = {"role": "user", "content": [{"type": "text", "text": 
user_needle}]} - caps.requires_typed_content = \ + caps.requires_typed_content_blocks = \ (user_needle not in self.try_raw_render([dummy_str_user_msg])) \ and (user_needle in self.try_raw_render([dummy_typed_user_msg])) - dummy_user_msg = dummy_typed_user_msg if caps.requires_typed_content else dummy_str_user_msg + dummy_user_msg = dummy_typed_user_msg if caps.requires_typed_content_blocks else dummy_str_user_msg - needle_system_msg = {"role": "system", "content": [{"type": "text", "text": sys_needle}] if caps.requires_typed_content else sys_needle} + needle_system_msg = {"role": "system", "content": [{"type": "text", "text": sys_needle}] if caps.requires_typed_content_blocks else sys_needle} caps.supports_system_role = sys_needle in self.try_raw_render([needle_system_msg, dummy_user_msg]) @@ -187,17 +212,23 @@ def make_tool_call(tool_name, arguments): } dummy_args_obj = {"argument_needle": "print('Hello, World!')"} + contains_arg_needle = lambda out_str: ( + "" in out_str + or '"argument_needle"' in out_str + or "'argument_needle':" in out_str + or ">argument_needle<" in out_str + ) out = self.try_raw_render([ dummy_user_msg, make_tool_calls_msg([make_tool_call("ipython", json.dumps(dummy_args_obj))]), ]) - tool_call_renders_str_arguments = "" in out or '"argument_needle":' in out or "'argument_needle':" in out + tool_call_renders_str_arguments = contains_arg_needle(out) out = self.try_raw_render([ dummy_user_msg, make_tool_calls_msg([make_tool_call("ipython", dummy_args_obj)]), ]) - tool_call_renders_obj_arguments = "" in out or '"argument_needle":' in out or "'argument_needle':" in out + tool_call_renders_obj_arguments = contains_arg_needle(out) caps.supports_tool_calls = tool_call_renders_str_arguments or tool_call_renders_obj_arguments caps.requires_object_arguments = not tool_call_renders_str_arguments and tool_call_renders_obj_arguments @@ -267,11 +298,197 @@ def make_tool_call(tool_name, arguments): except Exception as e: print(f"Failed to generate tool call example: {e}", file=sys.stderr) + # Detect thinking / reasoning capabilities + reasoning_needle = "" + + def make_assistant_msg(extra_fields, content=None): + msg = {"role": "assistant"} + msg.update(extra_fields) + if content is not None: + msg["content"] = content + elif caps.requires_non_null_content: + msg["content"] = "" + return msg + + # Pattern A: reasoning_content field (Qwen3, GLM-4.6/4.7) + out = self.try_raw_render([ + dummy_user_msg, + make_assistant_msg({"reasoning_content": reasoning_needle}), + ]) + supports_reasoning_content = reasoning_needle in out + + # Pattern D: thought field (MiniCPM3) + out = self.try_raw_render([ + dummy_user_msg, + make_assistant_msg({"thought": reasoning_needle}, "response"), + ]) + supports_thought_field = reasoning_needle in out + + # Pattern F: thinking field (GPT-OSS-120B style) + out = self.try_raw_render([ + dummy_user_msg, + make_assistant_msg({"thinking": reasoning_needle}, "response"), + ]) + supports_reasoning_field = reasoning_needle in out + + # Pattern B: content blocks with type="thinking" (Ministral) + # To detect stringification, we check if the output contains structural markers + # like '"type"' or "'type'" which would appear in serialized JSON/Python + content_block_thinking_msg = { + "role": "assistant", + "content": [ + {"type": "thinking", "thinking": reasoning_needle}, + {"type": "text", "text": "response"} + ] + } + out = self.try_raw_render([dummy_user_msg, content_block_thinking_msg]) + # Real support: needle appears but structural markers don't (template 
extracts content) + # Stringified: needle appears with structural markers (template just serializes the object) + supports_content_block_thinking = reasoning_needle in out \ + and '"type"' not in out and "'type'" not in out + + # Pattern C: content blocks with type="thoughts" (Apertus) + content_block_thoughts_msg = { + "role": "assistant", + "content": [ + {"type": "thoughts", "text": reasoning_needle}, + {"type": "text", "text": "response"} + ] + } + out = self.try_raw_render([dummy_user_msg, content_block_thoughts_msg]) + supports_content_block_thoughts = reasoning_needle in out \ + and '"type"' not in out and "'type'" not in out + + # Pattern E: tool_plan field (Command-R7B) - requires tool_calls + supports_tool_plan_field = False + if caps.supports_tool_calls: + dummy_args = dummy_args_obj if caps.requires_object_arguments else json.dumps(dummy_args_obj) + tool_plan_msg = { + "role": "assistant", + "content": "" if caps.requires_non_null_content else None, + "tool_plan": reasoning_needle, + "tool_calls": [make_tool_call("test_tool", dummy_args)], + } + out = self.try_raw_render([ + dummy_user_msg, + tool_plan_msg, + ]) + supports_tool_plan_field = reasoning_needle in out + + # Determine the primary reasoning format (in priority order) + # Field-based patterns are checked first as they are more specific + # Content block patterns are checked last as many templates just stringify unknown content + if supports_reasoning_content: + caps.supports_reasoning = True + caps.reasoning_format = ReasoningFormat.REASONING_CONTENT + elif supports_thought_field: + caps.supports_reasoning = True + caps.reasoning_format = ReasoningFormat.THOUGHT_FIELD + elif supports_reasoning_field: + caps.supports_reasoning = True + caps.reasoning_format = ReasoningFormat.THINKING_FIELD + elif supports_tool_plan_field: + caps.supports_reasoning = True + caps.reasoning_format = ReasoningFormat.TOOL_PLAN_FIELD + caps.reasoning_requires_tools = True + elif supports_content_block_thinking: + caps.supports_reasoning = True + caps.reasoning_format = ReasoningFormat.CONTENT_BLOCK_THINKING + elif supports_content_block_thoughts: + caps.supports_reasoning = True + caps.reasoning_format = ReasoningFormat.CONTENT_BLOCK_THOUGHTS + + # Test clear_thinking support (GLM-4.7 pattern) + if caps.reasoning_format == ReasoningFormat.REASONING_CONTENT: + first_reasoning = "" + second_reasoning = "" + out = self.try_raw_render([ + dummy_user_msg, + make_assistant_msg({"reasoning_content": first_reasoning}, "first"), + dummy_user_msg, + make_assistant_msg({"reasoning_content": second_reasoning}, "second"), + ], extra_context={"clear_thinking": False}) + caps.supports_clear_thinking = first_reasoning in out and second_reasoning in out + + # Test reasoning behavior flags for templates that support reasoning + if caps.supports_reasoning: + reasoning_test = "" + content_test = "" + + # Helper to create assistant message with reasoning in the template's native format + def make_reasoning_msg(reasoning: str, content: str) -> dict: + fmt = caps.reasoning_format + if fmt == ReasoningFormat.REASONING_CONTENT: + return {"role": "assistant", "reasoning_content": reasoning, "content": content} + elif fmt == ReasoningFormat.THOUGHT_FIELD: + return {"role": "assistant", "thought": reasoning, "content": content} + elif fmt == ReasoningFormat.THINKING_FIELD: + return {"role": "assistant", "thinking": reasoning, "content": content} + elif fmt == ReasoningFormat.TOOL_PLAN_FIELD: + dummy_args = dummy_args_obj if caps.requires_object_arguments else 
json.dumps(dummy_args_obj) + return { + "role": "assistant", + "content": "" if caps.requires_non_null_content else None, + "tool_plan": reasoning, + "tool_calls": [make_tool_call("test_tool", dummy_args)] + } + elif fmt == ReasoningFormat.CONTENT_BLOCK_THINKING: + return { + "role": "assistant", + "content": [ + {"type": "thinking", "thinking": reasoning}, + {"type": "text", "text": content} + ] + } + elif fmt == ReasoningFormat.CONTENT_BLOCK_THOUGHTS: + return { + "role": "assistant", + "content": [ + {"type": "thoughts", "text": reasoning}, + {"type": "text", "text": content} + ] + } + return {"role": "assistant", "content": content} + + # Test supports_reasoning_without_content: can template emit reasoning with empty content? + # Skip for TOOL_PLAN_FIELD since it requires tool_calls which have different semantics + if caps.reasoning_format != ReasoningFormat.TOOL_PLAN_FIELD: + out = self.try_raw_render([dummy_user_msg, make_reasoning_msg(reasoning_test, "")]) + caps.supports_reasoning_without_content = reasoning_test in out + + # Test supports_reasoning_with_content: can template emit both reasoning and content together? + # Skip for TOOL_PLAN_FIELD since tool calls don't have regular content + if caps.reasoning_format != ReasoningFormat.TOOL_PLAN_FIELD: + out = self.try_raw_render([dummy_user_msg, make_reasoning_msg(reasoning_test, content_test)]) + caps.supports_reasoning_with_content = reasoning_test in out and content_test in out + + # Test respects_enable_reasoning: does template honor enable_thinking=false? + # Only test for REASONING_CONTENT format where this flag is commonly used (Qwen3) + if caps.reasoning_format == ReasoningFormat.REASONING_CONTENT: + out = self.try_raw_render( + [dummy_user_msg, make_reasoning_msg(reasoning_test, content_test)], + extra_context={"enable_thinking": False} + ) + # If reasoning disappears but content remains when enable_thinking=false, template respects it + caps.respects_enable_reasoning = reasoning_test not in out and content_test in out + self.original_caps = caps def needs_polyfills(self, context): has_tools = context.get('tools') is not None caps = self.original_caps + + # Check if any message has reasoning_content that needs polyfilling + has_reasoning_content = any( + msg.get('reasoning_content') is not None + for msg in context.get('messages', []) + ) + # Polyfill reasoning_content to template's native format when template supports + # a different reasoning format than REASONING_CONTENT (the canonical format) + needs_reasoning_polyfill = has_reasoning_content \ + and caps.reasoning_format != ReasoningFormat.NONE \ + and caps.reasoning_format != ReasoningFormat.REASONING_CONTENT + return not caps.supports_system_role \ or (has_tools is not None and (False \ or not caps.supports_tools \ @@ -279,7 +496,8 @@ def needs_polyfills(self, context): or not caps.supports_tool_calls \ or caps.requires_object_arguments \ )) \ - or caps.requires_typed_content + or caps.requires_typed_content_blocks \ + or needs_reasoning_polyfill def apply(self, context: dict): assert isinstance(context, dict) @@ -329,7 +547,50 @@ def apply(self, context: dict): }, indent=2) del message['name'] - if caps.requires_typed_content: + # Polyfill reasoning_content to template's native format + should_polyfill_reasoning = caps.reasoning_format not in ( + ReasoningFormat.NONE, + ReasoningFormat.REASONING_CONTENT, + ) + if should_polyfill_reasoning and 'reasoning_content' in message and message['reasoning_content'] is not None: + reasoning = message['reasoning_content'] + if 
caps.reasoning_format == ReasoningFormat.THOUGHT_FIELD: + # MiniCPM3 style: message.thought + message['thought'] = reasoning + del message['reasoning_content'] + elif caps.reasoning_format == ReasoningFormat.THINKING_FIELD: + # GPT-OSS-120B style: message.thinking + message['thinking'] = reasoning + del message['reasoning_content'] + elif caps.reasoning_format == ReasoningFormat.TOOL_PLAN_FIELD: + # Command-R7B style: message.tool_plan (only with tool_calls) + if 'tool_calls' in message: + message['tool_plan'] = reasoning + del message['reasoning_content'] + elif caps.reasoning_format == ReasoningFormat.CONTENT_BLOCK_THINKING: + # Ministral style: content blocks with type="thinking" + content_blocks = [{"type": "thinking", "thinking": reasoning}] + original_content = message.get('content') + if original_content is not None: + if isinstance(original_content, str): + content_blocks.append({"type": "text", "text": original_content}) + elif isinstance(original_content, list): + content_blocks.extend(original_content) + message['content'] = content_blocks + del message['reasoning_content'] + elif caps.reasoning_format == ReasoningFormat.CONTENT_BLOCK_THOUGHTS: + # Apertus style: content blocks with type="thoughts" + content_blocks = [{"type": "thoughts", "text": reasoning}] + original_content = message.get('content') + if original_content is not None: + if isinstance(original_content, str): + content_blocks.append({"type": "text", "text": original_content}) + elif isinstance(original_content, list): + content_blocks.extend(original_content) + message['content'] = content_blocks + del message['reasoning_content'] + + if caps.requires_typed_content_blocks: for message in context['messages']: if 'content' in message and isinstance(message['content'], str): message['content'] = [{"type": "text", "text": message['content']}] @@ -369,7 +630,7 @@ async def handle_chat_template(output_folder, model_id, variant, template_src, c caps_file = join_cmake_path(output_folder, f'{base_name}.caps.json') - async with aiofiles.open(template_file, 'w') as f: + async with aiofiles.open(template_file, 'w', encoding='utf-8', newline='\n') as f: await f.write(template_src) template = chat_template(template_src, @@ -386,7 +647,7 @@ async def handle_chat_template(output_folder, model_id, variant, template_src, c print(f"{template_file} {caps_file} n/a {template_file}") return - async with aiofiles.open(caps_file, 'w') as f: + async with aiofiles.open(caps_file, 'w', encoding='utf-8', newline='\n') as f: await f.write(caps.to_json()) assert isinstance(contexts, list) @@ -404,7 +665,7 @@ async def handle_chat_template(output_folder, model_id, variant, template_src, c output_file = join_cmake_path(output_folder, f'{base_name}-{context.name}.txt') output = template.apply(context.bindings) - async with aiofiles.open(output_file, 'w') as f: + async with aiofiles.open(output_file, 'w', encoding='utf-8', newline='\n') as f: await f.write(output) print(f"{template_file} {caps_file} {context.file} {output_file}") @@ -420,6 +681,16 @@ async def async_hf_download(repo_id: str, filename: str) -> str: async def process_model(output_folder: str, model_id: str, contexts: list[Context]): try: print(f"Processing model {model_id}...", file=sys.stderr) + + # Handle local .jinja files directly (for synthetic test templates) + if model_id.endswith('.jinja') and os.path.isfile(model_id): + async with aiofiles.open(model_id, 'r', encoding='utf-8') as f: + chat_template = await f.read() + # Use filename without extension as model_id for output 
naming + synthetic_id = os.path.basename(model_id).replace('.jinja', '') + await handle_chat_template(output_folder, synthetic_id, None, chat_template, contexts) + return + config_str = await async_hf_download(model_id, "tokenizer_config.json") try: @@ -465,7 +736,7 @@ async def main(): model_ids = [] for file in args.json_context_files_or_model_ids: if file.endswith('.json'): - async with aiofiles.open(file, 'r') as f: + async with aiofiles.open(file, 'r', encoding='utf-8') as f: contexts.append(Context( name=os.path.basename(file).replace(".json", ""), file=file, diff --git a/scripts/render.py b/scripts/render.py index 0de5d45..68acba4 100644 --- a/scripts/render.py +++ b/scripts/render.py @@ -11,11 +11,15 @@ import jinja2.ext from pathlib import Path +def tojson(value, indent=None, ensure_ascii=False, sort_keys=False, separators=None): + return json.dumps(value, indent=indent, ensure_ascii=ensure_ascii, sort_keys=sort_keys, separators=separators) + input_file, output_file = sys.argv[1:3] data = json.loads(Path(input_file).read_text()) # print(json.dumps(data, indent=2), file=sys.stderr) env = Environment(**data['options'], extensions=[jinja2.ext.loopcontrols]) +env.filters['tojson'] = tojson tmpl = env.from_string(data['template']) output = tmpl.render(data['bindings']) Path(output_file).write_text(output) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index db82c2d..27eba25 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -40,6 +40,10 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Windows" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ar target_compile_definitions(test-polyfills PUBLIC _CRT_SECURE_NO_WARNINGS) target_compile_options(gtest PRIVATE -Wno-language-extension-token) endif() +# GCC/MinGW on Windows needs -Wa,-mbig-obj for large debug builds due to COFF section limits +if (MINGW AND CMAKE_BUILD_TYPE STREQUAL "Debug") + target_compile_options(test-polyfills PRIVATE -Wa,-mbig-obj) +endif() target_link_libraries(test-polyfills PRIVATE minja gtest_main @@ -91,239 +95,113 @@ set(MODEL_IDS # # For Gated models, you'll need to run `huggingface-cli login` (and be granted access) to download their template. 
+ BEE-spoke-data/tFINE-900m-instruct-orpo + CohereForAI/aya-expanse-8b + CohereForAI/c4ai-command-r-plus + CohereForAI/c4ai-command-r7b-12-2024 + Delta-Vector/Rei-12B + HelpingAI/HAI-SER + HuggingFaceTB/SmolLM2-1.7B-Instruct + HuggingFaceTB/SmolLM3-3B + Infinigence/Megrez-3B-Instruct + LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct + MiniMaxAI/MiniMax-Text-01 + MiniMaxAI/MiniMax-VL-01 + NousResearch/Hermes-3-Llama-3.1-70B + OnlyCheeini/greesychat-turbo + OrionStarAI/Orion-14B-Chat + PowerInfer/SmallThinker-3B-Preview + PrimeIntellect/INTELLECT-1-Instruct + Qwen/QVQ-72B-Preview + Qwen/QwQ-32B + Qwen/QwQ-32B-Preview + Qwen/Qwen1.5-7B-Chat + Qwen/Qwen2-VL-7B-Instruct + Qwen/Qwen2.5-7B + Qwen/Qwen2.5-7B-Instruct + Qwen/Qwen2.5-Math-7B-Instruct + Qwen/Qwen3-235B-A22B-Instruct-2507 + Qwen/Qwen3-235B-A22B-Thinking-2507 + Qwen/Qwen3-4B + Qwen/Qwen3-Coder-30B-A3B-Instruct + SakanaAI/TinySwallow-1.5B-Instruct + THUDM/glm-4-9b-chat + THUDM/glm-edge-1.5b-chat + TheBloke/FusionNet_34Bx2_MoE-AWQ + TinyLlama/TinyLlama-1.1B-Chat-v1.0 + UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3 abacusai/Fewshot-Metamath-OrcaVicuna-Mistral - allenai/Llama-3.1-Tulu-3-405B - allenai/Llama-3.1-Tulu-3-405B-SFT allenai/Llama-3.1-Tulu-3-8B - arcee-ai/Virtuoso-Lite arcee-ai/Virtuoso-Medium-v2 - arcee-ai/Virtuoso-Small-v2 - AtlaAI/Selene-1-Mini-Llama-3.1-8B avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI - BEE-spoke-data/tFINE-900m-instruct-orpo - bespokelabs/Bespoke-Stratos-7B bfuzzy1/acheron-m1a-llama bofenghuang/vigogne-2-70b-chat bytedance-research/UI-TARS-72B-DPO - bytedance-research/UI-TARS-7B-DPO - bytedance-research/UI-TARS-7B-SFT carsenk/phi3.5_mini_exp_825_uncensored - CohereForAI/aya-expanse-8b - CohereForAI/c4ai-command-r-plus - CohereForAI/c4ai-command-r7b-12-2024 - cyberagent/DeepSeek-R1-Distill-Qwen-14B-Japanese - cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese databricks/dbrx-instruct - DavieLion/Llama-3.2-1B-SPIN-iter3 - deepseek-ai/deepseek-coder-33b-instruct - deepseek-ai/deepseek-coder-6.7b-instruct - deepseek-ai/deepseek-coder-7b-instruct-v1.5 - deepseek-ai/DeepSeek-Coder-V2-Instruct - deepseek-ai/DeepSeek-Coder-V2-Lite-Base - deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct - deepseek-ai/deepseek-llm-67b-chat - deepseek-ai/deepseek-llm-7b-chat deepseek-ai/DeepSeek-R1-Distill-Llama-70B - deepseek-ai/DeepSeek-R1-Distill-Llama-8B - deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B - deepseek-ai/DeepSeek-R1-Distill-Qwen-14B - deepseek-ai/DeepSeek-R1-Distill-Qwen-32B - deepseek-ai/DeepSeek-R1-Distill-Qwen-7B deepseek-ai/DeepSeek-V2-Lite deepseek-ai/DeepSeek-V2.5 deepseek-ai/DeepSeek-V3 - Delta-Vector/Rei-12B + deepseek-ai/DeepSeek-V3.1 + # deepseek-ai/DeepSeek-V3.2 # No Jinja template; see synthetic below + deepseek-ai/deepseek-coder-7b-instruct-v1.5 dicta-il/dictalm2.0-instruct ehristoforu/Falcon3-8B-Franken-Basestruct - EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math - FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit - FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit - godlikehhd/alpaca_data_sampled_ifd_new_5200 - godlikehhd/alpaca_data_score_max_0.7_2600 - google/gemma-2-27b-it - google/gemma-2-2b-it - google/gemma-2-2b-jpn-it google/gemma-7b-it - HelpingAI/HAI-SER - HuggingFaceTB/SmolLM2-1.7B-Instruct - HuggingFaceTB/SmolLM2-135M-Instruct - HuggingFaceTB/SmolLM2-360M-Instruct - HuggingFaceTB/SmolLM3-3B - huihui-ai/DeepSeek-R1-Distill-Llama-70B-abliterated - huihui-ai/DeepSeek-R1-Distill-Llama-8B-abliterated - huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2 - 
huihui-ai/DeepSeek-R1-Distill-Qwen-32B-abliterated - huihui-ai/DeepSeek-R1-Distill-Qwen-7B-abliterated-v2 - huihui-ai/Qwen2.5-14B-Instruct-1M-abliterated ibm-granite/granite-3.1-8b-instruct - Ihor/Text2Graph-R1-Qwen2.5-0.5b + ibm-granite/granite-3.3-2b-instruct inclusionAI/Ling-Coder-lite indischepartij/MiniCPM-3B-OpenHermes-2.5-v2 - Infinigence/Megrez-3B-Instruct - inflatebot/MN-12B-Mag-Mell-R1 - INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0 jinaai/ReaderLM-v2 - Josephgflowers/TinyLlama_v1.1_math_code-world-test-1 - kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath - knifeayumu/Cydonia-v1.3-Magnum-v4-22B langgptai/qwen1.5-7b-chat-sa-v0.1 - LatitudeGames/Wayfarer-12B llava-hf/llava-1.5-7b-hf - LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct - LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct - lightblue/DeepSeek-R1-Distill-Qwen-7B-Japanese - Magpie-Align/Llama-3-8B-Magpie-Align-v0.1 - Magpie-Align/Llama-3.1-8B-Magpie-Align-v0.1 - mattshumer/Reflection-Llama-3.1-70B - MaziyarPanahi/calme-3.2-instruct-78b meetkai/functionary-medium-v3.1 meetkai/functionary-medium-v3.2 + moonshotai/Kimi-K2-Instruct meta-llama/Llama-2-7b-chat-hf meta-llama/Llama-3.1-8B-Instruct - meta-llama/Llama-3.2-1B-Instruct meta-llama/Llama-3.2-3B-Instruct - meta-llama/Llama-3.3-70B-Instruct meta-llama/Meta-Llama-3-8B-Instruct - meta-llama/Meta-Llama-3.1-8B-Instruct microsoft/Phi-3-medium-4k-instruct microsoft/Phi-3-mini-4k-instruct microsoft/Phi-3-small-8k-instruct microsoft/Phi-3.5-mini-instruct microsoft/Phi-3.5-vision-instruct microsoft/phi-4 - migtissera/Tess-3-Mistral-Nemo-12B - MiniMaxAI/MiniMax-Text-01 - MiniMaxAI/MiniMax-VL-01 ministral/Ministral-3b-instruct mistralai/Codestral-22B-v0.1 mistralai/Mistral-7B-Instruct-v0.1 - mistralai/Mistral-7B-Instruct-v0.2 mistralai/Mistral-7B-Instruct-v0.3 - mistralai/Mistral-Large-Instruct-2407 mistralai/Mistral-Large-Instruct-2411 mistralai/Mistral-Nemo-Instruct-2407 mistralai/Mistral-Small-24B-Instruct-2501 - mistralai/Mixtral-8x7B-Instruct-v0.1 mkurman/Qwen2.5-14B-DeepSeek-R1-1M mlabonne/AlphaMonarch-7B mlx-community/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32 - mlx-community/Qwen2.5-VL-7B-Instruct-8bit - mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-1.5B-v1.1 - NaniDAO/deepseek-r1-qwen-2.5-32B-ablated netcat420/MFANNv0.20 - netcat420/MFANNv0.24 - netease-youdao/Confucius-o1-14B - NexaAIDev/Octopus-v2 - NousResearch/Hermes-2-Pro-Llama-3-8B - NousResearch/Hermes-2-Pro-Mistral-7B - NousResearch/Hermes-3-Llama-3.1-70B - NovaSky-AI/Sky-T1-32B-Flash - NovaSky-AI/Sky-T1-32B-Preview - nvidia/AceMath-7B-RM - nvidia/Eagle2-1B nvidia/Eagle2-9B nvidia/Llama-3.1-Nemotron-70B-Instruct-HF - OnlyCheeini/greesychat-turbo onnx-community/DeepSeek-R1-Distill-Qwen-1.5B-ONNX - open-thoughts/OpenThinker-7B openbmb/MiniCPM3-4B openchat/openchat-3.5-0106 - Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2 - OrionStarAI/Orion-14B-Chat - pankajmathur/orca_mini_v6_8b - PowerInfer/SmallThinker-3B-Preview - PrimeIntellect/INTELLECT-1-Instruct - princeton-nlp/Mistral-7B-Base-SFT-RDPO princeton-nlp/Mistral-7B-Instruct-DPO - princeton-nlp/Mistral-7B-Instruct-RDPO prithivMLmods/Bellatrix-Tiny-1.5B-R1 prithivMLmods/Bellatrix-Tiny-1B-R1 prithivMLmods/Bellatrix-Tiny-1B-v3 - prithivMLmods/Bellatrix-Tiny-3B-R1 - prithivMLmods/Blaze-14B-xElite - prithivMLmods/Calcium-Opus-14B-Elite2-R1 - prithivMLmods/Calme-Ties-78B - prithivMLmods/Calme-Ties2-78B - prithivMLmods/Calme-Ties3-78B - prithivMLmods/ChemQwen2-vL - prithivMLmods/GWQ2b - prithivMLmods/LatexMind-2B-Codec - prithivMLmods/Llama-3.2-6B-AlgoCode - prithivMLmods/Megatron-Opus-14B-Exp - 
prithivMLmods/Megatron-Opus-14B-Stock - prithivMLmods/Megatron-Opus-7B-Exp - prithivMLmods/Omni-Reasoner-Merged - prithivMLmods/Omni-Reasoner4-Merged - prithivMLmods/Primal-Opus-14B-Optimus-v1 - prithivMLmods/Qwen-7B-Distill-Reasoner - prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct - prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M - prithivMLmods/Qwen2.5-32B-DeepSeek-R1-Instruct - prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M - prithivMLmods/QwQ-Math-IO-500M - prithivMLmods/Triangulum-v2-10B - Qwen/QVQ-72B-Preview - Qwen/Qwen1.5-7B-Chat - Qwen/Qwen2-7B-Instruct - Qwen/Qwen2-VL-72B-Instruct - Qwen/Qwen2-VL-7B-Instruct - Qwen/Qwen2.5-0.5B - Qwen/Qwen2.5-1.5B-Instruct - Qwen/Qwen2.5-14B - Qwen/Qwen2.5-14B-Instruct-1M - Qwen/Qwen2.5-32B - Qwen/Qwen2.5-32B-Instruct - Qwen/Qwen2.5-3B-Instruct - Qwen/Qwen2.5-72B-Instruct - Qwen/Qwen2.5-7B - Qwen/Qwen2.5-7B-Instruct - Qwen/Qwen2.5-7B-Instruct-1M - Qwen/Qwen2.5-Coder-32B-Instruct - Qwen/Qwen2.5-Coder-7B-Instruct - Qwen/Qwen2.5-Math-1.5B - Qwen/Qwen2.5-Math-7B-Instruct - Qwen/Qwen2.5-VL-3B-Instruct - Qwen/Qwen2.5-VL-72B-Instruct - Qwen/Qwen2.5-VL-7B-Instruct - Qwen/QwQ-32B-Preview rubenroy/Zurich-14B-GCv2-5m rubenroy/Zurich-7B-GCv2-5m - RWKV-Red-Team/ARWKV-7B-Preview-0.1 - SakanaAI/TinySwallow-1.5B - SakanaAI/TinySwallow-1.5B-Instruct - Sao10K/70B-L3.3-Cirrus-x1 - SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B - SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B - silma-ai/SILMA-Kashif-2B-Instruct-v1.0 - simplescaling/s1-32B sometimesanotion/Lamarck-14B-v0.7 - sonthenguyen/zephyr-sft-bnb-4bit-DPO-mtbr-180steps - Steelskull/L3.3-Damascus-R1 - Steelskull/L3.3-MS-Nevoria-70b - Steelskull/L3.3-Nevoria-R1-70b sthenno/tempesthenno-icy-0130 - sumink/qwft - Tarek07/Progenitor-V1.1-LLaMa-70B teknium/OpenHermes-2.5-Mistral-7B - TheBloke/FusionNet_34Bx2_MoE-AWQ - thirdeyeai/elevate360m - THUDM/glm-4-9b-chat - THUDM/glm-edge-1.5b-chat tiiuae/Falcon3-10B-Instruct - TinyLlama/TinyLlama-1.1B-Chat-v1.0 - UCLA-AGI/Mistral7B-PairRM-SPPO-Iter3 - unsloth/DeepSeek-R1-Distill-Llama-8B - unsloth/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit - unsloth/Mistral-Small-24B-Instruct-2501-unsloth-bnb-4bit upstage/solar-pro-preview-instruct - ValiantLabs/Llama3.1-8B-Enigma - xwen-team/Xwen-72B-Chat xwen-team/Xwen-7B-Chat - Qwen/Qwen3-4B - Qwen/Qwen3-235B-A22B-Instruct-2507 - Qwen/Qwen3-235B-A22B-Thinking-2507 - Qwen/Qwen3-Coder-30B-A3B-Instruct - Qwen/QwQ-32B + zai-org/GLM-4.6 + zai-org/GLM-4.7 + + # Synthetic templates for models without Jinja templates + ${CMAKE_CURRENT_SOURCE_DIR}/synthetic-deepseek-v3.2-dsml.jinja # Broken, TODO: # ai21labs/AI21-Jamba-1.5-Large # https://github.com/google/minja/issues/8 @@ -334,6 +212,7 @@ set(MODEL_IDS # HuggingFaceTB/SmolVLM-256M-Instruct # HuggingFaceTB/SmolVLM-500M-Instruct # HuggingFaceTB/SmolVLM-Instruct + # unsloth/MiniMax-M2 # https://github.com/ochafik/minja/pull/7#issuecomment-3478459580 # meta-llama/Llama-3.2-11B-Vision-Instruct # unsloth/DeepSeek-R1 ) @@ -371,6 +250,13 @@ foreach(test_case ${CHAT_TEMPLATE_TEST_CASES}) set_tests_properties(test-supported-template-${test_name} PROPERTIES SKIP_RETURN_CODE 127) endforeach() +# Test to ensure no duplicate templates exist +add_test( + NAME test-no-duplicate-templates + COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_no_duplicate_templates.py ${CMAKE_CURRENT_BINARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} +) + if (MINJA_FUZZTEST_ENABLED) if (MINJA_FUZZTEST_FUZZING_MODE) message(STATUS "Fuzzing mode enabled") diff --git a/tests/contexts/reasoning_clear_thinking.json 
b/tests/contexts/reasoning_clear_thinking.json new file mode 100644 index 0000000..00b9c2b --- /dev/null +++ b/tests/contexts/reasoning_clear_thinking.json @@ -0,0 +1,31 @@ +{ + "messages": [ + { + "role": "user", + "content": "What is 2+2?" + }, + { + "role": "assistant", + "reasoning_content": "Simple \"arithmetic\": 2+2=4", + "content": "It's \"4\"." + }, + { + "role": "user", + "content": "And 3+3?" + }, + { + "role": "assistant", + "reasoning_content": "Similarly: 3+3=6", + "content": "6" + } + ], + "add_generation_prompt": true, + "clear_thinking": false, + "bos_token": "<|startoftext|>", + "eos_token": "<|endoftext|>", + "_test_metadata": { + "_comment": "clear_thinking=false should show ALL reasoning. Quote in reasoning_content and content tests non-stringification.", + "expected_strings_if_supports_reasoning": ["Simple \"arithmetic\": 2+2=4", "Similarly: 3+3=6"], + "forbidden_strings": ["\"reasoning_content\"", "\\\"arithmetic\\\"", "\\\"4\\\""] + } +} diff --git a/tests/contexts/reasoning_disabled.json b/tests/contexts/reasoning_disabled.json new file mode 100644 index 0000000..05e89fd --- /dev/null +++ b/tests/contexts/reasoning_disabled.json @@ -0,0 +1,21 @@ +{ + "messages": [ + { + "role": "user", + "content": "Quick answer: what is 2+2?" + }, + { + "role": "assistant", + "content": "It's \"4\"." + } + ], + "add_generation_prompt": true, + "enable_thinking": false, + "bos_token": "<|startoftext|>", + "eos_token": "<|endoftext|>", + "_test_metadata": { + "_comment": "enable_thinking=false disables thinking mode. Quote in content tests non-stringification.", + "expected_strings": ["Quick answer: what is 2+2?", "It's \"4\"."], + "forbidden_strings": ["\"reasoning_content\"", "\\\"4\\\""] + } +} diff --git a/tests/contexts/reasoning_multi_turn.json b/tests/contexts/reasoning_multi_turn.json new file mode 100644 index 0000000..6c98ae5 --- /dev/null +++ b/tests/contexts/reasoning_multi_turn.json @@ -0,0 +1,39 @@ +{ + "messages": [ + { + "role": "user", + "content": "Let's solve a puzzle step by step" + }, + { + "role": "assistant", + "reasoning_content": "This is a multi-step problem. Let me break it down.", + "content": "Sure, let's work through it together." + }, + { + "role": "user", + "content": "First clue: the number is even" + }, + { + "role": "assistant", + "reasoning_content": "An even number... that narrows it to 2, 4, 6, 8...", + "content": "Noted. What's the next clue?" + }, + { + "role": "user", + "content": "It's less than 5" + }, + { + "role": "assistant", + "reasoning_content": "Even and less than 5 means it's \"either\" 2 or 4.", + "content": "The number must be \"2 or 4\"!" + } + ], + "add_generation_prompt": true, + "bos_token": "<|startoftext|>", + "eos_token": "<|endoftext|>", + "_test_metadata": { + "_comment": "Multi-turn reasoning. Quote in reasoning_content and content tests non-stringification.", + "expected_strings_if_supports_reasoning": ["Even and less than 5 means it's \"either\" 2 or 4.", "The number must be \"2 or 4\"!"], + "forbidden_strings": ["\"reasoning_content\"", "\\\"either\\\"", "\\\"2 or 4\\\""] + } +} diff --git a/tests/contexts/reasoning_only.json b/tests/contexts/reasoning_only.json new file mode 100644 index 0000000..60d3fdb --- /dev/null +++ b/tests/contexts/reasoning_only.json @@ -0,0 +1,21 @@ +{ + "messages": [ + { + "role": "user", + "content": "What is 2+2?" + }, + { + "role": "assistant", + "reasoning_content": "Let me calculate: 2+2 equals 4.", + "content": "The answer is \"four\"." 
+ } + ], + "add_generation_prompt": true, + "bos_token": "<|startoftext|>", + "eos_token": "<|endoftext|>", + "_test_metadata": { + "_comment": "For templates with supports_reasoning=true, reasoning should appear in output. Quote in content tests for non-stringification.", + "expected_strings_if_supports_reasoning": ["Let me calculate: 2+2 equals 4.", "The answer is \"four\"."], + "forbidden_strings": ["\"reasoning_content\"", "\\\"four\\\""] + } +} diff --git a/tests/contexts/reasoning_position_based.json b/tests/contexts/reasoning_position_based.json new file mode 100644 index 0000000..e8f89d7 --- /dev/null +++ b/tests/contexts/reasoning_position_based.json @@ -0,0 +1,30 @@ +{ + "messages": [ + { + "role": "user", + "content": "What is 2+2?" + }, + { + "role": "assistant", + "reasoning_content": "Simple arithmetic: 2+2=4", + "content": "4" + }, + { + "role": "user", + "content": "And 3+3?" + }, + { + "role": "assistant", + "reasoning_content": "Similarly: \"3+3\"=6", + "content": "It's \"6\"." + } + ], + "add_generation_prompt": true, + "bos_token": "<|startoftext|>", + "eos_token": "<|endoftext|>", + "_test_metadata": { + "_comment": "Position-based: only last reasoning shown. Quote in reasoning_content and content tests non-stringification.", + "expected_strings_if_supports_reasoning": ["Similarly: \"3+3\"=6", "It's \"6\"."], + "forbidden_strings": ["\"reasoning_content\"", "\\\"3+3\\\"", "\\\"6\\\""] + } +} diff --git a/tests/contexts/reasoning_with_tools.json b/tests/contexts/reasoning_with_tools.json new file mode 100644 index 0000000..4d5b336 --- /dev/null +++ b/tests/contexts/reasoning_with_tools.json @@ -0,0 +1,61 @@ +{ + "messages": [ + { + "role": "user", + "content": "Calculate 15% tip on $50" + }, + { + "role": "assistant", + "reasoning_content": "I need to calculate \"15%\" of $50. Let me use the calculator tool.", + "content": "", + "tool_calls": [ + { + "id": "call_1___", + "type": "function", + "function": { + "name": "calculator", + "arguments": "{\"expression\": \"50 * 0.15\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_1___", + "name": "calculator", + "content": "7.5" + }, + { + "role": "assistant", + "reasoning_content": "The calculation returned 7.5, so the tip is $7.50.", + "content": "A 15% tip on $50 is \"$7.50\"." + } + ], + "add_generation_prompt": true, + "bos_token": "<|startoftext|>", + "eos_token": "<|endoftext|>", + "tools": [ + { + "type": "function", + "function": { + "name": "calculator", + "description": "Evaluate a mathematical expression", + "parameters": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "The mathematical expression to evaluate." + } + }, + "required": ["expression"] + } + } + } + ], + "_test_metadata": { + "_comment": "Reasoning with tool calls. Quote in reasoning_content and content tests non-stringification.", + "expected_strings_if_supports_reasoning": ["I need to calculate \"15%\" of $50", "A 15% tip on $50 is \"$7.50\"."], + "forbidden_strings": ["\"reasoning_content\"", "\\\"15%\\\"", "\\\"$7.50\\\""] + } +} diff --git a/tests/contexts/simple.json b/tests/contexts/simple.json index 5e89f22..e158995 100644 --- a/tests/contexts/simple.json +++ b/tests/contexts/simple.json @@ -6,11 +6,16 @@ }, { "role": "assistant", - "content": "llama.cpp!" + "content": "I'd say \"llama.cpp\"!" 
} ], "add_generation_prompt": true, "bos_token": "<|startoftext|>", "eos_token": "<|endoftext|>", - "tools_in_user_message": false + "tools_in_user_message": false, + "_test_metadata": { + "_comment": "Basic conversation without tools or system message. Quote in content tests non-stringification.", + "expected_strings": ["What's your favourite LLM framework?", "I'd say \"llama.cpp\"!"], + "forbidden_strings": ["\\\"llama.cpp\\\""] + } } diff --git a/tests/contexts/system.json b/tests/contexts/system.json index 7cbc5c2..7cef6a6 100644 --- a/tests/contexts/system.json +++ b/tests/contexts/system.json @@ -2,7 +2,7 @@ "messages": [ { "role": "system", - "content": "You only tell the truth." + "content": "You only tell \"the truth\"." }, { "role": "user", @@ -16,5 +16,10 @@ "add_generation_prompt": true, "bos_token": "<|startoftext|>", "eos_token": "<|endoftext|>", - "tools_in_user_message": false + "tools_in_user_message": false, + "_test_metadata": { + "_comment": "Conversation with system message. Quote in system content tests non-stringification.", + "expected_strings": ["What's your favourite LLM framework?", "llama.cpp!"], + "forbidden_strings": ["\\\"the truth\\\""] + } } diff --git a/tests/contexts/tool_plan_reasoning.json b/tests/contexts/tool_plan_reasoning.json new file mode 100644 index 0000000..51c77d2 --- /dev/null +++ b/tests/contexts/tool_plan_reasoning.json @@ -0,0 +1,100 @@ +{ + "messages": [ + { + "role": "user", + "content": "What's the weather in Paris and convert it to Fahrenheit?" + }, + { + "role": "assistant", + "reasoning_content": "I need to first get the weather in \"Paris\", then convert the temperature.", + "content": "", + "tool_calls": [ + { + "id": "call_1___", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"city\": \"Paris\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_1___", + "name": "get_weather", + "content": "{\"temperature\": 20, \"unit\": \"celsius\", \"condition\": \"sunny\"}" + }, + { + "role": "assistant", + "reasoning_content": "Got 20°C. Now I need to convert: F = C * 9/5 + 32 = 20 * 1.8 + 32 = 68°F", + "content": "", + "tool_calls": [ + { + "id": "call_2___", + "type": "function", + "function": { + "name": "convert_temperature", + "arguments": "{\"celsius\": 20}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_2___", + "name": "convert_temperature", + "content": "{\"fahrenheit\": 68}" + }, + { + "role": "assistant", + "content": "The weather in Paris is sunny at \"twenty\" degrees (68°F)." + } + ], + "add_generation_prompt": true, + "bos_token": "<|startoftext|>", + "eos_token": "<|endoftext|>", + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a city", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The city name" + } + }, + "required": ["city"] + } + } + }, + { + "type": "function", + "function": { + "name": "convert_temperature", + "description": "Convert Celsius to Fahrenheit", + "parameters": { + "type": "object", + "properties": { + "celsius": { + "type": "number", + "description": "Temperature in Celsius" + } + }, + "required": ["celsius"] + } + } + } + ], + "_test_metadata": { + "_comment": "Multi-step tool use with reasoning. 
Quote in reasoning_content and content tests non-stringification.", + "expected_strings": ["The weather in Paris is sunny at \"twenty\" degrees (68°F)."], + "expected_strings_if_supports_tool_calls": ["get_weather", "convert_temperature"], + "expected_strings_if_supports_reasoning": ["I need to first get the weather in \"Paris\"", "convert: F = C * 9/5 + 32"], + "forbidden_strings": ["\"reasoning_content\"", "\"tool_plan\"", "\\\"Paris\\\"", "\\\"twenty\\\""] + } +}
diff --git a/tests/contexts/tool_use.json b/tests/contexts/tool_use.json index cca70cb..5c09881 100644 --- a/tests/contexts/tool_use.json +++ b/tests/contexts/tool_use.json @@ -26,7 +26,7 @@ }, { "role": "assistant", - "content": "Anything else?" + "content": "Anything \"else\"?" }, { "role": "user", @@ -164,5 +164,19 @@ }, "type": "function" } - ] + ], + "_test_metadata": { + "_comment": "Complex tool use scenario with multiple tool calls and responses", + "expected_strings": [ + "Print a hello world message with python.", + "Anything \"else\"?", + "Test a tautology.", + "Truth is definitely true.", + "Check it on the web.", + "I don't need the web to answer you but I did check, as you asked. What now?" + ], + "forbidden_strings": ["\\\"else\\\""], + "expected_strings_if_supports_tool_calls": ["ipython", "test", "brave_search"], + "expected_strings_if_supports_tool_responses": ["Hello, World!"] + } } \ No newline at end of file
diff --git a/tests/synthetic-deepseek-v3.2-dsml.jinja b/tests/synthetic-deepseek-v3.2-dsml.jinja new file mode 100644 index 0000000..72044f5 --- /dev/null +++ b/tests/synthetic-deepseek-v3.2-dsml.jinja @@ -0,0 +1,42 @@ +{# Synthetic template based on DeepSeek V3.2 DSML format (encoding_dsv32.py) #} +{# V3.2 doesn't provide a Jinja template, so this replicates its Python encoding logic #} +{%- set bos_token = "<|begin▁of▁sentence|>" -%} +{%- set eos_token = "<|end▁of▁sentence|>" -%} +{%- set dsml_token = "|DSML|" -%} +{{ bos_token }} +{%- for message in messages -%} +{%- if message.role == 'system' -%} +{{ message.content }} +{%- elif message.role == 'user' -%} +<|User|>{{ message.content }}<|Assistant|> +{%- elif message.role == 'assistant' -%} +{%- if message.tool_calls is defined and message.tool_calls -%} +<{{ dsml_token }}function_calls> +{%- for tool_call in message.tool_calls -%} +{%- if tool_call.type == 'function' -%} +<{{ dsml_token }}invoke name="{{ tool_call.function.name }}"> +{%- if tool_call.function.arguments is mapping -%} +{%- for key, value in tool_call.function.arguments.items() -%} +{%- if value is string -%} +<{{ dsml_token }}parameter name="{{ key }}" string="true">{{ value }}</{{ dsml_token }}parameter> +{%- else -%} +<{{ dsml_token }}parameter name="{{ key }}" string="false">{{ value | tojson }}</{{ dsml_token }}parameter> +{%- endif -%} +{%- endfor -%} +{%- endif -%} +</{{ dsml_token }}invoke> +{%- endif -%} +{%- endfor -%} +</{{ dsml_token }}function_calls> +{%- endif -%} +{%- if message.content -%} +{{ message.content }} +{%- endif -%} +{{ eos_token }} +{%- elif message.role == 'tool' -%} +<{{ dsml_token }}tool_result>{{ message.content }}</{{ dsml_token }}tool_result> +{%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} +<|Assistant|> +{%- endif -%}
diff --git a/tests/test-capabilities.cpp b/tests/test-capabilities.cpp index 458f9b9..f2d85d8 100644 --- a/tests/test-capabilities.cpp +++ b/tests/test-capabilities.cpp @@ -39,6 +39,19 @@ static std::string read_file(const std::string &path) return out; } +static std::string reasoning_format_to_string(minja::ReasoningFormat format) { + switch (format) { + case minja::ReasoningFormat::NONE: return "NONE"; + case minja::ReasoningFormat::REASONING_CONTENT_FIELD:
return "REASONING_CONTENT_FIELD"; + case minja::ReasoningFormat::THINKING_CONTENT_BLOCK: return "THINKING_CONTENT_BLOCK"; + case minja::ReasoningFormat::THOUGHTS_CONTENT_BLOCK: return "THOUGHTS_CONTENT_BLOCK"; + case minja::ReasoningFormat::THOUGHT_FIELD: return "THOUGHT_FIELD"; + case minja::ReasoningFormat::TOOL_PLAN_FIELD: return "TOOL_PLAN_FIELD"; + case minja::ReasoningFormat::THINKING_FIELD: return "THINKING_FIELD"; + default: return "UNKNOWN"; + } +} + static minja::chat_template_caps get_caps(const std::string &path) { auto caps = minja::chat_template(read_file(path), "", "").original_caps(); @@ -53,11 +66,20 @@ static minja::chat_template_caps get_caps(const std::string &path) print("supports_system_role", caps.supports_system_role); print("supports_tools", caps.supports_tools); print("supports_tool_calls", caps.supports_tool_calls); + print("supports_tool_call_id", caps.supports_tool_call_id); print("supports_tool_responses", caps.supports_tool_responses); print("supports_parallel_tool_calls", caps.supports_parallel_tool_calls); print("requires_object_arguments", caps.requires_object_arguments); - // print("requires_non_null_content", caps.requires_non_null_content); - print("requires_typed_content", caps.requires_typed_content); + print("requires_non_null_content", caps.requires_non_null_content); + print("requires_typed_content", caps.requires_typed_content_blocks); + // Reasoning capabilities (extended thinking / chain-of-thought) + print("supports_reasoning", caps.supports_reasoning); + print("reasoning_requires_tools", caps.reasoning_requires_tools); + print("supports_reasoning_without_content", caps.supports_reasoning_without_content); + print("supports_reasoning_with_content", caps.supports_reasoning_with_content); + print("respects_enable_reasoning", caps.respects_enable_reasoning); + print("supports_clear_thinking", caps.supports_clear_thinking); + std::cout << " EXPECT_EQ(caps.reasoning_format, minja::ReasoningFormat::" << reasoning_format_to_string(caps.reasoning_format) << ");" << std::endl; std::cout << "}\n" << std::endl; return caps; @@ -68,11 +90,12 @@ TEST(CapabilitiesTest, Gemma7b) { EXPECT_FALSE(caps.supports_system_role); EXPECT_FALSE(caps.supports_tools); EXPECT_FALSE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_FALSE(caps.supports_tool_responses); EXPECT_FALSE(caps.supports_parallel_tool_calls); EXPECT_FALSE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, QwQ32B) { @@ -80,11 +103,12 @@ TEST(CapabilitiesTest, QwQ32B) { EXPECT_TRUE(caps.supports_system_role); EXPECT_TRUE(caps.supports_tools); EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_TRUE(caps.supports_tool_responses); EXPECT_TRUE(caps.supports_parallel_tool_calls); EXPECT_TRUE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_TRUE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, Qwen3Coder) { @@ -92,38 +116,40 @@ TEST(CapabilitiesTest, Qwen3Coder) { EXPECT_TRUE(caps.supports_system_role); EXPECT_TRUE(caps.supports_tools); EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_TRUE(caps.supports_tool_responses); 
EXPECT_TRUE(caps.supports_parallel_tool_calls); EXPECT_TRUE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } #ifndef _WIN32 -TEST(CapabilitiesTest, DeepSeekR1Distill) -{ - auto caps = get_caps("tests/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja"); +TEST(CapabilitiesTest, DeepSeekR1Distill) { + auto caps = get_caps("tests/deepseek-ai-DeepSeek-R1-Distill-Llama-70B.jinja"); EXPECT_TRUE(caps.supports_system_role); EXPECT_FALSE(caps.supports_tools); EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_TRUE(caps.supports_tool_responses); EXPECT_TRUE(caps.supports_parallel_tool_calls); EXPECT_FALSE(caps.requires_object_arguments); - // EXPECT_FALSE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } -#endif +#endif // _WIN32 TEST(CapabilitiesTest, FunctionaryMediumV3_2) { auto caps = get_caps("tests/meetkai-functionary-medium-v3.2.jinja"); EXPECT_TRUE(caps.supports_system_role); EXPECT_TRUE(caps.supports_tools); EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_TRUE(caps.supports_tool_responses); EXPECT_TRUE(caps.supports_parallel_tool_calls); EXPECT_FALSE(caps.requires_object_arguments); - // EXPECT_FALSE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, MetaLlama3_1_8BInstruct) { @@ -131,11 +157,12 @@ TEST(CapabilitiesTest, MetaLlama3_1_8BInstruct) { EXPECT_TRUE(caps.supports_system_role); EXPECT_TRUE(caps.supports_tools); EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_TRUE(caps.supports_tool_responses); EXPECT_FALSE(caps.supports_parallel_tool_calls); EXPECT_TRUE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, MetaLlama3_2_3BInstruct) { @@ -143,23 +170,25 @@ TEST(CapabilitiesTest, MetaLlama3_2_3BInstruct) { EXPECT_TRUE(caps.supports_system_role); EXPECT_TRUE(caps.supports_tools); EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_TRUE(caps.supports_tool_responses); EXPECT_FALSE(caps.supports_parallel_tool_calls); EXPECT_TRUE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, MetaLlama3_3_70BInstruct) { - auto caps = get_caps("tests/meta-llama-Llama-3.3-70B-Instruct.jinja"); + auto caps = get_caps("tests/meta-llama-Llama-3.1-8B-Instruct.jinja"); EXPECT_TRUE(caps.supports_system_role); EXPECT_TRUE(caps.supports_tools); EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_TRUE(caps.supports_tool_responses); EXPECT_FALSE(caps.supports_parallel_tool_calls); EXPECT_TRUE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + 
EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, MiniMaxAIText01) { @@ -167,23 +196,25 @@ TEST(CapabilitiesTest, MiniMaxAIText01) { EXPECT_TRUE(caps.supports_system_role); EXPECT_TRUE(caps.supports_tools); EXPECT_FALSE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_FALSE(caps.supports_tool_responses); EXPECT_FALSE(caps.supports_parallel_tool_calls); EXPECT_FALSE(caps.requires_object_arguments); - // EXPECT_FALSE(caps.requires_non_null_content); - EXPECT_TRUE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_TRUE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, Mistral7BInstruct) { - auto caps = get_caps("tests/mistralai-Mistral-7B-Instruct-v0.2.jinja"); + auto caps = get_caps("tests/mistralai-Mistral-7B-Instruct-v0.1.jinja"); EXPECT_TRUE(caps.supports_system_role); EXPECT_FALSE(caps.supports_tools); EXPECT_FALSE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_FALSE(caps.supports_tool_responses); EXPECT_FALSE(caps.supports_parallel_tool_calls); EXPECT_FALSE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, MistralNemoInstruct) { @@ -191,11 +222,12 @@ TEST(CapabilitiesTest, MistralNemoInstruct) { EXPECT_TRUE(caps.supports_system_role); EXPECT_TRUE(caps.supports_tools); EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_TRUE(caps.supports_tool_call_id); EXPECT_TRUE(caps.supports_tool_responses); EXPECT_TRUE(caps.supports_parallel_tool_calls); EXPECT_TRUE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, NousResearchHermes3Llama3_1_70BToolUse) { @@ -203,23 +235,25 @@ TEST(CapabilitiesTest, NousResearchHermes3Llama3_1_70BToolUse) { EXPECT_TRUE(caps.supports_system_role); EXPECT_TRUE(caps.supports_tools); EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_TRUE(caps.supports_tool_responses); EXPECT_TRUE(caps.supports_parallel_tool_calls); EXPECT_FALSE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, NousResearchHermes2ProLlama3_8BToolUse) { - auto caps = get_caps("tests/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja"); + auto caps = get_caps("tests/NousResearch-Hermes-3-Llama-3.1-70B-tool_use.jinja"); EXPECT_TRUE(caps.supports_system_role); EXPECT_TRUE(caps.supports_tools); EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_TRUE(caps.supports_tool_responses); EXPECT_TRUE(caps.supports_parallel_tool_calls); EXPECT_FALSE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, CommandRPlusDefault) { @@ -227,11 +261,12 @@ TEST(CapabilitiesTest, CommandRPlusDefault) { EXPECT_TRUE(caps.supports_system_role); EXPECT_FALSE(caps.supports_tools); EXPECT_FALSE(caps.supports_tool_calls); + 
EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_FALSE(caps.supports_tool_responses); EXPECT_FALSE(caps.supports_parallel_tool_calls); EXPECT_FALSE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_TRUE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, CommandRPlusRag) { @@ -239,11 +274,12 @@ EXPECT_TRUE(caps.supports_system_role); EXPECT_FALSE(caps.supports_tools); EXPECT_FALSE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_FALSE(caps.supports_tool_responses); EXPECT_FALSE(caps.supports_parallel_tool_calls); EXPECT_FALSE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_TRUE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); } TEST(CapabilitiesTest, CommandRPlusToolUse) { @@ -251,9 +287,147 @@ EXPECT_TRUE(caps.supports_system_role); EXPECT_TRUE(caps.supports_tools); EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); + EXPECT_TRUE(caps.supports_tool_responses); + EXPECT_TRUE(caps.supports_parallel_tool_calls); + EXPECT_TRUE(caps.requires_object_arguments); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); +} + +TEST(CapabilitiesTest, GLM46) { + auto caps = get_caps("tests/zai-org-GLM-4.6.jinja"); + EXPECT_TRUE(caps.supports_system_role); + EXPECT_TRUE(caps.supports_tools); + EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); + EXPECT_TRUE(caps.supports_tool_responses); + EXPECT_TRUE(caps.supports_parallel_tool_calls); + EXPECT_TRUE(caps.requires_object_arguments); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); +} + +// Synthetic template based on DeepSeek V3.2's DSML format (encoding_dsv32.py) +// V3.2 doesn't provide a Jinja template, so we replicate its Python encoding logic +// DSML format: <|DSML|parameter name="argument_needle" string="true"> +TEST(CapabilitiesTest, SyntheticDeepSeekV3_2_DSML) { + auto caps = get_caps("tests/synthetic-deepseek-v3.2-dsml.jinja"); + EXPECT_TRUE(caps.supports_system_role); + EXPECT_FALSE(caps.supports_tools); // No native tools block in template + EXPECT_TRUE(caps.supports_tool_calls); // Has tool_calls rendering with DSML format + EXPECT_FALSE(caps.supports_tool_call_id); + EXPECT_TRUE(caps.supports_tool_responses); + EXPECT_TRUE(caps.supports_parallel_tool_calls); // Iterates over tool_calls array + EXPECT_TRUE(caps.requires_object_arguments); // DSML iterates over argument keys + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); + // Reasoning capabilities - synthetic template doesn't support reasoning_content field + EXPECT_FALSE(caps.supports_reasoning); +} + +// Reasoning model tests +// Note: DeepSeek R1 does NOT support the reasoning_content field - it looks for <think> tags embedded in content +// These tests are for models that DO support the reasoning_content field + +#ifndef _WIN32 +TEST(CapabilitiesTest, Qwen3_235B_A22B_Thinking_2507) { + auto caps = get_caps("tests/Qwen-Qwen3-235B-A22B-Thinking-2507.jinja"); + EXPECT_TRUE(caps.supports_system_role); + EXPECT_TRUE(caps.supports_tools); + EXPECT_TRUE(caps.supports_tool_calls); +
EXPECT_FALSE(caps.supports_tool_call_id); + EXPECT_TRUE(caps.supports_tool_responses); + EXPECT_TRUE(caps.supports_parallel_tool_calls); + EXPECT_FALSE(caps.requires_object_arguments); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); + // Qwen supports reasoning_content field + EXPECT_TRUE(caps.supports_reasoning); +} + +TEST(CapabilitiesTest, GLM_4_6) { + auto caps = get_caps("tests/zai-org-GLM-4.6.jinja"); + EXPECT_TRUE(caps.supports_system_role); + EXPECT_TRUE(caps.supports_tools); + EXPECT_TRUE(caps.supports_tool_calls); + EXPECT_FALSE(caps.supports_tool_call_id); EXPECT_TRUE(caps.supports_tool_responses); EXPECT_TRUE(caps.supports_parallel_tool_calls); EXPECT_TRUE(caps.requires_object_arguments); - // EXPECT_TRUE(caps.requires_non_null_content); - EXPECT_FALSE(caps.requires_typed_content); + EXPECT_FALSE(caps.requires_non_null_content); + EXPECT_FALSE(caps.requires_typed_content_blocks); + // GLM-4.6 supports reasoning_content field + EXPECT_TRUE(caps.supports_reasoning); } +#endif // _WIN32 + +// ReasoningFormat tests - verify detection of different reasoning formats + +// Pattern A: REASONING_CONTENT (Qwen3, GLM-4.6/4.7) +TEST(ReasoningFormatTest, ReasoningContentField_GLM47) { + auto caps = get_caps("tests/zai-org-GLM-4.7.jinja"); + EXPECT_TRUE(caps.supports_reasoning); + EXPECT_EQ(caps.reasoning_format, minja::ReasoningFormat::REASONING_CONTENT_FIELD); + // GLM-4.7 supports reasoning visibility control (clear_thinking flag) + EXPECT_TRUE(caps.supports_clear_thinking); +} + +TEST(ReasoningFormatTest, ReasoningContentField_Qwen3) { + auto caps = get_caps("tests/Qwen-Qwen3-4B.jinja"); + EXPECT_TRUE(caps.supports_reasoning); + EXPECT_EQ(caps.reasoning_format, minja::ReasoningFormat::REASONING_CONTENT_FIELD); +} + +// Pattern D: THOUGHT_FIELD (MiniCPM3) +TEST(ReasoningFormatTest, ThoughtField_MiniCPM3) { + auto caps = get_caps("tests/openbmb-MiniCPM3-4B.jinja"); + EXPECT_TRUE(caps.supports_reasoning); + EXPECT_EQ(caps.reasoning_format, minja::ReasoningFormat::THOUGHT_FIELD); +} + +// Pattern E: TOOL_PLAN_FIELD (Command-R7B) - requires tools +TEST(ReasoningFormatTest, ToolPlanField_CommandR7B) { + auto caps = get_caps("tests/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja"); + EXPECT_TRUE(caps.supports_reasoning); + EXPECT_EQ(caps.reasoning_format, minja::ReasoningFormat::TOOL_PLAN_FIELD); + EXPECT_TRUE(caps.reasoning_requires_tools); +} + +// Pattern NONE: Templates without reasoning support +TEST(ReasoningFormatTest, NoReasoning_Gemma7b) { + auto caps = get_caps("tests/google-gemma-7b-it.jinja"); + EXPECT_FALSE(caps.supports_reasoning); + EXPECT_EQ(caps.reasoning_format, minja::ReasoningFormat::NONE); +} + +TEST(ReasoningFormatTest, NoReasoning_Llama31) { + auto caps = get_caps("tests/meta-llama-Llama-3.1-8B-Instruct.jinja"); + EXPECT_FALSE(caps.supports_reasoning); + EXPECT_EQ(caps.reasoning_format, minja::ReasoningFormat::NONE); +} + +// Test Kimi K2 - supports reasoning via THOUGHTS_CONTENT_BLOCK +// The template's render_content macro iterates over content blocks and outputs text +TEST(ReasoningFormatTest, ThoughtsContentBlock_KimiK2) { + auto caps = get_caps("tests/moonshotai-Kimi-K2-Instruct.jinja"); + EXPECT_TRUE(caps.supports_reasoning); + EXPECT_EQ(caps.reasoning_format, minja::ReasoningFormat::THOUGHTS_CONTENT_BLOCK); + EXPECT_FALSE(caps.reasoning_requires_tools); +} + +// Test that REASONING_CONTENT_FIELD models don't require tools for reasoning +TEST(ReasoningFormatTest, ReasoningContentNoToolsRequired_Qwen3) { + auto 
caps = get_caps("tests/Qwen-Qwen3-4B.jinja"); + EXPECT_TRUE(caps.supports_reasoning); + EXPECT_EQ(caps.reasoning_format, minja::ReasoningFormat::REASONING_CONTENT_FIELD); + EXPECT_FALSE(caps.reasoning_requires_tools); +} + +TEST(ReasoningFormatTest, ReasoningContentNoToolsRequired_GLM47) { + auto caps = get_caps("tests/zai-org-GLM-4.7.jinja"); + EXPECT_TRUE(caps.supports_reasoning); + EXPECT_EQ(caps.reasoning_format, minja::ReasoningFormat::REASONING_CONTENT_FIELD); + EXPECT_FALSE(caps.reasoning_requires_tools); +} +
diff --git a/tests/test-polyfills.cpp b/tests/test-polyfills.cpp index 5bc1226..7f2a1fa 100644 --- a/tests/test-polyfills.cpp +++ b/tests/test-polyfills.cpp @@ -391,7 +391,7 @@ TEST(PolyfillTest, ToolPolyfill) { #ifndef _WIN32 TEST(ToolTest, DeepSeekR1) { - chat_template tmpl(read_file("tests/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja"), "", ""); + chat_template tmpl(read_file("tests/deepseek-ai-DeepSeek-R1-Distill-Llama-70B.jinja"), "", ""); auto inputs = chat_template_inputs(); inputs.messages = json::array({message_tool}); @@ -476,7 +476,7 @@ TEST(ToolTest, NousResearchHermes3) { } TEST(ToolTest, NousResearchHermes2) { - chat_template tmpl(read_file("tests/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja"), "", ""); + chat_template tmpl(read_file("tests/NousResearch-Hermes-3-Llama-3.1-70B-tool_use.jinja"), "", ""); auto inputs = chat_template_inputs(); inputs.messages = json::array({message_tool}); @@ -495,7 +495,7 @@ TEST(ToolTest, NousResearchHermes2) { } TEST(ToolTest, Llama3_3) { - chat_template tmpl(read_file("tests/meta-llama-Llama-3.3-70B-Instruct.jinja"), "", ""); + chat_template tmpl(read_file("tests/meta-llama-Llama-3.1-8B-Instruct.jinja"), "", ""); auto inputs = chat_template_inputs(); inputs.messages = json::array({message_tool});
diff --git a/tests/test-supported-template.cpp b/tests/test-supported-template.cpp index db23a4a..1eaad53 100644 --- a/tests/test-supported-template.cpp +++ b/tests/test-supported-template.cpp @@ -14,6 +14,7 @@ #include #include #include +#include <regex> @@ -22,6 +23,16 @@ using json = nlohmann::ordered_json; +#ifdef _WIN32 +// Workaround for https://github.com/ochafik/minja/issues/16 +// On Windows, C++ minja outputs fewer newlines than Python Jinja2 for certain templates. +// This function collapses consecutive blank lines to normalize comparison.
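+// For example, "a\n\n\nb" and "a\n\nb" both become "a\nb", so outputs that differ only in runs of blank lines compare equal.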
+static std::string collapse_blank_lines(const std::string &s) { + static const std::regex blank_lines_regex("\n\n+"); + return std::regex_replace(s, blank_lines_regex, "\n"); +} +#endif + +template <class T> static void assert_equals(const T &expected, const T &actual){ if (expected != actual) { @@ -76,7 +87,7 @@ static json caps_to_json(const minja::chat_template_caps &caps) { {"supports_tool_call_id", caps.supports_tool_call_id}, {"requires_object_arguments", caps.requires_object_arguments}, // {"requires_non_null_content", caps.requires_non_null_content}, - {"requires_typed_content", caps.requires_typed_content}, + {"requires_typed_content_blocks", caps.requires_typed_content_blocks}, }; } #endif @@ -146,13 +157,102 @@ int main(int argc, char *argv[]) { std::string actual; try { - actual = tmpl.apply(inputs); + actual = minja::normalize_newlines(tmpl.apply(inputs)); } catch (const std::exception &e) { std::cerr << "Error applying template: " << e.what() << "\n"; return 1; } - if (expected != actual) { + // Validate expected/forbidden strings from _test_metadata if present + // This provides template-independent validation that doesn't rely on Python goldens + auto original_ctx = json::parse(read_file(ctx_file)); + if (original_ctx.contains("_test_metadata")) { + auto metadata = original_ctx["_test_metadata"]; + auto caps = tmpl.original_caps(); + + // Check expected_strings (always required) + if (metadata.contains("expected_strings")) { + for (const auto& s : metadata["expected_strings"]) { + std::string expected_str = s.get<std::string>(); + if (actual.find(expected_str) == std::string::npos) { + std::cerr << "Expected string not found in output: " << expected_str << "\n"; + std::cerr << "Actual output:\n" << actual << "\n"; + return 1; + } + } + } + + // Helper lambda to check expected strings + auto check_expected_strings = [&](const std::string& key, bool condition, const std::string& desc) -> bool { + if (metadata.contains(key) && condition) { + for (const auto& s : metadata[key]) { + std::string expected_str = s.get<std::string>(); + if (actual.find(expected_str) == std::string::npos) { + std::cerr << "Expected string (" << desc << ") not found in output: " << expected_str << "\n"; + std::cerr << "Actual output:\n" << actual << "\n"; + return false; + } + } + } + return true; + }; + + // Check expected_strings_if_supports_system_role + if (!check_expected_strings("expected_strings_if_supports_system_role", caps.supports_system_role, "system role")) { + return 1; + } + + // Check expected_strings_if_supports_tool_calls + if (!check_expected_strings("expected_strings_if_supports_tool_calls", caps.supports_tool_calls, "tool calls")) { + return 1; + } + + // Check expected_strings_if_supports_tool_responses + if (!check_expected_strings("expected_strings_if_supports_tool_responses", caps.supports_tool_responses, "tool responses")) { + return 1; + } + + // Check expected_strings_if_supports_reasoning (with additional conditions) + // If context uses clear_thinking, only check if template supports it + // If template requires tools for reasoning (TOOL_PLAN_FIELD), only check if context has tool_calls + bool context_uses_clear_thinking = original_ctx.contains("clear_thinking"); + bool context_has_tool_calls = false; + for (const auto& msg : original_ctx["messages"]) { + if (msg.contains("tool_calls") && !msg["tool_calls"].empty()) { + context_has_tool_calls = true; + break; + } + } + bool should_check_reasoning_strings = caps.supports_reasoning + && (!context_uses_clear_thinking || caps.supports_clear_thinking) + && (!caps.reasoning_requires_tools || context_has_tool_calls); + if (!check_expected_strings("expected_strings_if_supports_reasoning", should_check_reasoning_strings, "reasoning")) { + return 1; + } + + // Check forbidden_strings (should never appear) + if (metadata.contains("forbidden_strings")) { + for (const auto& s : metadata["forbidden_strings"]) { + std::string forbidden_str = s.get<std::string>(); + if (actual.find(forbidden_str) != std::string::npos) { + std::cerr << "Forbidden string found in output: " << forbidden_str << "\n"; + std::cerr << "Actual output:\n" << actual << "\n"; + return 1; + } + } + } + } + +#ifdef _WIN32 + // On Windows, collapse blank lines for comparison due to known whitespace handling issues + auto expected_cmp = collapse_blank_lines(expected); + auto actual_cmp = collapse_blank_lines(actual); +#else + auto expected_cmp = expected; + auto actual_cmp = actual; +#endif + + if (expected_cmp != actual_cmp) { if (getenv("WRITE_GOLDENS")) { write_file(golden_file, actual); std::cerr << "Updated golden file: " << golden_file << "\n";
diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp index 36bdaa3..f1d5916 100644 --- a/tests/test-syntax.cpp +++ b/tests/test-syntax.cpp @@ -11,7 +11,6 @@ #include #include -#include #include static std::string render_python(const std::string & template_str, const json & bindings, const minja::Options & options) { @@ -94,7 +93,7 @@ TEST(SyntaxTest, SimpleCases) { EXPECT_EQ("HELLO WORLD", render("{{ 'hello world'.upper() }}", {}, {})); EXPECT_EQ("MIXED", render("{{ 'MiXeD'.upper() }}", {}, {})); EXPECT_EQ("", render("{{ ''.upper() }}", {}, {})); - + EXPECT_EQ("hello world", render("{{ 'HELLO WORLD'.lower() }}", {}, {})); EXPECT_EQ("mixed", render("{{ 'MiXeD'.lower() }}", {}, {})); EXPECT_EQ("", render("{{ ''.lower() }}", {}, {})); @@ -141,6 +140,9 @@ TEST(SyntaxTest, SimpleCases) { EXPECT_EQ( "[1, 2, 3]", render("{{ [1] + [2, 3] }}", {}, {})); + EXPECT_EQ( + "Abc", + render("{{ 'aBc' | capitalize }}", {}, {})); EXPECT_EQ( "abc", render("{{ 'AbC' | lower }}", {}, {})); @@ -236,6 +238,9 @@ TEST(SyntaxTest, SimpleCases) { EXPECT_EQ( "2", render(R"({{ range(3) | last }})", {}, {})); + EXPECT_EQ( + "0", + render(R"({{ range(3) | first }})", {}, {})); EXPECT_EQ( "True", render(R"({% set foo = true %}{{ foo is defined }})", {}, {})); @@ -257,6 +262,14 @@ TEST(SyntaxTest, SimpleCases) { EXPECT_EQ( R"({"a": "b"})", render(R"({{ {"a": "b"} | tojson }})", {}, {})); + // Test tojson with compact separators (used by Kimi K2 template) + EXPECT_EQ( + R"({"a":"b","c":[1,2]})", + render(R"({{ {"a": "b", "c": [1, 2]} | tojson(separators=(',', ':')) }})", {}, {})); + // Test tojson with exotic separators to verify they're actually used + EXPECT_EQ( + R"({"a"=>"b";"c"=>[1;2]})", + render(R"({{ {"a": "b", "c": [1, 2]} | tojson(separators=(';', '=>')) }})", {}, {})); EXPECT_EQ( R"({'a': 'b'})", render(R"({{ {"a": "b"} }})", {}, {})); @@ -373,6 +386,7 @@ TEST(SyntaxTest, SimpleCases) { {}, {} ) ); + EXPECT_EQ("False", render("{{ trim(' a ').endswith(' ') }}", {} , {})); // Test parsing of expression (chaining of identifier, function call, method call) } EXPECT_EQ( "[0, 1, 2][0, 2]", @@ -455,7 +469,7 @@ TEST(SyntaxTest, SimpleCases) { {%- endfor -%} {%- endcall -%} )", {}, {})); - + EXPECT_EQ( "\\n\\nclass A:\\n b: 1\\n c: 2\\n", render(R"(
diff --git a/tests/test_no_duplicate_templates.py b/tests/test_no_duplicate_templates.py new file mode 100755 index 0000000..1e9280a --- /dev/null +++ b/tests/test_no_duplicate_templates.py @@ -0,0 +1,104 @@ +#!/usr/bin/env
python3 +""" +Test that verifies there are no duplicate chat templates. + +This test computes MD5 checksums for all .jinja files in tests/templates/ +and fails if any duplicates are found. Duplicate templates waste storage, +build time, and test execution time. + +Usage: + python test_no_duplicate_templates.py [templates_directory] + +Returns: + 0 if no duplicates found (success) + 1 if duplicates found (failure) +""" + +import hashlib +import sys +from pathlib import Path +from collections import defaultdict + + +def compute_md5(file_path: Path) -> str: + """Compute MD5 hash of a file.""" + md5 = hashlib.md5() + with open(file_path, 'rb') as f: + for chunk in iter(lambda: f.read(8192), b''): + md5.update(chunk) + return md5.hexdigest() + + +def find_duplicate_templates(templates_dir: Path) -> dict[str, list[Path]]: + """ + Find duplicate templates by MD5 hash. + + Returns: + Dictionary mapping checksums to list of file paths with that checksum. + Only includes checksums that appear more than once. + """ + checksums = defaultdict(list) + + template_files = list(templates_dir.glob('*.jinja')) + + if not template_files: + print(f"Warning: No .jinja files found in {templates_dir}", file=sys.stderr) + return {} + + for template_file in template_files: + checksum = compute_md5(template_file) + checksums[checksum].append(template_file) + + # Only return checksums with duplicates + duplicates = { + checksum: files + for checksum, files in checksums.items() + if len(files) > 1 + } + + return duplicates + + +def main(): + # Get templates directory from argument or use default + if len(sys.argv) > 1: + templates_dir = Path(sys.argv[1]) + else: + # Default: tests/templates relative to this script + script_dir = Path(__file__).parent + templates_dir = script_dir / 'templates' + + if not templates_dir.exists(): + print(f"Error: Templates directory not found: {templates_dir}", file=sys.stderr) + return 1 + + if not templates_dir.is_dir(): + print(f"Error: Not a directory: {templates_dir}", file=sys.stderr) + return 1 + + # Find duplicates + duplicates = find_duplicate_templates(templates_dir) + + if not duplicates: + template_count = len(list(templates_dir.glob('*.jinja'))) + print(f"✓ No duplicate templates found ({template_count} unique templates)") + return 0 + + # Report duplicates + print(f"✗ Found {len(duplicates)} duplicate template(s):", file=sys.stderr) + print(file=sys.stderr) + + for checksum, files in sorted(duplicates.items()): + print(f"Checksum {checksum}:", file=sys.stderr) + for file_path in sorted(files): + print(f" - {file_path.name}", file=sys.stderr) + print(file=sys.stderr) + + total_duplicates = sum(len(files) - 1 for files in duplicates.values()) + print(f"Total: {total_duplicates} duplicate file(s) should be removed", file=sys.stderr) + + return 1 + + +if __name__ == '__main__': + sys.exit(main())
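+
+# Example invocation (path is illustrative; the CMake test passes its own binary dir,
+# i.e. wherever the fetched .jinja files live):
+#   python3 tests/test_no_duplicate_templates.py build/tests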