diff --git a/CMakePresets.json b/CMakePresets.json
index b5afeb3c0f2..accdd72d180 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -1,95 +1,332 @@
 {
-  "version": 4,
-  "configurePresets": [
-    {
-        "name":  "base",
-        "hidden": true,
-        "generator":   "Ninja",
-        "binaryDir":   "${sourceDir}/build-${presetName}",
-        "cacheVariables": {
-            "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
-            "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+    "version": 4,
+    "configurePresets": [
+        {
+            "name": "base",
+            "hidden": true,
+            "generator": "Ninja",
+            "binaryDir": "${sourceDir}/build-${presetName}",
+            "cacheVariables": {
+                "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+                "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+            }
+        },
+        {
+            "name": "sycl-base",
+            "hidden": true,
+            "generator": "Ninja",
+            "binaryDir": "${sourceDir}/build-${presetName}",
+            "cacheVariables": {
+                "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+                "CMAKE_CXX_COMPILER": "icx",
+                "CMAKE_C_COMPILER": "cl",
+                "GGML_SYCL": "ON",
+                "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+            }
+        },
+        {
+            "name": "debug",
+            "hidden": true,
+            "cacheVariables": {
+                "CMAKE_BUILD_TYPE": "Debug"
+            }
+        },
+        {
+            "name": "release",
+            "hidden": true,
+            "cacheVariables": {
+                "CMAKE_BUILD_TYPE": "Release"
+            }
+        },
+        {
+            "name": "reldbg",
+            "hidden": true,
+            "cacheVariables": {
+                "CMAKE_BUILD_TYPE": "RelWithDebInfo"
+            }
+        },
+        {
+            "name": "static",
+            "hidden": true,
+            "cacheVariables": {
+                "GGML_STATIC": "ON"
+            }
+        },
+        {
+            "name": "sycl_f16",
+            "hidden": true,
+            "cacheVariables": {
+                "GGML_SYCL_F16": "ON"
+            }
+        },
+        {
+            "name": "vulkan",
+            "hidden": true,
+            "cacheVariables": {
+                "GGML_VULKAN": "ON"
+            }
+        },
+        {
+            "name": "x64-windows-llvm",
+            "hidden": true,
+            "cacheVariables": {
+                "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake"
+            }
+        },
+        {
+            "name": "arm64-windows-llvm",
+            "hidden": true,
+            "architecture": {
+                "value": "arm64",
+                "strategy": "external"
+            },
+            "toolset": {
+                "value": "host=x64",
+                "strategy": "external"
+            },
+            "cacheVariables": {
+                "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
+            }
+        },
+        {
+            "name": "arm64-apple-clang",
+            "hidden": true,
+            "architecture": {
+                "value": "arm64",
+                "strategy": "external"
+            },
+            "toolset": {
+                "value": "host=x64",
+                "strategy": "external"
+            },
+            "cacheVariables": {
+                "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
+            }
+        },
+        {
+            "name": "x64-linux-gcc",
+            "hidden": true,
+            "cacheVariables": {
+                "CMAKE_C_COMPILER": "gcc",
+                "CMAKE_CXX_COMPILER": "g++"
+            }
+        },
+        {
+            "name": "x64-linux-gcc-debug",
+            "inherits": [
+                "base",
+                "x64-linux-gcc",
+                "debug"
+            ]
+        },
+        {
+            "name": "x64-linux-gcc-release",
+            "inherits": [
+                "base",
+                "x64-linux-gcc",
+                "release"
+            ]
+        },
+        {
+            "name": "x64-linux-gcc-reldbg",
+            "inherits": [
+                "base",
+                "x64-linux-gcc",
+                "reldbg"
+            ]
+        },
+        {
+            "name": "x64-linux-gcc+static-release",
+            "inherits": [
+                "base",
+                "x64-linux-gcc",
+                "release",
+                "static"
+            ]
+        },
+        {
+            "name": "arm64-windows-llvm-debug",
+            "inherits": [
+                "base",
+                "arm64-windows-llvm",
+                "debug"
+            ]
+        },
+        {
+            "name": "arm64-windows-llvm-release",
+            "inherits": [
+                "base",
+                "arm64-windows-llvm",
+                "reldbg"
+            ]
+        },
+        {
+            "name": "arm64-windows-llvm+static-release",
+            "inherits": [
+                "base",
+                "arm64-windows-llvm",
+                "reldbg",
+                "static"
+            ]
+        },
+        {
+            "name": "arm64-apple-clang-debug",
+            "inherits": [
+                "base",
+                "arm64-apple-clang",
+                "debug"
+            ]
+        },
+        {
+            "name": "arm64-apple-clang-release",
+            "inherits": [
+                "base",
+                "arm64-apple-clang",
+                "reldbg"
+            ]
+        },
+        {
+            "name": "arm64-apple-clang+static-release",
+            "inherits": [
+                "base",
+                "arm64-apple-clang",
+                "reldbg",
+                "static"
+            ]
+        },
+        {
+            "name": "x64-windows-llvm-debug",
+            "inherits": [
+                "base",
+                "x64-windows-llvm",
+                "debug"
+            ]
+        },
+        {
+            "name": "x64-windows-llvm-release",
+            "inherits": [
+                "base",
+                "x64-windows-llvm",
+                "release"
+            ]
+        },
+        {
+            "name": "x64-windows-llvm-reldbg",
+            "inherits": [
+                "base",
+                "x64-windows-llvm",
+                "reldbg"
+            ]
+        },
+        {
+            "name": "x64-windows-llvm+static-release",
+            "inherits": [
+                "base",
+                "x64-windows-llvm",
+                "reldbg",
+                "static"
+            ]
+        },
+        {
+            "name": "x64-windows-msvc-debug",
+            "inherits": [
+                "base",
+                "debug"
+            ]
+        },
+        {
+            "name": "x64-windows-msvc-release",
+            "inherits": [
+                "base",
+                "reldbg"
+            ]
+        },
+        {
+            "name": "x64-windows-msvc+static-release",
+            "inherits": [
+                "base",
+                "reldbg",
+                "static"
+            ]
+        },
+        {
+            "name": "x64-windows-sycl-debug",
+            "inherits": [
+                "sycl-base",
+                "debug"
+            ]
+        },
+        {
+            "name": "x64-windows-sycl-debug-f16",
+            "inherits": [
+                "sycl-base",
+                "debug",
+                "sycl_f16"
+            ]
+        },
+        {
+            "name": "x64-windows-sycl-release",
+            "inherits": [
+                "sycl-base",
+                "release"
+            ]
+        },
+        {
+            "name": "x64-windows-sycl-release-f16",
+            "inherits": [
+                "sycl-base",
+                "release",
+                "sycl_f16"
+            ]
+        },
+        {
+            "name": "x64-windows-vulkan-debug",
+            "inherits": [
+                "base",
+                "vulkan",
+                "debug"
+            ]
+        },
+        {
+            "name": "x64-windows-vulkan-release",
+            "inherits": [
+                "base",
+                "vulkan",
+                "release"
+            ]
+        },
+        {
+            "name": "ilintar-release",
+            "hidden": false,
+            "description": "Release build",
+            "displayName": "Release build",
+            "binaryDir": "${sourceDir}/build",
+            "cacheVariables": {
+                "GGML_CUDA": "ON",
+                "GGML_CUDA_FORCE_CUBLAS": "OFF",
+                "GGML_CUDA_FORCE_MMQ": "OFF",
+                "GGML_CUDA_FA_ALL_QUANTS": "1",
+                "CMAKE_CUDA_ARCHITECTURES": "86;120",
+                "GGML_BLAS": "ON",
+                "GGML_BLAS_VENDOR": "OpenBLAS",
+                "GGML_CPU_ALL_VARIANTS": "ON",
+                "GGML_BACKEND_DL": "ON",
+                "CMAKE_CUDA_COMPILER": "nvcc"
+            },
+            "inherits": [
+                "base",
+                "release",
+                "x64-linux-gcc-release"
+            ]
         }
-    },
-    {
-        "name": "sycl-base",
-        "hidden": true,
-        "generator": "Ninja",
-        "binaryDir": "${sourceDir}/build-${presetName}",
-        "cacheVariables": {
-            "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
-            "CMAKE_CXX_COMPILER": "icx",
-            "CMAKE_C_COMPILER": "cl",
-            "GGML_SYCL": "ON",
-            "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+    ],
+    "buildPresets": [
+        {
+            "name": "parallel",
+            "description": "Parallel build",
+            "displayName": "Parallel build",
+            "configurePreset": "ilintar-release",
+            "jobs": 8
         }
-    },
-    { "name": "debug",    "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
-    { "name": "release",  "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
-    { "name": "reldbg",   "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
-    { "name": "static",   "hidden": true, "cacheVariables": { "GGML_STATIC":      "ON" } },
-    { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16":    "ON" } },
-    { "name": "vulkan",   "hidden": true, "cacheVariables": { "GGML_VULKAN":      "ON" } },
-
-    {
-        "name": "x64-windows-llvm", "hidden": true,
-        "cacheVariables": {
-            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake"
-        }
-    },
-
-    {
-        "name": "arm64-windows-llvm", "hidden": true,
-        "architecture": { "value": "arm64",    "strategy": "external" },
-        "toolset":      { "value": "host=x64", "strategy": "external" },
-        "cacheVariables": {
-            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
-        }
-    },
-
-    {
-        "name": "arm64-apple-clang", "hidden": true,
-        "architecture": { "value": "arm64",    "strategy": "external" },
-        "toolset":      { "value": "host=x64", "strategy": "external" },
-        "cacheVariables": {
-            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
-        }
-    },
-    {
-        "name": "x64-linux-gcc", "hidden": true,
-        "cacheVariables": {
-            "CMAKE_C_COMPILER": "gcc",
-            "CMAKE_CXX_COMPILER": "g++"
-        }
-    },
-    { "name": "x64-linux-gcc-debug", "inherits": [ "base", "x64-linux-gcc", "debug" ] },
-    { "name": "x64-linux-gcc-release", "inherits": [ "base", "x64-linux-gcc", "release" ] },
-    { "name": "x64-linux-gcc-reldbg", "inherits": [ "base", "x64-linux-gcc", "reldbg" ] },
-    { "name": "x64-linux-gcc+static-release", "inherits": [ "base", "x64-linux-gcc", "release", "static" ] },
-
-    { "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
-    { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
-    { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
-
-    { "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] },
-    { "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
-    { "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang",  "reldbg", "static" ] },
-
-    { "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] },
-    { "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] },
-    { "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] },
-    { "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] },
-
-    { "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] },
-    { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
-    { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
-
-    { "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] },
-    { "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] },
-    { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] },
-    { "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] },
-
-    { "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] },
-    { "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] }
-  ]
-}
+    ]
+}
\ No newline at end of file
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 295ae9ea254..41069a04ef4 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -48,10 +48,11 @@ add_library(${TARGET} STATIC
     arg.cpp
     arg.h
     base64.hpp
-    chat-parser.cpp
-    chat-parser.h
-    chat-parser-xml-toolcall.h
-    chat-parser-xml-toolcall.cpp
+    chat-auto-parser-generator.cpp
+    chat-auto-parser-helpers.cpp
+    chat-auto-parser.h
+    chat-diff-analyzer.cpp
+    chat-diff-analyzer.h
     chat-peg-parser.cpp
     chat-peg-parser.h
     chat.cpp
diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp
new file mode 100644
index 00000000000..a721a30f1c5
--- /dev/null
+++ b/common/chat-auto-parser-generator.cpp
@@ -0,0 +1,361 @@
+#include "chat-auto-parser.h"
+#include "chat-diff-analyzer.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "json-schema-to-grammar.h"
+#include "nlohmann/json.hpp"
+#include <string>
+
+
+using json = nlohmann::ordered_json;
+
+// Helper to iterate over tools/functions
+static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
+    for (const auto & tool : tools) {
+        if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
+            continue;
+        }
+        fn(tool);
+    }
+}
+
+common_chat_params universal_peg_generator::generate_parser(const common_chat_template &    tmpl,
+                                                            const struct templates_params & inputs) {
+    // Run differential analysis to extract template structure
+    auto analysis = differential_analyzer::analyze(tmpl);
+    return generate_parser(tmpl, inputs, analysis);
+}
+
+common_chat_params universal_peg_generator::generate_parser(const common_chat_template &    tmpl, 
+                                                            const struct templates_params & inputs,
+                                                            const diff_analysis_result &    analysis) {
+    // Check for thinking forced open
+    bool thinking_forced_open = (analysis.reasoning == reasoning_mode::FORCED_OPEN);
+    bool thinking_forced_closed = (analysis.reasoning == reasoning_mode::FORCED_CLOSED);
+
+    // Build the parser using the analysis results
+    auto parser = build_parser(analysis, inputs, thinking_forced_open, thinking_forced_closed);
+
+    // Create the result structure
+    common_chat_params data;
+    data.prompt = common_chat_template_direct_apply(tmpl, inputs);
+    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.preserved_tokens = analysis.preserved_tokens;
+    data.parser = parser.save();
+
+    // Build grammar if tools are present
+    bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+    bool include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
+
+    if (include_grammar) {
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                auto         schema   = function.at("parameters");
+                builder.resolve_refs(schema);
+            });
+            parser.build_grammar(builder, data.grammar_lazy);
+        });
+
+        // Set grammar triggers based on tool section markers (fall back to per-call markers)
+        std::string trigger_marker = !analysis.markers.tool_section_start.empty()
+            ? analysis.markers.tool_section_start
+            : analysis.markers.per_call_start;
+        if (!trigger_marker.empty()) {
+            data.grammar_triggers = {
+                { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker }
+            };
+        }
+    }
+
+    return data;
+}
+
+common_peg_arena universal_peg_generator::build_parser(const diff_analysis_result &    analysis,
+                                                        const struct templates_params & inputs,
+                                                        bool                            thinking_forced_open,
+                                                        bool                            thinking_forced_closed) {
+    return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        p.set_allow_python_dict_format(true);
+        const auto & m = analysis.markers;
+
+        common_peg_parser reasoning = p.eps();
+        bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+        bool enable_thinking = inputs.enable_thinking;
+
+        if (extract_reasoning && enable_thinking && analysis.reasoning != reasoning_mode::NONE) {
+            if (thinking_forced_open || thinking_forced_closed) {
+                // Thinking is forced open OR forced closed with enable_thinking=true
+                // In both cases, expect only the closing tag (opening was in template)
+                reasoning = p.reasoning(p.until(m.reasoning_end)) + m.reasoning_end;
+            } else if (analysis.reasoning == reasoning_mode::TAG_BASED ||
+                       analysis.reasoning == reasoning_mode::TOOLS_ONLY) {
+                // Standard tag-based reasoning OR tools-only mode (reasoning appears with tools)
+                // Both use the same tag-based pattern if markers are available
+                if (!m.reasoning_start.empty() && !m.reasoning_end.empty()) {
+                    reasoning = p.optional(m.reasoning_start + p.reasoning(p.until(m.reasoning_end)) + m.reasoning_end);
+                }
+            } else if (analysis.reasoning == reasoning_mode::DELIMITER) {
+                reasoning = p.optional(p.reasoning(p.until(m.reasoning_end)) + m.reasoning_end);
+            }
+        }
+
+        bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+        bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
+
+        if (has_response_format) {
+            return reasoning + p.space() + p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + p.end();
+        }
+
+        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && analysis.supports_tools) {
+            return build_tool_parser(p, analysis, inputs, reasoning);
+        }
+
+        if (analysis.content == content_mode::ALWAYS_WRAPPED &&
+            !m.content_start.empty() && !m.content_end.empty()) {
+
+            bool extracting_reasoning = extract_reasoning && enable_thinking && analysis.reasoning != reasoning_mode::NONE;
+
+            if (extracting_reasoning) {
+                return reasoning + m.content_start + p.content(p.until(m.content_end)) + m.content_end + p.end();
+            } 
+            return p.content(p.until(m.content_start)) + m.content_start +
+                    p.content(p.until(m.content_end)) + m.content_end + p.end();
+        }
+        return reasoning + p.content(p.rest()) + p.end();
+    });
+}
+
+common_peg_parser universal_peg_generator::build_tool_parser(
+        common_chat_peg_unified_builder & p,
+        const diff_analysis_result & analysis,
+        const templates_params & inputs,
+        const common_peg_parser & reasoning) {
+
+    const auto & m = analysis.markers;
+
+    // Build tool choice parser based on format
+    common_peg_parser tool_choice = p.choice();
+
+    if (analysis.tools == tool_format::JSON_NATIVE) {
+        // Pure JSON format: use standard_json_tools helper
+        // Build effective field names with dot notation if function_field is set
+        std::string name_field = analysis.name_field;
+        std::string args_field = analysis.args_field;
+
+        if (!analysis.function_field.empty() &&
+            analysis.function_field != "function" &&
+            name_field.find('.') == std::string::npos) {
+            name_field = analysis.function_field + "." + name_field;
+            args_field = analysis.function_field + "." + args_field;
+        }
+
+        auto tools_parser = p.standard_json_tools(
+            m.tool_section_start,
+            m.tool_section_end,
+            inputs.tools,
+            inputs.parallel_tool_calls,
+            inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED,
+            name_field,
+            args_field,
+            analysis.tools_array_wrapped,
+            analysis.fun_name_is_key,
+            analysis.id_field,
+            analysis.gen_id_field,
+            analysis.parameter_order
+        );
+
+        // Handle content wrappers if present
+        if (analysis.content == content_mode::ALWAYS_WRAPPED &&
+            !m.content_start.empty() && !m.content_end.empty()) {
+            auto wrapped_content = p.optional(m.content_start + p.content(p.until(m.content_end)) + m.content_end);
+            return reasoning + wrapped_content + tools_parser + p.end();
+        }
+
+        auto content_before_tools = m.tool_section_start.empty() ? p.eps() : p.until(m.tool_section_start);
+        return reasoning + p.optional(p.content(content_before_tools)) + tools_parser + p.end();
+    }
+
+    if (analysis.tools == tool_format::TAG_WITH_JSON) {
+        // Tag-based with JSON args: <function=name>{args}</function>
+        // With optional call_id: <function=name>[CALL_ID]id[ARGS]{args}</function>
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool.at("function");
+            std::string  name     = function.at("name");
+            const auto & schema   = function.at("parameters");
+
+            // Build call_id parser based on position (if supported)
+            common_peg_parser call_id_section = p.eps();
+            if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
+                !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
+                // Optional call_id followed by required call_id_suffix (which is also args_start)
+                // Format: optional([CALL_ID] + call_id_value) + [ARGS]
+                call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
+            }
+
+            auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
+                               call_id_section +
+                               p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema));
+
+            if (!m.func_close.empty()) {
+                func_parser = func_parser + m.func_close;
+            }
+
+            tool_choice |= p.rule("tool-" + name, func_parser);
+        });
+
+        auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+        common_peg_parser tool_calls = p.eps();
+
+        if (!m.per_call_start.empty()) {
+            // Per-call wrapping: each call individually wrapped
+            auto wrapped_call = m.per_call_start + tool_choice + m.per_call_end;
+            if (inputs.parallel_tool_calls) {
+                tool_calls = p.trigger_rule("tool-call",
+                    wrapped_call + p.zero_or_more(p.space() + wrapped_call));
+            } else {
+                tool_calls = p.trigger_rule("tool-call", wrapped_call);
+            }
+            if (!m.tool_section_start.empty()) {
+                tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
+                    tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
+            }
+        } else {
+            std::string separator = m.call_separator;
+            if (separator.empty()) {
+                separator = ", ";  // Default
+            }
+
+            if (inputs.parallel_tool_calls) {
+                tool_calls = p.trigger_rule("tool-call",
+                    m.tool_section_start + tool_choice + p.zero_or_more(separator + tool_choice) + m.tool_section_end);
+            } else {
+                tool_calls = p.trigger_rule("tool-call",
+                    m.tool_section_start + tool_choice + m.tool_section_end);
+            }
+        }
+
+        if (!require_calls) {
+            tool_calls = p.optional(tool_calls);
+        }
+
+        std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
+        auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
+        return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
+    }
+
+    if (analysis.tools == tool_format::TAG_WITH_TAGGED) {
+        // Tag-based with tagged args: <function=name><param=key>value</param></function>
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool.at("function");
+            std::string  name     = function.at("name");
+            const auto & params   = function.at("parameters");
+
+            if (!params.contains("properties") || !params.at("properties").is_object()) {
+                return;
+            }
+
+            const auto & properties = params.at("properties");
+            std::set<std::string> required;
+            if (params.contains("required") && params.at("required").is_array()) {
+                params.at("required").get_to(required);
+            }
+
+            // Build parser for each argument
+            std::vector<common_peg_parser> arg_parsers;
+            for (const auto & [param_name, param_schema] : properties.items()) {
+                bool is_required = required.find(param_name) != required.end();
+                auto type = param_schema.value("type", "object");
+
+                auto arg = p.tool_arg(
+                    p.tool_arg_open(m.arg_name_prefix + p.tool_arg_name(p.literal(param_name)) + m.arg_name_suffix) + m.arg_value_prefix +
+                    (type == "string" ?
+                        p.tool_arg_string_value(p.schema(p.until(m.arg_value_suffix),
+                            "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
+                        p.tool_arg_json_value(p.schema(p.json(),
+                            "tool-" + name + "-arg-" + param_name + "-schema", param_schema)) + p.space()) +
+                    p.tool_arg_close(p.literal(m.arg_value_suffix))
+                );
+
+                if (is_required) {
+                    arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg));
+                } else {
+                    arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
+                }
+            }
+
+            // Build arg sequence with space() between consecutive args
+            common_peg_parser args_seq = p.eps();
+            for (size_t i = 0; i < arg_parsers.size(); i++) {
+                if (i > 0) {
+                    args_seq = args_seq + p.space();
+                }
+                args_seq = args_seq + arg_parsers[i];
+            }
+
+            // Build call_id parser based on position (if supported)
+            common_peg_parser call_id_section = p.eps();
+            if (analysis.call_id_pos == call_id_position::BETWEEN_FUNC_AND_ARGS &&
+                !m.call_id_prefix.empty() && !m.call_id_suffix.empty()) {
+                // Optional call_id followed by required call_id_suffix
+                call_id_section = p.optional(m.call_id_prefix + p.tool_id(p.until(m.call_id_suffix))) + m.call_id_suffix;
+            }
+
+            auto func_parser = p.tool_open(m.func_name_prefix + p.tool_name(p.literal(name)) + m.func_name_suffix) +
+                               call_id_section +
+                               p.space() + args_seq;
+
+            if (!m.func_close.empty()) {
+                func_parser = func_parser + p.space() + p.tool_close(p.literal(m.func_close));
+            } else {
+                func_parser = func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper
+            }
+
+            tool_choice |= p.rule("tool-" + name, func_parser);
+        });
+
+        auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+        common_peg_parser tool_calls = p.eps();
+
+        if (!m.per_call_start.empty()) {
+            // Per-call wrapping: each call individually wrapped (e.g., <tool_call>...</tool_call>)
+            auto wrapped_call = m.per_call_start + p.space() + tool_choice + p.space() + m.per_call_end;
+            if (inputs.parallel_tool_calls) {
+                tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
+            } else {
+                tool_calls = p.trigger_rule("tool-call", wrapped_call);
+            }
+            if (!m.tool_section_start.empty()) {
+                tool_calls = p.trigger_rule("tool-calls", p.literal(m.tool_section_start) + p.space() +
+                    tool_calls + p.space() + (m.tool_section_end.empty() ? p.end() : p.literal(m.tool_section_end)));
+            }
+        } else {
+            std::string separator = m.call_separator;
+            if (separator.empty()) {
+                separator = ", ";  // Default
+            }
+
+            if (inputs.parallel_tool_calls) {
+                tool_calls = p.trigger_rule("tool-call",
+                    m.tool_section_start + p.space() + tool_choice + p.zero_or_more(separator + tool_choice) + p.space() + m.tool_section_end);
+            } else {
+                tool_calls = p.trigger_rule("tool-call",
+                    m.tool_section_start + p.space() + tool_choice + p.space() + m.tool_section_end);
+            }
+        }
+
+        if (!require_tools) {
+            tool_calls = p.optional(tool_calls);
+        }
+
+        std::string trigger_marker = !m.tool_section_start.empty() ? m.tool_section_start : m.per_call_start;
+        auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
+        return reasoning + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
+    }
+
+    GGML_ABORT("Unable to create tool parser");
+}
\ No newline at end of file
diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp
new file mode 100644
index 00000000000..4bf27f1dcbf
--- /dev/null
+++ b/common/chat-auto-parser-helpers.cpp
@@ -0,0 +1,386 @@
+#include "chat-auto-parser-helpers.h"
+
+#include "chat-diff-analyzer.h"
+#include "nlohmann/json.hpp"
+
+#include <cctype>
+
+using json = nlohmann::ordered_json;
+
+std::string trim_whitespace(const std::string & str) {
+    size_t start = 0;
+    while (start < str.length() && std::isspace(static_cast<unsigned char>(str[start]))) {
+        start++;
+    }
+
+    if (start == str.length()) {
+        return "";
+    }
+
+    size_t end = str.length() - 1;
+    while (end > start && std::isspace(static_cast<unsigned char>(str[end]))) {
+        end--;
+    }
+
+    return str.substr(start, end - start + 1);
+}
+
+std::string trim_leading_whitespace(const std::string & str) {
+    size_t start = 0;
+    while (start < str.length() && std::isspace(static_cast<unsigned char>(str[start]))) {
+        start++;
+    }
+
+    return str.substr(start);
+}
+
+std::string trim_trailing_whitespace(const std::string & str) {
+    if (str.empty()) {
+        return "";
+    }
+    
+    size_t end = str.length() - 1;
+    while (end > 0 && std::isspace(static_cast<unsigned char>(str[end]))) {
+        end--;
+    }
+    
+    // If first char is also whitespace, return empty string
+    if (end == 0 && std::isspace(static_cast<unsigned char>(str[0]))) {
+        return "";
+    }
+    
+    return str.substr(0, end + 1);
+}
+
+std::string trim_trailing_newlines(const std::string & str) {
+    size_t end = str.length();
+    while (end > 0 && str[end - 1] == '\n') {
+        end--;
+    }
+
+    return str.substr(0, end);
+}
+
+// Helper to find unmatched bracket/tag in a string
+// Finds an unmatched bracket in a string.
+// search_backwards=true:  finds unclosed opening bracket at end (returns bracket position)
+// search_backwards=false: finds unopened closing bracket at start (returns position after bracket)
+static size_t find_unmatched_bracket(const std::string & str, bool search_backwards) {
+    if (str.empty()) {
+        return std::string::npos;
+    }
+
+    // Compute iteration bounds and bracket types based on direction
+    const char * primary_brackets = search_backwards ? "<[" : ">]";
+
+    for (size_t i = 0; i < str.length(); ++i) {
+        // Map iteration index to actual position based on direction
+        size_t pos = search_backwards ? (str.length() - 1 - i) : i;
+        char   c   = str[pos];
+
+        // Check if this is a primary bracket we're looking for
+        if (c == primary_brackets[0] || c == primary_brackets[1]) {
+            // Get the matching bracket: < matches >, [ matches ], and vice versa
+            char match_bracket = (c == '<' || c == '>') ? (c == '<' ? '>' : '<') : (c == '[' ? ']' : '[');
+
+            // Search for matching bracket in the appropriate range
+            size_t inner_start = search_backwards ? (pos + 1) : 0;
+            size_t inner_end   = search_backwards ? str.length() : pos;
+            bool   found_match = false;
+
+            for (size_t j = inner_start; j < inner_end; ++j) {
+                if (str[j] == match_bracket) {
+                    found_match = true;
+                    break;
+                }
+            }
+
+            if (!found_match) {
+                return search_backwards ? pos : (pos + 1);
+            }
+        }
+    }
+
+    return std::string::npos;
+}
+
+static size_t find_unclosed_bracket_at_end(const std::string & str) {
+    return find_unmatched_bracket(str, true);
+}
+
+static size_t find_unopened_bracket_at_start(const std::string & str) {
+    return find_unmatched_bracket(str, false);
+}
+
+// Returns true if `s` contains an unmatched bracket.
+// search_backwards=true:  looks for opening bracket without matching closing after it
+// search_backwards=false: looks for closing bracket without matching opening before it
+static bool contains_unmatched_bracket(const std::string & s, char opening, char closing, bool search_backwards) {
+    if (s.empty()) {
+        return false;
+    }
+
+    char primary = search_backwards ? opening : closing;
+
+    for (size_t i = 0; i < s.length(); ++i) {
+        // Map iteration index to actual position based on direction
+        size_t pos = search_backwards ? (s.length() - 1 - i) : i;
+
+        if (s[pos] == primary) {
+            // Search for matching bracket in the appropriate range
+            size_t inner_start   = search_backwards ? (pos + 1) : 0;
+            size_t inner_end     = search_backwards ? s.length() : pos;
+            char   match_bracket = search_backwards ? closing : opening;
+            bool   found_match   = false;
+
+            for (size_t j = inner_start; j < inner_end; ++j) {
+                if (s[j] == match_bracket) {
+                    found_match = true;
+                    break;
+                }
+            }
+
+            if (!found_match) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+static bool contains_unopened_closing(const std::string & s, char opening, char closing) {
+    return contains_unmatched_bracket(s, opening, closing, false);
+}
+
+static bool contains_unclosed_opening(const std::string & s, char opening, char closing) {
+    return contains_unmatched_bracket(s, opening, closing, true);
+}
+
+// Moves incomplete tags from prefix/suffix into left/right parts
+// Only moves tags when we detect the split pattern in BOTH left and right
+static diff_split fix_tag_boundaries(diff_split result) {
+    // Check if prefix ends with an unclosed bracket/tag
+    // No fixed window: search the entire neighboring strings for matching brackets
+    size_t unclosed_pos = find_unclosed_bracket_at_end(result.prefix);
+    if (unclosed_pos != std::string::npos) {
+        char opening_bracket = result.prefix[unclosed_pos];
+        char closing_bracket = (opening_bracket == '<') ? '>' : ']';
+
+        // Look for the specific closing bracket that matches our opening bracket
+        bool left_has_pattern   = contains_unopened_closing(result.left, opening_bracket, closing_bracket);
+        bool right_has_pattern  = contains_unopened_closing(result.right, opening_bracket, closing_bracket);
+        bool suffix_has_pattern = contains_unopened_closing(result.suffix, opening_bracket, closing_bracket);
+
+        // Move the tag if both sides satisfy: has pattern OR is empty (and other has pattern)
+        // This handles cases like: left="" right="_begin|>..." or left="stuff>" right="stuff>"
+        bool left_satisfies  = left_has_pattern || (result.left.empty() && suffix_has_pattern);
+        bool right_satisfies = right_has_pattern || (result.right.empty() && suffix_has_pattern);
+
+        if (left_satisfies && right_satisfies) {
+            // Move the unclosed tag from prefix to left/right
+            std::string tag_part = result.prefix.substr(unclosed_pos);
+            result.prefix        = result.prefix.substr(0, unclosed_pos);
+            result.left          = tag_part + result.left;
+            result.right         = tag_part + result.right;
+        }
+    }
+
+    // Check if suffix starts with an unopened bracket/tag
+    size_t unopened_end = find_unopened_bracket_at_start(result.suffix);
+    if (unopened_end != std::string::npos) {
+        char closing_bracket =
+            result.suffix[unopened_end - 1];  // -1 because unopened_end is position after the bracket
+        char opening_bracket = (closing_bracket == '>') ? '<' : '[';
+
+        // Check if BOTH left and right have the pattern of unclosed opening bracket at the end
+        bool left_has_pattern   = contains_unclosed_opening(result.left, opening_bracket, closing_bracket);
+        bool right_has_pattern  = contains_unclosed_opening(result.right, opening_bracket, closing_bracket);
+        bool prefix_has_pattern = contains_unclosed_opening(result.prefix, opening_bracket, closing_bracket);
+
+        // Move the tag if both sides satisfy: has pattern OR is empty (and other has pattern)
+        bool left_satisfies  = left_has_pattern || (result.left.empty() && prefix_has_pattern);
+        bool right_satisfies = right_has_pattern || (result.right.empty() && prefix_has_pattern);
+
+        if (left_satisfies && right_satisfies) {
+            // Move the unopened tag from suffix to left/right
+            std::string tag_part = result.suffix.substr(0, unopened_end);
+            result.suffix        = result.suffix.substr(unopened_end);
+            result.left          = result.left + tag_part;
+            result.right         = result.right + tag_part;
+        }
+    }
+
+    return result;
+}
+
+diff_split calculate_diff_split(const std::string & left, const std::string & right) {
+    diff_split result;
+
+    // Find longest common prefix
+    size_t prefix_len = 0;
+    size_t min_len    = std::min(left.length(), right.length());
+    while (prefix_len < min_len && left[prefix_len] == right[prefix_len]) {
+        prefix_len++;
+    }
+    result.prefix = left.substr(0, prefix_len);
+
+    // Find longest common suffix, ending no later than the end of the longest common prefix
+    size_t suffix_len = 0;
+    while (suffix_len < min_len - prefix_len) {
+        size_t left_pos  = left.length() - 1 - suffix_len;
+        size_t right_pos = right.length() - 1 - suffix_len;
+
+        // Ensure we're not going into the prefix region
+        if (left_pos < prefix_len || right_pos < prefix_len) {
+            break;
+        }
+
+        if (left[left_pos] == right[right_pos]) {
+            suffix_len++;
+        } else {
+            break;
+        }
+    }
+    result.suffix = left.substr(left.length() - suffix_len);
+
+    // Extract the remainders (the parts between prefix and suffix)
+    result.left  = left.substr(prefix_len, left.length() - prefix_len - suffix_len);
+    result.right = right.substr(prefix_len, right.length() - prefix_len - suffix_len);
+
+    // Fix tag boundaries by moving incomplete tags to left/right
+    // We iterate because:
+    // 1. fix_tag_boundaries may move content from prefix/suffix to left/right
+    // 2. After that, we find common suffix in left/right to extract
+    // 3. The extracted suffix might contain tag parts that need fixing
+    // We apply fix AFTER suffix extraction to ensure incomplete tags aren't left in suffix
+    diff_split prev_result;
+    do {
+        prev_result = result;
+
+        // First, find and extract any common suffix from left/right
+        size_t suffix_len = 0;
+        size_t min_len    = std::min(result.left.length(), result.right.length());
+        while (suffix_len < min_len) {
+            size_t left_pos  = result.left.length() - 1 - suffix_len;
+            size_t right_pos = result.right.length() - 1 - suffix_len;
+            if (result.left[left_pos] == result.right[right_pos]) {
+                suffix_len++;
+            } else {
+                break;
+            }
+        }
+
+        if (suffix_len > 0) {
+            std::string common_suffix = result.left.substr(result.left.length() - suffix_len);
+            result.suffix             = common_suffix + result.suffix;
+            result.left               = result.left.substr(0, result.left.length() - suffix_len);
+            result.right              = result.right.substr(0, result.right.length() - suffix_len);
+        }
+
+        // Then apply fix_tag_boundaries to move incomplete tags from prefix/suffix to left/right
+        result = fix_tag_boundaries(result);
+
+    } while (!(result == prev_result) && result.left != left && result.right != right);
+
+    return result;
+}
+
+// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right`
+std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right) {
+    // Find the common prefix of left and right
+    size_t common_prefix_len = 0;
+    size_t min_len           = std::min(left.length(), right.length());
+    while (common_prefix_len < min_len && left[common_prefix_len] == right[common_prefix_len]) {
+        common_prefix_len++;
+    }
+
+    // If there's no common prefix, return empty string
+    if (common_prefix_len == 0) {
+        return "";
+    }
+
+    // Find the common prefix in the full string
+    std::string common_prefix = left.substr(0, common_prefix_len);
+    size_t      pos           = full.find(common_prefix);
+
+    // If not found, return empty string
+    if (pos == std::string::npos) {
+        return "";
+    }
+
+    // Return everything before the common prefix
+    return full.substr(0, pos);
+}
+
+// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right`
+std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right) {
+    // Find the common suffix of left and right (compare from the end)
+    size_t common_suffix_len = 0;
+    size_t min_len           = std::min(left.length(), right.length());
+    while (common_suffix_len < min_len &&
+           left[left.length() - 1 - common_suffix_len] == right[right.length() - 1 - common_suffix_len]) {
+        common_suffix_len++;
+    }
+
+    // If there's no common suffix, return empty string
+    if (common_suffix_len == 0) {
+        return "";
+    }
+
+    // Extract the common suffix
+    std::string common_suffix = left.substr(left.length() - common_suffix_len);
+
+    // Find the last occurrence of the common suffix in the full string
+    size_t pos = full.rfind(common_suffix);
+
+    // If not found, return empty string
+    if (pos == std::string::npos) {
+        return "";
+    }
+
+    // Return everything after the common suffix
+    return full.substr(pos + common_suffix_len);
+}
+
+std::vector<segment> segmentize_markers(const std::string & text) {
+    std::vector<segment> retval;
+    bool in_marker = false;
+    char marker_opener = '\0';
+
+    auto is_marker_opener = [](char c) -> bool { return c == '<' || c == '['; };
+    auto is_marker_closer = [](char op, char c) -> bool { return (op == '<' && c == '>') || (op == '[' && c == ']'); };
+
+    size_t last_border = 0;
+
+    for (size_t cur_pos = 0; cur_pos < text.length(); cur_pos++) {
+        if (!in_marker && is_marker_opener(text[cur_pos])) {
+            if (last_border < cur_pos) {
+                retval.push_back(segment(segment_type::TEXT, text.substr(last_border, cur_pos - last_border)));
+            }
+            last_border = cur_pos;
+            in_marker = true;
+            marker_opener = text[cur_pos];
+        } else if (in_marker && is_marker_closer(marker_opener, text[cur_pos])) {
+            // no need to check because last_border will always be smaller
+                retval.push_back(segment(segment_type::MARKER, text.substr(last_border, cur_pos - last_border + 1)));
+            last_border = cur_pos + 1;
+            in_marker = false;
+            marker_opener = '\0';
+        }
+    }
+    if (last_border < text.length()) {
+            retval.push_back(segment(segment_type::TEXT, text.substr(last_border)));
+    }
+    return retval;
+}
+
+std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments) {
+    std::vector<segment> result;
+    for (const auto & seg : segments) {
+        if (!trim_whitespace(seg.value).empty()) {
+            result.push_back(seg);
+        }
+    }
+    return result;
+}
+
diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h
new file mode 100644
index 00000000000..445119be8e0
--- /dev/null
+++ b/common/chat-auto-parser-helpers.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "chat-diff-analyzer.h"
+#include <string>
+
+std::string trim_whitespace(const std::string & str);
+std::string trim_leading_whitespace(const std::string & str);
+std::string trim_trailing_whitespace(const std::string & str);
+std::string trim_trailing_newlines(const std::string & str);
+
+// calculate a diff split (longest common prefix, longest common suffix excluding prefix,
+// mismatched part on the left, mismatched part on the right) between two strings
+diff_split calculate_diff_split(const std::string & left, const std::string & right);
+
+// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right`
+std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right);
+
+// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right`
+std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right);
+
+// Segmentize text into markers and non-marker fragments
+std::vector<segment> segmentize_markers(const std::string & text);
+
+// Prune whitespace-only segments from a vector of segments
+std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments);
\ No newline at end of file
diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h
new file mode 100644
index 00000000000..c6587667d16
--- /dev/null
+++ b/common/chat-auto-parser.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include "chat-diff-analyzer.h"
+#include "chat.h"
+#include "chat-peg-parser.h"
+#include "common.h"
+
+#include <chrono>
+#include <string>
+
+using json = nlohmann::ordered_json;
+
+struct templates_params {
+    json                                  messages;
+    json                                  tools;
+    common_chat_tool_choice               tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
+    json                                  json_schema;
+    bool                                  parallel_tool_calls = true;
+    common_reasoning_format               reasoning_format    = COMMON_REASONING_FORMAT_AUTO;
+    bool                                  stream              = true;
+    std::string                           grammar;
+    bool                                  add_generation_prompt = false;
+    bool                                  enable_thinking       = true;
+    std::chrono::system_clock::time_point now                   = std::chrono::system_clock::now();
+    json                                  extra_context;
+    bool                                  add_bos       = false;
+    bool                                  add_eos       = false;
+    bool                                  is_inference  = true;
+    bool                                  add_inference = false;
+    bool                                  mark_input    = true;  // whether to mark input strings in the jinja context
+};
+
+class universal_peg_generator {
+  public:
+    static common_chat_params generate_parser(const common_chat_template &    tmpl,
+                                              const struct templates_params & inputs);
+
+    static common_chat_params generate_parser(const common_chat_template &    tmpl,
+                                              const struct templates_params & inputs,
+                                              const diff_analysis_result &    analysis);
+
+  private:
+    // Build unified parser (single code path for all formats)
+    static common_peg_arena build_parser(const diff_analysis_result &    analysis,
+                                         const struct templates_params & inputs,
+                                         bool                            thinking_forced_open,
+                                         bool                            thinking_forced_closed = false);
+
+    // Build tool calling parser based on detected format
+    static common_peg_parser build_tool_parser(common_chat_peg_unified_builder & p,
+                                               const diff_analysis_result &      analysis,
+                                               const templates_params &           inputs,
+                                               const common_peg_parser &          reasoning);
+};
diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp
new file mode 100644
index 00000000000..0082e3ab772
--- /dev/null
+++ b/common/chat-diff-analyzer.cpp
@@ -0,0 +1,1672 @@
+#include "chat-diff-analyzer.h"
+
+#include "chat-auto-parser-helpers.h"
+#include "chat-auto-parser.h"
+#include "chat.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+
+#include <algorithm>
+#include <cctype>
+
+#define ANSI_RESET  "\033[0m"
+#define ANSI_PURPLE "\033[1m\x1b[38;5;126m"
+#define ANSI_ORANGE "\033[1m\x1b[38;5;214m"
+#define ANSI_RED    "\033[1m\x1b[38;5;196m"
+
+using json = nlohmann::ordered_json;
+
+static std::vector<std::function<void(const common_chat_template & tmpl, diff_analysis_result &)>> workarounds(
+    { // Old reasoning Qwen templates - they don't really display reasoning content, but we still want to
+      // support reasoning on them
+      [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
+          if (tmpl.src.find("content.split('</think>')") != std::string::npos &&
+              analysis.reasoning == reasoning_mode::NONE) {
+              analysis.reasoning               = reasoning_mode::FORCED_OPEN;
+              analysis.markers.reasoning_start = "<think>";
+              analysis.markers.reasoning_end   = "</think>";
+              analysis.preserved_tokens.push_back("<think>");
+              analysis.preserved_tokens.push_back("</think>");
+              LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET);
+          }
+      },
+      // Granite 3.3, with separate reasoning and content markers
+      [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
+          if (tmpl.src.find("Write your thoughts between <think></think> and write your response between "
+                            "<response></response>") != std::string::npos) {
+              analysis.reasoning               = reasoning_mode::TAG_BASED;
+              analysis.markers.reasoning_start = "<think>";
+              analysis.markers.reasoning_end   = "</think>";
+              analysis.preserved_tokens.push_back("<think>");
+              analysis.preserved_tokens.push_back("</think>");
+              analysis.content               = content_mode::WRAPPED_WITH_REASONING;
+              analysis.markers.content_start = "<response>";
+              analysis.markers.content_end   = "</response>";
+              analysis.preserved_tokens.push_back("<response>");
+              analysis.preserved_tokens.push_back("</response>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Granite 3.3]\n" ANSI_RESET);
+          }
+      },
+      // Cohere Command R+ - content wrapped in <|CHATBOT_TOKEN|>...<|END_OF_TURN_TOKEN|>
+      [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
+          if (tmpl.src.find("<|CHATBOT_TOKEN|>") != std::string::npos &&
+              tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && analysis.markers.content_start.empty()) {
+              analysis.content               = content_mode::ALWAYS_WRAPPED;
+              analysis.markers.content_start = "<|CHATBOT_TOKEN|>";
+              analysis.markers.content_end   = "<|END_OF_TURN_TOKEN|>";
+              analysis.preserved_tokens.push_back("<|CHATBOT_TOKEN|>");
+              analysis.preserved_tokens.push_back("<|END_OF_TURN_TOKEN|>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Cohere Command R+]\n" ANSI_RESET);
+          }
+      },
+      // Functionary - no tool call section delimiter
+      [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
+          if (tmpl.src.find("set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", "
+                            "\"code_interpreter\") | list | length > 0") != std::string::npos) {
+              analysis.content                    = content_mode::PLAIN;
+              analysis.markers.content_end        = "";
+              analysis.markers.func_name_prefix   = "";
+              analysis.markers.tool_section_start = "";
+              analysis.markers.tool_section_end   = "";
+              analysis.markers.per_call_start     = "<function=";
+              analysis.markers.per_call_end       = "</function>";
+              analysis.markers.func_close         = "";
+              analysis.preserved_tokens.clear();
+              analysis.preserved_tokens.push_back("<|eot_id|>");
+              analysis.preserved_tokens.push_back("<|eom_id|>");
+              analysis.preserved_tokens.push_back("<function=");
+              analysis.preserved_tokens.push_back(">");
+              analysis.preserved_tokens.push_back("</function>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Functionary 3.1]\n" ANSI_RESET);
+          }
+      },
+      // DeepSeek-R1-Distill-Qwen
+      [](const common_chat_template & tmpl, diff_analysis_result & analysis) -> void {
+          if (tmpl.src.find(
+                  "{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>'") !=
+              std::string::npos) {
+              analysis.markers.tool_section_start = "<｜tool▁calls▁begin｜>";
+              analysis.markers.tool_section_end   = "<｜tool▁calls▁end｜>";
+              analysis.markers.per_call_start     = "<｜tool▁call▁begin｜>function";
+              analysis.markers.func_name_prefix   = "<｜tool▁sep｜>";
+              analysis.markers.per_call_end       = "<｜tool▁call▁end｜>";
+              analysis.markers.func_close         = "```";
+          }
+      } });
+
+// Common JSON structures
+static json params_schema = {
+    { "type",       "object"                                                           },
+    { "properties",
+     { { "first", { { "type", "string" }, { "description", "First argument" } } },
+        { "second", { { "type", "string" }, { "description", "Second argument" } } } } },
+    { "required",   json::array({})                                                    }
+};
+
+static json tools = json::array({
+    { { "type", "function" },
+     { "function",
+        json{ { "name", "foofoo" }, { "description", "Test function foo" }, { "parameters", params_schema } } } },
+    { { "type", "function" },
+     { "function",
+        json{ { "name", "barbar" }, { "description", "Test function bar" }, { "parameters", params_schema } } } }
+});
+
+static json user_msg = json{
+    { "role",    "user"  },
+    { "content", "Hello" }
+};
+
+static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call00001") {
+    return json{
+        { "id",       id                                              },
+        { "type",     "function"                                      },
+        { "function", json{ { "name", name }, { "arguments", args } } }
+    };
+}
+
+static json first_tool_call_zero_args         = build_tool_call("foofoo", json::object(), "call00001");
+static json first_tool_call_one_arg           = build_tool_call("foofoo",
+                                                                json{
+                                                                    { "first", "XXXX" }
+},
+                                                                "call00001");
+static json first_tool_call_one_arg_other_val = build_tool_call("foofoo",
+                                                                json{
+                                                                    { "first", "YYYY" }
+},
+                                                                "call00001");
+static json first_tool_call_other_arg         = build_tool_call("foofoo",
+                                                                json{
+                                                                    { "second", "YYYY" }
+},
+                                                                "call00001");
+static json first_tool_call                   = build_tool_call("foofoo",
+                                                                json{
+                                                                    { "first",  "XXXX" },
+                                                                    { "second", "YYYY" }
+},
+                                                                "call00001");
+static json second_tool_call                  = build_tool_call("barbar",
+                                                                json{
+                                                                    { "first",  "XXXX" },
+                                                                    { "second", "YYYY" }
+},
+                                                                "call00002");
+// Tool call variants with different IDs for call_id detection
+static json first_tool_call_alt_id            = build_tool_call("foofoo",
+                                                                json{
+                                                                    { "first",  "XXXX" },
+                                                                    { "second", "YYYY" }
+},
+                                                                "call99999");
+
+std::string differential_analyzer::apply_template(const common_chat_template & tmpl, const template_params & params) {
+    templates_params tmpl_params;
+    tmpl_params.messages              = params.messages;
+    tmpl_params.tools                 = params.tools;
+    tmpl_params.add_generation_prompt = params.add_generation_prompt;
+    tmpl_params.enable_thinking       = params.enable_thinking;
+
+    if (params.extra_context) {
+        tmpl_params.extra_context = *params.extra_context;
+    }
+    tmpl_params.extra_context["enable_thinking"] = params.enable_thinking;
+
+    try {
+        return common_chat_template_direct_apply(tmpl, tmpl_params);
+    } catch (const std::exception & e) {
+        LOG_DBG("Template application failed: %s\n", e.what());
+        return "";
+    }
+}
+
+std::optional<compare_variants_result> differential_analyzer::compare_variants(
+    const common_chat_template &                   tmpl,
+    const template_params &                        params_A,
+    const std::function<void(template_params &)> & params_modifier) {
+    // Create variant B by copying A
+    template_params params_B = params_A;
+
+    // Apply modifier to create variant B
+    if (params_modifier) {
+        params_modifier(params_B);
+    }
+    
+
+    // Apply template to both variants
+    std::string output_A = apply_template(tmpl, params_A);
+    std::string output_B = apply_template(tmpl, params_B);
+
+    // Check for template application failures
+    if (output_A.empty() || output_B.empty()) {
+        return std::nullopt;
+    }
+
+    // Calculate diff and return result with both outputs
+    compare_variants_result result;
+    result.diff     = calculate_diff_split(output_A, output_B);
+    result.output_A = output_A;
+    result.output_B = output_B;
+
+    return result;
+}
+
+diff_analysis_result differential_analyzer::analyze(const common_chat_template & tmpl) {
+    diff_analysis_result result;
+
+    LOG_DBG(ANSI_PURPLE "=== Starting differential analysis ===\n" ANSI_RESET);
+
+    auto caps                      = tmpl.original_caps();
+    result.supports_tools          = caps.supports_tools || caps.supports_tool_calls;
+    result.supports_parallel_calls = caps.supports_parallel_tool_calls;
+
+    analyze_reasoning(tmpl, result);
+    analyze_content(tmpl, result);
+    if (result.supports_tools) {
+        analyze_tools(tmpl, result);
+    }
+    collect_preserved_tokens(result);
+
+    for (auto & workaround : workarounds) {
+        workaround(tmpl, result);
+    }
+
+    LOG_DBG(ANSI_PURPLE "=== Differential analysis complete ===\n" ANSI_RESET);
+
+    return result;
+}
+
+void differential_analyzer::analyze_reasoning(const common_chat_template & tmpl, diff_analysis_result & result) {
+    LOG_DBG(ANSI_ORANGE "Phase 1: Reasoning analysis\n" ANSI_RESET);
+
+    compare_reasoning_presence(tmpl, result);
+    compare_thinking_enabled(tmpl, result);
+    if (result.supports_tools) {
+        compare_reasoning_scope(tmpl, result);
+    }
+}
+
+void differential_analyzer::compare_reasoning_presence(const common_chat_template & tmpl,
+                                                       diff_analysis_result &       result) {
+    json user_msg = json{
+        { "role",    "user"  },
+        { "content", "Hello" }
+    };
+
+    json assistant_no_reasoning = json{
+        { "role",    "assistant"   },
+        { "content", "I can help." }
+    };
+
+    json assistant_with_reasoning = json{
+        { "role",              "assistant"                },
+        { "content",           "I can help."              },
+        { "reasoning_content", "Let me think about this." }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_no_reasoning });
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_reasoning }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "R1: Template application failed, skipping reasoning detection\n" ANSI_RESET);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    LOG_DBG(ANSI_ORANGE "R1 diff - suffix: " ANSI_RESET "'%s', " ANSI_ORANGE "left: " ANSI_RESET "'%s', " ANSI_ORANGE
+                        "right: " ANSI_ORANGE "'%s'\n" ANSI_RESET,
+            diff.suffix.c_str(), diff.left.c_str(), diff.right.c_str());
+
+    const std::string reasoning_content = "Let me think about this.";
+
+    if (!diff.right.empty() && diff.right.find(reasoning_content) != std::string::npos) {
+        auto seg = prune_whitespace_segments(segmentize_markers(diff.right));
+        if (seg.size() >= 3 && trim_whitespace(seg[1].value) == reasoning_content) {
+            // easy one: opening marker - reasoning - closing marker (possibly with trailing whitespace)
+            result.reasoning               = reasoning_mode::TAG_BASED;
+            result.markers.reasoning_start = trim_whitespace(seg[0].value);
+            result.markers.reasoning_end   = trim_leading_whitespace(seg[2].value);
+            for (size_t i = 3; i < seg.size(); i++) {
+                result.markers.reasoning_end += seg[i].value;
+            }
+            // we always truncate because this doesn't really influence correctness but model might not always generate newline
+            result.markers.reasoning_end = trim_whitespace(result.markers.reasoning_end);
+        } else if (seg.size() >= 2 && trim_whitespace(seg[0].value) == reasoning_content) {
+            // delimited
+            result.reasoning             = reasoning_mode::DELIMITER;
+            result.markers.reasoning_end = trim_leading_whitespace(seg[1].value);
+            for (size_t i = 2; i < seg.size(); i++) {
+                result.markers.reasoning_end += seg[i].value;
+            }
+            result.markers.reasoning_end = trim_whitespace(result.markers.reasoning_end);
+        } else if (seg.size() == 1 && trim_whitespace(seg[0].value) == reasoning_content) {
+            // the marker might be in the prefix actually, let's check for case of
+            // left: empty
+            // right: reasoning_content
+            // suffix: <closing marker>content
+            // prefix: ...<opening marker>
+            auto suf_seg = prune_whitespace_segments(segmentize_markers(diff.suffix));
+            if (trim_whitespace(diff.left).empty() && suf_seg.size() >= 2 && suf_seg[0].type == segment_type::MARKER &&
+                trim_whitespace(suf_seg[1].value).substr(0, 11) == "I can help.") {
+                auto pre_seg = prune_whitespace_segments(segmentize_markers(diff.prefix));
+                if (pre_seg[pre_seg.size() - 1].type == segment_type::MARKER ||
+                    (pre_seg.size() > 1 && trim_whitespace(pre_seg[pre_seg.size() - 1].value).empty() &&
+                     pre_seg[pre_seg.size() - 2].type == segment_type::MARKER)) {
+                    auto marker_seg = pre_seg[pre_seg.size() - 1];
+                    if (marker_seg.type == segment_type::TEXT) {
+                        marker_seg = pre_seg[pre_seg.size() - 2];
+                    }
+                    result.reasoning               = reasoning_mode::FORCED_CLOSED;
+                    result.markers.reasoning_start = trim_whitespace(marker_seg.value);
+                    result.markers.reasoning_end   = trim_whitespace(suf_seg[0].value);
+                }
+            }
+        }
+    }
+}
+
+void differential_analyzer::compare_thinking_enabled(const common_chat_template & tmpl, diff_analysis_result & result) {
+    json user_msg = json{
+        { "role",    "user"  },
+        { "content", "Hello" }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg });
+    params.add_generation_prompt = true;
+    params.enable_thinking       = false;
+
+    auto comparison = compare_variants(tmpl, params, [&](template_params & p) { p.enable_thinking = true; });
+
+    if (!comparison) {
+        LOG_DBG("R2: Template application failed\n");
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    LOG_DBG("R2 diff - suffix: '%s', left: '%s', right: '%s'\n", diff.suffix.c_str(), diff.left.c_str(),
+            diff.right.c_str());
+
+    std::string left_trimmed = diff.left;
+    trim_whitespace(left_trimmed);
+
+    if (left_trimmed.empty() && !diff.right.empty()) {
+        std::string right_trimmed = diff.right;
+        trim_whitespace(right_trimmed);
+
+        if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) {
+            if (result.markers.reasoning_start.empty()) {
+                result.markers.reasoning_start = right_trimmed;
+                result.reasoning               = reasoning_mode::FORCED_OPEN;
+                LOG_DBG("R2: Detected forced-open reasoning with start marker: '%s'\n", right_trimmed.c_str());
+            }
+        }
+    }
+
+    if (result.markers.reasoning_start.empty() && !result.markers.reasoning_end.empty()) {
+        result.reasoning = reasoning_mode::DELIMITER;
+        LOG_DBG("R2: Delimiter-based reasoning detected (empty start, end: '%s')\n",
+                result.markers.reasoning_end.c_str());
+    }
+
+    // Check for FORCED_CLOSED: when enable_thinking=false produces both start and end markers,
+    // but enable_thinking=true produces only the start marker
+    if (!comparison->output_A.empty() && !comparison->output_B.empty()) {
+        std::string output_A = comparison->output_A;  // enable_thinking=false
+        std::string output_B = comparison->output_B;  // enable_thinking=true
+
+        // Both should end with the assistant role marker
+        // Check if output_A has both reasoning_start and reasoning_end markers
+        // while output_B has only reasoning_start
+        if (!result.markers.reasoning_start.empty()) {
+            // Check if output_A contains both start and end markers
+            bool A_has_start = output_A.find(result.markers.reasoning_start) != std::string::npos;
+            bool A_has_end   = !result.markers.reasoning_end.empty() &&
+                             output_A.find(result.markers.reasoning_end) != std::string::npos;
+
+            // Check if output_B contains only the start marker (and not the end marker)
+            bool B_has_start = output_B.find(result.markers.reasoning_start) != std::string::npos;
+            bool B_has_end   = !result.markers.reasoning_end.empty() &&
+                             output_B.find(result.markers.reasoning_end) != std::string::npos;
+
+            // For FORCED_CLOSED: A should have both, B should have only start
+            if (A_has_start && A_has_end && B_has_start && !B_has_end) {
+                result.reasoning = reasoning_mode::FORCED_CLOSED;
+                LOG_DBG("R2: Detected forced-closed reasoning\n");
+            }
+        } else if (!result.markers.reasoning_end.empty()) {
+            // We might not have detected the reasoning open marker until now,
+            // but this is another chance to do so
+            auto diff    = comparison->diff;
+            auto diff_rt = trim_whitespace(diff.right);
+            auto diff_lt = trim_whitespace(diff.left);
+            if (diff_rt.empty() && diff_lt == result.markers.reasoning_end) {
+                auto seg = segmentize_markers(trim_whitespace(diff.prefix));
+                if (!seg.empty() && seg[seg.size() - 1].type == MARKER) {  // this is FORCED_CLOSED
+                    result.markers.reasoning_start = seg[seg.size() - 1].value;
+                    result.reasoning               = reasoning_mode::FORCED_CLOSED;
+                }
+            }
+        }
+    }
+
+    // Check for slash-in-tag pattern: <think> vs </think>
+    // diff shows: suffix="think>", left="/", right="" (or vice versa)
+    if (result.markers.reasoning_start.empty() && result.markers.reasoning_end.empty()) {
+        if (diff.right.empty() && trim_whitespace(diff.left) == "/") {
+            auto seg_A = segmentize_markers(trim_trailing_whitespace(comparison->output_A));
+            auto seg_B = segmentize_markers(trim_trailing_whitespace(comparison->output_B));
+            if (!seg_A.empty() && !seg_B.empty() && seg_A[seg_A.size() - 1].type == segment_type::MARKER &&
+                seg_B[seg_B.size() - 1].type == segment_type::MARKER) {
+                result.reasoning               = reasoning_mode::FORCED_CLOSED;
+                result.markers.reasoning_start = seg_B[seg_B.size() - 1].value;
+                result.markers.reasoning_end   = seg_A[seg_A.size() - 1].value;
+            }
+        }
+    }
+}
+
+void differential_analyzer::compare_reasoning_scope(const common_chat_template & tmpl, diff_analysis_result & result) {
+    json assistant_reasoning_content = json{
+        { "role",              "assistant"            },
+        { "content",           "Here is my response." },
+        { "reasoning_content", "Let me think."        }
+    };
+
+    json assistant_reasoning_tools = json{
+        { "role",              "assistant"                                                             },
+        { "content",           nullptr                                                                 },
+        { "reasoning_content", "Let me think."                                                         },
+        { "tool_calls",
+         json::array({ build_tool_call("foofoo", json{ { "first", "VVVV" }, { "second", "XXXX" } }) }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_reasoning_content });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_reasoning_tools }); });
+
+    if (!comparison) {
+        LOG_DBG("R3: Template application failed\n");
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    std::string reasoning_content = "Let me think.";
+
+    LOG_DBG("R3 diff - prefix: '%s', suffix: '%s', left: '%s', right: '%s'\n", diff.prefix.c_str(), diff.suffix.c_str(),
+            diff.left.c_str(), diff.right.c_str());
+
+    // Check if reasoning only appears in variant B (with tools)
+    bool reasoning_in_A = comparison->output_A.find(reasoning_content) != std::string::npos;
+    bool reasoning_in_B = comparison->output_B.find(reasoning_content) != std::string::npos;
+
+    if (!reasoning_in_A && reasoning_in_B) {
+        result.reasoning = reasoning_mode::TOOLS_ONLY;
+        LOG_DBG("R3: Detected TOOLS_ONLY reasoning mode\n");
+
+        // Extract reasoning markers from output_B
+        // The reasoning_content is "Let me think."
+        size_t reasoning_pos = comparison->output_B.find(reasoning_content);
+        if (reasoning_pos != std::string::npos) {
+            // Find start marker before reasoning_content
+            std::string before_reasoning = comparison->output_B.substr(0, reasoning_pos);
+            before_reasoning             = trim_trailing_whitespace(before_reasoning);
+            auto segments_before         = segmentize_markers(before_reasoning);
+            std::reverse(segments_before.begin(), segments_before.end());
+
+            for (auto & segment : segments_before) {
+                if (segment.type == segment_type::MARKER) {
+                    result.markers.reasoning_start = segment.value;
+                    LOG_DBG("R3: Found reasoning_start: '%s'\n", result.markers.reasoning_start.c_str());
+                    break;
+                }
+            }
+
+            // Find end marker after reasoning_content
+            size_t      reasoning_end   = reasoning_pos + reasoning_content.length();
+            std::string after_reasoning = comparison->output_B.substr(reasoning_end);
+            after_reasoning             = trim_leading_whitespace(after_reasoning);
+
+            if (!after_reasoning.empty()) {
+                // Try to find matching end marker
+                if (!result.markers.reasoning_start.empty()) {
+                    auto segments = segmentize_markers(after_reasoning);
+                    for (auto & segment : segments) {
+                        if (segment.type == segment_type::MARKER) {
+                            result.markers.reasoning_end = segment.value;
+                            break;
+                        }
+                    }
+                    if (!result.markers.reasoning_end.empty()) {
+                        LOG_DBG("R3: Found reasoning_end (matched): '%s'\n", result.markers.reasoning_end.c_str());
+                    }
+                }
+            }
+        }
+    }
+}
+
+void differential_analyzer::analyze_content(const common_chat_template & tmpl, diff_analysis_result & result) {
+    LOG_DBG(ANSI_ORANGE "Phase 2: Content analysis\n" ANSI_RESET);
+
+    compare_content_values(tmpl, result);
+}
+
+void differential_analyzer::compare_content_values(const common_chat_template & tmpl, diff_analysis_result & result) {
+    json assistant_content_only = json{
+        { "role",    "assistant"     },
+        { "content", "Response text" }
+    };
+
+    json assistant_with_tools = json{
+        { "role",       "assistant"                                                                 },
+        { "content",    ""                                                                          },
+        { "tool_calls", json::array({ build_tool_call("test_func", json{ { "arg1", "value1" } }) }) }
+    };
+
+    json assistant_with_reasoning = json{
+        { "role",              "assistant"     },
+        { "content",           ""              },
+        { "reasoning_content", "Need to think" }
+    };
+
+    template_params params_content_only;
+    params_content_only.messages              = json::array({ user_msg, assistant_content_only });
+    params_content_only.add_generation_prompt = false;
+    params_content_only.enable_thinking       = true;
+    params_content_only.tools                 = tools;
+
+    auto comparison_with_tools = compare_variants(tmpl, params_content_only, [&](template_params & p) {
+        p.messages = json::array({ user_msg, assistant_with_tools });
+    });
+
+    auto comparison_with_reasoning = compare_variants(tmpl, params_content_only, [&](template_params & p) {
+        p.messages = json::array({ user_msg, assistant_with_reasoning });
+    });
+
+    if (!comparison_with_tools || !comparison_with_reasoning) {
+        LOG_DBG("C1: Template application failed\n");
+        return;
+    }
+
+    const auto & diff_tools     = comparison_with_tools->diff;
+    const auto & diff_reasoning = comparison_with_reasoning->diff;
+
+    std::string response = "Response text";
+
+    bool found_plain_content = false;
+    if (trim_whitespace(diff_tools.left) == response) {
+        auto segments = segmentize_markers(diff_reasoning.left);
+        if (trim_whitespace(diff_reasoning.left) == response ||
+            (segments.size() == 2 && trim_whitespace(segments[0].value) == response)) {
+            // We only have the content text in the diff (possibly with a stray EOG marker), so no markers
+            LOG_DBG("C1: No content markers\n");
+            result.content      = content_mode::PLAIN;
+            found_plain_content = true;
+        } else if (result.reasoning != reasoning_mode::NONE && !result.markers.reasoning_end.empty() &&
+                   diff_reasoning.left.find(result.markers.reasoning_end) != std::string::npos) {
+            std::string post_closed_reasoning = diff_reasoning.left.substr(
+                diff_reasoning.left.find(result.markers.reasoning_end) + result.markers.reasoning_end.length());
+            if (trim_whitespace(post_closed_reasoning) == "Response text") {
+                LOG_DBG("C1: No content markers after stripping reasoning close marker\n");
+                result.content      = content_mode::PLAIN;
+                found_plain_content = true;
+            }
+        }
+    }
+    if (!found_plain_content) {
+        std::string rdiff = diff_reasoning.left;
+        if (!result.markers.reasoning_end.empty() && rdiff.find(result.markers.reasoning_end) != std::string::npos) {
+            rdiff = rdiff.substr(rdiff.find(result.markers.reasoning_end) + result.markers.reasoning_end.length());
+        }
+        // Take the more promising diff
+        std::string pure_content = rdiff.length() > diff_tools.left.length() ? rdiff : diff_tools.left;
+        size_t      pos          = pure_content.find("Response text");
+        if (pos == std::string::npos) {
+            LOG_DBG("C1: Error: response text not found - improper template application?");
+            return;
+        }
+        result.markers.content_start = trim_leading_whitespace(pure_content.substr(0, pos));
+        result.markers.content_end =
+            trim_leading_whitespace(pure_content.substr(pos + 13));  // 13 - len of "Response text"
+        // TODO: WRAPPED_WITH_REASONING
+    }
+
+    // Determine content mode
+    if (!result.markers.content_start.empty() || !result.markers.content_end.empty()) {
+        result.content = content_mode::ALWAYS_WRAPPED;
+        LOG_DBG("C1: Content is ALWAYS_WRAPPED\n");
+        // TODO: END_DELIMITED content mode - delimited at end but not at start?
+    }
+}
+
+void differential_analyzer::analyze_tool_call_format(const std::string &    haystack,
+                                                     const std::string &    fun_name_needle,
+                                                     const std::string &    arg_name_needle,
+                                                     diff_analysis_result & result) {
+    if (fun_name_needle.empty() || arg_name_needle.empty() || haystack.empty()) {
+        return;
+    }
+
+    auto in_json_haystack = [&haystack](const std::string & needle) -> bool {
+        // Find the needle in the haystack
+        size_t needle_pos = haystack.find(needle);
+        if (needle_pos == std::string::npos) {
+            return false;
+        }
+        if (needle_pos < 2) {
+            return false;  // not enough space for a JSON structure
+        }
+        if (haystack[needle_pos - 1] == '\'' || haystack[needle_pos - 1] == '"') {
+            int cur = needle_pos - 2;
+            for (; cur >= 0 && std::isspace(haystack[cur]); cur--) {
+            }
+            if (haystack[cur] == ':' || haystack[cur] == '{') {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    if (in_json_haystack(fun_name_needle)) {
+        // no need to check further, we're in JSON land
+        result.tools = tool_format::JSON_NATIVE;
+    } else if (in_json_haystack(arg_name_needle)) {
+        result.tools = tool_format::TAG_WITH_JSON;
+    } else {
+        result.tools = tool_format::TAG_WITH_TAGGED;
+    }
+
+    // first, remove any reasoning markers
+    std::string clean_haystack = haystack;
+    if (!result.markers.reasoning_start.empty()) {
+        auto pos = haystack.find(result.markers.reasoning_start);
+        if (pos != std::string::npos) {
+            clean_haystack = haystack.substr(0, pos) + haystack.substr(pos + result.markers.reasoning_start.length());
+        }
+    }
+    if (!result.markers.reasoning_end.empty()) {
+        auto pos = clean_haystack.find(result.markers.reasoning_end);
+        if (pos != std::string::npos) {
+            clean_haystack =
+                clean_haystack.substr(0, pos) + clean_haystack.substr(pos + result.markers.reasoning_end.length());
+        }
+    }
+
+    if (result.tools == tool_format::JSON_NATIVE) {
+        analyze_tool_call_format_json_native(clean_haystack, fun_name_needle, arg_name_needle, result);
+    } else {
+        analyze_tool_call_format_non_json(clean_haystack, fun_name_needle, result);
+    }
+    // always relax whitespace requirements on ending markers since they don't influence content
+    result.markers.tool_section_end = trim_whitespace(result.markers.tool_section_end);
+    result.markers.per_call_end     = trim_whitespace(result.markers.per_call_end);
+}
+
+void differential_analyzer::analyze_tool_call_format_json_native(const std::string &    clean_haystack,
+                                                                 const std::string &    fun_name_needle,
+                                                                 const std::string &    arg_name_needle,
+                                                                 diff_analysis_result & result) {
+    // we might not have the typical OpenAI tool calling structure
+    int  json_start     = clean_haystack.find_first_of('{');
+    int  json_end       = clean_haystack.find_last_of('}');
+    std::string cut     = clean_haystack.substr(json_start, json_end - json_start + 1);
+    json call_struct    = json::parse(cut);
+    auto register_field = [&](const std::string &                                             prefix,
+                              const nlohmann::detail::iteration_proxy_value<json::iterator> & subel) {
+        if (subel.value().is_string() && std::string(subel.value()).find("call0000") != std::string::npos) {
+            result.id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        } else if (subel.value().is_string() && std::string(subel.value()) == fun_name_needle) {
+            result.name_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        } else if (subel.value().dump().find(arg_name_needle) !=
+                   std::string::npos) {  // handle both string and JSON obj variants
+            result.args_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        } else if (subel.key().find("id") != std::string::npos) {
+            // heuristics for generated id field
+            result.gen_id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        }
+    };
+    for (const auto & el : call_struct.items()) {
+        if (el.key() == fun_name_needle) {
+            result.fun_name_is_key = true;
+            // When function name is the key, there's no name field and args are direct
+            result.name_field.clear();
+            result.args_field.clear();
+            // Don't register this element - the function name IS the key, not a field
+        } else {
+            if (el.value().is_object() &&
+                el.value().dump().find(arg_name_needle) == std::string::npos) {  // not the args object
+                result.function_field = el.key();
+                for (const auto & subel : el.value().items()) {
+                    register_field(el.key(), subel);
+                }
+            }
+            // Register this element as a potential field
+            register_field("", el);
+        }
+    }
+    // TODO: support for generated (not provided) tool call IDs
+    auto space_or_bracket = [](bool opening, char c) -> bool {
+        return std::isspace(c) || (opening ? c == '[' : c == ']');
+    };
+    // now let's check if we're in an array construction, mark it if so and get out of it
+    if (json_start > 0 && space_or_bracket(true, clean_haystack[json_start - 1])) {
+        for (--json_start; space_or_bracket(true, clean_haystack[json_start]) && json_start >= 0; json_start--) {
+            if (clean_haystack[json_start] == '[') {
+                result.tools_array_wrapped = true;
+                break;
+            }
+        }
+        if (!result.tools_array_wrapped) {
+            json_start++;  // we ate into the last pre-json character
+        }
+    }
+    if (json_end < (int) clean_haystack.length() - 1 && space_or_bracket(false, clean_haystack[json_end + 1])) {
+        for (++json_end;
+             space_or_bracket(false, clean_haystack[json_end]) && json_end < (int) clean_haystack.length() - 1;
+             json_end++) {
+        }
+    }
+
+    std::vector<std::pair<size_t, std::string>> located_params;
+    if (!result.name_field.empty()) {
+        located_params.push_back({ clean_haystack.find(result.name_field), result.name_field });
+    }
+    if (!result.args_field.empty()) {
+        located_params.push_back({ clean_haystack.find(result.args_field), result.args_field });
+    }
+    if (!result.id_field.empty()) {
+        located_params.push_back({ clean_haystack.find(result.id_field), result.id_field });
+    }
+    if (!result.gen_id_field.empty()) {
+        located_params.push_back({ clean_haystack.find(result.gen_id_field), result.gen_id_field });
+    }
+    std::sort(located_params.begin(), located_params.end());
+    for (auto & pair : located_params) {
+        result.parameter_order.push_back(pair.second);
+    }
+    // we can immediately extract tool calling markers too
+    result.markers.tool_section_start = trim_leading_whitespace(clean_haystack.substr(0, json_start));
+    result.markers.tool_section_end   = trim_whitespace(clean_haystack.substr(json_end));
+    // When tools_array_wrapped is true, the closing bracket is part of the array structure,
+    // not a separate section end marker. Clear tool_section_end to avoid duplicate brackets.
+    if (result.tools_array_wrapped && result.markers.tool_section_end == "]") {
+        result.markers.tool_section_end.clear();
+    }
+}
+
+void differential_analyzer::analyze_tool_call_format_non_json(const std::string &    clean_haystack,
+                                                              const std::string &    fun_name_needle,
+                                                              diff_analysis_result & result) {
+    // we need to split by markers...
+    auto haystack_split = segmentize_markers(trim_leading_whitespace(clean_haystack));
+    int  where_is_nemo  = 0;
+    int  i              = 0;
+    for (auto & segment : haystack_split) {
+        if (segment.value.find(fun_name_needle) != std::string::npos) {
+            where_is_nemo = i;
+            break;
+        }
+        i++;
+    }
+
+    // basically the rule here is:
+    // - we append everything adjacent to a marker to the marker (treat it as part of the marker)
+    // - we assume symmetry (as many opening as closing markers)
+    // - we count the number of opening markers and then try to move backwards from the end until we've
+    //   eaten as many closing markers as there were opening markers
+    if (where_is_nemo > 1) {  // we might have more than one marker set here
+        std::vector<segment> preceding_markers;
+        for (int seg = where_is_nemo - 1; seg >= 0; seg--) {
+            if (haystack_split[seg].type == MARKER) {
+                preceding_markers.push_back(haystack_split[seg]);
+            }
+        }
+        size_t how_many_markers = preceding_markers.size();
+        if (how_many_markers > 1) {
+            bool had_marker = false;
+            for (int seg = where_is_nemo - 1; seg >= 0; seg--) {
+                if (haystack_split[seg].type == MARKER) {
+                    if (!had_marker) {
+                        had_marker                    = true;
+                        result.markers.per_call_start = haystack_split[seg].value + result.markers.per_call_start;
+                    } else {
+                        result.markers.tool_section_start =
+                            haystack_split[seg].value + result.markers.tool_section_start;
+                    }
+                } else {
+                    if (had_marker) {
+                        result.markers.tool_section_start =
+                            haystack_split[seg].value + result.markers.tool_section_start;
+                    } else {
+                        result.markers.per_call_start = haystack_split[seg].value + result.markers.per_call_start;
+                    }
+                }
+            }
+            had_marker                = false;
+            size_t backtracked_so_far = 0;
+            for (size_t seg = haystack_split.size() - 1; seg > (size_t) where_is_nemo; seg--) {
+                if (haystack_split[seg].type == MARKER) {
+                    backtracked_so_far++;
+                    if (!had_marker) {
+                        had_marker                      = true;
+                        result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end;
+                    } else {
+                        result.markers.per_call_end = haystack_split[seg].value + result.markers.per_call_end;
+                    }
+                } else {
+                    if (had_marker) {
+                        result.markers.per_call_end = haystack_split[seg].value + result.markers.per_call_end;
+                    } else {
+                        result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end;
+                    }
+                }
+                if (backtracked_so_far >= how_many_markers) {
+                    break;
+                }
+            }
+        } else {
+            for (int seg = 0; seg < where_is_nemo; seg++) {
+                result.markers.tool_section_start += haystack_split[seg].value;
+            }
+            for (size_t seg = haystack_split.size() - 1; seg > (size_t) where_is_nemo; seg--) {
+                result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end;
+                if (haystack_split[seg].type == segment_type::MARKER) {
+                    break;
+                }
+            }
+        }
+    } else {
+        result.markers.tool_section_start += haystack_split[0].value;
+        for (size_t seg = haystack_split.size() - 1; seg > (size_t) where_is_nemo; seg--) {
+            result.markers.tool_section_end = haystack_split[seg].value + result.markers.tool_section_end;
+            if (haystack_split[seg].type == segment_type::MARKER) {
+                break;
+            }
+        }
+    }
+}
+
+void differential_analyzer::analyze_tools(const common_chat_template & tmpl, diff_analysis_result & result) {
+    LOG_DBG(ANSI_ORANGE "Phase 3: Tool call analysis\n" ANSI_RESET);
+    analyze_tool_calls(tmpl, result);
+
+    if (result.tools == tool_format::NONE) {
+        LOG_DBG("T1: No tool support found\n");
+        // Continue anyway - we may still have useful markers
+    } else if (result.tools != tool_format::JSON_NATIVE) {
+        if (result.supports_parallel_calls) {
+            check_per_call_markers(tmpl, result);
+        }
+        extract_function_markers(tmpl, result);
+        extract_argument_separator(tmpl, result);
+        extract_args_markers(tmpl, result);
+        extract_call_id_markers(tmpl, result);
+        if (result.tools == tool_format::TAG_WITH_TAGGED) {
+            analyze_arguments(tmpl, result);
+        }
+    }
+}
+
+void differential_analyzer::check_per_call_markers(const common_chat_template & tmpl, diff_analysis_result & result) {
+    json assistant_one_tool = json{
+        { "role",       "assistant"                      },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    json assistant_two_tools = json{
+        { "role",       "assistant"                                        },
+        { "content",    ""                                                 },
+        { "tool_calls", json::array({ first_tool_call, second_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_one_tool });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto one_vs_two = compare_variants(
+        tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_tools }); });
+
+    if (!one_vs_two) {
+        LOG_DBG("T2: Generating double tool call comparison failed\n");
+        return;
+    }
+
+    std::string second_tool_content = trim_leading_whitespace(one_vs_two->diff.right);
+    if (!result.markers.tool_section_start.empty() &&
+        second_tool_content.find(result.markers.tool_section_start) == 0) {
+        result.markers.per_call_start = result.markers.tool_section_start;
+        result.markers.per_call_end   = result.markers.tool_section_end;
+        result.markers.tool_section_start.clear();
+        result.markers.tool_section_end.clear();
+    }
+}
+
+void differential_analyzer::analyze_tool_calls(const common_chat_template & tmpl, diff_analysis_result & result) {
+    json assistant_no_tools = json{
+        { "role",    "assistant" },
+        { "content", "Response." }
+    };
+
+    json assistant_with_tools = json{
+        { "role",       "assistant"                      },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_no_tools });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_tools }); });
+
+    if (!comparison) {
+        LOG_DBG("T1: Template application failed\n");
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+    LOG_DBG("T1 diff - prefix: '%s', suffix: '%s'\n", diff.prefix.c_str(), diff.suffix.c_str());
+    LOG_DBG("T1 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str());
+
+    std::string tool_section = diff.right;
+
+    if (tool_section.empty()) {
+        return;
+    }
+
+    analyze_tool_call_format(tool_section, "foofoo", "first", result);
+
+    LOG_DBG("T1: tool_section_start='%s', tool_section_end='%s'\n", result.markers.tool_section_start.c_str(),
+            result.markers.tool_section_end.c_str());
+}
+
+void differential_analyzer::extract_call_separator(const common_chat_template & tmpl,
+                                                   diff_analysis_result &       result,
+                                                   std::string &                second_call_content) {
+    json assistant_one_call = json{
+        { "role",       "assistant"                      },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    json assistant_two_calls = json{
+        { "role",       "assistant"                                        },
+        { "content",    ""                                                 },
+        { "tool_calls", json::array({ first_tool_call, second_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_one_call });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_calls }); });
+
+    if (!comparison) {
+        LOG_DBG("T2: Template application failed\n");
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+    LOG_DBG("T2 diff - prefix: '%s', suffix: '%s'\n", diff.prefix.c_str(), diff.suffix.c_str());
+    LOG_DBG("T2 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str());
+
+    if (!diff.right.empty()) {
+        std::string first_func_name  = "foofoo";
+        std::string second_func_name = "barbar";
+
+        std::string separator         = until_common_prefix(diff.right, first_func_name, second_func_name);
+        result.markers.call_separator = trim_whitespace(separator);
+
+        LOG_DBG("T2: call_separator='%s'\n", result.markers.call_separator.c_str());
+
+        result.supports_parallel_calls = true;
+        second_call_content            = diff.right;
+
+        LOG_DBG("T2: second_call_content='%s', supports_parallel_calls=true\n", second_call_content.c_str());
+    }
+}
+
+void differential_analyzer::extract_function_markers(const common_chat_template & tmpl, diff_analysis_result & result) {
+    json assistant_nocall = json{
+        { "role",    "assistant" },
+        { "content", "BBBB"      },
+    };
+
+    json assistant_foofoo = json{
+        { "role",       "assistant"                      },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    json assistant_barbar = json{
+        { "role",       "assistant"                       },
+        { "content",    ""                                },
+        { "tool_calls", json::array({ second_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_foofoo });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_barbar }); });
+
+    if (!comparison) {
+        LOG_DBG("T3: Template application failed\n");
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+    LOG_DBG("T3 diff - suffix: '%s'\n", diff.suffix.c_str());
+    LOG_DBG("T3 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str());
+
+    if (diff.left.find("foofoo") != std::string::npos && diff.right.find("barbar") != std::string::npos) {
+        std::string prefix_marker;
+        if (!result.markers.per_call_start.empty()) {
+            prefix_marker = result.markers.per_call_start;
+        } else {
+            prefix_marker = result.markers.tool_section_start;
+        }
+        if (!prefix_marker.empty() && diff.prefix.rfind(prefix_marker) != std::string::npos) {
+            result.markers.func_name_prefix =
+                diff.prefix.substr(diff.prefix.rfind(prefix_marker) + prefix_marker.size());
+        }
+
+        auto seg = segmentize_markers(diff.left);
+        for (const auto & s : seg) {
+            if (s.value.find("foofoo") == std::string::npos) {
+                result.markers.func_name_prefix += s.value;
+            } else {
+                size_t      pos  = s.value.find("foofoo");
+                std::string pre  = s.value.substr(0, pos);
+                std::string post = s.value.substr(pos + 6);  // 6 = len("foofoo")
+                result.markers.func_name_prefix += pre;
+                result.markers.func_name_suffix += post;
+                break;
+            }
+        }
+
+        auto   seg_suf           = segmentize_markers(diff.suffix);
+        size_t stop              = 0;
+        size_t stop_internal_pos = 0;
+        for (const auto & ss : seg_suf) {
+            bool has_needle = false;
+            if (result.tools == tool_format::TAG_WITH_JSON) {
+                has_needle = (ss.type == segment_type::TEXT && ss.value.find_first_of("{[") != std::string::npos);
+                if (has_needle) {
+                    stop_internal_pos = ss.value.find_first_of("{[");
+                    break;
+                }
+            } else {
+                has_needle = ss.value.find("first") != std::string::npos;
+                if (has_needle) {
+                    stop_internal_pos = ss.value.find("first");
+                    break;
+                }
+            }
+            stop++;
+        }
+        if (stop < seg_suf.size() - 1) {
+            if (result.tools == tool_format::TAG_WITH_TAGGED) {
+                size_t how_far = 0;
+                if (stop > 0) {
+                    if (seg_suf[stop].type == segment_type::MARKER) {
+                        how_far = stop;
+                    } else {
+                        how_far = stop - 1;
+                    }
+                    for (size_t i = 0; i < how_far; i++) {
+                        result.markers.func_name_suffix += seg_suf[i].value;
+                    }
+                }
+            } else {
+                for (size_t i = 0; i < stop; i++) {
+                    result.markers.func_name_suffix += seg_suf[i].value;
+                }
+                const std::string & stopper = seg_suf[stop].value;
+                result.markers.func_name_suffix += stopper.substr(0, stop_internal_pos);
+            }
+        }
+
+        // now just to find the closer
+        std::string suffix_marker;
+        if (!result.markers.per_call_end.empty()) {
+            suffix_marker = result.markers.per_call_end;
+        } else {
+            suffix_marker = result.markers.tool_section_end;
+        }
+        std::string closer_suffix;
+        if (suffix_marker.empty()) {
+            // we'll have to rely on an extra diff with no-calls version
+            auto notool_comp = compare_variants(
+                tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_nocall }); });
+            auto nt_diff  = notool_comp->diff;
+            closer_suffix = nt_diff.left.substr(nt_diff.left.find("YYYY") + 4);
+        } else {
+            closer_suffix = diff.suffix.substr(0, diff.suffix.find(suffix_marker));
+        }
+        if (!closer_suffix.empty()) {
+            auto   closer_seg             = segmentize_markers(closer_suffix);
+            bool   need_to_eat_arg_marker = (result.tools == tool_format::TAG_WITH_TAGGED);
+            size_t last_arg_seg           = closer_seg.size() - 1;
+            for (int i = (int) closer_seg.size() - 1; i >= 0; i--) {
+                if (closer_seg[i].value.find("YYYY") != std::string::npos) {
+                    last_arg_seg = i;
+                }
+            }
+            if (result.tools == tool_format::TAG_WITH_JSON) {
+                const auto & entire_seg = closer_seg[last_arg_seg].value;
+                size_t       pos        = entire_seg.find_last_of("}]");
+                if (pos != std::string::npos && pos < entire_seg.size() - 1) {
+                    result.markers.func_close = trim_leading_whitespace(entire_seg.substr(pos + 1));
+                }
+            }
+            for (size_t i = last_arg_seg + 1; i < closer_seg.size(); i++) {
+                if (closer_seg[i].type == segment_type::MARKER) {
+                    if (need_to_eat_arg_marker) {
+                        need_to_eat_arg_marker = false;
+                    } else {
+                        result.markers.func_close += closer_seg[i].value;
+                    }
+                } else if (!need_to_eat_arg_marker) {
+                    result.markers.func_close += closer_seg[i].value;
+                }
+            }
+        }
+        result.markers.func_close = trim_leading_whitespace(result.markers.func_close);
+
+        LOG_DBG("T3: func_name_prefix='%s', func_name_suffix='%s', func_close='%s'\n",
+                result.markers.func_name_prefix.c_str(), result.markers.func_name_suffix.c_str(),
+                result.markers.func_close.c_str());
+    }
+}
+
+void differential_analyzer::extract_argument_separator(const common_chat_template & tmpl,
+                                                       diff_analysis_result &       result) {
+    json assistant_one_arg = json{
+        { "role",       "assistant"                              },
+        { "content",    ""                                       },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    json assistant_two_args = json{
+        { "role",       "assistant"                      },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_one_arg });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_args }); });
+
+    if (!comparison) {
+        LOG_DBG("T4: Template application failed\n");
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+    LOG_DBG("T4 diff - suffix: '%s'\n", diff.suffix.c_str());
+    LOG_DBG("T4 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str());
+
+    if (!diff.right.empty()) {
+        std::string separator        = until_common_prefix(diff.right, "first", "second");
+        result.markers.arg_separator = separator;
+        LOG_DBG("T4: arg_separator='%s'\n", result.markers.arg_separator.c_str());
+    }
+}
+
+void differential_analyzer::extract_args_markers(const common_chat_template & tmpl, diff_analysis_result & result) {
+    json assistant_no_args = json{
+        { "role",       "assistant"                                },
+        { "content",    ""                                         },
+        { "tool_calls", json::array({ first_tool_call_zero_args }) }
+    };
+
+    json assistant_with_args = json{
+        { "role",       "assistant"                              },
+        { "content",    ""                                       },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_no_args });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_args }); });
+
+    if (!comparison) {
+        LOG_DBG("T5: Template application failed\n");
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+    LOG_DBG("T5 diff - suffix: '%s'\n", diff.suffix.c_str());
+    LOG_DBG("T5 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str());
+
+    if (result.markers.args_start.empty() && result.tools != tool_format::JSON_NATIVE) {
+        std::string prefix_marker = !result.markers.tool_section_start.empty() ? result.markers.tool_section_start :
+                                                                                 result.markers.per_call_start;
+        std::string suffix_marker =
+            !result.markers.tool_section_end.empty() ? result.markers.tool_section_end : result.markers.per_call_end;
+        // these might happen earlier in the tools section as an example or somewhere else, so we need to find the closest ones
+        size_t prefix_pos = prefix_marker.empty() ? 0 : diff.prefix.rfind(prefix_marker);
+        size_t suffix_pos = suffix_marker.empty() ? diff.suffix.size() : diff.suffix.find(suffix_marker);
+        if (prefix_pos == std::string::npos) {
+            prefix_pos = 0;
+        }
+        if (suffix_pos == std::string::npos) {
+            suffix_pos = diff.suffix.size();
+        }
+        std::string prefix_cut = diff.prefix.substr(prefix_pos + prefix_marker.size());
+        std::string suffix_cut = diff.suffix.substr(0, suffix_pos);
+        std::string args_start = until_common_prefix(prefix_cut, "{}", "{\"first\":");
+        std::string args_end   = after_common_suffix(suffix_cut, "{}", "\"XXXX\"}");
+
+        if (!args_start.empty() || !args_end.empty()) {
+            result.markers.args_start = args_start;
+            result.markers.args_end   = args_end;
+            LOG_DBG("T5: Custom argument container detected: start='%s', end='%s'\n", args_start.c_str(),
+                    args_end.c_str());
+        }
+    }
+}
+
+void differential_analyzer::extract_call_id_markers(const common_chat_template & tmpl, diff_analysis_result & result) {
+    json assistant_id1 = json{
+        { "role",       "assistant"                      },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    json assistant_id2 = json{
+        { "role",       "assistant"                             },
+        { "content",    ""                                      },
+        { "tool_calls", json::array({ first_tool_call_alt_id }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_id1 });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_id2 }); });
+
+    if (!comparison) {
+        LOG_DBG("T6: Template application failed for call_id detection\n");
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+    LOG_DBG("T6 diff (call_id) - prefix: '%s', suffix: '%s'\n", diff.prefix.c_str(), diff.suffix.c_str());
+    LOG_DBG("T6 diff (call_id) - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str());
+
+    if (diff.left.empty() && diff.right.empty()) {
+        LOG_DBG("T6: No call_id difference detected\n");
+        return;
+    }
+
+    std::string id_value_1 = "call00001";
+    std::string id_value_2 = "call99999";
+
+    size_t common_id_prefix_len = 0;
+    for (size_t i = 0; i < std::min(id_value_1.length(), id_value_2.length()); i++) {
+        if (id_value_1[i] == id_value_2[i]) {
+            common_id_prefix_len++;
+        } else {
+            break;
+        }
+    }
+    std::string common_id_part = id_value_1.substr(0, common_id_prefix_len);
+
+    // Check if the function name is in the prefix (normal case: BETWEEN_FUNC_AND_ARGS or POST_ARGS)
+    // or in the suffix (call_id is PRE_FUNC_NAME)
+    std::string func_name           = "foofoo";
+    size_t      func_name_in_prefix = diff.prefix.rfind(func_name);
+    size_t      func_name_in_suffix = diff.suffix.find(func_name);
+
+    if (func_name_in_prefix != std::string::npos && func_name_in_suffix == std::string::npos) {
+        // Function name is only in prefix - call_id is BETWEEN_FUNC_AND_ARGS or POST_ARGS
+        // Check if args indicator "{" is in prefix or suffix
+        size_t args_in_prefix = diff.prefix.find('{', func_name_in_prefix);
+        size_t args_in_suffix = diff.suffix.find('{');
+
+        if (args_in_suffix != std::string::npos &&
+            (args_in_prefix == std::string::npos || args_in_prefix > diff.prefix.length())) {
+            // Args are in suffix, so call_id is BETWEEN_FUNC_AND_ARGS
+            result.call_id_pos = call_id_position::BETWEEN_FUNC_AND_ARGS;
+            LOG_DBG("T6: Detected BETWEEN_FUNC_AND_ARGS position\n");
+
+            // The prefix ends with: ...<func_name><func_name_suffix><call_id_prefix><common_id_part>
+            // Segmentize to find the call_id_prefix marker
+            std::string after_func = diff.prefix.substr(func_name_in_prefix + func_name.length());
+            auto        segments   = segmentize_markers(after_func);
+
+            std::string marker_before_id;
+            for (size_t i = 0; i < segments.size(); i++) {
+                if (segments[i].type == segment_type::MARKER) {
+                    // Check if the next segment (if any) contains the common_id_part
+                    if (i + 1 < segments.size() && segments[i + 1].value.find(common_id_part) != std::string::npos) {
+                        marker_before_id = segments[i].value;
+                        break;
+                    }
+                    // Or if this is the last marker and the text after contains common_id_part
+                    if (i == segments.size() - 1 ||
+                        (i + 1 < segments.size() && segments[i + 1].type == segment_type::TEXT &&
+                         segments[i + 1].value.find(common_id_part) != std::string::npos)) {
+                        marker_before_id = segments[i].value;
+                    }
+                }
+            }
+
+            if (!marker_before_id.empty()) {
+                result.markers.call_id_prefix = marker_before_id;
+                LOG_DBG("T6: call_id_prefix='%s'\n", result.markers.call_id_prefix.c_str());
+            } else {
+                // Fallback: look for the last marker in after_func
+                for (int i = (int) segments.size() - 1; i >= 0; i--) {
+                    if (segments[i].type == segment_type::MARKER) {
+                        result.markers.call_id_prefix = segments[i].value;
+                        LOG_DBG("T6: call_id_prefix (fallback)='%s'\n", result.markers.call_id_prefix.c_str());
+                        break;
+                    }
+                }
+            }
+
+            // Extract call_id_suffix: the first marker in the suffix before args
+            auto suffix_segments = segmentize_markers(diff.suffix);
+            for (size_t i = 0; i < suffix_segments.size(); i++) {
+                if (suffix_segments[i].type == segment_type::MARKER) {
+                    result.markers.call_id_suffix = suffix_segments[i].value;
+                    LOG_DBG("T6: call_id_suffix='%s'\n", result.markers.call_id_suffix.c_str());
+                    break;
+                }
+                // Stop if we hit the args
+                if (suffix_segments[i].value.find('{') != std::string::npos) {
+                    break;
+                }
+            }
+        } else if (args_in_prefix != std::string::npos) {
+            // Args are in prefix, so call_id is POST_ARGS
+            result.call_id_pos = call_id_position::POST_ARGS;
+            LOG_DBG("T6: POST_ARGS call_id position detected\n");
+
+            // Extract markers from between args and the ID
+            std::string after_args    = diff.prefix.substr(args_in_prefix);
+            size_t      closing_brace = after_args.rfind('}');
+            if (closing_brace != std::string::npos) {
+                std::string between_args_and_id = after_args.substr(closing_brace + 1);
+                auto        segments            = segmentize_markers(between_args_and_id);
+                for (int i = (int) segments.size() - 1; i >= 0; i--) {
+                    if (segments[i].type == segment_type::MARKER) {
+                        result.markers.call_id_prefix = segments[i].value;
+                        LOG_DBG("T6: call_id_prefix='%s'\n", result.markers.call_id_prefix.c_str());
+                        break;
+                    }
+                }
+            }
+
+            // call_id_suffix would be in the suffix (first marker)
+            auto suffix_segments = segmentize_markers(diff.suffix);
+            for (const auto & seg : suffix_segments) {
+                if (seg.type == segment_type::MARKER) {
+                    result.markers.call_id_suffix = seg.value;
+                    LOG_DBG("T6: call_id_suffix='%s'\n", result.markers.call_id_suffix.c_str());
+                    break;
+                }
+            }
+        }
+    } else if (func_name_in_suffix != std::string::npos && func_name_in_prefix == std::string::npos) {
+        // Function name is only in suffix - call_id is PRE_FUNC_NAME
+        result.call_id_pos = call_id_position::PRE_FUNC_NAME;
+        LOG_DBG("T6: PRE_FUNC_NAME call_id position detected\n");
+
+        // Extract call_id_prefix from prefix (last marker before the common_id_part)
+        auto prefix_segments = segmentize_markers(diff.prefix);
+        for (int i = (int) prefix_segments.size() - 1; i >= 0; i--) {
+            if (prefix_segments[i].type == segment_type::MARKER) {
+                result.markers.call_id_prefix = prefix_segments[i].value;
+                LOG_DBG("T6: call_id_prefix='%s'\n", result.markers.call_id_prefix.c_str());
+                break;
+            }
+        }
+
+        // Extract call_id_suffix from suffix (first marker before func_name)
+        std::string before_func     = diff.suffix.substr(0, func_name_in_suffix);
+        auto        suffix_segments = segmentize_markers(before_func);
+        for (const auto & seg : suffix_segments) {
+            if (seg.type == segment_type::MARKER) {
+                result.markers.call_id_suffix = seg.value;
+                LOG_DBG("T6: call_id_suffix='%s'\n", result.markers.call_id_suffix.c_str());
+                break;
+            }
+        }
+    } else {
+        LOG_DBG("T6: Unable to determine call_id position\n");
+    }
+
+    // When call_id is detected, per_call_end may have been incorrectly set to include
+    // the call_id_suffix and sample args. Clear it if it starts with call_id_suffix.
+    if (result.call_id_pos != call_id_position::NONE && !result.markers.call_id_suffix.empty() &&
+        result.markers.per_call_end.find(result.markers.call_id_suffix) == 0) {
+        result.markers.per_call_end.clear();
+        LOG_DBG("T6: Cleared per_call_end (was incorrectly including call_id_suffix)\n");
+    }
+}
+
+void differential_analyzer::analyze_arguments(const common_chat_template & tmpl, diff_analysis_result & result) {
+    LOG_DBG(ANSI_ORANGE "Phase 4: Argument analysis\n" ANSI_RESET);
+
+    extract_argument_name_markers(tmpl, result);
+    extract_argument_value_markers(tmpl, result);
+}
+
+void differential_analyzer::extract_argument_name_markers(const common_chat_template & tmpl,
+                                                          diff_analysis_result &       result) {
+    json assistant_first_arg = json{
+        { "role",       "assistant"                              },
+        { "content",    ""                                       },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    json assistant_second_arg = json{
+        { "role",       "assistant"                                },
+        { "content",    ""                                         },
+        { "tool_calls", json::array({ first_tool_call_other_arg }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_first_arg });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_second_arg }); });
+
+    if (!comparison) {
+        LOG_DBG("A1: Template application failed\n");
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+    LOG_DBG("A1 diff - suffix: '%s', left: '%s', right: '%s'\n", diff.suffix.c_str(), diff.left.c_str(),
+            diff.right.c_str());
+
+    if (!diff.left.empty() && !diff.right.empty()) {
+        size_t common_len = 0;
+        size_t min_len    = std::min(diff.left.length(), diff.right.length());
+        while (common_len < min_len && diff.left[common_len] == diff.right[common_len]) {
+            common_len++;
+        }
+
+        if (common_len > 0) {  // we have a marker structure with the name *inside* the marker
+            std::string common_prefix   = diff.left.substr(0, common_len);
+            std::string left_remainder  = diff.left.substr(common_len);
+            std::string right_remainder = diff.right.substr(common_len);
+            size_t      left_close =
+                left_remainder.find_first_of("\"X");  // because arg-val is XXXX, can be quoted or unquoted
+            size_t right_close = right_remainder.find_first_of("\"Y");  // here arg-val is YYYY
+
+            if (left_close != std::string::npos && right_close != std::string::npos) {
+                std::string left_name  = left_remainder.substr(0, 5);   // 5 = len("first")
+                std::string right_name = right_remainder.substr(0, 6);  // 6 = len("second")
+
+                if (left_name == "first" && right_name == "second") {
+                    result.markers.arg_name_prefix = trim_whitespace(common_prefix);
+                    std::string suffix_left        = left_remainder.substr(5, left_close - 5);
+                    std::string suffix_right       = right_remainder.substr(6, right_close - 6);
+                    if (suffix_left == suffix_right) {
+                        result.markers.arg_name_suffix = trim_leading_whitespace(suffix_left);
+                    }
+                    LOG_DBG("A1: arg_name_prefix='%s', arg_name_suffix='%s'\n", result.markers.arg_name_prefix.c_str(),
+                            result.markers.arg_name_suffix.c_str());
+                }
+            }
+        } else if (diff.left.substr(0, 5) == "first" && diff.right.substr(0, 6) == "second") {
+            // we most likely have actual markers for argument names
+            auto pre_seg = segmentize_markers(diff.prefix);
+            for (int i = pre_seg.size() - 1; i >= 0; i--) {
+                result.markers.arg_name_prefix = result.markers.arg_name_prefix + pre_seg[i].value;
+                if (pre_seg[i].type == segment_type::MARKER) {
+                    break;
+                }
+            }
+            auto left_seg = segmentize_markers(diff.left);
+            if (left_seg.size() == 1) {  // only the name + maybe extra whitespace / normal chars in differing part
+                result.markers.arg_name_suffix = diff.left.substr(5);
+                auto suf_seg                   = segmentize_markers(diff.suffix);
+                for (size_t i = 0; i < suf_seg.size(); i++) {
+                    result.markers.arg_name_suffix += suf_seg[i].value;
+                    if (suf_seg[i].type == segment_type::MARKER) {
+                        if (i < suf_seg.size() - 2 && suf_seg[i + 1].type == segment_type::TEXT &&
+                            trim_whitespace(suf_seg[i + 1].value).empty()) {
+                            // we need to include post-marker whitespace/newlines as well
+                            result.markers.arg_name_suffix += suf_seg[i + 1].value;
+                        }
+                        break;
+                    }
+                }
+            } else {
+                for (size_t i = 0; i < left_seg.size(); i++) {
+                    std::string to_add;
+                    if (i == 0) {
+                        to_add = left_seg[i].value.substr(5);
+                    } else {
+                        to_add = left_seg[i].value;
+                    }
+                    result.markers.arg_name_suffix += to_add;
+                    if (left_seg[i].type == segment_type::MARKER) {
+                        if (i < left_seg.size() - 2 && left_seg[i + 1].type == segment_type::TEXT &&
+                            trim_whitespace(left_seg[i + 1].value).empty()) {
+                            // we need to include post-marker whitespace/newlines as well
+                            result.markers.arg_name_suffix += left_seg[i + 1].value;
+                        }
+                        break;
+                    }
+                }
+            }
+        }
+    }
+}
+
+void differential_analyzer::extract_argument_value_markers(const common_chat_template & tmpl,
+                                                           diff_analysis_result &       result) {
+    json assistant_val_X = json{
+        { "role",       "assistant"                              },
+        { "content",    ""                                       },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    json assistant_val_Y = json{
+        { "role",       "assistant"                                        },
+        { "content",    ""                                                 },
+        { "tool_calls", json::array({ first_tool_call_one_arg_other_val }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_val_X });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_val_Y }); });
+
+    if (!comparison) {
+        LOG_DBG("A2: Template application failed\n");
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+    LOG_DBG("A2 diff - suffix: '%s'\n", diff.suffix.c_str());
+    LOG_DBG("A2 diff - left: '%s', right: '%s'\n", diff.left.c_str(), diff.right.c_str());
+
+    if (diff.left == "XXXX" && diff.right == "YYYY") {
+        std::string arg_name_ending = "first" + result.markers.arg_name_suffix;
+        std::string prefix          = diff.prefix;
+        if (prefix.rfind(arg_name_ending) != std::string::npos) {
+            prefix = prefix.substr(prefix.rfind(arg_name_ending) + arg_name_ending.size());
+        }
+        if (!prefix.empty()) {
+            auto seg_pre = segmentize_markers(prefix);
+            for (int i = seg_pre.size() - 1; i >= 0; i--) {
+                result.markers.arg_value_prefix = seg_pre[i].value + result.markers.arg_value_prefix;
+                if (seg_pre[i].type == segment_type::MARKER) {
+                    break;
+                }
+            }
+        }
+
+        std::string value_suffix = diff.suffix;
+        if (!result.markers.func_close.empty()) {
+            size_t func_close_pos = value_suffix.find(result.markers.func_close);
+            if (func_close_pos != std::string::npos) {
+                value_suffix = value_suffix.substr(0, func_close_pos);
+            }
+        } else if (!result.markers.per_call_end.empty() || !result.markers.tool_section_end.empty()) {
+            std::string end_marker =
+                !result.markers.per_call_end.empty() ? result.markers.per_call_end : result.markers.tool_section_end;
+            size_t end_marker_pos = value_suffix.find(end_marker);
+            if (end_marker_pos != std::string::npos) {
+                value_suffix = value_suffix.substr(0, end_marker_pos);
+            }
+        }
+        value_suffix = trim_leading_whitespace(value_suffix);
+        if (!value_suffix.empty()) {
+            result.markers.arg_value_suffix = value_suffix;
+        }
+
+        LOG_DBG("A2: arg_value_prefix='%s', arg_value_suffix='%s'\n", result.markers.arg_value_prefix.c_str(),
+                result.markers.arg_value_suffix.c_str());
+    }
+}
+
+void differential_analyzer::collect_preserved_tokens(diff_analysis_result & result) {
+    auto & tokens = result.preserved_tokens;
+
+    auto add_token = [&tokens](const std::string & org_token) {
+        std::string token = trim_whitespace(org_token);
+        if (!token.empty()) {
+            // Avoid duplicates
+            if (std::find(tokens.begin(), tokens.end(), token) == tokens.end()) {
+                tokens.push_back(token);
+            }
+        }
+    };
+
+    add_token(result.markers.reasoning_start);
+    add_token(result.markers.reasoning_end);
+    add_token(result.markers.content_start);
+    add_token(result.markers.content_end);
+    add_token(result.markers.tool_section_start);
+    add_token(result.markers.tool_section_end);
+    add_token(result.markers.per_call_start);
+    add_token(result.markers.per_call_end);
+    add_token(result.markers.func_name_prefix);
+    add_token(result.markers.func_name_suffix);
+    add_token(result.markers.func_close);
+    add_token(result.markers.arg_name_prefix);
+    add_token(result.markers.arg_name_suffix);
+    add_token(result.markers.arg_separator);
+    add_token(result.markers.arg_value_prefix);
+    add_token(result.markers.arg_value_suffix);
+    add_token(result.markers.call_id_prefix);
+    add_token(result.markers.call_id_suffix);
+    add_token(result.markers.code_block_marker);
+}
diff --git a/common/chat-diff-analyzer.h b/common/chat-diff-analyzer.h
new file mode 100644
index 00000000000..b1c601181e5
--- /dev/null
+++ b/common/chat-diff-analyzer.h
@@ -0,0 +1,347 @@
+#pragma once
+
+#include "chat.h"
+#include "nlohmann/json.hpp"
+
+#include <functional>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+using json = nlohmann::ordered_json;
+
+// ============================================================================
+// Parameters for template application
+// ============================================================================
+struct template_params {
+    json                                  messages;
+    json                                  tools;
+    bool                                  add_generation_prompt = false;
+    bool                                  enable_thinking       = true;
+    std::optional<json>                   extra_context = std::nullopt;
+};
+
+struct diff_split {
+    std::string prefix;
+    std::string suffix;
+    std::string left;
+    std::string right;
+
+    bool operator==(struct diff_split & other) const {
+        return prefix == other.prefix && suffix == other.suffix && left == other.left && right == other.right;
+    }
+};
+
+// Result of compare_variants containing diff and original outputs
+struct compare_variants_result {
+    diff_split                 diff;
+    std::string                output_A;
+    std::string                output_B;
+};
+
+// ============================================================================
+// Marker Registry: All markers extracted via differential analysis
+// ============================================================================
+
+// Markers extracted from differential analysis of template outputs
+// Each marker is derived from a specific comparison in the analysis matrix
+struct marker_registry {
+    // === Reasoning markers (from Phase 1: R1-R3) ===
+    std::string reasoning_start;  // e.g., "<think>", "[THINK]", "<|START_THINKING|>", ""
+    std::string reasoning_end;    // e.g., "</think>", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>"
+
+    // === Content markers (from Phase 2: C1-C2) ===
+    std::string content_start;  // e.g., "<response>", ">>>all\n", ""
+    std::string content_end;    // e.g., "</response>", ""
+
+    // === Tool section markers (from Phase 3: T1-T2) ===
+    std::string tool_section_start;  // e.g., "<tool_call>", "[TOOL_CALLS]", ""
+    std::string tool_section_end;    // e.g., "</tool_call>", ""
+    std::string per_call_start;      // e.g., "<|tool_call_begin|>", "" (for multi-call templates)
+    std::string per_call_end;        // e.g., "<|tool_call_end|>", ""
+    std::string call_separator;      // e.g., ",", "\n", "" (between multiple calls)
+
+    // === Function markers (from Phase 3: T3-T5) ===
+    std::string func_name_prefix;  // e.g., "<function=", "\"name\": \"", "functions."
+    std::string func_name_suffix;  // e.g., ">", "\"", ":0"
+    std::string func_close;        // e.g., "</function>", "" (for tag-based)
+    std::string args_start;        // e.g., "{", "<|tool_call_argument_begin|>"
+    std::string args_end;          // e.g., "}", ""
+
+    // === Argument markers (from Phase 4: A1-A3, for tagged args format) ===
+    std::string arg_name_prefix;   // e.g., "<param=", "<arg_key>", "\""
+    std::string arg_name_suffix;   // e.g., ">", "</arg_key>", "\":"
+    std::string arg_value_prefix;  // e.g., "", "<arg_value>", ""
+    std::string arg_value_suffix;  // e.g., "</param>", "</arg_value>", ""
+    std::string arg_separator;     // e.g., "", "\n", ","
+
+    // === Call ID markers (for non-JSON formats with tool call IDs) ===
+    std::string call_id_prefix;       // e.g., "[CALL_ID]" (marker before call ID value)
+    std::string call_id_suffix;       // e.g., "" (marker after call ID value, before next section)
+
+    // === Special markers ===
+    std::string code_block_marker;    // e.g., "Action:" (for markdown code block format)
+    std::string code_block_language;  // e.g., "json"
+    std::string function_namespace;   // e.g., "functions." (for prefixed-indexed format)
+};
+
+
+// ============================================================================
+// Analysis Result Enums
+// ============================================================================
+
+// Reasoning handling mode (derived from R1-R3 comparisons)
+enum class reasoning_mode {
+    NONE,         // No reasoning markers detected
+    TAG_BASED,    // Standard tag-based: <think>...</think>
+    DELIMITER,    // Delimiter-based: [BEGIN FINAL RESPONSE] (reasoning ends at delimiter)
+    FORCED_OPEN,  // Template ends with open reasoning tag (empty start, non-empty end)
+    FORCED_CLOSED,// Template ends with open reasoning tag on enabled thinking but 
+                  // with both opened and closed tag for disabled thinking
+    TOOLS_ONLY    // Only reason on tool calls, not on normal content
+};
+
+inline std::ostream & operator<<(std::ostream & os, const reasoning_mode & mode) {
+    switch (mode) {
+        case reasoning_mode::NONE:
+            return os << "NONE";
+        case reasoning_mode::TAG_BASED:
+            return os << "TAG_BASED";
+        case reasoning_mode::DELIMITER:
+            return os << "DELIMITER";
+        case reasoning_mode::FORCED_OPEN:
+            return os << "FORCED_OPEN";
+        case reasoning_mode::FORCED_CLOSED:
+            return os << "FORCED_CLOSED";
+        case reasoning_mode::TOOLS_ONLY:
+            return os << "TOOLS_ONLY";
+        default:
+            return os << "UNKNOWN";
+    }
+}
+
+// Content wrapping mode (derived from C1 comparison)
+enum class content_mode {
+    PLAIN,                   // No content markers
+    ALWAYS_WRAPPED,          // Content always wrapped with markers
+    WRAPPED_WITH_REASONING,  // Content wrapped only when reasoning present
+};
+
+inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
+    switch (mode) {
+        case content_mode::PLAIN:
+            return os << "PLAIN";
+        case content_mode::ALWAYS_WRAPPED:
+            return os << "ALWAYS_WRAPPED";
+        case content_mode::WRAPPED_WITH_REASONING:
+            return os << "WRAPPED_WITH_REASONING";
+        default:
+            return os << "UNKNOWN";
+    }
+}
+
+// Call ID position in tool calls (for non-JSON formats)
+enum class call_id_position {
+    NONE,                    // No call ID support detected
+    PRE_FUNC_NAME,           // Call ID before function name: [CALL_ID]id[FUNC]name{args}
+    BETWEEN_FUNC_AND_ARGS,   // Call ID between function and args: [FUNC]name[CALL_ID]id{args}
+    POST_ARGS,               // Call ID after arguments: [FUNC]name{args}[CALL_ID]id
+};
+
+inline std::ostream & operator<<(std::ostream & os, const call_id_position & pos) {
+    switch (pos) {
+        case call_id_position::NONE:
+            return os << "NONE";
+        case call_id_position::PRE_FUNC_NAME:
+            return os << "PRE_FUNC_NAME";
+        case call_id_position::BETWEEN_FUNC_AND_ARGS:
+            return os << "BETWEEN_FUNC_AND_ARGS";
+        case call_id_position::POST_ARGS:
+            return os << "POST_ARGS";
+        default:
+            return os << "UNKNOWN";
+    }
+}
+
+// Tool call format classification (derived from T1-T5, A1-A3 comparisons)
+enum class tool_format {
+    NONE,              // No tool support detected
+    JSON_NATIVE,       // Pure JSON: {"name": "X", "arguments": {...}}
+    TAG_WITH_JSON,     // Tag-based with JSON args: <function=X>{...}</function>
+    BRACKET_TAG,       // Bracket-tag: [TOOL_CALLS]name[CALL_ID]id[ARGS]{...}
+    PREFIXED_INDEXED,  // Prefixed-indexed: functions.X:0{...}
+    RECIPIENT_BASED,   // Recipient routing: >>>func_name\n{...}
+    TAG_WITH_TAGGED,   // Tag-based with tagged args: <param=key>value</param>
+    MARKDOWN_BLOCK,    // Markdown code block: Action:\n```json\n[...]\n```
+};
+
+inline std::ostream & operator<<(std::ostream & os, const tool_format & format) {
+    switch (format) {
+        case tool_format::NONE:
+            return os << "NONE";
+        case tool_format::JSON_NATIVE:
+            return os << "JSON_NATIVE";
+        case tool_format::TAG_WITH_JSON:
+            return os << "TAG_WITH_JSON";
+        case tool_format::BRACKET_TAG:
+            return os << "BRACKET_TAG";
+        case tool_format::PREFIXED_INDEXED:
+            return os << "PREFIXED_INDEXED";
+        case tool_format::RECIPIENT_BASED:
+            return os << "RECIPIENT_BASED";
+        case tool_format::TAG_WITH_TAGGED:
+            return os << "TAG_WITH_TAGGED";
+        case tool_format::MARKDOWN_BLOCK:
+            return os << "MARKDOWN_BLOCK";
+        default:
+            return os << "UNKNOWN";
+    }
+}
+
+// Complete result of differential analysis
+struct diff_analysis_result {
+    // Classification results
+    reasoning_mode  reasoning = reasoning_mode::NONE;
+    content_mode    content   = content_mode::PLAIN;
+    tool_format     tools     = tool_format::NONE;
+
+    // All extracted markers
+    marker_registry markers;
+
+    // JSON field names (for JSON-based formats)
+    bool        fun_name_is_key = false;
+    std::string function_field  = "function";
+    std::string name_field      = "name";
+    std::string args_field      = "arguments";
+    std::string id_field;
+    std::string gen_id_field;
+    std::vector<std::string> parameter_order;
+
+    // Call ID position (for non-JSON formats)
+    call_id_position call_id_pos = call_id_position::NONE;
+
+    // Flags
+    bool supports_tools           = false;
+    bool supports_parallel_calls  = false;
+    bool requires_nonnull_content = false;
+    bool tools_array_wrapped      = false;  // Tool calls wrapped in JSON array [...]
+
+    // Preserved tokens for tokenizer (union of all non-empty markers)
+    std::vector<std::string> preserved_tokens;
+};
+
+// Performs systematic differential analysis on chat templates
+// Uses comparison matrix to extract markers without heuristics
+class differential_analyzer {
+  public:
+    // Main entry point: Run full differential analysis on a template
+    static diff_analysis_result analyze(const common_chat_template & tmpl);
+
+    // Phase-specific analysis (can be called individually for testing)
+    static void analyze_reasoning(const common_chat_template & tmpl, diff_analysis_result & result);
+    static void analyze_content(const common_chat_template & tmpl, diff_analysis_result & result);
+    static void analyze_tools(const common_chat_template & tmpl, diff_analysis_result & result);
+    static void analyze_arguments(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // Factorized differential comparison function (public for testing)
+    // Takes base params and a single modifier lambda to create variant B
+    // Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure
+    static std::optional<compare_variants_result> compare_variants(
+        const common_chat_template &                           tmpl,
+        const template_params &                                params_A,
+        const std::function<void(template_params &)> &         params_modifier);
+
+  private:
+    // Comparison helpers (implement the comparison matrix from the plan)
+
+    // R1: Extract reasoning markers by comparing with/without reasoning_content
+    static void compare_reasoning_presence(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // R2: Detect forced-open reasoning by comparing enable_thinking=false vs true
+    static void compare_thinking_enabled(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // R3: Detect reasoning scope (content-only vs with tools)
+    static void compare_reasoning_scope(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // C1: Extract content markers by comparing different content values
+    static void compare_content_values(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // T1: Analyze the tool calls
+    static void analyze_tool_calls(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // Analyzes a tool call section to determine the format used (pure JSON, function name markers, or full markers)
+    static void analyze_tool_call_format(const std::string &    haystack,
+                                         const std::string &    fun_name_needle,
+                                         const std::string &    arg_name_needle,
+                                         diff_analysis_result & result);
+
+    // Helper functions to handle the two branches of analyze_tool_call_format
+    static void analyze_tool_call_format_json_native(const std::string &    clean_haystack,
+                                                     const std::string &    fun_name_needle,
+                                                     const std::string &    arg_name_needle,
+                                                     diff_analysis_result & result);
+    
+    static void analyze_tool_call_format_non_json(const std::string &    clean_haystack,
+                                                  const std::string &    fun_name_needle,
+                                                  diff_analysis_result & result);
+
+    // T2: Check if markers are per call or per section
+    static void check_per_call_markers(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // T3: Extract call separator; also outputs second_call_content for per-call detection
+    static void extract_call_separator(const common_chat_template & tmpl, diff_analysis_result & result,
+                                       std::string & second_call_content);
+
+    // T4: Analyze function name format and extract markers
+    static void extract_function_markers(const common_chat_template & tmpl,
+                                        diff_analysis_result & result);
+
+    // T5: Extract argument separator
+    static void extract_argument_separator(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // T6: Extract args container markers
+    static void extract_args_markers(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // A1: Extract argument name markers
+    static void extract_argument_name_markers(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // A2: Extract argument value markers
+    static void extract_argument_value_markers(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // T7: Extract call ID markers (for non-JSON formats)
+    static void extract_call_id_markers(const common_chat_template & tmpl, diff_analysis_result & result);
+
+    // Classify tool format based on extracted markers
+    static void classify_tool_format(diff_analysis_result & result);
+
+    // Classification helpers
+    static void collect_preserved_tokens(diff_analysis_result & result);
+
+    // Utility: Apply template with given parameters
+    static std::string apply_template(const common_chat_template & tmpl,
+                                      const template_params &      params);
+};
+
+enum segment_type {
+    TEXT, 
+    MARKER
+};
+
+inline std::ostream & operator<<(std::ostream & os, const segment_type & type) {
+    switch (type) {
+        case segment_type::TEXT:
+            return os << "TEXT";
+        case segment_type::MARKER:
+            return os << "MARKER";
+        default:
+            return os << "UNKNOWN";
+    }
+}
+
+struct segment {
+    segment_type type;
+    std::string value;
+
+    segment(segment_type type, std::string value) : type(type), value(std::move(value)) {}
+};
\ No newline at end of file
diff --git a/common/chat-parser-xml-toolcall.cpp b/common/chat-parser-xml-toolcall.cpp
deleted file mode 100644
index a80900ff8d8..00000000000
--- a/common/chat-parser-xml-toolcall.cpp
+++ /dev/null
@@ -1,879 +0,0 @@
-#include "chat.h"
-#include "chat-parser.h"
-#include "common.h"
-#include "json-partial.h"
-#include "json-schema-to-grammar.h"
-#include "log.h"
-#include "regex-partial.h"
-
-using json = nlohmann::ordered_json;
-
-class xml_toolcall_syntax_exception : public std::runtime_error {
-  public:
-    xml_toolcall_syntax_exception(const std::string & message) : std::runtime_error(message) {}
-};
-
-template<typename T>
-inline void sort_uniq(std::vector<T> &vec) {
-    std::sort(vec.begin(), vec.end());
-    vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
-}
-
-template<typename T>
-inline bool all_space(const T &str) {
-    return std::all_of(str.begin(), str.end(), [](unsigned char ch) { return std::isspace(ch); });
-}
-
-static size_t utf8_truncate_safe(const std::string_view s) {
-    size_t len = s.size();
-    if (len == 0) return 0;
-    size_t i = len;
-    for (size_t back = 0; back < 4 && i > 0; ++back) {
-        --i;
-        unsigned char c = s[i];
-        if ((c & 0x80) == 0) {
-            return len;
-        } else if ((c & 0xC0) == 0xC0) {
-            size_t expected_len = 0;
-            if ((c & 0xE0) == 0xC0) expected_len = 2;
-            else if ((c & 0xF0) == 0xE0) expected_len = 3;
-            else if ((c & 0xF8) == 0xF0) expected_len = 4;
-            else return i;
-            if (len - i >= expected_len) {
-                return len;
-            } else {
-                return i;
-            }
-        }
-    }
-    return len - std::min(len, size_t(3));
-}
-
-inline void utf8_truncate_safe_resize(std::string &s) {
-    s.resize(utf8_truncate_safe(s));
-}
-
-inline std::string_view utf8_truncate_safe_view(const std::string_view s) {
-    return s.substr(0, utf8_truncate_safe(s));
-}
-
-static std::optional<common_chat_msg_parser::find_regex_result> try_find_2_literal_splited_by_spaces(common_chat_msg_parser & builder, const std::string & literal1, const std::string & literal2) {
-    if (literal1.size() == 0) return builder.try_find_literal(literal2);
-    const auto saved_pos = builder.pos();
-    while (auto res = builder.try_find_literal(literal1)) {
-        builder.consume_spaces();
-        const auto match_len = std::min(literal2.size(), builder.input().size() - builder.pos());
-        if (builder.input().compare(builder.pos(), match_len, literal2, 0, match_len) == 0) {
-            if (res->prelude.size() != res->groups[0].begin - saved_pos) {
-                res->prelude = builder.str({saved_pos, res->groups[0].begin});
-            }
-            builder.move_to(builder.pos() + match_len);
-            res->groups[0].end = builder.pos();
-            GGML_ASSERT(res->groups[0].begin != res->groups[0].end);
-            return res;
-        }
-        builder.move_to(res->groups[0].begin + 1);
-    }
-    builder.move_to(saved_pos);
-    return std::nullopt;
-}
-
-/**
- * make a GBNF that accept any strings except those containing any of the forbidden strings.
- */
-std::string make_gbnf_excluding(std::vector<std::string> forbids) {
-    constexpr auto charclass_escape = [](unsigned char c) -> std::string {
-        if (c == '\\' || c == ']' || c == '^' || c == '-') {
-            std::string s = "\\";
-            s.push_back((char)c);
-            return s;
-        }
-        if (isprint(c)) {
-            return std::string(1, (char)c);
-        }
-        char buf[16];
-        snprintf(buf, 15, "\\x%02X", c);
-        return std::string(buf);
-    };
-    constexpr auto build_expr = [charclass_escape](auto self, const std::vector<std::string>& forbids, int l, int r, int depth) -> std::string {
-        std::vector<std::pair<unsigned char, std::pair<int,int>>> children;
-        int i = l;
-        while (i < r) {
-            const std::string &s = forbids[i];
-            if ((int)s.size() == depth) {
-                ++i;
-                continue;
-            }
-            unsigned char c = (unsigned char)s[depth];
-            int j = i;
-            while (j < r && (int)forbids[j].size() > depth &&
-                   (unsigned char)forbids[j][depth] == c) {
-                ++j;
-            }
-            children.push_back({c, {i, j}});
-            i = j;
-        }
-        std::vector<std::string> alts;
-        if (!children.empty()) {
-            std::string cls;
-            for (auto &ch : children) cls += charclass_escape(ch.first);
-            alts.push_back(std::string("[^") + cls + "]");
-        }
-        for (auto &ch : children) {
-            std::string childExpr = self(self, forbids, ch.second.first, ch.second.second, depth+1);
-            if (!childExpr.empty()) {
-                std::string quoted_ch = "\"";
-                if (ch.first == '\\') quoted_ch += "\\\\";
-                else if (ch.first == '"') quoted_ch += "\\\"";
-                else if (isprint(ch.first)) quoted_ch.push_back(ch.first);
-                else {
-                    char buf[16];
-                    snprintf(buf, 15, "\\x%02X", ch.first);
-                    quoted_ch += buf;
-                }
-                quoted_ch += "\"";
-                std::string branch = quoted_ch + std::string(" ") + childExpr;
-                alts.push_back(branch);
-            }
-        }
-        if (alts.empty()) return "";
-        std::ostringstream oss;
-        oss << "( ";
-        for (size_t k = 0; k < alts.size(); ++k) {
-            if (k) oss << " | ";
-            oss << alts[k];
-        }
-        oss << " )";
-        return oss.str();
-    };
-    if (forbids.empty()) return "( . )*";
-    sort(forbids.begin(), forbids.end());
-    std::string expr = build_expr(build_expr, forbids, 0, forbids.size(), 0);
-    if (expr.empty()) {
-        std::string cls;
-        for (auto &s : forbids) if (!s.empty()) cls += charclass_escape((unsigned char)s[0]);
-        expr = std::string("( [^") + cls + "] )";
-    }
-    if (forbids.size() == 1)
-        return expr + "*";
-    else
-        return std::string("( ") + expr + " )*";
-}
-
-/**
- * Build grammar for xml-style tool call
- * form.scope_start and form.scope_end can be empty.
- * Requires data.format for model-specific hacks.
- */
-void build_grammar_xml_tool_call(common_chat_params & data, const json & tools, const struct xml_tool_call_format & form) {
-    GGML_ASSERT(!form.tool_start.empty());
-    GGML_ASSERT(!form.tool_sep.empty());
-    GGML_ASSERT(!form.key_start.empty());
-    GGML_ASSERT(!form.val_end.empty());
-    GGML_ASSERT(!form.tool_end.empty());
-
-    std::string key_val_sep = form.key_val_sep;
-    if (form.key_val_sep2) {
-        key_val_sep += "\n";
-        key_val_sep += *form.key_val_sep2;
-    }
-    GGML_ASSERT(!key_val_sep.empty());
-
-    if (tools.is_array() && !tools.empty()) {
-        data.grammar = build_grammar([&](const common_grammar_builder &builder) {
-            auto string_arg_val = form.last_val_end ?
-                    builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end, *form.last_val_end})) :
-                    builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end}));
-
-            std::vector<std::string> tool_rules;
-            for (const auto & tool : tools) {
-                if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
-                    LOG_WRN("Skipping tool without function: %s", tool.dump(2).c_str());
-                    continue;
-                }
-                const auto & function = tool.at("function");
-                if (!function.contains("name") || !function.at("name").is_string()) {
-                    LOG_WRN("Skipping invalid function (invalid name): %s", function.dump(2).c_str());
-                    continue;
-                }
-                if (!function.contains("parameters") || !function.at("parameters").is_object()) {
-                    LOG_WRN("Skipping invalid function (invalid parameters): %s", function.dump(2).c_str());
-                    continue;
-                }
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                struct parameter_rule {
-                    std::string symbol_name;
-                    bool is_required;
-                };
-                std::vector<parameter_rule> arg_rules;
-                if (!parameters.contains("properties") || !parameters.at("properties").is_object()) {
-                    LOG_WRN("Skipping invalid function (invalid properties): %s", function.dump(2).c_str());
-                    continue;
-                } else {
-                    std::vector<std::string> requiredParameters;
-                    if (parameters.contains("required")) {
-                        try { parameters.at("required").get_to(requiredParameters); }
-                        catch (const std::runtime_error&) {
-                            LOG_WRN("Invalid function required parameters, ignoring: %s", function.at("required").dump(2).c_str());
-                        }
-                    }
-                    sort_uniq(requiredParameters);
-                    for (const auto & [key, value] : parameters.at("properties").items()) {
-                        std::string quoted_key = key;
-                        bool required = std::binary_search(requiredParameters.begin(), requiredParameters.end(), key);
-                        if (form.key_start.back() == '"' && key_val_sep[0] == '"') {
-                            quoted_key = gbnf_format_literal(key);
-                            quoted_key = quoted_key.substr(1, quoted_key.size() - 2);
-                        }
-                        arg_rules.push_back(parameter_rule {builder.add_rule("func-" + name + "-kv-" + key,
-                            gbnf_format_literal(form.key_start) + " " +
-                            gbnf_format_literal(quoted_key) + " " +
-                            gbnf_format_literal(key_val_sep) + " " +
-                            ((value.contains("type") && value["type"].is_string() && value["type"] == "string" && (!form.raw_argval || *form.raw_argval)) ?
-                                    (form.raw_argval ?
-                                            string_arg_val :
-                                            "( " + string_arg_val + " | " + builder.add_schema(name + "-arg-" + key, value) + " )"
-                                    ) :
-                                    builder.add_schema(name + "-arg-" + key, value)
-                            )
-                        ), required});
-                    }
-                }
-
-                auto next_arg_with_sep = builder.add_rule(name + "-last-arg-end", form.last_val_end ? gbnf_format_literal(*form.last_val_end) : gbnf_format_literal(form.val_end));
-                decltype(next_arg_with_sep) next_arg = "\"\"";
-                for (auto i = arg_rules.size() - 1; /* i >= 0 && */ i < arg_rules.size(); --i) {
-                    std::string include_this_arg = arg_rules[i].symbol_name + " " + next_arg_with_sep;
-                    next_arg = builder.add_rule(name + "-arg-after-" + std::to_string(i), arg_rules[i].is_required ?
-                            include_this_arg : "( " + include_this_arg + " ) | " + next_arg
-                    );
-                    include_this_arg = gbnf_format_literal(form.val_end) + " " + include_this_arg;
-                    next_arg_with_sep = builder.add_rule(name + "-arg-after-" + std::to_string(i) + "-with-sep", arg_rules[i].is_required ?
-                            include_this_arg : "( " + include_this_arg + " ) | " + next_arg_with_sep
-                    );
-                }
-
-                std::string quoted_name = name;
-                if (form.tool_start.back() == '"' && form.tool_sep[0] == '"') {
-                    quoted_name = gbnf_format_literal(name);
-                    quoted_name = quoted_name.substr(1, quoted_name.size() - 2);
-                }
-                quoted_name = gbnf_format_literal(quoted_name);
-                // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
-                if (data.format == COMMON_CHAT_FORMAT_KIMI_K2) {
-                    quoted_name = "\"functions.\" " + quoted_name + " \":\" [0-9]+";
-                }
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                        gbnf_format_literal(form.tool_start) + " " +
-                        quoted_name + " " +
-                        gbnf_format_literal(form.tool_sep) + " " +
-                        next_arg
-                ));
-            }
-
-            auto tool_call_once = builder.add_rule("root-tool-call-once", string_join(tool_rules, " | "));
-            auto tool_call_more = builder.add_rule("root-tool-call-more", gbnf_format_literal(form.tool_end) + " " + tool_call_once);
-            auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end));
-            auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end);
-            builder.add_rule("root",
-                (form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") +
-                tool_call_multiple_with_end  + "?" +
-                (form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end))
-            );
-        });
-
-        // grammar trigger for tool call
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, form.scope_start + form.tool_start });
-    }
-}
-
-/**
- * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
- * Throws xml_toolcall_syntax_exception if there is invalid syntax and cannot recover the original status for common_chat_msg_parser.
- * form.scope_start, form.tool_sep and form.scope_end can be empty.
- */
-inline bool parse_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form) {
-    GGML_ASSERT(!form.tool_start.empty());
-    GGML_ASSERT(!form.key_start.empty());
-    GGML_ASSERT(!form.key_val_sep.empty());
-    GGML_ASSERT(!form.val_end.empty());
-    GGML_ASSERT(!form.tool_end.empty());
-
-    // Helper to choose return false or throw error
-    constexpr auto return_error = [](common_chat_msg_parser & builder, auto &start_pos, const bool &recovery) {
-        LOG_DBG("Failed to parse XML-Style tool call at position: %s\n", gbnf_format_literal(builder.consume_rest().substr(0, 20)).c_str());
-        if (recovery) {
-            builder.move_to(start_pos);
-            return false;
-        } else throw xml_toolcall_syntax_exception("Tool call parsing failed with unrecoverable errors. Try using a grammar to constrain the model’s output.");
-    };
-    // Drop substring from needle to end from a JSON
-    constexpr auto partial_json = [](std::string &json_str, std::string_view needle = "XML_TOOL_CALL_PARTIAL_FLAG") {
-        auto pos = json_str.rfind(needle);
-        if (pos == std::string::npos) {
-            return false;
-        }
-        for (auto i = pos + needle.size(); i < json_str.size(); ++i) {
-            unsigned char ch = static_cast<unsigned char>(json_str[i]);
-            if (ch != '\'' && ch != '"' && ch != '}' && ch != ':' && !std::isspace(ch)) {
-                return false;
-            }
-        }
-        if (pos != 0 && json_str[pos - 1] == '"') {
-            --pos;
-        }
-        json_str.resize(pos);
-        return true;
-    };
-    // Helper to generate a partial argument JSON
-    constexpr auto gen_partial_json = [partial_json](auto set_partial_arg, auto &arguments, auto &builder, auto &function_name) {
-        auto rest = builder.consume_rest();
-        utf8_truncate_safe_resize(rest);
-        set_partial_arg(rest, "XML_TOOL_CALL_PARTIAL_FLAG");
-        auto tool_str = arguments.dump();
-        if (partial_json(tool_str)) {
-            if (builder.add_tool_call(function_name, "", tool_str)) {
-                return;
-            }
-        }
-        LOG_DBG("Failed to parse partial XML-Style tool call, fallback to non-partial: %s\n", tool_str.c_str());
-    };
-    // Helper to find a close (because there may be form.last_val_end or form.last_tool_end)
-    constexpr auto try_find_close = [](
-            common_chat_msg_parser & builder,
-            const std::string & end,
-            const std::optional<std::string> & alt_end,
-            const std::string & end_next,
-            const std::optional<std::string> & alt_end_next
-    ) {
-        auto saved_pos = builder.pos();
-        auto tc = builder.try_find_literal(end);
-        auto val_end_size = end.size();
-        if (alt_end) {
-            auto pos_1 = builder.pos();
-            builder.move_to(saved_pos);
-            auto tc2 = try_find_2_literal_splited_by_spaces(builder, *alt_end, end_next);
-            if (alt_end_next) {
-                builder.move_to(saved_pos);
-                auto tc3 = try_find_2_literal_splited_by_spaces(builder, *alt_end, *alt_end_next);
-                if (tc3 && (!tc2 || tc2->prelude.size() > tc3->prelude.size())) {
-                    tc2 = tc3;
-                }
-            }
-            if (tc2 && (!tc || tc->prelude.size() > tc2->prelude.size())) {
-                tc = tc2;
-                tc->groups[0].end = std::min(builder.input().size(), tc->groups[0].begin + alt_end->size());
-                builder.move_to(tc->groups[0].end);
-                val_end_size = alt_end->size();
-            } else {
-                builder.move_to(pos_1);
-            }
-        }
-        return std::make_pair(val_end_size, tc);
-    };
-    // Helper to find a val_end or last_val_end, returns matched pattern size
-    const auto try_find_val_end = [try_find_close, &builder, &form]() {
-        return try_find_close(builder, form.val_end, form.last_val_end, form.tool_end, form.last_tool_end);
-    };
-    // Helper to find a tool_end or last_tool_end, returns matched pattern size
-    const auto try_find_tool_end = [try_find_close, &builder, &form]() {
-        return try_find_close(builder, form.tool_end, form.last_tool_end, form.scope_end, std::nullopt);
-    };
-
-    bool recovery = true;
-    const auto start_pos = builder.pos();
-    if (!all_space(form.scope_start)) {
-        if (auto tc = builder.try_find_literal(form.scope_start)) {
-            if (all_space(tc->prelude)) {
-                if (form.scope_start.size() != tc->groups[0].end - tc->groups[0].begin)
-                    throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.scope_start));
-            } else {
-                builder.move_to(start_pos);
-                return false;
-            }
-        } else return false;
-    }
-    while (auto tc = builder.try_find_literal(form.tool_start)) {
-        if (!all_space(tc->prelude)) {
-            LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
-                    gbnf_format_literal(form.tool_start).c_str(),
-                    gbnf_format_literal(tc->prelude).c_str()
-            );
-            builder.move_to(tc->groups[0].begin - tc->prelude.size());
-            break;
-        }
-
-        // Find tool name
-        auto func_name = builder.try_find_literal(all_space(form.tool_sep) ? form.key_start : form.tool_sep);
-        if (!func_name) {
-            auto [sz, tc] = try_find_tool_end();
-            func_name = tc;
-        }
-        if (!func_name) {
-            // Partial tool name not supported
-            throw common_chat_msg_partial_exception("incomplete tool_call");
-        }
-        // If the model generate multiple tool call and the first tool call has no argument
-        if (func_name->prelude.find(form.tool_end) != std::string::npos || (form.last_tool_end ? func_name->prelude.find(*form.last_tool_end) != std::string::npos : false)) {
-            builder.move_to(func_name->groups[0].begin - func_name->prelude.size());
-            auto [sz, tc] = try_find_tool_end();
-            func_name = tc;
-        }
-
-        // Parse tool name
-        builder.move_to(all_space(form.tool_sep) ? func_name->groups[0].begin : func_name->groups[0].end);
-        std::string function_name = string_strip(func_name->prelude);
-        // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
-        if (builder.syntax().format == COMMON_CHAT_FORMAT_KIMI_K2) {
-            if (string_starts_with(function_name, "functions.")) {
-                static const std::regex re(":\\d+$");
-                if (std::regex_search(function_name, re)) {
-                    function_name = function_name.substr(10, function_name.rfind(":") - 10);
-                }
-            }
-        }
-
-        // Argument JSON
-        json arguments = json::object();
-
-        // Helper to generate a partial argument JSON
-        const auto gen_partial_args = [&](auto set_partial_arg) {
-            gen_partial_json(set_partial_arg, arguments, builder, function_name);
-        };
-
-        // Parse all arg_key/arg_value pairs
-        while (auto tc = builder.try_find_literal(form.key_start)) {
-            if (!all_space(tc->prelude)) {
-                LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
-                        gbnf_format_literal(form.key_start).c_str(),
-                        gbnf_format_literal(tc->prelude).c_str()
-                );
-                builder.move_to(tc->groups[0].begin - tc->prelude.size());
-                break;
-            }
-            if (tc->groups[0].end - tc->groups[0].begin != form.key_start.size()) {
-                auto tool_call_arg = arguments.dump();
-                if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
-                    tool_call_arg.resize(tool_call_arg.size() - 1);
-                }
-                builder.add_tool_call(function_name, "", tool_call_arg);
-                throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_start));
-            }
-
-            // Parse arg_key
-            auto key_res = builder.try_find_literal(form.key_val_sep);
-            if (!key_res) {
-                gen_partial_args([&](auto &rest, auto &needle) {arguments[rest + needle] = "";});
-                throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.key_val_sep) + " after " + gbnf_format_literal(form.key_start));
-            }
-            if (key_res->groups[0].end - key_res->groups[0].begin != form.key_val_sep.size()) {
-                gen_partial_args([&](auto &, auto &needle) {arguments[key_res->prelude + needle] = "";});
-                throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_val_sep));
-            }
-            auto &key = key_res->prelude;
-            recovery = false;
-
-            // Parse arg_value
-            if (form.key_val_sep2) {
-                if (auto tc = builder.try_find_literal(*form.key_val_sep2)) {
-                    if (!all_space(tc->prelude)) {
-                        LOG_DBG("Failed to parse XML-Style tool call: Unexcepted %s between %s and %s\n",
-                                gbnf_format_literal(tc->prelude).c_str(),
-                                gbnf_format_literal(form.key_val_sep).c_str(),
-                                gbnf_format_literal(*form.key_val_sep2).c_str()
-                        );
-                        return return_error(builder, start_pos, false);
-                    }
-                    if (tc->groups[0].end - tc->groups[0].begin != form.key_val_sep2->size()) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                        throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(*form.key_val_sep2));
-                    }
-                } else {
-                    gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                    throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(*form.key_val_sep2) + " after " + gbnf_format_literal(form.key_val_sep));
-                }
-            }
-            auto val_start = builder.pos();
-
-            // Test if arg_val is a partial JSON
-            std::optional<common_json> value_json = std::nullopt;
-            if (!form.raw_argval || !*form.raw_argval) {
-                try { value_json = builder.try_consume_json(); }
-                catch (const std::runtime_error&) { builder.move_to(val_start); }
-                // TODO: Delete this when json_partial adds top-level support for null/true/false
-                if (builder.pos() == val_start) {
-                    const static std::regex number_regex(R"([0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?)");
-                    builder.consume_spaces();
-                    std::string_view sv = utf8_truncate_safe_view(builder.input());
-                    sv.remove_prefix(builder.pos());
-                    std::string rest = "a";
-                    if (sv.size() < 6) rest = sv;
-                    if (string_starts_with("null", rest) || string_starts_with("true", rest) || string_starts_with("false", rest) || std::regex_match(sv.begin(), sv.end(), number_regex)) {
-                        value_json = {123, {"123", "123"}};
-                        builder.consume_rest();
-                    } else {
-                        builder.move_to(val_start);
-                    }
-                }
-            }
-
-            // If it is a JSON and followed by </arg_value>, parse as json
-            // cannot support streaming because it may be a plain text starting with JSON
-            if (value_json) {
-                auto json_end = builder.pos();
-                builder.consume_spaces();
-                if (builder.pos() == builder.input().size()) {
-                    if (form.raw_argval && !*form.raw_argval && (value_json->json.is_string() || value_json->json.is_object() || value_json->json.is_array())) {
-                        arguments[key] = value_json->json;
-                        auto json_str = arguments.dump();
-                        if (!value_json->healing_marker.json_dump_marker.empty()) {
-                            GGML_ASSERT(std::string::npos != json_str.rfind(value_json->healing_marker.json_dump_marker));
-                            json_str.resize(json_str.rfind(value_json->healing_marker.json_dump_marker));
-                        } else {
-                            GGML_ASSERT(json_str.back() == '}');
-                            json_str.resize(json_str.size() - 1);
-                        }
-                        builder.add_tool_call(function_name, "", json_str);
-                    } else {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                    }
-                    LOG_DBG("Possible JSON arg_value: %s\n", value_json->json.dump().c_str());
-                    throw common_chat_msg_partial_exception("JSON arg_value detected. Waiting for more tokens for validations.");
-                }
-                builder.move_to(json_end);
-                auto [val_end_size, tc] = try_find_val_end();
-                if (tc && all_space(tc->prelude) && value_json->healing_marker.marker.empty()) {
-                    if (tc->groups[0].end - tc->groups[0].begin != val_end_size) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                        LOG_DBG("Possible terminated JSON arg_value: %s\n", value_json->json.dump().c_str());
-                        throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.val_end) + (form.last_val_end ? gbnf_format_literal(*form.last_val_end) : ""));
-                    } else arguments[key] = value_json->json;
-                } else builder.move_to(val_start);
-            }
-
-            // If not, parse as plain text
-            if (val_start == builder.pos()) {
-                if (auto [val_end_size, value_plain] = try_find_val_end(); value_plain) {
-                    auto &value_str = value_plain->prelude;
-                    if (form.trim_raw_argval) value_str = string_strip(value_str);
-                    if (value_plain->groups[0].end - value_plain->groups[0].begin != val_end_size) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = value_str + needle;});
-                        throw common_chat_msg_partial_exception(
-                                "Expected " + gbnf_format_literal(form.val_end) +
-                                " after " + gbnf_format_literal(form.key_val_sep) +
-                                (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
-                        );
-                    }
-                    arguments[key] = value_str;
-                } else {
-                    if (form.trim_raw_argval) {
-                        gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = string_strip(rest) + needle;});
-                    } else {
-                        gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = rest + needle;});
-                    }
-                    throw common_chat_msg_partial_exception(
-                            "Expected " + gbnf_format_literal(form.val_end) +
-                            " after " + gbnf_format_literal(form.key_val_sep) +
-                            (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
-                    );
-                }
-            }
-        }
-
-        // Consume closing tag
-        if (auto [tool_end_size, tc] = try_find_tool_end(); tc) {
-            if (!all_space(tc->prelude)) {
-                LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                        gbnf_format_literal(form.tool_end).c_str(),
-                        gbnf_format_literal(tc->prelude).c_str()
-                );
-                return return_error(builder, start_pos, recovery);
-            }
-            if (tc->groups[0].end - tc->groups[0].begin == tool_end_size) {
-                // Add the parsed tool call
-                if (!builder.add_tool_call(function_name, "", arguments.dump())) {
-                    throw common_chat_msg_partial_exception("Failed to add XML-Style tool call");
-                }
-                recovery = false;
-                continue;
-            }
-        }
-
-        auto tool_call_arg = arguments.dump();
-        if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
-            tool_call_arg.resize(tool_call_arg.size() - 1);
-        }
-        builder.add_tool_call(function_name, "", tool_call_arg);
-        throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.tool_end) + " after " + gbnf_format_literal(form.val_end));
-    }
-    if (auto tc = builder.try_find_literal(form.scope_end)) {
-        if (!all_space(tc->prelude)) {
-            LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                    gbnf_format_literal(form.scope_end).c_str(),
-                    gbnf_format_literal(tc->prelude).c_str()
-            );
-            return return_error(builder, start_pos, recovery);
-        }
-    } else {
-        if (all_space(form.scope_end)) return true;
-        builder.consume_spaces();
-        if (builder.pos() == builder.input().size())
-            throw common_chat_msg_partial_exception("incomplete tool calls");
-        LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                gbnf_format_literal(form.scope_end).c_str(),
-                gbnf_format_literal(builder.consume_rest()).c_str()
-        );
-        return return_error(builder, start_pos, recovery);
-    }
-
-    return true;
-}
-
-/**
- * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
- * May cause std::runtime_error if there is invalid syntax because partial valid tool call is already sent out to client.
- * form.scope_start, form.tool_sep and form.scope_end can be empty.
- */
-bool common_chat_msg_parser::try_consume_xml_tool_calls(const struct xml_tool_call_format & form) {
-    auto pos = pos_;
-    auto tsize = result_.tool_calls.size();
-    try { return parse_xml_tool_calls(*this, form); }
-    catch (const xml_toolcall_syntax_exception&) {}
-    move_to(pos);
-    result_.tool_calls.resize(tsize);
-    return false;
-}
-
-/**
- * Parse content uses reasoning and XML-Style tool call
- * TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
- */
-inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>") {
-    constexpr auto rstrip = [](std::string &s) {
-        s.resize(std::distance(s.begin(), std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base()));
-    };
-    // Erase substring from l to r, along with additional spaces nearby
-    constexpr auto erase_spaces = [](auto &str, size_t l, size_t r) {
-        while (/* l > -1 && */ --l < str.size() && std::isspace(static_cast<unsigned char>(str[l])));
-        ++l;
-        while (++r < str.size() && std::isspace(static_cast<unsigned char>(str[r])));
-        if (l < r) str[l] = '\n';
-        if (l + 1 < r) str[l + 1] = '\n';
-        if (l != 0) l += 2;
-        str.erase(l, r - l);
-        return l;
-    };
-    constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
-        auto best_match = content.size();
-        for (auto pattern: list) {
-            if (pattern.size() == 0) continue;
-            for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
-                auto match_len = content.size() - match_idx;
-                if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) {
-                    best_match = match_idx;
-                }
-            }
-        }
-        if (content.size() > best_match) {
-            content.erase(best_match);
-        }
-    };
-    const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
-        return trim_suffix(content, {
-            start_think, end_think, form.scope_start, form.tool_start, form.tool_sep, form.key_start,
-            form.key_val_sep, form.key_val_sep2 ? form.key_val_sep2->c_str() : "",
-            form.val_end, form.last_val_end ? form.last_val_end->c_str() : "",
-            form.tool_end, form.last_tool_end ? form.last_tool_end->c_str() : "",
-            form.scope_end
-        });
-    };
-
-
-    // Trim leading spaces without affecting keyword matching
-    static const common_regex spaces_regex("\\s*");
-    {
-        auto tc = builder.consume_regex(spaces_regex);
-        auto spaces = builder.str(tc.groups[0]);
-        auto s1 = spaces.size();
-        trim_potential_partial_word(spaces);
-        auto s2 = spaces.size();
-        builder.move_to(builder.pos() - (s1 - s2));
-    }
-
-    // Parse content
-    bool reasoning_unclosed = builder.syntax().thinking_forced_open;
-    std::string unclosed_reasoning_content("");
-    for (;;) {
-        auto tc = try_find_2_literal_splited_by_spaces(builder, form.scope_start, form.tool_start);
-        std::string content;
-        std::string tool_call_start;
-
-        if (tc) {
-            content = std::move(tc->prelude);
-            tool_call_start = builder.str(tc->groups[0]);
-            LOG_DBG("Matched tool start: %s\n", gbnf_format_literal(tool_call_start).c_str());
-        } else {
-            content = builder.consume_rest();
-            utf8_truncate_safe_resize(content);
-        }
-
-        // Handle unclosed think block
-        if (reasoning_unclosed) {
-            if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) {
-                unclosed_reasoning_content += content;
-                if (!(form.allow_toolcall_in_think && tc)) {
-                    unclosed_reasoning_content += tool_call_start;
-                    continue;
-                }
-            } else {
-                reasoning_unclosed = false;
-                std::string reasoning_content;
-                if (pos == std::string::npos) {
-                    reasoning_content = std::move(content);
-                } else {
-                    reasoning_content = content.substr(0, pos);
-                    content.erase(0, pos + end_think.size());
-                }
-                if (builder.pos() == builder.input().size() && all_space(content)) {
-                    rstrip(reasoning_content);
-                    trim_potential_partial_word(reasoning_content);
-                    rstrip(reasoning_content);
-                    if (reasoning_content.empty()) {
-                        rstrip(unclosed_reasoning_content);
-                        trim_potential_partial_word(unclosed_reasoning_content);
-                        rstrip(unclosed_reasoning_content);
-                        if (unclosed_reasoning_content.empty()) continue;
-                    }
-                }
-                if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
-                    builder.add_content(start_think);
-                    builder.add_content(unclosed_reasoning_content);
-                    builder.add_content(reasoning_content);
-                    if (builder.pos() != builder.input().size() || !all_space(content))
-                        builder.add_content(end_think);
-                } else {
-                    builder.add_reasoning_content(unclosed_reasoning_content);
-                    builder.add_reasoning_content(reasoning_content);
-                }
-                unclosed_reasoning_content.clear();
-            }
-        }
-
-        // Handle multiple think block
-        bool toolcall_in_think = false;
-        for (auto think_start = content.find(start_think); think_start != std::string::npos; think_start = content.find(start_think, think_start)) {
-            if (auto think_end = content.find(end_think, think_start + start_think.size()); think_end != std::string::npos) {
-                if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-                    auto reasoning_content = content.substr(think_start + start_think.size(), think_end - think_start - start_think.size());
-                    builder.add_reasoning_content(reasoning_content);
-                    think_start = erase_spaces(content, think_start, think_end + end_think.size() - 1);
-                } else {
-                    think_start = think_end + end_think.size() - 1;
-                }
-            } else {
-                // This <tool_call> start is in thinking block, skip this tool call
-                // This <tool_call> start is in thinking block
-                if (form.allow_toolcall_in_think) {
-                    unclosed_reasoning_content = content.substr(think_start + start_think.size());
-                } else {
-                    unclosed_reasoning_content = content.substr(think_start + start_think.size()) + tool_call_start;
-                }
-                reasoning_unclosed = true;
-                content.resize(think_start);
-                toolcall_in_think = true;
-            }
-        }
-
-        if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-            rstrip(content);
-            // Handle unclosed </think> token from content: delete all </think> token
-            if (auto pos = content.rfind(end_think); pos != std::string::npos) {
-                while (pos != std::string::npos) {
-                    pos = erase_spaces(content, pos, pos + end_think.size() - 1);
-                    pos = content.rfind(end_think, pos);
-                }
-            }
-            // Strip if needed
-            if (content.size() > 0 && std::isspace(static_cast<unsigned char>(content[0]))) {
-                content = string_strip(content);
-            }
-        }
-
-        // remove potential partial suffix
-        if (builder.pos() == builder.input().size()) {
-            if (unclosed_reasoning_content.empty()) {
-                rstrip(content);
-                trim_potential_partial_word(content);
-                rstrip(content);
-            } else {
-                rstrip(unclosed_reasoning_content);
-                trim_potential_partial_word(unclosed_reasoning_content);
-                rstrip(unclosed_reasoning_content);
-            }
-        }
-
-        // consume unclosed_reasoning_content if allow_toolcall_in_think is set
-        if (form.allow_toolcall_in_think && !unclosed_reasoning_content.empty()) {
-            if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-                builder.add_reasoning_content(unclosed_reasoning_content);
-            } else {
-                if (content.empty()) {
-                    content = start_think + unclosed_reasoning_content;
-                } else {
-                    content += "\n\n" + start_think;
-                    content += unclosed_reasoning_content;
-                }
-            }
-            unclosed_reasoning_content.clear();
-        }
-
-        // Add content
-        if (!content.empty()) {
-            // If there are multiple content blocks
-            if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && builder.result().content.size() != 0) {
-                builder.add_content("\n\n");
-            }
-            builder.add_content(content);
-        }
-
-        // This <tool_call> start is in thinking block and toolcall_in_think not set, skip this tool call
-        if (toolcall_in_think && !form.allow_toolcall_in_think) {
-            continue;
-        }
-
-        // There is no tool call and all content is parsed
-        if (!tc) {
-            GGML_ASSERT(builder.pos() == builder.input().size());
-            GGML_ASSERT(unclosed_reasoning_content.empty());
-            if (!form.allow_toolcall_in_think) GGML_ASSERT(!reasoning_unclosed);
-            break;
-        }
-
-        builder.move_to(tc->groups[0].begin);
-        if (builder.try_consume_xml_tool_calls(form)) {
-            auto end_of_tool = builder.pos();
-            builder.consume_spaces();
-            if (builder.pos() != builder.input().size()) {
-                builder.move_to(end_of_tool);
-                if (!builder.result().content.empty()) {
-                    builder.add_content("\n\n");
-                }
-            }
-        } else {
-            static const common_regex next_char_regex(".");
-            auto c = builder.str(builder.consume_regex(next_char_regex).groups[0]);
-            rstrip(c);
-            builder.add_content(c);
-        }
-    }
-}
-
-/**
- * Parse content uses reasoning and XML-Style tool call
- */
-void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) {
-    parse_msg_with_xml_tool_calls(*this, form, start_think, end_think);
-}
diff --git a/common/chat-parser-xml-toolcall.h b/common/chat-parser-xml-toolcall.h
deleted file mode 100644
index b309fb66705..00000000000
--- a/common/chat-parser-xml-toolcall.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#pragma once
-
-#include "chat.h"
-
-#include <nlohmann/json.hpp>
-
-#include <optional>
-#include <string>
-#include <vector>
-
-
-// Sample config:
-// MiniMax-M2 (left): <minimax:tool_call>\n<invoke name="tool-name">\n<parameter name="key">value</parameter>\n...</invoke>\n...</minimax:tool_call>
-// GLM 4.5   (right): <tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>
-struct xml_tool_call_format {
-    std::string scope_start; // <minimax:tool_call>\n  // \n                      // can be empty
-    std::string tool_start;  // <invoke name=\"        // <tool_call>
-    std::string tool_sep;    // \">\n                  // \n                      // can be empty only for parse_xml_tool_calls
-    std::string key_start;   // <parameter name=\"     // <arg_key>
-    std::string key_val_sep; // \">                    // </arg_key>\n<arg_value>
-    std::string val_end;     // </parameter>\n         // </arg_value>\n
-    std::string tool_end;    // </invoke>\n            // </tool_call>\n
-    std::string scope_end;   // </minimax:tool_call>   //                         // can be empty
-    // Set this if there can be dynamic spaces inside key_val_sep.
-    // e.g. key_val_sep=</arg_key> key_val_sep2=<arg_value> for GLM4.5
-    std::optional<std::string> key_val_sep2 = std::nullopt;
-    // Set true if argval should only be raw string. e.g. Hello "world" hi
-    // Set false if argval should only be json string. e.g. "Hello \"world\" hi"
-    // Defaults to std::nullopt, both will be allowed.
-    std::optional<bool> raw_argval = std::nullopt;
-    std::optional<std::string> last_val_end = std::nullopt;
-    std::optional<std::string> last_tool_end = std::nullopt;
-    bool trim_raw_argval = false;
-    bool allow_toolcall_in_think = false;
-};
-
-// make a GBNF that accept any strings except those containing any of the forbidden strings.
-std::string make_gbnf_excluding(std::vector<std::string> forbids);
-
-/**
- * Build grammar for xml-style tool call
- * form.scope_start and form.scope_end can be empty.
- * Requires data.format for model-specific hacks.
- */
-void build_grammar_xml_tool_call(common_chat_params & data, const nlohmann::ordered_json & tools, const struct xml_tool_call_format & form);
diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
deleted file mode 100644
index 29819e48d3b..00000000000
--- a/common/chat-parser.cpp
+++ /dev/null
@@ -1,1669 +0,0 @@
-#include "chat-parser.h"
-#include "chat-peg-parser.h"
-#include "common.h"
-#include "log.h"
-#include "peg-parser.h"
-#include "regex-partial.h"
-
-#include <algorithm>
-#include <cctype>
-#include <optional>
-#include <stdexcept>
-#include <string>
-#include <string_view>
-#include <vector>
-
-using json = nlohmann::ordered_json;
-
-static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder,
-                                                const common_regex &     prefix,
-                                                size_t                   rstrip_prefix = 0) {
-    static const std::vector<std::vector<std::string>> args_paths = { { "arguments" } };
-    if (auto res = builder.try_find_regex(prefix)) {
-        builder.move_back(rstrip_prefix);
-        auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
-        if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call array");
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
-    std::string arguments;
-    if (builder.is_partial()) {
-        arguments = (json{
-                         { "code", code + builder.healing_marker() }
-        })
-                        .dump();
-        auto idx = arguments.find(builder.healing_marker());
-        if (idx != std::string::npos) {
-            arguments.resize(idx);
-        }
-    } else {
-        arguments = (json{
-                         { "code", code }
-        })
-                        .dump();
-    }
-    return arguments;
-}
-
-/**
- * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
- * Aggregates the prefix, suffix and in-between text into the content.
- */
-static void parse_json_tool_calls(
-    common_chat_msg_parser &            builder,
-    const std::optional<common_regex> & block_open,
-    const std::optional<common_regex> & function_regex_start_only,
-    const std::optional<common_regex> & function_regex,
-    const common_regex &                close_regex,
-    const std::optional<common_regex> & block_close,
-    bool                                allow_raw_python = false,
-    const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name =
-        nullptr) {
-    auto parse_tool_calls = [&]() {
-        size_t from  = std::string::npos;
-        auto   first = true;
-        while (true) {
-            auto start_pos = builder.pos();
-            auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) :
-                       function_regex                     ? builder.try_find_regex(*function_regex, from) :
-                                                            std::nullopt;
-
-            if (res) {
-                std::string name;
-                if (get_function_name) {
-                    name = get_function_name(*res);
-                } else {
-                    GGML_ASSERT(res->groups.size() == 2);
-                    name = builder.str(res->groups[1]);
-                }
-                first = false;
-                if (name.empty()) {
-                    // get_function_name signalled us that we should skip this match and treat it as content.
-                    from = res->groups[0].begin + 1;
-                    continue;
-                }
-                from = std::string::npos;
-
-                auto maybe_raw_python = name == "python" && allow_raw_python;
-                if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
-                    if (auto arguments = builder.try_consume_json_with_dumped_args({ {} })) {
-                        if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
-                            throw common_chat_msg_partial_exception("incomplete tool call");
-                        }
-                        builder.consume_regex(close_regex);
-                    }
-                    continue;
-                }
-                if (maybe_raw_python) {
-                    auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
-                    if (!builder.add_tool_call(name, "", arguments)) {
-                        throw common_chat_msg_partial_exception("incomplete tool call");
-                    }
-                    return;
-                }
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            } else {
-                builder.move_to(start_pos);
-            }
-            break;
-        }
-        if (block_close) {
-            builder.consume_regex(*block_close);
-        }
-        builder.consume_spaces();
-        builder.add_content(builder.consume_rest());
-    };
-    if (block_open) {
-        if (auto res = builder.try_find_regex(*block_open)) {
-            parse_tool_calls();
-        } else {
-            builder.add_content(builder.consume_rest());
-        }
-    } else {
-        parse_tool_calls();
-    }
-}
-
-common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax)
-    : input_(input), is_partial_(is_partial), syntax_(syntax)
-{
-    result_.role = "assistant";
-
-    while (true) {
-        std::string id = std::to_string(std::rand());
-        if (input.find(id) == std::string::npos) {
-            healing_marker_ = id;
-            break;
-        }
-    }
-}
-
-std::string common_chat_msg_parser::str(const common_string_range & rng) const {
-    GGML_ASSERT(rng.begin <= rng.end);
-    return input_.substr(rng.begin, rng.end - rng.begin);
-}
-
-void common_chat_msg_parser::add_content(const std::string &content) {
-    result_.content += content;
-}
-
-void common_chat_msg_parser::add_reasoning_content(const std::string &reasoning_content) {
-    result_.reasoning_content += reasoning_content;
-}
-
-bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::string & id, const std::string & arguments) {
-    if (name.empty()) {
-        return false;
-    }
-
-    common_chat_tool_call tool_call;
-    tool_call.name = name;
-    tool_call.arguments = arguments;
-    tool_call.id = id;
-
-    // LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str());
-    result_.tool_calls.emplace_back(tool_call);
-
-    return true;
-}
-bool common_chat_msg_parser::add_tool_call(const json & tool_call) {
-    std::string name = tool_call.contains("name") ? tool_call.at("name") : "";
-    std::string id = tool_call.contains("id") ? tool_call.at("id") : "";
-    std::string arguments = "";
-    if (tool_call.contains("arguments")) {
-        if (tool_call.at("arguments").is_object()) {
-            arguments = tool_call.at("arguments").dump();
-        } else {
-            arguments = tool_call.at("arguments");
-        }
-    }
-
-    return add_tool_call(name, id, arguments);
-}
-
-bool common_chat_msg_parser::add_tool_calls(const json & arr) {
-    for (const auto & item : arr) {
-        if (!add_tool_call(item)) {
-            return false;
-        }
-    }
-    return true;
-}
-
-bool common_chat_msg_parser::add_tool_call_short_form(const json & tool_call) {
-    if (!tool_call.is_object() || tool_call.size() != 1) {
-        return false;
-    }
-
-    // Get the tool name (the single key in the object)
-    auto it = tool_call.begin();
-    std::string name = it.key();
-
-    if (name.empty()) {
-        return false;
-    }
-
-    // Get the arguments (the nested object)
-    const json & args_json = it.value();
-    std::string arguments = "";
-
-    if (args_json.is_object()) {
-        arguments = args_json.dump();
-    } else if (args_json.is_string()) {
-        arguments = args_json;
-    } else if (!args_json.is_null()) {
-        // For other types, convert to string representation
-        arguments = args_json.dump();
-    }
-
-    return add_tool_call(name, "", arguments);
-}
-void common_chat_msg_parser::finish() {
-    if (!is_partial_ && pos_ != input_.size()) {
-        throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_));
-    }
-}
-
-bool common_chat_msg_parser::consume_spaces() {
-    const auto length = input_.size();
-    auto consumed = false;
-    while (pos_ < length && std::isspace(input_[pos_])) {
-        ++pos_;
-        consumed = true;
-    }
-    return consumed;
-}
-
-bool common_chat_msg_parser::try_consume_literal(const std::string & literal) {
-    auto pos = pos_;
-    for (auto i = 0u; i < literal.size(); ++i) {
-        if (pos >= input_.size()) {
-            return false;
-        }
-        if (input_[pos] != literal[i]) {
-            return false;
-        }
-        ++pos;
-    }
-    pos_ = pos;
-    return true;
-}
-
-std::optional<common_chat_msg_parser::find_regex_result>  common_chat_msg_parser::try_find_literal(const std::string & literal) {
-    auto idx = input_.find(literal, pos_);
-    if (idx != std::string::npos) {
-        find_regex_result res;
-        res.prelude = input_.substr(pos_, idx - pos_);
-        auto end = idx + literal.size();
-        res.groups.emplace_back(common_string_range{idx, end});
-        move_to(end);
-        return res;
-    }
-    if (is_partial_) {
-        idx = string_find_partial_stop(input_, literal);
-        if (idx != std::string::npos && idx >= pos_) {
-            find_regex_result res;
-            res.prelude = input_.substr(pos_, idx - pos_);
-            auto end = input_.size();
-            res.groups.emplace_back(common_string_range{idx, end});
-            move_to(end);
-            return res;
-        }
-    }
-    return std::nullopt;
-}
-
-void common_chat_msg_parser::consume_literal(const std::string & literal) {
-    if (!try_consume_literal(literal)) {
-        throw common_chat_msg_partial_exception(literal);
-    }
-}
-
-bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
-    std::string pending_reasoning_prefix;
-
-    if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) {
-        return false;
-    }
-
-    auto set_reasoning_prefix = [&](size_t prefix_pos) {
-        if (!syntax_.thinking_forced_open || syntax_.reasoning_in_content) {
-            return;
-        }
-        if (prefix_pos + start_think.size() > input_.size()) {
-            pending_reasoning_prefix.clear();
-            return;
-        }
-        // Capture the exact literal that opened the reasoning section so we can
-        // surface it back to callers. This ensures formats that force the
-        // reasoning tag open (e.g. DeepSeek R1) retain their original prefix
-        // instead of dropping it during parsing.
-        pending_reasoning_prefix = input_.substr(prefix_pos, start_think.size());
-    };
-
-    auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
-        auto stripped_reasoning = string_strip(reasoning);
-        if (stripped_reasoning.empty()) {
-            return;
-        }
-        if (syntax_.reasoning_in_content) {
-            add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "<think>" : start_think);
-            add_content(stripped_reasoning);
-            if (closed) {
-                add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
-            }
-        } else {
-            if (!pending_reasoning_prefix.empty()) {
-                add_reasoning_content(pending_reasoning_prefix);
-                pending_reasoning_prefix.clear();
-            }
-            add_reasoning_content(stripped_reasoning);
-        }
-    };
-
-    const size_t saved_pos = pos_;
-    const size_t saved_content_size = result_.content.size();
-    const size_t saved_reasoning_size = result_.reasoning_content.size();
-
-    auto restore_state = [&]() {
-        move_to(saved_pos);
-        result_.content.resize(saved_content_size);
-        result_.reasoning_content.resize(saved_reasoning_size);
-    };
-
-    // Allow leading whitespace to be preserved as content when reasoning is present at the start
-    size_t cursor = pos_;
-    size_t whitespace_end = cursor;
-    while (whitespace_end < input_.size() && std::isspace(static_cast<unsigned char>(input_[whitespace_end]))) {
-        ++whitespace_end;
-    }
-
-    if (whitespace_end >= input_.size()) {
-        restore_state();
-        if (syntax_.thinking_forced_open) {
-            auto rest = input_.substr(saved_pos);
-            if (!rest.empty()) {
-                handle_reasoning(rest, /* closed */ !is_partial());
-            }
-            move_to(input_.size());
-            return true;
-        }
-        return false;
-    }
-
-    cursor = whitespace_end;
-    const size_t remaining = input_.size() - cursor;
-    const size_t start_prefix = std::min(start_think.size(), remaining);
-    const bool has_start_tag = input_.compare(cursor, start_prefix, start_think, 0, start_prefix) == 0;
-
-    if (has_start_tag && start_prefix < start_think.size()) {
-        move_to(input_.size());
-        return true;
-    }
-
-    if (has_start_tag) {
-        if (whitespace_end > pos_) {
-            add_content(input_.substr(pos_, whitespace_end - pos_));
-        }
-        set_reasoning_prefix(cursor);
-        cursor += start_think.size();
-    } else if (syntax_.thinking_forced_open) {
-        cursor = whitespace_end;
-    } else {
-        restore_state();
-        return false;
-    }
-    while (true) {
-        if (cursor >= input_.size()) {
-            move_to(input_.size());
-            return true;
-        }
-
-        size_t end_pos = input_.find(end_think, cursor);
-        if (end_pos == std::string::npos) {
-            std::string_view remaining_view(input_.data() + cursor, input_.size() - cursor);
-            size_t partial_off = string_find_partial_stop(remaining_view, end_think);
-            size_t reasoning_end = partial_off == std::string::npos ? input_.size() : cursor + partial_off;
-            if (reasoning_end > cursor) {
-                handle_reasoning(input_.substr(cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial());
-            }
-            move_to(input_.size());
-            return true;
-        }
-
-        if (end_pos > cursor) {
-            handle_reasoning(input_.substr(cursor, end_pos - cursor), /* closed */ true);
-        } else {
-            handle_reasoning("", /* closed */ true);
-        }
-
-        cursor = end_pos + end_think.size();
-
-        while (cursor < input_.size() && std::isspace(static_cast<unsigned char>(input_[cursor]))) {
-            ++cursor;
-        }
-
-        const size_t next_remaining = input_.size() - cursor;
-        if (next_remaining == 0) {
-            move_to(cursor);
-            return true;
-        }
-
-        const size_t next_prefix = std::min(start_think.size(), next_remaining);
-        if (input_.compare(cursor, next_prefix, start_think, 0, next_prefix) == 0) {
-            if (next_prefix < start_think.size()) {
-                move_to(input_.size());
-                return true;
-            }
-            set_reasoning_prefix(cursor);
-            cursor += start_think.size();
-            continue;
-        }
-
-        move_to(cursor);
-        return true;
-    }
-}
-
-std::string common_chat_msg_parser::consume_rest() {
-    auto rest = input_.substr(pos_);
-    pos_ = input_.size();
-    return rest;
-}
-
-// Tries to find the regex, consumes it (pos right after it) and gives the prelude (right before it) and the groups to the callback.
-std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) {
-    auto m = regex.search(input_, from == std::string::npos ? pos_ : from);
-    if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
-        return std::nullopt;
-    }
-    auto prelude = input_.substr(pos_, m.groups[0].begin - pos_);
-    pos_ = m.groups[0].end;
-
-    if (add_prelude_to_content) {
-        add_content(prelude);
-    }
-    if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
-        if (is_partial()) {
-            throw common_chat_msg_partial_exception(regex.str());
-        }
-        return std::nullopt;
-    }
-    return find_regex_result{prelude, m.groups};
-}
-
-common_chat_msg_parser::find_regex_result common_chat_msg_parser::consume_regex(const common_regex & regex) {
-    if (auto result = try_consume_regex(regex)) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception(regex.str());
-}
-
-std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_consume_regex(const common_regex & regex) {
-    auto m = regex.search(input_, pos_);
-    if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
-        return std::nullopt;
-    }
-    if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
-        if (is_partial()) {
-            throw common_chat_msg_partial_exception(regex.str());
-        }
-        return std::nullopt;
-    }
-    if (m.groups[0].begin != pos_) {
-        // Didn't match at the current position.
-        return std::nullopt;
-    }
-    pos_ = m.groups[0].end;
-
-    return find_regex_result {
-        /* .prelude = */ "",
-        m.groups,
-    };
-}
-
-std::optional<common_json> common_chat_msg_parser::try_consume_json() {
-    auto it = input_.cbegin() + pos_;
-    const auto end = input_.cend();
-    common_json result;
-    if (!common_json_parse(it, end, healing_marker_, result)) {
-        return std::nullopt;
-    }
-    pos_ = std::distance(input_.cbegin(), it);
-    if (result.healing_marker.marker.empty()) {
-        // No healing marker, just return the parsed json
-        return result;
-    }
-    if (!is_partial()) {
-        throw common_chat_msg_partial_exception("JSON");
-    }
-    return result;
-}
-
-common_json common_chat_msg_parser::consume_json() {
-    if (auto result = try_consume_json()) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception("JSON");
-}
-
-common_chat_msg_parser::consume_json_result common_chat_msg_parser::consume_json_with_dumped_args(
-    const std::vector<std::vector<std::string>> & args_paths,
-    const std::vector<std::vector<std::string>> & content_paths
-) {
-    if (auto result = try_consume_json_with_dumped_args(args_paths, content_paths)) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception("JSON");
-}
-
-std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parser::try_consume_json_with_dumped_args(
-    const std::vector<std::vector<std::string>> & args_paths,
-    const std::vector<std::vector<std::string>> & content_paths
-) {
-    auto partial = try_consume_json();
-    if (!partial) {
-        return std::nullopt;
-    }
-    auto is_arguments_path = [&](const std::vector<std::string> & path) {
-        return std::find(args_paths.begin(), args_paths.end(), path) != args_paths.end();
-    };
-    auto is_content_path = [&](const std::vector<std::string> & path) {
-        return std::find(content_paths.begin(), content_paths.end(), path) != content_paths.end();
-    };
-
-    if (partial->healing_marker.marker.empty()) {
-        if (args_paths.empty()) {
-            // No arguments to dump, and JSON was parsed fully.
-            return consume_json_result {
-                partial->json,
-                /* .is_partial = */ false,
-            };
-        }
-        if (is_arguments_path({})) {
-            // Entire JSON is the arguments and was parsed fully.
-            return consume_json_result {
-                partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
-                /* .is_partial = */ false,
-            };
-        }
-    }
-
-    LOG_DBG("Parsed partial JSON: %s (json_healing_marker: %s)\n", partial->json.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
-
-    auto found_healing_marker = false;
-    std::vector<std::string> path;
-    std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
-        if (is_arguments_path(path)) {
-            auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
-            if (is_partial() && !partial->healing_marker.marker.empty()) {
-                auto idx = arguments.find(partial->healing_marker.json_dump_marker);
-                if (idx != std::string::npos) {
-                    arguments.resize(idx);
-                    found_healing_marker = true;
-                }
-                if (arguments == "\"") {
-                    // This happens because of completing `:"$magic` after `"arguments"`
-                    arguments = "";
-                }
-            }
-            return arguments;
-        }
-        if (is_content_path(path)) {
-            if (!j.is_string()) {
-                throw std::runtime_error("Content path must be a string");
-            }
-            std::string str = j;
-            auto idx = str.find(partial->healing_marker.marker); // not using json_dump_marker as we're inside a string
-            if (idx != std::string::npos) {
-                str.resize(idx);
-                found_healing_marker = true;
-            }
-            return str;
-        }
-        if (j.is_object()) {
-            auto obj = json::object();
-            for (const auto & p : j.items()) {
-                const auto & key = p.key();
-                const auto & value = p.value();
-                const std::string key_str = key; // NOLINT
-                auto idx = key_str.find(healing_marker_);
-                if (idx != std::string::npos) {
-                    found_healing_marker = true;
-                    break;
-                }
-                path.push_back(key_str);
-                if (value.is_string()) {
-                    const std::string value_str = value;
-                    if (value_str.find(healing_marker_) != std::string::npos) {
-                        found_healing_marker = true;
-                        if (is_content_path(path)) {
-                            if (partial->healing_marker.marker == partial->healing_marker.json_dump_marker) {
-                                // The healing occurred inside the string: good. Otherwise we just ditch the entire key/value pair.
-                                obj[key] = remove_unsupported_healings_and_dump_args(value);
-                            }
-                        }
-                        break;
-                    }
-                    obj[key] = value;
-                } else {
-                    obj[key] = remove_unsupported_healings_and_dump_args(value);
-                }
-                path.pop_back();
-            }
-            return obj;
-        }
-        if (j.is_array()) {
-            auto arr = json::array();
-            for (const auto & value : j) {
-                if (value.is_string()) {
-                    std::string str = value;
-                    auto idx = str.find(healing_marker_);
-                    if (idx != std::string::npos) {
-                        // Don't heal array values that aren't in the arguments.
-                        found_healing_marker = true;
-                        break;
-                    }
-                }
-                arr.push_back(remove_unsupported_healings_and_dump_args(value));
-            }
-            return arr;
-        }
-        return j;
-    };
-
-    auto cleaned = remove_unsupported_healings_and_dump_args(partial->json);
-    LOG_DBG("Cleaned up JSON %s to %s (json_healing_marker : '%s')\n", partial->json.dump().c_str(), cleaned.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
-    return consume_json_result {
-        cleaned,
-        /* .is_partial = */ found_healing_marker,
-    };
-}
-
-void common_chat_msg_parser::clear_tools() {
-    result_.tool_calls.clear();
-}
-
-/**
- * All common_chat_parse_* moved from chat.cpp to chat-parser.cpp below
- * to reduce incremental compile time for parser changes.
- */
-static void common_chat_parse_generic(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    static const std::vector<std::vector<std::string>> content_paths = {
-        {"response"},
-    };
-    static const std::vector<std::vector<std::string>> args_paths = {
-        {"tool_call", "arguments"},
-        {"tool_calls", "arguments"},
-    };
-    auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
-    if (data.value.contains("tool_calls")) {
-        if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool calls");
-        }
-    } else if (data.value.contains("tool_call")) {
-        if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-    } else if (data.value.contains("response")) {
-        const auto & response = data.value.at("response");
-        builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
-        if (data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete response");
-        }
-    } else {
-        throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
-    }
-}
-
-static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
-    parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
-static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("[THINK]", "[/THINK]");
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
-    parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
-static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
-
-    static const common_regex start_action_regex("<\\|START_ACTION\\|>");
-    static const common_regex end_action_regex("<\\|END_ACTION\\|>");
-    static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
-    static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
-
-    if (auto res = builder.try_find_regex(start_action_regex)) {
-        // If we didn't extract thoughts, prelude includes them.
-        auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
-        for (const auto & tool_call : tool_calls.value) {
-            std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
-            std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
-            std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
-            if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-        if (tool_calls.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-        builder.consume_regex(end_action_regex);
-    } else if (auto res = builder.try_find_regex(start_response_regex)) {
-        if (!builder.try_find_regex(end_response_regex)) {
-            builder.add_content(builder.consume_rest());
-            throw common_chat_msg_partial_exception(end_response_regex.str());
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
-    builder.try_parse_reasoning("<think>", "</think>");
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex function_regex(
-        "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
-    static const common_regex close_regex("\\}\\s*");
-
-    static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
-    static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
-
-    if (with_builtin_tools) {
-        static const common_regex builtin_call_regex("<\\|python_tag\\|>");
-        if (auto res = builder.try_find_regex(builtin_call_regex)) {
-            auto fun_res = builder.consume_regex(function_name_regex);
-            auto function_name = builder.str(fun_res.groups[1]);
-
-            common_healing_marker healing_marker;
-            json args = json::object();
-            while (true) {
-                if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
-                    auto arg_name = builder.str(arg_res->groups[1]);
-                    auto partial = builder.consume_json();
-                    args[arg_name] = partial.json;
-                    healing_marker.marker = partial.healing_marker.marker;
-                    healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
-                    builder.consume_spaces();
-                    if (!builder.try_consume_literal(",")) {
-                        break;
-                    }
-                } else {
-                    break;
-                }
-            }
-            builder.consume_literal(")");
-            builder.consume_spaces();
-
-            auto arguments = args.dump();
-            if (!builder.add_tool_call(function_name, "", arguments)) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            return;
-        }
-    }
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ std::nullopt,
-        /* function_regex_start_only= */ function_regex,
-        /* function_regex= */ std::nullopt,
-        close_regex,
-        std::nullopt);
-
-}
-
-static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex tool_calls_begin("(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)");
-    static const common_regex tool_calls_end("<｜tool▁calls▁end｜>");
-    static const common_regex function_regex("(?:<｜tool▁call▁begin｜>)?function<｜tool▁sep｜>([^\n]+)\n```json\n");
-    static const common_regex close_regex("```[\\s\\r\\n]*<｜tool▁call▁end｜>");
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ tool_calls_begin,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
-    static const common_regex function_regex("(?:<｜tool▁call▁begin｜>)?([^\\n<]+)(?:<｜tool▁sep｜>)");
-
-    static const common_regex close_regex("(?:[\\s]*)?<｜tool▁call▁end｜>");
-    static const common_regex tool_calls_begin("(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)");
-    static const common_regex tool_calls_end("<｜tool▁calls▁end｜>");
-
-    if (!builder.syntax().parse_tool_calls) {
-        LOG_DBG("%s: not parse_tool_calls\n", __func__);
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    LOG_DBG("%s: parse_tool_calls\n", __func__);
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ tool_calls_begin,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
-    // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
-    // First try to parse using the standard reasoning parsing method
-    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
-
-    auto start_pos = builder.pos();
-    auto found_end_think = builder.try_find_literal("</think>");
-    builder.move_to(start_pos);
-
-    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
-        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
-        common_chat_parse_deepseek_v3_1_content(builder);
-    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
-        // If reasoning was parsed successfully, the remaining content is regular content
-        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
-        // </think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>NAME\n```json\nJSON\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>
-        common_chat_parse_deepseek_v3_1_content(builder);
-    } else {
-        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
-          LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
-          common_chat_parse_deepseek_v3_1_content(builder);
-          return;
-        }
-        // If no reasoning tags found, check if we should treat everything as reasoning
-        if (builder.syntax().thinking_forced_open) {
-            // If thinking is forced open but no tags found, treat everything as reasoning
-            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
-            builder.add_reasoning_content(builder.consume_rest());
-        } else {
-            LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
-            // <｜tool▁call▁begin｜>NAME<｜tool▁sep｜>JSON<｜tool▁call▁end｜>
-            common_chat_parse_deepseek_v3_1_content(builder);
-        }
-    }
-}
-
-static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<minimax:tool_call>",
-        /* form.tool_start  = */ "<invoke name=\"",
-        /* form.tool_sep    = */ "\">",
-        /* form.key_start   = */ "<parameter name=\"",
-        /* form.key_val_sep = */ "\">",
-        /* form.val_end     = */ "</parameter>",
-        /* form.tool_end    = */ "</invoke>",
-        /* form.scope_end   = */ "</minimax:tool_call>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<tool_call>";
-        form.tool_start  = "<function=";
-        form.tool_sep    = ">";
-        form.key_start   = "<parameter=";
-        form.key_val_sep = ">";
-        form.val_end     = "</parameter>";
-        form.tool_end    = "</function>";
-        form.scope_end   = "</tool_call>";
-        form.trim_raw_argval = true;
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form);
-}
-
-static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<|tool_calls_section_begin|>";
-        form.tool_start  = "<|tool_call_begin|>";
-        form.tool_sep    = "<|tool_call_argument_begin|>{";
-        form.key_start   = "\"";
-        form.key_val_sep = "\":";
-        form.val_end     = ",";
-        form.tool_end    = "}<|tool_call_end|>";
-        form.scope_end   = "<|tool_calls_section_end|>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.allow_toolcall_in_think = true;
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<tool_calls>[";
-        form.tool_start  = "{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}, ";
-        form.scope_end   = "]</tool_calls>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.last_tool_end = "}";
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
-}
-
-static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "";
-        form.tool_start  = "<tool_call>\n{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}\n</tool_call>";
-        form.scope_end   = "";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form);
-}
-
-static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
-    static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
-    static const std::string recipient("(?: to=functions\\.([^<\\s]+))");
-
-    static const common_regex start_regex("<\\|start\\|>assistant");
-    static const common_regex analysis_regex("<\\|channel\\|>analysis");
-    static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
-    static const common_regex preamble_regex("<\\|channel\\|>commentary");
-    static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
-    static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");
-
-    auto consume_end = [&](bool include_end = false) {
-        if (auto res = builder.try_find_literal("<|end|>")) {
-            return res->prelude + (include_end ? builder.str(res->groups[0]) : "");
-        }
-        return builder.consume_rest();
-    };
-
-    auto handle_tool_call = [&](const std::string & name) {
-        if (auto args = builder.try_consume_json_with_dumped_args({{}})) {
-            if (builder.syntax().parse_tool_calls) {
-                if (!builder.add_tool_call(name, "", args->value) || args->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-            } else if (args->is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-    };
-
-    auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> {
-        auto match = regex.search(input, 0, true);
-        if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
-            return match;
-        }
-        return std::nullopt;
-    };
-
-    do {
-        auto header_start_pos = builder.pos();
-        auto content_start = builder.try_find_literal("<|message|>");
-        if (!content_start) {
-            throw common_chat_msg_partial_exception("incomplete header");
-        }
-
-        auto header = content_start->prelude;
-
-        if (auto match = regex_match(tool_call1_regex, header)) {
-            auto group = match->groups[1];
-            auto name = header.substr(group.begin, group.end - group.begin);
-            handle_tool_call(name);
-            continue;
-        }
-
-        if (auto match = regex_match(tool_call2_regex, header)) {
-            auto group = match->groups[2];
-            auto name = header.substr(group.begin, group.end - group.begin);
-            handle_tool_call(name);
-            continue;
-        }
-
-        if (regex_match(analysis_regex, header)) {
-            builder.move_to(header_start_pos);
-            if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
-                builder.add_content(consume_end(true));
-            } else {
-                builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
-            }
-            continue;
-        }
-
-        if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
-            builder.add_content(consume_end());
-            continue;
-        }
-
-        // Possibly a malformed message, attempt to recover by rolling
-        // back to pick up the next <|start|>
-        LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
-        builder.move_to(header_start_pos);
-    } while (builder.try_find_regex(start_regex, std::string::npos, false));
-
-    auto remaining = builder.consume_rest();
-    if (!remaining.empty()) {
-        LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
-    }
-}
-
-static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start  = */ "",
-        /* form.tool_start   = */ "<tool_call>",
-        /* form.tool_sep     = */ "",
-        /* form.key_start    = */ "<arg_key>",
-        /* form.key_val_sep  = */ "</arg_key>",
-        /* form.val_end      = */ "</arg_value>",
-        /* form.tool_end     = */ "</tool_call>",
-        /* form.scope_end    = */ "",
-        /* form.key_val_sep2 = */ "<arg_value>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    static const common_regex prefix(regex_escape(" functools["));
-    parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
-}
-
-static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
-    static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
-    static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
-    static const common_regex close_regex(R"(\s*)");
-
-    parse_json_tool_calls(
-        builder,
-        std::nullopt,
-        function_regex_start_only,
-        function_regex,
-        close_regex,
-        std::nullopt,
-        /* allow_raw_python= */ true,
-        /* get_function_name= */ [&](const auto & res) -> std::string {
-            auto at_start = res.groups[0].begin == 0;
-            auto name = builder.str(res.groups[1]);
-            if (!name.empty() && name.back() == '{') {
-                // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
-                builder.move_back(1);
-            }
-            auto idx = name.find_last_not_of("\n{");
-            name = name.substr(0, idx + 1);
-            if (at_start && name == "all") {
-                return "";
-            }
-            return name;
-        });
-}
-
-static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
-    static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
-
-    static const common_regex function_regex(R"(<function=(\w+)>)");
-    static const common_regex close_regex(R"(</function>)");
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ std::nullopt,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        std::nullopt);
-
-    if (auto res = builder.try_find_regex(python_tag_regex)) {
-        auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
-        builder.add_tool_call("python", "", arguments);
-        return;
-    }
-}
-
-static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex open_regex(
-        "(?:"
-            "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
-            "("                          // match 2 (open_tag)
-                "<tool_call>"
-                "|<function_call>"
-                "|<tool>"
-                "|<tools>"
-                "|<response>"
-                "|<json>"
-                "|<xml>"
-                "|<JSON>"
-            ")?"
-            "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
-        ")"
-        "|<function=([^>]+)>"            // match 4 (function name)
-        "|<function name=\"([^\"]+)\">"  // match 5 (function name again)
-    );
-
-    while (auto res = builder.try_find_regex(open_regex)) {
-        const auto & block_start = res->groups[1];
-        std::string block_end = block_start.empty() ? "" : "```";
-
-        const auto & open_tag = res->groups[2];
-        std::string close_tag;
-
-        if (!res->groups[3].empty()) {
-            builder.move_to(res->groups[3].begin);
-            close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);
-
-            if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
-                if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-                builder.consume_spaces();
-                builder.consume_literal(close_tag);
-                builder.consume_spaces();
-                if (!block_end.empty()) {
-                    builder.consume_literal(block_end);
-                    builder.consume_spaces();
-                }
-            } else {
-                throw common_chat_msg_partial_exception("failed to parse tool call");
-            }
-        } else {
-            auto function_name = builder.str(res->groups[4]);
-            if (function_name.empty()) {
-                function_name = builder.str(res->groups[5]);
-            }
-            GGML_ASSERT(!function_name.empty());
-
-            close_tag = "</function>";
-
-            if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
-                if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-                builder.consume_spaces();
-                builder.consume_literal(close_tag);
-                builder.consume_spaces();
-                if (!block_end.empty()) {
-                    builder.consume_literal(block_end);
-                    builder.consume_spaces();
-                }
-            }
-        }
-    }
-
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_granite(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    static const common_regex start_think_regex(regex_escape("<think>"));
-    static const common_regex end_think_regex(regex_escape("</think>"));
-    // Granite models output partial tokens such as "<" and "<think".
-    // By leveraging try_consume_regex()/try_find_regex() throwing
-    // common_chat_msg_partial_exception for these partial tokens,
-    // processing is interrupted and the tokens are not passed to add_content().
-    if (auto res = builder.try_consume_regex(start_think_regex)) {
-        // Restore position for try_parse_reasoning()
-        builder.move_to(res->groups[0].begin);
-        builder.try_find_regex(end_think_regex, std::string::npos, false);
-        // Restore position for try_parse_reasoning()
-        builder.move_to(res->groups[0].begin);
-    }
-    builder.try_parse_reasoning("<think>", "</think>");
-
-    // Parse response tags
-    static const common_regex start_response_regex(regex_escape("<response>"));
-    static const common_regex end_response_regex(regex_escape("</response>"));
-    // Granite models output partial tokens such as "<" and "<response".
-    // Same hack as reasoning parsing.
-    if (builder.try_consume_regex(start_response_regex)) {
-        builder.try_find_regex(end_response_regex);
-    }
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<|tool_call|>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        // Expect JSON array of tool calls
-        if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
-            if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        // Expect JSON array of tool calls
-        auto tool_calls_data = builder.consume_json();
-        if (tool_calls_data.json.is_array()) {
-            if (!builder.try_consume_literal("</TOOLCALL>")) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            builder.add_tool_calls(tool_calls_data.json);
-        } else {
-            throw common_chat_msg_partial_exception("Incomplete tool call");
-        }
-    }
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        auto tool_calls_data = builder.consume_json();
-        if (tool_calls_data.json.is_array()) {
-            builder.consume_spaces();
-            if (!builder.try_consume_literal("<|tools_suffix|>")) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            for (const auto & value : tool_calls_data.json) {
-                if (value.is_object()) {
-                    builder.add_tool_call_short_form(value);
-                }
-            }
-        } else {
-            throw common_chat_msg_partial_exception("Incomplete tool call");
-        }
-    }
-    builder.add_content(builder.consume_rest());
-}
-
-
-static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
-    static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
-    static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
-
-    // Loop through all tool calls
-    while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
-        builder.move_to(res->groups[0].end);
-
-        // Parse JSON array format: [{"name": "...", "arguments": {...}}]
-        auto tool_calls_data = builder.consume_json();
-
-        // Consume end marker
-        builder.consume_spaces();
-        if (!builder.try_consume_regex(tool_call_end_regex)) {
-            throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
-        }
-
-        // Process each tool call in the array
-        if (tool_calls_data.json.is_array()) {
-            for (const auto & tool_call : tool_calls_data.json) {
-                if (!tool_call.is_object()) {
-                    throw common_chat_msg_partial_exception("Tool call must be an object");
-                }
-
-                if (!tool_call.contains("name")) {
-                    throw common_chat_msg_partial_exception("Tool call missing 'name' field");
-                }
-
-                std::string function_name = tool_call.at("name");
-                std::string arguments = "{}";
-
-                if (tool_call.contains("arguments")) {
-                    if (tool_call.at("arguments").is_object()) {
-                        arguments = tool_call.at("arguments").dump();
-                    } else if (tool_call.at("arguments").is_string()) {
-                        arguments = tool_call.at("arguments");
-                    }
-                }
-
-                if (!builder.add_tool_call(function_name, "", arguments)) {
-                    throw common_chat_msg_partial_exception("Incomplete tool call");
-                }
-            }
-        } else {
-            throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
-        }
-
-        // Consume any trailing whitespace after this tool call
-        builder.consume_spaces();
-    }
-
-    // Consume any remaining content after all tool calls
-    auto remaining = builder.consume_rest();
-    if (!string_strip(remaining).empty()) {
-        builder.add_content(remaining);
-    }
-}
-
-static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<seed:tool_call>",
-        /* form.tool_start  = */ "<function=",
-        /* form.tool_sep    = */ ">",
-        /* form.key_start   = */ "<parameter=",
-        /* form.key_val_sep = */ ">",
-        /* form.val_end     = */ "</parameter>",
-        /* form.tool_end    = */ "</function>",
-        /* form.scope_end   = */ "</seed:tool_call>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
-}
-
-static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<|think|>", "<|end|><|begin|>assistant<|content|>");
-
-    // TODO: Tool calling
-
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_exaone_moe_content(common_chat_msg_parser & builder) {
-    // 1) <tool_call>{ "name": "...", "arguments": {...} }</tool_call>
-    // 2) <tool_call>{ "id": "...", "type": "function", "function": { "name": "...", "arguments": {...} } }</tool_call>
-    static const common_regex tool_call_open(R"(<tool_call[^>]*>)");
-
-    if (!builder.syntax().parse_tool_calls) {
-        LOG_DBG("%s: not parse_tool_calls\n", __func__);
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    LOG_DBG("%s: parse_tool_calls\n", __func__);
-
-    // Find all <tool_call></tool_call> blocks
-    while (auto first = builder.try_find_regex(tool_call_open, std::string::npos, /* add_prelude_to_content= */ true)) {
-        builder.move_to(first->groups[0].end);
-        builder.consume_spaces();
-
-        builder.try_consume_literal("```json");
-        builder.try_consume_literal("```");
-        builder.consume_spaces();
-
-        // Consume JSON object
-        auto data = builder.consume_json();
-
-        builder.consume_spaces();
-        builder.try_consume_literal("```");
-        builder.consume_spaces();
-
-        if (!builder.try_consume_literal("</tool_call>")) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-        builder.consume_spaces();
-
-        // Extract name and arguments
-        std::string name;
-        std::string id;
-        nlohmann::ordered_json arguments;
-
-        const auto extract_args = [&](const nlohmann::ordered_json & obj) -> bool {
-            if (!obj.contains("name") || !obj.contains("arguments")) {
-                return false;
-            }
-            name = obj.at("name").get<std::string>();
-            arguments = obj.at("arguments");
-            if (obj.contains("id") && obj.at("id").is_string()) {
-                id = obj.at("id").get<std::string>();
-            }
-            return true;
-        };
-
-        if (!extract_args(data.json)) {
-            if (data.json.contains("function") && data.json.at("function").is_object()) {
-                auto fn = data.json.at("function");
-                extract_args(fn);
-                if (id.empty() && data.json.contains("id") && data.json.at("id").is_string()) {
-                    id = data.json.at("id").get<std::string>();
-                }
-            }
-        }
-
-        // If name is empty, treat the JSON object as content
-        if (name.empty()) {
-            LOG_DBG("%s: tool call missing name, treating as content\n", __func__);
-            builder.add_content(data.json.dump());
-            continue;
-        }
-
-        std::string args_str = arguments.dump();
-        if (!builder.add_tool_call(name, id, args_str)) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-    }
-
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) {
-    LOG_DBG("%s: parsing exaone_moe\n", __func__);
-    // EXAONE MoE outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
-    // First try to parse using the standard reasoning parsing method
-    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
-
-    auto start_pos = builder.pos();
-    auto found_end_think = builder.try_find_literal("</think>");
-    builder.move_to(start_pos);
-
-    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
-        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
-        common_chat_parse_exaone_moe_content(builder);
-    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
-        // If reasoning was parsed successfully, the remaining content is regular content
-        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
-        common_chat_parse_exaone_moe_content(builder);
-    } else {
-        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
-          LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
-          common_chat_parse_exaone_moe_content(builder);
-          return;
-        }
-        // If no reasoning tags found, check if we should treat everything as reasoning
-        if (builder.syntax().thinking_forced_open) {
-            // If thinking is forced open but no tags found, treat everything as reasoning
-            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
-            builder.add_reasoning_content(builder.consume_rest());
-        } else {
-            LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
-            common_chat_parse_exaone_moe_content(builder);
-        }
-    }
-}
-
-static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse(common_chat_msg_parser & builder) {
-    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
-
-    switch (builder.syntax().format) {
-        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
-            common_chat_parse_content_only(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GENERIC:
-            common_chat_parse_generic(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
-            common_chat_parse_mistral_nemo(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MAGISTRAL:
-            common_chat_parse_magistral(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LLAMA_3_X:
-            common_chat_parse_llama_3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
-            common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
-            break;
-        case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
-            common_chat_parse_deepseek_r1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
-            common_chat_parse_deepseek_v3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
-            common_chat_parse_functionary_v3_2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
-            common_chat_parse_functionary_v3_1_llama_3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_HERMES_2_PRO:
-            common_chat_parse_hermes_2_pro(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
-            common_chat_parse_firefunction_v2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_COMMAND_R7B:
-            common_chat_parse_command_r7b(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GRANITE:
-            common_chat_parse_granite(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GPT_OSS:
-            common_chat_parse_gpt_oss(builder);
-            break;
-        case COMMON_CHAT_FORMAT_SEED_OSS:
-            common_chat_parse_seed_oss(builder);
-            break;
-        case COMMON_CHAT_FORMAT_NEMOTRON_V2:
-            common_chat_parse_nemotron_v2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_APERTUS:
-            common_chat_parse_apertus(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
-            common_chat_parse_lfm2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MINIMAX_M2:
-            common_chat_parse_minimax_m2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GLM_4_5:
-            common_chat_parse_glm_4_5(builder);
-            break;
-        case COMMON_CHAT_FORMAT_KIMI_K2:
-            common_chat_parse_kimi_k2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
-            common_chat_parse_qwen3_coder_xml(builder);
-            break;
-        case COMMON_CHAT_FORMAT_APRIEL_1_5:
-            common_chat_parse_apriel_1_5(builder);
-            break;
-        case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
-            common_chat_parse_xiaomi_mimo(builder);
-            break;
-        case COMMON_CHAT_FORMAT_SOLAR_OPEN:
-            common_chat_parse_solar_open(builder);
-            break;
-        case COMMON_CHAT_FORMAT_EXAONE_MOE:
-            common_chat_parse_exaone_moe(builder);
-            break;
-        default:
-            throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
-    }
-    builder.finish();
-}
-
-common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
-    if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
-        syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
-        syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
-        return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
-    }
-    common_chat_msg_parser builder(input, is_partial, syntax);
-    try {
-        common_chat_parse(builder);
-    } catch (const common_chat_msg_partial_exception & ex) {
-        LOG_DBG("Partial parse: %s\n", ex.what());
-        if (!is_partial) {
-            builder.clear_tools();
-            builder.move_to(0);
-            common_chat_parse_content_only(builder);
-        }
-    }
-    auto msg = builder.result();
-    if (!is_partial) {
-        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
-    }
-    return msg;
-}
-
-common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
-    if (parser.empty()) {
-        throw std::runtime_error("Failed to parse due to missing parser definition.");
-    }
-
-    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
-
-    common_peg_parse_context ctx(input, is_partial);
-    auto result = parser.parse(ctx);
-    if (result.fail()) {
-        throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
-    }
-
-    common_chat_msg msg;
-    msg.role = "assistant";
-
-    if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
-        auto mapper = common_chat_peg_native_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
-        auto mapper = common_chat_peg_constructed_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    } else {
-        // Generic mapper
-        auto mapper = common_chat_peg_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    }
-    if (!is_partial) {
-        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
-    }
-    return msg;
-}
diff --git a/common/chat-parser.h b/common/chat-parser.h
deleted file mode 100644
index 3ed9c30a2b3..00000000000
--- a/common/chat-parser.h
+++ /dev/null
@@ -1,133 +0,0 @@
-#pragma once
-
-#include "chat.h"
-#include "chat-parser-xml-toolcall.h"
-#include "json-partial.h"
-#include "regex-partial.h"
-
-#include <nlohmann/json_fwd.hpp>
-
-#include <optional>
-#include <string>
-#include <vector>
-
-class common_chat_msg_partial_exception : public std::runtime_error {
-  public:
-    common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {}
-};
-
-class common_chat_msg_parser {
-    std::string input_;
-    bool is_partial_;
-    common_chat_parser_params syntax_; // TODO: rename to params
-    std::string healing_marker_;
-
-    size_t pos_ = 0;
-    common_chat_msg result_;
-
-  public:
-    common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
-    const std::string & input() const { return input_; }
-    size_t pos() const { return pos_; }
-    const std::string & healing_marker() const { return healing_marker_; }
-    const bool & is_partial() const { return is_partial_; }
-    const common_chat_msg & result() const { return result_; }
-    const common_chat_parser_params & syntax() const { return syntax_; }
-
-    void move_to(size_t pos) {
-        if (pos > input_.size()) {
-            throw std::runtime_error("Invalid position!");
-        }
-        pos_ = pos;
-    }
-    void move_back(size_t n) {
-        if (pos_ < n) {
-            throw std::runtime_error("Can't move back that far!");
-        }
-        pos_ -= n;
-    }
-
-    // Get the substring of the input at the given range
-    std::string str(const common_string_range & rng) const;
-
-    // Appends to the result.content field
-    void add_content(const std::string & content);
-
-    // Appends to the result.reasoning_content field
-    void add_reasoning_content(const std::string & reasoning_content);
-
-    // Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
-    bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);
-
-    // Adds a tool call using the "name", "id" and "arguments" fields of the json object
-    bool add_tool_call(const nlohmann::ordered_json & tool_call);
-
-    // Adds an array of tool calls using their "name", "id" and "arguments" fields.
-    bool add_tool_calls(const nlohmann::ordered_json & arr);
-
-    // Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
-    bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call);
-
-    void finish();
-
-    bool consume_spaces();
-
-    void consume_literal(const std::string & literal);
-
-    bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);
-
-    std::string consume_rest();
-
-    struct find_regex_result {
-        std::string prelude;
-        std::vector<common_string_range> groups;
-    };
-
-    std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
-
-    bool try_consume_literal(const std::string & literal);
-
-    std::optional<find_regex_result> try_find_literal(const std::string & literal);
-
-    find_regex_result consume_regex(const common_regex & regex);
-
-    std::optional<find_regex_result> try_consume_regex(const common_regex & regex);
-
-    std::optional<common_json> try_consume_json();
-    common_json consume_json();
-
-    struct consume_json_result {
-        nlohmann::ordered_json value;
-        bool is_partial;
-    };
-
-    /*
-        Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings.
-
-        By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
-        e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`
-
-        But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
-        - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
-        - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
-    */
-    consume_json_result consume_json_with_dumped_args(
-        const std::vector<std::vector<std::string>> & args_paths = {},
-        const std::vector<std::vector<std::string>> & content_paths = {}
-    );
-    std::optional<consume_json_result> try_consume_json_with_dumped_args(
-        const std::vector<std::vector<std::string>> & args_paths = {},
-        const std::vector<std::vector<std::string>> & content_paths = {}
-    );
-
-    /**
-     * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
-     * form.scope_start, form.tool_sep and form.scope_end can be empty.
-     */
-    bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form);
-
-    // Parse content uses reasoning and XML-Style tool call
-    void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>");
-
-    void clear_tools();
-};
diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp
index 1bcba9cd866..f72bece7b06 100644
--- a/common/chat-peg-parser.cpp
+++ b/common/chat-peg-parser.cpp
@@ -1,13 +1,16 @@
 #include "chat-peg-parser.h"
 
+#include "chat-auto-parser.h"
+#include "ggml.h"
+
 #include <nlohmann/json.hpp>
 
-using json = nlohmann::json;
+using json = nlohmann::ordered_json;
 
 static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
     int count = 0;
     while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
-        if (max != -1 && count <= max) {
+        if (max != -1 && count >= max) {
             break;
         }
         sv.remove_suffix(1);
@@ -16,109 +19,735 @@ static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
     return sv;
 }
 
+static std::string_view trim_leading_space(std::string_view sv, int max = -1) {
+    int count = 0;
+    while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.front()))) {
+        if (max != -1 && count >= max) {
+            break;
+        }
+        sv.remove_prefix(1);
+        count++;
+    }
+    return sv;
+}
+
+static std::string_view trim(std::string_view sv) {
+    return trim_trailing_space(trim_leading_space(sv, 1));
+}
+
+// Convert Python-style single-quoted strings to JSON double-quoted strings
+// Only converts outer string delimiters, properly handling escape sequences:
+// - {'key': 'value'} -> {"key": "value"}
+// - {'code': 'print(\'hello\')'} -> {"code": "print('hello')"}
+// - {'msg': 'He said "hi"'} -> {"msg": "He said \"hi\""}
+static std::string normalize_quotes_to_json(const std::string & input) {
+    std::string result;
+    result.reserve(input.size() + 16);  // May need extra space for escaping
+
+    bool in_single_quoted = false;
+    bool in_double_quoted = false;
+
+    for (size_t i = 0; i < input.size(); ++i) {
+        char c = input[i];
+
+        // Handle escape sequences
+        if (c == '\\' && i + 1 < input.size()) {
+            char next = input[i + 1];
+
+            if (in_single_quoted) {
+                // Inside a single-quoted string being converted to double quotes
+                if (next == '\'') {
+                    // \' -> ' (escaped single quote becomes unescaped in double-quoted string)
+                    result += '\'';
+                    ++i;
+                    continue;
+                }
+                if (next == '"') {
+                    // \" stays as \" (already escaped, works in double-quoted string)
+                    result += "\\\"";
+                    ++i;
+                    continue;
+                }
+                // Other escapes (\n, \\, etc.): pass through both characters
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            if (in_double_quoted) {
+                // Inside a double-quoted string - pass through escape sequences as-is
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            // Outside any string - just pass through the backslash
+            result += c;
+            continue;
+        }
+
+        // Handle quote characters
+        if (c == '"') {
+            if (in_single_quoted) {
+                // Unescaped double quote inside single-quoted string -> must escape for JSON
+                result += "\\\"";
+            } else {
+                // Double quote as string delimiter or outside strings
+                in_double_quoted = !in_double_quoted;
+                result += c;
+            }
+        } else if (c == '\'') {
+            if (in_double_quoted) {
+                // Single quote inside double-quoted string -> pass through
+                result += c;
+            } else if (in_single_quoted) {
+                // Closing single quote -> convert to double quote
+                in_single_quoted = false;
+                result += '"';
+            } else {
+                // Opening single quote -> convert to double quote
+                in_single_quoted = true;
+                result += '"';
+            }
+        } else {
+            result += c;
+        }
+    }
+
+    return result;
+}
+
 void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
-    arena.visit(result, [this](const common_peg_ast_node & node) {
-        map(node);
-    });
+    arena.visit(result, [this](const common_peg_ast_node & node) { map(node); });
 }
 
 void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
     bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
-    bool is_content = node.tag == common_chat_peg_builder::CONTENT;
+    bool is_content   = node.tag == common_chat_peg_builder::CONTENT;
 
-    if (is_reasoning) {
-        result.reasoning_content = std::string(trim_trailing_space(node.text));
+    if (is_reasoning) { // GPT OSS can have more than 1 reasoning block, so concatenate here
+        result.reasoning_content += std::string(trim_trailing_space(node.text));
     }
 
     if (is_content) {
-        result.content = std::string(trim_trailing_space(node.text));
+        // Concatenate content from multiple content nodes (e.g., when reasoning markers
+        // are preserved before content markers in reasoning_format=NONE mode)
+        result.content += std::string(trim_trailing_space(node.text));
+    }
+}
+
+common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::string &       tag_name,
+                                                                 const std::string &       marker,
+                                                                 const common_peg_parser & p) {
+    if (marker.empty()) {
+        return zero_or_more(choice({ p, rule(tag_name, content(any())) }));
+    }
+    auto content_chunk = rule(tag_name, content(negate(literal(marker)) + any() + until(marker)));
+    return zero_or_more(choice({ p, content_chunk }));
+}
+
+void common_chat_peg_unified_mapper::from_ast(const common_peg_ast_arena &    arena,
+                                              const common_peg_parse_result & parse_result_arg) {
+    // Call base class to visit all nodes
+    common_chat_peg_mapper::from_ast(arena, parse_result_arg);
+
+    // Flush any pending tool call that was started but never got a name
+    // This happens during partial parsing when the tool call is incomplete
+    if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) {
+        // Transfer any buffered arguments
+        if (!args_buffer.empty()) {
+            pending_tool_call->arguments = args_buffer;
+        }
+        // Close any open quotes in buffered args
+        if (buffer_needs_closing_quote && !pending_tool_call->arguments.empty()) {
+            pending_tool_call->arguments += "\"";
+        }
+        // Add the incomplete tool call to results
+        result.tool_calls.push_back(pending_tool_call.value());
+        pending_tool_call.reset();
     }
 }
 
-void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
+void common_chat_peg_unified_mapper::map(const common_peg_ast_node & node) {
+    // First call base class for reasoning/content handling
     common_chat_peg_mapper::map(node);
 
-    bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
-    bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
-    bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
-    bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
+    // Handle tool-related tags (unified version supporting both JSON and tagged formats)
+    bool is_tool_open  = node.tag == common_chat_peg_unified_builder::TOOL_OPEN;
+    bool is_tool_close = node.tag == common_chat_peg_unified_builder::TOOL_CLOSE;
+    bool is_tool_name  = node.tag == common_chat_peg_unified_builder::TOOL_NAME;
+    bool is_tool_id    = node.tag == common_chat_peg_unified_builder::TOOL_ID;
+    bool is_tool_args  = node.tag == common_chat_peg_unified_builder::TOOL_ARGS;
+    bool is_arg_open   = node.tag == common_chat_peg_unified_builder::TOOL_ARG_OPEN;
+    bool is_arg_close  = node.tag == common_chat_peg_unified_builder::TOOL_ARG_CLOSE;
+    bool is_arg_name         = node.tag == common_chat_peg_unified_builder::TOOL_ARG_NAME;
+    bool is_arg_value        = node.tag == common_chat_peg_unified_builder::TOOL_ARG_VALUE;
+    bool is_arg_string_value = node.tag == common_chat_peg_unified_builder::TOOL_ARG_STRING_VALUE;
 
     if (is_tool_open) {
-        result.tool_calls.emplace_back();
-        current_tool = &result.tool_calls.back();
+        // Don't create tool call yet - wait for name to be known
+        // This prevents sending incomplete tool calls in streaming mode
+        pending_tool_call = common_chat_tool_call();
+        current_tool      = &pending_tool_call.value();
+        arg_count         = 0;
+        // Clear the arguments buffer for the new tool
+        args_buffer.clear();
+        needs_closing_quote        = false;
+        buffer_needs_closing_quote = false;
     }
 
     if (is_tool_id && current_tool) {
-        current_tool->id = std::string(trim_trailing_space(node.text));
+        auto text = trim_trailing_space(node.text);
+        if (text.size() >= 2 && text.front() == '"' && text.back() == '"') {
+            text = text.substr(1, text.size() - 2);
+        }
+        current_tool->id = std::string(text);
     }
 
     if (is_tool_name && current_tool) {
         current_tool->name = std::string(trim_trailing_space(node.text));
+        // Now that we have the name, we can populate the arguments from the buffer
+        if (!args_buffer.empty()) {
+            current_tool->arguments = args_buffer;
+            args_buffer.clear();
+        } else if (current_tool->arguments.empty()) {
+            // Initialize arguments if we're using tagged format and no buffered args
+            current_tool->arguments = "{";
+        }
+        // Now that we have the name, add the tool call to the result
+        if (pending_tool_call.has_value()) {
+            result.tool_calls.push_back(pending_tool_call.value());
+            pending_tool_call.reset();
+            current_tool = &result.tool_calls.back();
+        }
     }
 
     if (is_tool_args && current_tool) {
-        current_tool->arguments = std::string(trim_trailing_space(node.text));
+        // For JSON format, the arguments come as a complete JSON object
+        // For tagged format, we build up arguments from individual arg_name/arg_value nodes
+        // Check if this looks like JSON (starts with {) vs tagged format (starts with <)
+        auto text = trim_trailing_space(node.text);
+        if (!text.empty() && text.front() == '{') {
+            // If we have the tool name, populate directly; otherwise buffer
+            if (!current_tool->name.empty()) {
+                current_tool->arguments = std::string(text);
+            } else {
+                args_buffer = std::string(text);
+            }
+        }
+        // If it's tagged format, we ignore this and let arg_name/arg_value build up the JSON
     }
-}
 
-void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
-    common_chat_peg_mapper::map(node);
+    if (is_arg_open) {
+        // Reset for new argument
+        if (!current_tool->name.empty()) {
+            needs_closing_quote = false;
+        } else {
+            buffer_needs_closing_quote = false;
+        }
+    }
 
-    bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
-    bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
-    bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
-    bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
-    bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
-    bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
-    bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
-    bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
+    if (is_arg_name && current_tool) {
+        std::string arg_entry;
+        if (arg_count > 0) {
+            arg_entry = ",";
+        }
+        arg_entry += json(trim(node.text)).dump() + ":";
+        ++arg_count;
 
-    if (is_tool_open) {
-        result.tool_calls.emplace_back();
-        current_tool = &result.tool_calls.back();
-        arg_count = 0;
+        // If we have the tool name, add directly; otherwise buffer
+        if (!current_tool->name.empty()) {
+            current_tool->arguments += arg_entry;
+        } else {
+            if (args_buffer.empty()) {
+                args_buffer = "{";
+            }
+            args_buffer += arg_entry;
+        }
     }
 
-    if (is_tool_name) {
-        current_tool->name = std::string(node.text);
-        current_tool->arguments = "{";
+    if ((is_arg_value || is_arg_string_value) && current_tool) {
+        std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1));
+
+        std::string value_to_add;
+        if (!value_content.empty()) {
+            // For potential containers, normalize Python-style single quotes to JSON double quotes first
+            // This ensures consistent output during both partial and final parsing
+            // Note: is_arg_string_value means the schema explicitly declares this as a string type,
+            // so we should NOT treat it as a potential container even if it starts with [ or {
+            bool is_potential_container = !is_arg_string_value &&
+                (value_content[0] == '[' || value_content[0] == '{');
+            if (is_potential_container) {
+                value_content = normalize_quotes_to_json(value_content);
+            }
+
+            // Try to parse as JSON value (number, bool, null, object, array)
+            // For strings, we need special handling to support incremental parsing
+            try {
+                json parsed = json::parse(value_content);
+                if (parsed.is_string()) {
+                    // For string values, don't add closing quote yet (added by arg_close)
+                    // This ensures incremental parsing produces monotonic arguments
+                    std::string escaped = parsed.dump();
+                    // Remove the trailing quote
+                    if (!escaped.empty() && escaped.back() == '"') {
+                        escaped.pop_back();
+                    }
+                    value_to_add = escaped;
+                    if (!current_tool->name.empty()) {
+                        needs_closing_quote = true;
+                    } else {
+                        buffer_needs_closing_quote = true;
+                    }
+                } else {
+                    // For non-string values (number, bool, null, object, array), add raw value content
+                    // Using raw content instead of dump() ensures monotonicity for streaming
+                    // (prevents issues with spaces being removed by dump())
+                    value_to_add = value_content;
+                }
+            } catch (...) {
+                // JSON parsing failed - content is either incomplete (partial) or not valid JSON
+                // Note: potential containers were already normalized above, so value_content
+                // already has double quotes if it started with [ or {
+
+                if (node.is_partial && is_potential_container) {
+                    // During incremental parsing, if it looks like a JSON container, don't wrap in quotes yet
+                    // and don't escape. Just pass through the (already normalized) content.
+                    value_to_add = value_content;
+                } else {
+                    // Not valid JSON and NOT a potential partial container - treat as string value
+                    // Add opening quote if not already in a string
+                    if (!current_tool->name.empty()) {
+                        if (!needs_closing_quote) {
+                            value_to_add        = "\"";
+                            needs_closing_quote = true;
+                        }
+                    } else {
+                        if (!buffer_needs_closing_quote) {
+                            value_to_add               = "\"";
+                            buffer_needs_closing_quote = true;
+                        }
+                    }
+                    // Escape special characters in the string content
+                    std::string escaped = json(value_content).dump();
+                    // Remove the surrounding quotes from the escaped string
+                    if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
+                        escaped = escaped.substr(1, escaped.size() - 2);
+                    }
+                    value_to_add += escaped;
+                }
+            }
+        }
+
+        // If we have the tool name, add directly; otherwise buffer
+        if (!current_tool->name.empty()) {
+            current_tool->arguments += value_to_add;
+        } else {
+            if (args_buffer.empty()) {
+                args_buffer = "{";
+            }
+            args_buffer += value_to_add;
+        }
     }
 
-    if (is_arg_open) {
-        needs_closing_quote = false;
+    if (is_arg_close && current_tool) {
+        if (!current_tool->name.empty()) {
+            if (needs_closing_quote) {
+                current_tool->arguments += "\"";
+                needs_closing_quote = false;
+            }
+        } else {
+            if (buffer_needs_closing_quote) {
+                if (args_buffer.empty()) {
+                    args_buffer = "{";
+                }
+                args_buffer += "\"";
+                buffer_needs_closing_quote = false;
+            }
+        }
     }
 
-    if (is_arg_name && current_tool) {
-        if (arg_count > 0) {
-            current_tool->arguments += ",";
+    if (is_tool_close && current_tool) {
+        if (!current_tool->name.empty()) {
+            if (needs_closing_quote) {
+                current_tool->arguments += "\"";
+                needs_closing_quote = false;
+            }
+            if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') {
+                current_tool->arguments += "}";
+            }
+            // If we have a pending tool call that wasn't added yet, add it now
+            if (pending_tool_call.has_value()) {
+                result.tool_calls.push_back(pending_tool_call.value());
+                pending_tool_call.reset();
+            }
+        } else {
+            // We're closing a tool without a name - flush the buffer
+            if (!args_buffer.empty()) {
+                current_tool->arguments = args_buffer;
+                args_buffer.clear();
+            }
+            if (buffer_needs_closing_quote) {
+                current_tool->arguments += "\"";
+                buffer_needs_closing_quote = false;
+            }
+            // Close the arguments object if using tagged format
+            if (!current_tool->arguments.empty() && current_tool->arguments.back() != '}') {
+                current_tool->arguments += "}";
+            }
+            // Don't add to result if no name - this prevents incomplete tool calls
+            pending_tool_call.reset();
         }
-        current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
-        ++arg_count;
     }
+}
 
-    if (is_arg_string && current_tool) {
-        // Serialize to JSON, but exclude the end quote
-        std::string dumped = json(trim_trailing_space(node.text)).dump();
-        current_tool->arguments += dumped.substr(0, dumped.size() - 1);
-        needs_closing_quote = true;
+common_peg_parser common_chat_peg_unified_builder::standard_constructed_tools(
+    const std::map<std::string, std::string> & markers,
+    const nlohmann::json &                     tools,
+    bool                                       parallel_tool_calls,
+    bool                                       force_tool_calls) {
+    if (!tools.is_array() || tools.empty()) {
+        return eps();
     }
 
-    if (is_arg_close && current_tool) {
-        if (needs_closing_quote) {
-            current_tool->arguments += "\"";
-            needs_closing_quote = false;
+    // Extract markers with defaults
+    auto get_marker = [&markers](const std::string & key, const std::string & default_val = "") -> std::string {
+        auto it = markers.find(key);
+        return it != markers.end() ? it->second : default_val;
+    };
+
+    std::string section_start    = get_marker("tool_call_start_marker", "<tool_call>");
+    std::string section_end      = get_marker("tool_call_end_marker", "</tool_call>");
+    std::string func_opener      = get_marker("function_opener", "<function=");
+    std::string func_name_suffix = get_marker("function_name_suffix", ">");
+    std::string func_closer      = get_marker("function_closer", "</function>");
+    std::string param_key_prefix = get_marker("parameter_key_prefix", "<param=");
+    std::string param_key_suffix = get_marker("parameter_key_suffix", ">");
+    std::string param_closer     = get_marker("parameter_closer", "</param>");
+
+    // Build tool choices for tagged format
+    auto tool_choices = choice();
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;
         }
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+        // Build argument parsers
+        auto args = eps();
+        if (params.contains("properties") && !params["properties"].empty()) {
+            auto arg_choice = choice();
+            for (const auto & el : params["properties"].items()) {
+                const std::string & prop_name = el.key();
+
+                auto arg_name_parser =
+                    choice({ literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") });
+
+                auto arg_rule = tool_arg(tool_arg_open(literal(param_key_prefix)) + tool_arg_name(arg_name_parser) +
+                                         literal(param_key_suffix) + tool_arg_value(until(param_closer)) +
+                                         tool_arg_close(literal(param_closer)));
+                arg_choice |= arg_rule;
+            }
+            args = zero_or_more(arg_choice + space());
+        }
+
+        // Build function parser: <function=name>args</function>
+        auto tool_parser = tool(tool_open(literal(func_opener) + tool_name(literal(name)) + literal(func_name_suffix)) +
+                                space() + tool_args(args) + space() + tool_close(literal(func_closer)));
+
+        tool_choices |= rule("tool-" + name, tool_parser);
+    }
+
+    // Build the section with markers
+    auto section =
+        parallel_tool_calls ?
+            trigger_rule("tool-call", literal(section_start) + space() + one_or_more(tool_choices + space()) +
+                                          literal(section_end)) :
+            trigger_rule("tool-call", literal(section_start) + space() + tool_choices + space() + literal(section_end));
+
+    return force_tool_calls ? section : optional(section);
+}
+
+// Helper: Parse dot notation key into prefix and field name
+static std::pair<std::string, std::string> parse_key_spec(const std::string & key) {
+    auto dot_pos = key.find('.');
+    if (dot_pos == std::string::npos) {
+        return {"", key};  // Top-level field
     }
+    return {key.substr(0, dot_pos), key.substr(dot_pos + 1)};
+}
 
-    if (is_arg_json && current_tool) {
-        current_tool->arguments += std::string(trim_trailing_space(node.text));
+common_peg_parser common_chat_peg_unified_builder::standard_json_tools(
+                                                       const std::string &              section_start,
+                                                       const std::string &              section_end,
+                                                       const nlohmann::json &           tools,
+                                                       bool                             parallel_tool_calls,
+                                                       bool                             force_tool_calls,
+                                                       const std::string &              name_key,
+                                                       const std::string &              args_key,
+                                                       bool                             array_wrapped,
+                                                       bool                             function_is_key,
+                                                       const std::string &              call_id_key,
+                                                       const std::string &              gen_call_id_key,
+                                                       const std::vector<std::string> & parameters_order) {
+    if (!tools.is_array() || tools.empty()) {
+        return eps();
     }
 
-    if (is_tool_close && current_tool) {
-        if (needs_closing_quote) {
-            current_tool->arguments += "\"";
-            needs_closing_quote = false;
+    // Build tool choices for JSON format
+    auto tool_choices = choice();
+    // auto other_member = json_string() + space() + literal(":") + space() + json();
+
+    // Determine effective field names
+    std::string effective_name_key = name_key.empty() ? "name" : name_key;
+    std::string effective_args_key = args_key.empty() ? "arguments" : args_key;
+
+    // Check if we have nested keys (dot notation)
+    auto name_spec = parse_key_spec(effective_name_key);
+    auto args_spec = parse_key_spec(effective_args_key);
+    bool has_nested_keys = !name_spec.first.empty() || !args_spec.first.empty();
+
+    // Mode 1: function_is_key - parse {"function_name": {...}}
+    if (function_is_key) {
+        for (const auto & tool_def : tools) {
+            if (!tool_def.contains("function")) {
+                continue;
+            }
+            const auto &   function = tool_def.at("function");
+            std::string    name     = function.at("name");
+            nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+            // Build inner object fields
+            std::vector<common_peg_parser> inner_fields;
+
+            // Add optional string ID field
+            if (!call_id_key.empty()) {
+                auto id_parser = atomic(
+                    literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+                    literal("\"") + tool_id(json_string_content()) + literal("\"")
+                );
+                inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space())));
+            }
+
+            // Add optional generated integer ID field
+            if (!gen_call_id_key.empty()) {
+                auto gen_id_parser = atomic(
+                    literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+                    choice({
+                        literal("\"") + tool_id(json_string_content()) + literal("\""),
+                        tool_id(json_number())
+                    })
+                );
+                inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space())));
+            }
+
+            // Add arguments - either wrapped in args_key or parsed directly
+            common_peg_parser args_parser = eps();
+            if (args_key.empty()) {
+                // Arguments are directly the inner object value: {"func_name": {"arg1": "val"}}
+                args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params));
+            } else {
+                // Arguments are wrapped in a key: {"func_name": {"arguments": {"arg1": "val"}}}
+                args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() +
+                              tool_args(schema(json(), "tool-" + name + "-schema", params));
+            }
+            inner_fields.push_back(args_parser);
+
+            // Build inner object parser - no greedy other_member skipping to avoid consuming ID
+            common_peg_parser inner_object = eps();
+            if (args_key.empty() && inner_fields.size() == 1) {
+                // Direct arguments: {"func_name": {"arg1": "val"}}
+                // The args_parser is already the full object schema
+                inner_object = inner_fields[0];
+            } else {
+                // Wrapped arguments: {"func_name": {"arguments": {"arg1": "val"}}}
+                inner_object = literal("{") + space();
+                for (size_t i = 0; i < inner_fields.size(); i++) {
+                    inner_object = inner_object + inner_fields[i];
+                    if (i < inner_fields.size() - 1) {
+                        inner_object = inner_object + space();
+                    }
+                }
+                inner_object = inner_object + space() + literal("}");
+            }
+
+            // Tool call format: { "function_name": { inner_object } }
+            auto tool_parser = tool(
+                tool_open(literal("{")) + space() +
+                literal("\"") + tool_name(literal(name)) + literal("\"") +
+                space() + literal(":") + space() +
+                inner_object +
+                space() + tool_close(literal("}"))
+            );
+
+            tool_choices |= rule("tool-" + name, tool_parser);
         }
-        current_tool->arguments += "}";
     }
+    // Mode 2: Nested keys (dot notation like "function.name")
+    else if (has_nested_keys) {
+        // Group fields by prefix
+        std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first;
+        std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key;
+        std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key;
+
+        for (const auto & tool_def : tools) {
+            if (!tool_def.contains("function")) {
+                continue;
+            }
+            const auto &   function = tool_def.at("function");
+            std::string    name     = function.at("name");
+            nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+            // Build nested object with name and arguments
+            auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() +
+                              literal("\"") + tool_name(literal(name)) + literal("\"");
+            auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() +
+                              tool_args(schema(json(), "tool-" + name + "-schema", params));
+
+            auto nested_object = literal("{") + space() +
+                                nested_name + space() + literal(",") + space() +
+                                nested_args +
+                                space() + literal("}");
+
+            // Build top-level parser - simpler structure without greedy other_member skipping
+            // Format: { id?, "function": {...} }
+            auto tool_parser_body = tool_open(literal("{")) + space();
+
+            // Add optional string ID field at top level
+            if (!call_id_key.empty()) {
+                auto id_spec = parse_key_spec(call_id_key);
+                if (id_spec.first.empty()) {  // Top-level ID field
+                    auto id_parser = atomic(
+                        literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+                        literal("\"") + tool_id(json_string_content()) + literal("\"")
+                    );
+                    tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space());
+                }
+            }
+
+            // Add optional generated integer ID field at top level
+            if (!gen_call_id_key.empty()) {
+                auto gen_id_spec = parse_key_spec(gen_call_id_key);
+                if (gen_id_spec.first.empty()) {  // Top-level gen ID field
+                    auto gen_id_parser = atomic(
+                        literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+                        choice({
+                            literal("\"") + tool_id(json_string_content()) + literal("\""),
+                            tool_id(json_number())
+                        })
+                    );
+                    tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space());
+                }
+            }
+
+            // Add the nested object field
+            auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object;
+            tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}"));
+
+            tool_choices |= rule("tool-" + name, tool(tool_parser_body));
+        }
+    }
+    // Mode 3: Flat keys (enhanced with ID fields and parameter ordering)
+    else {
+        auto name_key_parser = literal("\"" + name_key + "\"");
+        auto args_key_parser = literal("\"" + args_key + "\"");
+
+        for (const auto & tool_def : tools) {
+            if (!tool_def.contains("function")) {
+                continue;
+            }
+            const auto &   function = tool_def.at("function");
+            std::string    name     = function.at("name");
+            nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+            auto tool_name_ = name_key_parser + space() + literal(":") + space() +
+                             literal("\"") + tool_name(literal(name)) + literal("\"");
+            auto tool_args_ = args_key_parser + space() + literal(":") + space() +
+                             tool_args(schema(json(), "tool-" + name + "-schema", params));
+
+            // Build ID parsers if keys are provided
+            common_peg_parser id_parser = eps();
+            if (!call_id_key.empty()) {
+                id_parser = atomic(
+                    literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+                    choice({
+                        literal("\"") + tool_id(json_string_content()) + literal("\""),
+                        tool_id(json_number())
+                    })
+                );
+            }
+
+            common_peg_parser gen_id_parser = eps();
+            if (!gen_call_id_key.empty()) {
+                gen_id_parser = atomic(
+                    literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+                    choice({
+                        literal("\"") + tool_id(json_string_content()) + literal("\""),
+                        tool_id(json_number())
+                    })
+                );
+            }
+
+            common_peg_parser tool_parser = eps();
+
+            // Use parameter ordering if provided - parse fields in specified order without greedy skipping
+            if (!parameters_order.empty()) {
+            }
+            // Build parser using parameter ordering (works with or without explicit parameters_order)
+            // Create list of (parser, key) pairs for all fields
+            std::vector<std::pair<common_peg_parser, std::string>> parser_pairs;
+            parser_pairs.emplace_back(tool_name_, effective_name_key);
+            parser_pairs.emplace_back(tool_args_, effective_args_key);
+            if (!call_id_key.empty()) {
+                parser_pairs.emplace_back(optional(id_parser), call_id_key);
+            }
+            if (!gen_call_id_key.empty()) {
+                parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
+            }
+
+            // Sort by position in parameters_order (or at end if not present)
+            std::sort(parser_pairs.begin(), parser_pairs.end(),
+                [&parameters_order](const auto & a, const auto & b) {
+                    auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
+                    auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
+                    size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
+                    size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
+                    return idx_a < idx_b;
+                });
+
+            // Build ordered parser
+            auto ordered_body = tool_open(literal("{")) + space();
+            for (size_t i = 0; i < parser_pairs.size(); i++) {
+                ordered_body = ordered_body + parser_pairs[i].first;
+                if (i < parser_pairs.size() - 1) {
+                    ordered_body = ordered_body + space() + literal(",") + space();
+                }
+            }
+            ordered_body = ordered_body + space() + tool_close(literal("}"));
+            tool_parser = tool(ordered_body);
+
+            tool_choices |= rule("tool-" + name, tool_parser);
+        }
+    }
+
+    // Build the section with markers
+    auto tool_calls = tool_choices;
+    if (parallel_tool_calls) {
+        tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices);
+    }
+
+    // Optionally wrap in array brackets
+    if (array_wrapped) {
+        tool_calls = literal("[") + space() + tool_calls + space() + literal("]");
+    }
+
+    auto section =
+        trigger_rule("tool-call", literal(section_start) + space() + tool_calls + space() + literal(section_end));
+
+    return force_tool_calls ? section : optional(section);
 }
diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h
index b84cbed2069..7304ca7e616 100644
--- a/common/chat-peg-parser.h
+++ b/common/chat-peg-parser.h
@@ -3,18 +3,29 @@
 #include "chat.h"
 #include "peg-parser.h"
 
+#include <map>
+#include <optional>
+#include <vector>
+
 class common_chat_peg_builder : public common_peg_parser_builder {
   public:
     static constexpr const char * REASONING_BLOCK = "reasoning-block";
-    static constexpr const char * REASONING = "reasoning";
-    static constexpr const char * CONTENT = "content";
+    static constexpr const char * REASONING       = "reasoning";
+    static constexpr const char * CONTENT         = "content";
 
     common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
+
     common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
+
     common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
+
+    common_peg_parser tag_with_safe_content(const std::string &       tag_name,
+                                            const std::string &       marker,
+                                            const common_peg_parser & p);
 };
 
-inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
+inline common_peg_arena build_chat_peg_parser(
+    const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
     common_chat_peg_builder builder;
     builder.set_root(fn(builder));
     return builder.build();
@@ -26,80 +37,97 @@ class common_chat_peg_mapper {
 
     common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
 
+    virtual ~common_chat_peg_mapper() = default;
+
     virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
     virtual void map(const common_peg_ast_node & node);
 };
 
-class common_chat_peg_native_builder : public common_chat_peg_builder {
-  public:
-    static constexpr const char * TOOL = "tool";
-    static constexpr const char * TOOL_OPEN = "tool-open";
-    static constexpr const char * TOOL_CLOSE = "tool-close";
-    static constexpr const char * TOOL_ID = "tool-id";
-    static constexpr const char * TOOL_NAME = "tool-name";
-    static constexpr const char * TOOL_ARGS = "tool-args";
-
-    common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
-    common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
-    common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
-    common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
-    common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
-    common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
-};
-
-class common_chat_peg_native_mapper : public common_chat_peg_mapper {
-    common_chat_tool_call * current_tool;
+struct content_structure;
+struct tool_call_structure;
 
+class common_chat_peg_unified_builder : public common_chat_peg_builder {
   public:
-    common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
-
-    void map(const common_peg_ast_node & node) override;
-};
-
-inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) {
-    common_chat_peg_native_builder builder;
-    builder.set_root(fn(builder));
-    return builder.build();
-}
-
-class common_chat_peg_constructed_builder : public common_chat_peg_builder {
-  public:
-    static constexpr const char * TOOL = "tool";
-    static constexpr const char * TOOL_OPEN = "tool-open";
-    static constexpr const char * TOOL_CLOSE = "tool-close";
-    static constexpr const char * TOOL_NAME = "tool-name";
-    static constexpr const char * TOOL_ARG = "tool-arg";
-    static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
+    // Tag constants
+    static constexpr const char * TOOL           = "tool";
+    static constexpr const char * TOOL_OPEN      = "tool-open";
+    static constexpr const char * TOOL_CLOSE     = "tool-close";
+    static constexpr const char * TOOL_ID        = "tool-id";
+    static constexpr const char * TOOL_NAME      = "tool-name";
+    static constexpr const char * TOOL_ARGS      = "tool-args";
+    static constexpr const char * TOOL_ARG       = "tool-arg";
+    static constexpr const char * TOOL_ARG_OPEN  = "tool-arg-open";
     static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
-    static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
-    static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
-    static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
+    static constexpr const char * TOOL_ARG_NAME         = "tool-arg-name";
+    static constexpr const char * TOOL_ARG_VALUE        = "tool-arg-value";
+    static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";  // For schema-declared string types
 
+    // Low-level tag methods
     common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
     common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
     common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+    common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
     common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+    common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
     common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
     common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
     common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
     common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
+    common_peg_parser tool_arg_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }
+
+    // Use for schema-declared string types - won't be treated as potential JSON container
     common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
-    common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
+    common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }
+
+    // Legacy-compatible helper for building standard JSON tool calls
+    // Used by tests and manual parsers
+    // name_key/args_key: JSON key names for function name and arguments
+    //   Empty or "name"/"arguments" will accept both common variations
+    //   Supports dot notation for nested objects (e.g., "function.name")
+    // array_wrapped: if true, tool calls are wrapped in JSON array [...]
+    // function_is_key: if true, function name is the JSON key (e.g., {"func_name": {...}})
+    // call_id_key: JSON key for string call ID (e.g., "id")
+    // gen_call_id_key: JSON key for generated integer call ID (e.g., "tool_call_id")
+    // parameters_order: order in which JSON fields should be parsed
+    common_peg_parser standard_json_tools(const std::string &              section_start,
+                                          const std::string &              section_end,
+                                          const nlohmann::json &           tools,
+                                          bool                             parallel_tool_calls,
+                                          bool                             force_tool_calls,
+                                          const std::string &              name_key = "",
+                                          const std::string &              args_key = "",
+                                          bool                             array_wrapped = false,
+                                          bool                             function_is_key = false,
+                                          const std::string &              call_id_key = "",
+                                          const std::string &              gen_call_id_key = "",
+                                          const std::vector<std::string> & parameters_order = {});
+
+    // Legacy-compatible helper for building XML/tagged style tool calls
+    // Used by tests and manual parsers
+    common_peg_parser standard_constructed_tools(const std::map<std::string, std::string> & markers,
+                                                 const nlohmann::json &                     tools,
+                                                 bool                                       parallel_tool_calls,
+                                                 bool                                       force_tool_calls);
 };
 
-class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
-    common_chat_tool_call * current_tool;
-    int arg_count = 0;
-    bool needs_closing_quote = false;
+inline common_peg_arena build_chat_peg_unified_parser(
+    const std::function<common_peg_parser(common_chat_peg_unified_builder & builder)> & fn) {
+    common_chat_peg_unified_builder builder;
+    builder.set_root(fn(builder));
+    return builder.build();
+}
+
+class common_chat_peg_unified_mapper : public common_chat_peg_mapper {
+    std::optional<common_chat_tool_call> pending_tool_call;  // Tool call waiting for name
+    common_chat_tool_call *              current_tool        = nullptr;
+    int                                  arg_count           = 0;
+    bool                                 needs_closing_quote = false;
+    std::string                          args_buffer;  // Buffer to delay arguments until tool name is known
+    bool                                 buffer_needs_closing_quote = false;  // Track quote state for buffered args
 
   public:
-    common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+    common_chat_peg_unified_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
 
+    void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & parse_result_arg) override;
     void map(const common_peg_ast_node & node) override;
 };
-
-inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) {
-    common_chat_peg_constructed_builder builder;
-    builder.set_root(fn(builder));
-    return builder.build();
-}
diff --git a/common/chat.cpp b/common/chat.cpp
index 2bf46326694..a19760a5de0 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1,24 +1,24 @@
 #include "chat.h"
-#include "chat-parser.h"
+
+#include "chat-auto-parser.h"
+#include "chat-diff-analyzer.h"
 #include "chat-peg-parser.h"
 #include "common.h"
-#include "json-partial.h"
+#include "ggml.h"
 #include "json-schema-to-grammar.h"
 #include "log.h"
-#include "regex-partial.h"
 
-#include "jinja/parser.h"
 #include "jinja/value.h"
 #include "jinja/runtime.h"
 #include "jinja/caps.h"
 
-#include <algorithm>
 #include <cstdio>
-#include <cctype>
+#include <cstdlib>
 #include <exception>
 #include <functional>
-#include <iostream>
+
 #include <optional>
+#include <sstream>
 #include <stdexcept>
 #include <string>
 #include <vector>
@@ -26,14 +26,26 @@
 using json = nlohmann::ordered_json;
 
 static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
-    auto time = std::chrono::system_clock::to_time_t(now);
-    auto local_time = *std::localtime(&time);
+    auto               time       = std::chrono::system_clock::to_time_t(now);
+    auto               local_time = *std::localtime(&time);
     std::ostringstream ss;
     ss << std::put_time(&local_time, format.c_str());
     auto res = ss.str();
     return res;
 }
 
+static json safe_args_parse(const std::string & to_parse) {
+    std::string stripped = to_parse;
+    if (to_parse.at(0) == '"' && to_parse.at(to_parse.length() - 1) == '"') {
+        stripped = to_parse.substr(1, to_parse.length() - 1);
+    }
+    try {
+        return json::parse(stripped);
+    } catch (json::exception & e) {
+        return stripped;
+    }
+}
+
 static std::string string_diff(const std::string & last, const std::string & current) {
     if (last.empty()) {
         return current;
@@ -105,7 +117,7 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
                 {"type", "function"},
                 {"function", {
                     {"name", tool_call.name},
-                    {"arguments", tool_call.arguments},
+                    {"arguments", json::parse(tool_call.arguments)},
                 }},
             };
             if (!tool_call.id.empty()) {
@@ -122,7 +134,8 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
     return jmsg;
 }
 
-std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
+std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv,
+                                                                      const common_chat_msg & msg_new) {
     std::vector<common_chat_msg_diff> diffs;
     if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
         diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
@@ -132,38 +145,56 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
 
     // TODO: these can become expensive for long messages - how to optimize?
     if (msg_prv.reasoning_content != msg_new.reasoning_content) {
-        auto & diff = diffs.emplace_back();
+        auto & diff                  = diffs.emplace_back();
         diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
     }
     if (msg_prv.content != msg_new.content) {
-        auto & diff = diffs.emplace_back();
+        auto & diff        = diffs.emplace_back();
         diff.content_delta = string_diff(msg_prv.content, msg_new.content);
     }
 
     if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
-        throw std::runtime_error("Invalid diff: now finding less tool calls!");
+        std::string err = "Invalid diff: now finding less tool calls!\n";
+        err += "  Previous (" + std::to_string(msg_prv.tool_calls.size()) + "):\n";
+        for (const auto & tc : msg_prv.tool_calls) {
+            err += "    - name: '" + tc.name + "', args: '" + tc.arguments + "'\n";
+        }
+        err += "  Current (" + std::to_string(msg_new.tool_calls.size()) + "):\n";
+        for (const auto & tc : msg_new.tool_calls) {
+            err += "    - name: '" + tc.name + "', args: '" + tc.arguments + "'\n";
+        }
+        err += "  Current msg text content:\n" + msg_new.content + "\n";
+        throw std::runtime_error(err);
     }
 
     if (!msg_prv.tool_calls.empty()) {
-        const auto idx = msg_prv.tool_calls.size() - 1;
+        const auto   idx  = msg_prv.tool_calls.size() - 1;
         const auto & pref = msg_prv.tool_calls[idx];
         const auto & newf = msg_new.tool_calls[idx];
-        if (pref.name != newf.name) {
-            throw std::runtime_error("Invalid diff: tool call mismatch!");
+        // Allow tool name to change during incremental parsing:
+        // - empty -> non-empty (initial discovery)
+        // - prefix -> longer string (name grows as more input is parsed)
+        if (pref.name != newf.name && !pref.name.empty() && !newf.name.empty()) {
+            // Check if one is a prefix of the other (for incremental parsing where names grow or shrink)
+            bool is_prefix = (newf.name.rfind(pref.name, 0) == 0);
+            if (!is_prefix) {
+                LOG_ERR("Tool call mismatch: prev='%s' new='%s'\n", pref.name.c_str(), newf.name.c_str());
+                throw std::runtime_error("Invalid diff: tool call mismatch!");
+            }
         }
         const auto args_diff = string_diff(pref.arguments, newf.arguments);
-        if (!args_diff.empty() || pref.id != newf.id) {
-            auto & diff = diffs.emplace_back();
+        if (!args_diff.empty() || pref.id != newf.id || pref.name != newf.name) {
+            auto & diff          = diffs.emplace_back();
             diff.tool_call_index = idx;
-            if (pref.id != newf.id) {
-                diff.tool_call_delta.id = newf.id;
+            if (pref.id != newf.id || pref.name != newf.name) {
+                diff.tool_call_delta.id   = newf.id;
                 diff.tool_call_delta.name = newf.name;
             }
             diff.tool_call_delta.arguments = args_diff;
         }
     }
     for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
-        auto & diff = diffs.emplace_back();
+        auto & diff          = diffs.emplace_back();
         diff.tool_call_index = idx;
         diff.tool_call_delta = msg_new.tool_calls[idx];
     }
@@ -173,94 +204,14 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
 
 using chat_template_caps = jinja::caps;
 
-struct common_chat_template {
-    jinja::program prog;
-    std::string bos_tok;
-    std::string eos_tok;
-    std::string src;
-    chat_template_caps caps;
-
-    common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
-        jinja::lexer lexer;
-        auto lexer_res = lexer.tokenize(src);
-        this->prog = jinja::parse_from_tokens(lexer_res);
-
-        this->src = lexer_res.source;
-        this->bos_tok = bos_token;
-        this->eos_tok = eos_token;
-
-        this->caps = jinja::caps_get(prog);
-        // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
-    }
-
-    const std::string & source() const { return src; }
-    const std::string & bos_token() const { return bos_tok; }
-    const std::string & eos_token() const { return eos_tok; }
-
-    // TODO: this is ugly, refactor it somehow
-    json add_system(const json & messages, const std::string & system_prompt) const {
-        GGML_ASSERT(messages.is_array());
-        auto msgs_copy = messages;
-        if (!caps.supports_system_role) {
-            if (msgs_copy.empty()) {
-                msgs_copy.insert(msgs_copy.begin(), json{
-                    {"role", "user"},
-                    {"content", system_prompt}
-                });
-            } else {
-                auto & first_msg = msgs_copy[0];
-                if (!first_msg.contains("content")) {
-                    first_msg["content"] = "";
-                }
-                first_msg["content"] = system_prompt + "\n\n"
-                    + first_msg["content"].get<std::string>();
-            }
-        } else {
-            if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
-                msgs_copy.insert(msgs_copy.begin(), json{
-                    {"role", "system"},
-                    {"content", system_prompt}
-                });
-            } else if (msgs_copy[0].at("role") == "system") {
-                msgs_copy[0]["content"] = system_prompt;
-            }
-        }
-        return msgs_copy;
-    }
-
-    chat_template_caps original_caps() const {
-        return caps;
-    }
-
-};
-
 struct common_chat_templates {
     bool add_bos;
     bool add_eos;
-    bool has_explicit_template; // Model had builtin template or template overridde was specified.
-    std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
+    bool has_explicit_template;  // Model had builtin template or template overridde was specified.
+    std::unique_ptr<common_chat_template> template_default;  // always set (defaults to chatml)
     std::unique_ptr<common_chat_template> template_tool_use;
 };
 
-struct templates_params {
-    json messages;
-    json tools;
-    common_chat_tool_choice tool_choice;
-    json json_schema;
-    bool parallel_tool_calls;
-    common_reasoning_format reasoning_format;
-    bool stream;
-    std::string grammar;
-    bool add_generation_prompt = true;
-    bool enable_thinking = true;
-    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
-    json extra_context;
-    bool add_bos;
-    bool add_eos;
-    bool is_inference = true;
-    bool mark_input = true; // whether to mark input strings in the jinja context
-};
-
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
     if (tool_choice == "auto") {
         return COMMON_CHAT_TOOL_CHOICE_AUTO;
@@ -276,22 +227,27 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
 
 bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
     common_chat_templates_inputs dummy_inputs;
-    common_chat_msg msg;
-    msg.role = "user";
-    msg.content = "test";
-    dummy_inputs.messages = {msg};
-    dummy_inputs.enable_thinking = false;
-    const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
-    dummy_inputs.enable_thinking = true;
+    common_chat_msg              msg;
+    msg.role                          = "user";
+    msg.content                       = "test";
+    dummy_inputs.messages             = { msg };
+    dummy_inputs.enable_thinking      = false;
+    const auto rendered_no_thinking   = common_chat_templates_apply(chat_templates, dummy_inputs);
+    dummy_inputs.enable_thinking      = true;
     const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
-    return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
+    bool detect = rendered_no_thinking.prompt != rendered_with_thinking.prompt;
+    const auto & tmpl = chat_templates->template_tool_use
+        ? *chat_templates->template_tool_use
+        : *chat_templates->template_default;
+    diff_analysis_result result = differential_analyzer::analyze(tmpl);
+    detect |= result.reasoning != reasoning_mode::NONE;
+    return detect;
 }
 
 std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
     std::vector<common_chat_msg> msgs;
 
     try {
-
         if (!messages.is_array()) {
             throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
         }
@@ -307,7 +263,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
             }
             msg.role = message.at("role");
 
-            auto has_content = message.contains("content");
+            auto has_content    = message.contains("content");
             auto has_tool_calls = message.contains("tool_calls");
             if (has_content) {
                 const auto & content = message.at("content");
@@ -328,7 +284,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                         msg.content_parts.push_back(msg_part);
                     }
                 } else if (!content.is_null()) {
-                    throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
+                    throw std::invalid_argument("Invalid 'content' type: expected string or array, got " +
+                                                content.dump() +
+                                                " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
                 }
             }
             if (has_tool_calls) {
@@ -348,8 +306,13 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                     if (!fc.contains("name")) {
                         throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
                     }
-                    tc.name = fc.at("name");
-                    tc.arguments = fc.at("arguments");
+                    tc.name           = fc.at("name");
+                    const auto & args = fc.at("arguments");
+                    if (args.is_string()) {
+                        tc.arguments = args;
+                    } else {
+                        tc.arguments = args.dump();
+                    }
                     if (tool_call.contains("id")) {
                         tc.id = tool_call.at("id");
                     }
@@ -357,7 +320,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                 }
             }
             if (!has_content && !has_tool_calls) {
-                throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
+                throw std::invalid_argument(
+                    "Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & "
+                    "https://github.com/ggml-org/llama.cpp/issues/12279)");
             }
             if (message.contains("reasoning_content")) {
                 msg.reasoning_content = message.at("reasoning_content");
@@ -432,12 +397,13 @@ json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & t
     auto result = json::array();
     for (const auto & tool : tools) {
         result.push_back({
-            {"type", "function"},
-            {"function", {
-                {"name", tool.name},
-                {"description", tool.description},
-                {"parameters", json::parse(tool.parameters)},
-            }},
+            { "type",     "function" },
+            { "function",
+             {
+                  { "name", tool.name },
+                  { "description", tool.description },
+                  { "parameters", json::parse(tool.parameters) },
+              }                      },
         });
     }
     return result;
@@ -455,16 +421,20 @@ json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
         json tool_call;
         tool_call["index"] = diff.tool_call_index;
         if (!diff.tool_call_delta.id.empty()) {
-            tool_call["id"] = diff.tool_call_delta.id;
+            tool_call["id"]   = diff.tool_call_delta.id;
             tool_call["type"] = "function";
         }
-        json function = json::object();
-        if (!diff.tool_call_delta.name.empty()) {
-            function["name"] = diff.tool_call_delta.name;
+        if (!diff.tool_call_delta.name.empty() || !diff.tool_call_delta.arguments.empty()) {
+            json function = json::object();
+            if (!diff.tool_call_delta.name.empty()) {
+                function["name"] = diff.tool_call_delta.name;
+            }
+            if (!diff.tool_call_delta.arguments.empty()) {
+                function["arguments"] = diff.tool_call_delta.arguments;
+            }
+            tool_call["function"] = function;
         }
-        function["arguments"] = diff.tool_call_delta.arguments;
-        tool_call["function"] = function;
-        delta["tool_calls"] = json::array({tool_call});
+        delta["tool_calls"] = json::array({ tool_call });
     }
     return delta;
 }
@@ -473,13 +443,13 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
     if (use_jinja) {
         try {
             common_chat_msg msg;
-            msg.role = "user";
+            msg.role    = "user";
             msg.content = "test";
 
             auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl);
 
             common_chat_templates_inputs inputs;
-            inputs.messages = {msg};
+            inputs.messages = { msg };
 
             common_chat_templates_apply(tmpls.get(), inputs);
             return true;
@@ -488,28 +458,28 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
             return false;
         }
     }
-    llama_chat_message chat[] = {{"user", "test"}};
+    llama_chat_message chat[] = {
+        { "user", "test" }
+    };
     const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0);
     return res >= 0;
 }
 
-std::string common_chat_format_single(
-        const struct common_chat_templates * tmpls,
-        const std::vector<common_chat_msg> & past_msg,
-        const common_chat_msg & new_msg,
-        bool add_ass,
-        bool use_jinja) {
-
+std::string common_chat_format_single(const struct common_chat_templates * tmpls,
+                                      const std::vector<common_chat_msg> & past_msg,
+                                      const common_chat_msg &              new_msg,
+                                      bool                                 add_ass,
+                                      bool                                 use_jinja) {
     common_chat_templates_inputs inputs;
     inputs.use_jinja = use_jinja;
-    inputs.add_bos = tmpls->add_bos;
-    inputs.add_eos = tmpls->add_eos;
+    inputs.add_bos   = tmpls->add_bos;
+    inputs.add_eos   = tmpls->add_eos;
 
     std::string fmt_past_msg;
     if (!past_msg.empty()) {
-        inputs.messages = past_msg;
+        inputs.messages              = past_msg;
         inputs.add_generation_prompt = false;
-        fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt;
+        fmt_past_msg                 = common_chat_templates_apply(tmpls, inputs).prompt;
     }
     std::ostringstream ss;
     // if the past_msg ends with a newline, we must preserve it in the formatted version
@@ -519,37 +489,39 @@ std::string common_chat_format_single(
     // format chat with new_msg
     inputs.messages.push_back(new_msg);
     inputs.add_generation_prompt = add_ass;
-    auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt;
+    auto fmt_new_msg             = common_chat_templates_apply(tmpls, inputs).prompt;
     // get the diff part
     ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
     return ss.str();
 }
 
-std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map<std::string, std::string> & chat_template_kwargs) {
+std::string common_chat_format_example(const struct common_chat_templates *       tmpls,
+                                       bool                                       use_jinja,
+                                       const std::map<std::string, std::string> & chat_template_kwargs) {
     common_chat_templates_inputs inputs;
-    inputs.use_jinja = use_jinja;
-    inputs.add_bos = tmpls->add_bos;
-    inputs.add_eos = tmpls->add_eos;
+    inputs.use_jinja            = use_jinja;
+    inputs.add_bos              = tmpls->add_bos;
+    inputs.add_eos              = tmpls->add_eos;
     inputs.chat_template_kwargs = chat_template_kwargs;
-    auto add_simple_msg = [&](auto role, auto content) {
+    auto add_simple_msg         = [&](auto role, auto content) {
         common_chat_msg msg;
-        msg.role = role;
+        msg.role    = role;
         msg.content = content;
         inputs.messages.push_back(msg);
     };
-    add_simple_msg("system",    "You are a helpful assistant");
-    add_simple_msg("user",      "Hello");
+    add_simple_msg("system", "You are a helpful assistant");
+    add_simple_msg("user", "Hello");
     add_simple_msg("assistant", "Hi there");
-    add_simple_msg("user",      "How are you?");
+    add_simple_msg("user", "How are you?");
     return common_chat_templates_apply(tmpls, inputs).prompt;
 }
 
-#define CHATML_TEMPLATE_SRC \
-    "{%- for message in messages -%}\n" \
+#define CHATML_TEMPLATE_SRC                                                               \
+    "{%- for message in messages -%}\n"                                                   \
     "  {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \
-    "{%- endfor -%}\n" \
-    "{%- if add_generation_prompt -%}\n" \
-    "  {{- '<|im_start|>assistant\n' -}}\n" \
+    "{%- endfor -%}\n"                                                                    \
+    "{%- if add_generation_prompt -%}\n"                                                  \
+    "  {{- '<|im_start|>assistant\n' -}}\n"                                               \
     "{%- endif -%}"
 
 void common_chat_templates_free(struct common_chat_templates * tmpls) {
@@ -567,19 +539,16 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
                 return tmpls->template_tool_use->source();
             }
             return "";
-        } else {
-            LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
         }
+        LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
     }
     return tmpls->template_default->source();
 }
 
-common_chat_templates_ptr common_chat_templates_init(
-    const struct llama_model * model,
-    const std::string & chat_template_override,
-    const std::string & bos_token_override,
-    const std::string & eos_token_override)
-{
+common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model,
+                                                     const std::string &        chat_template_override,
+                                                     const std::string &        bos_token_override,
+                                                     const std::string &        eos_token_override) {
     std::string default_template_src;
     std::string template_tool_use_src;
 
@@ -588,7 +557,7 @@ common_chat_templates_ptr common_chat_templates_init(
         GGML_ASSERT(model != nullptr);
         const auto * str = llama_model_chat_template(model, /* name */ nullptr);
         if (str) {
-            default_template_src = str;
+            default_template_src  = str;
             has_explicit_template = true;
         }
         str = llama_model_chat_template(model, /* name */ "tool_use");
@@ -610,34 +579,40 @@ common_chat_templates_ptr common_chat_templates_init(
     // TODO @ngxson : this is a temporary hack to prevent chat template from throwing an error
     // Ref: https://github.com/ggml-org/llama.cpp/pull/15230#issuecomment-3173959633
     if (default_template_src.find("<|channel|>") != std::string::npos
-            // search for the error message and patch it
-            && default_template_src.find("in message.content or") != std::string::npos) {
+        // search for the error message and patch it
+        && default_template_src.find("in message.content or") != std::string::npos) {
         string_replace_all(default_template_src,
-            "{%- if \"<|channel|>analysis<|message|>\" in message.content or \"<|channel|>final<|message|>\" in message.content %}",
-            "{%- if false %}");
+                           "{%- if \"<|channel|>analysis<|message|>\" in message.content or "
+                           "\"<|channel|>final<|message|>\" in message.content %}",
+                           "{%- if false %}");
     }
 
     // TODO @aldehir : this is a temporary fix, pending Minja changes
     // Ref: https://github.com/ggml-org/llama.cpp/pull/17713#issuecomment-3631342664
     if (default_template_src.find("[TOOL_CALLS]") != std::string::npos
-            // search for the error message and patch it
-            && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
+        // search for the error message and patch it
+        && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
         string_replace_all(default_template_src,
-            "{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
-            "{%- if false %}");
+                           "{%- if (message['content'] is none or message['content'] == '' or "
+                           "message['content']|length == 0) and (message['tool_calls'] is not defined or "
+                           "message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
+                           "{%- if false %}");
     }
 
     std::string token_bos = bos_token_override;
     std::string token_eos = eos_token_override;
-    bool add_bos = false;
-    bool add_eos = false;
+    bool        add_bos   = false;
+    bool        add_eos   = false;
     if (model) {
-        const auto * vocab = llama_model_get_vocab(model);
-        const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
+        const auto * vocab     = llama_model_get_vocab(model);
+        const auto   get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
             if (token == LLAMA_TOKEN_NULL) {
-                if (default_template_src.find(jinja_variable_name) != std::string::npos
-                    || template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
-                    LOG_WRN("common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't work as intended.\n", name);
+                if (default_template_src.find(jinja_variable_name) != std::string::npos ||
+                    template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
+                    LOG_WRN(
+                        "common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't "
+                          "work as intended.\n",
+                        name);
                 }
                 return std::string();
             }
@@ -645,13 +620,13 @@ common_chat_templates_ptr common_chat_templates_init(
         };
         token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token");
         token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token");
-        add_bos = llama_vocab_get_add_bos(vocab);
-        add_eos = llama_vocab_get_add_eos(vocab);
+        add_bos   = llama_vocab_get_add_bos(vocab);
+        add_eos   = llama_vocab_get_add_eos(vocab);
     }
     common_chat_templates_ptr tmpls(new common_chat_templates());
     tmpls->has_explicit_template = has_explicit_template;
-    tmpls->add_bos = add_bos;
-    tmpls->add_eos = add_eos;
+    tmpls->add_bos               = add_bos;
+    tmpls->add_eos               = add_eos;
     try {
         tmpls->template_default = std::make_unique<common_chat_template>(default_template_src, token_bos, token_eos);
     } catch (const std::exception & e) {
@@ -672,36 +647,12 @@ common_chat_templates_ptr common_chat_templates_init(
 
 const char * common_chat_format_name(common_chat_format format) {
     switch (format) {
-        case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
-        case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
-        case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
-        case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral";
-        case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
-        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
-        case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
-        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
-        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1";
-        case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
-        case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
-        case COMMON_CHAT_FORMAT_GRANITE: return "Granite";
-        case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS";
-        case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
-        case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
-        case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
-        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
-        case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
-        case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
-        case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
-        case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
-        case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
-        case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
-        case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
-        case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE";
-        case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
-        case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
-        case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
+        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
+            return "Content-only";
+        case COMMON_CHAT_FORMAT_PEG_SIMPLE:
+            return "peg-simple";
+        case COMMON_CHAT_FORMAT_PEG_NATIVE:
+            return "peg-native";
         default:
             throw std::runtime_error("Unknown chat format");
     }
@@ -709,10 +660,14 @@ const char * common_chat_format_name(common_chat_format format) {
 
 const char * common_reasoning_format_name(common_reasoning_format format) {
     switch (format) {
-        case COMMON_REASONING_FORMAT_NONE:     return "none";
-        case COMMON_REASONING_FORMAT_AUTO:     return "auto";
-        case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
-        case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy";
+        case COMMON_REASONING_FORMAT_NONE:
+            return "none";
+        case COMMON_REASONING_FORMAT_AUTO:
+            return "auto";
+        case COMMON_REASONING_FORMAT_DEEPSEEK:
+            return "deepseek";
+        case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY:
+            return "deepseek-legacy";
         default:
             throw std::runtime_error("Unknown reasoning format");
     }
@@ -721,11 +676,14 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
 common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
     if (format == "none") {
         return COMMON_REASONING_FORMAT_NONE;
-    } else if (format == "auto") {
+    }
+    if (format == "auto") {
         return COMMON_REASONING_FORMAT_AUTO;
-    } else if (format == "deepseek") {
+    }
+    if (format == "deepseek") {
         return COMMON_REASONING_FORMAT_DEEPSEEK;
-    } else if (format == "deepseek-legacy") {
+    }
+    if (format == "deepseek-legacy") {
         return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
     }
     throw std::runtime_error("Unknown reasoning format: " + format);
@@ -741,7 +699,8 @@ static void foreach_function(const json & tools, const std::function<void(const
     }
 }
 
-static void foreach_parameter(const json & function, const std::function<void(const std::string &, const json &, bool)> & fn) {
+static void foreach_parameter(const json &                                                         function,
+                              const std::function<void(const std::string &, const json &, bool)> & fn) {
     if (!function.contains("parameters") || !function.at("parameters").is_object()) {
         return;
     }
@@ -749,7 +708,7 @@ static void foreach_parameter(const json & function, const std::function<void(co
     if (!params.contains("properties") || !params.at("properties").is_object()) {
         return;
     }
-    const auto & props = params.at("properties");
+    const auto &          props = params.at("properties");
     std::set<std::string> required;
     if (params.contains("required") && params.at("required").is_array()) {
         params.at("required").get_to(required);
@@ -760,19 +719,19 @@ static void foreach_parameter(const json & function, const std::function<void(co
     }
 }
 
-static std::string apply(
+std::string common_chat_template_direct_apply(
     const common_chat_template & tmpl,
     const struct templates_params & inputs,
-    const std::optional<json> & messages_override = std::nullopt,
-    const std::optional<json> & tools_override = std::nullopt,
-    const std::optional<json> & additional_context = std::nullopt)
-{
+    const std::optional<json> & messages_override,
+    const std::optional<json> & tools_override,
+    const std::optional<json> & additional_context) {
     jinja::context ctx(tmpl.source());
 
     nlohmann::ordered_json inp = nlohmann::ordered_json{
         {"messages", messages_override.has_value() ? *messages_override : inputs.messages},
         {"bos_token", tmpl.bos_token()},
         {"eos_token", tmpl.eos_token()},
+        {"enable_thinking", inputs.enable_thinking},
     };
     if (tools_override.has_value() || !inputs.tools.empty()) {
         inp["tools"] = tools_override.has_value() ? *tools_override : inputs.tools;
@@ -798,7 +757,7 @@ static std::string apply(
     // render
     jinja::runtime runtime(ctx);
     const jinja::value results = runtime.execute(tmpl.prog);
-    auto parts = runtime.gather_string_parts(results);
+    auto parts = jinja::runtime::gather_string_parts(results);
 
     std::string result = parts->as_string().str();
 
@@ -812,265 +771,8 @@ static std::string apply(
     return result;
 }
 
-static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    auto tool_call_schemas = json::array();
-    foreach_function(inputs.tools, [&](const json & tool) {
-        const auto & function = tool.at("function");
-        auto tool_schema = json {
-            {"type", "object"},
-            {"properties", {
-                {"name", {
-                    {"type", "string"},
-                    {"const", function.at("name")},
-                }},
-                {"arguments", function.at("parameters")},
-            }},
-            {"required", json::array({"name", "arguments"})},
-        };
-        if (function.contains("description")) {
-            tool_schema["description"] = function.at("description");
-        }
-        if (inputs.parallel_tool_calls) {
-            tool_schema.at("properties")["id"] = {
-                {"type", "string"},
-                {"minLength", 4},
-            };
-            tool_schema.at("required").push_back("id");
-        }
-        tool_call_schemas.emplace_back(tool_schema);
-    });
-    const auto tool_call =
-        inputs.parallel_tool_calls
-            ? json {
-                {"type", "object"},
-                {"properties", {
-                    {"tool_calls", {
-                        {"type", "array"},
-                        {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
-                            {"anyOf", tool_call_schemas},
-                        }},
-                        {"minItems", 1},
-                    }},
-                }},
-                {"required", json::array({"tool_calls"})},
-            }
-            : json {
-                {"type", "object"},
-                {"properties", {
-                    {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
-                        {"anyOf", tool_call_schemas},
-                    }},
-                }},
-                {"required", json::array({"tool_call"})},
-            };
-    const auto schema =
-        inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED
-            ? json {
-                {"anyOf", json::array({
-                    tool_call,
-                    {
-                        {"type", "object"},
-                        {"properties", {
-                            {"response", inputs.json_schema.is_null()
-                                ? json {{"type", "string"}}
-                                : inputs.json_schema
-                            },
-                        }},
-                        {"required", json::array({"response"})},
-                    },
-                })}
-            }
-            : tool_call;
-
-    data.grammar_lazy = false;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        builder.add_schema("root", schema);
-    });
-
-    auto tweaked_messages = tmpl.add_system(
-        inputs.messages,
-        "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");
-
-    // ensure all messages has "content" field
-    for (auto & message : tweaked_messages) {
-        if (!message.contains("content") || message["content"].is_null()) {
-            message["content"] = "";
-        }
-    }
-
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
-    data.format = COMMON_CHAT_FORMAT_GENERIC;
-    return data;
-}
-
-static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        auto schemas = json::array();
-        foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool.at("function");
-            schemas.push_back({
-                {"type", "object"},
-                {"properties", {
-                    // Important note: the model is probably trained to take a JSON stringified arguments value.
-                    // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
-                    {"name", {
-                        {"type", "string"},
-                        {"const", function.at("name")},
-                    }},
-                    {"arguments", function.at("parameters")},
-                    {"id", {
-                        {"type", "string"},
-                        // Nemo's template expects a 9-character alphanumeric ID.
-                        {"pattern", "^[a-zA-Z0-9]{9}$"},
-                    }},
-                }},
-                {"required", json::array({"name", "arguments", "id"})},
-            });
-        });
-        auto schema = json {
-            {"type", "array"},
-            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-            {"minItems", 1},
-        };
-        if (!inputs.parallel_tool_calls) {
-            schema["maxItems"] = 1;
-        }
-        builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
-    });
-    data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
-    data.preserved_tokens = {
-        "[TOOL_CALLS]",
-    };
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
-    return data;
-}
-
-
-// Case-insensitive find
-static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
-    auto it = std::search(
-        haystack.begin() + pos, haystack.end(),
-        needle.begin(), needle.end(),
-        [](char a, char b) { return std::tolower(a) == std::tolower(b); }
-    );
-    return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);
-}
-
-static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    const auto is_json_schema_provided = !inputs.json_schema.is_null();
-    const auto is_grammar_provided = !inputs.grammar.empty();
-    const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();
-
-    // the logic requires potentially modifying the messages
-    auto tweaked_messages = inputs.messages;
-
-    auto replace_json_schema_marker = [](json & messages) -> bool {
-        static std::string marker1 = "force json schema.\n";
-        static std::string marker2 = "force json schema.";
-
-        if (messages.empty() || messages.at(0).at("role") != "system") {
-            return false;
-        }
-
-        std::string content = messages.at(0).at("content");
-
-        for (const auto & marker : {marker1, marker2}) {
-            const auto pos = ifind_string(content, marker);
-            if (pos != std::string::npos) {
-                content.replace(pos, marker.length(), "");
-                // inject modified content back into the messages
-                messages.at(0).at("content") = content;
-                return true;
-            }
-        }
-
-        return false;
-    };
-
-    // Lfm2 model does not natively work with json, but can generally understand the tools structure
-    //
-    // Example of the pytorch dialog structure:
-    //     <|startoftext|><|im_start|>system
-    //     List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
-    //     <|im_start|>user
-    //     What is the current status of candidate ID 12345?<|im_end|>
-    //     <|im_start|>assistant
-    //     <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
-    //     <|im_start|>tool
-    //     <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
-    //     <|im_start|>assistant
-    //     The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
-    //
-    // For the llama server compatibility with json tools semantic,
-    // the client can add "Follow json schema." line into the system message prompt to force the json output.
-    //
-    if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
-        // server/utils.hpp prohibits that branch for the custom grammar anyways
-        throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
-    } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
-        LOG_INF("%s: Using tools to build a grammar\n", __func__);
-
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-
-            builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
-        });
-        // model has no concept of tool selection mode choice,
-        // if the system prompt rendered correctly it will produce a tool call
-        // the grammar goes inside the tool call body
-        data.grammar_lazy = true;
-        data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};
-        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
-        data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
-    } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
-        LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
-        // output those tokens
-        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
-    } else if (is_json_schema_provided) {
-        LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
-        data.grammar = json_schema_to_grammar(inputs.json_schema);
-    } else if (is_grammar_provided) {
-        LOG_INF("%s: Using provided grammar\n", __func__);
-        data.grammar = inputs.grammar;
-    } else {
-        LOG_INF("%s: Using content relying on the template\n", __func__);
-    }
-
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
-    LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_ministral_3(const common_chat_template &    tmpl,
+                                                              const struct templates_params & inputs) {
     common_chat_params data;
 
     // Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
@@ -1088,8 +790,8 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
         // If message contains `reasoning_content`, add it as a block of type `thinking`
         if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
             content.push_back({
-                {"type", "thinking"},
-                {"thinking", msg.at("reasoning_content").get<std::string>()},
+                { "type",     "thinking"                                     },
+                { "thinking", msg.at("reasoning_content").get<std::string>() },
             });
         }
 
@@ -1097,8 +799,8 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
         if (msg.contains("content")) {
             if (msg.at("content").is_string()) {
                 content.push_back({
-                    {"type", "text"},
-                    {"text", msg.at("content").get<std::string>()},
+                    { "type", "text"                               },
+                    { "text", msg.at("content").get<std::string>() },
                 });
             } else if (msg.at("content").is_array()) {
                 auto blocks = msg.at("content");
@@ -1106,18 +808,18 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
             }
         }
 
-        auto adjusted = msg;
+        auto adjusted       = msg;
         adjusted["content"] = content;
         adjusted.erase("reasoning_content");
         adjusted_messages.push_back(adjusted);
     }
 
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
     auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
-    auto include_grammar = true;
+    auto include_grammar   = true;
 
-    data.prompt = apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
-    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.prompt           = common_chat_template_direct_apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
+    data.format           = COMMON_CHAT_FORMAT_PEG_NATIVE;
     data.preserved_tokens = {
         "[THINK]",
         "[/THINK]",
@@ -1125,13 +827,15 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
         "[ARGS]",
     };
 
-    auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
-        auto reasoning = extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        auto reasoning =
+            extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
 
         // Response format parser
         if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
             // Ministral wants to emit json surrounded by code fences
-            return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```";
+            return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema))
+                             << "```";
         }
 
         // Tool call parser
@@ -1139,17 +843,16 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
             auto tool_choice = p.choice();
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                const auto & schema = function.at("parameters");
+                std::string  name     = function.at("name");
+                const auto & schema   = function.at("parameters");
 
-                tool_choice |= p.rule("tool-" + name,
-                    p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]")
-                    + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
-                );
+                tool_choice |=
+                    p.rule("tool-" + name, p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]") +
+                                               p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
             });
 
-            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
-            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
+            auto min_calls  = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+            auto max_calls  = inputs.parallel_tool_calls ? -1 : 1;
             auto tool_calls = p.trigger_rule("tool-call", p.repeat("[TOOL_CALLS]" + tool_choice, min_calls, max_calls));
 
             return reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls;
@@ -1168,1748 +871,273 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                auto schema = function.at("parameters");
+                auto         schema   = function.at("parameters");
                 builder.resolve_refs(schema);
             });
             parser.build_grammar(builder, data.grammar_lazy);
         });
 
         data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]" }
         };
     }
 
     return data;
 }
 
-static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
-    data.preserved_tokens = {
-        "[THINK]",
-        "[/THINK]",
-    };
-
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                        {"id", {
-                            {"type", "string"},
-                            {"pattern", "^[a-zA-Z0-9]{9}$"},
-                        }},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
-        });
-        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
-        data.preserved_tokens.push_back("[TOOL_CALLS]");
-    } else {
-        data.grammar_lazy = false;
-        if (!inputs.json_schema.is_null()) {
-            if (!inputs.grammar.empty()) {
-                throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
-            }
-            data.grammar = json_schema_to_grammar(inputs.json_schema);
-        } else {
-            data.grammar = inputs.grammar;
-        }
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template &    tmpl,
+                                                          const struct templates_params & inputs) {
     common_chat_params data;
 
+    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
     auto adjusted_messages = json::array();
     for (const auto & msg : inputs.messages) {
         auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
-        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+        auto has_tool_calls        = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+
         if (has_reasoning_content && has_tool_calls) {
-            auto adjusted_message = msg;
-            adjusted_message["tool_plan"] = msg.at("reasoning_content");
-            adjusted_message.erase("reasoning_content");
+            auto adjusted_message        = msg;
+            adjusted_message["thinking"] = msg.at("reasoning_content");
             adjusted_messages.push_back(adjusted_message);
         } else {
             adjusted_messages.push_back(msg);
         }
     }
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
-    data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
-    if (string_ends_with(data.prompt, "<|START_THINKING|>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "<|END_THINKING|>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    } else if (!inputs.enable_thinking && string_ends_with(data.prompt, "<|CHATBOT_TOKEN|>")) {
-        data.prompt += "<|START_THINKING|><|END_THINKING|>";
-    }
-
-    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        auto schemas = json::array();
-        foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool.at("function");
-            schemas.push_back({
-                {"type", "object"},
-                {"properties", {
-                    {"tool_call_id", {
-                        {"type", "string"},
-                        // Command-R's template expects an integer string.
-                        {"pattern", "^[0-9]{1,10}$"},
-                    }},
-                    {"tool_name", {
-                        {"type", "string"},
-                        {"const", function.at("name")},
-                    }},
-                    {"parameters", function.at("parameters")},
-                }},
-                {"required", json::array({"tool_call_id", "tool_name", "parameters"})},
-            });
-        });
-        auto schema = json {
-            {"type", "array"},
-            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-            {"minItems", 1},
-        };
-        if (!inputs.parallel_tool_calls) {
-            schema["maxItems"] = 1;
-        }
-        builder.add_rule("root",
-            std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") +
-            "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
-    });
-    data.grammar_triggers.push_back({
-        COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-        // If thinking_forced_open, then we capture the </think> tag in the grammar,
-        // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-        std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") +
-            "(<\\|START_ACTION\\|>)[\\s\\S]*"
-    });
-    data.preserved_tokens = {
-        "<|START_ACTION|>",
-        "<|END_ACTION|>",
-        "<|START_RESPONSE|>",
-        "<|END_RESPONSE|>",
-        "<|START_THINKING|>",
-        "<|END_THINKING|>",
-    };
-    return data;
-}
-
-static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
-    if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
-        throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
-    }
-    const auto & parameters_properties = parameters.at("properties");
-    const auto & parameters_required = parameters.at("required");
-    for (const auto & prop : expected_properties) {
-        if (!parameters_properties.contains(prop)) {
-            throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); // NOLINT
-        }
-        if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) {
-            throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); // NOLINT
-        }
-    }
-    if (parameters_properties.size() != expected_properties.size()) {
-        throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", "));
-    }
-}
-
-static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
-    auto builtin_tools = json::array();
-    common_chat_params data;
-    if (!inputs.tools.is_null()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-
-            auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
-                if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
-                    expect_tool_parameters(name, parameters, {"query"});
-                } else if (name == "python" || name == "code_interpreter") {
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
-                    expect_tool_parameters(name, parameters, {"code"});
-                } else {
-                    return false;
-                }
-
-                std::vector<std::string> kvs;
-                for (const auto & [key, value] : parameters.at("properties").items()) {
-                    kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
-                }
-
-                tool_rules.push_back(
-                    builder.add_rule(
-                        name + "-call",
-                        "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
-                builtin_tools.push_back(name);
-
-                return true;
-            };
-
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
-                if (allow_python_tag_builtin_tools) {
-                    handle_builtin_tool(name, parameters);
-                }
-                tool_rules.push_back(
-                    builder.add_rule(
-                        name + "-call",
-                        "\"{\" space "
-                        "( \"\\\"type\\\"\"       space \":\" space \"\\\"function\\\"\"     space \",\" space )? "
-                        "  \"\\\"name\\\"\"       space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
-                        "  \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
-                        "\"}\" space"));
-            });
-            // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                "(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*",
-            });
-            if (!builtin_tools.empty()) {
-                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
-                data.preserved_tokens.push_back("<|python_tag|>");
-            }
-            // Allow a few empty lines on top of the usual constrained json schema space rule.
-            builder.add_rule("root", string_join(tool_rules, " | "));
-            data.additional_stops.push_back("<|eom_id|>");
-        });
-        data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
-            ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
-            : COMMON_CHAT_FORMAT_LLAMA_3_X;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, json {
-        {"date_string", format_time(inputs.now, "%d %b %Y")},
-        {"tools_in_user_message", false},
-        {"builtin_tools", builtin_tools},
-    });
-    return data;
-}
 
-static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Generate the prompt using the apply() function with the template
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2;
+    auto prompt = common_chat_template_direct_apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
 
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
+    // Check if we need to replace the return token with end token during
+    // inference and without generation prompt. For more details see:
+    // https://github.com/ggml-org/llama.cpp/issues/15417
+    if (inputs.is_inference && !inputs.add_generation_prompt) {
+        static constexpr std::string_view return_token = "<|return|>";
+        static constexpr std::string_view end_token    = "<|end|>";
+        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
+            prompt.replace(pos, return_token.length(), end_token);
         }
     }
 
-    // When tools are present, build grammar for the <TOOLCALL> format, similar to CommandR, but without tool call ID
-    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = true;
-        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    { "type",       "object"                                                   },
-                    { "properties",
-                        {
-                            { "name",
-                            {
-                                { "type", "string" },
-                                { "const", function.at("name") },
-                            } },
-                            { "arguments", function.at("parameters") },
-                        }                                                                        },
-                    { "required",   json::array({ "name", "arguments" }) },
-                });
-            });
-            auto schema = json{
-                        { "type",     "array"                                                         },
-                        { "items",    schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
-                        { "minItems", 1                                                               },
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root",
-                                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                                    "\"<TOOLCALL>\" " + builder.add_schema("tool_calls", schema) +
-                                    " \"</TOOLCALL>\"");
-        });
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-            // If thinking_forced_open, then we capture the </think> tag in the grammar,
-            // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-            std::string(data.thinking_forced_open ?
-                            "[\\s\\S]*?(</think>\\s*)" :
-                            "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                "(<TOOLCALL>)[\\s\\S]*" });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED;
-
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
+    data.prompt = prompt;
+    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
 
+    // These special tokens are required to parse properly, so we include them
+    // even if parse_tool_calls is false.
     data.preserved_tokens = {
-        "<think>",
-        "</think>",
-        "<tool_call>",
-        "</tool_call>",
+        "<|channel|>", "<|constrain|>", "<|message|>", "<|start|>", "<|end|>",
     };
 
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
     auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
-    auto include_grammar = true;
-
-    auto parser = build_chat_peg_constructed_parser([&](auto & p) {
-        auto reasoning = p.eps();
-        if (inputs.enable_thinking && extract_reasoning) {
-            auto reasoning_content = p.reasoning(p.until("</think>")) + ("</think>" | p.end());
-            if (data.thinking_forced_open) {
-                reasoning = reasoning_content;
-            }
-        }
+    auto include_grammar   = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && has_tools;
+
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        const std::string END                = "<|end|>";
+        const std::string START              = "<|start|>";
+        const std::string MESSAGE            = "<|message|>";
+        const std::string CHANNEL            = "<|channel|>";
+        const std::string CONSTRAIN          = "<|constrain|>";
+        const std::string START_ASSISTANT    = START + "assistant";
+        const std::string CHANNEL_ANALYSIS   = CHANNEL + "analysis";
+        const std::string CHANNEL_COMMENTARY = CHANNEL + "commentary";
+        const std::string CHANNEL_FINAL      = CHANNEL + "final";
+
+        auto the_end = END | p.end();
+
+        const std::string analysis_header  = CHANNEL_ANALYSIS + MESSAGE;
+        auto              segment_content  = p.until(END);
+        auto              analysis_segment = extract_reasoning ?
+                                                 p.literal(analysis_header) + p.reasoning(segment_content) + p.until(END) + the_end :
+                                                 p.content(analysis_header + p.until(END) + the_end);
+
+        auto channel_header_content = p.until_one_of({ " to=functions.", MESSAGE });
+        auto content_header         = p.choice({ p.literal(CHANNEL_COMMENTARY), p.literal(CHANNEL_FINAL) });
+        auto content_segment        = p.rule("content-segment", content_header + channel_header_content + MESSAGE +
+                                                                    p.content(segment_content) + the_end);
 
-        // Response format parser
-        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
-            return reasoning << p.content(p.schema(p.json(), "response-format", inputs.json_schema));
+        if (!inputs.json_schema.is_null()) {
+            auto final_header = p.literal(CHANNEL_FINAL);
+            auto constraint   = p.optional(p.space() + p.literal(CONSTRAIN) + channel_header_content);
+            return p.optional(analysis_segment) + final_header + constraint + MESSAGE +
+                   p.content(p.schema(p.json(), "response-format", inputs.json_schema));
         }
 
+        auto segment  = p.optional(START_ASSISTANT + p.space()) + p.choice({ content_segment, analysis_segment });
+        auto contents = p.optional(segment + p.repeat(p.optional(p.space()) + segment, 0, -1)) + p.end();
+
         // Tool call parser
         if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
             auto tool_choice = p.choice();
+
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-
-                auto schema_info = common_schema_info();
-                schema_info.resolve_refs(parameters);
-
-                auto tool_open = "<function=" + p.tool_name(p.literal(name)) + ">\n";
-                auto tool_close = p.literal("</function>\n");
-                auto args = p.sequence();
-                auto arg_string = p.rule("xml-arg-string", p.until_one_of({
-                    "\n</parameter>",
-                    "\n<parameter=",
-                    "\n</function>"
-                }));
-
-                foreach_parameter(function, [&](const auto & param_name, const json & param_schema, bool is_required) {
-                    auto rule_name = "tool-" + name + "-arg-" + param_name;
-
-                    auto arg_open = "<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">\n";
-                    auto arg_close = p.literal("</parameter>\n");
-                    auto arg_value = p.eps();
-
-                    if (schema_info.resolves_to_string(param_schema)) {
-                        arg_value = p.tool_arg_string_value(arg_string) + "\n";
-                    } else {
-                        arg_value = p.tool_arg_json_value(p.schema(p.json(), rule_name + "-schema", param_schema));
-                    }
+                std::string  name     = function.at("name");
+                const auto & params   = function.at("parameters");
 
-                    // Model may or my not close with </parameter>
-                    auto arg_rule = p.rule(rule_name, p.tool_arg_open(arg_open) + arg_value + p.optional(p.tool_arg_close(arg_close)));
-                    args += p.repeat(arg_rule, /* min = */ is_required ? 1 : 0, /* max = */ 1);
-                });
+                // Tool call can appear as:
+                // 1. In role header: " to=functions.NAME<|channel|>..."
+                // 2. In channel: "<|channel|>(analysis|commentary) to=functions.NAME..."
+                auto func_name = p.literal(" to=functions.") + p.tool_name(p.literal(name));
+
+                auto channel    = p.literal(CHANNEL_COMMENTARY) | p.literal(CHANNEL_ANALYSIS);
+                auto constraint = p.space() + p.optional(p.literal(CONSTRAIN) + channel_header_content);
+                auto args       = p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", params));
 
-                tool_choice |= p.rule("tool-" + name, p.tool_open(tool_open) + args + p.tool_close(tool_close));
+                // Pattern 1: recipient in role header
+                // " to=functions.NAME<|channel|>(analysis|commentary)[constraint]<|message|>ARGS"
+                auto tool_in_role = p.tool(p.tool_open(func_name + channel) + constraint + MESSAGE + args);
+
+                // Pattern 2: recipient in channel header
+                // "<|channel|>(analysis|commentary) to=functions.NAME[constraint]<|message|>ARGS"
+
+                auto tool_in_channel = p.tool(channel + p.tool_open(func_name + constraint + MESSAGE) + args);
+
+                tool_choice |= p.trigger_rule("tool-" + name, tool_in_role | tool_in_channel);
             });
 
             auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
             auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
-            auto tool_call = p.rule("tool-call", "<tool_call>\n" + tool_choice + "</tool_call>" + p.space());
-            auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));
 
-            return reasoning << p.content(p.until("<tool_call>")) << tool_calls;
+            auto role_start = p.optional(p.space() + p.literal(START_ASSISTANT));
+            auto tool_call  = p.rule("tool-call", p.repeat(role_start + tool_choice, min_calls, max_calls) + p.end());
+
+            return p.choice({ tool_call, p.one_or_more(segment) + tool_call });
         }
 
-        // Content only parser
-        include_grammar = false;
-        return reasoning << p.content(p.rest());
+        return contents;
     });
 
     data.parser = parser.save();
 
     if (include_grammar) {
         data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
-
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                auto schema = function.at("parameters");
+                auto         schema   = function.at("parameters");
                 builder.resolve_refs(schema);
             });
             parser.build_grammar(builder, data.grammar_lazy);
         });
 
         data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"}
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "^(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)"               },
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "(?:<\\|end\\|>)(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)" },
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
+             "(?:<\\|start\\|>assistant\\s*)?(<\\|channel\\|>(?:commentary|analysis)\\s+to=functions)"                }
         };
     }
 
     return data;
 }
 
-
-static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
+// Functionary v3.2 - uses recipient-based format: >>>recipient\n{content}
+static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template &    tmpl,
+                                                                   const struct templates_params & inputs) {
     common_chat_params data;
 
-    // Generate the prompt using the apply() function with the template
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_APERTUS;
+    data.prompt           = common_chat_template_direct_apply(tmpl, inputs);
+    data.format           = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.preserved_tokens = {
+        ">>>all",
+    };
+
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
 
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (string_ends_with(data.prompt, "<|inner_prefix|>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "<|inner_suffix|>";
-        } else {
-            data.thinking_forced_open = true;
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        // Functionary v3.2 format:
+        // - Normal content: >>>all\n{content}
+        // - Tool calls: >>>function_name\n{json_args}
+        // Generation prompt ends with ">>>" so model outputs recipient immediately
+
+        // Build content parser for >>>all\n{content}
+        // When tools are present, content stops before the next ">>>" (tool call)
+        // When no tools, content goes until end
+        auto content_until_tool = p.literal(">>>all\n") + p.content(p.until(">>>"));
+        auto content_until_end  = p.literal(">>>all\n") + p.content(p.rest());
+
+        // If no tools or tool_choice is NONE, just parse content
+        if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+            // When no tools, just match the prefix and capture everything after
+            return content_until_end + p.end();
         }
-    }
 
-    // When tools are present, build grammar for the <|tools_prefix|> format
-    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = true;
-        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    { "type",       "object"                                                   },
-                    { "properties",
-                        {
-                            { function.at("name"), function.at("parameters") }
-                        }                                                                        },
-                    { "required",   json::array({ function.at("name") }) },
-                });
-            });
-            auto schema = json{
-                        { "type",     "array"                                                         },
-                        { "items",    schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
-                        { "minItems", 1                                                               },
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root",
-                                std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "") +
-                                    "\"<|tools_prefix|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tools_suffix|>\"");
-                            });
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-            // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar,
-            // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-            std::string(data.thinking_forced_open ?
-                            "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" :
-                            "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?") +
-                "(<\\|tools_prefix\\|>)[\\s\\S]*" });
-        data.preserved_tokens = {
-            "<|system_start|>",
-            "<|system_end|>",
-            "<|developer_start|>",
-            "<|developer_end|>",
-            "<|user_start|>",
-            "<|user_end|>",
-            "<|assistant_start|>",
-            "<|assistant_end|>",
-            "<|inner_prefix|>",
-            "<|inner_suffix|>",
-            "<|tools_prefix|>",
-            "<|tools_suffix|>",
-        };
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    auto prompt = apply(tmpl, inputs);
-
-    // Hacks to fix the official (broken) prompt.
-    // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
-    // until the official template is fixed.
-    if (tmpl.source().find("{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}") != std::string::npos) {
-        // Don't leave the chat dangling after tool results
-        if (string_ends_with(prompt, "<｜tool▁outputs▁end｜>")) {
-            prompt += "<｜end▁of▁sentence｜>";
-            if (inputs.add_generation_prompt) {
-                prompt += "<｜Assistant｜>";
-            }
-        }
-        // Fix up tool call delta example added by Minja
-        prompt = std::regex_replace(
-            prompt,
-            std::regex("(<｜tool▁call▁end｜>)[\\s\\r\\n]*(<｜tool▁outputs▁begin｜>|<｜User｜>)"),
-            "$1<｜tool▁calls▁end｜><｜end▁of▁sentence｜>$2");
-    }
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                    "( \"<｜tool▁call▁begin｜>\" )? \"function<｜tool▁sep｜>" + name + "\\n"
-                    "```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
-                    "\"```<｜tool▁call▁end｜>\""));
-            });
-            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
-            // so we accept common variants (then it's all constrained)
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                "( \"<｜tool▁calls▁begin｜>\" | \"<｜tool_calls_begin｜>\" | \"<｜tool calls begin｜>\" | \"<｜tool\\\\_calls\\\\_begin｜>\" | \"<｜tool▁calls｜>\" ) "
-                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
-                "\"<｜tool▁calls▁end｜>\""
-                " space");
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                    "(<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)[\\s\\S]*"
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<｜tool▁calls▁begin｜>",
-                "<｜tool▁call▁begin｜>",
-                "<｜tool▁sep｜>",
-                "<｜tool▁call▁end｜>",
-                "<｜tool▁calls▁end｜",
-            };
-        });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Pass thinking context for DeepSeek V3.1 template
-    json additional_context = {
-        {"thinking", inputs.enable_thinking},
-    };
-
-    auto prompt = apply(tmpl, inputs,
-                       /* messages_override= */ inputs.messages,
-                       /* tools_override= */ std::nullopt,
-                       additional_context);
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    if (string_ends_with(data.prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                    "( \"<｜tool▁call▁begin｜>\" )? \"" + name + "<｜tool▁sep｜>"
-                    "\" " + builder.add_schema(name + "-args", parameters) + " "
-                    "\"<｜tool▁call▁end｜>\""));
-            });
-            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
-            // so we accept common variants (then it's all constrained)
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                "( \"<｜tool▁calls▁begin｜>\" | \"<｜tool_calls_begin｜>\" | \"<｜tool calls begin｜>\" | \"<｜tool\\\\_calls\\\\_begin｜>\" | \"<｜tool▁calls｜>\" ) "
-                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
-                "\"<｜tool▁calls▁end｜>\""
-                " space");
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                    "(<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)[\\s\\S]*"
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<｜tool▁calls▁begin｜>",
-                "<｜tool▁call▁begin｜>",
-                "<｜tool▁sep｜>",
-                "<｜tool▁call▁end｜>",
-                "<｜tool▁calls▁end｜>",
-            };
-        });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;
-
-    // Handle thinking tags based on prompt ending
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!params.enable_thinking) {
-            // Close the thinking tag immediately if thinking is disabled
-            data.prompt += "</think>\n\n";
-        } else {
-            // Mark thinking as forced open (template started with <think>)
-            data.thinking_forced_open = true;
-        }
-    }
-
-    // Preserve MiniMax-M2 special tokens
-    data.preserved_tokens = {
-        "<think>",
-        "</think>",
-        "<minimax:tool_call>",
-        "</minimax:tool_call>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<minimax:tool_call>\n",
-        /* form.tool_start  = */ "<invoke name=\"",
-        /* form.tool_sep    = */ "\">\n",
-        /* form.key_start   = */ "<parameter name=\"",
-        /* form.key_val_sep = */ "\">",
-        /* form.val_end     = */ "</parameter>\n",
-        /* form.tool_end    = */ "</invoke>\n",
-        /* form.scope_end   = */ "</minimax:tool_call>",
-    };
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
-
-    data.preserved_tokens = {
-        "<tool_call>",
-        "</tool_call>",
-        "<function=",
-        "</function>",
-        "<parameter=",
-        "</parameter>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<tool_call>\n",
-        /* form.tool_start  = */ "<function=",
-        /* form.tool_sep    = */ ">\n",
-        /* form.key_start   = */ "<parameter=",
-        /* form.key_val_sep = */ ">\n",
-        /* form.val_end     = */ "\n</parameter>\n",
-        /* form.tool_end    = */ "</function>\n",
-        /* form.scope_end   = */ "</tool_call>",
-    };
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_KIMI_K2;
-
-    data.preserved_tokens = {
-        "<think>",
-        "</think>",
-        "<|tool_calls_section_begin|>",
-        "<|tool_call_begin|>",
-        "<|tool_call_argument_begin|>",
-        "<|tool_call_end|>",
-        "<|tool_calls_section_end|>",
-        "<|im_end|>",
-        "<|im_system|>",
-        "<|im_middle|>",
-    };
-
-    data.additional_stops.insert(data.additional_stops.end(), {
-        "<|im_end|>",
-        "<|im_middle|>"
-    });
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<|tool_calls_section_begin|>";
-        form.tool_start  = "<|tool_call_begin|>";
-        form.tool_sep    = "<|tool_call_argument_begin|>{";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}<|tool_call_end|>";
-        form.scope_end   = "<|tool_calls_section_end|>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_APRIEL_1_5;
-
-    data.preserved_tokens = {
-        "<thinking>",
-        "</thinking>",
-        "<tool_calls>",
-        "</tool_calls>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<tool_calls>[";
-        form.tool_start  = "{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}, ";
-        form.scope_end   = "]</tool_calls>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.last_tool_end = "}";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO;
-
-    data.preserved_tokens = {
-        "<tool_call>",
-        "</tool_call>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "\n";
-        form.tool_start  = "<tool_call>\n{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}\n</tool_call>";
-        form.scope_end   = "";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
-    auto adjusted_messages = json::array();
-    for (const auto & msg : inputs.messages) {
-        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
-        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
-
-        if (has_reasoning_content && has_tool_calls) {
-            auto adjusted_message = msg;
-            adjusted_message["thinking"] = msg.at("reasoning_content");
-            adjusted_messages.push_back(adjusted_message);
-        } else {
-            adjusted_messages.push_back(msg);
-        }
-    }
-
-    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
-
-    // Check if we need to replace the return token with end token during
-    // inference and without generation prompt. For more details see:
-    // https://github.com/ggml-org/llama.cpp/issues/15417
-    if (inputs.is_inference && !inputs.add_generation_prompt) {
-        static constexpr std::string_view return_token = "<|return|>";
-        static constexpr std::string_view end_token    = "<|end|>";
-        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
-            prompt.replace(pos, return_token.length(), end_token);
-        }
-    }
-
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_GPT_OSS;
-
-    // These special tokens are required to parse properly, so we include them
-    // even if parse_tool_calls is false.
-    data.preserved_tokens = {
-        "<|channel|>",
-        "<|constrain|>",
-        "<|message|>",
-        "<|start|>",
-        "<|end|>",
-    };
-
-    if (!inputs.json_schema.is_null()) {
-        data.grammar_lazy = false;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schema = inputs.json_schema;
-            builder.resolve_refs(schema);
-
-            auto not_end = builder.add_rule("not-end",
-                "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
-            auto analysis = builder.add_rule("analysis",
-                "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-            auto constraint = builder.add_rule("constraint", "\"<|constrain|>\"? [a-zA-Z0-9_-]+");
-            auto final = builder.add_rule("final",
-                "\"<|channel|>final\" ( \" \" " + constraint + " )? \"<|message|>\" " +
-                builder.add_schema("response", schema)
-            );
-
-            builder.add_rule("root", "( " + analysis + " \"<|start|>assistant\" )? " + final);
-        });
-    }
-
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            // tool calls can appear in commentary or analysis channels
-            auto channel = builder.add_rule("channel", "\"<|channel|>\" ( \"commentary\" | \"analysis\" )");
-
-            std::vector<std::string> tool_rules_recipient_in_role;
-            std::vector<std::string> tool_rules_recipient_in_channel;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                tool_rules_recipient_in_role.push_back(
-                    builder.add_rule(name + "-call",
-                        "\"" + name + "\"" + channel + " \" <|constrain|>json\"? \"<|message|>\" " +
-                        builder.add_schema(name + "-args", parameters)
-                    )
-                );
-
-                tool_rules_recipient_in_channel.push_back(
-                    builder.add_rule(name + "-call",
-                        "\"" + name + "\"" + " \" <|constrain|>json\"? \"<|message|>\" " +
-                        builder.add_schema(name + "-args", parameters)
-                    )
-                );
-            });
-
-            auto recipient_in_channel = builder.add_rule("recipient_in_channel",
-                channel + " \" to=functions.\" ( " +
-                string_join(tool_rules_recipient_in_channel, " | ") + " )"
-            );
-
-            if (data.grammar_lazy) {
-                auto recipient_in_role = builder.add_rule("recipient_in_role",
-                    "\"<|start|>assistant\"? \" to=functions.\" ( " +
-                    string_join(tool_rules_recipient_in_role, " | ") + " )"
-                );
-
-                builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
-            } else {
-                auto not_end = builder.add_rule("not-end",
-                    "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
-                auto analysis = builder.add_rule("analysis",
-                    "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-                auto commentary = builder.add_rule("commentary",
-                    "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-
-                auto recipient_in_role = builder.add_rule("recipient_in_role",
-                    "\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )"
-                );
-
-                builder.add_rule("root",
-                    "( " + analysis + " \"<|start|>assistant\" )? " +
-                    "( " + commentary + " \"<|start|>assistant\" )? " +
-                    "( " + recipient_in_role + " | " + recipient_in_channel + " )"
-                );
-            }
-
-            // Trigger on tool calls that appear in the commentary channel
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                "<\\|channel\\|>(?:commentary|analysis) to"
-            });
-
-            // Trigger tool calls that appear in the role section, either at the
-            // start or in the middle.
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                "^ to"
-            });
-
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                "<\\|start\\|>assistant to"
-            });
-        });
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    std::string prompt = apply(tmpl, inputs);
-
-    // match the existing trimming behavior
-    if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) {
-        prompt.erase(0, tmpl.bos_token().size());
-    }
-    if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) {
-        prompt.erase(prompt.size() - tmpl.eos_token().size());
-    }
-    if (string_ends_with(prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    // add GLM preserved tokens
-    data.preserved_tokens = {
-        "<|endoftext|>",
-        "[MASK]",
-        "[gMASK]",
-        "[sMASK]",
-        "<sop>",
-        "<eop>",
-        "<|system|>",
-        "<|user|>",
-        "<|assistant|>",
-        "<|observation|>",
-        "<|begin_of_image|>",
-        "<|end_of_image|>",
-        "<|begin_of_video|>",
-        "<|end_of_video|>",
-        "<|begin_of_audio|>",
-        "<|end_of_audio|>",
-        "<|begin_of_transcription|>",
-        "<|end_of_transcription|>",
-        "<|code_prefix|>",
-        "<|code_middle|>",
-        "<|code_suffix|>",
-        "/nothink",
-        "<think>",
-        "</think>",
-        "<tool_call>",
-        "</tool_call>",
-        "<arg_key>",
-        "</arg_key>",
-        "<arg_value>",
-        "</arg_value>"
-    };
-
-    // extra GLM 4.5 stop word
-    data.additional_stops.insert(data.additional_stops.end(), {
-        "<|user|>",
-        "<|observation|>"
-    });
-
-    // build grammar for tool call
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "",
-        /* form.tool_start  = */ "\n<tool_call>",
-        /* form.tool_sep    = */ "\n",
-        /* form.key_start   = */ "<arg_key>",
-        /* form.key_val_sep = */ "</arg_key>\n<arg_value>",
-        /* form.val_end     = */ "</arg_value>\n",
-        /* form.tool_end    = */ "</tool_call>\n",
-        /* form.scope_end   = */ "",
-    };
-    build_grammar_xml_tool_call(data, inputs.tools, form);
-
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_GLM_4_5;
-    return data;
-}
-
-static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    LOG_DBG("%s\n", __func__);
-    common_chat_params data;
-    const std::optional<json> additional_context = json {
-        {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
-        {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
-    };
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override =*/ std::nullopt, additional_context);
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
-        });
-        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["});
-        data.preserved_tokens = {
-            " functools[",
-        };
-        data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
-    // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
-    // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code.
-    common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> first_tool_rules;
-            std::vector<std::string> subsequent_tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                std::string args_pattern = "[\\s\\S]*";
-                auto args_rule = builder.add_schema(name + "-args", parameters);
-                if (name == "python") {
-                    args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*");
-                } else {
-                    args_pattern = "\\{" + args_pattern;
-                }
-                auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule);
-                first_tool_rules.push_back(call_rule);
-                if (inputs.parallel_tool_calls) {
-                    subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule));
-                }
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                    "((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern,
-                });
-            });
-            data.preserved_tokens = {
-                "<|end_header_id|>",
-            };
-            auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space";
-            if (inputs.parallel_tool_calls) {
-                auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space";
-                builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*");
-            } else {
-                builder.add_rule("root", first_rule);
-            }
-
-        });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
-    common_chat_params data;
-
-    if (!inputs.tools.is_null()) {
-        std::string python_code_argument_name;
-        auto has_raw_python = false;
-
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                const auto & parameters = function.at("parameters");
-                std::string name = function.at("name");
-                if (name == "python" || name == "ipython") {
-                    if (!parameters.contains("type")) {
-                        throw std::runtime_error("Missing type in python tool");
-                    }
-                    has_raw_python = true;
-                    const auto & type = parameters.at("type");
-                    if (type == "object") {
-                        auto properties = parameters.at("properties");
-                        for (auto it = properties.begin(); it != properties.end(); ++it) {
-                            if (it.value().at("type") == "string") {
-                                if (!python_code_argument_name.empty()) {
-                                    throw std::runtime_error("Multiple string arguments found in python tool");
-                                }
-                                python_code_argument_name = it.key();
-                            }
-                        }
-                        if (python_code_argument_name.empty()) {
-                            throw std::runtime_error("No string argument found in python tool");
-                        }
-                    } else if (type != "string") {
-                        throw std::runtime_error("Invalid type in python tool: " + type.dump());
-                    }
-                }
-                tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
-            });
-            if (has_raw_python) {
-                tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
-                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
-                data.preserved_tokens.push_back("<|python_tag|>");
-            }
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
-            builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
-            data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
-        });
-        data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
-
-    data.prompt = apply(tmpl, inputs);
-    // TODO: if (has_raw_python)
-    return data;
-}
-
-static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    json extra_context = json {
-        {"enable_thinking", inputs.enable_thinking},
-    };
-    extra_context.update(inputs.extra_context);
-
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, extra_context);
-    data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!extra_context["enable_thinking"]) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (!inputs.tools.is_null()) {
-        // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            std::vector<std::string> tool_call_alts;
-            std::vector<std::string> escaped_names;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_schema(name + "-call", {
-                    {"type", "object"},
-                    {"properties", json {
-                        {"name", json {{"const", name}}},
-                        {"arguments", parameters},
-                    }},
-                    {"required", json::array({"name", "arguments"})},
-                }));
-                tool_call_alts.push_back(builder.add_rule(
-                    name + "-function-tag",
-                    "\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
-                    builder.add_schema(name + "-args", parameters) + " "
-                    "\"</function>\" space"));
-
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
-                    "<function=" + name + ">",
-                });
-                auto escaped_name = regex_escape(name);
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                    "<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
-                });
-                escaped_names.push_back(escaped_name);
-            });
-            auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
-            std::vector<std::string> alt_tags {
-                any_tool_call,
-                "\"<tool_call>\" space "     + any_tool_call + " \"</tool_call>\"",
-                // The rest is just to accommodate common "good bad" outputs.
-                "\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
-                "\"<response>\"  space "     + any_tool_call + " \"</response>\"",
-                "\"<tools>\"     space "     + any_tool_call + " \"</tools>\"",
-                "\"<json>\"      space "     + any_tool_call + " \"</json>\"",
-                "\"<xml>\"      space "     + any_tool_call + " \"</xml>\"",
-                "\"<JSON>\"      space "     + any_tool_call + " \"</JSON>\"",
-            };
-            auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
-            tool_call_alts.push_back(wrappable_tool_call);
-            tool_call_alts.push_back(
-                "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
-            // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "(</think>\\s*)" : "") + (
-                    "\\s*("
-                    "(?:<tool_call>"
-                    "|<function"
-                    "|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
-                    "\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\""
-                    ")"
-                    ")"
-                ),
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<tool_call>",
-                "</tool_call>",
-                "<function",
-                "<tools>",
-                "</tools>",
-                "<response>",
-                "</response>",
-                "<function_call>",
-                "</function_call>",
-                "<json>",
-                "</json>",
-                "<JSON>",
-                "</JSON>",
-                "```",
-                "```json",
-                "```xml",
-            };
-        });
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Pass thinking context for Granite template
-    json additional_context = {
-        {"thinking", inputs.enable_thinking},
-    };
-
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ std::nullopt, /* tools_override= */ std::nullopt, additional_context);
-    data.format = COMMON_CHAT_FORMAT_GRANITE;
-
-    if (string_ends_with(data.prompt, "<think>\n") || string_ends_with(data.prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
+        // Build tool call parsers for each available function
+        auto tool_choice = p.choice();
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool.at("function");
+            std::string  name     = function.at("name");
+            const auto & schema   = function.at("parameters");
 
-    if (!inputs.tools.is_null()) {
-        // Granite uses <|tool_call|> followed by JSON list
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call", builder.add_schema(name +
-"-args", {
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {{"const", name}}},
-                        {"arguments", parameters},
-                    }},
-                    {"required", json::array({"name", "arguments"})},
-                })));
-            });
+            // Tool format: >>>function_name\n{json_args}
+            auto tool_parser = p.tool(
+                p.tool_open(p.literal(">>>") + p.tool_name(p.literal(name)) + p.literal("\n")) +
+                p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
+            );
 
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
-            auto tool_list = builder.add_rule("tool_list", "\"[\" space " + tool_call + " (\",\" space " + tool_call + ")* space \"]\"");
+            tool_choice |= p.rule("tool-" + name, tool_parser);
+        });
 
-            if (data.thinking_forced_open) {
-                builder.add_rule("root", "\"</think>\" space \"<response>\" space [^<]* \"</response>\" space \"<|tool_call|>\" space " + tool_list);
+        // The model can output:
+        // 1. Just content: >>>all\n{content}
+        // 2. Just tool call(s): >>>function_name\n{json_args}
+        // 3. Both: >>>all\n{content}>>>function_name\n{json_args}
+        
+        // Option 1: Content only (no following tool call)
+        auto content_only = content_until_end;
+        
+        // Option 2: Content followed by tool call(s)
+        auto content_and_tools = content_until_tool + p.one_or_more(tool_choice);
+        
+        // Option 3: Just tool call(s) (no content)
+        auto tools_only = p.one_or_more(tool_choice);
+
+        if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
+            // Must have at least one tool call
+            if (inputs.parallel_tool_calls) {
+                // Multiple tool calls allowed
+                return p.choice({ content_and_tools, tools_only }) + p.end();
             } else {
-                builder.add_rule("root", "\"<|tool_call|>\" space " + tool_list);
+                // Single tool call only
+                return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end();
             }
-
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
-                "<|tool_call|>"
-            });
-
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<response>",
-                "</response>",
-                "<|tool_call|>",
-            };
-        });
-    } else {
-        // Handle thinking tags for non-tool responses
-        if (data.thinking_forced_open && inputs.enable_thinking) {
-            data.grammar_lazy = false;
-            data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-                builder.add_rule("root", "\"</think>\" space \"<response>\" space .* \"</response>\" space");
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<response>",
-                "</response>",
-            };
-        }
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Copy `reasoning_content` to `reasoning`
-    auto adjusted_messages = json::array();
-    for (const auto & msg : inputs.messages) {
-        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
-            auto adjusted_message = msg;
-            adjusted_message["reasoning"] = msg.at("reasoning_content");
-            adjusted_message.erase("reasoning_content");
-            adjusted_messages.push_back(adjusted_message);
         } else {
-            adjusted_messages.push_back(msg);
-        }
-    }
-
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
-    auto include_grammar = true;
-
-    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
-
-    // Check if we need to replace the flush token with end token during inference and without generation prompt.
-    if (inputs.is_inference && !inputs.add_generation_prompt) {
-        static constexpr std::string_view return_token = "<|flush|>";
-        static constexpr std::string_view end_token    = "<|end|>";
-        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
-            prompt.replace(pos, return_token.length(), end_token);
-        }
-    }
-
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
-    data.preserved_tokens = {
-        "<|think|>",
-        "<|content|>",
-        "<|begin|>",
-        "<|end|>",
-        "<|tool_calls|>",
-        "<|tool_call:begin|>",
-        "<|tool_call:end|>",
-        "<|tool_call:name|>",
-        "<|tool_call:args|>",
-    };
-
-    auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
-        auto lit_think = p.atomic(p.literal("<|think|>"));
-        auto lit_assistant_begin = p.atomic(p.literal("<|begin|>assistant"));
-        auto lit_content = p.atomic(p.literal("<|content|>"));
-        auto lit_end = p.atomic(p.literal("<|end|>"));
-        auto parser_until_end = p.until("<|end|>");
-
-        // reasoning <- "<|think|>" (!"<|end|>" .)*
-        auto parser_reasoning = p.rule("reasoning", lit_think + p.reasoning(parser_until_end));
-
-        // content <- "<|content|>" (!"<|end|>" .)*
-        auto parser_content = p.rule("content", lit_content + p.content(parser_until_end));
-
-        // wrap_choice(items) <- item-choice wrapped*
-        // item-choice        <- items[0] / ... / items[n]
-        // wrapped            <- "<|end|><|begin|>assistant" item-choice
-        auto wrap_choice = [&](const std::vector<common_peg_parser> & items) {
-            auto choice = p.choice(items);
-            return choice + p.zero_or_more(lit_end + lit_assistant_begin + choice);
-        };
-
-        // wrap_seq(items) <- item[0] "<|end|><|begin|>assistant" item[1] ...
-        auto wrap_seq = [&](const std::vector<common_peg_parser> & items) {
-            auto seq = p.sequence();
-            for (auto i = 0u; i < items.size(); i++) {
-                if (i == 0) {
-                    seq += items[i];
-                    continue;
-                }
-                seq += lit_end + lit_assistant_begin + items[i];
-            }
-            return seq;
-        };
-
-        // Response format parser
-        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
-            auto parser_response_format = lit_content + p.content(p.schema(p.json(), "response-format", inputs.json_schema));
-            return p.choice({
-                wrap_seq({parser_reasoning, parser_response_format}),
-                wrap_seq({parser_response_format})
-            });
-        }
-
-        auto lit_tool_call_begin = p.literal("<|tool_call:begin|>");
-        auto lit_tool_call_name = p.literal("<|tool_call:name|>");
-        auto lit_tool_call_args = p.literal("<|tool_call:args|>");
-        auto lit_tool_call_end = p.literal("<|tool_call:end|>");
-
-        // Tool call parser
-        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
-            auto parser_tool_call = p.choice();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                const auto & schema = function.at("parameters");
-
-                // tool(name, schema) <- name "<|tool_call:args|>" schema
-                parser_tool_call |= p.rule("tool-" + name,
-                    p.atomic(p.tool_name(p.literal(name)) + lit_tool_call_args)
-                    + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
-            });
-
-            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
-            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
-
-            // tool-calls  <- "<|tool_calls|>" tool-call+
-            // tool-call   <- "<|tool_call:begin|> call-id "<|tool_call:name|>" &([^<]+ "<|tool_call:args|>") tool-choice "<|tool_call:end|>"
-            // call-id     <- [a-zA-Z0-9_-]+
-            // tool-choice <- tool(t[0].name, t[0].schema) / ... / tool(t[n].name, t[n].schema)
-            auto parser_tool_calls = p.trigger_rule("tool-calls",
-                p.atomic(p.literal("<|tool_calls|>"))
-                + p.repeat(
-                    p.tool_open(
-                        lit_tool_call_begin
-                        + p.tool_id(p.chars("[a-zA-Z0-9_-]", 1, -1))
-                        + lit_tool_call_name
-                        + p.peek(p.chars("[^<]", 1, -1) + lit_tool_call_args))
-                    + parser_tool_call
-                    + p.tool_close(lit_tool_call_end),
-                /* min = */ 1,
-                /* max = */ max_calls));
-
-            if (min_calls == 1) {
-                // If required, then try any combination of the reasoning, content, and tool call
-                return p.choice({
-                    wrap_seq({parser_reasoning, parser_content, parser_tool_calls}),
-                    wrap_seq({parser_reasoning, parser_tool_calls}),
-                    wrap_seq({parser_content, parser_tool_calls}),
-                    wrap_seq({parser_tool_calls})
-                });
+            // Tool calls are optional (auto mode)
+            if (inputs.parallel_tool_calls) {
+                // Multiple tool calls allowed
+                return p.choice({ content_and_tools, content_only, tools_only }) + p.end();
+            } else {
+                // Single tool call at most
+                auto content_and_tool = content_until_tool + tool_choice;
+                return p.choice({ content_and_tool, content_only, tool_choice }) + p.end();
             }
-
-            return wrap_choice({parser_reasoning, parser_content, parser_tool_calls});
         }
-
-        // Content only parser
-        include_grammar = false;
-        return wrap_choice({parser_reasoning, parser_content});
     });
 
     data.parser = parser.save();
 
     if (include_grammar) {
-        data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
 
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                auto schema = function.at("parameters");
+                auto         schema   = function.at("parameters");
                 builder.resolve_refs(schema);
             });
             parser.build_grammar(builder, data.grammar_lazy);
         });
 
+        // Grammar trigger for when the model starts outputting a tool call
+        // (after the initial ">>>" in the generation prompt)
         data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_calls|>"}
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, ">>>" }
         };
     }
 
     return data;
 }
 
-static common_chat_params common_chat_params_init_exaone_moe(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_EXAONE_MOE;
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>\n\n";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                // Expect: <tool_call>{"name": "<name>", "arguments": {...}}</tool_call>
-                tool_rules.push_back(builder.add_rule(
-                    name + "-call",
-                    "\"<tool_call>\" space " +
-                        builder.add_schema(name + "-obj", json{
-                            {"type", "object"},
-                            {"properties", {
-                                {"name",      json{{"const", name}}},
-                                {"arguments", parameters},
-                            }},
-                            {"required", json::array({"name", "arguments"})},
-                        }) +
-                    " space \"</tool_call>\" space"));
-            });
-
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
-
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)?" : "") +
-                    "(<tool_call>)[\\s\\S]*"
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<tool_call>",
-                "</tool_call>",
-            };
-        });
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_translate_gemma(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // This template does not support tools or reasoning
-    // we just need to transform the messages into the correct schema
-
-    templates_params inputs_new = inputs;
-    json & messages = inputs_new.messages;
-
-    // default to chat_template_kwargs, or en-GB if not specified
-    std::string default_src_lang = inputs.extra_context.value("source_lang_code", "en-GB");
-    std::string default_tgt_lang = inputs.extra_context.value("target_lang_code", "en-GB");
-
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("role") && message["role"].get<std::string>() != "user") {
-            continue;
-        }
-        if (!message.contains("content")) {
-            message["content"] = json::array();
-        }
-        if (message.contains("content") && !message["content"].is_array()) {
-            auto content_str = message["content"].get<std::string>();
-            // default to en-GB if not specified (to make common_chat_format_example works)
-            auto src_lang = message.contains("source_lang_code")
-                        ? message["source_lang_code"].get<std::string>() : default_src_lang;
-            auto tgt_lang = message.contains("target_lang_code")
-                        ? message["target_lang_code"].get<std::string>() : default_tgt_lang;
-            message["content"] = json::array({
-                json{
-                    {"type", "text"},
-                    {"text", content_str},
-                    {"source_lang_code", src_lang},
-                    {"target_lang_code", tgt_lang},
-                }
-            });
-        }
-    }
-
-    data.prompt = apply(tmpl, inputs_new, std::nullopt, std::nullopt);
-    data.format = COMMON_CHAT_FORMAT_GENERIC;
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    data.grammar_lazy = false;
-    if (!inputs.json_schema.is_null()) {
-        if (!inputs.grammar.empty()) {
-            throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
-        }
-        data.grammar = json_schema_to_grammar(inputs.json_schema);
-    } else {
-        data.grammar = inputs.grammar;
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_seed_oss(
-    const common_chat_template         & tmpl,
-    templates_params                   & params,
-    const common_chat_templates_inputs & inputs)
-{
-    common_chat_params data;
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_SEED_OSS;
-    if (string_ends_with(data.prompt, "<seed:think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</seed:think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (params.tools.is_array() && !params.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(params.tools, [&](const json & tool) {
-                const auto & function   = tool.at("function");
-                std::string  name       = function.at("name");
-                auto         parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                // Create rule for Seed-OSS function call format
-                std::string param_rules;
-                if (parameters.contains("properties")) {
-                    for (const auto & [key, value] : parameters.at("properties").items()) {
-                        param_rules += "\"<parameter=" + key + ">\"" + builder.add_schema(name + "-arg-" + key, value) +
-                                       "\"</parameter>\"";
-                    }
-                }
-
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                                                      "\"<seed:tool_call>\" space \"<function=" + name + ">\" space " +
-                                                          param_rules +
-                                                          " \"</function>\" space \"</seed:tool_call>\""));
-            });
-
-            data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<seed:tool_call>" });
-
-            data.preserved_tokens = {
-                "<seed:think>", "</seed:think>", "<seed:tool_call>", "</seed:tool_call>",
-                "<function=",   "</function>",   "<parameter=",      "</parameter>",
-            };
-
-            builder.add_rule("root", string_join(tool_rules, " | "));
-        });
-    }
-    return data;
-}
-
-// various workarounds for known issues with certain templates or model behaviors
-// TODO @ngxson : improve this (how?)
 namespace workaround {
 
 // if first message is system and template does not support it, merge it with next message
@@ -2929,6 +1157,15 @@ static void system_message_not_supported(json & messages) {
     }
 }
 
+static void requires_non_null_content(json & messages) {
+    GGML_ASSERT(messages.is_array());
+    for (auto & message : messages) {
+        if (message.contains("tool_calls") && !message.contains("content")) {
+            message["content"] = "";
+        }
+    }
+}
+
 static void func_args_not_string(json & messages) {
     GGML_ASSERT(messages.is_array());
     for (auto & message : messages) {
@@ -2949,70 +1186,10 @@ static void func_args_not_string(json & messages) {
     }
 }
 
-static void move_tool_calls_to_content(json & messages, int indent_spaces = 2) {
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("tool_calls")) {
-            auto tool_calls_new = json{
-                {"tool_calls", message.at("tool_calls")}
-            };
-            message.erase("tool_calls");
-            auto content = message.at("content");
-            std::string content_new = content.is_null() ? "" : content.get<std::string>();
-            message["content"] = content_new + tool_calls_new.dump(indent_spaces, ' ', false, json::error_handler_t::replace);
-        }
-    }
-}
-
-// TODO @ngxson : we may remove support for generic schema in the future
-static void use_generic_schema(json & messages) {
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("tool_calls") && message.at("tool_calls").is_array()) {
-            auto & tool_calls = message.at("tool_calls");
-            for (auto & tool_call : tool_calls) {
-                if (tool_call.contains("type") && tool_call.at("type") == "function" &&
-                    tool_call.contains("function") && tool_call.at("function").is_object()) {
-                    // Copy values before erasing to avoid use-after-free
-                    json name_value;
-                    json arguments_value;
-                    json id_value;
-                    const auto & function = tool_call.at("function");
-                    if (function.contains("name")) {
-                        name_value = function.at("name");
-                    }
-                    if (function.contains("arguments")) {
-                        arguments_value = function.at("arguments");
-                    }
-                    if (tool_call.contains("id")) {
-                        id_value = tool_call.at("id");
-                    }
-                    // Now safely erase and assign in the correct order
-                    tool_call.erase("type");
-                    tool_call.erase("function");
-                    tool_call.erase("id");
-                    // Reassign in desired order: name, arguments, id
-                    if (!name_value.is_null()) {
-                        tool_call["name"] = name_value;
-                    }
-                    if (!arguments_value.is_null()) {
-                        tool_call["arguments"] = arguments_value;
-                    }
-                    if (!id_value.is_null()) {
-                        tool_call["id"] = id_value;
-                    }
-                }
-            }
-        }
-    }
 }
 
-} // namespace workaround
-
-static common_chat_params common_chat_templates_apply_jinja(
-    const struct common_chat_templates        * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
+static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates *        tmpls,
+                                                            const struct common_chat_templates_inputs & inputs) {
     templates_params params;
     params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
     const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
@@ -3034,6 +1211,13 @@ static common_chat_params common_chat_templates_apply_jinja(
         workaround::system_message_not_supported(params.messages);
     }
 
+    if (tmpl.original_caps().supports_tool_calls) {
+        // some templates will require the content field in tool call messages
+        // to still be non-null, this puts an empty string everywhere where the 
+        // content field is null
+        workaround::requires_non_null_content(params.messages);
+    }
+
     params.extra_context = json::object();
     for (auto el : inputs.chat_template_kwargs) {
         params.extra_context[el.first] = json::parse(el.second);
@@ -3043,235 +1227,62 @@ static common_chat_params common_chat_templates_apply_jinja(
         params.json_schema = json::parse(inputs.json_schema);
     }
 
-    if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
-        LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
-        params.parallel_tool_calls = false;
-    } else {
-        params.parallel_tool_calls = inputs.parallel_tool_calls;
-    }
+    // if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
+    //     LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
+    //     params.parallel_tool_calls = false;
+    // } else {
+    params.parallel_tool_calls = inputs.parallel_tool_calls;
+    //}
 
     if (params.tools.is_array()) {
         if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
             throw std::runtime_error("Cannot specify grammar with tools");
         }
         if (caps.supports_tool_calls && !caps.supports_tools) {
-            LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
-        }
-    }
-
-    // DeepSeek V3.1: detect based on specific patterns in the template
-    if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos &&
-        params.json_schema.is_null()) {
-        return common_chat_params_init_deepseek_v3_1(tmpl, params);
-    }
-
-    // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
-    if (src.find("<｜tool▁calls▁begin｜>") != std::string::npos && params.json_schema.is_null()) {
-        return common_chat_params_init_deepseek_r1(tmpl, params);
-    }
-
-    // Command R7B: : use handler in all cases except json schema (thinking / tools).
-    if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_command_r7b(tmpl, params);
-    }
-
-    // Granite (IBM) - detects thinking / tools support
-    if (src.find("elif thinking") != std::string::npos && src.find("<|tool_call|>") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        workaround::use_generic_schema(params.messages);
-        workaround::move_tool_calls_to_content(params.messages);
-        return common_chat_params_init_granite(tmpl, params);
-    }
-
-    // GLM 4.5: detect by <arg_key> and <arg_value> tags (check before Hermes since both use <tool_call>)
-    if (src.find("[gMASK]<sop>") != std::string::npos &&
-        src.find("<arg_key>") != std::string::npos &&
-        src.find("<arg_value>") != std::string::npos &&
-        params.json_schema.is_null()) {
-        workaround::func_args_not_string(params.messages);
-        if (!params.extra_context.contains("clear_thinking")) {
-            // by default, do not clear reasoning_content (added since GLM-4.7)
-            params.extra_context["clear_thinking"] = false;
+            LOG_WRN(
+                "Template supports tool calls but does not natively describe tools. The fallback behaviour used may "
+                "produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
         }
-        return common_chat_params_init_glm_4_5(tmpl, params);
-    }
-
-    // Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
-    // Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates.
-    // Require presence of <tool_call>, <function=...>, and <parameter=...> blocks.
-    if (src.find("<tool_call>") != std::string::npos &&
-        src.find("<function>") != std::string::npos &&
-        src.find("<function=") != std::string::npos &&
-        src.find("<parameters>") != std::string::npos &&
-        src.find("<parameter=") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        // Nemotron 3 Nano 30B A3B
-        if (src.find("<think>") != std::string::npos) {
-            return common_chat_params_init_nemotron_v3(tmpl, params);
-        }
-        return common_chat_params_init_qwen3_coder_xml(tmpl, params);
-    }
-
-    // Xiaomi MiMo format detection (must come before Hermes 2 Pro)
-    if (src.find("<tools>") != std::string::npos &&
-        src.find("# Tools") != std::string::npos &&
-        src.find("</tools>") != std::string::npos &&
-        src.find("<tool_calls>") != std::string::npos &&
-        src.find("</tool_calls>") != std::string::npos &&
-        src.find("<tool_response>") != std::string::npos) {
-        return common_chat_params_init_xiaomi_mimo(tmpl, params);
-    }
-
-    // EXAONE MoE format detection
-    if (src.find("<tool_call>") != std::string::npos &&
-        src.find("<tool_result>") != std::string::npos &&
-        src.find("<|tool_declare|>") != std::string::npos) {
-        return common_chat_params_init_exaone_moe(tmpl, params);
     }
 
-    // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
-    if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
-        return common_chat_params_init_hermes_2_pro(tmpl, params);
+    // Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser
+    // Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
+    if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&
+        src.find("[ARGS]") != std::string::npos && src.find("[CALL_ID]") == std::string::npos) {
+        LOG_DBG("Using specialized template: Ministral/Magistral Large 3\n");
+        return common_chat_params_init_ministral_3(tmpl, params);
     }
 
-    // GPT-OSS
+    // GPT-OSS - has unique channel-based structure that needs dedicated handler
     if (src.find("<|channel|>") != std::string::npos) {
+        LOG_DBG("Using specialized template: GPT-OSS\n");
         return common_chat_params_init_gpt_oss(tmpl, params);
     }
 
-    // Seed-OSS
-    if (src.find("<seed:think>") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_seed_oss(tmpl, params, inputs);
-    }
-
-    // Nemotron v2
-    if (src.find("<SPECIAL_10>") != std::string::npos) {
-        return common_chat_params_init_nemotron_v2(tmpl, params);
-    }
-
-    // Apertus format detection
-    if (src.find("<|system_start|>") != std::string::npos && src.find("<|tools_prefix|>") != std::string::npos) {
-        return common_chat_params_init_apertus(tmpl, params);
-    }
-
-    // LFM2 (w/ tools)
-    if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&
-        src.find("]<|tool_list_end|>") != std::string::npos) {
-        return common_chat_params_init_lfm2(tmpl, params);
-    }
-
-    // MiniMax-M2 format detection
-    if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_minimax_m2(tmpl, params);
-    }
-
-    // Kimi K2 format detection
-    if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos &&
-        src.find("<|tool_calls_section_begin|>") != std::string::npos &&
-        src.find("## Return of") != std::string::npos) {
-        return common_chat_params_init_kimi_k2(tmpl, params);
-    }
-
-    // Apriel 1.5 format detection
-    if (src.find("<thinking>") != std::string::npos &&
-        src.find("</thinking>") != std::string::npos &&
-        src.find("<available_tools>") != std::string::npos &&
-        src.find("<|assistant|>") != std::string::npos &&
-        src.find("<|tool_result|>") != std::string::npos &&
-        src.find("<tool_calls>[") != std::string::npos &&
-        src.find("]</tool_calls>") != std::string::npos) {
-        return common_chat_params_init_apriel_1_5(tmpl, params);
-    }
-
-    // Solar Open
-    if (src.find("<|tool_response:begin|>") != std::string::npos &&
-        src.find("<|tool_response:name|>") != std::string::npos &&
-        src.find("<|tool_response:result|>") != std::string::npos) {
-        return common_chat_params_init_solar_open(tmpl, params);
-    }
-
-    // Use generic handler when mixing tools + JSON schema.
-    // TODO: support that mix in handlers below.
-    if ((params.tools.is_array() && params.json_schema.is_object())) {
-        return common_chat_params_init_generic(tmpl, params);
-    }
-
-    // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
-    if (src.find(">>>all") != std::string::npos) {
+    // Functionary v3.2 - uses recipient-based format with >>>recipient\n{content}
+    // Detection: template has ">>>all" for content and ">>>" prefix for tool calls
+    if (src.find(">>>all") != std::string::npos && src.find(">>>${recipient}") != std::string::npos) {
+        LOG_DBG("Using specialized template: Functionary v3.2\n");
         return common_chat_params_init_functionary_v3_2(tmpl, params);
     }
 
-    // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
-    if (src.find(" functools[") != std::string::npos) {
-        return common_chat_params_init_firefunction_v2(tmpl, params);
-    }
-
-    // Functionary v3.1 (w/ tools)
-    if (src.find("<|start_header_id|>") != std::string::npos
-        && src.find("<function=") != std::string::npos) {
-        return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, params);
-    }
-
-    // Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
-    if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
-        auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
-    }
-
-    // Ministral/Mistral Large 3
-    if (src.find("[SYSTEM_PROMPT]") != std::string::npos &&
-        src.find("[TOOL_CALLS]") != std::string::npos &&
-        src.find("[ARGS]") != std::string::npos) {
-        return common_chat_params_init_ministral_3(tmpl, params);
-    }
-
-    if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
-        return common_chat_params_init_magistral(tmpl, params);
-    }
-
-    // Solar Open
-    if (src.find("<|tool_response:begin|>") != std::string::npos &&
-        src.find("<|tool_response:name|>") != std::string::npos &&
-        src.find("<|tool_response:result|>") != std::string::npos) {
-        return common_chat_params_init_solar_open(tmpl, params);
-    }
-
-    // TranslateGemma
-    if (src.find("[source_lang_code]") != std::string::npos &&
-        src.find("[target_lang_code]") != std::string::npos) {
-        return common_chat_params_init_translate_gemma(tmpl, params);
-    }
-
-    // Plain handler (no tools)
-    if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
-        return common_chat_params_init_without_tools(tmpl, params);
-    }
-
-    // Mistral Nemo (w/ tools)
-    if (src.find("[TOOL_CALLS]") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_mistral_nemo(tmpl, params);
+    try {
+        LOG_DBG("Using differential autoparser\n");
+        auto auto_params = universal_peg_generator::generate_parser(tmpl, params);
+        return auto_params;
+    } catch (const std::exception & e) {
+        LOG_WRN("Automatic parser generation failed: %s\n", e.what());
     }
 
-    // Generic fallback
-    workaround::func_args_not_string(params.messages);
-    workaround::use_generic_schema(params.messages);
-    workaround::move_tool_calls_to_content(params.messages);
-    return common_chat_params_init_generic(tmpl, params);
+    GGML_ABORT("Unable to generate parser for this template.");
 }
 
 // Legacy template route (adhoc C++ implementation of known templates), forward to llama_chat_apply_template.
-static common_chat_params common_chat_templates_apply_legacy(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
-    size_t alloc_size = 0;
+static common_chat_params common_chat_templates_apply_legacy(const struct common_chat_templates *        tmpls,
+                                                             const struct common_chat_templates_inputs & inputs) {
+    size_t                          alloc_size = 0;
     std::vector<llama_chat_message> chat;
-    std::vector<std::string> contents;
+    std::vector<std::string>        contents;
 
     for (const auto & msg : inputs.messages) {
         auto content = msg.content;
@@ -3281,25 +1292,27 @@ static common_chat_params common_chat_templates_apply_legacy(
                 continue;
             }
             if (!content.empty()) {
-                content += "\n";;
+                content += "\n";
+                ;
             }
             content += part.text;
         }
         contents.emplace_back(std::move(content));
     }
     for (size_t i = 0; i < contents.size(); ++i) {
-        const auto & msg = inputs.messages[i];
+        const auto & msg     = inputs.messages[i];
         const auto & content = contents[i];
-        chat.push_back({msg.role.c_str(), content.c_str()});
+        chat.push_back({ msg.role.c_str(), content.c_str() });
         size_t msg_size = msg.role.size() + content.size();
-        alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops
+        alloc_size += msg_size + (msg_size / 4);  // == msg_size * 1.25 but avoiding float ops
     }
 
     std::vector<char> buf(alloc_size);
 
     // run the first time to get the total output length
     const auto & src = tmpls->template_default->source();
-    int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
+    int32_t      res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt,
+                                                 buf.data(), buf.size());
 
     // error: chat template is not supported
     if (res < 0) {
@@ -3311,7 +1324,8 @@ static common_chat_params common_chat_templates_apply_legacy(
     // if it turns out that our buffer is too small, we resize it
     if ((size_t) res > buf.size()) {
         buf.resize(res);
-        res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
+        res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(),
+                                        buf.size());
     }
 
     // for safety, we check the result again
@@ -3329,14 +1343,68 @@ static common_chat_params common_chat_templates_apply_legacy(
     return params;
 }
 
-common_chat_params common_chat_templates_apply(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
+common_chat_params common_chat_templates_apply(const struct common_chat_templates *        tmpls,
+                                               const struct common_chat_templates_inputs & inputs) {
     GGML_ASSERT(tmpls != nullptr);
-    return inputs.use_jinja
-        ? common_chat_templates_apply_jinja(tmpls, inputs)
-        : common_chat_templates_apply_legacy(tmpls, inputs);
+    return inputs.use_jinja ? common_chat_templates_apply_jinja(tmpls, inputs) :
+                              common_chat_templates_apply_legacy(tmpls, inputs);
+}
+
+common_chat_msg common_chat_parse(const std::string &               input,
+                                  bool                              is_partial,
+                                  const common_chat_parser_params & params) {
+    return common_chat_peg_parse(params.parser, input, is_partial, params);
+}
+
+common_chat_msg common_chat_peg_parse(const common_peg_arena &          parser,
+                                      const std::string &               input,
+                                      bool                              is_partial,
+                                      const common_chat_parser_params & params) {
+    if (parser.empty()) {
+        throw std::runtime_error("Failed to parse due to missing parser definition.");
+    }
+
+    LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), input.c_str());
+
+    common_peg_parse_context ctx(input, is_partial);
+    ctx.debug   = params.debug;
+    auto result = parser.parse(ctx);
+
+    if (result.fail()) {
+        // During partial parsing, return partial results if any AST nodes were captured
+        // This allows streaming to work correctly for formats like FUNC_MARKDOWN_CODE_BLOCK
+        if (is_partial && result.end > 0) {
+            // Try to extract any partial results from what was successfully parsed
+            common_chat_msg msg;
+            msg.role = "assistant";
+            auto mapper = common_chat_peg_unified_mapper(msg);
+            mapper.from_ast(ctx.ast, result);
+
+            if (ctx.debug) {
+                fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str());
+                fflush(stderr);
+            }
+            return msg;
+        }
+        throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " +
+                                 input.substr(result.end));
+    }
+
+    common_chat_msg msg;
+    msg.role = "assistant";
+
+    auto mapper = common_chat_peg_unified_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    if (ctx.debug) {
+        fprintf(stderr, "\nAST for %s parse:\n%s\n", is_partial ? "partial" : "full", ctx.ast.dump().c_str());
+        fflush(stderr);
+    }
+
+    if (!is_partial) {
+        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({ msg }).at(0).dump().c_str());
+    }
+    return msg;
 }
 
 std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates) {
@@ -3344,3 +1412,4 @@ std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_tem
     GGML_ASSERT(chat_templates->template_default != nullptr);
     return chat_templates->template_default->caps.to_map();
 }
+
diff --git a/common/chat.h b/common/chat.h
index 24aa4aab5cd..a4dc21b7569 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -3,12 +3,21 @@
 #pragma once
 
 #include "common.h"
+#include "jinja/parser.h"
+#include "nlohmann/json_fwd.hpp"
 #include "peg-parser.h"
-#include <functional>
+#include "jinja/runtime.h"
+#include "jinja/caps.h"
+#include "nlohmann/json.hpp"
+
 #include <chrono>
+#include <functional>
+#include <map>
 #include <string>
 #include <vector>
-#include <map>
+
+using chat_template_caps = jinja::caps;
+using json = nlohmann::ordered_json;
 
 #include <nlohmann/json_fwd.hpp>
 
@@ -38,21 +47,85 @@ struct common_chat_msg_content_part {
     }
 };
 
+struct common_chat_template {
+    jinja::program prog;
+    std::string bos_tok;
+    std::string eos_tok;
+    std::string src;
+    chat_template_caps caps;
+
+    common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
+        jinja::lexer lexer;
+        auto lexer_res = lexer.tokenize(src);
+        this->prog = jinja::parse_from_tokens(lexer_res);
+
+        this->src = lexer_res.source;
+        this->bos_tok = bos_token;
+        this->eos_tok = eos_token;
+
+        this->caps = jinja::caps_get(prog);
+        // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
+    }
+
+    const std::string & source() const { return src; }
+    const std::string & bos_token() const { return bos_tok; }
+    const std::string & eos_token() const { return eos_tok; }
+
+    // TODO: this is ugly, refactor it somehow
+    json add_system(const json & messages, const std::string & system_prompt) const {
+        GGML_ASSERT(messages.is_array());
+        auto msgs_copy = messages;
+        if (!caps.supports_system_role) {
+            if (msgs_copy.empty()) {
+                msgs_copy.insert(msgs_copy.begin(), json{
+                    {"role", "user"},
+                    {"content", system_prompt}
+                });
+            } else {
+                auto & first_msg = msgs_copy[0];
+                if (!first_msg.contains("content")) {
+                    first_msg["content"] = "";
+                }
+                first_msg["content"] = system_prompt + "\n\n"
+                    + first_msg["content"].get<std::string>();
+            }
+        } else {
+            if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
+                msgs_copy.insert(msgs_copy.begin(), json{
+                    {"role", "system"},
+                    {"content", system_prompt}
+                });
+            } else if (msgs_copy[0].at("role") == "system") {
+                msgs_copy[0]["content"] = system_prompt;
+            }
+        }
+        return msgs_copy;
+    }
+
+    chat_template_caps original_caps() const {
+        return caps;
+    }
+
+};
+
 struct common_chat_msg {
-    std::string role;
-    std::string content;
+    std::string                               role;
+    std::string                               content;
     std::vector<common_chat_msg_content_part> content_parts;
-    std::vector<common_chat_tool_call> tool_calls;
-    std::string reasoning_content;
-    std::string tool_name;
-    std::string tool_call_id;
+    std::vector<common_chat_tool_call>        tool_calls;
+    std::string                               reasoning_content;
+    std::string                               tool_name;
+    std::string                               tool_call_id;
 
     nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const;
 
     bool empty() const {
-        return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
+        return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() &&
+               tool_name.empty() && tool_call_id.empty();
     }
-    void set_tool_call_ids(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
+
+    void set_tool_call_ids(std::vector<std::string> &           ids_cache,
+                           const std::function<std::string()> & gen_tool_call_id) {
         for (auto i = 0u; i < tool_calls.size(); i++) {
             if (ids_cache.size() <= i) {
                 auto id = tool_calls[i].id;
@@ -64,32 +137,28 @@ struct common_chat_msg {
             tool_calls[i].id = ids_cache[i];
         }
     }
+
     bool operator==(const common_chat_msg & other) const {
-        return role == other.role
-            && content == other.content
-            && content_parts == other.content_parts
-            && tool_calls == other.tool_calls
-            && reasoning_content == other.reasoning_content
-            && tool_name == other.tool_name
-            && tool_call_id == other.tool_call_id;
-    }
-    bool operator!=(const common_chat_msg & other) const {
-        return !(*this == other);
+        return role == other.role && content == other.content && content_parts == other.content_parts &&
+               tool_calls == other.tool_calls && reasoning_content == other.reasoning_content &&
+               tool_name == other.tool_name && tool_call_id == other.tool_call_id;
     }
+
+    bool operator!=(const common_chat_msg & other) const { return !(*this == other); }
 };
 
 struct common_chat_msg_diff {
-    std::string reasoning_content_delta;
-    std::string content_delta;
-    size_t tool_call_index = std::string::npos;
+    std::string           reasoning_content_delta;
+    std::string           content_delta;
+    size_t                tool_call_index = std::string::npos;
     common_chat_tool_call tool_call_delta;
 
-    static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
+    static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv,
+                                                           const common_chat_msg & msg_new);
 
     bool operator==(const common_chat_msg_diff & other) const {
-        return content_delta == other.content_delta
-        && tool_call_index == other.tool_call_index
-        && tool_call_delta == other.tool_call_delta;
+        return content_delta == other.content_delta && tool_call_index == other.tool_call_index &&
+               tool_call_delta == other.tool_call_delta;
     }
 };
 
@@ -107,64 +176,37 @@ enum common_chat_tool_choice {
 
 enum common_chat_format {
     COMMON_CHAT_FORMAT_CONTENT_ONLY,
-    COMMON_CHAT_FORMAT_GENERIC,
-    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
-    COMMON_CHAT_FORMAT_MAGISTRAL,
-    COMMON_CHAT_FORMAT_LLAMA_3_X,
-    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
-    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
-    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-    COMMON_CHAT_FORMAT_HERMES_2_PRO,
-    COMMON_CHAT_FORMAT_COMMAND_R7B,
-    COMMON_CHAT_FORMAT_GRANITE,
-    COMMON_CHAT_FORMAT_GPT_OSS,
-    COMMON_CHAT_FORMAT_SEED_OSS,
-    COMMON_CHAT_FORMAT_NEMOTRON_V2,
-    COMMON_CHAT_FORMAT_APERTUS,
-    COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
-    COMMON_CHAT_FORMAT_GLM_4_5,
-    COMMON_CHAT_FORMAT_MINIMAX_M2,
-    COMMON_CHAT_FORMAT_KIMI_K2,
-    COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
-    COMMON_CHAT_FORMAT_APRIEL_1_5,
-    COMMON_CHAT_FORMAT_XIAOMI_MIMO,
-    COMMON_CHAT_FORMAT_SOLAR_OPEN,
-    COMMON_CHAT_FORMAT_EXAONE_MOE,
 
     // These are intended to be parsed by the PEG parser
     COMMON_CHAT_FORMAT_PEG_SIMPLE,
     COMMON_CHAT_FORMAT_PEG_NATIVE,
-    COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
 
-    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
+    COMMON_CHAT_FORMAT_COUNT,  // Not a format, just the # formats
 };
 
 struct common_chat_templates_inputs {
-    std::vector<common_chat_msg> messages;
-    std::string grammar;
-    std::string json_schema;
-    bool add_generation_prompt = true;
-    bool use_jinja = true;
+    std::vector<common_chat_msg>          messages;
+    std::string                           grammar;
+    std::string                           json_schema;
+    bool                                  add_generation_prompt = true;
+    bool                                  use_jinja             = true;
     // Parameters below only supported when use_jinja is true
-    std::vector<common_chat_tool> tools;
-    common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
-    bool parallel_tool_calls = false;
+    std::vector<common_chat_tool>         tools;
+    common_chat_tool_choice               tool_choice         = COMMON_CHAT_TOOL_CHOICE_AUTO;
+    bool                                  parallel_tool_calls = false;
     common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
-    bool enable_thinking = true;
-    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
-    std::map<std::string, std::string> chat_template_kwargs;
-    bool add_bos = false;
-    bool add_eos = false;
+    bool                                  enable_thinking     = true;
+    std::chrono::system_clock::time_point now                 = std::chrono::system_clock::now();
+    std::map<std::string, std::string>    chat_template_kwargs;
+    bool                                  add_bos = false;
+    bool                                  add_eos = false;
 };
 
 struct common_chat_params {
     common_chat_format                  format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
     std::string                         prompt;
     std::string                         grammar;
-    bool                                grammar_lazy = false;
+    bool                                grammar_lazy         = false;
     bool                                thinking_forced_open = false;
     std::vector<common_grammar_trigger> grammar_triggers;
     std::vector<std::string>            preserved_tokens;
@@ -175,13 +217,14 @@ struct common_chat_params {
 // per-message parsing syntax
 // should be derived from common_chat_params
 struct common_chat_parser_params {
-    common_chat_format       format                = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+    common_chat_format      format               = COMMON_CHAT_FORMAT_CONTENT_ONLY;
     common_reasoning_format  reasoning_format      = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
     // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
-    bool                     reasoning_in_content  = false;
-    bool                     thinking_forced_open  = false;
-    bool                     parse_tool_calls      = true;
-    common_peg_arena         parser                = {};
+    bool                    reasoning_in_content = false;
+    bool                    thinking_forced_open = false;
+    bool                    parse_tool_calls     = true;
+    bool                    debug                = false;  // Enable debug output for PEG parser
+    common_peg_arena        parser               = {};
     common_chat_parser_params() = default;
     common_chat_parser_params(const common_chat_params & chat_params) {
         format               = chat_params.format;
@@ -194,45 +237,42 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
 
 void common_chat_templates_free(struct common_chat_templates * tmpls);
 
-struct common_chat_templates_deleter { void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } };
+struct common_chat_templates_deleter {
+    void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); }
+};
 
 typedef std::unique_ptr<struct common_chat_templates, common_chat_templates_deleter> common_chat_templates_ptr;
 
-common_chat_templates_ptr common_chat_templates_init(
-                                    const struct llama_model * model,
-                                           const std::string & chat_template_override,
-                                           const std::string & bos_token_override = "",
-                                           const std::string & eos_token_override = "");
+common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model,
+                                                     const std::string &        chat_template_override,
+                                                     const std::string &        bos_token_override = "",
+                                                     const std::string &        eos_token_override = "");
 
 bool         common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
 std::string  common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");
 
-
-struct common_chat_params      common_chat_templates_apply(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs);
+struct common_chat_params common_chat_templates_apply(const struct common_chat_templates *        tmpls,
+                                                      const struct common_chat_templates_inputs & inputs);
 
 // Format single message, while taking into account the position of that message in chat history
-std::string common_chat_format_single(
-        const struct common_chat_templates * tmpls,
-        const std::vector<common_chat_msg> & past_msg,
-        const common_chat_msg & new_msg,
-        bool add_ass,
-        bool use_jinja);
+std::string common_chat_format_single(const struct common_chat_templates * tmpls,
+                                      const std::vector<common_chat_msg> & past_msg,
+                                      const common_chat_msg &              new_msg,
+                                      bool                                 add_ass,
+                                      bool                                 use_jinja);
 
 // Returns an example of formatted chat
-std::string common_chat_format_example(
-    const struct common_chat_templates * tmpls,
-    bool use_jinja,
-    const std::map<std::string, std::string> & chat_template_kwargs);
+std::string common_chat_format_example(const struct common_chat_templates *       tmpls,
+                                       bool                                       use_jinja,
+                                       const std::map<std::string, std::string> & chat_template_kwargs);
 
-const char*               common_chat_format_name(common_chat_format format);
-common_chat_msg           common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
-common_chat_msg           common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
+const char *            common_chat_format_name(common_chat_format format);
+common_chat_msg           common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & params);
+common_chat_msg           common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & params);
 
 // used by arg and server
-const char *             common_reasoning_format_name(common_reasoning_format format);
-common_reasoning_format  common_reasoning_format_from_name(const std::string & format);
+const char *            common_reasoning_format_name(common_reasoning_format format);
+common_reasoning_format common_reasoning_format_from_name(const std::string & format);
 
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
 
@@ -249,3 +289,10 @@ nlohmann::ordered_json common_chat_msg_diff_to_json_oaicompat(const common_chat_
 
 // get template caps, useful for reporting to server /props endpoint
 std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates);
+
+std::string common_chat_template_direct_apply(
+    const common_chat_template & tmpl,
+    const struct templates_params & inputs,
+    const std::optional<json> & messages_override = std::nullopt,
+    const std::optional<json> & tools_override = std::nullopt,
+    const std::optional<json> & additional_context = std::nullopt);
diff --git a/common/jinja/caps.cpp b/common/jinja/caps.cpp
index f27490f1fb7..94ec2b6a2c8 100644
--- a/common/jinja/caps.cpp
+++ b/common/jinja/caps.cpp
@@ -1,3 +1,4 @@
+#include "log.h"
 #include "value.h"
 #include "runtime.h"
 #include "caps.h"
@@ -16,7 +17,7 @@ using json = nlohmann::ordered_json;
 namespace jinja {
 
 using caps_json_fn = std::function<json()>;
-using caps_analyze_fn = std::function<void(bool, value &, value &)>;
+using caps_analyze_fn = std::function<void(bool, value &, value &, const std::string &)>;
 
 static void caps_try_execute(jinja::program & prog,
                              const caps_json_fn & messages_fn,
@@ -36,16 +37,20 @@ static void caps_try_execute(jinja::program & prog,
     auto tools = ctx.get_val("tools");
 
     bool success = false;
+    std::string result;
     try {
         jinja::runtime runtime(ctx);
-        runtime.execute(prog);
+        auto results = runtime.execute(prog);
+        auto parts = jinja::runtime::gather_string_parts(results);
+        std::string result = parts->as_string().str();
         success = true;
     } catch (const std::exception & e) {
         JJ_DEBUG("Exception during execution: %s", e.what());
+        result = "";
         // ignore exceptions during capability analysis
     }
 
-    analyze_fn(success, messages, tools);
+    analyze_fn(success, messages, tools, result);
 }
 
 // for debugging only
@@ -105,7 +110,7 @@ caps caps_get(jinja::program & prog) {
             // tools
             return json{nullptr};
         },
-        [&](bool, value & messages, value &) {
+        [&](bool, value & messages, value &, const std::string &) {
             auto & content = messages->at(0)->at("content");
             caps_print_stats(content, "messages[0].content");
             if (has_op(content, "selectattr") || has_op(content, "array_access")) {
@@ -136,7 +141,7 @@ caps caps_get(jinja::program & prog) {
             // tools
             return json::array();
         },
-        [&](bool, value & messages, value &) {
+        [&](bool, value & messages, value &, const std::string &) {
             auto & content = messages->at(0)->at("content");
             caps_print_stats(content, "messages[0].content");
             if (!content->stats.used) {
@@ -160,7 +165,7 @@ caps caps_get(jinja::program & prog) {
                     {"content", "Assistant message"},
                     {"tool_calls", json::array({
                         {
-                            {"id", "call1"},
+                            {"id", "call00001"},
                             {"type", "function"},
                             {"function", {
                                 {"name", "tool1"},
@@ -170,10 +175,10 @@ caps caps_get(jinja::program & prog) {
                             }}
                         },
                         {
-                            {"id", "call2"},
+                            {"id", "call00002"},
                             {"type", "function"},
                             {"function", {
-                                {"name", "tool2"},
+                                {"name", "tool1"},
                                 {"arguments", {
                                     {"arg", "value"}
                                 }}
@@ -181,6 +186,15 @@ caps caps_get(jinja::program & prog) {
                         }
                     })}
                 },
+                {
+                    {"role", "tool"},
+                    {"content", "Tool response"},
+                    {"tool_call_id", "call00001"}
+                },
+                {
+                    {"role", "assistant"},
+                    {"content", "The tool response was 'tool response'"}
+                },
                 {
                     {"role", "user"},
                     {"content", "User message"},
@@ -194,7 +208,7 @@ caps caps_get(jinja::program & prog) {
                     {"name", "tool"},
                     {"type", "function"},
                     {"function", {
-                        {"name", "tool"},
+                        {"name", "tool1"},
                         {"description", "Tool description"},
                         {"parameters", {
                             {"type", "object"},
@@ -210,7 +224,7 @@ caps caps_get(jinja::program & prog) {
                 },
             });
         },
-        [&](bool success, value & messages, value & tools) {
+        [&](bool success, value & messages, value & tools, const std::string & res) {
             if (!success) {
                 result.supports_tool_calls = false;
                 result.supports_tools = false;
@@ -219,8 +233,11 @@ caps caps_get(jinja::program & prog) {
 
             auto & tool_name = tools->at(0)->at("function")->at("name");
             caps_print_stats(tool_name, "tools[0].function.name");
+            caps_print_stats(tools, "tools");
             if (!tool_name->stats.used) {
-                result.supports_tools = false;
+                if (!tools->stats.used && res.find(tool_name->as_string().str()) == std::string::npos) {
+                    result.supports_tools = false;
+                }
             }
 
             auto & tool_calls = messages->at(1)->at("tool_calls");;
@@ -263,7 +280,7 @@ caps caps_get(jinja::program & prog) {
             // tools
             return json::array();
         },
-        [&](bool, value & messages, value &) {
+        [&](bool, value & messages, value &, const std::string &) {
             auto & content = messages->at(1)->at("reasoning_content");
             caps_print_stats(content, "messages[1].reasoning_content");
             if (content->stats.used) {
diff --git a/common/jinja/value.cpp b/common/jinja/value.cpp
index 2aa156b1778..17d7eae764e 100644
--- a/common/jinja/value.cpp
+++ b/common/jinja/value.cpp
@@ -428,6 +428,22 @@ const func_builtins & global_builtins() {
             bool res = it != builtins.end();
             return mk_val<value_bool>(res);
         }},
+        {"test_is_in", [](const func_args & args) -> value {
+            args.ensure_count(2, 2);
+            value val_needle = args.get_pos(0);
+            value val_haystack = args.get_pos(1);
+            const auto & haystack = is_val<value_array>(val_haystack) ? val_haystack->as_array() : std::vector<value>(1, val_haystack);
+            for (auto it = haystack.cbegin(); it != haystack.cend(); it++) {
+                if ((*it)->type() == val_needle->type()) {
+                    if (is_val<value_string>(val_haystack) ?
+                        (*it)->as_string().str().find(val_needle->as_string().str()) != std::string::npos :
+                        value_compare(*it, val_needle, value_compare_op::eq)) {
+                        return mk_val<value_bool>(true);
+                    }
+                }
+            }
+            return mk_val<value_bool>(false);
+        }},
         {"test_is_sameas", [](const func_args & args) -> value {
             // Check if an object points to the same memory address as another object
             (void)args;
@@ -715,8 +731,26 @@ const func_builtins & value_string_t::get_builtins() const {
             return args.get_pos(0);
         }},
         {"tojson", tojson},
-        {"indent", [](const func_args &) -> value {
-            throw not_implemented_exception("String indent builtin not implemented");
+        {"indent", [](const func_args &args) -> value {
+            // no support for "first" as that would require us to somehow access generation context
+            args.ensure_count(2, 4);
+            args.ensure_vals<value_string, value_int, value_bool, value_bool>(true, true, false, false);
+
+            auto input = args.get_pos(0);
+            auto arg0 = args.get_pos(1);
+
+            int count = arg0->as_int();
+            if (count <= 0) {
+                throw raised_exception("indent must be a positive number");
+            }
+            std::string indented;
+            for (int i = 0; i < count; i++) {
+                indented.append(" ");
+            }
+            indented.append(input->as_string().str());
+            auto res = mk_val<value_string>(indented);
+            res->val_str.mark_input_based_on(input->as_string());
+            return res;
         }},
         {"join", [](const func_args &) -> value {
             throw not_implemented_exception("String join builtin not implemented");
diff --git a/common/jinja/value.h b/common/jinja/value.h
index 1c04760a08c..df3eeaf444f 100644
--- a/common/jinja/value.h
+++ b/common/jinja/value.h
@@ -502,12 +502,21 @@ struct value_object_t : public value_t {
     virtual bool is_immutable() const override { return false; }
     virtual const std::vector<std::pair<value, value>> & as_ordered_object() const override { return val_obj; }
     virtual string as_string() const override {
+        // Use JSON format for object string representation to ensure compatibility
+        // when concatenated in templates (e.g., '{"name": ' + arguments + '}')
         std::ostringstream ss;
         ss << "{";
         for (size_t i = 0; i < val_obj.size(); i++) {
             if (i > 0) ss << ", ";
             auto & [key, val] = val_obj.at(i);
-            ss << value_to_string_repr(key) << ": " << value_to_string_repr(val);
+            // Use double quotes for keys (JSON format)
+            ss << "\"" << key->as_string().str() << "\": ";
+            if (is_val<value_string>(val)) {
+                // Strings need to be quoted in JSON
+                ss << "\"" << val->as_string().str() << "\"";
+            } else {
+                ss << val->as_string().str();
+            }
         }
         ss << "}";
         return ss.str();
@@ -617,6 +626,8 @@ struct value_undefined_t : public value_t {
     value_undefined_t(const std::string & h = "") : hint(h) {}
     virtual std::string type() const override { return hint.empty() ? "Undefined" : "Undefined (hint: '" + hint + "')"; }
     virtual bool is_undefined() const override { return true; }
+    // note: some templates use "is none" as equivalent to "is undefined"
+    virtual bool is_none() const override { return true; }
     virtual bool as_bool() const override { return false; }
     virtual std::string as_repr() const override { return type(); }
     virtual const func_builtins & get_builtins() const override;
diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index 2f67c74d796..efd2c8ef952 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -27,11 +27,11 @@ static std::string build_repetition(const std::string & item_rule, int min_items
     if (separator_rule.empty()) {
         if (min_items == 1 && !has_max) {
             return item_rule + "+";
-        } else if (min_items == 0 && !has_max) {
+        } 
+        if (min_items == 0 && !has_max) {
             return item_rule + "*";
-        } else {
-            return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
-        }
+        } 
+        return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
     }
 
     auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items);
@@ -41,7 +41,7 @@ static std::string build_repetition(const std::string & item_rule, int min_items
     return result;
 }
 
-static void _build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
+static void build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
     auto has_min = min_value != std::numeric_limits<int64_t>::min();
     auto has_max = max_value != std::numeric_limits<int64_t>::max();
 
@@ -128,14 +128,14 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
     if (has_min && has_max) {
         if (min_value < 0 && max_value < 0) {
             out << "\"-\" (";
-            _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
+            build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
             out << ")";
             return;
         }
 
         if (min_value < 0) {
             out << "\"-\" (";
-            _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
+            build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
             out << ") | ";
             min_value = 0;
         }
@@ -159,7 +159,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
     if (has_min) {
         if (min_value < 0) {
             out << "\"-\" (";
-            _build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
+            build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
             out << ") | [0] | [1-9] ";
             more_digits(0, decimals_left - 1);
         } else if (min_value == 0) {
@@ -194,7 +194,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
             }
             digit_range(c, c);
             out << " (";
-            _build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
+            build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
             out << ")";
             if (c < '9') {
                 out << " | ";
@@ -213,10 +213,10 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
                 more_digits(0, less_decimals);
                 out << " | ";
             }
-            _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
+            build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
         } else {
             out << "\"-\" (";
-            _build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
+            build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
             out << ")";
         }
         return;
@@ -232,7 +232,7 @@ struct BuiltinRule {
     std::vector<std::string> deps;
 };
 
-std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
+static std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
     {"boolean", {"(\"true\" | \"false\") space", {}}},
     {"decimal-part", {"[0-9]{1,16}", {}}},
     {"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}},
@@ -247,7 +247,7 @@ std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
     {"null", {"\"null\" space", {}}},
 };
 
-std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
+static std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
     {"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
     {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
     {"date-time", {"date \"T\" time", {"date", "time"}}},
@@ -260,22 +260,26 @@ static bool is_reserved_name(const std::string & name) {
     static const std::unordered_set<std::string> RESERVED_NAMES = [] {
         std::unordered_set<std::string> s;
         s.insert("root");
-        for (const auto & p : PRIMITIVE_RULES) s.insert(p.first);
-        for (const auto & p : STRING_FORMAT_RULES) s.insert(p.first);
+        for (const auto & p : PRIMITIVE_RULES) {
+            s.insert(p.first);
+        }
+        for (const auto & p : STRING_FORMAT_RULES) {
+            s.insert(p.first);
+        }
         return s;
     }();
     return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
 }
 
-std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
-std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
-std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
-std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
+static std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
+static std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
+static std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
+static std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
     {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}, {'\\', "\\\\"}
 };
 
-std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
-std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
+static std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
+static std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
 
 static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch  &)> & replacement) {
     std::smatch match;
@@ -322,19 +326,19 @@ class common_schema_converter {
         if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) {
             _rules[esc_name] = rule;
             return esc_name;
-        } else {
-            int i = 0;
-            while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
-                i++;
-            }
-            std::string key = esc_name + std::to_string(i);
-            _rules[key] = rule;
-            return key;
         }
+        int i = 0;
+        while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
+            i++;
+        }
+        std::string key = esc_name + std::to_string(i);
+        _rules[key] = rule;
+        return key;
     }
 
     std::string _generate_union_rule(const std::string & name, const std::vector<json> & alt_schemas) {
         std::vector<std::string> rules;
+        rules.reserve(alt_schemas.size());
         for (size_t i = 0; i < alt_schemas.size(); i++) {
             rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
         }
@@ -398,6 +402,7 @@ class common_schema_converter {
                 flush_literal();
 
                 std::vector<std::string> results;
+                results.reserve(ret.size());
                 for (const auto & item : ret) {
                     results.push_back(to_rule(item));
                 }
@@ -551,7 +556,7 @@ class common_schema_converter {
             TrieNode() : is_end_of_string(false) {}
 
             void insert(const std::string & string) {
-                auto node = this;
+                auto *node = this;
                 for (char c : string) {
                     node = &node->children[c];
                 }
@@ -676,7 +681,7 @@ class common_schema_converter {
                 if (ks.empty()) {
                     return res;
                 }
-                std::string k = ks[0];
+                const std::string& k = ks[0];
                 std::string kv_rule_name = prop_kv_rule_names[k];
                 std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
                 if (first_is_optional) {
@@ -779,7 +784,7 @@ class common_schema_converter {
                         std::string pointer = ref.substr(ref.find('#') + 1);
                         std::vector<std::string> tokens = string_split(pointer, "/");
                         for (size_t i = 1; i < tokens.size(); ++i) {
-                            std::string sel = tokens[i];
+                            const std::string& sel = tokens[i];
                             if (target.is_object() && target.contains(sel)) {
                                 target = target[sel];
                             } else if (target.is_array()) {
@@ -802,7 +807,7 @@ class common_schema_converter {
                         _refs[ref] = target;
                     }
                 } else {
-                    for (auto & kv : n.items()) {
+                    for (const auto & kv : n.items()) {
                         visit_refs(kv.value());
                     }
                 }
@@ -812,7 +817,7 @@ class common_schema_converter {
         visit_refs(schema);
     }
 
-    std::string _generate_constant_rule(const json & value) {
+    static std::string _generate_constant_rule(const json & value) {
         return format_literal(value.dump());
     }
 
@@ -823,10 +828,12 @@ class common_schema_converter {
 
         if (schema.contains("$ref")) {
             return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
-        } else if (schema.contains("oneOf") || schema.contains("anyOf")) {
+        } 
+        if (schema.contains("oneOf") || schema.contains("anyOf")) {
             std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
             return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
-        } else if (schema_type.is_array()) {
+        } 
+        if (schema_type.is_array()) {
             std::vector<json> schema_types;
             for (const auto & t : schema_type) {
                 json schema_copy(schema);
@@ -834,15 +841,18 @@ class common_schema_converter {
                 schema_types.push_back(schema_copy);
             }
             return _add_rule(rule_name, _generate_union_rule(name, schema_types));
-        } else if (schema.contains("const")) {
+        } 
+        if (schema.contains("const")) {
             return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space");
-        } else if (schema.contains("enum")) {
+        } 
+        if (schema.contains("enum")) {
             std::vector<std::string> enum_values;
             for (const auto & v : schema["enum"]) {
                 enum_values.push_back(_generate_constant_rule(v));
             }
             return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space");
-        } else if ((schema_type.is_null() || schema_type == "object")
+        } 
+        if ((schema_type.is_null() || schema_type == "object")
                 && (schema.contains("properties") ||
                     (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
             std::unordered_set<std::string> required;
@@ -863,11 +873,12 @@ class common_schema_converter {
                 _build_object_rule(
                     properties, required, name,
                     schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
-        } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
+        } 
+        if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
             std::unordered_set<std::string> required;
             std::vector<std::pair<std::string, json>> properties;
             std::map<std::string, size_t> enum_values;
-            std::string hybrid_name = name;
+            const std::string& hybrid_name = name;
             std::function<void(const json &, bool)> add_component = [&](const json & comp_schema, bool is_required) {
                 if (comp_schema.contains("$ref")) {
                     add_component(_refs[comp_schema["$ref"]], is_required);
@@ -890,9 +901,9 @@ class common_schema_converter {
                   // todo warning
                 }
             };
-            for (auto & t : schema["allOf"]) {
+            for (const auto & t : schema["allOf"]) {
                 if (t.contains("anyOf")) {
-                    for (auto & tt : t["anyOf"]) {
+                    for (const auto & tt : t["anyOf"]) {
                         add_component(tt, false);
                     }
                 } else {
@@ -911,7 +922,8 @@ class common_schema_converter {
                 }
             }
             return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
-        } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
+        } 
+        if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
             json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
             if (items.is_array()) {
                 std::string rule = "\"[\" space ";
@@ -923,27 +935,31 @@ class common_schema_converter {
                 }
                 rule += " \"]\" space";
                 return _add_rule(rule_name, rule);
-            } else {
-                std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
-                int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
-                json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
-                int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
-
-                return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
             }
-        } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
+            std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
+            int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
+            json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
+            int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
+
+            return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
+        } 
+        if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
             return _visit_pattern(schema["pattern"], rule_name);
-        } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
+        } 
+        if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
             return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
-        } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
+        } 
+        if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
             auto prim_name = schema_format + "-string";
             return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name)));
-        } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
+        } 
+        if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
             std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
             int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
             int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
             return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
-        } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
+        } 
+        if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
             int64_t min_value = std::numeric_limits<int64_t>::min();
             int64_t max_value = std::numeric_limits<int64_t>::max();
             if (schema.contains("minimum")) {
@@ -958,19 +974,19 @@ class common_schema_converter {
             }
             std::stringstream out;
             out << "(";
-            _build_min_max_int(min_value, max_value, out);
+            build_min_max_int(min_value, max_value, out);
             out << ") space";
             return _add_rule(rule_name, out.str());
-        } else if (schema.empty() || schema_type == "object") {
+        } 
+        if (schema.empty() || schema_type == "object") {
             return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
-        } else {
-            if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
-                _errors.push_back("Unrecognized schema: " + schema.dump());
-                return "";
-            }
-            // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
-            return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
+        } 
+        if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
+            _errors.push_back("Unrecognized schema: " + schema.dump());
+            return "";
         }
+        // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
+        return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
     }
 
     void check_errors() {
@@ -985,7 +1001,7 @@ class common_schema_converter {
     std::string format_grammar() {
         std::stringstream ss;
         for (const auto & kv : _rules) {
-            ss << kv.first << " ::= " << kv.second << std::endl;
+            ss << kv.first << " ::= " << kv.second << '\n';
         }
         return ss.str();
     }
diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp
index f2fc84500f7..7a4c1cc3983 100644
--- a/common/peg-parser.cpp
+++ b/common/peg-parser.cpp
@@ -1,28 +1,32 @@
-#include "common.h"
 #include "peg-parser.h"
+
+#include "common.h"
 #include "json-schema-to-grammar.h"
+#include "log.h"
 #include "unicode.h"
 
-#include <nlohmann/json.hpp>
-
 #include <algorithm>
 #include <initializer_list>
 #include <map>
 #include <memory>
+#include <nlohmann/json.hpp>
 #include <regex>
 #include <stdexcept>
 #include <unordered_set>
 
 // Trick to catch missing branches
-template <typename T>
-inline constexpr bool is_always_false_v = false;
+template <typename T> inline constexpr bool is_always_false_v = false;
 
 const char * common_peg_parse_result_type_name(common_peg_parse_result_type type) {
     switch (type) {
-        case COMMON_PEG_PARSE_RESULT_FAIL:            return "fail";
-        case COMMON_PEG_PARSE_RESULT_SUCCESS:         return "success";
-        case COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT: return "need_more_input";
-        default:                                      return "unknown";
+        case COMMON_PEG_PARSE_RESULT_FAIL:
+            return "fail";
+        case COMMON_PEG_PARSE_RESULT_SUCCESS:
+            return "success";
+        case COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT:
+            return "need_more_input";
+        default:
+            return "unknown";
     }
 }
 
@@ -34,81 +38,88 @@ static bool is_hex_digit(const char c) {
 // This is used in common_peg_until_parser and to build a GBNF exclusion grammar
 struct trie {
     struct node {
-        size_t depth = 0;
-        std::map<unsigned char, size_t> children;
-        bool is_word;
+        std::map<uint32_t, size_t> children;
+        bool                       is_word = false;
     };
 
     std::vector<node> nodes;
 
     trie(const std::vector<std::string> & words) {
-      create_node(); // root node
-      for (const auto & w : words) {
-          insert(w);
-      }
+        create_node();  // root node
+        for (const auto & w : words) {
+            insert(w);
+        }
     }
 
     enum match_result { NO_MATCH, PARTIAL_MATCH, COMPLETE_MATCH };
 
     // Check if a delimiter starts at the given position
     match_result check_at(std::string_view sv, size_t start_pos) const {
-        size_t current = 0; // Start at root
-        size_t pos = start_pos;
+        size_t current = 0;  // Start at root
+        size_t pos     = start_pos;
+
+        // LOG_DBG("%s: checking at pos %zu, sv='%s'\n", __func__, start_pos, std::string(sv).c_str());
 
         while (pos < sv.size()) {
-            auto it = nodes[current].children.find(sv[pos]);
+            auto result = parse_utf8_codepoint(sv, pos);
+            if (result.status != utf8_parse_result::SUCCESS) {
+                break;
+            }
+
+            auto it = nodes[current].children.find(result.codepoint);
             if (it == nodes[current].children.end()) {
                 // Can't continue matching
-                return match_result{match_result::NO_MATCH};
+                return match_result{ match_result::NO_MATCH };
             }
 
             current = it->second;
-            pos++;
+            pos += result.bytes_consumed;
 
             // Check if we've matched a complete word
             if (nodes[current].is_word) {
-                return match_result{match_result::COMPLETE_MATCH};
+                // LOG_DBG("%s: complete match found at pos %zu\n", __func__, pos);
+                return match_result{ match_result::COMPLETE_MATCH };
             }
         }
 
         // Reached end of input while still in the trie (not at root)
         if (current != 0) {
             // We're in the middle of a potential match
-            return match_result{match_result::PARTIAL_MATCH};
+            return match_result{ match_result::PARTIAL_MATCH };
         }
 
         // Reached end at root (no match)
-        return match_result{match_result::NO_MATCH};
+        return match_result{ match_result::NO_MATCH };
     }
 
     struct prefix_and_next {
-        std::string prefix;
-        std::string next_chars;
+        std::vector<uint32_t> prefix;
+        std::vector<uint32_t> next_chars;
     };
 
     std::vector<prefix_and_next> collect_prefix_and_next() {
-        std::string prefix;
+        std::vector<uint32_t>        prefix;
         std::vector<prefix_and_next> result;
         collect_prefix_and_next(0, prefix, result);
         return result;
     }
 
   private:
-    void collect_prefix_and_next(size_t index, std::string & prefix, std::vector<prefix_and_next> & out) {
+    void collect_prefix_and_next(size_t index, std::vector<uint32_t> & prefix, std::vector<prefix_and_next> & out) {
         if (!nodes[index].is_word) {
             if (!nodes[index].children.empty()) {
-                std::string chars;
+                std::vector<uint32_t> chars;
                 chars.reserve(nodes[index].children.size());
                 for (const auto & p : nodes[index].children) {
                     chars.push_back(p.first);
                 }
-                out.emplace_back(prefix_and_next{prefix, chars});
+                out.emplace_back(prefix_and_next{ prefix, chars });
             }
         }
 
         for (const auto & p : nodes[index].children) {
-            unsigned char ch = p.first;
-            auto child = p.second;
+            uint32_t ch    = p.first;
+            auto     child = p.second;
             prefix.push_back(ch);
             collect_prefix_and_next(child, prefix, out);
             prefix.pop_back();
@@ -123,13 +134,21 @@ struct trie {
 
     void insert(const std::string & word) {
         size_t current = 0;
-        for (unsigned char ch : word) {
+        size_t pos     = 0;
+        while (pos < word.length()) {
+            auto result = parse_utf8_codepoint(word, pos);
+            if (result.status != utf8_parse_result::SUCCESS) {
+                break;
+            }
+
+            uint32_t ch = result.codepoint;
+            pos += result.bytes_consumed;
+
             auto it = nodes[current].children.find(ch);
             if (it == nodes[current].children.end()) {
-                size_t child = create_node();
-                nodes[child].depth = nodes[current].depth + 1;
+                size_t child                = create_node();
                 nodes[current].children[ch] = child;
-                current = child;
+                current                     = child;
             } else {
                 current = it->second;
             }
@@ -140,14 +159,14 @@ struct trie {
 
 static std::pair<uint32_t, size_t> parse_hex_escape(const std::string & str, size_t pos, int hex_count) {
     if (pos + hex_count > str.length()) {
-        return {0, 0};
+        return { 0, 0 };
     }
 
     uint32_t value = 0;
     for (int i = 0; i < hex_count; i++) {
         char c = str[pos + i];
         if (!is_hex_digit(c)) {
-            return {0, 0};
+            return { 0, 0 };
         }
         value <<= 4;
         if ('a' <= c && c <= 'f') {
@@ -160,53 +179,64 @@ static std::pair<uint32_t, size_t> parse_hex_escape(const std::string & str, siz
             break;
         }
     }
-    return {value, static_cast<size_t>(hex_count)};
+    return { value, static_cast<size_t>(hex_count) };
 }
 
 static std::pair<uint32_t, size_t> parse_char_class_char(const std::string & content, size_t pos) {
     if (content[pos] == '\\' && pos + 1 < content.length()) {
         switch (content[pos + 1]) {
-            case 'x': {
-                auto result = parse_hex_escape(content, pos + 2, 2);
-                if (result.second > 0) {
-                    return {result.first, 2 + result.second};
+            case 'x':
+                {
+                    auto result = parse_hex_escape(content, pos + 2, 2);
+                    if (result.second > 0) {
+                        return { result.first, 2 + result.second };
+                    }
+                    // Invalid escape, treat as literal 'x'
+                    return { static_cast<uint32_t>('x'), 2 };
                 }
-                // Invalid escape, treat as literal 'x'
-                return {static_cast<uint32_t>('x'), 2};
-            }
-            case 'u': {
-                auto result = parse_hex_escape(content, pos + 2, 4);
-                if (result.second > 0) {
-                    return {result.first, 2 + result.second};
+            case 'u':
+                {
+                    auto result = parse_hex_escape(content, pos + 2, 4);
+                    if (result.second > 0) {
+                        return { result.first, 2 + result.second };
+                    }
+                    // Invalid escape, treat as literal 'u'
+                    return { static_cast<uint32_t>('u'), 2 };
                 }
-                // Invalid escape, treat as literal 'u'
-                return {static_cast<uint32_t>('u'), 2};
-            }
-            case 'U': {
-                auto result = parse_hex_escape(content, pos + 2, 8);
-                if (result.second > 0) {
-                    return {result.first, 2 + result.second};
+            case 'U':
+                {
+                    auto result = parse_hex_escape(content, pos + 2, 8);
+                    if (result.second > 0) {
+                        return { result.first, 2 + result.second };
+                    }
+                    // Invalid escape, treat as literal 'U'
+                    return { static_cast<uint32_t>('U'), 2 };
                 }
-                // Invalid escape, treat as literal 'U'
-                return {static_cast<uint32_t>('U'), 2};
-            }
-            case 'n':  return {'\n', 2};
-            case 't':  return {'\t', 2};
-            case 'r':  return {'\r', 2};
-            case '\\': return {'\\', 2};
-            case ']':  return {']', 2};
-            case '[':  return {'[', 2};
-            default:   return {static_cast<uint32_t>(content[pos + 1]), 2};
+            case 'n':
+                return { '\n', 2 };
+            case 't':
+                return { '\t', 2 };
+            case 'r':
+                return { '\r', 2 };
+            case '\\':
+                return { '\\', 2 };
+            case ']':
+                return { ']', 2 };
+            case '[':
+                return { '[', 2 };
+            default:
+                return { static_cast<uint32_t>(content[pos + 1]), 2 };
         }
     }
 
     // Regular character - return as codepoint
-    return {static_cast<uint32_t>(static_cast<unsigned char>(content[pos])), 1};
+    return { static_cast<uint32_t>(static_cast<unsigned char>(content[pos])), 1 };
 }
 
-static std::pair<std::vector<common_peg_chars_parser::char_range>, bool> parse_char_classes(const std::string & classes) {
+static std::pair<std::vector<common_peg_chars_parser::char_range>, bool> parse_char_classes(
+    const std::string & classes) {
     std::vector<common_peg_chars_parser::char_range> ranges;
-    bool negated = false;
+    bool                                             negated = false;
 
     std::string content = classes;
     if (content.front() == '[') {
@@ -231,14 +261,14 @@ static std::pair<std::vector<common_peg_chars_parser::char_range>, bool> parse_c
         if (i + 1 < content.length() && content[i] == '-') {
             // Range detected
             auto [end, end_len] = parse_char_class_char(content, i + 1);
-            ranges.push_back(common_peg_chars_parser::char_range{start, end});
+            ranges.push_back(common_peg_chars_parser::char_range{ start, end });
             i += 1 + end_len;
         } else {
-            ranges.push_back(common_peg_chars_parser::char_range{start, start});
+            ranges.push_back(common_peg_chars_parser::char_range{ start, start });
         }
     }
 
-    return {ranges, negated};
+    return { ranges, negated };
 }
 
 void common_peg_ast_arena::visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const {
@@ -279,29 +309,53 @@ common_peg_parser_id common_peg_arena::get_rule(const std::string & name) const
 }
 
 struct parser_executor {
-    const common_peg_arena & arena;
+    const common_peg_arena &   arena;
     common_peg_parse_context & ctx;
-    size_t start_pos;
+    size_t                     start_pos;
+
+    parser_executor(const common_peg_arena & arena, common_peg_parse_context & ctx, size_t start) :
+        arena(arena),
+        ctx(ctx),
+        start_pos(start) {}
 
-    parser_executor(const common_peg_arena & arena, common_peg_parse_context & ctx, size_t start)
-        : arena(arena), ctx(ctx), start_pos(start) {}
+    std::string debug_indent() const { return std::string(ctx.parse_depth * 2, ' '); }
+
+    std::string debug_input_snippet(size_t pos, size_t len = 60) const {
+        if (pos >= ctx.input.size()) {
+            return "<EOF>";
+        }
+        auto        snippet = ctx.input.substr(pos, len);
+        // Escape newlines for display
+        std::string result;
+        for (char c : snippet) {
+            if (c == '\n') {
+                result += "\\n";
+            } else if (c == '\r') {
+                result += "\\r";
+            } else if (c == '\t') {
+                result += "\\t";
+            } else {
+                result += c;
+            }
+        }
+        if (pos + len < ctx.input.size()) {
+            result += "...";
+        }
+        return result;
+    }
 
     common_peg_parse_result operator()(const common_peg_epsilon_parser & /* p */) const {
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos);
     }
 
     common_peg_parse_result operator()(const common_peg_start_parser & /* p */) const {
-        return common_peg_parse_result(
-            start_pos == 0 ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL,
-            start_pos
-        );
+        return common_peg_parse_result(start_pos == 0 ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL,
+                                       start_pos);
     }
 
     common_peg_parse_result operator()(const common_peg_end_parser & /* p */) const {
         return common_peg_parse_result(
-            start_pos >= ctx.input.size() ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL,
-            start_pos
-        );
+            start_pos >= ctx.input.size() ? COMMON_PEG_PARSE_RESULT_SUCCESS : COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
     }
 
     common_peg_parse_result operator()(const common_peg_literal_parser & p) {
@@ -323,12 +377,39 @@ struct parser_executor {
     }
 
     common_peg_parse_result operator()(const common_peg_sequence_parser & p) {
-        auto pos = start_pos;
+        if (ctx.debug) {
+            LOG_DBG("%sSEQ start at %zu '%s' (%zu children)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.children.size());
+        }
+        ctx.parse_depth++;
+
+        auto                           pos = start_pos;
         std::vector<common_peg_ast_id> nodes;
 
-        for (const auto & child_id : p.children) {
+        for (size_t i = 0; i < p.children.size(); i++) {
+            const auto & child_id = p.children[i];
+            if (ctx.debug) {
+                fprintf(stderr, "%sSEQ child %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str());
+            }
             auto result = arena.parse(child_id, ctx, pos);
+
+            if (ctx.debug) {
+                fprintf(stderr, "%sSEQ child %zu: %s at %zu->%zu\n", debug_indent().c_str(), i,
+                        common_peg_parse_result_type_name(result.type), result.start, result.end);
+            }
+
             if (result.fail()) {
+                ctx.parse_depth--;
+                if (ctx.is_partial && result.end >= ctx.input.size()) {
+                    if (ctx.debug) {
+                        fprintf(stderr, "%sSEQ -> NEED_MORE (child failed at end)\n", debug_indent().c_str());
+                    }
+                    return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end,
+                                                   std::move(nodes));
+                }
+                if (ctx.debug) {
+                    fprintf(stderr, "%sSEQ -> FAIL\n", debug_indent().c_str());
+                }
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, result.end);
             }
 
@@ -337,43 +418,93 @@ struct parser_executor {
             }
 
             if (result.need_more_input()) {
-                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes));
+                ctx.parse_depth--;
+                if (ctx.debug) {
+                    fprintf(stderr, "%sSEQ -> NEED_MORE\n", debug_indent().c_str());
+                }
+                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end,
+                                               std::move(nodes));
             }
 
             pos = result.end;
         }
 
+        ctx.parse_depth--;
+        if (ctx.debug) {
+            fprintf(stderr, "%sSEQ -> SUCCESS at %zu->%zu\n", debug_indent().c_str(), start_pos, pos);
+        }
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes));
     }
 
     common_peg_parse_result operator()(const common_peg_choice_parser & p) {
+        if (ctx.debug) {
+            fprintf(stderr, "%sCHOICE start at %zu '%s' (%zu options)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.children.size());
+        }
+        ctx.parse_depth++;
+
         auto pos = start_pos;
-        for (const auto & child_id : p.children) {
+        for (size_t i = 0; i < p.children.size(); i++) {
+            const auto & child_id = p.children[i];
+            if (ctx.debug) {
+                fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str());
+            }
             auto result = arena.parse(child_id, ctx, pos);
+            if (ctx.debug) {
+                fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i,
+                        common_peg_parse_result_type_name(result.type));
+            }
             if (!result.fail()) {
+                ctx.parse_depth--;
+                if (ctx.debug) {
+                    fprintf(stderr, "%sCHOICE -> %s (option %zu)\n", debug_indent().c_str(),
+                            common_peg_parse_result_type_name(result.type), i);
+                }
                 return result;
             }
         }
 
+        ctx.parse_depth--;
+        if (ctx.debug) {
+            fprintf(stderr, "%sCHOICE -> FAIL (no options matched)\n", debug_indent().c_str());
+        }
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
     }
 
     common_peg_parse_result operator()(const common_peg_repetition_parser & p) {
-        auto pos = start_pos;
-        int match_count = 0;
+        if (ctx.debug) {
+            fprintf(stderr, "%sREPEAT start at %zu '%s' (min=%d, max=%d)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.min_count, p.max_count);
+        }
+        ctx.parse_depth++;
+
+        auto                           pos         = start_pos;
+        int                            match_count = 0;
         std::vector<common_peg_ast_id> nodes;
 
         // Try to match up to max_count times (or unlimited if max_count is -1)
         while (p.max_count == -1 || match_count < p.max_count) {
             if (pos >= ctx.input.size()) {
+                if (ctx.debug) {
+                    fprintf(stderr, "%sREPEAT: at end of input, count=%d\n", debug_indent().c_str(), match_count);
+                }
                 break;
             }
 
             auto result = arena.parse(p.child, ctx, pos);
 
+            if (ctx.debug) {
+                fprintf(stderr, "%sREPEAT iter %d: %s at %zu->%zu, nodes=%zu\n", debug_indent().c_str(), match_count,
+                        common_peg_parse_result_type_name(result.type), result.start, result.end, result.nodes.size());
+                fprintf(stderr, "%sREPEAT CHILD: %s\n", debug_indent().c_str(), arena.dump(p.child).c_str());
+            }
+
             if (result.success()) {
                 // Prevent infinite loop on empty matches
                 if (result.end == pos) {
+                    if (ctx.debug) {
+                        fprintf(stderr, "%s  REPEAT: empty match, stopping\n", debug_indent().c_str());
+                    }
                     break;
                 }
 
@@ -391,21 +522,45 @@ struct parser_executor {
                     nodes.insert(nodes.end(), result.nodes.begin(), result.nodes.end());
                 }
 
-                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes));
+                ctx.parse_depth--;
+                if (ctx.debug) {
+                    fprintf(stderr, "%sREPEAT -> NEED_MORE (count=%d, nodes=%zu)\n", debug_indent().c_str(),
+                            match_count, nodes.size());
+                }
+                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end,
+                                               std::move(nodes));
             }
 
             // Child failed - stop trying
+            if (ctx.debug) {
+                fprintf(stderr, "%sREPEAT: child failed, stopping\n", debug_indent().c_str());
+            }
             break;
         }
 
         // Check if we got enough matches
         if (p.min_count > 0 && match_count < p.min_count) {
+            ctx.parse_depth--;
             if (pos >= ctx.input.size() && ctx.is_partial) {
-                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos, std::move(nodes));
+                if (ctx.debug) {
+                    fprintf(stderr, "%sREPEAT -> NEED_MORE (not enough matches: %d < %d)\n", debug_indent().c_str(),
+                            match_count, p.min_count);
+                }
+                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos,
+                                               std::move(nodes));
+            }
+            if (ctx.debug) {
+                fprintf(stderr, "%sREPEAT -> FAIL (not enough matches: %d < %d)\n", debug_indent().c_str(), match_count,
+                        p.min_count);
             }
             return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos);
         }
 
+        ctx.parse_depth--;
+        if (ctx.debug) {
+            fprintf(stderr, "%sREPEAT -> SUCCESS (count=%d, nodes=%zu)\n", debug_indent().c_str(), match_count,
+                    nodes.size());
+        }
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes));
     }
 
@@ -463,8 +618,8 @@ struct parser_executor {
     }
 
     common_peg_parse_result operator()(const common_peg_chars_parser & p) const {
-        auto pos = start_pos;
-        int match_count = 0;
+        auto pos         = start_pos;
+        int  match_count = 0;
 
         // Try to match up to max_count times (or unlimited if max_count is -1)
         while (p.max_count == -1 || match_count < p.max_count) {
@@ -527,7 +682,7 @@ struct parser_executor {
     }
 
     static common_peg_parse_result handle_escape_sequence(common_peg_parse_context & ctx, size_t start, size_t & pos) {
-        ++pos; // consume '\'
+        ++pos;  // consume '\'
         if (pos >= ctx.input.size()) {
             if (!ctx.is_partial) {
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start);
@@ -537,6 +692,7 @@ struct parser_executor {
 
         switch (ctx.input[pos]) {
             case '"':
+            case '\'':
             case '\\':
             case '/':
             case 'b':
@@ -555,7 +711,7 @@ struct parser_executor {
     }
 
     static common_peg_parse_result handle_unicode_escape(common_peg_parse_context & ctx, size_t start, size_t & pos) {
-        ++pos; // consume 'u'
+        ++pos;  // consume 'u'
         for (int i = 0; i < 4; ++i) {
             if (pos >= ctx.input.size()) {
                 if (!ctx.is_partial) {
@@ -613,11 +769,53 @@ struct parser_executor {
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
     }
 
+    common_peg_parse_result operator()(const common_peg_python_dict_string_parser & /* p */) {
+        auto pos = start_pos;
+
+        // Parse string content (without quotes)
+        while (pos < ctx.input.size()) {
+            char c = ctx.input[pos];
+
+            if (c == '\'') {
+                // Found closing quote - success (don't consume it)
+                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos);
+            }
+
+            if (c == '\\') {
+                auto result = handle_escape_sequence(ctx, start_pos, pos);
+                if (!result.success()) {
+                    return result;
+                }
+            } else {
+                auto utf8_result = parse_utf8_codepoint(ctx.input, pos);
+
+                if (utf8_result.status == utf8_parse_result::INCOMPLETE) {
+                    if (!ctx.is_partial) {
+                        return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
+                    }
+                    return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
+                }
+
+                if (utf8_result.status == utf8_parse_result::INVALID) {
+                    return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
+                }
+
+                pos += utf8_result.bytes_consumed;
+            }
+        }
+
+        // Reached end without finding closing quote
+        if (!ctx.is_partial) {
+            return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos);
+        }
+        return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
+    }
+
     common_peg_parse_result operator()(const common_peg_until_parser & p) const {
         trie matcher(p.delimiters);
 
         // Scan input and check for delimiters
-        size_t pos = start_pos;
+        size_t pos            = start_pos;
         size_t last_valid_pos = start_pos;
 
         while (pos < ctx.input.size()) {
@@ -638,16 +836,12 @@ struct parser_executor {
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
             }
 
-            // Check if a delimiter starts at this position
             auto match = matcher.check_at(ctx.input, pos);
-
             if (match == trie::COMPLETE_MATCH) {
-                // Found a complete delimiter, return everything before it
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos);
             }
 
             if (match == trie::PARTIAL_MATCH) {
-                // Found a partial match extending to end of input, return everything before it
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos);
             }
 
@@ -673,18 +867,12 @@ struct parser_executor {
         if (!result.fail()) {
             std::string_view text;
             if (result.start < ctx.input.size()) {
-                text = std::string_view(ctx.input).substr(result.start, result.end - result.start);
+                text = std::string_view(ctx.input).substr(
+                    result.start, std::min(result.end - result.start, ctx.input.size() - result.start));
             }
 
-            auto node_id = ctx.ast.add_node(
-                p.name,
-                "",
-                result.start,
-                result.end,
-                text,
-                std::move(result.nodes),
-                result.need_more_input()
-            );
+            auto node_id = ctx.ast.add_node(p.name, "", result.start, result.end, text, std::move(result.nodes),
+                                            result.need_more_input());
 
             return common_peg_parse_result(result.type, result.start, result.end, { node_id });
         }
@@ -694,6 +882,9 @@ struct parser_executor {
 
     common_peg_parse_result operator()(const common_peg_tag_parser & p) {
         // Parse the child
+        if (ctx.debug) {
+            fprintf(stderr, "%sTAG: %s\n", debug_indent().c_str(), p.tag.c_str());
+        }
         auto result = arena.parse(p.child, ctx, start_pos);
 
         if (!result.fail()) {
@@ -702,15 +893,8 @@ struct parser_executor {
                 text = std::string_view(ctx.input).substr(result.start, result.end - result.start);
             }
 
-            auto node_id = ctx.ast.add_node(
-                "",
-                p.tag,
-                result.start,
-                result.end,
-                text,
-                std::move(result.nodes),
-                result.need_more_input()
-            );
+            auto node_id = ctx.ast.add_node("", p.tag, result.start, result.end, text, std::move(result.nodes),
+                                            result.need_more_input());
 
             return common_peg_parse_result(result.type, result.start, result.end, { node_id });
         }
@@ -740,60 +924,90 @@ common_peg_parse_result common_peg_arena::parse(common_peg_parse_context & ctx,
     return parse(root_, ctx, start);
 }
 
-common_peg_parse_result common_peg_arena::parse(common_peg_parser_id id, common_peg_parse_context & ctx, size_t start) const {
+common_peg_parse_result common_peg_arena::parse(common_peg_parser_id       id,
+                                                common_peg_parse_context & ctx,
+                                                size_t                     start) const {
     // Execute parser
-    const auto & parser = parsers_.at(id);
+    const auto &    parser = parsers_.at(id);
     parser_executor exec(*this, ctx, start);
     return std::visit(exec, parser);
 }
 
 common_peg_parser_id common_peg_arena::resolve_ref(common_peg_parser_id id) {
     const auto & parser = parsers_.at(id);
-    if (auto ref = std::get_if<common_peg_ref_parser>(&parser)) {
+    if (const auto *ref = std::get_if<common_peg_ref_parser>(&parser)) {
         return get_rule(ref->name);
     }
     return id;
 }
 
+static void bfs_node(common_peg_ast_arena &arena, std::ostringstream & oss, const common_peg_ast_node & node, int indent) {
+    for (int i = 0; i < indent; i++) {
+        oss << "  ";
+    }
+    oss << "NODE " << node.id;
+    if (!node.rule.empty()) {
+        oss << " (rule " << node.rule << ")";
+    }
+    if (!node.tag.empty()) {
+        oss << " (tag " << node.tag << ")";
+    }
+    oss << " ['" << node.text << "']\n";
+    for (const auto child : node.children) {
+        bfs_node(arena, oss, arena.get(child), indent + 1);
+    }
+}
+
+std::string common_peg_ast_arena::dump() {
+    std::ostringstream oss;
+    for (auto & node : nodes_) {
+        bfs_node(*this, oss, node, 0);
+    }
+    return oss.str();
+}
+
 void common_peg_arena::resolve_refs() {
     // Walk through all parsers and replace refs with their corresponding rule IDs
     for (auto & parser : parsers_) {
-        std::visit([this](auto & p) {
-            using T = std::decay_t<decltype(p)>;
+        std::visit(
+            [this](auto & p) {
+                using T = std::decay_t<decltype(p)>;
 
-            if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
-                for (auto & child : p.children) {
-                    child = resolve_ref(child);
-                }
-            } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
-                for (auto & child : p.children) {
-                    child = resolve_ref(child);
+                if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
+                    for (auto & child : p.children) {
+                        child = resolve_ref(child);
+                    }
+                } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
+                    for (auto & child : p.children) {
+                        child = resolve_ref(child);
+                    }
+                } else if constexpr (std::is_same_v<T, common_peg_repetition_parser> ||
+                                     std::is_same_v<T, common_peg_and_parser> ||
+                                     std::is_same_v<T, common_peg_not_parser> ||
+                                     std::is_same_v<T, common_peg_tag_parser> ||
+                                     std::is_same_v<T, common_peg_atomic_parser>) {
+                    p.child = resolve_ref(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
+                    p.child = resolve_ref(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
+                    p.child = resolve_ref(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
+                                     std::is_same_v<T, common_peg_start_parser> ||
+                                     std::is_same_v<T, common_peg_end_parser> ||
+                                     std::is_same_v<T, common_peg_ref_parser> ||
+                                     std::is_same_v<T, common_peg_until_parser> ||
+                                     std::is_same_v<T, common_peg_literal_parser> ||
+                                     std::is_same_v<T, common_peg_json_string_parser> ||
+                                    std::is_same_v<T, common_peg_python_dict_string_parser> ||
+                                     std::is_same_v<T, common_peg_chars_parser> ||
+                                     std::is_same_v<T, common_peg_any_parser> ||
+                                     std::is_same_v<T, common_peg_space_parser>) {
+                    // These rules do not have children
+                } else {
+                    static_assert(is_always_false_v<T>);
                 }
-            } else if constexpr (std::is_same_v<T, common_peg_repetition_parser> ||
-                                 std::is_same_v<T, common_peg_and_parser> ||
-                                 std::is_same_v<T, common_peg_not_parser> ||
-                                 std::is_same_v<T, common_peg_tag_parser> ||
-                                 std::is_same_v<T, common_peg_atomic_parser>) {
-                p.child = resolve_ref(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-                p.child = resolve_ref(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
-                p.child = resolve_ref(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
-                                 std::is_same_v<T, common_peg_start_parser> ||
-                                 std::is_same_v<T, common_peg_end_parser> ||
-                                 std::is_same_v<T, common_peg_ref_parser> ||
-                                 std::is_same_v<T, common_peg_until_parser> ||
-                                 std::is_same_v<T, common_peg_literal_parser> ||
-                                 std::is_same_v<T, common_peg_json_string_parser> ||
-                                 std::is_same_v<T, common_peg_chars_parser> ||
-                                 std::is_same_v<T, common_peg_any_parser> ||
-                                 std::is_same_v<T, common_peg_space_parser>) {
-                // These rules do not have children
-            } else {
-                static_assert(is_always_false_v<T>);
-            }
-        }, parser);
+            },
+            parser);
     }
 
     // Also flatten root if it's a ref
@@ -803,63 +1017,88 @@ void common_peg_arena::resolve_refs() {
 }
 
 std::string common_peg_arena::dump(common_peg_parser_id id) const {
+    std::unordered_set<common_peg_parser_id> visited;
+    return dump_impl(id, visited);
+}
+
+std::string common_peg_arena::dump_impl(common_peg_parser_id                       id,
+                                        std::unordered_set<common_peg_parser_id> & visited) const {
+    // Check for cycles
+    if (visited.count(id)) {
+        return "[cycle]";
+    }
+    visited.insert(id);
+
     const auto & parser = parsers_.at(id);
 
-    return std::visit([this](const auto & p) -> std::string {
-        using T = std::decay_t<decltype(p)>;
-
-        if constexpr (std::is_same_v<T, common_peg_epsilon_parser>) {
-            return "Epsilon";
-        } else if constexpr (std::is_same_v<T, common_peg_start_parser>) {
-            return "Start";
-        } else if constexpr (std::is_same_v<T, common_peg_end_parser>) {
-            return "End";
-        } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
-            return "Literal(" + p.literal + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
-            std::vector<std::string> parts;
-            for (const auto & child : p.children) {
-                parts.push_back(dump(child));
-            }
-            return "Sequence(" + string_join(parts, ", ") + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
-            std::vector<std::string> parts;
-            for (const auto & child : p.children) {
-                parts.push_back(dump(child));
-            }
-            return "Choice(" + string_join(parts, ", ") + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
-            if (p.max_count == -1) {
-                return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", unbounded)";
-            }
-            return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_and_parser>) {
-            return "And(" + dump(p.child) + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_not_parser>) {
-            return "Not(" + dump(p.child) + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
-            return "Any";
-        } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
-            return "Space";
-        } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
-            if (p.max_count == -1) {
-                return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", unbounded)";
+    return std::visit(
+        [this, &visited](const auto & p) -> std::string {
+            using T = std::decay_t<decltype(p)>;
+
+            if constexpr (std::is_same_v<T, common_peg_epsilon_parser>) {
+                return "Epsilon";
+            } else if constexpr (std::is_same_v<T, common_peg_start_parser>) {
+                return "Start";
+            } else if constexpr (std::is_same_v<T, common_peg_end_parser>) {
+                return "End";
+            } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
+                return "Literal(" + p.literal + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
+                std::vector<std::string> parts;
+                for (const auto & child : p.children) {
+                    parts.push_back(dump_impl(child, visited));
+                }
+                return "Sequence(" + string_join(parts, ", ") + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
+                std::vector<std::string> parts;
+                for (const auto & child : p.children) {
+                    parts.push_back(dump_impl(child, visited));
+                }
+                return "Choice(" + string_join(parts, ", ") + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
+                if (p.max_count == -1) {
+                    return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) +
+                           ", unbounded)";
+                }
+                return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) + ", " +
+                       std::to_string(p.max_count) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_and_parser>) {
+                return "And(" + dump_impl(p.child, visited) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_not_parser>) {
+                return "Not(" + dump_impl(p.child, visited) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+                return "Atomic(" + dump_impl(p.child, visited) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
+                return "Any";
+            } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
+                return "Space";
+            } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
+                if (p.max_count == -1) {
+                    return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", unbounded)";
+                }
+                return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", " +
+                       std::to_string(p.max_count) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
+                return "JsonString()";
+            } else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
+                return "PythonDictString()";
+            } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
+                return "Until(" + string_join(p.delimiters, " | ") + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
+                return "Schema(" + dump_impl(p.child, visited) + ", " + (p.schema ? p.schema->dump() : "null") + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
+                return "Rule(" + p.name + ", " + dump_impl(p.child, visited) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
+                return "Ref(" + p.name + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
+                return "Tag(" + p.tag + ", " + dump(p.child) + ")";
+            } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+                return "Atomic(" + dump(p.child) + ")";
+            } else {
+                return "Unknown";
             }
-            return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
-            return "JsonString()";
-        } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
-            return "Until(" + string_join(p.delimiters, " | ") + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
-            return "Schema(" + dump(p.child) + ", " + (p.schema ? p.schema->dump() : "null") + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-            return "Rule(" + p.name + ", " + dump(p.child) + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
-            return "Ref(" + p.name + ")";
-        } else {
-            return "Unknown";
-        }
-    }, parser);
+        },
+        parser);
 }
 
 common_peg_parser & common_peg_parser::operator=(const common_peg_parser & other) {
@@ -868,25 +1107,25 @@ common_peg_parser & common_peg_parser::operator=(const common_peg_parser & other
 }
 
 common_peg_parser & common_peg_parser::operator+=(const common_peg_parser & other) {
-    id_ = builder_.sequence({id_, other.id_});
+    id_ = builder_.sequence({ id_, other.id_ });
     return *this;
 }
 
 common_peg_parser & common_peg_parser::operator|=(const common_peg_parser & other) {
-    id_ = builder_.choice({id_, other.id_});
+    id_ = builder_.choice({ id_, other.id_ });
     return *this;
 }
 
 common_peg_parser common_peg_parser::operator+(const common_peg_parser & other) const {
-    return builder_.sequence({id_, other.id_});
+    return builder_.sequence({ id_, other.id_ });
 }
 
 common_peg_parser common_peg_parser::operator|(const common_peg_parser & other) const {
-    return builder_.choice({id_, other.id_});
+    return builder_.choice({ id_, other.id_ });
 }
 
 common_peg_parser common_peg_parser::operator<<(const common_peg_parser & other) const {
-    return builder_.sequence({id_, builder_.space(), other.id_});
+    return builder_.sequence({ id_, builder_.space(), other.id_ });
 }
 
 common_peg_parser common_peg_parser::operator+(const char * str) const {
@@ -955,7 +1194,7 @@ common_peg_parser common_peg_parser_builder::sequence(const std::vector<common_p
             flattened.push_back(p);
         }
     }
-    return wrap(arena_.add_parser(common_peg_sequence_parser{flattened}));
+    return wrap(arena_.add_parser(common_peg_sequence_parser{ flattened }));
 }
 
 common_peg_parser common_peg_parser_builder::sequence(const std::vector<common_peg_parser> & parsers) {
@@ -987,7 +1226,7 @@ common_peg_parser common_peg_parser_builder::choice(const std::vector<common_peg
             flattened.push_back(p);
         }
     }
-    return wrap(arena_.add_parser(common_peg_choice_parser{flattened}));
+    return wrap(arena_.add_parser(common_peg_choice_parser{ flattened }));
 }
 
 common_peg_parser common_peg_parser_builder::choice(const std::vector<common_peg_parser> & parsers) {
@@ -1010,36 +1249,42 @@ common_peg_parser common_peg_parser_builder::choice(std::initializer_list<common
 
 common_peg_parser common_peg_parser_builder::chars(const std::string & classes, int min, int max) {
     auto [ranges, negated] = parse_char_classes(classes);
-    return wrap(arena_.add_parser(common_peg_chars_parser{classes, ranges, negated, min, max}));
+    return wrap(arena_.add_parser(common_peg_chars_parser{ classes, ranges, negated, min, max }));
 }
 
-common_peg_parser common_peg_parser_builder::schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw) {
-    return wrap(arena_.add_parser(common_peg_schema_parser{p.id(), name, std::make_shared<nlohmann::ordered_json>(schema), raw}));
+common_peg_parser common_peg_parser_builder::schema(const common_peg_parser &      p,
+                                                    const std::string &            name,
+                                                    const nlohmann::ordered_json & schema,
+                                                    bool                           raw) {
+    return wrap(arena_.add_parser(
+        common_peg_schema_parser{ p.id(), name, std::make_shared<nlohmann::ordered_json>(schema), raw }));
 }
 
 common_peg_parser common_peg_parser_builder::rule(const std::string & name, const common_peg_parser & p, bool trigger) {
     auto clean_name = rule_name(name);
-    auto rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, p.id(), trigger});
+    auto rule_id    = arena_.add_parser(common_peg_rule_parser{ clean_name, p.id(), trigger });
     arena_.add_rule(clean_name, rule_id);
     return ref(clean_name);
 }
 
-common_peg_parser common_peg_parser_builder::rule(const std::string & name, const std::function<common_peg_parser()> & builder_fn, bool trigger) {
+common_peg_parser common_peg_parser_builder::rule(const std::string &                        name,
+                                                  const std::function<common_peg_parser()> & builder_fn,
+                                                  bool                                       trigger) {
     auto clean_name = rule_name(name);
     if (arena_.has_rule(clean_name)) {
         return ref(clean_name);
     }
 
     // Create placeholder rule to allow recursive references
-    auto placeholder = any();  // Temporary placeholder
-    auto placeholder_rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, placeholder.id(), trigger});
+    auto placeholder         = any();  // Temporary placeholder
+    auto placeholder_rule_id = arena_.add_parser(common_peg_rule_parser{ clean_name, placeholder.id(), trigger });
     arena_.add_rule(clean_name, placeholder_rule_id);
 
     // Build the actual parser
     auto parser = builder_fn();
 
     // Replace placeholder with actual rule
-    auto rule_id = arena_.add_parser(common_peg_rule_parser{clean_name, parser.id(), trigger});
+    auto rule_id              = arena_.add_parser(common_peg_rule_parser{ clean_name, parser.id(), trigger });
     arena_.rules_[clean_name] = rule_id;
 
     return ref(clean_name);
@@ -1056,77 +1301,67 @@ common_peg_arena common_peg_parser_builder::build() {
 
 // JSON parsers
 common_peg_parser common_peg_parser_builder::json_number() {
-   return rule("json-number", [this]() {
+    return rule("json-number", [this]() {
         auto digit1_9 = chars("[1-9]", 1, 1);
-        auto digits = chars("[0-9]");
-        auto int_part = choice({literal("0"), sequence({digit1_9, chars("[0-9]", 0, -1)})});
-        auto frac = sequence({literal("."), digits});
-        auto exp = sequence({choice({literal("e"), literal("E")}), optional(chars("[+-]", 1, 1)), digits});
-        return sequence({optional(literal("-")), int_part, optional(frac), optional(exp), space()});
+        auto digits   = chars("[0-9]");
+        auto int_part = choice({ literal("0"), sequence({ digit1_9, chars("[0-9]", 0, -1) }) });
+        auto frac     = sequence({ literal("."), digits });
+        auto exp      = sequence({ choice({ literal("e"), literal("E") }), optional(chars("[+-]", 1, 1)), digits });
+        return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), space() });
     });
 }
 
 common_peg_parser common_peg_parser_builder::json_string() {
-    return rule("json-string", [this]() {
-        return sequence({literal("\""), json_string_content(), literal("\""), space()});
+    // When allow_python_dict_format is true, accept both single and double quotes
+    if (allow_python_dict_format_) {
+        return rule("json-string-flex", [this]() {
+            auto json_str = sequence({ literal("\""), json_string_content(), literal("\""), space() });
+            auto python_str = sequence({ literal("'"), python_dict_string_content(), literal("'"), space() });
+            return choice({ json_str, python_str });
+        });
+    }
+    // Standard JSON strings with double quotes only
+    return rule("json-string",
+                [this]() { return sequence({ literal("\""), json_string_content(), literal("\""), space() }); });
+}
+
+common_peg_parser common_peg_parser_builder::flexible_string() {
+    // Always returns a choice of both quote styles regardless of flag
+    return rule("flexible-string", [this]() {
+        auto json_str = sequence({ literal("\""), json_string_content(), literal("\""), space() });
+        auto python_str = sequence({ literal("'"), python_dict_string_content(), literal("'"), space() });
+        return choice({ json_str, python_str });
     });
 }
 
 common_peg_parser common_peg_parser_builder::json_bool() {
-    return rule("json-bool", [this]() {
-        return sequence({choice({literal("true"), literal("false")}), space()});
-    });
+    return rule("json-bool", [this]() { return sequence({ choice({ literal("true"), literal("false") }), space() }); });
 }
 
 common_peg_parser common_peg_parser_builder::json_null() {
-    return rule("json-null", [this]() {
-        return sequence({literal("null"), space()});
-    });
+    return rule("json-null", [this]() { return sequence({ literal("null"), space() }); });
 }
 
 common_peg_parser common_peg_parser_builder::json_object() {
     return rule("json-object", [this]() {
-        auto ws = space();
-        auto member = sequence({json_string(), ws, literal(":"), ws, json()});
-        auto members = sequence({member, zero_or_more(sequence({ws, literal(","), ws, member}))});
-        return sequence({
-            literal("{"),
-            ws,
-            choice({
-                literal("}"),
-                sequence({members, ws, literal("}")})
-            }),
-            ws
-        });
+        auto ws      = space();
+        auto member  = sequence({ json_string(), ws, literal(":"), ws, json() });
+        auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) });
+        return sequence({ literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }) });
     });
 }
 
 common_peg_parser common_peg_parser_builder::json_array() {
     return rule("json-array", [this]() {
-        auto ws = space();
-        auto elements = sequence({json(), zero_or_more(sequence({literal(","), ws, json()}))});
-        return sequence({
-            literal("["),
-            ws,
-            choice({
-                literal("]"),
-                sequence({elements, ws, literal("]")})
-            }),
-            ws
-        });
+        auto ws       = space();
+        auto elements = sequence({ json(), zero_or_more(sequence({ literal(","), ws, json() })) });
+        return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }) });
     });
 }
 
 common_peg_parser common_peg_parser_builder::json() {
     return rule("json-value", [this]() {
-        return choice({
-            json_object(),
-            json_array(),
-            json_string(),
-            json_number(),
-            json_bool(),
-            json_null()
-        });
+        return choice({ json_object(), json_array(), json_string(), json_number(), json_bool(), json_null() });
     });
 }
 
@@ -1134,6 +1369,57 @@ common_peg_parser common_peg_parser_builder::json_string_content() {
     return wrap(arena_.add_parser(common_peg_json_string_parser{}));
 }
 
+common_peg_parser common_peg_parser_builder::python_dict_string_content() {
+    return wrap(arena_.add_parser(common_peg_python_dict_string_parser{}));
+}
+
+common_peg_parser common_peg_parser_builder::python_dict_string() {
+    return rule("python-dict-string",
+                [this]() { return sequence({ literal("'"), python_dict_string_content(), literal("'"), space() }); });
+}
+
+common_peg_parser common_peg_parser_builder::python_dict_number() {
+    // Same as JSON number
+    return json_number();
+}
+
+common_peg_parser common_peg_parser_builder::python_dict_bool() {
+    // Same as JSON bool
+    return json_bool();
+}
+
+common_peg_parser common_peg_parser_builder::python_dict_null() {
+    // Same as JSON null
+    return json_null();
+}
+
+common_peg_parser common_peg_parser_builder::python_dict_object() {
+    return rule("python-dict-object", [this]() {
+        auto ws      = space();
+        auto member  = sequence({ python_dict_string(), ws, literal(":"), ws, python_dict() });
+        auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) });
+        return sequence({ literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }) });
+    });
+}
+
+common_peg_parser common_peg_parser_builder::python_dict_array() {
+    return rule("python-dict-array", [this]() {
+        auto ws       = space();
+        auto elements = sequence({ python_dict(), zero_or_more(sequence({ literal(","), ws, python_dict() })) });
+        return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }) });
+    });
+}
+
+common_peg_parser common_peg_parser_builder::python_dict() {
+    return rule("python-dict-value", [this]() {
+        std::vector<common_peg_parser> parsers = {
+            python_dict_object(), python_dict_array(), python_dict_string(), python_dict_number(),
+            python_dict_bool(), python_dict_null()
+        };
+        return choice(parsers);
+    });
+}
+
 common_peg_parser common_peg_parser_builder::json_member(const std::string & key, const common_peg_parser & p) {
     auto ws = space();
     return sequence({
@@ -1145,17 +1431,76 @@ common_peg_parser common_peg_parser_builder::json_member(const std::string & key
     });
 }
 
+static std::string gbnf_escape_char_class(uint32_t c) {
+    if (c == '-' || c == ']' || c == '[' || c == '\\') {
+        return "\\" + std::string(1, (char) c);
+    }
+    // Escape whitespace control characters
+    if (c == '\n') {
+        return "\\n";
+    }
+    if (c == '\t') {
+        return "\\t";
+    }
+    if (c == '\r') {
+        return "\\r";
+    }
+
+    // Printable ASCII
+    if (c >= 0x20 && c <= 0x7E) {
+        return std::string(1, (char) c);
+    }
+
+    // Hex escape
+    char         buf[16];
+    const char * hex = "0123456789ABCDEF";
 
-static std::string gbnf_escape_char_class(char c) {
-    switch (c) {
-        case '\n': return "\\n";
-        case '\t': return "\\t";
-        case '\r': return "\\r";
-        case '\\': return "\\\\";
-        case ']':  return "\\]";
-        case '[':  return "\\[";
-        default:   return std::string(1, c);
+    if (c <= 0xFF) {
+        buf[0] = '\\';
+        buf[1] = 'x';
+        buf[2] = hex[(c >> 4) & 0xF];
+        buf[3] = hex[c & 0xF];
+        buf[4] = '\0';
+    } else if (c <= 0xFFFF) {
+        buf[0] = '\\';
+        buf[1] = 'u';
+        buf[2] = hex[(c >> 12) & 0xF];
+        buf[3] = hex[(c >> 8) & 0xF];
+        buf[4] = hex[(c >> 4) & 0xF];
+        buf[5] = hex[c & 0xF];
+        buf[6] = '\0';
+    } else {
+        buf[0] = '\\';
+        buf[1] = 'U';
+        for (int i = 0; i < 8; i++) {
+            buf[2 + i] = hex[(c >> ((7 - i) * 4)) & 0xF];
+        }
+        buf[10] = '\0';
+    }
+
+    return std::string(buf);
+}
+
+static std::string codepoints_to_utf8(const std::vector<uint32_t> & cps) {
+    std::string s;
+    for (uint32_t cp : cps) {
+        if (cp < 0x80) {
+            s += (char) cp;
+        } else if (cp < 0x800) {
+            s += (char) (0xC0 | (cp >> 6));
+            s += (char) (0x80 | (cp & 0x3F));
+        } else if (cp < 0x10000) {
+            s += (char) (0xE0 | (cp >> 12));
+            s += (char) (0x80 | ((cp >> 6) & 0x3F));
+            s += (char) (0x80 | (cp & 0x3F));
+        } else {
+            s += (char) (0xF0 | (cp >> 18));
+            s += (char) (0x80 | ((cp >> 12) & 0x3F));
+            s += (char) (0x80 | ((cp >> 6) & 0x3F));
+            s += (char) (0x80 | (cp & 0x3F));
+        }
     }
+    return s;
 }
 
 static std::string gbnf_excluding_pattern(const std::vector<std::string> & strings) {
@@ -1168,17 +1513,17 @@ static std::string gbnf_excluding_pattern(const std::vector<std::string> & strin
             pattern += " | ";
         }
 
-        const auto & pre = pieces[i].prefix;
+        const auto & pre   = pieces[i].prefix;
         const auto & chars = pieces[i].next_chars;
 
         std::string cls;
-        cls.reserve(chars.size());
-        for (const auto & ch : chars) {
+        cls.reserve(chars.size() * 4);
+        for (uint32_t ch : chars) {
             cls += gbnf_escape_char_class(ch);
         }
 
         if (!pre.empty()) {
-            pattern += gbnf_format_literal(pre) + " [^" + cls + "]";
+            pattern += gbnf_format_literal(codepoints_to_utf8(pre)) + " [^" + cls + "]";
         } else {
             pattern += "[^" + cls + "]";
         }
@@ -1187,58 +1532,57 @@ static std::string gbnf_excluding_pattern(const std::vector<std::string> & strin
     return "(" + pattern + ")*";
 }
 
-static std::unordered_set<std::string> collect_reachable_rules(
-    const common_peg_arena & arena,
-    const common_peg_parser_id & rule
-) {
+static std::unordered_set<std::string> collect_reachable_rules(const common_peg_arena &     arena,
+                                                               const common_peg_parser_id & rule) {
     std::unordered_set<std::string> reachable;
     std::unordered_set<std::string> visited;
 
     std::function<void(common_peg_parser_id)> visit = [&](common_peg_parser_id id) {
         const auto & parser = arena.get(id);
 
-        std::visit([&](const auto & p) {
-            using T = std::decay_t<decltype(p)>;
-
-            if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
-                          std::is_same_v<T, common_peg_start_parser> ||
-                          std::is_same_v<T, common_peg_end_parser> ||
-                          std::is_same_v<T, common_peg_until_parser> ||
-                          std::is_same_v<T, common_peg_literal_parser> ||
-                          std::is_same_v<T, common_peg_chars_parser> ||
-                          std::is_same_v<T, common_peg_space_parser> ||
-                          std::is_same_v<T, common_peg_any_parser> ||
-                          std::is_same_v<T, common_peg_json_string_parser>) {
-                // These parsers do not have any children
-            } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
-                for (auto child : p.children) {
-                    visit(child);
-                }
-            } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
-                for (auto child : p.children) {
-                    visit(child);
-                }
-            } else if constexpr (std::is_same_v<T, common_peg_repetition_parser> ||
-                                 std::is_same_v<T, common_peg_and_parser> ||
-                                 std::is_same_v<T, common_peg_not_parser> ||
-                                 std::is_same_v<T, common_peg_tag_parser> ||
-                                 std::is_same_v<T, common_peg_atomic_parser> ||
-                                 std::is_same_v<T, common_peg_schema_parser>) {
-                visit(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-                if (visited.find(p.name) == visited.end()) {
-                    visited.insert(p.name);
-                    reachable.insert(p.name);
+        std::visit(
+            [&](const auto & p) {
+                using T = std::decay_t<decltype(p)>;
+
+                if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
+                              std::is_same_v<T, common_peg_start_parser> || std::is_same_v<T, common_peg_end_parser> ||
+                              std::is_same_v<T, common_peg_until_parser> ||
+                              std::is_same_v<T, common_peg_literal_parser> ||
+                              std::is_same_v<T, common_peg_chars_parser> ||
+                              std::is_same_v<T, common_peg_space_parser> || std::is_same_v<T, common_peg_any_parser> ||
+                              std::is_same_v<T, common_peg_json_string_parser> ||
+                              std::is_same_v<T, common_peg_python_dict_string_parser>) {
+                    // These parsers do not have any children
+                } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
+                    for (auto child : p.children) {
+                        visit(child);
+                    }
+                } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
+                    for (auto child : p.children) {
+                        visit(child);
+                    }
+                } else if constexpr (std::is_same_v<T, common_peg_repetition_parser> ||
+                                     std::is_same_v<T, common_peg_and_parser> ||
+                                     std::is_same_v<T, common_peg_not_parser> ||
+                                     std::is_same_v<T, common_peg_tag_parser> ||
+                                     std::is_same_v<T, common_peg_atomic_parser> ||
+                                     std::is_same_v<T, common_peg_schema_parser>) {
                     visit(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
+                    if (visited.find(p.name) == visited.end()) {
+                        visited.insert(p.name);
+                        reachable.insert(p.name);
+                        visit(p.child);
+                    }
+                } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
+                    // Traverse rules so we pick up everything
+                    auto referenced_rule = arena.get_rule(p.name);
+                    visit(referenced_rule);
+                } else {
+                    static_assert(is_always_false_v<T>);
                 }
-            } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
-                // Traverse rules so we pick up everything
-                auto referenced_rule = arena.get_rule(p.name);
-                visit(referenced_rule);
-            } else {
-                static_assert(is_always_false_v<T>);
-            }
-        }, parser);
+            },
+            parser);
     };
 
     visit(rule);
@@ -1251,129 +1595,138 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
     std::function<std::string(common_peg_parser_id)> to_gbnf = [&](common_peg_parser_id id) -> std::string {
         const auto & parser = parsers_.at(id);
 
-        return std::visit([&](const auto & p) -> std::string {
-            using T = std::decay_t<decltype(p)>;
-
-            if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
-                          std::is_same_v<T, common_peg_start_parser> ||
-                          std::is_same_v<T, common_peg_end_parser>) {
-                return "";
-            } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
-                return gbnf_format_literal(p.literal);
-            } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
-                std::string s;
-                for (const auto & child : p.children) {
-                    if (!s.empty()) {
-                        s += " ";
+        return std::visit(
+            [&](const auto & p) -> std::string {
+                using T = std::decay_t<decltype(p)>;
+
+                if constexpr (std::is_same_v<T, common_peg_epsilon_parser> ||
+                              std::is_same_v<T, common_peg_start_parser> || std::is_same_v<T, common_peg_end_parser>) {
+                    return "";
+                } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
+                    return gbnf_format_literal(p.literal);
+                } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
+                    std::string s;
+                    for (const auto & child : p.children) {
+                        if (!s.empty()) {
+                            s += " ";
+                        }
+                        auto         child_gbnf   = to_gbnf(child);
+                        const auto & child_parser = parsers_.at(child);
+                        if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
+                            std::holds_alternative<common_peg_sequence_parser>(child_parser) ||
+                            std::holds_alternative<common_peg_tag_parser>(child_parser) ||
+                            std::holds_alternative<common_peg_atomic_parser>(child_parser)) {
+                            s += "(" + child_gbnf + ")";
+                        } else {
+                            s += child_gbnf;
+                        }
+                    }
+                    return s;
+                } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
+                    std::string s;
+                    for (const auto & child : p.children) {
+                        if (!s.empty()) {
+                            s += " | ";
+                        }
+                        auto         child_gbnf   = to_gbnf(child);
+                        const auto & child_parser = parsers_.at(child);
+                        if (std::holds_alternative<common_peg_choice_parser>(child_parser)) {
+                            s += "(" + child_gbnf + ")";
+                        } else {
+                            s += child_gbnf;
+                        }
                     }
-                    auto child_gbnf = to_gbnf(child);
-                    const auto & child_parser = parsers_.at(child);
+                    return s;
+                } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
+                    auto         child_gbnf   = to_gbnf(p.child);
+                    const auto & child_parser = parsers_.at(p.child);
                     if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
-                        std::holds_alternative<common_peg_sequence_parser>(child_parser)) {
-                        s += "(" + child_gbnf + ")";
-                    } else {
-                        s += child_gbnf;
+                        std::holds_alternative<common_peg_sequence_parser>(child_parser) ||
+                        std::holds_alternative<common_peg_tag_parser>(child_parser) ||
+                        std::holds_alternative<common_peg_atomic_parser>(child_parser)) {
+                        child_gbnf = "(" + child_gbnf + ")";
                     }
-                }
-                return s;
-            } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
-                std::string s;
-                for (const auto & child : p.children) {
-                    if (!s.empty()) {
-                        s += " | ";
+                    if (p.min_count == 0 && p.max_count == 1) {
+                        return child_gbnf + "?";
                     }
-                    auto child_gbnf = to_gbnf(child);
-                    const auto & child_parser = parsers_.at(child);
-                    if (std::holds_alternative<common_peg_choice_parser>(child_parser)) {
-                        s += "(" + child_gbnf + ")";
-                    } else {
-                        s += child_gbnf;
+                    if (p.min_count == 0 && p.max_count == -1) {
+                        return child_gbnf + "*";
                     }
-                }
-                return s;
-            } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
-                auto child_gbnf = to_gbnf(p.child);
-                const auto & child_parser = parsers_.at(p.child);
-                if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
-                    std::holds_alternative<common_peg_sequence_parser>(child_parser)) {
-                    child_gbnf = "(" + child_gbnf + ")";
-                }
-                if (p.min_count == 0 && p.max_count == 1) {
-                    return child_gbnf + "?";
-                }
-                if (p.min_count == 0 && p.max_count == -1) {
-                    return child_gbnf + "*";
-                }
-                if (p.min_count == 1 && p.max_count == -1) {
-                    return child_gbnf + "+";
-                }
-                if (p.max_count == -1) {
-                    return child_gbnf + "{" + std::to_string(p.min_count) + ",}";
-                }
-                if (p.min_count == p.max_count) {
-                    if (p.min_count == 1) {
-                        return child_gbnf;
+                    if (p.min_count == 1 && p.max_count == -1) {
+                        return child_gbnf + "+";
                     }
-                    return child_gbnf + "{" + std::to_string(p.min_count) + "}";
-                }
-                return child_gbnf + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
-            } else if constexpr (std::is_same_v<T, common_peg_and_parser> || std::is_same_v<T, common_peg_not_parser>) {
-                return "";  // Lookahead not supported in GBNF
-            } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
-                return ".";
-            } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
-                return "space";
-            } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
-                std::string result = p.pattern;
-                if (p.min_count == 0 && p.max_count == 1) {
-                    return result + "?";
-                }
-                if (p.min_count == 0 && p.max_count == -1) {
-                    return result + "*";
-                }
-                if (p.min_count == 1 && p.max_count == -1) {
-                    return result + "+";
-                }
-                if (p.max_count == -1) {
-                    return result + "{" + std::to_string(p.min_count) + ",}";
-                }
-                if (p.min_count == p.max_count) {
-                    if (p.min_count == 1) {
-                        return result;
+                    if (p.max_count == -1) {
+                        return child_gbnf + "{" + std::to_string(p.min_count) + ",}";
                     }
-                    return result + "{" + std::to_string(p.min_count) + "}";
-                }
-                return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
-            } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
-                return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
-            } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
-                if (p.delimiters.empty()) {
-                    return ".*";
-                }
-                return gbnf_excluding_pattern(p.delimiters);
-            } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
-                if (p.schema) {
-                    if (p.raw && p.schema->contains("type") && p.schema->at("type").is_string() && p.schema->at("type") == "string") {
-                        // TODO: Implement more comprehensive grammar generation for raw strings.
-                        // For now, use the grammar emitted from the underlying parser.
-                        return to_gbnf(p.child);
+                    if (p.min_count == p.max_count) {
+                        if (p.min_count == 1) {
+                            return child_gbnf;
+                        }
+                        return child_gbnf + "{" + std::to_string(p.min_count) + "}";
                     }
-                    return builder.add_schema(p.name, *p.schema);
+                    return child_gbnf + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
+                } else if constexpr (std::is_same_v<T, common_peg_and_parser> ||
+                                     std::is_same_v<T, common_peg_not_parser>) {
+                    return "";  // Lookahead not supported in GBNF
+                } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
+                    return ".";
+                } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
+                    return "space";
+                } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
+                    std::string result = p.pattern;
+                    if (p.min_count == 0 && p.max_count == 1) {
+                        return result + "?";
+                    }
+                    if (p.min_count == 0 && p.max_count == -1) {
+                        return result + "*";
+                    }
+                    if (p.min_count == 1 && p.max_count == -1) {
+                        return result + "+";
+                    }
+                    if (p.max_count == -1) {
+                        return result + "{" + std::to_string(p.min_count) + ",}";
+                    }
+                    if (p.min_count == p.max_count) {
+                        if (p.min_count == 1) {
+                            return result;
+                        }
+                        return result + "{" + std::to_string(p.min_count) + "}";
+                    }
+                    return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
+                } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
+                    return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
+                } else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
+                    return R"(( [^'\\] | "\\" ( ['"\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
+                } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
+                    if (p.delimiters.empty()) {
+                        return ".*";
+                    }
+                    return gbnf_excluding_pattern(p.delimiters);
+                } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
+                    if (p.schema) {
+                        if (p.raw && p.schema->contains("type") && p.schema->at("type").is_string() &&
+                            p.schema->at("type") == "string") {
+                            // TODO: Implement more comprehensive grammar generation for raw strings.
+                            // For now, use the grammar emitted from the underlying parser.
+                            return to_gbnf(p.child);
+                        }
+                        return builder.add_schema(p.name, *p.schema);
+                    }
+                    return to_gbnf(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
+                    return p.name;
+                } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
+                    // Refs should not exist after flattening, but kept just in case
+                    return p.name;
+                } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
+                    return to_gbnf(p.child);
+                } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+                    return to_gbnf(p.child);
+                } else {
+                    static_assert(is_always_false_v<T>);
                 }
-                return to_gbnf(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-                return p.name;
-            } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
-                // Refs should not exist after flattening, but kept just in case
-                return p.name;
-            } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
-                return to_gbnf(p.child);
-            } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
-                return to_gbnf(p.child);
-            } else {
-                static_assert(is_always_false_v<T>);
-            }
-        }, parser);
+            },
+            parser);
     };
 
     // Collect reachable rules
@@ -1432,80 +1785,125 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
 static nlohmann::json serialize_parser_variant(const common_peg_parser_variant & variant) {
     using json = nlohmann::json;
 
-    return std::visit([](const auto & p) -> json {
-        using T = std::decay_t<decltype(p)>;
-
-        if constexpr (std::is_same_v<T, common_peg_epsilon_parser>) {
-            return json{{"type", "epsilon"}};
-        } else if constexpr (std::is_same_v<T, common_peg_start_parser>) {
-            return json{{"type", "start"}};
-        } else if constexpr (std::is_same_v<T, common_peg_end_parser>) {
-            return json{{"type", "end"}};
-        } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
-            return json{{"type", "literal"}, {"literal", p.literal}};
-        } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
-            return json{{"type", "sequence"}, {"children", p.children}};
-        } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
-            return json{{"type", "choice"}, {"children", p.children}};
-        } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
-            return json{
-                {"type", "repetition"},
-                {"child", p.child},
-                {"min_count", p.min_count},
-                {"max_count", p.max_count}
-            };
-        } else if constexpr (std::is_same_v<T, common_peg_and_parser>) {
-            return json{{"type", "and"}, {"child", p.child}};
-        } else if constexpr (std::is_same_v<T, common_peg_not_parser>) {
-            return json{{"type", "not"}, {"child", p.child}};
-        } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
-            return json{{"type", "any"}};
-        } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
-            return json{{"type", "space"}};
-        } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
-            json ranges = json::array();
-            for (const auto & range : p.ranges) {
-                ranges.push_back({{"start", range.start}, {"end", range.end}});
+    return std::visit(
+        [](const auto & p) -> json {
+            using T = std::decay_t<decltype(p)>;
+
+            if constexpr (std::is_same_v<T, common_peg_epsilon_parser>) {
+                return json{
+                    { "type", "epsilon" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_start_parser>) {
+                return json{
+                    { "type", "start" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_end_parser>) {
+                return json{
+                    { "type", "end" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_literal_parser>) {
+                return json{
+                    { "type",    "literal" },
+                    { "literal", p.literal }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
+                return json{
+                    { "type",     "sequence" },
+                    { "children", p.children }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
+                return json{
+                    { "type",     "choice"   },
+                    { "children", p.children }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
+                return json{
+                    { "type",      "repetition" },
+                    { "child",     p.child      },
+                    { "min_count", p.min_count  },
+                    { "max_count", p.max_count  }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_and_parser>) {
+                return json{
+                    { "type",  "and"   },
+                    { "child", p.child }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_not_parser>) {
+                return json{
+                    { "type",  "not"   },
+                    { "child", p.child }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
+                return json{
+                    { "type", "any" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
+                return json{
+                    { "type", "space" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_chars_parser>) {
+                json ranges = json::array();
+                for (const auto & range : p.ranges) {
+                    ranges.push_back({
+                        { "start", range.start },
+                        { "end",   range.end   }
+                    });
+                }
+                return json{
+                    { "type",      "chars"     },
+                    { "pattern",   p.pattern   },
+                    { "ranges",    ranges      },
+                    { "negated",   p.negated   },
+                    { "min_count", p.min_count },
+                    { "max_count", p.max_count }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
+                return json{
+                    { "type", "json_string" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
+                return json{
+                    { "type", "python_dict_string" }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
+                return json{
+                    { "type",       "until"      },
+                    { "delimiters", p.delimiters }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
+                return json{
+                    { "type",   "schema"                       },
+                    { "child",  p.child                        },
+                    { "name",   p.name                         },
+                    { "schema", p.schema ? *p.schema : nullptr },
+                    { "raw",    p.raw                          }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
+                return json{
+                    { "type",    "rule"    },
+                    { "name",    p.name    },
+                    { "child",   p.child   },
+                    { "trigger", p.trigger }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
+                return json{
+                    { "type", "ref"  },
+                    { "name", p.name }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+                return json{
+                    { "type",  "atomic" },
+                    { "child", p.child  }
+                };
+            } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
+                return json{
+                    { "type",  "tag"   },
+                    { "child", p.child },
+                    { "tag",   p.tag   }
+                };
             }
-            return json{
-                {"type", "chars"},
-                {"pattern", p.pattern},
-                {"ranges", ranges},
-                {"negated", p.negated},
-                {"min_count", p.min_count},
-                {"max_count", p.max_count}
-            };
-        } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
-            return json{{"type", "json_string"}};
-        } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
-            return json{{"type", "until"}, {"delimiters", p.delimiters}};
-        } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
-            return json{
-                {"type", "schema"},
-                {"child", p.child},
-                {"name", p.name},
-                {"schema", p.schema ? *p.schema : nullptr},
-                {"raw", p.raw}
-            };
-        } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-            return json{
-                {"type", "rule"},
-                {"name", p.name},
-                {"child", p.child},
-                {"trigger", p.trigger}
-            };
-        } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
-            return json{{"type", "ref"}, {"name", p.name}};
-        } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
-            return json{{"type", "atomic"}, {"child", p.child}};
-        } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
-            return json{
-                {"type", "tag"},
-                {"child", p.child},
-                {"tag", p.tag}
-            };
-        }
-    }, variant);
+        },
+        variant);
 }
 
 nlohmann::json common_peg_arena::to_json() const {
@@ -1514,9 +1912,9 @@ nlohmann::json common_peg_arena::to_json() const {
         parsers.push_back(serialize_parser_variant(parser));
     }
     return nlohmann::json{
-        {"parsers", parsers},
-        {"rules", rules_},
-        {"root", root_}
+        { "parsers", parsers },
+        { "rules",   rules_  },
+        { "root",    root_   }
     };
 }
 
@@ -1540,41 +1938,38 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
         if (!j.contains("literal") || !j["literal"].is_string()) {
             throw std::runtime_error("literal parser missing or invalid 'literal' field");
         }
-        return common_peg_literal_parser{j["literal"]};
+        return common_peg_literal_parser{ j["literal"] };
     }
     if (type == "sequence") {
         if (!j.contains("children") || !j["children"].is_array()) {
             throw std::runtime_error("sequence parser missing or invalid 'children' field");
         }
-        return common_peg_sequence_parser{j["children"].get<std::vector<common_peg_parser_id>>()};
+        return common_peg_sequence_parser{ j["children"].get<std::vector<common_peg_parser_id>>() };
     }
     if (type == "choice") {
         if (!j.contains("children") || !j["children"].is_array()) {
             throw std::runtime_error("choice parser missing or invalid 'children' field");
         }
-        return common_peg_choice_parser{j["children"].get<std::vector<common_peg_parser_id>>()};
+        return common_peg_choice_parser{ j["children"].get<std::vector<common_peg_parser_id>>() };
     }
     if (type == "repetition") {
         if (!j.contains("child") || !j.contains("min_count") || !j.contains("max_count")) {
             throw std::runtime_error("repetition parser missing required fields");
         }
-        return common_peg_repetition_parser{
-            j["child"].get<common_peg_parser_id>(),
-            j["min_count"].get<int>(),
-            j["max_count"].get<int>()
-        };
+        return common_peg_repetition_parser{ j["child"].get<common_peg_parser_id>(), j["min_count"].get<int>(),
+                                             j["max_count"].get<int>() };
     }
     if (type == "and") {
         if (!j.contains("child")) {
             throw std::runtime_error("and parser missing 'child' field");
         }
-        return common_peg_and_parser{j["child"].get<common_peg_parser_id>()};
+        return common_peg_and_parser{ j["child"].get<common_peg_parser_id>() };
     }
     if (type == "not") {
         if (!j.contains("child")) {
             throw std::runtime_error("not parser missing 'child' field");
         }
-        return common_peg_not_parser{j["child"].get<common_peg_parser_id>()};
+        return common_peg_not_parser{ j["child"].get<common_peg_parser_id>() };
     }
     if (type == "any") {
         return common_peg_any_parser{};
@@ -1583,34 +1978,34 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
         return common_peg_space_parser{};
     }
     if (type == "chars") {
-        if (!j.contains("pattern") || !j.contains("ranges") || !j.contains("negated") ||
-            !j.contains("min_count") || !j.contains("max_count")) {
+        if (!j.contains("pattern") || !j.contains("ranges") || !j.contains("negated") || !j.contains("min_count") ||
+            !j.contains("max_count")) {
             throw std::runtime_error("chars parser missing required fields");
         }
         common_peg_chars_parser parser;
-        parser.pattern = j["pattern"];
-        parser.negated = j["negated"];
+        parser.pattern   = j["pattern"];
+        parser.negated   = j["negated"];
         parser.min_count = j["min_count"];
         parser.max_count = j["max_count"];
         for (const auto & range_json : j["ranges"]) {
             if (!range_json.contains("start") || !range_json.contains("end")) {
                 throw std::runtime_error("char_range missing 'start' or 'end' field");
             }
-            parser.ranges.push_back({
-                range_json["start"].get<uint32_t>(),
-                range_json["end"].get<uint32_t>()
-            });
+            parser.ranges.push_back({ range_json["start"].get<uint32_t>(), range_json["end"].get<uint32_t>() });
         }
         return parser;
     }
     if (type == "json_string") {
         return common_peg_json_string_parser{};
     }
+    if (type == "python_dict_string") {
+        return common_peg_python_dict_string_parser{};
+    }
     if (type == "until") {
         if (!j.contains("delimiters") || !j["delimiters"].is_array()) {
             throw std::runtime_error("until parser missing or invalid 'delimiters' field");
         }
-        return common_peg_until_parser{j["delimiters"].get<std::vector<std::string>>()};
+        return common_peg_until_parser{ j["delimiters"].get<std::vector<std::string>>() };
     }
     if (type == "schema") {
         if (!j.contains("child") || !j.contains("name") || !j.contains("schema") || !j.contains("raw")) {
@@ -1618,7 +2013,7 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
         }
         common_peg_schema_parser parser;
         parser.child = j["child"].get<common_peg_parser_id>();
-        parser.name = j["name"];
+        parser.name  = j["name"];
         if (!j["schema"].is_null()) {
             parser.schema = std::make_shared<nlohmann::ordered_json>(j["schema"]);
         }
@@ -1629,17 +2024,14 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
         if (!j.contains("name") || !j.contains("child") || !j.contains("trigger")) {
             throw std::runtime_error("rule parser missing required fields");
         }
-        return common_peg_rule_parser{
-            j["name"].get<std::string>(),
-            j["child"].get<common_peg_parser_id>(),
-            j["trigger"].get<bool>()
-        };
+        return common_peg_rule_parser{ j["name"].get<std::string>(), j["child"].get<common_peg_parser_id>(),
+                                       j["trigger"].get<bool>() };
     }
     if (type == "ref") {
         if (!j.contains("name") || !j["name"].is_string()) {
             throw std::runtime_error("ref parser missing or invalid 'name' field");
         }
-        return common_peg_ref_parser{j["name"]};
+        return common_peg_ref_parser{ j["name"] };
     }
     if (type == "atomic") {
         if (!j.contains("child")) {
diff --git a/common/peg-parser.h b/common/peg-parser.h
index 1cd640365f2..9bd5e058388 100644
--- a/common/peg-parser.h
+++ b/common/peg-parser.h
@@ -4,6 +4,7 @@
 
 #include <memory>
 #include <unordered_map>
+#include <unordered_set>
 #include <string>
 #include <string_view>
 #include <functional>
@@ -111,6 +112,8 @@ class common_peg_ast_arena {
 
     void visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const;
     void visit(const common_peg_parse_result & result, const common_peg_ast_visitor & visitor) const;
+
+    std::string dump();
 };
 
 struct common_peg_parse_result {
@@ -139,6 +142,7 @@ struct common_peg_parse_result {
 struct common_peg_parse_context {
     std::string input;
     bool is_partial;
+    bool debug = false;  // Enable debug output for parser tracing
     common_peg_ast_arena ast;
 
     int parse_depth;
@@ -207,6 +211,7 @@ struct common_peg_chars_parser {
 };
 
 struct common_peg_json_string_parser {};
+struct common_peg_python_dict_string_parser {};
 
 struct common_peg_until_parser {
     std::vector<std::string> delimiters;
@@ -255,6 +260,7 @@ using common_peg_parser_variant = std::variant<
     common_peg_space_parser,
     common_peg_chars_parser,
     common_peg_json_string_parser,
+    common_peg_python_dict_string_parser,
     common_peg_until_parser,
     common_peg_schema_parser,
     common_peg_rule_parser,
@@ -299,6 +305,8 @@ class common_peg_arena {
     friend class common_peg_parser_builder;
 
   private:
+    std::string dump_impl(common_peg_parser_id id, std::unordered_set<common_peg_parser_id> & visited) const;
+
     common_peg_parser_id add_parser(common_peg_parser_variant parser);
     void add_rule(const std::string & name, common_peg_parser_id id);
 
@@ -310,9 +318,16 @@ class common_peg_parser_builder {
 
     common_peg_parser wrap(common_peg_parser_id id) { return common_peg_parser(id, *this); }
     common_peg_parser add(const common_peg_parser_variant & p) { return wrap(arena_.add_parser(p)); }
+    
+        bool allow_python_dict_format_ = false;
 
   public:
     common_peg_parser_builder();
+    
+        // Enable/disable Python dict format support (single-quoted strings).
+        // When enabled, JSON parsers will also accept Python dict-style single-quoted strings.
+        void set_allow_python_dict_format(bool allow) { allow_python_dict_format_ = allow; }
+        bool get_allow_python_dict_format() const { return allow_python_dict_format_; }
 
     // Match nothing, always succeed.
     //   S -> ε
@@ -418,10 +433,29 @@ class common_peg_parser_builder {
     // Useful for extracting content within a JSON string.
     common_peg_parser json_string_content();
 
+    // Matches a string that accepts both JSON double-quoted and Python dict single-quoted styles.
+    // This is useful when you explicitly want to accept both formats regardless of the allow_python_dict_format flag.
+    common_peg_parser flexible_string();
+
+    // Matches a Python dict-style single-quoted string content without the surrounding quotes.
+    // Useful for extracting content within a Python dict string.
+    common_peg_parser python_dict_string_content();
+
     // Matches a JSON object member with a key and associated parser as the
     // value.
     common_peg_parser json_member(const std::string & key, const common_peg_parser & p);
 
+    // Creates a complete Python dict format parser supporting objects, arrays, single-quoted strings,
+    // numbers, booleans, and null. Similar to JSON but uses single quotes for strings.
+    //   value -> object | array | string | number | true | false | null
+    common_peg_parser python_dict();
+    common_peg_parser python_dict_object();
+    common_peg_parser python_dict_string();
+    common_peg_parser python_dict_array();
+    common_peg_parser python_dict_number();
+    common_peg_parser python_dict_bool();
+    common_peg_parser python_dict_null();
+
     // Wraps a parser with JSON schema metadata for grammar generation.
     // Used internally to convert JSON schemas to GBNF grammar rules.
     common_peg_parser schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw = false);
diff --git a/docs/autoparser.md b/docs/autoparser.md
new file mode 100644
index 00000000000..4b48cceb768
--- /dev/null
+++ b/docs/autoparser.md
@@ -0,0 +1,700 @@
+# Unified Auto-Parser Architecture
+
+The auto-parser automatically analyzes chat templates to determine how to parse model outputs, including content, reasoning, and tool calls.
+
+## Overview
+
+The unified auto-parser uses a **pure differential, compositional approach** to analyze chat templates:
+
+**Core Philosophy**:
+
+- **Zero Hardcoded Patterns**: All markers extracted through template comparison (the **only heuristic** is JSON detection)
+- **Compositional Architecture**: Separate parsers for reasoning, content, and tools that compose cleanly
+- **Variant Types**: Structural descriptions (strings) instead of forced enum classification
+
+**Two-Phase Analysis**:
+
+1. **Phase 1: Content & Reasoning Analysis** - Analyzes how the template handles basic content and reasoning, without considering tools
+2. **Phase 2: Tool Call Analysis** - Analyzes tool calling patterns, layered on top of Phase 1
+
+## Data Structures
+
+### content_structure (Phase 1 Result)
+
+Describes how the template handles content and reasoning:
+
+```cpp
+struct content_structure {
+    enum reasoning_mode_type {
+        REASONING_NONE,         // No reasoning markers detected
+        REASONING_OPTIONAL,     // <think>...</think> may appear before content
+        REASONING_FORCED_OPEN,  // Template ends with open reasoning tag OR starts implicitly (empty start, present end)
+    };
+
+    reasoning_mode_type reasoning_mode = REASONING_NONE;
+    std::string         reasoning_start;  // e.g., "<think>", "<|START_THINKING|>"
+    std::string         reasoning_end;    // e.g., "</think>", "<|END_THINKING|>"
+
+    // Content wrapping mode
+    enum content_mode_type {
+        CONTENT_PLAIN,                   // No content markers
+        CONTENT_ALWAYS_WRAPPED,          // <response>...</response> always present
+        CONTENT_WRAPPED_WITH_REASONING,  // Content wrapped only when reasoning present
+    };
+
+    content_mode_type content_mode = CONTENT_PLAIN;
+    std::string       content_start;  // e.g., "<response>", "<|START_RESPONSE|>"
+    std::string       content_end;    // e.g., "</response>", "<|END_RESPONSE|>"
+};
+```
+
+### diff_analysis_result (Analysis Result)
+
+The result of differential analysis contains all extracted markers and format classifications:
+
+```cpp
+struct diff_analysis_result {
+    // Classification results
+    reasoning_mode  reasoning = reasoning_mode::NONE;
+    content_mode    content   = content_mode::PLAIN;
+    tool_format     tools     = tool_format::NONE;
+    argument_format args      = argument_format::JSON;
+
+    // All extracted markers (see marker_registry below)
+    marker_registry markers;
+
+    // JSON field names (for JSON-based formats)
+    std::string name_field = "name";
+    std::string args_field = "arguments";
+    std::string id_field;
+
+    // Flags
+    bool supports_tools           = false;
+    bool supports_parallel_calls  = false;
+    bool requires_nonnull_content = false;
+
+    // Preserved tokens for tokenizer
+    std::vector<std::string> preserved_tokens;
+};
+```
+
+### marker_registry (Extracted Markers)
+
+All markers are extracted via differential analysis without hardcoded patterns:
+
+```cpp
+struct marker_registry {
+    // === Reasoning markers ===
+    std::string reasoning_start;  // e.g., "<think>", "[THINK]", "<|START_THINKING|>"
+    std::string reasoning_end;    // e.g., "</think>", "[/THINK]", "<|END_THINKING|>"
+
+    // === Content markers ===
+    std::string content_start;  // e.g., "<response>", ">>>all\n"
+    std::string content_end;    // e.g., "</response>"
+
+    // === Tool section markers ===
+    std::string tool_section_start;  // e.g., "<tool_call>", "[TOOL_CALLS]"
+    std::string tool_section_end;    // e.g., "</tool_call>", "]"
+    std::string per_call_start;      // e.g., "\u2985" (for multi-call templates)
+    std::string per_call_end;        // e.g., " \u2985"
+    std::string call_separator;      // e.g., ",", "\n"
+
+    // === Function markers ===
+    std::string func_name_prefix;  // e.g., "<function=", "\"name\": \""
+    std::string func_name_suffix;  // e.g., ">", "\""
+    std::string func_close;        // e.g., "</function>"
+    std::string args_start;        // e.g., "{", " \u300b"
+    std::string args_end;          // e.g., "}", ""
+
+    // === Argument markers (for tagged args format) ===
+    std::string arg_name_prefix;   // e.g., "<param=", "<arg_key>"
+    std::string arg_name_suffix;   // e.g., ">", "</arg_key>"
+    std::string arg_value_prefix;  // e.g., "", "<arg_value>"
+    std::string arg_value_suffix;  // e.g., "</param>", "</arg_value>"
+    std::string arg_separator;
+
+    // === Special markers ===
+    std::string code_block_marker;    // e.g., "Action:" (markdown code block format)
+    std::string id_marker;            // e.g., "[CALL_ID]" (bracket-tag format)
+    std::string function_namespace;   // e.g., "functions." (prefixed-indexed format)
+};
+```
+
+## Tool Calling Formats
+
+The auto-parser recognizes three primary tool calling formats. Other formats may be deprecated in future versions.
+
+### JSON_NATIVE
+
+**Structure**: The entire tool call (function name, arguments, and values) is in JSON format. There may be enclosing tags around the tool calling section.
+
+**Characteristics**:
+- Function name is a JSON field: `"name": "function_name"`
+- Arguments are a JSON object: `"arguments": {"key": "value"}`
+- May be wrapped in section markers like `<tool_call>...</tool_call>` or `[TOOL_CALLS]...]`
+
+**Examples**:
+
+Standard OpenAI-style:
+```json
+<tool_call>
+{"name": "get_weather", "arguments": {"location": "Paris", "unit": "celsius"}}
+</tool_call>
+```
+
+Mistral Nemo with array wrapper:
+```json
+[TOOL_CALLS]
+[{"name": "calculate", "arguments": {"expr": "2+2"}}]
+```
+
+Hermes-style with tool_calls wrapper:
+```json
+<tool_calls>
+{"name": "search", "arguments": {"query": "llama.cpp"}}
+</tool_calls>
+```
+
+**Detection**: `args_start == "{"`, `args_end == "}"`, no function name prefix markers
+
+---
+
+### TAG_WITH_JSON
+
+**Structure**: The function name is outside the JSON structure, typically within quasi-XML markers. Arguments are still provided as a JSON object.
+
+**Characteristics**:
+- Function name appears in tag attributes: `<function=function_name>` or `<tool_call name="function_name">`
+- Arguments are a JSON object following the tag
+- Has closing tags: `</function>` or `</tool_call>`
+- Arguments remain valid JSON
+
+**Examples**:
+
+Nemotron-style:
+```xml
+<TOOLCALL>get_weather{"location": "Paris"}</TOOLCALL>
+```
+
+Functionary v3.1:
+```xml
+<function=get_weather>{"location": "Paris", "unit": "celsius"}</function>
+```
+
+ByteDance Seed-OSS:
+```xml
+<seed:tool_call>
+<tool_name>get_weather</tool_name>
+<parameters>{"location": "Paris"}</parameters>
+</seed:tool_call>
+```
+
+MiniMax:
+```xml
+<minimax:tool_call>
+<tool_name>calculate</tool_name>
+<arguments>{"expr": "2+2"}</arguments>
+</minimax:tool_call>
+```
+
+**Detection**: `func_name_prefix` starts with `<`, `args_start == "{"`, arguments are JSON
+
+---
+
+### TAG_WITH_TAGGED
+
+**Structure**: Both the function name AND argument names are in XML-style tags. Argument values may be JSON or unquoted primitives depending on schema type.
+
+**Characteristics**:
+- Function name in tag: `<function=name>` or `<invoke=name>`
+- Each argument has its own tag: `<param=key>value</param>`
+- String values are **unquoted** (raw text content of the tag)
+- Non-string values (objects, arrays, numbers, booleans) are still JSON-formatted
+- Supports streaming: partial arguments can be parsed incrementally
+
+**Examples**:
+
+Qwen/Hermes XML format:
+```xml
+<function=get_weather>
+<param=location>Paris</param>
+<param=unit>celsius</param>
+</function>
+```
+
+Note how string values (`Paris`, `celsius`) are unquoted inside the tags.
+
+Mixed types example:
+```xml
+<function=calculate>
+<param=expr>2+2</param>
+<param=precision>2</param>
+<param=options>{"round": true}</param>
+</function>
+```
+
+Here:
+- `expr` and `precision` are strings (unquoted)
+- `options` is an object (JSON-formatted inside the tag)
+
+**Detection**: `arg_name_prefix` is non-empty, arguments use tagged format rather than JSON object
+
+---
+
+### Other Formats (To Be Deprecated)
+
+The following formats are currently supported but will likely be deprecated:
+
+| Format | Description | Example |
+|--------|-------------|---------|
+| `BRACKET_TAG` | Bracket-based markers | `[TOOL_CALLS]func[ARGS]{...}` |
+| `PREFIXED_INDEXED` | Namespace prefix with index | `functions.name:0{...}` |
+| `RECIPIENT_BASED` | Recipient routing | `>>>recipient\n{content}` |
+| `MARKDOWN_BLOCK` | Markdown code blocks | `Action:\n\`\`\`json\n[...]` |
+
+## Analysis Flow
+
+```console
+Template
+    |
+    v
+Phase 1: analyze_content_structure()
+    |-- detect_reasoning_markers() - compare outputs with reasoning_content vs without
+    |-- detect_content_markers() - render with content and detect wrapping
+    |-- detect_reasoning_mode() - check if prompt ends with open tag
+    |
+    v
+content_structure
+    |
+    v
+Phase 2: analyze_tool_structure()
+    |-- Check minja.supports_tool_calls
+    |-- Differential analysis for tool patterns
+    |-- Classify function format (JSON vs tagged)
+    |-- Classify argument format (JSON vs tagged)
+    |
+    v
+diff_analysis_result
+    |
+    v
+generate_parser(diff_analysis_result)
+    |-- build_reasoning_block(diff_analysis_result)
+    |-- build_content_block(diff_analysis_result)
+    |-- build_tool_section(diff_analysis_result, tools)
+    |-- Compose into final parser
+    |
+    v
+common_chat_params (parser, grammar, triggers, preserved_tokens)
+```
+
+## Entry Point
+
+The mechanism starts in `common/chat.cpp`, in `common_chat_templates_apply_jinja`:
+
+```cpp
+// 1. Analyze the template (two-phase)
+auto analysis = differential_analyzer::analyze(tmpl);
+
+// 2. Generate the parser and grammar
+auto auto_params = universal_peg_generator::generate_parser(tmpl, params);
+
+// 3. Use if it provides more than basic content handling
+if (auto_params.format != COMMON_CHAT_FORMAT_CONTENT_ONLY ||
+    !auto_params.parser.empty()) {
+    return auto_params;
+}
+```
+
+## Builder Methods
+
+The unified builder (`common_chat_peg_unified_builder`) provides high-level methods:
+
+- `build_reasoning_block(analysis, reasoning_format, thinking_forced_open)` - Build reasoning parser
+- `build_content_block(analysis, reasoning_format)` - Build content parser
+- `build_tool_section(analysis, tools, parallel_tool_calls, force_tool_calls)` - Build tool section
+- `build_function(analysis, name, schema)` - Build single function parser
+- `build_arguments(analysis, schema)` - Build arguments parser
+
+## Key Templates Supported
+
+- **Granite** - `<think></think>` + `<response></response>` with tool calls
+- **Nemotron** - JSON tools with `<TOOLCALL>` wrapper
+- **Qwen/Hermes** - XML-style `<function=X><param=key>` format (TAG_WITH_TAGGED)
+- **Command-R7B** - `<|START_THINKING|>`/`<|START_RESPONSE|>` + `<|START_ACTION|>` tools
+- **DeepSeek R1** - Forced thinking + complex tools
+- **Mistral Nemo** - `[TOOL_CALLS]` wrapper (JSON_NATIVE)
+- **MiniMax** - `<minimax:tool_call>` wrapper with JSON args (TAG_WITH_JSON)
+- **GLM-4.6** - `<minimax:tool_call>` + `<tool_call>name\n<arg_key>...<arg_value>...` format
+- **Kimi-K2** - `PREFIXED_INDEXED` format with namespace and indices
+- **Mistral Small 3.2** - `BRACKET_TAG` format with `[TOOL_CALLS]` markers
+- **Functionary v3.2** - `RECIPIENT_BASED` format with `>>>` routing
+
+## Files
+
+| File | Purpose |
+|------|---------|
+| `common/chat-auto-parser.h` | Data structures and API declarations |
+| `common/chat-diff-analyzer.h/cpp` | Differential analysis implementation |
+| `common/chat-auto-parser-generator.cpp` | PEG parser generator |
+| `common/chat-auto-parser-helpers.h/cpp` | Shared helper functions |
+| `common/chat-peg-parser.h/cpp` | Unified builder and mapper classes |
+| `common/chat.cpp` | Main entry point and wire-up |
+
+## Algorithm Details
+
+### Phase 1: Content & Reasoning Analysis
+
+#### Reasoning Detection (4 Methods)
+
+**Method 1: Differential Reasoning Content Analysis**
+
+- Render template with `reasoning_content` field present vs absent
+- Compare outputs to find markers between reasoning and content
+- If only closing tag found, derive opening tag using patterns:
+  - XML: `</tag>` → `<tag>`
+  - Special tokens: `<|END_X|>` → `<|START_X|>`, `<|/X|>` → `<|X|>`
+- Handles various tag formats including XML and special token formats
+
+**Method 2: Enable-Thinking Toggle Analysis**
+
+- Toggle `enable_thinking` context variable between true/false
+- Detects differences in generated prompts
+- Handles two scenarios:
+  - **Normal case**: enable_thinking=true adds reasoning markers
+  - **Reverse case**: enable_thinking=false adds empty thinking block (GLM-4.6 style)
+- Uses string difference analysis to extract markers
+- Validates extracted tags against blacklist of role markers
+
+**Method 3: Prompt Ending Analysis**
+
+- Checks if prompt ends with unclosed reasoning tag
+- Looks for trailing tags in prompt with `enable_thinking=true`
+- Differentiates between open tags (`<think>`) and close tags (`</think>`)
+- Handles blacklisted tags (role markers, system tokens)
+- Validates reasoning-like patterns (contains "think", "reason", "thought")
+
+**Method 4: Adjacent Tag Pair Detection**
+
+- Looks for patterns like `<minimax:tool_call></think>`, `<|START_THINKING|><|END_THINKING|>`, `[think][/think]`
+- Searches for predefined tag patterns in prompt
+- Validates tags are adjacent with only whitespace between
+- Supports both simple and complex token formats
+
+#### Content Detection Algorithm
+
+1. **Dual-Mode Rendering**: Render template with content marker in both thinking-enabled and thinking-disabled modes
+2. **Pattern Matching**: Search for known content wrapper patterns:
+   - `<|START_RESPONSE|>` / `<|END_RESPONSE|>`
+   - `<response>` / `</response>`
+   - `<output>` / `</output>`
+   - `<answer>` / `</answer>`
+   - `<|CHATBOT_TOKEN|>` / `<|END_OF_TURN_TOKEN|>`
+3. **Mode Classification**:
+   - `CONTENT_ALWAYS_WRAPPED`: Found in both thinking modes
+   - `CONTENT_WRAPPED_WITH_REASONING`: Found only with thinking enabled
+   - `CONTENT_PLAIN`: No wrapping detected
+
+#### Reasoning Mode Detection
+
+- **REASONING_FORCED_OPEN**:
+  - **Explicit**: Prompt ends with reasoning start marker (e.g., `<think>`).
+  - **Implicit**: reasoning end marker is present but start marker is empty (e.g., `[BEGIN FINAL RESPONSE]`).
+- **REASONING_OPTIONAL**: Markers present but not forced.
+- **REASONING_NONE**: No markers detected.
+
+### Phase 2: Tool Call Structure Analysis
+
+#### Pure Differential Analysis Algorithm
+
+**Key Principle**: All patterns are extracted through template comparison. The **only heuristic** is detecting JSON vs marker-based structures (via JSON parse attempt). No hardcoded pattern lists.
+
+**Comparison Matrix**:
+
+| Comparison | Purpose | What's Extracted |
+|------------|---------|------------------|
+| **T1**: No tools vs tools | Tool section markers | `tool_section_start`, `tool_section_end` |
+| **T2**: 1 call vs 2 calls | Call separators | `per_call_start`, `call_separator` |
+| **T3**: func_alpha vs func_beta | Function boundaries | `func_name_prefix`, `func_name_suffix` |
+| **T4**: 1 arg vs 2 args | Argument separator | `arg_separator` |
+| **T5**: No args vs args | Args container | `args_start`, `args_end` |
+| **A1**: key1 vs key2 | Arg name boundaries | `arg_name_prefix`, `arg_name_suffix` |
+| **A2**: value A vs B | Arg value boundaries | `arg_value_prefix`, `arg_value_suffix` |
+| **A3**: number vs string | Quoting behavior | Value type handling |
+
+**Structural Extraction Helpers**:
+
+```cpp
+// Extract last structural marker from string (finds last <, [, {, or ")
+std::string extract_structural_suffix(const std::string & str);
+
+// Extract first structural marker from string (finds first >, ], }, or ")
+std::string extract_structural_prefix(const std::string & str);
+
+// The only heuristic: detect if content is valid JSON
+bool is_json_based(const std::string & content);
+```
+
+**Pattern Extraction Process** (Example - T1: Tool Section Markers):
+
+1. Render template with/without tool calls
+2. Compute diff: `calculate_diff_split(output_no_tools, output_with_tools)`
+3. Use controlled function name (`func_alpha`) as anchor in `diff.right`
+4. Extract structural prefix before function name → `tool_section_start`
+5. Extract structural suffix after tool content → `tool_section_end`
+
+**No Pattern Lists**: Unlike the old approach, there are no hardcoded lists like `["<tool_call>", "[TOOL_CALLS]", ...]`. All markers are discovered through differential comparison.
+
+#### Variant Detection Logic
+
+Instead of forcing patterns into enum types, the analyzer detects **variant types** as strings that describe the structural characteristics:
+
+**Variant Types**:
+
+- `"json-native"`: Pure JSON tool calls (Llama, Mistral Nemo)
+- `"tagged-json"`: Function name in markers, args in JSON (Functionary v3.1, Nemotron)
+- `"tagged-args"`: Full XML-style with tagged arguments (Qwen, Hermes, MiniMax)
+- `"bracket-tag"`: Bracket markers (Mistral Small 3.2: `[TOOL_CALLS]func[ARGS]{...}`)
+- `"recipient-based"`: Recipient routing (Functionary v3.2: `>>>func_name`)
+- `"markdown-block"`: Markdown code blocks (Cohere Command-R Plus)
+- `"prefixed-indexed"`: Namespace prefix with indices (Kimi-K2: `functions.name:0`)
+
+**Detection Strategy** (from most to least distinctive):
+
+```cpp
+void detect_tool_variant(diff_analysis_result & result) {
+    // 1. Check for unique markers (most distinctive)
+    if (!result.markers.id_marker.empty())
+        → "bracket-tag"
+
+    if (markers contain ">>>")
+        → "recipient-based"
+
+    if (code_block_marker present)
+        → "markdown-block"
+
+    if (function_namespace or suffix contains ':')
+        → "prefixed-indexed"
+
+    // 2. Check argument structure (JSON variants)
+    if (arg_name_prefix starts with '<')
+        → "tagged-args"
+
+    if (func_name_prefix starts with '<')
+        → "tagged-json"
+
+    // 3. Default
+    → "json-native"
+}
+```
+
+#### Compositional Parser Building
+
+The analyzer builds separate, composable parsers for each component:
+
+**Reasoning Parser**:
+
+- Built from `reasoning_start` and `reasoning_end` markers
+- Supports tag-based, delimiter, and forced-open modes
+
+**Content Parser**:
+
+- Built from `content_start` and `content_end` markers
+- Supports plain, always-wrapped, and conditionally-wrapped modes
+
+**Tool Parser** (variant-specific):
+
+- Built based on `variant_type` detection
+- Each variant has its own builder that uses the extracted markers
+- No enum forcing - structure preserved as discovered
+
+**Final Composition**:
+
+```cpp
+sequence({
+    reasoning_parser,
+    space(),
+    content_parser,
+    space(),
+    tool_parser,
+    end()
+})
+```
+
+### Generator Algorithms
+
+#### Unified Parser Building
+
+**Composition Strategy**:
+
+```cpp
+// Standard format
+sequence({ reasoning, space(), content, space(), tools, space(), content, end() })
+
+// With section markers
+sequence({ reasoning, space(), content_until(section_start), space(), tools, space(), content, end() })
+
+// Forced thinking handling
+optional(reasoning) when thinking_forced_open && tools present
+```
+
+**Trigger Word Detection**:
+
+- Uses `tool_section_start` as primary trigger
+- Falls back to `function_prefix` or `per_call_start`
+- Raw JSON uses regex pattern trigger
+
+**Lazy Grammar Optimization**:
+
+- Enabled by default for performance
+- Disabled when thinking forced open
+- Disabled when no clear trigger word exists
+
+## Testing & Debugging
+
+### Comprehensive Test Coverage
+
+The test suite covers:
+
+**Reasoning Models**:
+
+- Qwen-QwQ-32B (forced-open thinking)
+- DeepSeek R1 variants (reasoning only)
+- IBM Granite (reasoning + tools)
+- ByteDance Seed-OSS (custom reasoning tags)
+- Ministral-3-14B-Reasoning
+- llama-cpp-deepseek-r1
+
+**Tool Call Formats**:
+
+- JSON_NATIVE: Llama 3.x, Mistral Nemo, Hermes, MiMo-VL
+- TAG_WITH_JSON: Nemotron, Qwen3-Coder, MiniMax
+- TAG_WITH_TAGGED: Qwen, Hermes (XML), ByteDance Seed-OSS
+- BRACKET_TAG: Mistral Small 3.2, Devstral
+- PREFIXED_INDEXED: Kimi-K2 variants
+- RECIPIENT_BASED: Functionary v3.2
+- MARKDOWN_BLOCK: Cohere Command-R Plus
+
+**Edge Cases**:
+
+- Streaming/partial parsing
+- Empty content with tools
+- Parallel tool calls
+- Forced thinking mode
+- Multi-byte Unicode markers
+- Null content handling
+- Multi-line code in tool arguments
+- Custom reasoning tags (ByteDance Seed-OSS)
+
+### Debug Tools
+
+**Template Debugger**: `tests/debug-template-parser.cpp`
+
+- Usage: `./bin/debug-template-parser path/to/template.jinja`
+- Shows detected format, markers, generated parser, and GBNF grammar
+
+**Debug Logging**: Enable with `LLAMA_LOG_VERBOSITY=2`
+
+- Shows detailed analysis steps
+- Displays pattern extraction results
+- Lists generated parser structure
+
+**PEG Test Builder**: Fluent API for creating test cases
+
+```cpp
+auto tst = peg_tester("template.jinja");
+tst.test("input")
+   .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+   .tools({tool})
+   .expect(expected_message)
+   .run();
+```
+
+## Adding Support for New Templates
+
+To support a new template format:
+
+1. **If it follows standard patterns** - The auto-parser should detect it automatically using the three main formats (JSON_NATIVE, TAG_WITH_JSON, TAG_WITH_TAGGED)
+2. **If it has unique markers** - Add differential analysis patterns in:
+   - `compare_reasoning_presence()` for reasoning tags
+   - `compare_content_values()` for content wrappers
+   - `extract_tool_section()` for tool call patterns
+3. **If it needs special handling** - Add a dedicated handler in `chat.cpp` before the auto-parser block
+
+## Edge Cases and Quirks
+
+1. **Forced Thinking**: If `enable_thinking` is true but the model has already started a thought block (e.g., ended the prompt with `<think>`), the parser enters "forced thinking" mode where it immediately expects reasoning content.
+2. **Ambiguous Content**: Templates that mix content and tool calls without clear delimiters can be tricky. The analyzer tries to find "common" start/end patterns across multiple examples to be robust.
+3. **Double Wrapping**: Some templates (e.g., Functionary) use the same string for both the tool section start and the function prefix (e.g., `<function=`). The analyzer detects this overlap and prevents double-wrapping in the generated parser.
+4. **Null Content Rendering**: Some templates render `null` content as Python "None" string. The analyzer detects this and patches content to empty string.
+5. **Multi-byte Unicode Markers**: Some templates use special Unicode characters in markers that require careful handling in GBNF generation.
+
+## State of the Autoparser (Jan 2026)
+
+As of January 2026, the unified auto-parser successfully handles major template families including DeepSeek V3/R1, Llama 3.x (native JSON), GLM-4/4.6, and standard XML/JSON formats. It also supports Functionary v3.1/v3.2, Mistral variants, and specialized formats like Kimi-K2's prefixed-indexed structure.
+
+### Tested Templates
+
+The following templates have active tests in `tests/test-chat.cpp`:
+
+| Template | Format | Notes |
+|----------|--------|-------|
+| DeepSeek V3.1 | `JSON_NATIVE` | Forced thinking mode |
+| DeepSeek R1 Distill (Llama/Qwen) | Reasoning only | Forced-open thinking |
+| llama-cpp-deepseek-r1 | Reasoning only | Forced-open thinking |
+| GLM-4.6 | `TAGGED` | `<tool_call>name\n<arg_key>...<arg_value>...` format |
+| Kimi-K2 / Kimi-K2-Instruct / Kimi-K2-Thinking | `PREFIXED_INDEXED` | `functions.name:0` with special markers |
+| Apertus-8B-Instruct | `NAME_AS_KEY` | `{"function_name": {...}}` format |
+| MiniMax-M2 | `TAG_WITH_JSON` | XML invoke with parameter tags |
+| NVIDIA-Nemotron-Nano-v2 | `JSON_NATIVE` | `<TOOLCALL>` wrapper (nested) |
+| Mistral-Nemo-Instruct-2407 | `JSON_NATIVE` | `[TOOL_CALLS]` wrapper with id field |
+| Functionary v3.1 | `TAG_WITH_JSON` | `<function=X>` non-nested format |
+| Functionary v3.2 | `RECIPIENT_BASED` | `>>>` recipient delimiter format |
+| MiMo-VL / Hermes 3 / Qwen 2.5 | `JSON_NATIVE` | `<tool_call>` wrapper |
+| Apriel 1.5 | `JSON_NATIVE` | `<tool_calls>` wrapper with JSON array |
+| Apriel 1.6 Thinker | Reasoning only | Implicit reasoning start |
+| Cohere Command-R7B | `JSON_NATIVE` | START_RESPONSE/ACTION/THINKING markers |
+| Mistral Small 3.2 | `BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` with ID |
+| Devstral | `BRACKET_TAG` | `[TOOL_CALLS]func[ARGS]{...}` without ID |
+| Ministral-3-14B-Reasoning | Custom reasoning | `[THINK]...[/THINK]` tags |
+| IBM Granite | `JSON_NATIVE` | `<think></think>` + `<response></response>` |
+| ByteDance Seed-OSS | `TAG_WITH_TAGGED` | Custom `<seed:think>` and `<seed:tool_call>` tags |
+| Qwen3-Coder | `TAG_WITH_TAGGED` | XML-style tool format |
+| Cohere Command-R Plus | `MARKDOWN_BLOCK` | `Action:\n`\`\`\`json\n[...]\n`\`\`` format |
+
+### Currently Unsupported Templates
+
+| Template Family | Model / Variant | Issue Description |
+|-----------------|-----------------|-------------------|
+| **OpenAI** | `GPT-OSS` | Complex channel markers need new format |
+
+### Templates Without Tool Support
+
+Some templates genuinely don't support tool calls (this is not a detection bug):
+
+- **Phi 3.5 Mini** - The official template has no tool handling. Use Phi-4-mini-instruct for function calling, or community fine-tuned versions.
+- **Google Gemma 2 2B** - Pure instruction-following model without tool capabilities.
+
+### TODO / Roadmap
+
+- [ ] **Fix OpenAI GPT-OSS**: Add handling for channel marker structure.
+- [x] **~~Fix Cohere Command-R Plus~~**: Added `MARKDOWN_BLOCK` format for `Action:\n`\`\`\`json` structure.
+
+### Recent Additions (Dec 2025 - Jan 2026)
+
+- **RECIPIENT_BASED**: Support for Functionary v3.2's `>>>` recipient delimiter format
+- **BRACKET_TAG**: Support for Mistral Small 3.2 and Devstral's `[TOOL_CALLS]...` format
+- **Enhanced Content Detection**: Better handling of custom reasoning tags and content wrappers
+- **Improved Streaming Support**: Better handling of partial parsing for all supported formats
+- **Custom Tag Support**: Support for non-standard reasoning tags like `<seed:think>` (ByteDance)
+- **Multi-line Tool Arguments**: Better parsing of complex tool arguments with code blocks
+- **MARKDOWN_BLOCK**: Support for Cohere Command-R Plus markdown code block format
+- **Implicit Reasoning Support**: Support for templates where reasoning starts implicitly without a start marker.
+- **Pure Differential Refactoring (Jan 2026)**: Complete refactoring to eliminate hardcoded patterns:
+  - Removed all hardcoded pattern lists (previously had `["<tool_call>", "[TOOL_CALLS]", ...]`)
+  - Added structural extraction helpers (`extract_structural_suffix`, `extract_structural_prefix`)
+  - Replaced enum-based classification with string-based variant types
+  - Only remaining heuristic: JSON detection via parse attempt
+  - All markers now discovered through differential template comparison
+- **Three Primary Tool Formats**: Consolidated tool calling formats to JSON_NATIVE, TAG_WITH_JSON, and TAG_WITH_TAGGED for clarity and maintainability
+
+The auto-parser now successfully handles 25+ different template formats across reasoning-only, tool-calling, and hybrid models, with comprehensive test coverage ensuring robust parsing across streaming and non-streaming scenarios.
diff --git a/docs/development/parsing.md b/docs/development/parsing.md
index dbb989bf08e..e627ea65025 100644
--- a/docs/development/parsing.md
+++ b/docs/development/parsing.md
@@ -22,7 +22,7 @@ Below is a contrived example demonstrating how to use the PEG parser to parse
 output from a model that emits arguments as JSON.
 
 ```cpp
-auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
+auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
     // Build a choice of all available tools
     auto tool_choice = p.choice();
     for (const auto & tool : tools) {
@@ -212,7 +212,7 @@ mapper.from_ast(ctx.ast, result);
 
 ### Native
 
-The `common_chat_peg_native_builder` builds a `native` parser suitable for
+The `common_chat_peg_unified_builder` builds a `native` parser suitable for
 models that emit tool arguments as a direct JSON object.
 
 - **`reasoning(p)`** - Tag node for `reasoning_content`
@@ -225,7 +225,7 @@ models that emit tool arguments as a direct JSON object.
 - **`tool_args(p)`** - Tag the tool arguments
 
 ```cpp
-build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) {
+build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
     auto get_weather_tool = p.tool(p.sequence({
         p.tool_open(p.literal("{")),
         p.json_member("name", "\"" + p.tool_name(p.literal("get_weather")) + "\""),
@@ -246,7 +246,7 @@ build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) {
 
 ### Constructed
 
-The `common_chat_peg_constructed_builder` builds a `constructed` parser
+The `common_chat_peg_unified_builder` builds a `constructed` parser
 suitable for models that emit tool arguments as separate entities, such as XML
 tags.
 
@@ -264,7 +264,7 @@ tags.
 - **`tool_arg_json_value(p)`** - Tag JSON value for the argument
 
 ```cpp
-build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
+build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
     auto location_arg = p.tool_arg(
         p.tool_arg_open("<parameter name=\"" + p.tool_arg_name(p.literal("location")) + "\">"),
         p.tool_arg_string_value(p.until("</parameter>")),
diff --git a/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
new file mode 100755
index 00000000000..9df29255b7d
--- /dev/null
+++ b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
@@ -0,0 +1,173 @@
+{# ---------------------------------------------------------------------- #}
+{# ƛƬ Default setup and flags                                             #}
+{# ---------------------------------------------------------------------- #}
+{# FIX: Use "is defined" check BEFORE accessing the variable              #}
+{%- set messages = messages if (messages is defined and messages) else [] -%}
+{%- set tools = tools if (tools is defined and tools) else [] -%}
+{%- set add_generation_prompt = add_generation_prompt if (add_generation_prompt is defined) else false -%}
+{%- set available_tool_string = '' -%}
+{%- set add_tool_id = true -%}
+{%- set add_thoughts = true -%}            {# whether to include <thinking> reasoning blocks #}
+{%- set add_generation_prompt = true -%}      {# whether to emit reasoning starter before assistant response #}
+{# Optional token placeholders (safe defaults) #}
+{%- set bos_token = bos_token if (bos_token is defined) else '' -%}
+{%- set eos_token = eos_token if (eos_token is defined) else '' -%}
+{# ---------------------------------------------------------------------- #}
+{# Core reasoning prompt and assistant reasoning prefix                 #}
+{# ---------------------------------------------------------------------- #}
+{%- set reasoning_prompt -%}
+    You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab.
+    Analyze each question carefully, present your reasoning step-by-step, then provide the final
+    response after the marker [BEGIN FINAL RESPONSE].
+{%- endset -%}
+{%- set reasoning_asst_turn_start = 'Here are my reasoning steps:\n' -%}
+{# ---------------------------------------------------------------------- #}
+{# Tool list and tool call output format                                  #}
+{# ---------------------------------------------------------------------- #}
+{%- if tools|length > 0 -%}
+    {%- set available_tool_string -%}
+        You are provided with function signatures within <available_tools></available_tools> XML tags.
+        You may call one or more functions to assist with the user query.
+        Don't make assumptions about the arguments. You should infer the argument values from previous
+        user responses and the system message.
+        Here are the available tools: 
+        <available_tools>
+        {% for tool in tools %}{{ tool|string }}{% endfor %}
+        
+        </available_tools>.
+
+        Return all function calls as a list of JSON objects within <tool_calls></tool_calls> XML tags.
+        Each JSON object should contain a function name and arguments as follows:
+        <tool_calls>[
+            {"name": <function-name-1>, "arguments": <args-dict-1>},
+            {"name": <function-name-2>, "arguments": <args-dict-2>},
+            ...
+        ]</tool_calls>
+    {%- endset -%}
+{%- endif -%}
+{# ---------------------------------------------------------------------- #}
+{# Start system block if first message is not system                      #}
+{# ---------------------------------------------------------------------- #}
+{%- if messages|length > 0 and messages[0]['role'] != 'system' -%}
+    {%- if tools|length > 0 -%}
+        {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + available_tool_string + '\n' }}
+    {%- else -%}
+        {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' }}
+    {%- endif -%}
+{%- endif -%}
+{# ---------------------------------------------------------------------- #}
+{# Iterate through messages                                             #}
+{# ---------------------------------------------------------------------- #}
+{%- for message in messages -%}
+
+    {# ---------------- USER MESSAGE ---------------- #}
+    {%- if message['role'] == 'user' -%}
+        {{ '<|begin_user|>\n' }}
+        {%- if message['content'] is not string -%}
+            {%- for chunk in message['content'] -%}
+                {%- if chunk['type'] == 'text' -%}
+                    {{ chunk['text'] }}
+                {%- elif chunk['type'] in ['image', 'image_url'] -%}
+                    {{ '[IMG]' }}
+                {%- else -%}
+                    {{ raise_exception('Unrecognized content type!') }}
+                {%- endif -%}
+            {%- endfor -%}
+        {%- else -%}
+            {{ message['content'] }}
+        {%- endif -%}
+
+    {# ---------------- SYSTEM MESSAGE ---------------- #}
+    {%- elif message['role'] == 'system' -%}
+        {%- set sys_content = message.get('content', '') -%}
+        {%- if sys_content and sys_content|length > 0 -%}
+            {%- if sys_content is string -%}
+                {%- set system_message = sys_content -%}
+            {%- else -%}
+                {%- set system_message = sys_content[0]['text'] -%}
+            {%- endif -%}
+        {%- else -%}
+            {%- set system_message = '' -%}
+        {%- endif -%}
+
+        {%- if tools|length > 0 -%}
+            {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' + available_tool_string + '\n' }}
+        {%- else -%}
+            {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' }}
+        {%- endif -%}
+
+    {# ---------------- ASSISTANT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'assistant' -%}
+        {%- if loop.last -%}
+            {%- set add_tool_id = false -%}
+        {%- endif -%}
+
+        {{ '\n<|begin_assistant|>\n' }}
+
+        {%- if add_thoughts and message.get('reasoning_content') and loop.last -%}
+            {{ message['reasoning_content'] + '\n[BEGIN FINAL RESPONSE]\n' }}
+        {%- endif -%}
+
+        {%- set asst_content = message.get('content', '') -%}
+        {%- if asst_content and asst_content|length > 0 -%}
+            {%- if asst_content is not string -%}
+                {%- set asst_text = asst_content[0]['text'] -%}
+            {%- else -%}
+                {%- set asst_text = asst_content -%}
+            {%- endif -%}
+            {# For historical turns (not the last), strip reasoning and keep only final response #}
+            {%- if not loop.last and '[BEGIN FINAL RESPONSE]' in asst_text -%}
+                {{- asst_text.split('[BEGIN FINAL RESPONSE]')[-1] | trim -}}
+            {%- else -%}
+                {{- asst_text -}}
+            {%- endif -%}
+        {%- elif message.get('chosen') and message['chosen']|length > 0 -%}
+            {{ message['chosen'][0] }}
+        {%- endif -%}
+
+        {# Tool call output #}
+        {%- set tool_calls = message.get('tool_calls', []) -%}
+        {%- if tool_calls and tool_calls|length > 0 -%}
+            {{ '\n<tool_calls>[' }}
+            {%- for tool_call in tool_calls -%}
+                {{ '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string }}
+                {%- if add_tool_id == true and 'id' in tool_call -%}
+                    {{ ', "id": "' + tool_call['id'] + '"' }}
+                {%- endif -%}
+                {{ '}' }}
+                {%- if not loop.last -%}{{ ', ' }}{%- endif -%}
+            {%- endfor -%}
+            {{ ']</tool_calls>' }}
+        {%- endif -%}
+
+        {%- set training_prompt = training_prompt if (training_prompt is defined) else false -%}
+        {%- if not loop.last or training_prompt -%}
+            {{ '\n<|end|>\n' }}
+        {%- endif -%}
+
+    {# ---------------- TOOL RESULT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'tool' -%}
+        {%- set tool_content = message.get('content', '') -%}
+        {%- if tool_content is string -%}
+            {%- set tool_message = tool_content -%}
+        {%- else -%}
+            {%- set tool_message = tool_content[0]['text'] if tool_content else '' -%}
+        {%- endif -%}
+        {{ '<|begin_tool_result|>\n' + tool_message|string + '\n' }}
+
+    {# ---------------- CONTENT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'content' -%}
+        {%- set msg_content = message.get('content', '') -%}
+        {%- if msg_content is not string -%}
+            {{ '<|begin_content|>\n' + msg_content[0]['text'] + '\n' }}
+        {%- else -%}
+            {{ '<|begin_content|>\n' + msg_content + '\n' }}
+        {%- endif -%}
+    {%- endif -%}
+
+    {# ---------------- REASONING PROMPT BEFORE NEXT ASSISTANT ---------------- #}
+    {%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%}
+        {{ '\n<|begin_assistant|>\n' + reasoning_asst_turn_start }}
+    {%- endif -%} 
+
+{%- endfor -%}
diff --git a/models/templates/Bielik-11B-v3.0-Instruct.jinja b/models/templates/Bielik-11B-v3.0-Instruct.jinja
new file mode 100644
index 00000000000..40ef50076eb
--- /dev/null
+++ b/models/templates/Bielik-11B-v3.0-Instruct.jinja
@@ -0,0 +1,77 @@
+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content'] %}
+    {%- set loop_start_index = 1 %}
+{%- else %}
+    {%- set system_message = "" %}
+    {%- set loop_start_index = 0 %}
+{%- endif %}
+
+{%- if system_message or tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if system_message %}
+        {{- system_message }}
+    {%- endif %}
+    {%- if tools %}
+        {{- '\n\nMasz dostęp do następujących narzędzi. Definicje narzędzi znajdują się poniżej wewnątrz znaczników <|function_list|>:\n<|function_list|>\n' }}
+        {{- '[' }}
+        {%- for tool in tools %}
+            {{- tool | tojson }}
+            {%- if not loop.last %}
+                {{- ',\n' }}
+            {%- endif %}
+        {%- endfor %}
+        {{- ']\n<|function_list|>\n\nAby wywołać narzędzie, użyj formatu <tool_call>{"name": "nazwa_narzędzia", "arguments": {"argument": "wartość"}}</tool_call>. Wyniki działania narzędzi zostaną przekazane z markerem <|function_output|>.\n' }}
+    {%- endif %}
+    {%- if enable_thinking %}
+        {{- '\n\nZanim odpowiesz na pytanie, najpierw przemyśl swoje kroki i umieść swoje myśli wewnątrz tagów <think>...</think>. Musisz najpierw pomyśleć, zanim udzielisz odpowiedzi. ' -}}
+        {{- 'WAŻNE: Powinieneś myśleć w tym samym języku, co pytanie użytkownika. Jeśli pytanie jest zadane po polsku, powinieneś również myśleć po polsku. Jeśli pytanie jest po angielsku, myślisz również po angielsku itd. ' }}
+        {{- '** PAMIĘTAJ! ** Pytanie po polsku -> myślenie po polsku -> odpowiedź po polsku!' -}}
+        {{- '*** BARDZO WAŻNE!!! *** Jesteś Bielikiem, polskim modelem językowym. Twoją główną cechą jest umiejętność pisania po polsku. Jeśli użytkownik zadaje Ci pytania po polsku, ZAWSZE odpowiadaj po polsku. ' -}}
+        {{- 'Nawet, jeśli korzystasz z narzędzia, którego większość instrukcji jest po angielsku, powinieneś przede wszystkim odpowiadać po polsku, jeśli użytkownik zadaje pytanie w tym języku. ' -}}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- endif %}
+
+{%- for message in messages[loop_start_index:] %}
+    {%- if message['role'] == 'user' %}
+        {{- '<|im_start|>user\n' + message['content'] + '<|im_end|>\n' }}
+    {%- elif message['role'] == 'assistant' %}
+        {{- '<|im_start|>assistant\n' }}
+        {%- set content = message.content | default('') %}
+        {%- set reasoning_content = message.reasoning_content | default('') %}
+        {%- if not reasoning_content and '<think>' in content and '</think>' in content %}
+            {%- set reasoning_parts = content.split('</think>') %}
+            {%- set reasoning_content = reasoning_parts[0].split('<think>')[-1] %}
+            {%- set content = reasoning_parts[1:] | join('</think>') %}
+        {%- endif %}
+        {%- if reasoning_content %}
+            {{- '<think>\n' + reasoning_content.strip() + '\n</think>\n' }}
+        {%- endif %}
+        {{- content.lstrip() }}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '\n<tool_call>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message['role'] == 'tool' %}
+        {%- if loop.index0 == 0 or messages[loop.index0 - 1]['role'] != 'tool' %}
+            {{- '<|im_start|>user\n' }}
+        {%- endif %}
+        {{- '<|function_output|>' + message['content'] }}
+        {%- if loop.last or messages[loop.index0 + 1]['role'] != 'tool' %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking %}
+        {{- '<think>\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
index 078e9f5458e..e144cfcf69c 100644
--- a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
+++ b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
@@ -132,7 +132,7 @@ The following instructions take precedence over instructions in the default prea
     {%- elif message.role|lower == 'user' %}
 <|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %}
     {%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %}
-<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[
+<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.reasoning_content}}<|END_THINKING|><|START_ACTION|>[
     {% for tc in message.tool_calls %}
     {"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %}
 
diff --git a/models/templates/GLM-4.7-Flash.jinja b/models/templates/GLM-4.7-Flash.jinja
new file mode 100644
index 00000000000..2ab98ef068d
--- /dev/null
+++ b/models/templates/GLM-4.7-Flash.jinja
@@ -0,0 +1,86 @@
+[gMASK]<sop>
+{%- if tools -%}
+<|system|>
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{% for tool in tools %}
+{{ tool | tojson(ensure_ascii=False) }}
+{% endfor %}
+</tools>
+
+For each function call, output the function name and arguments within the following XML format:
+<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}
+{%- macro visible_text(content) -%}
+    {%- if content is string -%}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping -%}
+        {%- for item in content -%}
+            {%- if item is mapping and item.type == 'text' -%}
+                {{- item.text }}
+            {%- elif item is string -%}
+                {{- item }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{- content }}
+    {%- endif -%}
+{%- endmacro -%}
+{%- set ns = namespace(last_user_index=-1) %}
+{%- for m in messages %}
+    {%- if m.role == 'user' %}
+        {% set ns.last_user_index = loop.index0 -%}
+    {%- endif %}
+{%- endfor %}
+{% for m in messages %}
+{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
+{%- elif m.role == 'assistant' -%}
+<|assistant|>
+{%- set reasoning_content = '' %}
+{%- set content = visible_text(m.content) %}
+{%- if m.reasoning_content is string %}
+    {%- set reasoning_content = m.reasoning_content %}
+{%- else %}
+    {%- if '</think>' in content %}
+        {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+        {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+    {%- endif %}
+{%- endif %}
+{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}
+{{ '<think>' + reasoning_content.strip() +  '</think>'}}
+{%- else -%}
+{{ '</think>' }}
+{%- endif -%}
+{%- if content.strip() -%}
+{{ content.strip() }}
+{%- endif -%}
+{% if m.tool_calls %}
+{% for tc in m.tool_calls %}
+{%- if tc.function %}
+    {%- set tc = tc.function %}
+{%- endif %}
+{{- '<tool_call>' + tc.name -}}
+{% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}
+{% endif %}
+{%- elif m.role == 'tool' -%}
+{%- if m.content is string -%}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+    {{- '<|observation|>' }}
+{%- endif %}
+{{- '<tool_response>' }}
+{{- m.content }}
+{{- '</tool_response>' }}
+{%- else -%}
+<|observation|>{% for tr in m.content %}
+<tool_response>{{ tr.output if tr.output is defined else tr }}</tool_response>{% endfor -%}
+{% endif -%}
+{%- elif m.role == 'system' -%}
+<|system|>{{ visible_text(m.content) }}
+{%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    <|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}
+{%- endif -%}
\ No newline at end of file
diff --git a/models/templates/LFM2-8B-A1B.jinja b/models/templates/LFM2-8B-A1B.jinja
new file mode 100644
index 00000000000..3738b3d145b
--- /dev/null
+++ b/models/templates/LFM2-8B-A1B.jinja
@@ -0,0 +1,47 @@
+{{- bos_token -}}
+{%- set system_prompt = "" -%}
+{%- set ns = namespace(system_prompt="") -%}
+{%- if messages[0]["role"] == "system" -%}
+	{%- set ns.system_prompt = messages[0]["content"] -%}
+	{%- set messages = messages[1:] -%}
+{%- endif -%}
+{%- if tools -%}
+	{%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "You can use the following tools: <|tool_list_start|>[" -%}
+	{%- for tool in tools -%}
+		{%- if tool is not string -%}
+			{%- set tool = tool | tojson -%}
+		{%- endif -%}
+		{%- set ns.system_prompt = ns.system_prompt + tool -%}
+		{%- if not loop.last -%}
+			{%- set ns.system_prompt = ns.system_prompt + ", " -%}
+		{%- endif -%}
+	{%- endfor -%}
+	{%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}
+	{{- '**IMPORTANT**: The syntax for calling the tools is: <|tool_call_start|>JSON tool call goes here<|tool_call_end|>. Please only call tools in the specified manner.' -}}
+{%- endif -%}
+{%- if ns.system_prompt -%}
+	{{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
+{%- endif -%}
+{%- for message in messages -%}
+	{{- "<|im_start|>" + message["role"] + "\n" -}}
+	{%- set content = message["content"] -%}
+	{%- if content is not string -%}
+		{%- set content = content | tojson -%}
+	{%- endif -%}
+	{%- if message["role"] == "tool" -%}
+		{%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%}
+	{%- elif message["role"] == "assistant" -%}
+		{%- if message.tool_calls %}
+			{%- for tool_call in message.tool_calls %}
+				{%- if tool_call.function %}
+					{%- set tool_call = tool_call.function %}
+				{%- endif %}
+				{{- '\n<|tool_call_start|>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n<|tool_call_end|>\n' }}
+			{%- endfor %}
+		{%- endif %}
+	{%- endif -%}
+	{{- content + "<|im_end|>\n" -}}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+	{{- "<|im_start|>assistant\n" -}}
+{%- endif -%}
diff --git a/models/templates/Qwen3-Coder.jinja b/models/templates/Qwen3-Coder.jinja
index 49b0e8d0ee7..cde8c0e43db 100644
--- a/models/templates/Qwen3-Coder.jinja
+++ b/models/templates/Qwen3-Coder.jinja
@@ -29,7 +29,7 @@
     {%- endif %}
 {%- endif %}
 {%- if tools is iterable and tools | length > 0 %}
-    {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
+    {{- "\n\n# Tools\n\nYou have access to the following tools:\n\n" }}
     {{- "<tools>" }}
     {%- for tool in tools %}
         {%- if tool.function is defined %}
@@ -63,7 +63,7 @@
         {{- '\n</function>' }}
     {%- endfor %}
     {{- "\n</tools>" }}
-    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {{- '\n\nIf you choose to call a tool ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nvalue_2\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening <tool_call> tag and end with a closing </tool_call> tag.\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
 {%- endif %}
 {%- if system_message is defined %}
     {{- '<|im_end|>\n' }}
diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
index c2066bd7391..299f7a7ff12 100644
--- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
@@ -1 +1,44 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<｜Assistant｜><think>\n'}}{% endif %}
\ No newline at end of file
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- set ns.system_prompt = message['content'] -%}
+  {%- endif -%}
+{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is none -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls']-%}
+      {%- if not ns.is_first -%}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+        {%- set ns.is_first = true -%}
+        {%- else -%}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+      {%- endif -%}
+    {%- endfor -%}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+    {%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+      {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>')[-1] -%}
+      {%- endif -%}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_tool = true -%}
+    {%- if ns.is_output_first -%}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+      {%- set ns.is_output_first = false -%}
+      {%- else -%}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}
+{%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>'}}
+{%- endif -%}
+{%- if add_generation_prompt and not ns.is_tool -%}{{'<｜Assistant｜><think>\n'}}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
index c2066bd7391..0c8d81e107f 100644
--- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
@@ -1 +1,47 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<｜Assistant｜><think>\n'}}{% endif %}
\ No newline at end of file
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- set ns.system_prompt = message['content'] -%}
+  {%- endif -%}
+{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['tool_calls'] -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls']-%}
+      {%- if not ns.is_first -%}
+        {{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+        {%- set ns.is_first = true -%}
+      {%- else -%}
+        {{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+      {%- endif -%}
+    {%- endfor -%}
+    {{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+    {%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+    {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>')[-1] -%}
+      {%- endif -%}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_tool = true -%}
+    {%- if ns.is_output_first -%}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+      {%- set ns.is_output_first = false -%}
+      {%- else -%}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}
+{%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>'}}
+{%- endif -%}
+{%- if add_generation_prompt and not ns.is_tool -%}{{'<｜Assistant｜><think>\n'}}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
index e5656196a3f..6ef7fb123c6 100644
--- a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
@@ -1,3 +1,71 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- if not thinking is defined -%}
+  {%- if enable_thinking is defined -%}
+    {%- set thinking = enable_thinking -%}
+    {%- else -%}
+    {%- set thinking = false -%}
+  {%- endif -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- if ns.is_first_sp -%}
+      {%- set ns.system_prompt = ns.system_prompt + message['content'] -%}
+      {%- set ns.is_first_sp = false -%}
+      {%- else -%}
+      {%- set ns.system_prompt = ns.system_prompt + '
 
-' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<｜Assistant｜></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- else %}{{message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- endif %}{%- endfor %}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<｜Assistant｜>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}}  {%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<｜Assistant｜>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}
\ No newline at end of file
+' + message['content'] -%}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}{{ bos_token }}{{ ns.system_prompt }}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}
+    {%- set ns.is_first = false -%}
+    {%- set ns.is_last_user = true -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none -%}
+    {%- if ns.is_last_user -%}{{'<｜Assistant｜></think>'}}
+    {%- endif -%}
+    {%- set ns.is_last_user = false -%}
+    {%- set ns.is_first = false -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls'] -%}
+      {%- if not ns.is_first -%}
+        {%- if message['content'] is none -%}{{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}
+          {%- else -%}{{message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}
+        {%- endif -%}
+        {%- set ns.is_first = true -%}
+        {%- else -%}{{'<｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}
+      {%- endif -%}
+    {%- endfor -%}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) -%}
+    {%- if ns.is_last_user -%}{{'<｜Assistant｜>'}}
+      {%- if message['prefix'] is defined and message['prefix'] and thinking -%}{{'<think>'}}
+        {%- else -%}{{'</think>'}}
+      {%- endif -%}
+    {%- endif -%}
+    {%- set ns.is_last_user = false -%}
+    {%- if ns.is_tool -%}{{message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+      {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>', 1)[1] -%}
+      {%- endif -%}{{content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_last_user = false -%}
+    {%- set ns.is_tool = true -%}{{'<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+  {%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}{{'<｜Assistant｜>'}}
+  {%- if not thinking -%}{{'</think>'}}
+    {%- else -%}{{'<think>'}}
+  {%- endif -%}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/moonshotai-Kimi-K2.jinja b/models/templates/moonshotai-Kimi-K2.jinja
index ecb49a21085..e286d8a7b5b 100644
--- a/models/templates/moonshotai-Kimi-K2.jinja
+++ b/models/templates/moonshotai-Kimi-K2.jinja
@@ -1,43 +1,43 @@
-{%- if tools -%}
-  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
-{%- endif -%}
-{%- for message in messages -%}
-  {%- if loop.first and messages[0]['role'] != 'system' -%}
-    <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
-  {%- endif -%}
-  {%- if message['role'] == 'system' -%}
-    <|im_system|>system<|im_middle|>
-  {%- elif message['role'] == 'user' -%}
-    <|im_user|>user<|im_middle|>
-  {%- elif message['role'] == 'assistant' -%}
-    <|im_assistant|>assistant<|im_middle|>
-  {%- elif message['role'] == 'tool' -%}
-    <|im_system|>tool<|im_middle|>
-  {%- endif -%}
-  {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
-    {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
-    <|tool_calls_section_begin|>
-    {%- for tool_call in message['tool_calls'] -%}
-      {%- set func_name = tool_call['function']['name'] -%}
-      {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
-      <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
-    {%- endfor -%}
-    <|tool_calls_section_end|>
-  {%- elif message['role'] == 'tool' -%}
-    ## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
-  {%- elif message['content'] is string -%}
-    {{ message['content'] }}
-  {%- elif message['content'] is not none -%}
-    {% for content in message['content'] -%}
-      {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
-        <|media_start|>image<|media_content|><|media_pad|><|media_end|>
-      {% else -%}
-        {{ content['text'] }}
-      {%- endif -%}
-    {%- endfor -%}
-  {%- endif -%}
-  <|im_end|>
-{%- endfor -%}
-{%- if add_generation_prompt -%}
-  <|im_assistant|>assistant<|im_middle|>
-{%- endif -%}
+{%- if tools -%}
+  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
+{%- endif -%}
+{%- for message in messages -%}
+  {%- if loop.first and messages[0]['role'] != 'system' -%}
+    <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
+  {%- endif -%}
+  {%- if message['role'] == 'system' -%}
+    <|im_system|>system<|im_middle|>
+  {%- elif message['role'] == 'user' -%}
+    <|im_user|>user<|im_middle|>
+  {%- elif message['role'] == 'assistant' -%}
+    <|im_assistant|>assistant<|im_middle|>
+  {%- elif message['role'] == 'tool' -%}
+    <|im_system|>tool<|im_middle|>
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
+    {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
+    <|tool_calls_section_begin|>
+    {%- for tool_call in message['tool_calls'] -%}
+      {%- set func_name = tool_call['function']['name'] -%}
+      {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
+      <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
+    {%- endfor -%}
+    <|tool_calls_section_end|>
+  {%- elif message['role'] == 'tool' -%}
+    ## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
+  {%- elif message['content'] is string -%}
+    {{ message['content'] }}
+  {%- elif message['content'] is not none -%}
+    {% for content in message['content'] -%}
+      {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+        <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+      {% else -%}
+        {{ content['text'] }}
+      {%- endif -%}
+    {%- endfor -%}
+  {%- endif -%}
+  <|im_end|>
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+  <|im_assistant|>assistant<|im_middle|>
+{%- endif -%}
diff --git a/models/templates/unsloth-Apriel-1.5.jinja b/models/templates/unsloth-Apriel-1.5.jinja
index 29e582fbf63..8e59d2f1d41 100644
--- a/models/templates/unsloth-Apriel-1.5.jinja
+++ b/models/templates/unsloth-Apriel-1.5.jinja
@@ -86,19 +86,19 @@ Prior to generating the function calls, you should generate the reasoning for wh
             {%- set add_tool_id = false -%}
         {%- endif -%}
         {{- '<|assistant|>\n' -}}
-        {%- if message['content'] is not none and message['content']|length > 0 -%}
+        {%- if message['content'] is defined and message['content'] is not none and message['content']|length > 0 -%}
             {%- if message['content'] is not string and message['content'][0]['text'] is not none %}
                 {{- message['content'][0]['text'] }}
             {%- else %}
                 {{- message['content'] -}}
             {%- endif -%}
-        {%- elif message['chosen'] is not none and message['chosen']|length > 0 -%}
+        {%- elif message['chosen'] is defined and message['chosen'] is not none and message['chosen']|length > 0 -%}
             {{- message['chosen'][0] -}}
         {%- endif -%}
         {%- if add_thoughts and 'thought' in message and message['thought'] is not none -%}
             {{- '<thinking>' + message['thought'] + '</thinking>' -}}
         {%- endif -%}
-        {%- if message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
+        {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
             {{- '\n<tool_calls>[' -}}
             {%- for tool_call in message["tool_calls"] -%}
                 {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string -}}
diff --git a/scripts/server-bench.py b/scripts/server-bench.py
index dbbb0939ffe..2ef72587123 100755
--- a/scripts/server-bench.py
+++ b/scripts/server-bench.py
@@ -230,7 +230,7 @@ def benchmark(
 
     logger.info("")
     logger.info(f"Benchmark duration:                {token_t_last:.2f} s")
-    logger.info(f"Request throughput:                {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last/60):.2f} requests/min")
+    logger.info(f"Request throughput:                {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last / 60):.2f} requests/min")
     logger.info(f"Total prompt length:               {np.sum(prompt_n)} tokens")
     logger.info(f"Average prompt length:             {np.mean(prompt_n):.2f} tokens")
     logger.info(f"Average prompt latency:            {1e3 * np.mean(prompt_t):.2f} ms")
diff --git a/scripts/server-test-model.py b/scripts/server-test-model.py
new file mode 100644
index 00000000000..9049d80279a
--- /dev/null
+++ b/scripts/server-test-model.py
@@ -0,0 +1,202 @@
+import argparse
+import json
+import requests
+import logging
+import sys
+
+handler = logging.StreamHandler(sys.stdout)
+handler.terminator = ""   # ← no newline
+logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[handler])
+logger = logging.getLogger("server-test-model")
+
+
+def run_query(url, messages, tools=None, stream=False, tool_choice=None):
+    payload = {
+        "messages": messages,
+        "stream": stream,
+        "max_tokens": 5000,
+    }
+    if tools:
+        payload["tools"] = tools
+    if tool_choice:
+        payload["tool_choice"] = tool_choice
+
+    try:
+        response = requests.post(url, json=payload, stream=stream)
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        if e.response is not None:
+            logger.info(f"Response error: {e} for {e.response.content}\n")
+        else:
+            logger.info(f"Error connecting to server: {e}\n")
+        return None
+
+    full_content = ""
+    reasoning_content = ""
+    tool_calls = []
+
+    if stream:
+        logger.info(f"--- Streaming response (Tools: {bool(tools)}) ---\n")
+        for line in response.iter_lines():
+            if line:
+                decoded_line = line.decode("utf-8")
+                if decoded_line.startswith("data: "):
+                    data_str = decoded_line[6:]
+                    if data_str == "[DONE]":
+                        break
+                    try:
+                        data = json.loads(data_str)
+                        if "choices" in data and len(data["choices"]) > 0:
+                            delta = data["choices"][0].get("delta", {})
+
+                            # Content
+                            content_chunk = delta.get("content", "")
+                            if content_chunk:
+                                full_content += content_chunk
+                                logger.info(content_chunk)
+
+                            # Reasoning
+                            reasoning_chunk = delta.get("reasoning_content", "")
+                            if reasoning_chunk:
+                                reasoning_content += reasoning_chunk
+                                logger.info(f"\x1B[3m{reasoning_chunk}\x1B[0m")
+
+                            # Tool calls
+                            if "tool_calls" in delta:
+                                for tc in delta["tool_calls"]:
+                                    index = tc.get("index")
+                                    if index is not None:
+                                        while len(tool_calls) <= index:
+                                            # Using "function" as type default but could be flexible
+                                            tool_calls.append(
+                                                {
+                                                    "id": "",
+                                                    "type": "function",
+                                                    "function": {
+                                                        "name": "",
+                                                        "arguments": "",
+                                                    },
+                                                }
+                                            )
+
+                                        if "id" in tc:
+                                            tool_calls[index]["id"] += tc["id"]
+                                        if "function" in tc:
+                                            if "name" in tc["function"]:
+                                                tool_calls[index]["function"][
+                                                    "name"
+                                                ] += tc["function"]["name"]
+                                            if "arguments" in tc["function"]:
+                                                tool_calls[index]["function"][
+                                                    "arguments"
+                                                ] += tc["function"]["arguments"]
+
+                    except json.JSONDecodeError:
+                        logger.info(f"Failed to decode JSON: {data_str}\n")
+        logger.info("\n--- End of Stream ---\n")
+    else:
+        logger.info(f"--- Non-streaming response (Tools: {bool(tools)}) ---\n")
+        data = response.json()
+        if "choices" in data and len(data["choices"]) > 0:
+            message = data["choices"][0].get("message", {})
+            full_content = message.get("content", "")
+            reasoning_content = message.get("reasoning_content", "")
+            tool_calls = message.get("tool_calls", [])
+            logger.info(full_content)
+        logger.info("--- End of Response ---\n")
+
+    return {
+        "content": full_content,
+        "reasoning_content": reasoning_content,
+        "tool_calls": tool_calls,
+    }
+
+
+def test_chat(url, stream):
+    logger.info(f"\n=== Testing Chat (Stream={stream}) ===\n")
+    messages = [{"role": "user", "content": "What is the capital of France?"}]
+    result = run_query(url, messages, stream=stream)
+
+    if result:
+        if result["content"]:
+            logger.info("PASS: Output received.\n")
+        else:
+            logger.info("WARN: No content received (valid if strict tool call, but unexpected here).\n")
+
+        if result.get("reasoning_content"):
+            logger.info(f"INFO: Reasoning content detected ({len(result['reasoning_content'])} chars).\n")
+        else:
+            logger.info("INFO: No reasoning content detected (Standard model behavior).\n")
+    else:
+        logger.info("FAIL: No result.\n")
+
+
+def test_tool_call(url, stream):
+    logger.info(f"\n=== Testing Tool Call (Stream={stream}) ===\n")
+    messages = [
+        {
+            "role": "user",
+            "content": "What is the weather in London? Please use the get_weather tool.",
+        }
+    ]
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        },
+                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                    },
+                    "required": ["location"],
+                },
+            },
+        }
+    ]
+
+    result = run_query(url, messages, tools=tools, tool_choice="auto", stream=stream)
+
+    if result:
+        tcs = result.get("tool_calls")
+        if tcs and len(tcs) > 0:
+            logger.info("PASS: Tool calls detected.")
+            for tc in tcs:
+                func = tc.get("function", {})
+                logger.info(f"  Tool: {func.get('name')}, Args: {func.get('arguments')}\n")
+        else:
+            logger.info(f"FAIL: No tool calls. Content: {result['content']}\n")
+
+        if result.get("reasoning_content"):
+            logger.info(
+                f"INFO: Reasoning content detected during tool call ({len(result['reasoning_content'])} chars).\n"
+            )
+    else:
+        logger.info("FAIL: Query failed.\n")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Test llama-server functionality.")
+    parser.add_argument("--host", default="localhost", help="Server host")
+    parser.add_argument("--port", default=8080, type=int, help="Server port")
+    args = parser.parse_args()
+
+    base_url = f"http://{args.host}:{args.port}/v1/chat/completions"
+    logger.info(f"Testing server at {base_url}\n")
+
+    # Non-streaming tests
+    test_chat(base_url, stream=False)
+    test_tool_call(base_url, stream=False)
+
+    # Streaming tests
+    test_chat(base_url, stream=True)
+    test_tool_call(base_url, stream=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/snapdragon/qdc/tests/test_bench.py b/scripts/snapdragon/qdc/tests/test_bench.py
index 651ab5b7172..bd19e5d26c9 100644
--- a/scripts/snapdragon/qdc/tests/test_bench.py
+++ b/scripts/snapdragon/qdc/tests/test_bench.py
@@ -14,7 +14,7 @@
 def run_cmd(cmd):
     p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
     sys.stdout.write(p.stdout)
-    assert(p.returncode == 0)
+    assert (p.returncode == 0)
 
 
 @pytest.mark.dependency()
diff --git a/src/models/models.h b/src/models/models.h
index 3a44f7f140f..54f73c64d39 100644
--- a/src/models/models.h
+++ b/src/models/models.h
@@ -1,10 +1,11 @@
 #pragma once
 
-#include "../llama-model.h"
 #include "../llama-graph.h"
+#include "../llama-model.h"
 
 // TODO: remove in follow-up PR - move to .cpp files
 #include "../llama-memory-recurrent.h"
+
 #include <cmath>
 
 struct llm_graph_context_mamba : public llm_graph_context {
@@ -12,9 +13,16 @@ struct llm_graph_context_mamba : public llm_graph_context {
 
     virtual ~llm_graph_context_mamba() = default;
 
-    ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il);
-    ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il) const;
-
+    ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp,
+                                    ggml_tensor *        cur,
+                                    const llama_model &  model,
+                                    const llama_ubatch & ubatch,
+                                    int                  il);
+    ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp,
+                                     ggml_tensor *        cur,
+                                     const llama_model &  model,
+                                     const llama_ubatch & ubatch,
+                                     int                  il) const;
 };
 
 // Base class for RWKV-related models
@@ -158,8 +166,7 @@ struct llm_build_ernie4_5_moe : public llm_graph_context {
     llm_build_ernie4_5_moe(const llama_model & model, const llm_graph_params & params);
 };
 
-template <bool iswa>
-struct llm_build_exaone4 : public llm_graph_context {
+template <bool iswa> struct llm_build_exaone4 : public llm_graph_context {
     llm_build_exaone4(const llama_model & model, const llm_graph_params & params);
 };
 
@@ -183,8 +190,7 @@ struct llm_build_gemma2_iswa : public llm_graph_context {
     llm_build_gemma2_iswa(const llama_model & model, const llm_graph_params & params);
 };
 
-template <bool iswa>
-struct llm_build_gemma3 : public llm_graph_context {
+template <bool iswa> struct llm_build_gemma3 : public llm_graph_context {
     llm_build_gemma3(const llama_model & model, const llm_graph_params & params);
 };
 
@@ -195,8 +201,8 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
     const int64_t n_embd_altup;
     const int64_t n_altup;
     const int     i_altup_act;
-    const int     n_layer_sparsity = 10; // number of layers using activation sparsity
-    const float   f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95)
+    const int     n_layer_sparsity   = 10;                   // number of layers using activation sparsity
+    const float   f_sparsity_std_mul = 1.6448533535003662f;  // std_multiplier = normal_dist.icdf(0.95)
 
     llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params);
     ggml_tensor * calc_magnitude(ggml_tensor * x);
@@ -237,27 +243,26 @@ struct llm_build_gptneox : public llm_graph_context {
 struct llm_build_granite : public llm_graph_context {
     llm_build_granite(const llama_model & model, const llm_graph_params & params);
 
-private:
-    ggml_tensor * build_attention_layer(
-              ggml_tensor             * cur,
-              ggml_tensor             * inp_pos,
-              llm_graph_input_attn_kv * inp_attn,
-        const llama_model             & model,
-        const int64_t                 n_embd_head,
-        const int                     il);
+  private:
+    ggml_tensor * build_attention_layer(ggml_tensor *             cur,
+                                        ggml_tensor *             inp_pos,
+                                        llm_graph_input_attn_kv * inp_attn,
+                                        const llama_model &       model,
+                                        const int64_t             n_embd_head,
+                                        const int                 il);
 
-    ggml_tensor * build_layer_ffn(
-              ggml_tensor       * cur,
-              ggml_tensor       * inpSA,
-        const llama_model       & model,
-        const int                 il);
+    ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il);
 };
 
 struct llm_build_granite_hybrid : public llm_graph_context_mamba {
     llm_build_granite_hybrid(const llama_model & model, const llm_graph_params & params);
     ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il);
-    ggml_tensor * build_attention_layer(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn,
-        const llama_model & model,const int64_t n_embd_head, const int il);
+    ggml_tensor * build_attention_layer(ggml_tensor *             cur,
+                                        ggml_tensor *             inp_pos,
+                                        llm_graph_input_attn_kv * inp_attn,
+                                        const llama_model &       model,
+                                        const int64_t             n_embd_head,
+                                        const int                 il);
 };
 
 struct llm_build_grok : public llm_graph_context {
@@ -294,9 +299,11 @@ struct llm_build_lfm2 : public llm_graph_context {
     llm_build_lfm2(const llama_model & model, const llm_graph_params & params);
     ggml_tensor * build_moe_feed_forward(ggml_tensor * cur, int il) const;
     ggml_tensor * build_dense_feed_forward(ggml_tensor * cur, int il) const;
-    ggml_tensor * build_attn_block(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn, int il) const;
+    ggml_tensor * build_attn_block(ggml_tensor *             cur,
+                                   ggml_tensor *             inp_pos,
+                                   llm_graph_input_attn_kv * inp_attn,
+                                   int                       il) const;
     ggml_tensor * build_shortconv_block(ggml_tensor * cur, llm_graph_input_rs * inp_recr, int il);
-
 };
 
 struct llm_build_llada : public llm_graph_context {
@@ -355,16 +362,18 @@ struct llm_build_nemotron : public llm_graph_context {
 struct llm_build_nemotron_h : public llm_graph_context_mamba {
     llm_build_nemotron_h(const llama_model & model, const llm_graph_params & params);
     ggml_tensor * build_ffn_layer(ggml_tensor * cur, const llama_model & model, const int il);
-    ggml_tensor * build_attention_layer(ggml_tensor * cur, llm_graph_input_attn_kv * inp_attn,
-        const llama_model & model, const int64_t n_embd_head, const int il);
+    ggml_tensor * build_attention_layer(ggml_tensor *             cur,
+                                        llm_graph_input_attn_kv * inp_attn,
+                                        const llama_model &       model,
+                                        const int64_t             n_embd_head,
+                                        const int                 il);
 };
 
 struct llm_build_neo_bert : public llm_graph_context {
     llm_build_neo_bert(const llama_model & model, const llm_graph_params & params);
 };
 
-template <bool iswa>
-struct llm_build_olmo2 : public llm_graph_context {
+template <bool iswa> struct llm_build_olmo2 : public llm_graph_context {
     llm_build_olmo2(const llama_model & model, const llm_graph_params & params);
 };
 
@@ -396,17 +405,23 @@ struct llm_build_phi2 : public llm_graph_context {
     llm_build_phi2(const llama_model & model, const llm_graph_params & params);
 };
 
-template<bool iswa>
-struct llm_build_phi3 : public llm_graph_context {
+template <bool iswa> struct llm_build_phi3 : public llm_graph_context {
     llm_build_phi3(const llama_model & model, const llm_graph_params & params);
 };
 
 struct llm_build_plamo2 : public llm_graph_context_mamba {
     llm_build_plamo2(const llama_model & model, const llm_graph_params & params);
-    private:
-        ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il);
-        ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp, ggml_tensor * inp_pos, ggml_tensor * cur,
-                                                const llama_model & model, int il);
+  private:
+    ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp,
+                                           ggml_tensor *        cur,
+                                           const llama_model &  model,
+                                           const llama_ubatch & ubatch,
+                                           int                  il);
+    ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp,
+                                          ggml_tensor *             inp_pos,
+                                          ggml_tensor *             cur,
+                                          const llama_model &       model,
+                                          int                       il);
 };
 
 struct llm_build_plamo : public llm_graph_context {
@@ -449,26 +464,23 @@ struct llm_build_qwen3vl : public llm_graph_context {
 struct llm_build_qwen3vlmoe : public llm_graph_context {
     llm_build_qwen3vlmoe(const llama_model & model, const llm_graph_params & params);
 };
+
 struct llm_build_qwen3next : public llm_graph_context_mamba {
     llm_build_qwen3next(const llama_model & model, const llm_graph_params & params);
-private:
-    ggml_tensor * build_layer_attn(
-    llm_graph_input_attn_kv * inp_attn,
-                ggml_tensor * cur,
-                ggml_tensor * inp_pos,
-                        int   il);
+  private:
+    ggml_tensor * build_layer_attn(llm_graph_input_attn_kv * inp_attn,
+                                   ggml_tensor *             cur,
+                                   ggml_tensor *             inp_pos,
+                                   int                       il);
 
-    ggml_tensor * build_layer_attn_linear(
-         llm_graph_input_rs * inp,
-                ggml_tensor * cur,
-                ggml_tensor * causal_mask,
-                ggml_tensor * identity,
-                ggml_tensor * diag_mask,
-                        int   il);
+    ggml_tensor * build_layer_attn_linear(llm_graph_input_rs * inp,
+                                          ggml_tensor *        cur,
+                                          ggml_tensor *        causal_mask,
+                                          ggml_tensor *        identity,
+                                          ggml_tensor *        diag_mask,
+                                          int                  il);
 
-    ggml_tensor * build_layer_ffn(
-                ggml_tensor * cur,
-                        int   il);
+    ggml_tensor * build_layer_ffn(ggml_tensor * cur, int il);
 
     // returns pair of output and new state
     std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_chunking(
@@ -535,8 +547,7 @@ struct llm_build_seed_oss : public llm_graph_context {
     llm_build_seed_oss(const llama_model & model, const llm_graph_params & params);
 };
 
-template <bool iswa>
-struct llm_build_smallthinker : public llm_graph_context {
+template <bool iswa> struct llm_build_smallthinker : public llm_graph_context {
     llm_build_smallthinker(const llama_model & model, const llm_graph_params & params);
 };
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index c9436c59953..b45ed04e6ec 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -183,11 +183,10 @@ if (NOT WIN32 OR NOT BUILD_SHARED_LIBS)
     # llama_build_and_test(test-double-float.cpp) # SLOW
 endif()
 
-llama_build_and_test(test-chat-parser.cpp)
 llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
-llama_build_and_test(test-chat-template.cpp)
 llama_build_and_test(test-jinja.cpp)
 llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
+llama_build_and_test(test-chat-auto-parser.cpp)
 llama_build_and_test(test-json-partial.cpp)
 llama_build_and_test(test-log.cpp)
 llama_build_and_test(
@@ -197,6 +196,7 @@ llama_build_and_test(
     peg-parser/test-gbnf-generation.cpp
     peg-parser/test-json-parser.cpp
     peg-parser/test-json-serialization.cpp
+    peg-parser/test-python-dict-parser.cpp
     peg-parser/test-unicode.cpp
     peg-parser/tests.h
 )
@@ -260,3 +260,5 @@ target_link_libraries(${TEST_TARGET} PRIVATE llama)
 
 llama_build_and_test(test-alloc.cpp)
 target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
+
+
diff --git a/tests/peg-parser/test-python-dict-parser.cpp b/tests/peg-parser/test-python-dict-parser.cpp
new file mode 100644
index 00000000000..9db1154b45f
--- /dev/null
+++ b/tests/peg-parser/test-python-dict-parser.cpp
@@ -0,0 +1,279 @@
+#include "tests.h"
+
+void test_python_dict_parser(testing &t) {
+    // Test parsing a simple Python dict object with single quotes
+    t.test("simple Python dict object parsing", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
+
+        std::string    input = "{'name': 'test', 'value': 42, 'flag': true}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing a Python dict array with mixed types
+    t.test("Python dict array with mixed types", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
+
+        std::string    input = "[1, 'hello', true, null, 3.14]";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing nested Python dict with objects and arrays
+    t.test("nested Python dict with objects and arrays", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
+
+        std::string input =
+            "{'users': [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}], 'count': 2, 'metadata': {'version': '1.0', 'tags': ['admin', 'user']}}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing Python dict with escaped single quotes
+    t.test("Python dict with escaped single quotes", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
+
+        std::string    input = "{'message': 'It\\'s working!'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing Python dict with double quotes inside single quotes
+    t.test("Python dict with double quotes inside single quotes", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
+
+        std::string    input = "{'quote': 'He said \"Hello\"'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test the example from the requirements
+    t.test("complex Python dict example from requirements", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
+
+        std::string    input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test need_more_input() parsing - incomplete object
+    t.test("need_more_input() parsing - incomplete object", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
+
+        std::string    input = "{'name': 'test', 'value': ";
+        common_peg_parse_context ctx(input, true);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_need_more_input", true, result.need_more_input());
+    });
+
+    // Test need_more_input() parsing - incomplete single-quoted string
+    t.test("need_more_input() parsing - incomplete single-quoted string", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
+
+        std::string    input = "{'name': 'test";
+        common_peg_parse_context ctx(input, true);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_need_more_input", true, result.need_more_input());
+    });
+
+    // Test unicode in Python dict strings
+    t.test("unicode in Python dict strings", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
+
+        std::string    input = "{'message': 'Hello, 世界!'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test Python dict with unicode escapes
+    t.test("Python dict with unicode escapes", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
+
+        std::string    input = "{'unicode': 'Hello\\u0041'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test that JSON double-quoted strings fail with Python dict parser
+    t.test("JSON double-quoted strings fail with Python dict parser", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_dict(); });
+
+        std::string    input = "{\"name\": \"test\"}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_fail", true, result.fail());
+    });
+
+    // Test Python dict string content parser directly
+    t.test("python dict string content parser", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+            return p.sequence({ p.literal("'"), p.python_dict_string_content(), p.literal("'"), p.space() });
+        });
+
+        t.test("simple string", [&](testing &t) {
+            std::string input = "'hello'";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("string with escaped single quote", [&](testing &t) {
+            std::string input = "'it\\'s'";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("string with double quotes", [&](testing &t) {
+            std::string input = "'say \"hello\"'";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("incomplete string", [&](testing &t) {
+            std::string input = "'hello";
+            common_peg_parse_context ctx(input, true);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("need_more_input", result.need_more_input());
+        });
+    });
+
+    // Test allow_python_dict_format flag usage
+    t.test("allow_python_dict_format flag", [](testing &t) {
+        t.test("flag is false by default", [&](testing &t) {
+            common_peg_parser_builder builder;
+            t.assert_equal("default_value", false, builder.get_allow_python_dict_format());
+        });
+
+        t.test("flag can be set to true", [&](testing &t) {
+            common_peg_parser_builder builder;
+            builder.set_allow_python_dict_format(true);
+            t.assert_equal("after_set", true, builder.get_allow_python_dict_format());
+        });
+
+        t.test("flag can be set back to false", [&](testing &t) {
+            common_peg_parser_builder builder;
+            builder.set_allow_python_dict_format(true);
+            builder.set_allow_python_dict_format(false);
+            t.assert_equal("after_reset", false, builder.get_allow_python_dict_format());
+        });
+    });
+
+    // Test that the flag actually affects json() parser behavior
+    t.test("json() parser with allow_python_dict_format flag", [](testing &t) {
+        t.test("json() rejects single quotes when flag is false", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                p.set_allow_python_dict_format(false);
+                return p.json();
+            });
+
+            std::string input = "{'name': 'test'}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("fail", result.fail());
+        });
+
+        t.test("json() accepts single quotes when flag is true", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                p.set_allow_python_dict_format(true);
+                return p.json();
+            });
+
+            std::string input = "{'name': 'test'}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("json() still accepts double quotes when flag is true", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                p.set_allow_python_dict_format(true);
+                return p.json();
+            });
+
+            std::string input = "{\"name\": \"test\"}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("json() accepts mixed quote styles when flag is true", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                p.set_allow_python_dict_format(true);
+                return p.json();
+            });
+
+            std::string input = "{\"name\": 'test', 'value': \"hello\"}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("complex nested structure with flag true", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                p.set_allow_python_dict_format(true);
+                return p.json();
+            });
+
+            std::string input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+    });
+}
diff --git a/tests/peg-parser/tests.h b/tests/peg-parser/tests.h
index 4d3f4e9eaf5..debd4286c50 100644
--- a/tests/peg-parser/tests.h
+++ b/tests/peg-parser/tests.h
@@ -22,3 +22,4 @@ void test_json_parser(testing &t);
 void test_gbnf_generation(testing &t);
 void test_unicode(testing &t);
 void test_json_serialization(testing &t);
+void test_python_dict_parser(testing &t);
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 90cc0d7da2f..5b80d6987e9 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -7740,6 +7740,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 1, 64, 256, {1,  1}, {1, 1}));
     }
 
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 6, 4096, 5120, {1, 1}, {1, 1}));
+
 #if 0
     // test the mat-mat path for Metal
     for (int k = 1; k < 512; ++k) {
diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp
new file mode 100644
index 00000000000..015c90d4088
--- /dev/null
+++ b/tests/test-chat-auto-parser.cpp
@@ -0,0 +1,1845 @@
+#include "chat-auto-parser-helpers.h"
+#include "chat-diff-analyzer.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "peg-parser.h"
+#include "testing.h"
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+static void test_calculate_diff_split_basic(testing & t);
+static void test_calculate_diff_split_identical(testing & t);
+static void test_calculate_diff_split_common_prefix(testing & t);
+static void test_calculate_diff_split_common_suffix(testing & t);
+static void test_calculate_diff_split_common_both(testing & t);
+static void test_calculate_diff_split_empty_cases(testing & t);
+static void test_calculate_diff_split_no_common(testing & t);
+static void test_calculate_diff_split_single_char(testing & t);
+static void test_calculate_diff_split_overlaps(testing & t);
+static void test_calculate_diff_split_tag_boundaries(testing & t);
+static void test_calculate_diff_split(testing & t);
+
+static void test_until_common_prefix_basic(testing & t);
+static void test_until_common_prefix(testing & t);
+
+static void test_after_common_suffix_basic(testing & t);
+static void test_after_common_suffix(testing & t);
+
+static void test_analyze_tool_call_pure_json(testing & t);
+static void test_analyze_tool_call_function_name_markers(testing & t);
+static void test_analyze_tool_call_full_markers(testing & t);
+static void test_analyze_tool_call_edge_cases(testing & t);
+
+static void test_compare_variants_basic(testing & t);
+static void test_compare_variants_messages_modifier(testing & t);
+static void test_compare_variants_tools_modifier(testing & t);
+static void test_compare_variants_both_modifiers(testing & t);
+static void test_compare_variants_template_failure(testing & t);
+static void test_compare_variants_identity(testing & t);
+static void test_compare_variants(testing & t);
+
+// Seed-OSS template tool calling analysis tests
+static void test_seed_oss_tool_analysis(testing & t);
+static void test_seed_oss_tool_presence(testing & t);
+static void test_seed_oss_call_count(testing & t);
+static void test_seed_oss_function_names(testing & t);
+static void test_seed_oss_argument_count(testing & t);
+static void test_seed_oss_args_presence(testing & t);
+static void test_seed_oss_tool_with_reasoning(testing & t);
+
+// Nemotron template analysis tests
+static void test_nemotron_analysis(testing & t);
+static void test_nemotron_reasoning_detection(testing & t);
+static void test_nemotron_tool_format(testing & t);
+
+// CohereForAI template analysis tests
+static void test_cohere_reasoning_detection(testing & t);
+static void test_cohere_tool_format(testing & t);
+static void test_cohere_analysis(testing & t);
+
+// Marker separation
+static void test_marker_separation(testing & t);
+
+// standard_json_tools format tests
+static void test_standard_json_tools_formats(testing & t);
+static void test_standard_json_tools_openai(testing & t);
+static void test_standard_json_tools_cohere(testing & t);
+static void test_standard_json_tools_function_key(testing & t);
+
+// normalize_quotes_to_json tests
+static void test_normalize_quotes_to_json(testing & t);
+static void test_normalize_quotes_with_embedded_quotes(testing & t);
+
+// TAG_WITH_TAGGED argument parsing tests
+static void test_tagged_args_with_embedded_quotes(testing & t);
+
+int main(int argc, char * argv[]) {
+    testing t(std::cout);
+    t.verbose = true;
+
+    // usage: test-chat-auto-parser-helpers [filter_regex]
+
+    if (argc > 1) {
+        t.set_filter(argv[1]);
+    }
+
+    t.test("diff_split", test_calculate_diff_split);
+    t.test("common_prefix", test_until_common_prefix);
+    t.test("common_suffix", test_after_common_suffix);
+    t.test("compare_variants", test_compare_variants);
+    t.test("segments", test_marker_separation);
+    t.test("seed_oss_diffs", test_seed_oss_tool_analysis);
+    t.test("cohere", test_cohere_analysis);
+    t.test("nemotron", test_nemotron_analysis);
+    t.test("standard_json_tools", test_standard_json_tools_formats);
+    t.test("normalize_quotes_to_json", test_normalize_quotes_to_json);
+    t.test("tagged_args_embedded_quotes", test_tagged_args_with_embedded_quotes);
+
+    return t.summary();
+}
+
+static void test_marker_separation(testing & t) {
+    auto single_square_marker = segmentize_markers("pre_marker[marker]post_marker");
+    auto single_diag_marker = segmentize_markers("pre_marker<marker>post_marker");
+    auto paired_markers = segmentize_markers("<hello>world</hello>");
+    auto double_different_markers = segmentize_markers("<hello>[hello]<world>[world]");
+    auto in_between = segmentize_markers("im<blue>daba<dee>da[hey]");
+
+    t.test("single_square_marker", [&] (testing & t) {
+        t.assert_equal("first is text", segment_type::TEXT, single_square_marker[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, single_square_marker[1].type);
+        t.assert_equal("last is text", segment_type::TEXT, single_square_marker[2].type);
+
+        t.assert_equal("first is 'pre_marker'", "pre_marker", single_square_marker[0].value);
+        t.assert_equal("second is '[marker]'", "[marker]", single_square_marker[1].value);
+        t.assert_equal("last is 'post_marker'", "post_marker", single_square_marker[2].value);
+    });
+        
+    t.test("single_diagonal_marker", [&] (testing & t) {
+        t.assert_equal("first is text", segment_type::TEXT, single_diag_marker[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, single_diag_marker[1].type);
+        t.assert_equal("last is text", segment_type::TEXT, single_diag_marker[2].type);
+
+        t.assert_equal("first is 'pre_marker'", "pre_marker", single_diag_marker[0].value);
+        t.assert_equal("second is '<marker>'", "<marker>", single_diag_marker[1].value);
+        t.assert_equal("last is 'post_marker'", "post_marker", single_diag_marker[2].value);
+    });
+
+    t.test("paired_markers", [&] (testing & t) {
+        t.assert_equal("first is marker", segment_type::MARKER, paired_markers[0].type);
+        t.assert_equal("second is text", segment_type::TEXT, paired_markers[1].type);
+        t.assert_equal("third is marker", segment_type::MARKER, paired_markers[2].type);
+
+        t.assert_equal("first is '<hello>'", "<hello>", paired_markers[0].value);
+        t.assert_equal("second is 'world'", "world", paired_markers[1].value);
+        t.assert_equal("third is '</hello>'", "</hello>", paired_markers[2].value);
+    });
+
+    t.test("double_different_markers", [&] (testing & t) {
+        t.assert_equal("first is marker", segment_type::MARKER, double_different_markers[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, double_different_markers[1].type);
+        t.assert_equal("third is marker", segment_type::MARKER, double_different_markers[2].type);
+        t.assert_equal("fourth is marker", segment_type::MARKER, double_different_markers[3].type);
+
+        t.assert_equal("first is '<hello>'", "<hello>", double_different_markers[0].value);
+        t.assert_equal("second is '[hello]'", "[hello]", double_different_markers[1].value);
+        t.assert_equal("third is '<world>'", "<world>", double_different_markers[2].value);
+        t.assert_equal("fourth is '[world]'", "[world]", double_different_markers[3].value);
+    });
+
+    t.test("in_between", [&] (testing & t) {
+        t.assert_equal("first is text", segment_type::TEXT, in_between[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, in_between[1].type);
+        t.assert_equal("third is text", segment_type::TEXT, in_between[2].type);
+        t.assert_equal("fourth is marker", segment_type::MARKER, in_between[3].type);
+        t.assert_equal("fifth is text", segment_type::TEXT, in_between[4].type);
+        t.assert_equal("sixth is marker", segment_type::MARKER, in_between[5].type);
+
+        t.assert_equal("first is 'im'", "im", in_between[0].value);
+        t.assert_equal("second is '<blue>'", "<blue>", in_between[1].value);
+        t.assert_equal("third is 'daba'", "daba", in_between[2].value);
+        t.assert_equal("fourth is '<dee>'", "<dee>", in_between[3].value);
+        t.assert_equal("fifth is 'da'", "da", in_between[4].value);
+        t.assert_equal("sixth is '[hey]'", "[hey]", in_between[5].value);
+    });
+}
+
+static void test_calculate_diff_split(testing & t) {
+    t.test("calculate_diff_split basic", test_calculate_diff_split_basic);
+    t.test("calculate_diff_split identical", test_calculate_diff_split_identical);
+    t.test("calculate_diff_split common prefix", test_calculate_diff_split_common_prefix);
+    t.test("calculate_diff_split common suffix", test_calculate_diff_split_common_suffix);
+    t.test("calculate_diff_split common both", test_calculate_diff_split_common_both);
+    t.test("calculate_diff_split empty cases", test_calculate_diff_split_empty_cases);
+    t.test("calculate_diff_split no common", test_calculate_diff_split_no_common);
+    t.test("calculate_diff_split single char", test_calculate_diff_split_single_char);
+    t.test("calculate_diff_split overlaps", test_calculate_diff_split_overlaps);
+    t.test("calculate_diff_split tag boundaries", test_calculate_diff_split_tag_boundaries);
+}
+
+static void test_calculate_diff_split_basic(testing & t) {
+    diff_split result = calculate_diff_split("hello world", "hello test");
+    t.assert_equal("prefix should be 'hello '", "hello ", result.prefix);
+    t.assert_equal("left should be 'world'", "world", result.left);
+    t.assert_equal("right should be 'test'", "test", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("abc", "xyz");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("prefixA suffix", "prefixB suffix");
+    t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix);
+    t.assert_equal("left should be 'A'", "A", result.left);
+    t.assert_equal("right should be 'B'", "B", result.right);
+    t.assert_equal("suffix should be ' suffix'", " suffix", result.suffix);
+}
+
+static void test_calculate_diff_split_identical(testing & t) {
+    diff_split result = calculate_diff_split("hello", "hello");
+    t.assert_equal("prefix should be 'hello'", "hello", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("a", "a");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_common_prefix(testing & t) {
+    diff_split result = calculate_diff_split("abcdef", "abcxyz");
+    t.assert_equal("prefix should be 'abc'", "abc", result.prefix);
+    t.assert_equal("left should be 'def'", "def", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("same", "sameagain");
+    t.assert_equal("prefix should be 'same'", "same", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'again'", "again", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("test", "testing");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'ing'", "ing", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_common_suffix(testing & t) {
+    diff_split result = calculate_diff_split("123end", "456end");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be '123'", "123", result.left);
+    t.assert_equal("right should be '456'", "456", result.right);
+    t.assert_equal("suffix should be 'end'", "end", result.suffix);
+
+    result = calculate_diff_split("start", "end");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'start'", "start", result.left);
+    t.assert_equal("right should be 'end'", "end", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("abcsuffix", "xyzsuffix");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be 'suffix'", "suffix", result.suffix);
+}
+
+static void test_calculate_diff_split_common_both(testing & t) {
+    diff_split result = calculate_diff_split("helloXworld", "helloYworld");
+    t.assert_equal("prefix should be 'hello'", "hello", result.prefix);
+    t.assert_equal("left should be 'X'", "X", result.left);
+    t.assert_equal("right should be 'Y'", "Y", result.right);
+    t.assert_equal("suffix should be 'world'", "world", result.suffix);
+
+    result = calculate_diff_split("ABCmiddleXYZ", "ABCdifferentXYZ");
+    t.assert_equal("prefix should be 'ABC'", "ABC", result.prefix);
+    t.assert_equal("left should be 'middle'", "middle", result.left);
+    t.assert_equal("right should be 'different'", "different", result.right);
+    t.assert_equal("suffix should be 'XYZ'", "XYZ", result.suffix);
+
+    result = calculate_diff_split("startAend", "startBend");
+    t.assert_equal("prefix should be 'start'", "start", result.prefix);
+    t.assert_equal("left should be 'A'", "A", result.left);
+    t.assert_equal("right should be 'B'", "B", result.right);
+    t.assert_equal("suffix should be 'end'", "end", result.suffix);
+
+    // Edge case: common prefix and suffix overlap
+    result = calculate_diff_split("aa", "ab");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be 'a'", "a", result.left);
+    t.assert_equal("right should be 'b'", "b", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_empty_cases(testing & t) {
+    // Empty left, non-empty right
+    diff_split result = calculate_diff_split("", "hello");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'hello'", "hello", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Non-empty left, empty right
+    result = calculate_diff_split("hello", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'hello'", "hello", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Both empty
+    result = calculate_diff_split("", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Left single char, empty right
+    result = calculate_diff_split("a", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'a'", "a", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Empty left, right single char
+    result = calculate_diff_split("", "a");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'a'", "a", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_no_common(testing & t) {
+    diff_split result = calculate_diff_split("abc", "xyz");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("left", "right");
+    // The algorithm finds "t" as a common suffix since both strings end with 't'
+    // This is the algorithm's actual behavior - it finds maximal common suffix
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'lef'", "lef", result.left);
+    t.assert_equal("right should be 'righ'", "righ", result.right);
+    t.assert_equal("suffix should be 't'", "t", result.suffix);
+
+    result = calculate_diff_split("123", "456");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be '123'", "123", result.left);
+    t.assert_equal("right should be '456'", "456", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_single_char(testing & t) {
+    diff_split result = calculate_diff_split("a", "b");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'a'", "a", result.left);
+    t.assert_equal("right should be 'b'", "b", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("a", "a");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("a", "ab");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'b'", "b", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("ab", "a");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be 'b'", "b", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_overlaps(testing & t) {
+    // One string is substring of another
+    diff_split result = calculate_diff_split("test", "testing");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'ing'", "ing", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("testing", "test");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be 'ing'", "ing", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Similar strings with one extra char at start
+    result = calculate_diff_split("Xtest", "Ytest");
+    // The algorithm finds "test" as a common suffix since both strings end with "test"
+    // This is the algorithm's actual behavior - it finds maximal common suffix
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'X'", "X", result.left);
+    t.assert_equal("right should be 'Y'", "Y", result.right);
+    t.assert_equal("suffix should be 'test'", "test", result.suffix);
+
+    // Similar strings with one extra char at end
+    result = calculate_diff_split("testX", "testY");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be 'X'", "X", result.left);
+    t.assert_equal("right should be 'Y'", "Y", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Strings that are reverses
+    result = calculate_diff_split("abc", "cba");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'cba'", "cba", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_tag_boundaries(testing & t) {
+    // Test with unclosed XML tags
+    diff_split result = calculate_diff_split("test<tag", "test>content");
+    // The fix_tag_boundaries should move incomplete tags appropriately
+    t.assert_true("prefix should start with 'test'", result.prefix.find("test") == 0);
+    t.assert_true("should handle tag boundaries", result.left != "" || result.right != "" || result.suffix != "");
+
+    // Test with unclosed brackets
+    result = calculate_diff_split("test[", "test]value");
+    t.assert_true("should handle bracket boundaries", result.left != "" || result.right != "" || result.suffix != "");
+
+    // Test with partial tags on both sides
+    result = calculate_diff_split("prefix<tag>", "prefix</tag>suffix");
+    // fix_tag_boundaries moves the incomplete '<' from prefix to left/right
+    t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix);
+    t.assert_equal("left should be '<tag>'", "<tag>", result.left);
+    t.assert_equal("right should be '</tag>suffix'", "</tag>suffix", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Test with complex nested tags
+    result = calculate_diff_split("prefix<div>content</div>", "prefix<div>different</div>");
+    // Algorithm finds "ent</div>" as a common suffix because both strings end with it
+    // This is the actual algorithm behavior, though not semantically ideal
+    t.assert_equal("prefix should be 'prefix<div>'", "prefix<div>", result.prefix);
+    t.assert_equal("left should be 'cont'", "cont", result.left);
+    t.assert_equal("right should be 'differ'", "differ", result.right);
+    t.assert_equal("suffix should be 'ent</div>'", "ent</div>", result.suffix);
+
+    // Test with unclosed angle bracket
+    result = calculate_diff_split("Hello <world>", "Hello test");
+    t.assert_equal("prefix should be 'Hello '", "Hello ", result.prefix);
+    t.assert_true("left should contain '<world>'", result.left.find("<world>") != std::string::npos);
+    t.assert_equal("right should be 'test'", "test", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Test with unclosed square bracket
+    result = calculate_diff_split("test [array]", "test other");
+    t.assert_equal("prefix should be 'test '", "test ", result.prefix);
+    t.assert_true("left should contain '[array]'", result.left.find("[array]") != std::string::npos);
+    t.assert_equal("right should be 'other'", "other", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Test empty prefix and suffix with tags
+    result = calculate_diff_split("<tag>left</tag>", "<tag>righ</tag>");
+    t.assert_equal("prefix should be '<tag>'", "<tag>", result.prefix);
+    t.assert_equal("left should be 'left'", "left", result.left);
+    t.assert_equal("right should be 'righ'", "righ", result.right);
+    t.assert_equal("suffix should be '</tag>'", "</tag>", result.suffix);
+
+    {
+        // real case from template tests, simplified
+        std::string left  = "PREFIX</think>Sure";
+        std::string right = "PREFIX<think>Lemme think</think>Sure";
+        result            = calculate_diff_split(left, right);
+        t.assert_equal("prefix should be PREFIX", "PREFIX", result.prefix);
+        t.assert_equal("suffix should be </think>Sure", "</think>Sure", result.suffix);
+        t.assert_equal("left should be empty", "", result.left);
+        t.assert_equal("right should be <think>Lemme think", "<think>Lemme think", result.right);
+    }
+
+    {
+        // Real case: special tokens with |> boundary issue
+        // The suffix starts with |> which should be moved to complete <|END_RESPONSE and <|END_ACTION
+        std::string prefix    = "SOME_PREFIX";
+        std::string suffix    = "|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
+        std::string left_diff = "<|START_RESPONSE|>Let me help you.<|END_RESPONSE";
+        std::string right_diff =
+            "<|START_THINKING|><|END_THINKING|><|START_ACTION|>[\n"
+            "    {\"tool_call_id\": \"0\", \"tool_name\": \"test_function_name\", "
+            "\"parameters\": {\"param1\": \"value1\", \"param2\": \"value2\"}}\n"
+            "]<|END_ACTION";
+
+        std::string left  = prefix + left_diff + suffix;
+        std::string right = prefix + right_diff + suffix;
+        result            = calculate_diff_split(left, right);
+
+        t.assert_equal("special token prefix", prefix, result.prefix);
+        // The |> should be moved from suffix to complete the tokens
+        t.assert_equal("special token left", "<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>", result.left);
+        t.assert_true("special token right ends with |>", result.right.find("<|END_ACTION|>") != std::string::npos);
+        t.assert_equal("special token suffix", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
+                       result.suffix);
+    }
+}
+
+static void test_until_common_prefix(testing & t) {
+    t.test("until_common_prefix basic", test_until_common_prefix_basic);
+}
+
+static void test_until_common_prefix_basic(testing & t) {
+    // Test case from the user request
+    std::string result = until_common_prefix("<function name=foo><arg name=bar>", "<arg name=bar>", "<arg name=baz>");
+    t.assert_equal("untilCommonPrefix should return '<function name=foo>'", "<function name=foo>", result);
+
+    // Additional test cases to ensure robustness
+    // Test with different common prefix lengths
+    result = until_common_prefix("prefix<test>suffix", "<test>different", "<test>other");
+    t.assert_equal("should return 'prefix'", "prefix", result);
+
+    // Test when common prefix is at the start
+    result = until_common_prefix("<common>rest", "<common>left", "<common>right");
+    t.assert_equal("should return empty string when common prefix at start", "", result);
+
+    // Test when there's no common prefix
+    result = until_common_prefix("something", "left", "right");
+    t.assert_equal("should return empty string when no common prefix", "", result);
+
+    // Test with empty strings
+    result = until_common_prefix("test", "", "right");
+    t.assert_equal("should return empty string when left is empty", "", result);
+
+    // Test with longer common prefix
+    result = until_common_prefix("abcXYZ<shared_prefix>rest", "<shared_prefix>left", "<shared_prefix>right");
+    t.assert_equal("should return 'abcXYZ'", "abcXYZ", result);
+}
+
+static void test_after_common_suffix(testing & t) {
+    t.test("after_common_suffix basic", test_after_common_suffix_basic);
+}
+
+static void test_after_common_suffix_basic(testing & t) {
+    // Test case from the user request
+    std::string result = after_common_suffix("<function name=foo><arg name=bar>100</arg></function>",
+                                            "<arg name=bar>100</arg>",
+                                            "<arg name=baz>535</arg>");
+    t.assert_equal("afterCommonSuffix should return '</function>'", "</function>", result);
+
+    // Test when common suffix is at the end
+    result = after_common_suffix("rest<common>", "left<common>", "right<common>");
+    t.assert_equal("should return empty string when common suffix at end", "", result);
+
+    // Test with empty strings
+    result = after_common_suffix("test", "left", "");
+    t.assert_equal("should return empty string when right is empty", "", result);
+
+    // Test case with XML-like structure similar to the main example
+    result = after_common_suffix("<outer><inner>value</inner></outer>",
+                                "<inner>value</inner>",
+                                "<inner>different</inner>");
+    t.assert_equal("should return '</outer>'", "</outer>", result);
+
+    // Test with longer common suffix appearing at the end of full
+    result = after_common_suffix("prefix<shared>rest</shared>", "prefix<shared>left</shared>", "prefix<shared>right</shared>");
+    t.assert_equal("should return '' when common suffix is at end of full", "", result);
+
+    // Test with common suffix appearing in middle but not at end
+    result = after_common_suffix("<tag>content</tag><extra>", "<tag>value</tag>", "<tag>other</tag>");
+    t.assert_equal("should return '<extra>' when common suffix appears before end", "<extra>", result);
+
+    // Test with multi-character common suffix at the very end of full
+    result = after_common_suffix("start<middle>end</middle>", "prefix<middle>left</middle>", "prefix<middle>right</middle>");
+    t.assert_equal("should return '' when common suffix </middle> is at end of full", "", result);
+}
+
+static void test_compare_variants(testing & t) {
+    t.test("compare_variants basic", test_compare_variants_basic);
+    t.test("compare_variants messages modifier", test_compare_variants_messages_modifier);
+    t.test("compare_variants tools modifier", test_compare_variants_tools_modifier);
+    t.test("compare_variants both modifiers", test_compare_variants_both_modifiers);
+    t.test("compare_variants template failure", test_compare_variants_template_failure);
+    t.test("compare_variants identity", test_compare_variants_identity);
+}
+
+static void test_compare_variants_basic(testing & t) {
+    // Create a simple template that just echoes messages
+    common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "Hello"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "World";
+    };
+
+    auto result = differential_analyzer::compare_variants(tmpl, params, modifier);
+
+    t.assert_true("result should have value", result.has_value());
+    // The template might not output anything if messages is empty or format is different
+    // Check that we get a valid result
+    t.assert_true("prefix or left should have content", !result->diff.prefix.empty() || !result->diff.left.empty());
+}
+
+static void test_compare_variants_messages_modifier(testing & t) {
+    // Test with messages modifier only
+    common_chat_template tmpl("{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "A"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "B";
+    };
+
+    std::optional<compare_variants_result> result = differential_analyzer::compare_variants(tmpl, params, modifier);
+
+    t.assert_true("result should have value", result.has_value());
+    t.assert_equal("left should be 'A'", "A", result->diff.left);
+    t.assert_equal("right should be 'B'", "B", result->diff.right);
+}
+
+static void test_compare_variants_tools_modifier(testing & t) {
+    // Test with tools modifier only
+    common_chat_template tmpl(
+        "{% for tool in tools %}{{ tool['name'] }}{% endfor %}", "", "");
+
+    template_params params;
+    params.tools = json::array({
+        json {{"name", "foo"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.tools[0]["name"] = "bar";
+    };
+
+    auto result = differential_analyzer::compare_variants(tmpl, params, modifier);
+
+    t.assert_true("result should have value", result.has_value());
+    t.assert_equal("left should be 'foo'", "foo", result->diff.left);
+    t.assert_equal("right should be 'bar'", "bar", result->diff.right);
+}
+
+static void test_compare_variants_both_modifiers(testing & t) {
+    // Test with both messages and tools modifiers using the for loop approach
+    common_chat_template tmpl(
+        "{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "A"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "B";
+        p.messages[0]["role"] = "newuser";
+    };
+
+    auto result = differential_analyzer::compare_variants(tmpl, params, modifier);
+
+    t.assert_true("result should have value", result.has_value());
+    t.assert_equal("left should be 'user:A'", "user:A", result->diff.left);
+    t.assert_equal("right should be 'newuser:B'", "newuser:B", result->diff.right);
+}
+
+static void test_compare_variants_template_failure(testing & t) {
+    // Test with template that causes failure during application (not construction)
+    // We use a valid template syntax but one that will fail during application
+    common_chat_template tmpl("{{ messages[0]['nonexistent_field'] }}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "Hello"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "World";
+    };
+
+    auto result = differential_analyzer::compare_variants(tmpl, params, modifier);
+
+    t.assert_true("result should be nullopt on template failure", !result.has_value());
+}
+
+static void test_compare_variants_identity(testing & t) {
+    // Test with identity modifier (no change)
+    common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "Hello"}}
+    });
+
+    // No modifier - should use identity
+    auto result = differential_analyzer::compare_variants(tmpl, params, nullptr);
+
+    t.assert_true("result should have value", result.has_value());
+    t.assert_equal("prefix should be 'Hello'", "Hello", result->diff.prefix);
+    t.assert_equal("left should be empty", "", result->diff.left);
+    t.assert_equal("right should be empty", "", result->diff.right);
+    t.assert_equal("suffix should be empty", "", result->diff.suffix);
+}
+
+// ============================================================================
+// Seed-OSS Template Tool Calling Analysis Tests
+// ============================================================================
+
+static void test_seed_oss_tool_analysis(testing & t) {
+    t.test("Seed-OSS tool presence", test_seed_oss_tool_presence);
+    t.test("Seed-OSS call count", test_seed_oss_call_count);
+    t.test("Seed-OSS function names", test_seed_oss_function_names);
+    t.test("Seed-OSS argument count", test_seed_oss_argument_count);
+    t.test("Seed-OSS args presence", test_seed_oss_args_presence);
+    t.test("Seed-OSS tool with reasoning", test_seed_oss_tool_with_reasoning);
+}
+
+// Helper to load Seed-OSS template
+static common_chat_template load_seed_oss_template(testing & t) {
+    std::string template_path = "models/templates/ByteDance-Seed-OSS.jinja";
+    std::ifstream fin(template_path, std::ios::binary);
+    std::ostringstream buf;
+    if (fin.is_open()) {
+        buf << fin.rdbuf();
+    }
+    std::string template_source = buf.str();
+    common_chat_template tmpl(template_source, "", "");
+    t.assert_true("Seed-OSS template loaded successfully", template_source.length() > 0);
+    return tmpl;
+}
+
+// Helper to build tool call JSON
+static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call_001") {
+    return json{
+        {"id", id},
+        {"type", "function"},
+        {"function", json{
+            {"name", name},
+            {"arguments", args}
+        }}
+    };
+}
+
+// Helper to build tools definition
+static json build_tools_definition() {
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["param1"] = json::object({
+        {"type", "string"},
+        {"description", "First parameter"}
+    });
+    parameters_schema["properties"]["param2"] = json::object({
+        {"type", "string"},
+        {"description", "Second parameter"}
+    });
+    parameters_schema["required"] = json::array({"param1", "param2"});
+
+    return json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "test_function_name"},
+                {"description", "A test function for debugging"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+}
+
+// T1: Compare with/without tool call (user, assistant)
+static void test_seed_oss_tool_presence(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_no_tools = json{
+        {"role", "assistant"},
+        {"content", "Let me help you."}
+    };
+
+    json assistant_with_tools = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello, please help me."}
+    };
+
+    template_params params_no_tools;
+    params_no_tools.messages = json::array({user_msg, assistant_no_tools});
+    params_no_tools.tools = build_tools_definition();
+    params_no_tools.add_generation_prompt = false;
+    params_no_tools.enable_thinking = true;
+
+    template_params params_with_tools;
+    params_with_tools.messages = json::array({user_msg, assistant_with_tools});
+    params_with_tools.tools = build_tools_definition();
+    params_with_tools.add_generation_prompt = false;
+    params_with_tools.enable_thinking = true;
+
+    auto result = differential_analyzer::compare_variants(tmpl, params_no_tools,
+        [&](template_params & p) {
+            p.messages = params_with_tools.messages;
+        });
+
+    t.assert_true("T1 result should have value", result.has_value());
+
+    const auto & diff = result->diff;
+    t.assert_true("T1 prefix should contain system", diff.prefix.find("system") != std::string::npos);
+    t.assert_true("T1 prefix should contain user", diff.prefix.find("user") != std::string::npos);
+    t.assert_true("T1 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos);
+
+    // Left should be the assistant content without tool
+    t.assert_equal("T1 left should contain 'Let me help you.'", "Let me help you.", diff.left);
+
+    // Right should contain the tool call markers
+    t.assert_true("T1 right should contain tool_call begin", diff.right.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T1 right should contain function tag", diff.right.find("<function=test_function_name>") != std::string::npos);
+    t.assert_true("T1 right should contain parameter=param1", diff.right.find("<parameter=param1>") != std::string::npos);
+    t.assert_true("T1 right should contain parameter=param2", diff.right.find("<parameter=param2>") != std::string::npos);
+    t.assert_true("T1 right should contain value1", diff.right.find("value1") != std::string::npos);
+    t.assert_true("T1 right should contain value2", diff.right.find("value2") != std::string::npos);
+    t.assert_true("T1 right should contain tool_call end", diff.right.find("</seed:tool_call>") != std::string::npos);
+
+    // Suffix should be the eos token
+    t.assert_equal("T1 suffix should be '<seed:eos>'", "<seed:eos>", diff.suffix);
+}
+
+// T2: Compare one vs two tool calls
+static void test_seed_oss_call_count(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_one_call = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json assistant_two_calls = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})),
+            build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002")
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello, please help me."}
+    };
+
+    template_params params_one;
+    params_one.messages = json::array({user_msg, assistant_one_call});
+    params_one.tools = build_tools_definition();
+    params_one.add_generation_prompt = false;
+    params_one.enable_thinking = true;
+
+    auto result = differential_analyzer::compare_variants(tmpl, params_one,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_two_calls});
+        });
+
+    t.assert_true("T2 result should have value", result.has_value());
+
+    const auto & diff = result->diff;
+
+    // Prefix should include the first tool call
+    t.assert_true("T2 prefix should contain first tool_call begin", diff.prefix.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T2 prefix should contain first function", diff.prefix.find("<function=test_function_name>") != std::string::npos);
+    t.assert_true("T2 prefix should contain value1", diff.prefix.find("value1") != std::string::npos);
+    t.assert_true("T2 prefix should contain value2", diff.prefix.find("value2") != std::string::npos);
+    t.assert_true("T2 prefix should contain first tool_call end", diff.prefix.find("</seed:tool_call>") != std::string::npos);
+
+    // Left should be empty (no second tool call in variant A)
+    t.assert_equal("T2 left should be empty", "", diff.left);
+
+    // Right should contain the second tool call
+    t.assert_true("T2 right should contain second tool_call begin", diff.right.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T2 right should contain second function", diff.right.find("<function=test_function_name>") != std::string::npos);
+    t.assert_true("T2 right should contain value3", diff.right.find("value3") != std::string::npos);
+    t.assert_true("T2 right should contain value4", diff.right.find("value4") != std::string::npos);
+    t.assert_true("T2 right should contain second tool_call end", diff.right.find("</seed:tool_call>") != std::string::npos);
+
+    // Suffix should be the eos token
+    t.assert_equal("T2 suffix should be '<seed:eos>'", "<seed:eos>", diff.suffix);
+}
+
+// T3: Compare different function names
+static void test_seed_oss_function_names(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    // Build tools with two different function names
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["arg1"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 1"}
+    });
+    parameters_schema["required"] = json::array({"arg1"});
+
+    json tools = json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "func_alpha"},
+                {"description", "First function"},
+                {"parameters", parameters_schema}
+            }}
+        },
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "func_beta"},
+                {"description", "Second function"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+
+    json assistant_func_alpha = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("func_alpha", json::object({{"arg1", "test_value"}}))
+        })}
+    };
+
+    json assistant_func_beta = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("func_beta", json::object({{"arg1", "test_value"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello"}
+    };
+
+    template_params params_alpha;
+    params_alpha.messages = json::array({user_msg, assistant_func_alpha});
+    params_alpha.tools = tools;
+    params_alpha.add_generation_prompt = false;
+    params_alpha.enable_thinking = true;
+
+    auto result = differential_analyzer::compare_variants(tmpl, params_alpha,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_func_beta});
+        });
+
+    t.assert_true("T3 result should have value", result.has_value());
+
+    const auto & diff = result->diff;
+
+    bool func_alpha_in_left = diff.left.find("func_alpha") != std::string::npos;
+    bool func_alpha_in_prefix = diff.prefix.find("func_alpha") != std::string::npos;
+    bool func_beta_in_right = diff.right.find("func_beta") != std::string::npos;
+    bool func_beta_in_prefix = diff.prefix.find("func_beta") != std::string::npos;
+    bool func_beta_in_suffix = diff.suffix.find("func_beta") != std::string::npos;
+
+    // Left should contain func_alpha (or be in prefix)
+    t.assert_true("T3 left should contain func_alpha (or prefix)", func_alpha_in_left || func_alpha_in_prefix);
+
+    // Right should contain func_beta
+    t.assert_true("T3 right should contain func_beta", func_beta_in_right || func_beta_in_prefix || func_beta_in_suffix);
+
+    // Both should have the same parameter value (in common parts, not in diffs)
+    // Since both have same args, test_value will be in prefix/suffix
+    t.assert_true("T3 diff should contain test_value (in prefix or suffix)",
+        diff.prefix.find("test_value") != std::string::npos || diff.suffix.find("test_value") != std::string::npos);
+}
+
+// T4: Compare different argument counts (zero, one, two parameters)
+static void test_seed_oss_argument_count(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    // Build tools with 0, 1, or 2 required parameters
+    json params_2_required = json::object();
+    params_2_required["type"] = "object";
+    params_2_required["properties"] = json::object();
+    params_2_required["properties"]["arg1"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 1"}
+    });
+    params_2_required["properties"]["arg2"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 2"}
+    });
+    params_2_required["required"] = json::array({"arg1", "arg2"});
+
+    json params_1_required = json::object();
+    params_1_required["type"] = "object";
+    params_1_required["properties"] = json::object();
+    params_1_required["properties"]["arg1"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 1"}
+    });
+    params_1_required["required"] = json::array({"arg1"});
+
+    json tools = json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "test_func"},
+                {"description", "Test function"},
+                {"parameters", params_2_required}
+            }}
+        }
+    });
+
+    // Test: zero args vs one arg
+    json assistant_zero_args = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_func", json::object())
+        })}
+    };
+
+    json assistant_one_arg = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_func", json::object({{"arg1", "value1"}}))
+        })}
+    };
+
+    json assistant_two_args = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_func", json::object({{"arg1", "value1"}, {"arg2", "value2"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello"}
+    };
+
+    // Test zero vs one
+    template_params params_zero;
+    params_zero.messages = json::array({user_msg, assistant_zero_args});
+    params_zero.tools = tools;
+    params_zero.add_generation_prompt = false;
+    params_zero.enable_thinking = true;
+
+    auto result_zero_one = differential_analyzer::compare_variants(tmpl, params_zero,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_one_arg});
+        });
+
+    t.assert_true("T4 zero vs one result should have value", result_zero_one.has_value());
+    t.assert_true("T4 zero vs one left should be empty or minimal", result_zero_one->diff.left.empty() || result_zero_one->diff.left == "");
+    t.assert_true("T4 zero vs one right should contain arg1", result_zero_one->diff.right.find("arg1") != std::string::npos);
+
+    // Test one vs two
+    template_params params_one;
+    params_one.messages = json::array({user_msg, assistant_one_arg});
+    params_one.tools = tools;
+    params_one.add_generation_prompt = false;
+    params_one.enable_thinking = true;
+
+    auto result_one_two = differential_analyzer::compare_variants(tmpl, params_one,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_two_args});
+        });
+
+    t.assert_true("T4 one vs two result should have value", result_one_two.has_value());
+
+    const auto & diff4 = result_one_two->diff;
+    t.assert_true("T4 one vs two left should contain arg1 (or prefix)",
+        diff4.left.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
+    t.assert_true("T4 one vs two right should contain arg1 (or prefix)",
+        diff4.right.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
+    t.assert_true("T4 one vs two right should contain arg2 (or prefix/suffix)",
+        diff4.right.find("arg2") != std::string::npos || diff4.prefix.find("arg2") != std::string::npos || diff4.suffix.find("arg2") != std::string::npos);
+}
+
+// T5: Compare different argument values
+static void test_seed_oss_args_presence(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_same_arg = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}}))
+        })}
+    };
+
+    json assistant_other_arg = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param2", "value2"}}))
+        })}
+    };
+
+    json assistant_both_args = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello"}
+    };
+
+    template_params params_same;
+    params_same.messages = json::array({user_msg, assistant_same_arg});
+    params_same.tools = build_tools_definition();
+    params_same.add_generation_prompt = false;
+    params_same.enable_thinking = true;
+
+    // Test same arg vs other arg
+    auto result_same_other = differential_analyzer::compare_variants(tmpl, params_same,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_other_arg});
+        });
+
+    t.assert_true("T5 same vs other result should have value", result_same_other.has_value());
+    const auto & diff5a = result_same_other->diff;
+    t.assert_true("T5 same vs other left should contain param1 (or prefix/suffix)",
+        diff5a.left.find("param1") != std::string::npos || diff5a.prefix.find("param1") != std::string::npos || diff5a.suffix.find("param1") != std::string::npos);
+    t.assert_true("T5 same vs other left should contain value1 (or prefix/suffix)",
+        diff5a.left.find("value1") != std::string::npos || diff5a.prefix.find("value1") != std::string::npos);
+    t.assert_true("T5 same vs other right should contain param2 (or prefix/suffix)",
+        diff5a.right.find("param2") != std::string::npos || diff5a.prefix.find("param2") != std::string::npos || diff5a.suffix.find("param2") != std::string::npos);
+    t.assert_true("T5 same vs other right should contain value2 (or prefix/suffix)",
+        diff5a.right.find("value2") != std::string::npos || diff5a.prefix.find("value2") != std::string::npos || diff5a.suffix.find("value2") != std::string::npos);
+
+    // Test same arg vs both args
+    auto result_same_both = differential_analyzer::compare_variants(tmpl, params_same,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_both_args});
+        });
+
+    t.assert_true("T5 same vs both result should have value", result_same_both.has_value());
+    const auto & diff5b = result_same_both->diff;
+    t.assert_true("T5 same vs both left should contain param1 (or prefix/suffix)",
+        diff5b.left.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos);
+    t.assert_true("T5 same vs both right should contain param1 (or prefix/suffix)",
+        diff5b.right.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos);
+    t.assert_true("T5 same vs both right should contain param2 (or prefix/suffix)",
+        diff5b.right.find("param2") != std::string::npos || diff5b.prefix.find("param2") != std::string::npos || diff5b.suffix.find("param2") != std::string::npos);
+}
+
+// T6: Tool call with vs without reasoning_content
+static void test_seed_oss_tool_with_reasoning(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_tool_only = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json assistant_tool_with_reasoning = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })},
+        {"reasoning_content", "I need to call the tool first."}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello, please help me."}
+    };
+
+    template_params params_tool_only;
+    params_tool_only.messages = json::array({user_msg, assistant_tool_only});
+    params_tool_only.tools = build_tools_definition();
+    params_tool_only.add_generation_prompt = false;
+    params_tool_only.enable_thinking = true;
+
+    auto result = differential_analyzer::compare_variants(tmpl, params_tool_only,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_tool_with_reasoning});
+        });
+
+    t.assert_true("T6 result should have value", result.has_value());
+
+    const auto & diff = result->diff;
+
+    // Left should be empty (no reasoning in variant A)
+    t.assert_equal("T6 left should be empty", "", diff.left);
+
+    // Right should contain the thinking token with reasoning content
+    t.assert_true("T6 right should contain think begin", diff.right.find("<seed:think>") != std::string::npos);
+    t.assert_true("T6 right should contain reasoning content", diff.right.find("I need to call the tool first.") != std::string::npos);
+    t.assert_true("T6 right should contain think end", diff.right.find("</seed:think>") != std::string::npos);
+
+    // Prefix should contain the assistant role
+    t.assert_true("T6 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos);
+
+    // Suffix should contain the tool call
+    t.assert_true("T6 suffix should contain tool_call begin", diff.suffix.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T6 suffix should contain function name", diff.suffix.find("test_function_name") != std::string::npos);
+    t.assert_true("T6 suffix should contain eos", diff.suffix.find("<seed:eos>") != std::string::npos);
+}
+
+static common_chat_template load_template(testing & t, const std::string & template_path) {
+    std::ifstream fin(template_path, std::ios::binary);
+    std::ostringstream buf;
+    if (fin.is_open()) {
+        buf << fin.rdbuf();
+    }
+    std::string template_source = buf.str();
+    common_chat_template tmpl(template_source, "", "");
+    t.assert_true("Nemotron template loaded successfully", template_source.length() > 0);
+    return tmpl;
+}   
+
+// ============================================================================
+// Nemotron Template Analysis Tests
+// ============================================================================
+static common_chat_template load_nemotron_template(testing & t) {
+    return load_template(t, "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja");
+}
+
+static void test_nemotron_analysis(testing & t) {
+    t.test("Nemotron reasoning detection", test_nemotron_reasoning_detection);
+    t.test("Nemotron tool format", test_nemotron_tool_format);
+}
+
+static void test_nemotron_reasoning_detection(testing & t) {
+    common_chat_template tmpl = load_nemotron_template(t);
+
+    // Test the comparison manually to see what's happening
+    json user_msg = json{ { "role", "user" }, { "content", "Hello" } };
+    json assistant_no_reasoning = json{
+        { "role", "assistant" },
+        { "content", "I can help." }
+    };
+    json assistant_with_reasoning = json{
+        { "role", "assistant" },
+        { "content", "I can help." },
+        { "reasoning_content", "Let me think about this." }
+    };
+
+    template_params params;
+    params.messages = json::array({ user_msg, assistant_no_reasoning });
+    params.add_generation_prompt = false;
+    params.enable_thinking = true;
+
+    // Run differential analysis
+    auto analysis = differential_analyzer::analyze(tmpl);
+
+    // Check reasoning markers
+    t.assert_equal("reasoning_start should be '<think>'", "<think>", analysis.markers.reasoning_start);
+    t.assert_equal("reasoning_end should be '</think>'", "</think>", analysis.markers.reasoning_end);
+
+    // Check reasoning mode detection
+    // Nemotron uses forced closed reasoning with add_generation_prompt
+    t.assert_equal("reasoning should be FORCED_CLOSED", reasoning_mode::FORCED_CLOSED, analysis.reasoning);
+
+    // Make sure reasoning markers don't spill over to content markers
+    t.assert_equal("content start should be empty", "", analysis.markers.content_start);
+    t.assert_equal("content end should be empty", "", analysis.markers.content_end);
+
+    t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content);
+}
+
+static void test_nemotron_tool_format(testing & t) {
+    common_chat_template tmpl = load_nemotron_template(t);
+
+    // Run differential analysis
+    auto analysis = differential_analyzer::analyze(tmpl);
+
+    // Check tool markers - Nemotron uses per-call wrapping (each call individually wrapped)
+    t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.markers.tool_section_start);
+    t.assert_equal("tool_section_end should be empty (per-call format)", "", analysis.markers.tool_section_end);
+    t.assert_equal("per_call_start should be '<tool_call>\\n'", "<tool_call>\n", analysis.markers.per_call_start);
+    t.assert_equal("per_call_end should be '</tool_call>'", "</tool_call>", analysis.markers.per_call_end);
+    t.assert_true("should support parallel calls", analysis.supports_parallel_calls);
+
+    // Check function markers
+    t.assert_equal("func_name_prefix should be '<function='", "<function=", analysis.markers.func_name_prefix);
+    t.assert_equal("func_name_suffix should be '>\\n'", ">\n", analysis.markers.func_name_suffix);
+    t.assert_equal("func_close should be '</function>'", "</function>", analysis.markers.func_close);
+
+    // Check argument markers (note: markers retain trailing newlines for proper parsing)
+    t.assert_equal("arg_name_prefix should be '<parameter='", "<parameter=", analysis.markers.arg_name_prefix);
+    t.assert_equal("arg_name_suffix should be '>\\n'", ">\n", analysis.markers.arg_name_suffix);
+    t.assert_equal("arg_value_suffix should be '</parameter>\\n'", "</parameter>\n", analysis.markers.arg_value_suffix);
+
+    // Check format classification
+    t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools == tool_format::TAG_WITH_TAGGED);
+
+    // Verify tool support
+    t.assert_true("should support tools", analysis.supports_tools);
+}
+
+static common_chat_template load_cohere_template(testing & t) {
+    return load_template(t, "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja");
+}
+
+static void test_cohere_analysis(testing & t) {
+    t.test("Cohere reasoning detection", test_cohere_reasoning_detection);
+    t.test("Cohere tool format", test_cohere_tool_format);
+}
+
+static void test_cohere_reasoning_detection(testing & t) {
+    common_chat_template tmpl = load_cohere_template(t);
+
+    // Run differential analysis
+    auto analysis = differential_analyzer::analyze(tmpl);
+
+    // Check reasoning markers - Cohere uses special token format
+    t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", analysis.markers.reasoning_start);
+    t.assert_equal("reasoning_end should be '<|END_THINKING|>'", "<|END_THINKING|>", analysis.markers.reasoning_end);
+
+    // Check reasoning mode - Cohere only shows reasoning with tool calls (TOOLS_ONLY)
+    t.assert_equal("reasoning should be TOOLS_ONLY", reasoning_mode::TOOLS_ONLY, analysis.reasoning);
+
+    // Check content markers - Cohere wraps all content with START/END_RESPONSE
+    t.assert_equal("content_start should be '<|START_RESPONSE|>'", "<|START_RESPONSE|>", analysis.markers.content_start);
+    t.assert_equal("content_end should be '<|END_RESPONSE|>'", "<|END_RESPONSE|>", analysis.markers.content_end);
+
+    // Content is always wrapped (both with and without tools)
+    t.assert_equal("content should be ALWAYS_WRAPPED", content_mode::ALWAYS_WRAPPED, analysis.content);
+}
+
+static void test_cohere_tool_format(testing & t) {
+    common_chat_template tmpl = load_cohere_template(t);
+
+    // Run differential analysis
+    auto analysis = differential_analyzer::analyze(tmpl);
+
+    // Check tool section markers - Cohere uses ACTION markers
+    t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.markers.tool_section_start);
+    t.assert_equal("tool_section_end should be '<|END_ACTION|>'", "<|END_ACTION|>", analysis.markers.tool_section_end);
+
+    // JSON_NATIVE format has no per-call markers
+    t.assert_equal("per_call_start should be empty", "", analysis.markers.per_call_start);
+    t.assert_equal("per_call_end should be empty", "", analysis.markers.per_call_end);
+
+    // JSON_NATIVE format has empty function markers (no XML-style markers)
+    t.assert_equal("func_name_prefix should be empty", "", analysis.markers.func_name_prefix);
+    t.assert_equal("func_name_suffix should be empty", "", analysis.markers.func_name_suffix);
+    t.assert_equal("func_close should be empty", "", analysis.markers.func_close);
+
+    // JSON_NATIVE format has empty args markers
+    t.assert_equal("args_start should be empty", "", analysis.markers.args_start);
+    t.assert_equal("args_end should be empty", "", analysis.markers.args_end);
+
+    // JSON_NATIVE format has empty argument markers
+    t.assert_equal("arg_name_prefix should be empty", "", analysis.markers.arg_name_prefix);
+    t.assert_equal("arg_name_suffix should be empty", "", analysis.markers.arg_name_suffix);
+    t.assert_equal("arg_value_prefix should be empty", "", analysis.markers.arg_value_prefix);
+    t.assert_equal("arg_value_suffix should be empty", "", analysis.markers.arg_value_suffix);
+    t.assert_equal("arg_separator should be empty", "", analysis.markers.arg_separator);
+
+    // Check JSON field names - Cohere uses non-standard names
+    t.assert_equal("name_field should be 'tool_name'", "tool_name", analysis.name_field);
+    t.assert_equal("args_field should be 'parameters'", "parameters", analysis.args_field);
+    // This isn't a real tool call id field, i.e. with the OpenAI tool call ID format
+    t.assert_equal("id_field should be 'tool_call_id'", "", analysis.id_field);
+
+    // Check format classification
+    t.assert_equal("tool format should be JSON_NATIVE", tool_format::JSON_NATIVE, analysis.tools);
+
+    // Check flags
+    t.assert_true("should support tools", analysis.supports_tools);
+    t.assert_true("should support parallel calls", analysis.supports_parallel_calls);
+    t.assert_true("should not require nonnull content", !analysis.requires_nonnull_content);
+    t.assert_true("tools_array_wrapped should be true", analysis.tools_array_wrapped);
+}
+
+// ============================================================================
+// standard_json_tools Format Tests
+// ============================================================================
+
+// Helper to build tools definition for tests
+static json build_test_tools() {
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["location"] = json::object({
+        {"type", "string"},
+        {"description", "The city and state"}
+    });
+    parameters_schema["properties"]["unit"] = json::object({
+        {"type", "string"},
+        {"description", "Temperature unit"},
+        {"enum", json::array({"celsius", "fahrenheit"})}
+    });
+    parameters_schema["required"] = json::array({"location"});
+
+    return json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "get_current_weather"},
+                {"description", "Get the current weather in a given location"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+}
+
+static void test_standard_json_tools_formats(testing & t) {
+    t.test("OpenAI format", test_standard_json_tools_openai);
+    t.test("Cohere format", test_standard_json_tools_cohere);
+    t.test("function-as-key format", test_standard_json_tools_function_key);
+}
+
+// Test 1: OpenAI Standard Format
+// {"id": "call_abc", "function": {"name": "get_weather", "arguments": {"location": "NYC"}}}
+static void test_standard_json_tools_openai(testing & t) {
+    json tools = build_test_tools();
+
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        auto tool_call = p.standard_json_tools(
+            "<tool_call>", "</tool_call>", tools,
+            /* parallel */ true,
+            /* force */ false,
+            /* name_key */ "function.name",
+            /* args_key */ "function.arguments",
+            /* array_wrapped */ false,
+            /* function_is_key */ false,
+            /* call_id_key */ "id",
+            /* gen_call_id_key */ "",
+            /* parameters_order */ {}
+        );
+        return p.content(p.until("<tool_call>")) + p.optional(tool_call) + p.end();
+    });
+
+    std::string input =
+        "Let me check the weather."
+        "<tool_call>"
+        R"({"id": "call_abc123", "function": {"name": "get_current_weather", "arguments": {"location": "NYC"}}})"
+        "</tool_call>";
+
+    common_peg_parse_context ctx(input, false);
+    auto result = parser.parse(ctx);
+
+    t.assert_true("parse success", result.success());
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_unified_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+    if (!msg.tool_calls.empty()) {
+        t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+        t.assert_equal("tool id", "call_abc123", msg.tool_calls[0].id);
+    }
+    t.assert_true("content present", msg.content.find("Let me check the weather") != std::string::npos);
+}
+
+// Test 2: Cohere Format
+// {"tool_call_id": 0, "tool_name": "get_weather", "parameters": {"location": "NYC"}}
+static void test_standard_json_tools_cohere(testing & t) {
+    json tools = build_test_tools();
+
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        auto tool_call = p.standard_json_tools(
+            "<|START_ACTION|>[", "]<|END_ACTION|>", tools,
+            /* parallel */ true,
+            /* force */ false,
+            /* name_key */ "tool_name",
+            /* args_key */ "parameters",
+            /* array_wrapped */ false,  // Brackets are part of section markers
+            /* function_is_key */ false,
+            /* call_id_key */ "",
+            /* gen_call_id_key */ "tool_call_id",
+            /* parameters_order */ {"tool_call_id", "tool_name", "parameters"}
+        );
+        return p.content(p.until("<|START_ACTION|>")) + p.optional(tool_call) + p.end();
+    });
+
+    std::string input =
+        "Let me search for that."
+        "<|START_ACTION|>["
+        R"({"tool_call_id": 0, "tool_name": "get_current_weather", "parameters": {"location": "NYC", "unit": "celsius"}})"
+        "]<|END_ACTION|>";
+
+    common_peg_parse_context ctx(input, false);
+    auto result = parser.parse(ctx);
+
+    t.assert_true("parse success", result.success());
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_unified_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+    if (!msg.tool_calls.empty()) {
+        t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+        t.assert_equal("tool id", "0", msg.tool_calls[0].id);
+    }
+    t.assert_true("content present", msg.content.find("Let me search") != std::string::npos);
+}
+
+// Test 3: Function-as-Key Format
+// {"get_current_weather": {"id": "call-0001", "args": {"location": "NYC"}}}
+static void test_standard_json_tools_function_key(testing & t) {
+    json tools = build_test_tools();
+
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        auto tool_call = p.standard_json_tools(
+            "<tool_calls>[", "]</tool_calls>", tools,
+            /* parallel */ true,
+            /* force */ false,
+            /* name_key */ "",  // Name is the key itself
+            /* args_key */ "args",
+            /* array_wrapped */ false,
+            /* function_is_key */ true,
+            /* call_id_key */ "id",
+            /* gen_call_id_key */ "",
+            /* parameters_order */ {}
+        );
+        return p.content(p.until("<tool_calls>")) + p.optional(tool_call) + p.end();
+    });
+
+    std::string input =
+        "I'll call the weather function."
+        "<tool_calls>["
+        R"({"get_current_weather": {"id": "call-0001", "args": {"location": "NYC", "unit": "celsius"}}})"
+        "]</tool_calls>";
+
+    common_peg_parse_context ctx(input, false);
+    auto result = parser.parse(ctx);
+
+    t.assert_true("parse success", result.success());
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_unified_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+    if (!msg.tool_calls.empty()) {
+        t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+        t.assert_equal("tool id", "call-0001", msg.tool_calls[0].id);
+    }
+    t.assert_true("content present", msg.content.find("I'll call the weather") != std::string::npos);
+}
+
+// ============================================================================
+// normalize_quotes_to_json Tests
+// ============================================================================
+
+// Copy of the function for isolated testing (original is static in chat-peg-parser.cpp)
+static std::string normalize_quotes_to_json(const std::string & input) {
+    std::string result;
+    result.reserve(input.size() + 16);
+
+    bool in_single_quoted = false;
+    bool in_double_quoted = false;
+
+    for (size_t i = 0; i < input.size(); ++i) {
+        char c = input[i];
+
+        if (c == '\\' && i + 1 < input.size()) {
+            char next = input[i + 1];
+
+            if (in_single_quoted) {
+                if (next == '\'') {
+                    result += '\'';
+                    ++i;
+                    continue;
+                }
+                if (next == '"') {
+                    result += "\\\"";
+                    ++i;
+                    continue;
+                }
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            if (in_double_quoted) {
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            result += c;
+            continue;
+        }
+
+        if (c == '"') {
+            if (in_single_quoted) {
+                result += "\\\"";
+            } else {
+                in_double_quoted = !in_double_quoted;
+                result += c;
+            }
+        } else if (c == '\'') {
+            if (in_double_quoted) {
+                result += c;
+            } else if (in_single_quoted) {
+                in_single_quoted = false;
+                result += '"';
+            } else {
+                in_single_quoted = true;
+                result += '"';
+            }
+        } else {
+            result += c;
+        }
+    }
+
+    return result;
+}
+
+static void test_normalize_quotes_to_json(testing & t) {
+    t.test("basic single to double quotes", [](testing & t) {
+        std::string input = "{'key': 'value'}";
+        std::string expected = "{\"key\": \"value\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("basic conversion", expected, result);
+    });
+
+    t.test("escaped single quote inside single-quoted string", [](testing & t) {
+        std::string input = "{'code': 'print(\\'hello\\')'}";
+        std::string expected = "{\"code\": \"print('hello')\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("escaped single quote", expected, result);
+    });
+
+    t.test("double quote inside single-quoted string", [](testing & t) {
+        std::string input = "{'msg': 'He said \"hi\"'}";
+        std::string expected = "{\"msg\": \"He said \\\"hi\\\"\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("double quote escaping", expected, result);
+    });
+
+    t.test("nested backslash escapes", [](testing & t) {
+        std::string input = "{'path': 'C:\\\\Users\\\\test'}";
+        std::string expected = "{\"path\": \"C:\\\\Users\\\\test\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("backslash escaping", expected, result);
+    });
+
+    t.test("newline escapes", [](testing & t) {
+        std::string input = "{'text': 'line1\\nline2'}";
+        std::string expected = "{\"text\": \"line1\\nline2\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("newline escaping", expected, result);
+    });
+
+    t.test("mixed quotes", [](testing & t) {
+        std::string input = "{\"already_double\": 'single_value'}";
+        std::string expected = "{\"already_double\": \"single_value\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("mixed quotes", expected, result);
+    });
+
+    t.test("embedded quotes - the test case", test_normalize_quotes_with_embedded_quotes);
+}
+
+// Test case that mirrors the Seed-OSS failing test scenario
+static void test_normalize_quotes_with_embedded_quotes(testing & t) {
+    // This is similar to the Seed-OSS template test case
+    // The input has embedded double quotes like "14" and "bar" inside string values
+    std::string input = "{'filename': 'foo.cpp', 'oldString': 'def foo(arg = \"14\"):\\n    return arg + \"bar\"\\n', 'newString': 'def foo(arg = \"15\"):\\n    pass\\n'}";
+
+    // Expected: Python single quotes -> JSON double quotes, internal double quotes escaped
+    std::string expected = "{\"filename\": \"foo.cpp\", \"oldString\": \"def foo(arg = \\\"14\\\"):\\n    return arg + \\\"bar\\\"\\n\", \"newString\": \"def foo(arg = \\\"15\\\"):\\n    pass\\n\"}";
+
+    std::string result = normalize_quotes_to_json(input);
+
+    t.assert_equal("normalize quotes with embedded double quotes", expected, result);
+
+    // Also verify the result is valid JSON
+    try {
+        json parsed = json::parse(result);
+        t.assert_true("result is valid JSON", true);
+        t.assert_equal("filename field", "foo.cpp", parsed["filename"].get<std::string>());
+        t.assert_true("oldString contains embedded quotes",
+            parsed["oldString"].get<std::string>().find("\"14\"") != std::string::npos);
+        t.assert_true("newString contains embedded quotes",
+            parsed["newString"].get<std::string>().find("\"15\"") != std::string::npos);
+    } catch (const std::exception & e) {
+        t.assert_true(std::string("JSON parse failed: ") + e.what(), false);
+    }
+}
+
+// ============================================================================
+// TAG_WITH_TAGGED Argument Parsing Tests
+// ============================================================================
+
+// Build tools definition for edit function
+static json build_edit_tool() {
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["filename"] = json::object({
+        {"type", "string"},
+        {"description", "Path of file to edit"}
+    });
+    parameters_schema["properties"]["oldString"] = json::object({
+        {"type", "string"},
+        {"description", "String to replace"}
+    });
+    parameters_schema["properties"]["newString"] = json::object({
+        {"type", "string"},
+        {"description", "New (replacement) value"}
+    });
+    parameters_schema["required"] = json::array({"filename", "oldString", "newString"});
+
+    return json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "edit"},
+                {"description", "Edit a file"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+}
+
+// Test that reproduces the Seed-OSS template issue with embedded quotes
+static void test_tagged_args_with_embedded_quotes(testing & t) {
+    json tools = build_edit_tool();
+
+    // Build a parser for TAG_WITH_TAGGED format like Seed-OSS/Nemotron
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        // Build tool choice for the edit function
+        auto tool_choice = p.choice();
+
+        for (const auto & tool_def : tools) {
+            if (!tool_def.contains("function")) continue;
+            const auto & function = tool_def.at("function");
+            std::string name = function.at("name");
+            const auto & params = function.at("parameters");
+
+            if (!params.contains("properties") || !params.at("properties").is_object()) continue;
+
+            const auto & properties = params.at("properties");
+
+            // Build argument parsers
+            std::vector<common_peg_parser> arg_parsers;
+            for (const auto & [param_name, param_schema] : properties.items()) {
+                auto arg = p.tool_arg(
+                    p.tool_arg_open(p.literal("<parameter=") + p.tool_arg_name(p.literal(param_name)) + p.literal(">")) +
+                    p.space() +
+                    p.tool_arg_string_value(p.until("</parameter>")) +
+                    p.space() +
+                    p.tool_arg_close(p.literal("</parameter>"))
+                );
+                arg_parsers.push_back(p.optional(p.rule("arg-" + param_name, arg)));
+            }
+
+            // Build arg sequence with space() between
+            common_peg_parser args_seq = p.eps();
+            for (size_t i = 0; i < arg_parsers.size(); i++) {
+                if (i > 0) {
+                    args_seq = args_seq + p.space();
+                }
+                args_seq = args_seq + arg_parsers[i];
+            }
+
+            auto func_parser =
+                p.tool_open(p.literal("<function=") + p.tool_name(p.literal(name)) + p.literal(">")) +
+                p.space() + args_seq + p.space() +
+                p.tool_close(p.literal("</function>"));
+
+            tool_choice |= p.rule("tool-" + name, p.tool(func_parser));
+        }
+
+        auto tool_section =
+            p.literal("<seed:tool_call>") + p.space() +
+            tool_choice +
+            p.space() + p.literal("</seed:tool_call>");
+
+        return p.content(p.until("<seed:tool_call>")) + p.optional(tool_section) + p.end();
+    });
+
+    // The exact input from the failing test
+    std::string input =
+        "<seed:tool_call>\n"
+        "<function=edit>\n"
+        "<parameter=filename>\n"
+        "foo.cpp\n"
+        "</parameter>\n"
+        "<parameter=oldString>"
+        "def foo(arg = \"14\"):\n"
+        "    return arg + \"bar\"\n"
+        "\n"
+        "</parameter>\n"
+        "<parameter=newString>"
+        "def foo(arg = \"15\"):\n"
+        "    pass\n"
+        "\n"
+        "</parameter>\n"
+        "</function>\n"
+        "</seed:tool_call>";
+
+    common_peg_parse_context ctx(input, false);
+    auto result = parser.parse(ctx);
+
+    t.assert_true("parse success", result.success());
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_unified_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+
+    if (!msg.tool_calls.empty()) {
+        t.assert_equal("tool name", "edit", msg.tool_calls[0].name);
+
+        // Parse the arguments as JSON to verify they're valid
+        std::string args = msg.tool_calls[0].arguments;
+
+        try {
+            json parsed = json::parse(args);
+            t.assert_true("arguments is valid JSON", true);
+
+            // Verify each field has proper value
+            t.assert_equal("filename", "foo.cpp", parsed.value("filename", ""));
+
+            std::string oldString = parsed.value("oldString", "");
+            t.assert_true("oldString contains embedded quotes",
+                oldString.find("\"14\"") != std::string::npos);
+            t.assert_true("oldString contains bar with quotes",
+                oldString.find("\"bar\"") != std::string::npos);
+
+            std::string newString = parsed.value("newString", "");
+            t.assert_true("newString contains embedded quotes",
+                newString.find("\"15\"") != std::string::npos);
+
+        } catch (const std::exception & e) {
+            t.assert_true(std::string("arguments should be valid JSON: ") + e.what(), false);
+        }
+    }
+}
+
diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp
deleted file mode 100644
index 6f44a2b4211..00000000000
--- a/tests/test-chat-parser.cpp
+++ /dev/null
@@ -1,617 +0,0 @@
-//  Tests chat handling, including grammar generation and parsing for tool calling, for various templates.
-//
-//  Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates,
-//  e.g. given Minja (http://github.com/google/minja) checked out in parent dir:
-//
-//    cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
-//
-#include <exception>
-#include <iostream>
-#include <string>
-
-#include "chat-parser.h"
-#include "common.h"
-#include "log.h"
-#include "regex-partial.h"
-
-template <class T>
-static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
-    if (expected != actual) {
-        std::cerr << label << std::endl;
-        std::cerr << "Expected: " << expected << std::endl;
-        std::cerr << "Actual: " << actual << std::endl;
-        std::cerr << std::flush;
-        throw std::runtime_error("Test failed");
-    }
-}
-
-template <class T>
-static void assert_equals(const T & expected, const T & actual) {
-    assert_equals("", expected, actual);
-}
-static void assert_equals(const char * expected, const std::string & actual) {
-  return assert_equals<std::string>(expected, actual);
-}
-
-static void assert_throws(const std::function<void()> & fn, const std::string & expected_exception_pattern = "") {
-    try {
-        fn();
-    } catch (const std::exception & e) {
-      if (expected_exception_pattern.empty()) {
-          return;
-        }
-        std::regex expected_exception_regex(expected_exception_pattern);
-        std::string actual_message = e.what();
-        if (std::regex_search(actual_message, expected_exception_regex)) {
-            return;
-        }
-        throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")");
-        throw std::runtime_error("Exception of unexpected type: " + std::string(e.what()));
-    }
-    throw std::runtime_error("Exception was expected but not thrown");
-}
-
-static void test_reasoning() {
-  //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("<tnk>Cogito</tnk>Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals(std::string("Cogito"), builder.result().reasoning_content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("Cogito</tnk>Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = true;
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals(std::string("Cogito"), builder.result().reasoning_content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = true;
-    params.thinking_forced_open = true;
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("<think>Cogito</think>", builder.result().content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    const std::string variant("content_only_inline_think");
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    params.parse_tool_calls = false;
-    const std::string input = "<think>Pense</think>Bonjour";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals(variant, std::string("Pense"), msg.reasoning_content);
-    assert_equals(variant, std::string("Bonjour"), msg.content);
-  }
-  {
-    const std::string variant("llama_3_inline_think");
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_LLAMA_3_X;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    params.parse_tool_calls = false;
-    const std::string input = "<think>Plan</think>Réponse";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals(variant, std::string("Plan"), msg.reasoning_content);
-    assert_equals(variant, std::string("Réponse"), msg.content);
-  }
-  // Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = true;
-    params.parse_tool_calls = true;
-    const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
-    common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, params);
-    assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
-    assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
-    assert_equals(variant, std::string("ok"), builder.consume_rest());
-  }
-  // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = true;
-    params.parse_tool_calls = true;
-    const std::string variant("deepseek_v3_1_reasoning_format_none");
-    const std::string input = "REASONING</think>ok";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
-    assert_equals(variant, std::string(""), msg.reasoning_content);
-  }
-}
-
-static void test_regex() {
-  auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") {
-    common_chat_msg_parser builder(input, /* is_partial= */ false, {});
-    assert_throws([&]() { builder.consume_regex(common_regex(regex)); }, expected_exception_pattern);
-  };
-
-  test_throws("Hello, world!", "abc", "^abc$");
-  test_throws("Hello, world!", "e", "^e$");
-
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
-    builder.consume_regex(common_regex("Hello"));
-    assert_equals(", world!", builder.consume_rest());
-  }
-
-  {
-    // When in non partial mode, we can say whether the regex was consumed or not.
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
-    assert_equals(false, builder.try_consume_regex(common_regex("Hello, world!")).has_value());
-  }
-  {
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
-    auto res = builder.try_consume_regex(common_regex("H(el)l(?:o, world!)?"));
-    assert_equals(true, res.has_value());
-    // Verify captures
-    assert_equals<size_t>(2, res->groups.size());
-    assert_equals("Hell", builder.str(res->groups[0]));
-    assert_equals("el", builder.str(res->groups[1]));
-    // Verify position is after the match
-    assert_equals<size_t>(4, builder.pos());
-    assert_equals("o,", builder.consume_rest());
-  }
-  {
-    // But in partial mode, we have a partial final match / can't decide, so we throw a partial exception.
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {});
-    assert_throws([&]() {
-      builder.try_consume_regex(common_regex("Hello, world!"));
-    }, "^Hello, world!$");
-  }
-
-  // Now regardless of the mode, we can tell these aren't a match.
-  for (const auto is_partial : {false, true}) {
-    common_chat_msg_parser builder("Hello,", is_partial, {});
-    assert_equals(false, builder.try_consume_regex(common_regex("a(b|c)(d|e)f")).has_value());
-  }
-  for (const auto is_partial : {false, true}) {
-    common_chat_msg_parser builder("Hello,", is_partial, {});
-    assert_equals(false, builder.try_consume_literal("Oh"));
-  }
-}
-
-const std::vector<std::string> barely_healable_jsons = {
-  "{",
-  "{\"",
-  "{\"\\",
-  "{\"n",
-  "{\"name\"",
-  "{\"name\":",
-  "{\"name\":\"",
-  "{\"name\":\"\\",
-  "{\"name\":\"python",
-  "{\"name\":\"python\\",
-  "{\",",
-  "{\":",
-  "{\"[",
-  "{\"]",
-  "{\"{",
-  "{\"}",
-  "{\"1",
-  "{\"name\":\",",
-  "{\"name\":\":",
-  "{\"name\":\"[",
-  "{\"name\":\"]",
-  "{\"name\":\"{",
-  "{\"name\":\"}",
-  "{\"name\":\"1",
-};
-
-static void test(const std::string & input, bool is_partial, const std::vector<std::vector<std::string>> & args_paths, const std::vector<std::vector<std::string>> & content_paths, const std::string & expected) {
-  common_chat_msg_parser builder(input, is_partial, {});
-  auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths);
-  assert_equals(true, js.has_value());
-  assert_equals(is_partial, js->is_partial);
-  assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
-}
-
-static void test_deepseek_v3_1_tool_calls() {
-    //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
-    // variant: happy path for when it works as the model card says it should
-    const std::string variant("simple");
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    params.parse_tool_calls = true;
-    const std::string input = "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
-    assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
-    // JSON arguments are dumped without spaces
-    assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
-    assert_equals(variant, std::string(""), msg.content);
-    assert_equals(variant, std::string(""), msg.reasoning_content);
-
-    // variant: simple + thinking open
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("simple_thinking");
-        const std::string in = "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
-    }
-    // variant: simple + multiple tool calls
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = false;
-        params.parse_tool_calls = true;
-        const std::string variant("simple_multiple_tool_calls");
-        const std::string in = "CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
-        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-    }
-
-
-    // variant: thinking forced open + tool call in reasoning content
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals(variant, std::string("REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING"), m.reasoning_content);
-    }
-
-    // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
-    //          This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
-    //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
-    //          add the reasoning content as regular content and parse the tool calls.
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals(variant, std::string("REASONING"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-    }
-
-    // variant: thinking forced open + tool call in reasoning content + no closing think + partial
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, /* is_partial= */ true, params);
-        assert_equals(variant, std::string("REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>"), m.reasoning_content);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-    }
-
-    // variant: thinking not forced open + reasoning + regular content + no tool calls
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
-        const std::string in = "REASONING</think>CONTENT";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
-    }
-    // variant: thinking not forced open + missing reasoning + no tool calls
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = false;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
-        const std::string in = "CONTENT";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-    }
-}
-
-static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
-  common_chat_msg_parser builder(input, parse_as_partial, {});
-  auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
-  assert_equals(true, js.has_value());
-  assert_equals(is_partial, js->is_partial);
-  assert_equals(expected, js->value.dump());
-}
-
-static void test_json_with_dumped_args_no_args() {
-  // Normal JSON, nothing to heal, nothing to dump
-  test("{\"name\": \"python\"}", false, {}, {}, "{\"name\":\"python\"}");
-  // Full json is args
-  test("{\"name\": \"python\"}", false, {{}}, {}, "{\"name\":\"python\"}");
-
-  // If the arguments are further down, don't heal partial content.
-  for (const auto & src : barely_healable_jsons) {
-    test(src, true, {{"arguments"}}, {}, "{}");
-  }
-  // But heal content that isn't partial.
-  test("{\"name\": \"python\"", true, {{"arguments"}}, {}, "{\"name\":\"python\"}");
-}
-
-static void test_json_with_dumped_args() {
-
-  // Partial content.
-  test("{\"content\": \"t", true, {}, {{"content"}}, "{\"content\":\"t\"}");
-  test("{\"content\": \"", true, {}, {{"content"}}, "{\"content\":\"\"}");
-  test("{\"content\": ", true, {}, {{"content"}}, "{}");
-
-  // If the entire JSON is the arguments, healing it them dumping it produces the same output as the input (just reformatted).
-  test("{\"name\": \"python", true, {{}}, {}, "{\"name\":\"python");
-  for (const auto & src : barely_healable_jsons) {
-    test(src, true, {{}}, {}, src);
-  }
-
-  // Full JSON w/ args
-  for (auto parse_as_partial : {true, false}) {
-    test_with_args(
-      R"({"name": "python", "args": {"arg1": 1}})",
-      R"({"name":"python","args":"{\"arg1\":1}"})",
-      parse_as_partial,
-      /* is_partial= */ false
-    );
-  }
-
-  // Partial JSON w/ partial args
-  test_with_args(
-    R"({"foo": "bar", "args": {")",
-    R"({"foo":"bar","args":"{\""})"
-  );
-  // Partial args broken in object key
-  test_with_args(
-    R"({"foo": "bar", "args": {"ar)",
-    R"({"foo":"bar","args":"{\"ar"})"
-  );
-  // Partial args broken after object key
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1")",
-    R"({"foo":"bar","args":"{\"arg1\""})"
-  );
-  // Partial args broken before object value
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1":)",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken before object value (space)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": )",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken in object value that may not be complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": 1)",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken in object value that is complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": 1 )",
-    R"({"foo":"bar","args":"{\"arg1\":1"})"
-  );
-  // Partial args broken in object value that is incomplete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": ")",
-    R"({"foo":"bar","args":"{\"arg1\":\""})"
-  );
-  // Partial args broken in object value that is complete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "1")",
-    R"({"foo":"bar","args":"{\"arg1\":\"1\""})"
-  );
-  // Partial args broken on array opening
-  test_with_args(
-    R"({"foo": "bar", "args": [)",
-    R"({"foo":"bar","args":"["})"
-  );
-  // Partial args broken on array value that is incomplete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": [1)",
-    R"({"foo":"bar","args":"["})"
-  );
-  // Partial args broken on array value that is complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": [1 )",
-    R"({"foo":"bar","args":"[1"})"
-  );
-  // Partial args broken on array value that is complete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": ["1")",
-    R"({"foo":"bar","args":"[\"1\""})"
-  );
-  // Partial args broken after array value
-  test_with_args(
-    R"({"foo": "bar", "args": [1,)",
-    R"({"foo":"bar","args":"[1,"})"
-  );
-  // Partial args broken on nested array
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": [)",
-    R"({"foo":"bar","args":"{\"arg1\":["})"
-  );
-
-  // Unicode tests
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u0)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u0"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u00)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u00"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u000)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u000"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u0000)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u0000"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud8)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud8"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud80)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud80"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\u)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\u"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\ud)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\ud"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc0)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc0"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc00)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc00"})"
-  );
-}
-
-static void test_positions() {
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
-    assert_equals<size_t>(0, builder.pos());
-    assert_throws([&]() { builder.move_to(100); });
-    assert_equals<size_t>(0, builder.pos());
-    assert_throws([&]() { builder.move_back(1); });
-    assert_equals<size_t>(0, builder.pos());
-
-    builder.move_to(8);
-    assert_equals<size_t>(8, builder.pos());
-    builder.move_back(1);
-    assert_equals<size_t>(7, builder.pos());
-    assert_equals("world!", builder.consume_rest());
-
-    builder.move_to(0);
-    assert_equals<size_t>(0, builder.pos());
-
-    assert_throws([&]() { builder.finish(); });
-    assert_equals<size_t>(0, builder.pos());
-
-    builder.move_to(builder.input().size());
-    builder.finish();
-  }
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {});
-
-    builder.move_to(builder.input().size());
-    assert_equals<size_t>(builder.input().size(), builder.pos());
-    builder.finish();
-  }
-}
-
-int main() {
-    test_positions();
-    test_json_with_dumped_args_no_args();
-    test_json_with_dumped_args();
-    test_reasoning();
-    test_regex();
-    test_deepseek_v3_1_tool_calls();
-    std::cout << "All tests passed!\n";
-    return 0;
-}
diff --git a/tests/test-chat-peg-parser.cpp b/tests/test-chat-peg-parser.cpp
index f767c73c27a..ae829666994 100644
--- a/tests/test-chat-peg-parser.cpp
+++ b/tests/test-chat-peg-parser.cpp
@@ -1,8 +1,3 @@
-#include <string>
-#include <iostream>
-#include <numeric>
-
-#include "chat-parser.h"
 #include "chat-peg-parser.h"
 #include "chat.h"
 #include "common.h"
@@ -10,6 +5,11 @@
 #include "peg-parser.h"
 #include "testing.h"
 #include "peg-parser/simple-tokenize.h"
+
+#include <iostream>
+#include <numeric>
+#include <string>
+
 #include "nlohmann/json.hpp"
 
 using json = nlohmann::ordered_json;
@@ -17,9 +17,11 @@ using json = nlohmann::ordered_json;
 static json create_tools();
 static void test_example_native(testing & t);
 static void test_example_qwen3_coder(testing & t);
+static void test_example_qwen3_non_coder(testing & t);
 static void test_command7_parser_compare(testing & t);
+static void test_prefix_tool_names(testing & t);
 
-int main(int argc, char *argv[]) {
+int main(int argc, char * argv[]) {
     testing t(std::cout);
     if (argc >= 2) {
         t.set_filter(argv[1]);
@@ -32,7 +34,9 @@ int main(int argc, char *argv[]) {
 
     t.test("native", test_example_native);
     t.test("qwen3 coder", test_example_qwen3_coder);
+    t.test("qwen3 non-coder", test_example_qwen3_non_coder);
     t.test("comparison", test_command7_parser_compare);
+    t.test("prefix tool names", test_prefix_tool_names);
 
     return t.summary();
 }
@@ -41,87 +45,75 @@ static json create_tools() {
     json tools = json::array();
 
     json tool_weather = {
-        {"type", "function"},
-        {"function", {
-            {"name", "get_current_weather"},
-            {"description", "Get the current weather in a given location"},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"location", {
-                        {"type", "string"},
-                        {"description", "The city and state, e.g. San Francisco, CA"}
-                    }},
-                    {"unit", {
-                        {"type", "string"},
-                        {"enum", {"celsius", "fahrenheit"}},
-                        {"description", "The temperature unit to use. Infer this from the users location."}
-                    }}
-                }},
-                {"required", {"location", "unit"}},
-            }},
-        }}
+        { "type",     "function" },
+        { "function",
+         {
+              { "name", "get_current_weather" },
+              { "description", "Get the current weather in a given location" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      { { "location",
+                          { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } },
+                        { "unit",
+                          { { "type", "string" },
+                            { "enum", { "celsius", "fahrenheit" } },
+                            { "description",
+                              "The temperature unit to use. Infer this from the users location." } } } } },
+                    { "required", { "location", "unit" } },
+                } },
+          }                      }
     };
     tools.push_back(tool_weather);
 
     json tool_forecast = {
-        {"type", "function"},
-        {"function", {
-            {"name", "get_forecast"},
-            {"description", "Get the weather forecast for a given location"},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"location", {
-                        {"type", "string"},
-                        {"description", "The city and state, e.g. San Francisco, CA"}
-                    }},
-                    {"unit", {
-                        {"type", "string"},
-                        {"enum", {"celsius", "fahrenheit"}},
-                        {"description", "The temperature unit to use. Infer this from the users location."}
-                    }},
-                    {"days", {
-                        {"type", "integer"},
-                        {"description", "Number of days to forecast (1-10)"},
-                        {"minimum", 1},
-                        {"maximum", 10}
-                    }}
-                }},
-                {"required", {"location", "unit"}},
-            }},
-        }}
+        { "type",     "function" },
+        { "function",
+         {
+              { "name", "get_forecast" },
+              { "description", "Get the weather forecast for a given location" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      { { "location",
+                          { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } },
+                        { "unit",
+                          { { "type", "string" },
+                            { "enum", { "celsius", "fahrenheit" } },
+                            { "description", "The temperature unit to use. Infer this from the users location." } } },
+                        { "days",
+                          { { "type", "integer" },
+                            { "description", "Number of days to forecast (1-10)" },
+                            { "minimum", 1 },
+                            { "maximum", 10 } } } } },
+                    { "required", { "location", "unit" } },
+                } },
+          }                      }
     };
     tools.push_back(tool_forecast);
 
     json tool_search = {
-        {"type", "function"},
-        {"function", {
-            {"name", "search_knowledge_base"},
-            {"description", "Search the internal technical documentation knowledge base."},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"query", {
-                        {"type", "string"},
-                        {"description", "The search query string."}
-                    }},
-                    {"max_results", {
-                        {"type", "integer"},
-                        {"description", "The maximum number of results to return."},
-                        {"default", 5}
-                    }},
-                    {"category", {
-                        {"type", "string"},
-                        {"enum", {"api", "troubleshooting", "billing", "general"}},
-                        {"description", "Filter search by specific category."}
-                    }}
-                }},
-                {"required", {"query", "category"}},
-                {"additionalProperties", false}
-            }},
-            {"strict", true}
-        }}
+        { "type",     "function" },
+        { "function",
+         { { "name", "search_knowledge_base" },
+            { "description", "Search the internal technical documentation knowledge base." },
+            { "parameters",
+              { { "type", "object" },
+                { "properties",
+                  { { "query", { { "type", "string" }, { "description", "The search query string." } } },
+                    { "max_results",
+                      { { "type", "integer" },
+                        { "description", "The maximum number of results to return." },
+                        { "default", 5 } } },
+                    { "category",
+                      { { "type", "string" },
+                        { "enum", { "api", "troubleshooting", "billing", "general" } },
+                        { "description", "Filter search by specific category." } } } } },
+                { "required", { "query", "category" } },
+                { "additionalProperties", false } } },
+            { "strict", true } } }
     };
     tools.push_back(tool_search);
 
@@ -131,39 +123,39 @@ static json create_tools() {
 struct tool_argument {
     std::string name;
     std::string type;
-    bool is_required;
-    json schema;
+    bool        is_required;
+    json        schema;
 };
 
 struct tool_definition {
-    std::string name;
+    std::string                name;
     std::vector<tool_argument> arguments;
-    json schema;
+    json                       schema;
 };
 
 // Test fictitious model output that emits arguments as JSON.
 static void test_example_native(testing & t) {
     struct test_case {
         // Parameters
-        std::string name;
-        json tools;
+        std::string             name;
+        json                    tools;
         common_chat_tool_choice tool_choice;
         common_reasoning_format reasoning_format;
-        json json_schema;
-        bool parallel_tool_calls;
-        bool thinking_forced_open;
-        std::string input;
+        json                    json_schema;
+        bool                    parallel_tool_calls;
+        bool                    thinking_forced_open;
+        std::string             input;
 
         // Expect
-        std::string expect_reasoning;
-        std::string expect_content;
+        std::string                        expect_reasoning;
+        std::string                        expect_content;
         std::vector<common_chat_tool_call> expect_tool_calls;
     };
 
     auto build_parser = [](const test_case & tc) {
-        return build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
+        return build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
             auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE);
-            auto reasoning = p.eps();
+            auto reasoning            = p.eps();
             if (tc.thinking_forced_open) {
                 // If thinking is forced open, expect a closing tag
                 reasoning = p.reasoning(p.until("</think>")) + "</think>" + p.space();
@@ -174,231 +166,188 @@ static void test_example_native(testing & t) {
 
             // tool calling parser
             if (tc.tools.is_array() && !tc.tools.empty()) {
-                auto tools = p.choice();
-                for (const auto & tool : tc.tools) {
-                    const auto & function = tool.at("function");
-                    std::string name = function.at("name");
-                    const auto & schema = function.at("parameters");
-
-                    auto tool_name = p.json_member("name", "\"" + p.tool_name(p.literal(name)) + "\"");
-                    auto tool_args = p.json_member("arguments", p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
+                auto tool_call =
+                    p.standard_json_tools("<tool_call>[", "]</tool_call>", tc.tools, tc.parallel_tool_calls,
+                                          tc.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED);
 
-                    tools |= p.rule("tool-" + name, p.tool_open(p.literal("{")) << tool_name << "," << tool_args << "}");
-                };
-
-                auto parallel_calls = p.eps();
-                if (tc.parallel_tool_calls) {
-                    parallel_calls = p.zero_or_more("," << tools);
-                }
-
-                auto tool_call = p.trigger_rule("tool-call",
-                    p.sequence({
-                        p.literal("<tool_call>["),
-                        tools,
-                        parallel_calls,
-                        p.literal("]</tool_call>")
-                    })
-                );
-
-                return p.sequence({
-                    (reasoning_in_content ? p.eps() : reasoning),
-                    p.content(p.until("<tool_call>")),
-                    p.optional(p.space() + tool_call),
-                    p.space(),
-                    p.end()
-                });
+                return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.until("<tool_call>")),
+                                    p.optional(p.space() + tool_call), p.space(), p.end() });
             }
 
             // response_format parser
             if (tc.json_schema.is_object() && !tc.json_schema.empty()) {
-                return p.sequence({
-                    (reasoning_in_content ? p.eps() : reasoning),
-                    p.content(p.schema(p.json(), "response-output", tc.json_schema)),
-                    p.space(),
-                    p.end()
-                });
+                return p.sequence({ (reasoning_in_content ? p.eps() : reasoning),
+                                    p.content(p.schema(p.json(), "response-output", tc.json_schema)), p.space(),
+                                    p.end() });
             }
 
             // Content-only parser
-            return p.sequence({
-                (reasoning_in_content ? p.eps() : reasoning),
-                p.content(p.rest()),
-                p.end()
-            });
+            return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.rest()), p.end() });
         });
     };
 
     std::vector<test_case> test_cases = std::vector<test_case>{
         {
-            /* .name =                 */ "content with thinking_forced_open = false",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ false,
-            /* .input =                */ (
-                "<think>The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "The user said hello, I must say hello back",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = false",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ false,
+         /* .input =                */ ("<think>The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "The user said hello, I must say hello back",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = false and no reasoning",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ false,
-            /* .input =                */ (
-                "Hello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = false and no reasoning",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ false,
+         /* .input =                */ ("Hello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = false and reasoning_format = none",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "<think>The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "<think>The user said hello, I must say hello back</think>\nHello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = false and reasoning_format = none",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */ ("<think>The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "<think>The user said hello, I must say hello back</think>\nHello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = true",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "The user said hello, I must say hello back",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = true",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */ ("The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "The user said hello, I must say hello back",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = true and reasoning_format = none",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "The user said hello, I must say hello back</think>\nHello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = true and reasoning_format = none",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */ ("The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "The user said hello, I must say hello back</think>\nHello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "tools with tool_choice = auto and no parallel_tool_calls",
-            /* .tools =                */ create_tools(),
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must get the weather in New York</think>\n"
-                "<tool_call>["
-                R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
-                "]</tool_call>"
-            ),
-            /* .expect_reasoning =     */ "I must get the weather in New York",
-            /* .expect_content =       */ "",
-            /* .expect_tool_calls =    */ {{
+         /* .name =                 */ "tools with tool_choice = auto and no parallel_tool_calls",
+         /* .tools =                */ create_tools(),
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */
+            ("I must get the weather in New York</think>\n"
+             "<tool_call>["
+             R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
+             "]</tool_call>"),
+         /* .expect_reasoning =     */ "I must get the weather in New York",
+         /* .expect_content =       */ "",
+         /* .expect_tool_calls =    */
+            { {
                 /* .name =      */ "get_current_weather",
                 /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
                 /* .id =        */ "",
-            }},
-        },
+            } },
+         },
         {
-            /* .name =                 */ "tools with tool_choice = auto and parallel_tool_calls",
-            /* .tools =                */ create_tools(),
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ true,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me search that for you."
-                "<tool_call>["
-                R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
-                ", "
-                R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
-                ", "
-                R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
-                ", "
-                R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
-                "]</tool_call>"
-            ),
-            /* .expect_reasoning =     */ "I must get the weather in New York and San Francisco and a 3 day forecast of each.",
-            /* .expect_content =       */ "Let me search that for you.",
-            /* .expect_tool_calls =    */ {{
-                /* .name =      */ "get_current_weather",
-                /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
-                /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_current_weather",
-                /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
-                /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_forecast",
-                /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
-                /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_forecast",
-                /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
-                /* .id =        */ "",
-            }},
-        },
+         /* .name =                 */ "tools with tool_choice = auto and parallel_tool_calls",
+         /* .tools =                */ create_tools(),
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ true,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */
+            ("I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me "
+             "search that for you."
+             "<tool_call>["
+             R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
+             ", "
+             R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
+             ", "
+             R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
+             ", "
+             R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
+             "]</tool_call>"),
+         /* .expect_reasoning =     */
+            "I must get the weather in New York and San Francisco and a 3 day forecast of each.",                                                                     /* .expect_content =       */ "Let me search that for you.",
+         /* .expect_tool_calls =    */
+            { {
+                  /* .name =      */ "get_current_weather",
+                  /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_current_weather",
+                  /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_forecast",
+                  /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_forecast",
+                  /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
+                  /* .id =        */ "",
+              } },
+         },
         {
-            /* .name =                 */ "response_format with thinking_forced_open = true",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {
-                {"type", "object"},
-                {"properties", {
-                    {"invoice_number", {{"type", "string"}}},
-                    {"amount", {{"type", "number"}}},
-                    {"due_date", {{"type", "string"}}}
-                }},
-                {"required", {"invoice_number", "amount", "due_date"}}
-            },
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must produce the invoice in the requested format</think>\n"
-                R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"
-            ),
-            /* .expect_reasoning =     */ "I must produce the invoice in the requested format",
-            /* .expect_content =       */ R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "response_format with thinking_forced_open = true",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */
+            { { "type", "object" },
+              { "properties",
+                { { "invoice_number", { { "type", "string" } } },
+                  { "amount", { { "type", "number" } } },
+                  { "due_date", { { "type", "string" } } } } },
+              { "required", { "invoice_number", "amount", "due_date" } } },
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */
+            ("I must produce the invoice in the requested format</think>\n"
+             R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"),
+         /* .expect_reasoning =     */ "I must produce the invoice in the requested format",
+         /* .expect_content =       */
+            R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})", /* .expect_tool_calls =    */ {},
+         },
     };
 
     for (const auto & tc : test_cases) {
         t.test(tc.name, [&](testing & t) {
-            auto parser = build_parser(tc);
-            auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+            auto parser  = build_parser(tc);
+            auto lazy    = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
             auto grammar = build_grammar([&](const common_grammar_builder & builder) {
-                for (auto const & def : tc.tools) {
-                    auto function = def.at("function");
+                for (const auto & def : tc.tools) {
+                    auto function   = def.at("function");
                     auto parameters = function.at("parameters");
                     builder.resolve_refs(parameters);
                 };
@@ -406,17 +355,17 @@ static void test_example_native(testing & t) {
             });
 
             t.log("Grammar:");
-            for (auto const & line : string_split(grammar, "\n")) {
+            for (const auto & line : string_split(grammar, "\n")) {
                 t.log(line);
             }
 
             common_peg_parse_context ctx(tc.input, false);
-            auto result = parser.parse(ctx);
+            auto                     result = parser.parse(ctx);
 
             t.assert_true("success", result.success());
 
             common_chat_msg msg;
-            auto mapper = common_chat_peg_native_mapper(msg);
+            auto            mapper = common_chat_peg_unified_mapper(msg);
             mapper.from_ast(ctx.ast, result);
 
             t.assert_equal("content equal", tc.expect_content, msg.content);
@@ -431,16 +380,16 @@ static void test_example_native(testing & t) {
 }
 
 static void test_example_qwen3_coder(testing & t) {
-    auto tools = create_tools();
-    auto parser = build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
+    auto tools  = create_tools();
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
         auto content = p.rule("content", p.content(p.until("<tool_call>")));
 
         std::vector<common_peg_parser> tool_parsers;
-        for (auto const & def : tools) {
-            auto function = def.at("function");
-            std::string name = function.at("name");
-            auto parameters = function.at("parameters");
-            auto properties = parameters.at("properties");
+        for (const auto & def : tools) {
+            auto        function   = def.at("function");
+            std::string name       = function.at("name");
+            auto        parameters = function.at("parameters");
+            auto        properties = parameters.at("properties");
 
             std::set<std::string> required_properties;
             if (function.contains("required")) {
@@ -450,59 +399,36 @@ static void test_example_qwen3_coder(testing & t) {
             std::vector<common_peg_parser> arg_parsers;
             for (const auto & [param_name, param_schema] : properties.items()) {
                 bool is_required = required_properties.find(param_name) != required_properties.end();
-                auto type = param_schema.value("type", "object");
-
-                auto arg = p.tool_arg(p.sequence({
-                    p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
-                    (type == "string" ?
-                        p.tool_arg_string_value(
-                            p.schema(
-                                p.until_one_of({
-                                    "</parameter>\n<parameter=",
-                                    "</parameter>\n</function>"
-                                }),
-                                "tool-" + name + "-arg-" + param_name + "-schema",
-                                param_schema,
-                                true
-                            )
-                        ) : p.tool_arg_json_value(
-                            p.schema(
-                                p.json(),
-                                "tool-" + name + "-arg-" + param_name + "-schema",
-                                param_schema
-                            )
-                        )
-                    ),
-                    p.tool_arg_close(
-                        "</parameter>\n" +
-                        p.peek(p.literal("<parameter=") | p.literal("</function>"))
-                    )
-                }));
-
-                arg_parsers.push_back(is_required ?
-                    p.rule("tool-" + name + "-arg-" + param_name, arg) :
-                    p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
+                auto type        = param_schema.value("type", "object");
+
+                auto arg = p.tool_arg(
+                    p.sequence({ p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
+                                 (type == "string" ?
+                                      p.tool_arg_string_value(p.schema(
+                                          p.until_one_of({ "</parameter>\n<parameter=", "</parameter>\n</function>" }),
+                                          "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
+                                      p.tool_arg_json_value(p.schema(
+                                          p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema))),
+                                 p.tool_arg_close("</parameter>\n" +
+                                                  p.peek(p.literal("<parameter=") | p.literal("</function>"))) }));
+
+                arg_parsers.push_back(is_required ? p.rule("tool-" + name + "-arg-" + param_name, arg) :
+                                                    p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
             }
 
-            tool_parsers.push_back(p.rule("tool-" + name,
-                p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
-                << p.sequence(arg_parsers)
-                << p.tool_close(p.literal("</function>"))
-            ));
+            tool_parsers.push_back(p.rule("tool-" + name, p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
+                                                              << p.sequence(arg_parsers)
+                                                              << p.tool_close(p.literal("</function>"))));
         };
 
-        auto tool_call = p.trigger_rule("tool-call",
-            "<tool_call>"
-            << p.choice(tool_parsers)
-            << "</tool_call>"
-        );
+        auto tool_call = p.trigger_rule("tool-call", "<tool_call>" << p.choice(tool_parsers) << "</tool_call>");
 
         return content + p.zero_or_more(p.space() + tool_call) + p.end();
     });
 
     auto grammar = build_grammar([&](const common_grammar_builder & builder) {
-        for (auto const & def : tools) {
-            auto function = def.at("function");
+        for (const auto & def : tools) {
+            auto function   = def.at("function");
             auto parameters = function.at("parameters");
             builder.resolve_refs(parameters);
         };
@@ -510,11 +436,11 @@ static void test_example_qwen3_coder(testing & t) {
     });
 
     t.log("Grammar:");
-    for (auto const & line : string_split(grammar, "\n")) {
+    for (const auto & line : string_split(grammar, "\n")) {
         t.log(line);
     }
 
-    t.test("incremental parsing", [&](testing &t) {
+    t.test("incremental parsing", [&](testing & t) {
         std::string input =
             "Let me search the knowledge base for cat pictures."
             "<tool_call>\n"
@@ -538,7 +464,105 @@ static void test_example_qwen3_coder(testing & t) {
             }
 
             common_chat_msg msg;
-            auto mapper = common_chat_peg_constructed_mapper(msg);
+            auto            mapper = common_chat_peg_unified_mapper(msg);
+            mapper.from_ast(ctx.ast, result);
+
+            //t.log("Input: " + input);
+            t.log("===========================================");
+            t.log("Iteration " + std::to_string(in.size()));
+            t.log("Reasoning: " + msg.reasoning_content);
+            t.log("Content  : " + msg.content);
+            for (const auto & tc : msg.tool_calls) {
+                t.log("Tool name: " + tc.name);
+                t.log("Tool args: " + tc.arguments);
+            }
+
+            try {
+                // This shouldn't emit any runtime errors
+                auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
+            } catch (const std::exception & e) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                t.assert_true(std::string("failed with ") + e.what(), false);
+            }
+
+            prev = msg;
+        }
+    });
+}
+
+static void test_example_qwen3_non_coder(testing & t) {
+    auto tools  = create_tools();
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        // tool calling parser using standard JSON format
+        auto tool_call = p.standard_json_tools("<tool_call>", "</tool_call>", tools, true, false);
+
+        return p.sequence({ p.content(p.until("<tool_call>")), p.optional(p.space() + tool_call), p.end() });
+    });
+
+    auto grammar = build_grammar([&](const common_grammar_builder & builder) {
+        for (const auto & def : tools) {
+            auto function   = def.at("function");
+            auto parameters = function.at("parameters");
+            builder.resolve_refs(parameters);
+        };
+        parser.build_grammar(builder);
+    });
+
+    t.log("Grammar:");
+    for (const auto & line : string_split(grammar, "\n")) {
+        t.log(line);
+    }
+
+    t.test("tool call parsing", [&](testing & t) {
+        std::string input =
+            "I need to get the weather.\n"
+            "<tool_call>"
+            "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": "
+            "\"fahrenheit\"}}"
+            "</tool_call>";
+
+        common_peg_parse_context ctx(input, false);
+        auto                     result = parser.parse(ctx);
+
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_unified_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "I need to get the weather.", msg.content);
+        t.assert_equal("reasoning", "", msg.reasoning_content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+            t.assert_equal("tool args", "{\"location\": \"New York City, NY\", \"unit\": \"fahrenheit\"}",
+                           msg.tool_calls[0].arguments);
+        }
+    });
+
+    t.test("incremental parsing", [&](testing & t) {
+        std::string input =
+            "I need to get the weather.\n"
+            "<tool_call>"
+            "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": "
+            "\"fahrenheit\"}}"
+            "</tool_call>";
+
+        std::vector<std::string> tokens = simple_tokenize(input);
+
+        common_chat_msg prev;
+        for (auto it = tokens.begin(); it != tokens.end(); it++) {
+            std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
+
+            common_peg_parse_context ctx(in, it + 1 < tokens.end());
+
+            auto result = parser.parse(ctx);
+            if (!t.assert_equal("not fail", false, result.fail())) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+            }
+
+            common_chat_msg msg;
+            auto            mapper = common_chat_peg_unified_mapper(msg);
             mapper.from_ast(ctx.ast, result);
 
             //t.log("Input: " + input);
@@ -554,7 +578,7 @@ static void test_example_qwen3_coder(testing & t) {
             try {
                 // This shouldn't emit any runtime errors
                 auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
-            } catch(const std::exception & e) {
+            } catch (const std::exception & e) {
                 t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
                 t.assert_true(std::string("failed with ") + e.what(), false);
             }
@@ -565,38 +589,37 @@ static void test_example_qwen3_coder(testing & t) {
 }
 
 void test_command7_parser_compare(testing & t) {
-    auto parser = build_chat_peg_native_parser([](common_chat_peg_native_builder & p) {
-        auto thinking = p.reasoning_block(
-            "<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
+    auto parser = build_chat_peg_unified_parser([](common_chat_peg_unified_builder & p) {
+        auto thinking =
+            p.reasoning_block("<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
 
         auto response = "<|START_RESPONSE|>" << p.content(p.until("<|END_RESPONSE|>")) << "<|END_RESPONSE|>";
 
         auto tool_call_id = p.atomic("\"tool_call_id\"" << (":" << ("\"" + p.tool_id(p.json_string_content()) + "\"")));
-        auto tool_call_name = p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\"")));
+        auto tool_call_name =
+            p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\"")));
         auto tool_call_args = "\"parameters\"" << (":" << p.tool_args(p.json()));
 
         auto tool_call_fields = p.rule("tool-call-fields", tool_call_id | tool_call_name | tool_call_args);
-        auto tool_call = p.rule("tool-call", p.tool(
-            p.tool_open(p.literal("{"))
-            << tool_call_fields
-            << p.zero_or_more( p.literal(",") << tool_call_fields)
-            << p.tool_close(p.literal("}"))
-        ));
-
-        auto tool_calls = p.rule("tool-calls",
-            "<|START_ACTION|>"
-            << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
-            << "<|END_ACTION|>");
+        auto tool_call =
+            p.rule("tool-call", p.tool(p.tool_open(p.literal("{"))
+                                       << tool_call_fields << p.zero_or_more(p.literal(",") << tool_call_fields)
+                                       << p.tool_close(p.literal("}"))));
+
+        auto tool_calls = p.rule(
+            "tool-calls", "<|START_ACTION|>" << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
+                                             << "<|END_ACTION|>");
 
         return p.optional(thinking) << (tool_calls | response) + p.end();
     });
 
-    auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial, bool print_results) {
+    auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial,
+                            bool print_results) {
         common_peg_parse_context ctx(input, is_partial);
-        auto result = p.parse(ctx);
+        auto                     result = p.parse(ctx);
 
         common_chat_msg msg;
-        auto mapper = common_chat_peg_native_mapper(msg);
+        auto            mapper = common_chat_peg_unified_mapper(msg);
         mapper.from_ast(ctx.ast, result);
 
         if (print_results) {
@@ -614,79 +637,19 @@ void test_command7_parser_compare(testing & t) {
         }
     };
 
-    auto test_legacy = [&](const std::string & input, bool need_more_input, bool print_results) {
-        // Original common_chat_combinator_parser taken from chat.cpp
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_GENERIC;
-        params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = false;
-        common_chat_msg_parser builder(
-            input,
-            /* .is_partial = */ need_more_input,
-            params
-        );
-
-        builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
-
-        static const common_regex start_action_regex("<\\|START_ACTION\\|>");
-        static const common_regex end_action_regex("<\\|END_ACTION\\|>");
-        static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
-        static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
-
-        if (auto res = builder.try_find_regex(start_action_regex)) {
-            // If we didn't extract thoughts, prelude includes them.
-            auto tool_calls = builder.consume_json_with_dumped_args({ { "parameters" } });
-            for (const auto & tool_call : tool_calls.value) {
-                std::string name      = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
-                std::string id        = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
-                std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
-                if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-            }
-            if (tool_calls.is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-            builder.consume_regex(end_action_regex);
-        } else if (auto res = builder.try_find_regex(start_response_regex)) {
-            if (!builder.try_find_regex(end_response_regex)) {
-                builder.add_content(builder.consume_rest());
-                throw common_chat_msg_partial_exception(end_response_regex.str());
-            }
-        } else {
-            builder.add_content(builder.consume_rest());
-        }
-
-        if (print_results) {
-            std::cout << "== Parsed (legacy) ==\n";
-            std::cout << "=== Reasoning ===\n";
-            std::cout << builder.result().reasoning_content << "\n";
-            std::cout << "\n\n=== Content ===\n";
-            std::cout << builder.result().content << "\n";
-            std::cout << "\n\n=== Tool Calls ===\n";
-            for (const auto & tc : builder.result().tool_calls) {
-                std::cout << "id: " << tc.id << "\n";
-                std::cout << "name: " << tc.name << "\n";
-                std::cout << "args: " << tc.arguments << "\n";
-            }
-        }
-    };
-
-    std::string reasoning = "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
-            "budget of $4000 for a two-week stay, we need to:\n\n"
-            "1. Identify key historical sites and modern attractions in Japan.\n"
-            "2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
-            "3. Determine the best modes of transportation for getting around Japan.\n"
-            "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
-            "overspending.\n"
-            "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
-            "to attractions.";
-
-    std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {{
-        "call_0",
-        "plan_trip",
-        nlohmann::json::parse(R"({
+    std::string reasoning =
+        "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
+        "budget of $4000 for a two-week stay, we need to:\n\n"
+        "1. Identify key historical sites and modern attractions in Japan.\n"
+        "2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
+        "3. Determine the best modes of transportation for getting around Japan.\n"
+        "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
+        "overspending.\n"
+        "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
+        "to attractions.";
+
+    std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {
+        { "call_0", "plan_trip", nlohmann::json::parse(R"({
             "destination": "Japan",
             "duration": 14,
             "budget": 4000,
@@ -694,8 +657,8 @@ void test_command7_parser_compare(testing & t) {
             "accommodation_preferences": "affordable",
             "transportation_preferences": "efficient",
             "meal_preferences": "local cuisine"
-        })")
-    }};
+        })") }
+    };
 
     std::vector<std::string> tokens;
 
@@ -712,10 +675,10 @@ void test_command7_parser_compare(testing & t) {
 
         auto json = nlohmann::json::array();
         for (const auto & tc : tool_calls) {
-            auto tc_json = nlohmann::json::object();
+            auto tc_json            = nlohmann::json::object();
             tc_json["tool_call_id"] = std::get<0>(tc);
-            tc_json["tool_name"] = std::get<1>(tc);
-            tc_json["parameters"] = std::get<2>(tc);
+            tc_json["tool_name"]    = std::get<1>(tc);
+            tc_json["parameters"]   = std::get<2>(tc);
             json.push_back(tc_json);
         }
 
@@ -727,42 +690,191 @@ void test_command7_parser_compare(testing & t) {
 
     std::string input = std::accumulate(tokens.begin(), tokens.end(), std::string());
 
-    // Run tests
-    t.test("legacy_parse", [&](testing & /* t */) {
-        test_legacy(input, false, false);
-    });
+    t.test("current_parse", [&](testing & /* t */) { test_current(parser, input, false, false); });
+    t.bench("current_parse_benchmark complete", [&]() { test_current(parser, input, false, false); }, 100);
+    t.bench(
+        "current_parse_benchmark incremental",
+        [&]() {
+            std::string in;
+            for (auto i = 0u; i < tokens.size(); i++) {
+                in += tokens[i];
+                test_current(parser, in, i + 1 < tokens.size(), false);
+            }
+        },
+        20);
+}
+
+// Test that tool names that are proper prefixes of other tool names don't cause
+// premature matching during incremental parsing.
+// For example, "special_function" should not match when parsing "special_function_with_opt".
+static void test_prefix_tool_names(testing & t) {
+    // Create tools where one name is a proper prefix of another
+    json tools = json::array();
 
-    t.test("current_parse", [&](testing & /* t */) {
-        test_current(parser, input, false, false);
+    json tool_short = {
+        { "type", "function" },
+        { "function",
+          {
+              { "name", "special_function" },
+              { "description", "A special function" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      {
+                          { "arg1", { { "type", "integer" } } },
+                      } },
+                    { "required", { "arg1" } },
+                } },
+          } }
+    };
+    tools.push_back(tool_short);
+
+    json tool_long = {
+        { "type", "function" },
+        { "function",
+          {
+              { "name", "special_function_with_opt" },
+              { "description", "A special function with optional params" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      {
+                          { "arg1", { { "type", "integer" } } },
+                          { "arg2", { { "type", "integer" } } },
+                      } },
+                    { "required", { "arg1" } },
+                } },
+          } }
+    };
+    tools.push_back(tool_long);
+
+    // Use standard_constructed_tools which had the prefix matching bug
+    std::map<std::string, std::string> markers = {
+        { "tool_call_start_marker", "<tool_call>" },
+        { "tool_call_end_marker", "</tool_call>" },
+        { "function_opener", "<function=" },
+        { "function_closer", "</function>" },
+        { "function_name_suffix", ">" },
+        { "parameter_key_prefix", "<param=" },
+        { "parameter_key_suffix", ">" },
+        { "parameter_closer", "</param>" },
+    };
+
+    auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
+        auto content   = p.rule("content", p.content(p.until("<tool_call>")));
+        auto tool_call = p.standard_constructed_tools(markers, tools, false, false);
+        return content + p.zero_or_more(p.space() + tool_call) + p.end();
     });
 
-    // Run benchmarks
-    t.bench("legacy_parse_benchmark complete", [&]() {
-        test_legacy(input, false, false);
+    // Test parsing the long tool name - this should NOT trigger the short tool name
+    t.test("parse long tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function_with_opt>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
+
+        common_peg_parse_context ctx(input, false);
+        auto                     result = parser.parse(ctx);
+
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_unified_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "Let me call the function.", msg.content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "special_function_with_opt", msg.tool_calls[0].name);
+        }
     });
 
-    t.bench("legacy_parse_benchmark incremental", [&]() {
-        std::string in;
-        for (auto i = 0u; i < tokens.size(); i++) {
-            in += tokens[i];
+    // Test incremental parsing - the key test case
+    // This ensures that when incrementally parsing "special_function_with_opt",
+    // we don't prematurely emit "special_function" as a tool call
+    t.test("incremental parse long tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function_with_opt>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
+
+        std::vector<std::string> tokens = simple_tokenize(input);
+
+        common_chat_msg prev;
+        for (auto it = tokens.begin(); it != tokens.end(); it++) {
+            std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
+
+            common_peg_parse_context ctx(in, it + 1 < tokens.end());
+            auto                     result = parser.parse(ctx);
+
+            if (!t.assert_equal("not fail", false, result.fail())) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                return;
+            }
+
+            common_chat_msg msg;
+            auto            mapper = common_chat_peg_unified_mapper(msg);
+            mapper.from_ast(ctx.ast, result);
+
+            // The critical check: during incremental parsing, we should never
+            // see "special_function" as the tool name when parsing "special_function_with_opt"
+            for (const auto & tc : msg.tool_calls) {
+                if (!t.assert_equal("tool name should not be short prefix", false,
+                                    tc.name == "special_function")) {
+                    t.log("Premature tool name match at input: " + in);
+                    return;
+                }
+            }
 
             try {
-                test_legacy(in, i + 1 < tokens.size(), false);
-            } catch (common_chat_msg_partial_exception & /* e */) {
-                // Do nothing, this is expected
+                auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
+            } catch (const std::exception & e) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                t.assert_true(std::string("diff failed with ") + e.what(), false);
+                return;
             }
+
+            prev = msg;
+        }
+
+        // Final check: the complete parse should have the correct tool name
+        t.assert_equal("final tool calls count", 1u, prev.tool_calls.size());
+        if (!prev.tool_calls.empty()) {
+            t.assert_equal("final tool name", "special_function_with_opt", prev.tool_calls[0].name);
         }
-    }, 20);
+    });
+
+    // Test parsing the short tool name still works
+    t.test("parse short tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
 
-    t.bench("current_parse_benchmark complete", [&]() {
-        test_current(parser, input, false, false);
-    }, 100);
+        common_peg_parse_context ctx(input, false);
+        auto                     result = parser.parse(ctx);
 
-    t.bench("current_parse_benchmark incremental", [&]() {
-        std::string in;
-        for (auto i = 0u; i < tokens.size(); i++) {
-            in += tokens[i];
-            test_current(parser, in, i + 1 < tokens.size(), false);
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_unified_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "Let me call the function.", msg.content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "special_function", msg.tool_calls[0].name);
         }
-    }, 20);
+    });
 }
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
deleted file mode 100644
index 27b537a0369..00000000000
--- a/tests/test-chat-template.cpp
+++ /dev/null
@@ -1,680 +0,0 @@
-#include <string>
-#include <vector>
-#include <sstream>
-#include <regex>
-#include <iostream>
-#include <fstream>
-#include <filesystem>
-
-#include <nlohmann/json.hpp>
-
-#undef NDEBUG
-#include <cassert>
-
-#include "llama.h"
-#include "common.h"
-#include "chat.h"
-#include "jinja/runtime.h"
-#include "jinja/parser.h"
-#include "jinja/lexer.h"
-#include "jinja/caps.h"
-
-using json = nlohmann::ordered_json;
-
-int main_automated_tests(void);
-
-void run_multiple(std::string dir_path, bool stop_on_first_failure, json input, bool use_common = false);
-void run_single(std::string contents, json input, bool use_common = false, const std::string & output_path = "");
-
-
-
-std::string HELP = R"(
-Usage: test-chat-template [OPTIONS] PATH_TO_TEMPLATE
-Options:
-  -h, --help               Show this help message and exit.
-  --json <path>            Path to the JSON input file.
-  --stop-on-first-fail     Stop testing on the first failure (default: false).
-  --no-common              Use direct Jinja engine instead of common chat templates (default: use common).
-  --output <path>          Path to output results (only for single template runs).
-If PATH_TO_TEMPLATE is a file, runs that single template.
-If PATH_TO_TEMPLATE is a directory, runs all .jinja files in that directory.
-If PATH_TO_TEMPLATE is omitted, runs automated tests (default CI mode).
-)";
-
-std::string DEFAULT_JSON = R"({
-    "messages": [
-        {
-            "role": "user",
-            "content": "Hello, how are you?"
-        },
-        {
-            "role": "assistant",
-            "content": "I am fine, thank you!"
-        }
-    ],
-    "bos_token": "<s>",
-    "eos_token": "</s>",
-    "add_generation_prompt": true
-})";
-
-int main(int argc, char ** argv) {
-    std::vector<std::string> args(argv, argv + argc);
-
-    std::string tmpl_path;
-    std::string json_path;
-    std::string output_path;
-    bool stop_on_first_fail = false;
-    bool use_common = true;
-
-    for (size_t i = 1; i < args.size(); i++) {
-        if (args[i] == "--help" || args[i] == "-h") {
-            std::cout << HELP << "\n";
-            return 0;
-        } else if (args[i] == "--json" && i + 1 < args.size()) {
-            json_path = args[i + 1];
-            i++;
-        } else if (args[i] == "--stop-on-first-fail") {
-            stop_on_first_fail = true;
-        } else if (args[i] == "--output" && i + 1 < args.size()) {
-            output_path = args[i + 1];
-            i++;
-        } else if (args[i] == "--no-common") {
-            use_common = true;
-        } else if (tmpl_path.empty()) {
-            tmpl_path = args[i];
-        } else {
-            std::cerr << "Unknown argument: " << args[i] << "\n";
-            std::cout << HELP << "\n";
-            return 1;
-        }
-    }
-
-    if (tmpl_path.empty()) {
-        return main_automated_tests();
-    }
-
-    json input_json;
-    if (!json_path.empty()) {
-        std::ifstream json_file(json_path);
-        if (!json_file) {
-            std::cerr << "Error: Could not open JSON file: " << json_path << "\n";
-            return 1;
-        }
-        std::string content = std::string(
-            std::istreambuf_iterator<char>(json_file),
-            std::istreambuf_iterator<char>());
-        input_json = json::parse(content);
-    } else {
-        input_json = json::parse(DEFAULT_JSON);
-    }
-
-    std::filesystem::path p(tmpl_path);
-    if (std::filesystem::is_directory(p)) {
-        run_multiple(tmpl_path, stop_on_first_fail, input_json, use_common);
-    } else if (std::filesystem::is_regular_file(p)) {
-        std::ifstream infile(tmpl_path);
-        std::string contents = std::string(
-            std::istreambuf_iterator<char>(infile),
-            std::istreambuf_iterator<char>());
-        run_single(contents, input_json, use_common, output_path);
-    } else {
-        std::cerr << "Error: PATH_TO_TEMPLATE is not a valid file or directory: " << tmpl_path << "\n";
-        return 1;
-    }
-
-    return 0;
-}
-
-void run_multiple(std::string dir_path, bool stop_on_first_fail, json input, bool use_common) {
-    std::vector<std::string> failed_tests;
-
-    // list all files in models/templates/ and run each
-    size_t test_count = 0;
-
-    for (const auto & entry : std::filesystem::directory_iterator(dir_path)) {
-        // only process .jinja files
-        if (entry.path().extension() == ".jinja" && entry.is_regular_file()) {
-            test_count++;
-            std::cout << "\n\n=== RUNNING TEMPLATE FILE: " << entry.path().string() << " ===\n";
-            std::ifstream infile(entry.path());
-            std::string contents((std::istreambuf_iterator<char>(infile)), std::istreambuf_iterator<char>());
-            try {
-                run_single(contents, input, use_common);
-            } catch (const std::exception & e) {
-                std::cout << "Exception: " << e.what() << "\n";
-                std::cout << "=== ERROR WITH TEMPLATE FILE: " << entry.path().string() << " ===\n";
-                failed_tests.push_back(entry.path().string());
-                if (stop_on_first_fail) {
-                    break;
-                }
-            }
-        }
-    }
-
-    std::cout << "\n\n=== TEST SUMMARY ===\n";
-    std::cout << "Total tests run: " << test_count << "\n";
-    std::cout << "Total failed tests: " << failed_tests.size() << "\n";
-    for (const auto & test : failed_tests) {
-        std::cout << "FAILED TEST: " << test << "\n";
-    }
-}
-
-
-static std::string normalize_newlines(const std::string & s) {
-#ifdef _WIN32
-  static const std::regex nl_regex("\r\n");
-  return std::regex_replace(s, nl_regex, "\n");
-#else
-  return s;
-#endif
-}
-
-
-static std::string format_using_common(
-            const std::string & template_str,
-            const std::string & bos_token,
-            const std::string & eos_token,
-            std::vector<common_chat_msg> & messages,
-            std::vector<common_chat_tool> tools = {}) {
-    auto tmpls = common_chat_templates_init(/* model= */ nullptr, template_str, bos_token, eos_token);
-    common_chat_templates_inputs inputs;
-    inputs.use_jinja = true;
-    inputs.messages = messages;
-    inputs.tools = tools;
-    inputs.add_generation_prompt = true;
-    auto output = common_chat_templates_apply(tmpls.get(), inputs).prompt;
-    output = normalize_newlines(output);
-    return output;
-}
-
-
-// skip libcommon, use direct jinja engine
-static jinja::value_string format_using_direct_engine(
-            const std::string & template_str,
-            json & input) {
-    // lexing
-    jinja::lexer lexer;
-    auto lexer_res = lexer.tokenize(template_str);
-
-    // compile to AST
-    jinja::program ast = jinja::parse_from_tokens(lexer_res);
-
-    // check caps for workarounds
-    jinja::caps_get(ast);
-
-    std::cout << "\n=== RUN ===\n";
-    jinja::context ctx(template_str);
-
-    jinja::global_from_json(ctx, input, true);
-
-    jinja::runtime runtime(ctx);
-    const jinja::value results = runtime.execute(ast);
-    auto parts = runtime.gather_string_parts(results);
-
-    std::cout << "\n=== RESULTS ===\n";
-    for (const auto & part : parts->as_string().parts) {
-        std::cout << (part.is_input ? "DATA" : "TMPL") << ": " << part.val << "\n";
-    }
-
-    return parts;
-}
-
-
-void run_single(std::string contents, json input, bool use_common, const std::string & output_path) {
-    jinja::enable_debug(true);
-
-    jinja::value_string output_parts;
-
-    if (use_common) {
-        std::string bos_token = "<s>";
-        std::string eos_token = "</s>";
-        if (input.contains("bos_token")) {
-            bos_token = input["bos_token"].get<std::string>();
-        }
-        if (input.contains("eos_token")) {
-            eos_token = input["eos_token"].get<std::string>();
-        }
-        nlohmann::ordered_json msgs_json = input["messages"];
-        nlohmann::ordered_json tools_json = input["tools"];
-        auto messages = common_chat_msgs_parse_oaicompat(msgs_json);
-        auto tools = common_chat_tools_parse_oaicompat(tools_json);
-        auto output = format_using_common(contents, bos_token, eos_token, messages, tools);
-        std::cout << "\n=== OUTPUT ===\n";
-        std::cout << output << "\n";
-        output_parts = jinja::mk_val<jinja::value_string>(output);
-
-    } else {
-        output_parts = format_using_direct_engine(contents, input);
-        std::cout << "\n=== OUTPUT ===\n";
-        std::cout << output_parts->as_string().str() << "\n";
-    }
-
-    if (!output_path.empty()) {
-        std::ofstream outfile(output_path);
-        if (!outfile) {
-            throw std::runtime_error("Could not open output file: " + output_path);
-        }
-        outfile << output_parts->as_string().str();
-        outfile.close();
-        std::cout << "\n=== OUTPUT WRITTEN TO " << output_path << " ===\n";
-    }
-}
-
-
-
-
-
-//
-// Automated tests for chat templates
-//
-
-#define U8C(x) (const char*)(u8##x)
-
-static common_chat_msg simple_msg(const std::string & role, const std::string & content) {
-    common_chat_msg msg;
-    msg.role = role;
-    msg.content = content;
-    return msg;
-}
-
-int main_automated_tests(void) {
-    // jinja::enable_debug(true);
-
-    std::vector<llama_chat_message> conversation {
-        {"system", "You are a helpful assistant"},
-        {"user", "Hello"},
-        {"assistant", "Hi there"},
-        {"user", "Who are you"},
-        {"assistant", "   I am an assistant   "},
-        {"user", "Another question"},
-    };
-
-    // std::string wrong = /* .template_str= */ u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}";
-    struct TestCase {
-        std::string name;
-        std::string template_str;
-        std::string expected_output;
-        std::string expected_output_jinja;
-        std::string bos_token = "";
-        std::string eos_token = "";
-        bool supported_with_jinja = true;
-    };
-    std::vector<TestCase> test_cases {
-        {
-            /* .name= */ "teknium/OpenHermes-2.5-Mistral-7B",
-            /* .template_str= */ "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
-            /* .expected_output= */ "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\nHi there<|im_end|>\n<|im_start|>user\nWho are you<|im_end|>\n<|im_start|>assistant\n   I am an assistant   <|im_end|>\n<|im_start|>user\nAnother question<|im_end|>\n<|im_start|>assistant\n",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "mistralai/Mistral-7B-Instruct-v0.2 (NOTE: Old pre-v1 without a system prompt)",
-            /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
-            /* .expected_output= */ "[INST] You are a helpful assistant\nHello [/INST]Hi there</s>[INST] Who are you [/INST]   I am an assistant   </s>[INST] Another question [/INST]",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "TheBloke/FusionNet_34Bx2_MoE-AWQ",
-            /* .template_str= */ "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <<SYS>>\\n' + messages[idx]['content'] + '\\n<</SYS>>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' '  + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}",
-            /* .expected_output= */       "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST]Hi there</s><s>[INST] Who are you [/INST]   I am an assistant   </s><s>[INST] Another question [/INST]",
-            /* .expected_output_jinja= */ "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST] Hi there </s><s>[INST] Who are you [/INST]    I am an assistant    </s><s>[INST] Another question [/INST]",
-            /* .bos_token= */ "<s>",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "bofenghuang/vigogne-2-70b-chat",
-            /* .template_str= */ "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif true == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'Vous êtes Vigogne, un assistant IA créé par Zaion Lab. Vous suivez extrêmement bien les instructions. Aidez autant que vous le pouvez.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
-            /* .expected_output= */       "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST]Hi there</s>[INST] Who are you [/INST]I am an assistant</s>[INST] Another question [/INST]",
-            /* .expected_output_jinja= */ "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\nHello [/INST] Hi there </s>[INST] Who are you [/INST] I am an assistant </s>[INST] Another question [/INST]",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "mlabonne/AlphaMonarch-7B",
-            /* .template_str= */ "{% for message in messages %}{{bos_token + message['role'] + '\\n' + message['content'] + eos_token + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\\n' }}{% endif %}",
-            /* .expected_output= */ "system\nYou are a helpful assistant</s>\n<s>user\nHello</s>\n<s>assistant\nHi there</s>\n<s>user\nWho are you</s>\n<s>assistant\n   I am an assistant   </s>\n<s>user\nAnother question</s>\n<s>assistant\n",
-            /* .expected_output_jinja= */ "<s>system\nYou are a helpful assistant</s>\n<s>user\nHello</s>\n<s>assistant\nHi there</s>\n<s>user\nWho are you</s>\n<s>assistant\n   I am an assistant   </s>\n<s>user\nAnother question</s>\n<s>assistant\n",
-            /* .bos_token= */ "<s>",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "google/gemma-7b-it",
-            /* .template_str= */ "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\\n' + message['content'] | trim + '<end_of_turn>\\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\\n'}}{% endif %}",
-            /* .expected_output= */       "<start_of_turn>user\nYou are a helpful assistant\n\nHello<end_of_turn>\n<start_of_turn>model\nHi there<end_of_turn>\n<start_of_turn>user\nWho are you<end_of_turn>\n<start_of_turn>model\nI am an assistant<end_of_turn>\n<start_of_turn>user\nAnother question<end_of_turn>\n<start_of_turn>model\n",
-            /* .expected_output_jinja= */ "<start_of_turn>user\nYou are a helpful assistant\nHello<end_of_turn>\n<start_of_turn>model\nHi there<end_of_turn>\n<start_of_turn>user\nWho are you<end_of_turn>\n<start_of_turn>model\nI am an assistant<end_of_turn>\n<start_of_turn>user\nAnother question<end_of_turn>\n<start_of_turn>model\n",
-        },
-        {
-            /* .name= */ "OrionStarAI/Orion-14B-Chat",
-            /* .template_str= */ "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
-            /* .expected_output= */       "Human: You are a helpful assistant\n\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s>   I am an assistant   </s>Human: Another question\n\nAssistant: </s>",
-            /* .expected_output_jinja= */ "Human: You are a helpful assistant\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s>   I am an assistant   </s>Human: Another question\n\nAssistant: </s>",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "openchat/openchat-3.5-0106",
-            // The included chat_template differs from the author's suggestions here: https://huggingface.co/openchat/openchat_3.5/discussions/5#65448109b4a3f3a2f486fd9d
-            // So we match against the included template but implement the suggested version.
-            /* .template_str= */ "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
-            /* .expected_output= */                            "You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant:    I am an assistant   <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:",
-            /* .expected_output_jinja= */ "GPT4 Correct System: You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant:    I am an assistant   <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:",
-        },
-        {
-            /* .name= */ "deepseek-ai/deepseek-coder-33b-instruct",
-            /* .template_str= */ "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
-            /* .expected_output= */ "You are a helpful assistant### Instruction:\nHello\n### Response:\nHi there\n<|EOT|>\n### Instruction:\nWho are you\n### Response:\n   I am an assistant   \n<|EOT|>\n### Instruction:\nAnother question\n### Response:\n",
-            /* .expected_output_jinja= */ "",
-        },
-        {
-            /* .name= */ "eachadea/vicuna-13b-1.1",
-            // No template included in tokenizer_config.json, so this template likely needs to be manually set.
-            /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '' + message['content'] + '\n\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
-            /* .expected_output= */ "You are a helpful assistant\n\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT:    I am an assistant   </s>\nUSER: Another question\nASSISTANT:",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "Orca-Vicuna",
-            // No template included in tokenizer_config.json, so this template likely needs to be manually set.
-            /* .template_str= */ "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
-            /* .expected_output= */ "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT:    I am an assistant   </s>\nUSER: Another question\nASSISTANT:",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "CohereForAI/c4ai-command-r-plus",
-            /* .template_str= */ "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}",
-            /* .expected_output= */ "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a helpful assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Who are you<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I am an assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Another question<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
-            /* .expected_output_jinja= */ "",
-        },
-        {
-            /* .name= */ "Llama-3",
-            /* .template_str= */ "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
-            /* .expected_output= */ "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI am an assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nAnother question<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
-            /* .expected_output_jinja= */ "",
-        },
-        {
-            /* .name= */ "Phi-3-mini",
-            /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
-            /* .expected_output= */     "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-            /* .expected_output_jinja= */ "<|user|>\nYou are a helpful assistant\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-        },
-        {
-            /* .name= */ "Phi-3-small",
-            /* .template_str= */ "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
-            /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-            /* .expected_output_jinja= */ "",
-        },
-        {
-            /* .name= */ "Phi-3-medium",
-            /* .template_str= */ "{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
-            /* .expected_output= */     "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-            /* .expected_output_jinja= */ "<|user|>\nYou are a helpful assistant\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-        },
-        {
-            /* .name= */ "Phi-3-vision",
-            /* .template_str= */ "{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %}",
-            /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\n   I am an assistant   <|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "ChatGLM3",
-            /* .template_str= */ "{% for message in messages %}{% if loop.first %}[gMASK]sop<|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
-            /* .expected_output= */       "[gMASK]sop<|system|>\n You are a helpful assistant<|user|>\n Hello<|assistant|>\n Hi there<|user|>\n Who are you<|assistant|>\n    I am an assistant   <|user|>\n Another question<|assistant|>",
-            /* .expected_output_jinja= */ "[gMASK]sop<|system|>\n You are a helpful assistant<|user|>\n Hello<|assistant|>\n Hi there<|user|>\n Who are you<|assistant|>\n    I am an assistant   <|user|>\n Another question<|assistant|>",
-        },
-        {
-            /* .name= */ "ChatGLM4",
-            /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}"),
-            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n   I am an assistant   <|user|>\nAnother question<|assistant|>\n",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "GLMEdge",
-            /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
-            /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n   I am an assistant   <|user|>\nAnother question<|assistant|>",
-            /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n   I am an assistant   <|user|>\nAnother question<|assistant|>",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
-            /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
-            /* .expected_output= */ U8C("You are a helpful assistant<用户>Hello<AI>Hi there<用户>Who are you<AI>I am an assistant<用户>Another question<AI>"),
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "DeepSeek-V2",
-            /* .template_str= */ "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
-            /* .expected_output= */ U8C("You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<｜end▁of▁sentence｜>User: Who are you\n\nAssistant:    I am an assistant   <｜end▁of▁sentence｜>User: Another question\n\nAssistant:"),
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "<｜end▁of▁sentence｜>",
-        },
-        {
-            /* .name= */ "ibm-granite/granite-3.0-8b-instruct",
-            /* .template_str= */ "{%- if tools %}\n    {{- '<|start_of_role|>available_tools<|end_of_role|>\n' }}\n    {%- for tool in tools %}\n    {{- tool | tojson(indent=4) }}\n    {%- if not loop.last %}\n        {{- '\n\n' }}\n    {%- endif %}\n    {%- endfor %}\n    {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n    {{- '<|start_of_role|>system<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n    {%- elif message['role'] == 'user' %}\n    {{- '<|start_of_role|>user<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n    {%- elif message['role'] == 'assistant' %}\n    {{- '<|start_of_role|>assistant<|end_of_role|>'  + message['content'] + '<|end_of_text|>\n' }}\n    {%- elif message['role'] == 'assistant_tool_call' %}\n    {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'] + '<|end_of_text|>\n' }}\n    {%- elif message['role'] == 'tool_response' %}\n    {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n    {%- endif %}\n    {%- if loop.last and add_generation_prompt %}\n    {{- '<|start_of_role|>assistant<|end_of_role|>' }}\n    {%- endif %}\n{%- endfor %}",
-            /* .expected_output= */       "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>   I am an assistant   <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>",
-            /* .expected_output_jinja= */ "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>   I am an assistant   <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>",
-        },
-        {
-            /* .name= */ "mistralai/Mistral-7B-Instruct-v0.2 (mistralai 'v1' template with a system prompt)",
-            /* .template_str= */ "{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n    {%- endif %}\n    {%- if message['role'] == 'user' %}\n        {%- if loop.first and system_message is defined %}\n            {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n        {%- else %}\n            {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'assistant' %}\n        {{- ' ' + message['content'] + eos_token}}\n    {%- else %}\n        {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n    {%- endif %}\n{%- endfor %}\n",
-            /* .expected_output= */ " [INST] You are a helpful assistant\n\nHello [/INST] Hi there</s> [INST] Who are you [/INST]    I am an assistant   </s> [INST] Another question [/INST]",
-            /* .expected_output_jinja= */ " [INST] You are a helpful assistant\n\nHello [/INST] Hi there</s> [INST] Who are you [/INST]    I am an assistant   </s> [INST] Another question [/INST]",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "Mistral-Large-Instruct-2407 (mistralai 'v3' template; modified to have system prompt at start)",
-            /* .template_str= */ "{%- if messages[0][\"role\"] == \"system\" %}\n    {%- set system_message = messages[0][\"content\"] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n    {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n        {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n            {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n        {%- endif %}\n        {%- set ns.index = ns.index + 1 %}\n    {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if message[\"role\"] == \"user\" %}\n        {%- if tools is not none and (message == user_messages[-1]) %}\n            {{- \"[AVAILABLE_TOOLS] [\" }}\n            {%- for tool in tools %}\n                {%- set tool = tool.function %}\n                {{- '{\"type\": \"function\", \"function\": {' }}\n                {%- for key, val in tool.items() if key != \"return\" %}\n                    {%- if val is string %}\n                        {{- '\"' + key + '\": \"' + val + '\"' }}\n                    {%- else %}\n                        {{- '\"' + key + '\": ' + val|tojson }}\n                    {%- endif %}\n                    {%- if not loop.last %}\n                        {{- \", \" }}\n                    {%- endif %}\n                {%- endfor %}\n                {{- \"}}\" }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- else %}\n                    {{- \"]\" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \"[/AVAILABLE_TOOLS]\" }}\n            {%- endif %}\n        {%- if loop.last and system_message is defined %}\n            {{- \"[INST] \" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n        {%- else %}\n            {{- \"[INST] \" + message[\"content\"] + \"[/INST]\" }}\n        {%- endif %}\n    {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n        {{- \"[TOOL_CALLS] [\" }}\n        {%- for tool_call in message.tool_calls %}\n            {%- set out = tool_call.function|tojson %}\n            {{- out[:-1] }}\n            {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n                {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n            {%- endif %}\n            {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n            {%- if not loop.last %}\n                {{- \", \" }}\n            {%- else %}\n                {{- \"]\" + eos_token }}\n            {%- endif %}\n        {%- endfor %}\n    {%- elif message[\"role\"] == \"assistant\" %}\n        {{- \" \" + message[\"content\"]|trim + eos_token}}\n    {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n        {%- if message.content is defined and message.content.content is defined %}\n            {%- set content = message.content.content %}\n        {%- else %}\n            {%- set content = message.content %}\n        {%- endif %}\n        {{- '[TOOL_RESULTS] {\"content\": ' + content|string + \", \" }}\n        {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n            {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n        {%- endif %}\n        {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n    {%- else %}\n        {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n    {%- endif %}\n{%- endfor %}\n",
-            /* .expected_output= */       "[INST] You are a helpful assistant\n\nHello[/INST] Hi there</s>[INST] Who are you[/INST] I am an assistant</s>[INST] Another question[/INST]",
-            /* .expected_output_jinja= */ "[INST] Hello[/INST] Hi there</s>[INST] Who are you[/INST] I am an assistant</s>[INST] You are a helpful assistant\n\nAnother question[/INST]",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "Mistral-Nemo-Instruct-2407 (mistralai 'v3-tekken' template; modified to have system prompt at start)",
-            /* .template_str= */ "{%- if messages[0][\"role\"] == \"system\" %}\n    {%- set system_message = messages[0][\"content\"] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n    {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n        {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n            {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n        {%- endif %}\n        {%- set ns.index = ns.index + 1 %}\n    {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if message[\"role\"] == \"user\" %}\n        {%- if tools is not none and (message == user_messages[-1]) %}\n            {{- \"[AVAILABLE_TOOLS][\" }}\n            {%- for tool in tools %}\n                {%- set tool = tool.function %}\n                {{- '{\"type\": \"function\", \"function\": {' }}\n                {%- for key, val in tool.items() if key != \"return\" %}\n                    {%- if val is string %}\n                        {{- '\"' + key + '\": \"' + val + '\"' }}\n                    {%- else %}\n                        {{- '\"' + key + '\": ' + val|tojson }}\n                    {%- endif %}\n                    {%- if not loop.last %}\n                        {{- \", \" }}\n                    {%- endif %}\n                {%- endfor %}\n                {{- \"}}\" }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- else %}\n                    {{- \"]\" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \"[/AVAILABLE_TOOLS]\" }}\n            {%- endif %}\n        {%- if loop.last and system_message is defined %}\n            {{- \"[INST]\" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n        {%- else %}\n            {{- \"[INST]\" + message[\"content\"] + \"[/INST]\" }}\n        {%- endif %}\n    {%- elif (message.tool_calls is defined and message.tool_calls is not none) %}\n        {{- \"[TOOL_CALLS][\" }}\n        {%- for tool_call in message.tool_calls %}\n            {%- set out = tool_call.function|tojson %}\n            {{- out[:-1] }}\n            {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n                {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n            {%- endif %}\n            {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n            {%- if not loop.last %}\n                {{- \", \" }}\n            {%- else %}\n                {{- \"]\" + eos_token }}\n            {%- endif %}\n        {%- endfor %}\n    {%- elif message[\"role\"] == \"assistant\" %}\n        {{- message[\"content\"] + eos_token}}\n    {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n        {%- if message.content is defined and message.content.content is defined %}\n            {%- set content = message.content.content %}\n        {%- else %}\n            {%- set content = message.content %}\n        {%- endif %}\n        {{- '[TOOL_RESULTS]{\"content\": ' + content|string + \", \" }}\n        {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n            {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n        {%- endif %}\n        {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n    {%- else %}\n        {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n    {%- endif %}\n{%- endfor %}\n",
-            /* .expected_output= */       "[INST]You are a helpful assistant\n\nHello[/INST]Hi there</s>[INST]Who are you[/INST]   I am an assistant   </s>[INST]Another question[/INST]",
-            /* .expected_output_jinja= */ "[INST]Hello[/INST]Hi there</s>[INST]Who are you[/INST]   I am an assistant   </s>[INST]You are a helpful assistant\n\nAnother question[/INST]",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "mistralai/Mistral-Large-Instruct-2411 (mistralai 'v7' template)",
-            /* .template_str= */ "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST]' }}{% elif message['role'] == 'system' %}{{ '[SYSTEM_PROMPT] ' + message['content'] + '[/SYSTEM_PROMPT]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token }}{% else %}{{ raise_exception('Only user, system and assistant roles are supported!') }}{% endif %}{% endfor %}",
-            /* .expected_output= */ "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT][INST] Hello[/INST] Hi there</s>[INST] Who are you[/INST]    I am an assistant   </s>[INST] Another question[/INST]",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "</s>",
-        },
-        {
-            /* .name= */ "ai-sage/GigaChat-20B-A3B-instruct",
-            /* .template_str= */ "{% if messages[0]['role'] == 'system' -%}\n    {%- set loop_messages = messages[1:] -%}\n    {%- set system_message = bos_token + messages[0]['content'] + additional_special_tokens[1] -%}\n{%- else -%}\n    {%- set loop_messages = messages -%}\n    {%- set system_message = bos_token + '' -%}\n{%- endif -%}\n{%- for message in loop_messages %}\n    {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n    {% endif %}\n    \n    {%- if loop.index0 == 0 -%}\n        {{ system_message -}}\n    {%- endif -%}\n    {%- if message['role'] == 'user' -%}\n        {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n        {{ 'available functions' + additional_special_tokens[0] + additional_special_tokens[2] + additional_special_tokens[3]  + additional_special_tokens[1] -}}\n    {%- endif -%}\n    {%- if message['role'] == 'assistant' -%}\n        {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n    {%- endif -%}\n    {%- if loop.last and add_generation_prompt -%}\n        {{ 'assistant' + additional_special_tokens[0] -}}\n    {%- endif -%}\n{%- endfor %}",
-            /* .expected_output= */ "<s>You are a helpful assistant<|message_sep|>user<|role_sep|>Hello<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>Hi there<|message_sep|>user<|role_sep|>Who are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>   I am an assistant   <|message_sep|>user<|role_sep|>Another question<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-            /* .supported_with_jinja= */ false, // Requires additional_special_tokens as extra context
-        },
-        {
-            /* .name= */ "Infinigence/Megrez-3B-Instruct",
-            /* .template_str= */ U8C("{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct，将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"),
-            /* .expected_output= */ "<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|>   I am an assistant   <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "phi-4",
-            /* .template_str= */ "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|><|im_start|>assistant<|im_sep|>'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}",
-            /* .expected_output= */ "<|im_start|>system<|im_sep|>You are a helpful assistant<|im_end|><|im_start|>user<|im_sep|>Hello<|im_end|><|im_start|>assistant<|im_sep|>Hi there<|im_end|><|im_start|>user<|im_sep|>Who are you<|im_end|><|im_start|>assistant<|im_sep|>   I am an assistant   <|im_end|><|im_start|>user<|im_sep|>Another question<|im_end|><|im_start|>assistant<|im_sep|>",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "yandex/YandexGPT-5-Lite-8B-instruct",
-            /* .template_str= */ "<s>{%- set names = {'assistant': ' Ассистент:', 'user': ' Пользователь:'} %}\n{%- set tools_prefix = 'Тебе доступны следующие функции:' %}\n{%- macro __render_tool(tool) %}\n    {%- set name = tool.function.name %}\n    {%- set description = tool.function.description|default('') %}\n    {%- set parameters = tool.function.parameters|tojson %}\n    {{- '\\n' }}function {{ '{' }}'name':'{{ name }}',\n    {%- if tool.function.description %}'description':'{{ description }}',{% endif %}\n'parameters':{{ parameters }}\n    {{- '}' }}\n{%- endmacro %}\n{%- macro __render_tools(tools) %}\n    {{- tools_prefix }}\n    {%- for tool in tools %}\n        {{- __render_tool(tool) }}\n    {%- endfor %}\n    {{- '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_tool_message(message) %}\n    {{- '\\n\\nРезультат вызова' }} {{ message.name }}: {{ message.content }} {{ '\\n\\n' }}\n{%- endmacro %}\n{%- if tools -%}\n    {{- __render_tools(tools) }}\n{%- endif -%}\n{%- macro __render_user_message(message) %}\n{{ names.user }} {{ message.content + '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_assistant_message(message) %}\n    {{- names.assistant }}\n    {%- set call = message['function_call'] %}\n    {%- if call %}\n        {{- '\\n[TOOL_CALL_START]' }}{{ call.name }}{{ '\\n' }}{{ call.arguments|tojson }}\n    {%- else %}\n        {{- ' ' + message.content + '\\n\\n' }}\n    {%- endif %}\n{%- endmacro %}\n{%- if not add_generation_prompt is defined %}\n{%- set add_generation_prompt = false %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'user' %}\n        {{- __render_user_message(message) }}\n    {%- endif %}\n    {%- if message.role == 'assistant' and not loop.last %}\n        {{- __render_assistant_message(message) }}\n    {%- endif %}\n    {%- if message.role == 'tool' %}\n        {{- __render_tool_message(message) }}\n    {%- endif %}\n    {%- if loop.last %}\n        {{- ' Ассистент:[SEP]' }}\n    {%- endif %}\n{%- endfor %}\n",
-            /* .expected_output= */ " Пользователь: Hello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент:    I am an assistant   \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
-            /* .expected_output_jinja= */ "<s> Пользователь: You are a helpful assistant\nHello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент:    I am an assistant   \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
-            /* .bos_token= */ "<s>",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "inclusionAI/Ling-lite",
-            /* .template_str */ "{% for message in messages %}{% set role = message['role'] | lower %}{% if role == 'user' %}{% set role = 'HUMAN' %}{% endif %}{% set role = role | upper %}{{ '<role>' + role + '</role>' + message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ '<role>ASSISTANT</role>' }}{% endif %}",
-            /* .expected_output= */ "<role>SYSTEM</role>You are a helpful assistant<role>HUMAN</role>Hello<role>ASSISTANT</role>Hi there<role>HUMAN</role>Who are you<role>ASSISTANT</role>   I am an assistant   <role>HUMAN</role>Another question<role>ASSISTANT</role>",
-            /* .expected_output_jinja= */ "",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
-        {
-            /* .name= */ "ByteDance-Seed/Seed-OSS-36B-Instruct",
-            /* .template_str */ "{# <seed:bos> #}{%- for message in messages %}{%- if message.role in [\"user\", \"system\"] %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- elif message.role == \"assistant\" %}{{ bos_token + message.role }}{%- if message.content is defined and message.content is string and message.content|trim|length > 0 %}{{ \"\\n\" + message.content|trim + eos_token }}{%- endif %}{%- else %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- endif %}{%- endfor %}{%- if add_generation_prompt %}{{ bos_token + \"assistant\\n\" }}{%- endif %}",
-            /* .expected_output= */ "<seed:bos>system\nYou are a helpful assistant<seed:eos><seed:bos>user\nHello<seed:eos><seed:bos>assistant\nHi there<seed:eos><seed:bos>user\nWho are you<seed:eos><seed:bos>assistant\nI am an assistant<seed:eos><seed:bos>user\nAnother question<seed:eos><seed:bos>assistant\n",
-            /* .expected_output_jinja= */ "<seed:bos>system\nYou are a helpful assistant<seed:eos><seed:bos>user\nHello<seed:eos><seed:bos>assistant\nHi there<seed:eos><seed:bos>user\nWho are you<seed:eos><seed:bos>assistant\nI am an assistant<seed:eos><seed:bos>user\nAnother question<seed:eos><seed:bos>assistant\n",
-            /* .bos_token= */ "<seed:bos>",
-            /* .eos_token= */ "<seed:eos>",
-        }
-    };
-    std::vector<char> formatted_chat(1024);
-    int32_t res;
-
-    // list all supported templates
-    std::vector<const char *> supported_tmpl;
-    res = llama_chat_builtin_templates(nullptr, 0);
-    assert(res > 0);
-    supported_tmpl.resize(res);
-    res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size());
-    std::cout << "Built-in chat templates:\n";
-    for (auto tmpl : supported_tmpl) {
-        std::cout << "  " << tmpl << "\n";
-    }
-
-    // test invalid chat template
-    res = llama_chat_apply_template("INVALID TEMPLATE", conversation.data(), conversation.size(), true, formatted_chat.data(), formatted_chat.size());
-    assert(res < 0);
-    const auto add_generation_prompt = true;
-
-    for (const auto & test_case : test_cases) {
-        std::cout << "\n\n=== " << test_case.name << " ===\n\n";
-        formatted_chat.resize(1024);
-        res = llama_chat_apply_template(
-            test_case.template_str.c_str(),
-            conversation.data(),
-            conversation.size(),
-            add_generation_prompt,
-            formatted_chat.data(),
-            formatted_chat.size()
-        );
-        formatted_chat.resize(res);
-        std::string output(formatted_chat.data(), formatted_chat.size());
-        if (output != test_case.expected_output) {
-            std::cout << "Expected:\n" << test_case.expected_output << "\n";
-            std::cout << "-------------------------\n";
-            std::cout << "Actual:\n" << output << "\n";
-            std::cout.flush();
-            assert(output == test_case.expected_output);
-        }
-    }
-
-    std::vector<common_chat_msg> messages;
-    for (const auto & msg : conversation) {
-        messages.push_back(simple_msg(msg.role, msg.content));
-    }
-    for (const auto & test_case : test_cases) {
-        if (!test_case.supported_with_jinja) {
-            continue;
-        }
-        std::cout << "\n\n=== " << test_case.name << " (jinja) ===\n\n";
-        try {
-            auto output = format_using_common(
-                                test_case.template_str,
-                                test_case.bos_token,
-                                test_case.eos_token,
-                                messages);
-            auto expected_output = normalize_newlines(test_case.expected_output_jinja.empty() ? test_case.expected_output : test_case.expected_output_jinja);
-            if (output != expected_output) {
-                std::cout << "Template:```\n" << test_case.template_str << "\n```";
-                std::cout << "-------------------------\n";
-                std::cout << "Expected:```\n" << expected_output << "\n```";
-                std::cout << "-------------------------\n";
-                std::cout << "Actual:```\n" << output << "\n```";
-                std::cout.flush();
-                assert(output == expected_output);
-            }
-        } catch (const std::exception & e) {
-            std::cerr << "ERROR: " << e.what() << "\n";
-            assert(false);
-        }
-    }
-
-    // TODO: llama_chat_format_single will be deprecated, remove these tests later
-
-    // test llama_chat_format_single for system message
-    std::cout << "\n\n=== llama_chat_format_single (system message) ===\n\n";
-    std::vector<common_chat_msg> chat2;
-    auto sys_msg = simple_msg("system", "You are a helpful assistant");
-
-    auto fmt_sys = [&](std::string tmpl_str) {
-        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str);
-        auto output = common_chat_format_single(tmpls.get(), chat2, sys_msg, false, /* use_jinja= */ false);
-        std::cout << "fmt_sys(" << tmpl_str << ") : " << output << "\n";
-        std::cout << "-------------------------\n";
-        return output;
-    };
-    assert(fmt_sys("chatml") == "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n");
-    assert(fmt_sys("mistral-v1") == " [INST] You are a helpful assistant\n\n");
-    assert(fmt_sys("mistral-v3") == "[INST] You are a helpful assistant\n\n");
-    assert(fmt_sys("mistral-v3-tekken") == "[INST]You are a helpful assistant\n\n");
-    assert(fmt_sys("mistral-v7") == "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT]");
-    assert(fmt_sys("llama2") == "[INST] You are a helpful assistant\n");
-    assert(fmt_sys("llama2-sys") == "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\n");
-    assert(fmt_sys("mistral") == "[INST] You are a helpful assistant\n"); // for old pre-v1 templates
-    assert(fmt_sys("gemma")  == ""); // for gemma, system message is merged with user message
-    assert(fmt_sys("llama3") == "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|>");
-    assert(fmt_sys("gigachat") == "<s>You are a helpful assistant<|message_sep|>");
-
-
-    // test llama_chat_format_single for user message
-    std::cout << "\n\n=== llama_chat_format_single (user message) ===\n\n";
-    chat2.push_back(simple_msg("system", "You are a helpful assistant"));
-    chat2.push_back(simple_msg("user", "Hello"));
-    chat2.push_back(simple_msg("assistant", "I am assistant"));
-    auto new_msg = simple_msg("user", "How are you");
-
-    auto fmt_single = [&](const std::string & tmpl_str) {
-        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str.c_str());
-        auto output = common_chat_format_single(tmpls.get(), chat2, new_msg, true, /* use_jinja= */ false);
-        std::cout << "fmt_single(" << tmpl_str << ") : " << output << "\n";
-        std::cout << "-------------------------\n";
-        return output;
-    };
-    assert(fmt_single("chatml") == "\n<|im_start|>user\nHow are you<|im_end|>\n<|im_start|>assistant\n");
-    assert(fmt_single("mistral-v1") == " [INST] How are you [/INST]");
-    assert(fmt_single("mistral-v3") == "[INST] How are you[/INST]");
-    assert(fmt_single("mistral-v3-tekken") == "[INST]How are you[/INST]");
-    assert(fmt_single("mistral-v7") == "[INST] How are you[/INST]");
-    assert(fmt_single("llama2") == "[INST] How are you [/INST]");
-    assert(fmt_single("mistral") == "[INST] How are you [/INST]"); // for old pre-v1 templates
-    assert(fmt_single("gemma")  == "\n<start_of_turn>user\nHow are you<end_of_turn>\n<start_of_turn>model\n");
-    assert(fmt_single("llama3") == "<|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n");
-    // assert(fmt_single("gigachat") == "user<|role_sep|>How are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>");
-
-    std::cout << "\nOK: All tests passed successfully.\n";
-
-    return 0;
-}
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 4378a8db716..304370f2c18 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -5,18 +5,21 @@
 //
 //    cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
 //
+#include "../src/llama-grammar.h"
+#include "../src/unicode.h"
+#include "chat-auto-parser.h"
 #include "chat.h"
-
+#include "common.h"
+#include "ggml.h"
 #include "log.h"
 
-#include "../src/unicode.h"
-#include "../src/llama-grammar.h"
-
-#include <nlohmann/json.hpp>
-
+#include <algorithm>
+#include <exception>
 #include <fstream>
-#include <iostream>
 #include <functional>
+#include <iostream>
+#include <nlohmann/json.hpp>
+#include <stdexcept>
 #include <string>
 
 using json = nlohmann::ordered_json;
@@ -33,6 +36,7 @@ static std::ostream & operator<<(std::ostream & os, const common_chat_msg_diff &
     os << "}";
     return os;
 }
+
 // operator<< for vector<common_chat_msg_diff>:
 static std::ostream & operator<<(std::ostream & os, const std::vector<common_chat_msg_diff> & diffs) {
     os << "[\n";
@@ -42,6 +46,7 @@ static std::ostream & operator<<(std::ostream & os, const std::vector<common_cha
     os << "]";
     return os;
 }
+
 static std::ostream & operator<<(std::ostream & os, const common_chat_msg & msg) {
     os << "{ role: " << msg.role << "; ";
     os << "content: " << msg.content << "; ";
@@ -53,7 +58,8 @@ static std::ostream & operator<<(std::ostream & os, const common_chat_msg & msg)
     os << "reasoning_content: " << msg.reasoning_content << "; ";
     os << "tool_calls: [\n";
     for (const auto & tool_call : msg.tool_calls) {
-        os << "  { name: " << tool_call.name << "; arguments: " << tool_call.arguments << "; id: " << tool_call.id << " },\n";
+        os << "  { name: " << tool_call.name << "; arguments: " << tool_call.arguments << "; id: " << tool_call.id
+           << " },\n";
     }
     os << "]";
     os << "}";
@@ -70,29 +76,29 @@ static common_chat_msg normalize(const common_chat_msg & msg) {
         try {
             tool_call.arguments = json::parse(tool_call.arguments).dump();
         } catch (const std::exception &) {
-            // Do nothing
         }
     }
     return normalized;
 }
 
-
-template <>
-bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
+template <> bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
     return normalize(expected) == normalize(actual);
 }
 
 template <class T> static void assert_equals(const T & expected, const T & actual) {
     if (!equals(expected, actual)) {
-        std::cerr << "Expected:```\n" << expected << "\n```" << std::endl;
-        std::cerr << "Actual:```\n" << actual << "\n```" << std::endl;
-        std::cerr << std::flush;
+        std::ostringstream oss_expected;
+        oss_expected << expected;
+        std::ostringstream oss_actual;
+        oss_actual << actual;
+        LOG_ERR("Expected: %s\n", oss_expected.str().c_str());
+        LOG_ERR("Actual: %s\n", oss_actual.str().c_str());
+        common_log_flush(common_log_main());
         throw std::runtime_error("Test failed");
     }
 }
 
 static std::string read_file(const std::string & path) {
-    std::cerr << "# Reading: " << path << '\n' << std::flush;
     std::ifstream fs(path, std::ios_base::binary);
     if (!fs.is_open()) {
         fs = std::ifstream("../" + path, std::ios_base::binary);
@@ -146,11 +152,13 @@ static std::string renormalize_json(const std::string & json_str) {
         auto json_obj = json::parse(json_str);
         return json_obj.dump();
     } catch (const std::exception & e) {
-        std::cerr << "Failed to parse JSON: " << e.what() << '\n';
-        return json_str;
+        return "";  // ignore parial JSON contents for comparison purposes
     }
 }
-static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual, bool ignore_whitespace_differences = false) {
+
+static void assert_msg_equals(const common_chat_msg & expected,
+                              const common_chat_msg & actual,
+                              bool                    ignore_whitespace_differences = false) {
     assert_equals(expected.role, actual.role);
     if (ignore_whitespace_differences) {
         assert_equals(string_strip(expected.content), string_strip(actual.content));
@@ -183,7 +191,7 @@ static void assert_msg_equals(const common_chat_msg & expected, const common_cha
     }
 }
 
-common_chat_tool special_function_tool {
+static common_chat_tool special_function_tool{
     /* .name = */ "special_function",
     /* .description = */ "I'm special",
     /* .parameters = */ R"({
@@ -197,7 +205,7 @@ common_chat_tool special_function_tool {
         "required": ["arg1"]
     })",
 };
-common_chat_tool special_function_tool_with_optional_param {
+static common_chat_tool special_function_tool_with_optional_param{
     /* .name = */ "special_function_with_opt",
     /* .description = */ "I'm special but have optional stuff",
     /* .parameters = */ R"({
@@ -215,7 +223,7 @@ common_chat_tool special_function_tool_with_optional_param {
         "required": ["arg1"]
     })",
 };
-common_chat_tool python_tool {
+static common_chat_tool python_tool{
     /* .name = */ "python",
     /* .description = */ "an ipython interpreter",
     /* .parameters = */ R"({
@@ -229,44 +237,229 @@ common_chat_tool python_tool {
         "required": ["code"]
     })",
 };
-common_chat_tool code_interpreter_tool {
-    /* .name = */ "code_interpreter",
-    /* .description = */ "an ipython interpreter",
+
+static common_chat_tool html_tool{
+    /* .name = */ "html",
+    /* .description = */ "an html validator",
     /* .parameters = */ R"({
         "type": "object",
         "properties": {
-            "code": {
+            "markup": {
                 "type": "string",
-                "description": "Python code to execute."
+                "description": "HTML markup to validate."
             }
         },
-        "required": ["code"]
+        "required": ["markup"]
     })",
 };
-std::vector<common_chat_tool> tools           { special_function_tool, special_function_tool_with_optional_param, python_tool };
-std::vector<common_chat_tool> llama_3_1_tools { special_function_tool, code_interpreter_tool };
 
-struct delta_data {
-    std::string        delta;
-    common_chat_params params;
+static common_chat_tool get_time_tool{
+    /* .name = */ "get_time",
+    /* .description = */ "Get the current time in a city",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "city": {
+                "type": "string",
+                "description": "City name"
+            }
+        },
+        "required": ["city"]
+    })",
+};
+
+static common_chat_tool get_weather_tool{
+    /* .name = */ "get_weather",
+    /* .description = */ "Get the current weather in a city",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "city": {
+                "type": "string",
+                "description": "City name"
+            }
+        },
+        "required": ["city"]
+    })",
+};
+
+static common_chat_tool todo_list{
+    /* .name = */ "todo_list",
+    /* .description = */ "Create or update the todo list",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "todos": {
+                "type": "array",
+                "description": "List of TODO list items"
+            }
+        },
+        "required": ["todos"]
+    })",
+};
+
+static common_chat_tool edit_tool{
+    /* .name = */ "edit",
+    /* .description = */ "Edit file",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "filename": {
+                "type": "string",
+                "description": "Path of file to edit"
+            },
+            "oldString": {
+                "type": "string",
+                "description": "String to replace"
+            },
+            "newString": {
+                "type": "string",
+                "description": "New (replacement) value"
+            }
+        },
+        "required": ["filename", "oldString", "newString"]
+    })",
+};
+
+static std::vector<common_chat_tool> tools{ special_function_tool, special_function_tool_with_optional_param,
+                                            python_tool, html_tool, todo_list };
+
+const common_chat_msg message_user{
+    "user",
+    "Hey there!",
+    /* .content_parts = */ {},
+    /* .tool_calls = */ {},
+    /* .reasoning_content = */ "",
+    /* .tool_name = */ "",
+    /* .tool_call_id = */ "",
 };
 
-static common_chat_msg simple_assist_msg(const std::string & content, const std::string & reasoning_content = "", const std::string & tool_name = "", const std::string & arguments = "", const std::string & id = "") {
+const common_chat_msg message_user_parts{
+    "user",
+    /* .content = */ "",
+    /* .content_parts = */
+    {
+     { "text", "Hey" },
+     { "text", "there" },
+     },
+    /* .tool_calls = */
+    {                 },
+    /* .reasoning_content = */
+    "",
+    /* .tool_name = */ "",
+    /* .tool_call_id = */ "",
+};
+
+static common_chat_msg simple_assist_msg(const std::string & content,
+                                         const std::string & reasoning_content = "",
+                                         const std::string & tool_name         = "",
+                                         const std::string & arguments         = "",
+                                         const std::string & id                = "") {
     common_chat_msg msg;
-    msg.role = "assistant";
-    msg.content = content;
+    msg.role              = "assistant";
+    msg.content           = content;
     msg.reasoning_content = reasoning_content;
-    if (!tool_name.empty()) {
+    if (!tool_name.empty() || !id.empty()) {
         msg.tool_calls.push_back({ tool_name, arguments, id });
     }
     return msg;
 }
 
-static delta_data init_delta(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
-                             const common_chat_msg & user_message,
-                             const common_chat_msg & delta_message,
+static common_chat_msg message_with_tool_calls(const std::string & tool_name, const std::string & arguments) {
+    return simple_assist_msg("", "", tool_name, arguments);
+}
+
+static common_chat_msg message_with_tool_calls_and_reasoning(const std::string & tool_name,
+                                                             const std::string & arguments,
+                                                             const std::string & reasoning) {
+    return simple_assist_msg("", reasoning, tool_name, arguments);
+}
+
+static common_chat_msg message_with_reasoning_content_and_multiple_tool_calls(
+    const std::string &                                      reasoning,
+    const std::string &                                      content,
+    const std::vector<std::pair<std::string, std::string>> & tool_calls) {
+    common_chat_msg msg;
+    msg.role              = "assistant";
+    msg.content           = content;
+    msg.reasoning_content = reasoning;
+    for (const auto & [name, args] : tool_calls) {
+        msg.tool_calls.push_back({ name, args, "" });
+    }
+    return msg;
+}
+
+static common_chat_msg message_with_content_and_tool_call(const std::string & content,
+                                                          const std::string & tool_name,
+                                                          const std::string & arguments) {
+    return simple_assist_msg(content, "", tool_name, arguments);
+}
+
+static common_chat_msg message_with_reasoning_and_tool_call(const std::string & reasoning,
+                                                            const std::string & tool_name,
+                                                            const std::string & arguments) {
+    return simple_assist_msg("", reasoning, tool_name, arguments);
+}
+
+const common_chat_msg message_assist       = simple_assist_msg("Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_empty = simple_assist_msg("");
+const common_chat_msg message_assist_thoughts_unparsed_deepseek =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_unparsed_md =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
+const common_chat_msg message_assist_thoughts_unparsed_md_partial =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");
+
+const common_chat_msg message_assist_thoughts_unparsed_r7b =
+    simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_unparsed_magistral =
+    simple_assist_msg("[THINK]raisonnement[/THINK]Réponse");
+const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
+const common_chat_msg message_assist_thoughts_unopened_unparsed =
+    simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking");
+const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_noopt =
+    simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_withopt =
+    simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
+const common_chat_msg message_assist_call_content =
+    simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
+const common_chat_msg message_assist_call_empty_args  = simple_assist_msg("", "", "special_function");
+const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg");
+const common_chat_msg message_assist_call_thoughts =
+    simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}");
+const common_chat_msg message_assist_call_thoughts_unparsed =
+    simple_assist_msg("<think>I'm\nthinking</think>\n\n", "", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_thoughts_content =
+    simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_id =
+    simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789");
+const common_chat_msg message_assist_call_idx =
+    simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0");
+const common_chat_msg message_assist_thoughts_call_idx =
+    simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0");
+const common_chat_msg message_assist_thoughts_partial_call =
+    simple_assist_msg("", "I'm\nthinking", "special_function", "", /* id = */ "0");
+const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}");
+const common_chat_msg message_assist_call_python_lines =
+    simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}");
+const common_chat_msg message_assist_call_python_lines_unclosed =
+    simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
+const common_chat_msg message_assist_json_content =
+    simple_assist_msg("{\n  \"response\": \"Hello, world!\\nWhat's up?\"\n}");
+
+struct delta_data {
+    std::string        delta;
+    common_chat_params params;
+};
+
+static delta_data init_delta(const struct common_chat_templates *  tmpls,
+                             const std::vector<std::string> &      end_tokens,
+                             const common_chat_msg &               user_message,
+                             const common_chat_msg &               delta_message,
                              const std::vector<common_chat_tool> & tools,
-                             const common_chat_tool_choice & tool_choice) {
+                             const common_chat_tool_choice &       tool_choice) {
     common_chat_templates_inputs inputs;
     inputs.parallel_tool_calls = true;
     inputs.messages.push_back(user_message);
@@ -317,20 +510,27 @@ static delta_data init_delta(const struct common_chat_templates * tmpls, const s
   gets the diff, removes any end tokens and parses the result w/ the grammar, checking that
   the parsed message is the same as the test_message
 */
-static void test_templates(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
-                          const common_chat_msg & test_message,
-                          const std::vector<common_chat_tool> & tools = {},
-                          const std::string & expected_delta = "",
-                          bool expect_grammar_triggered = true,
-                          bool test_grammar_if_triggered = true,
-                          common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE,
-                          bool ignore_whitespace_differences = false
-                        ) {
+static void test_templates(const struct common_chat_templates *  tmpls,
+                           const std::vector<std::string> &      end_tokens,
+                           const common_chat_msg &               test_message,
+                           const std::vector<common_chat_tool> & tools                     = {},
+                           const std::string &                   expected_delta            = "",
+                           bool                                  expect_grammar_triggered  = true,
+                           bool                                  test_grammar_if_triggered = true,
+                           common_reasoning_format               reasoning_format = COMMON_REASONING_FORMAT_NONE,
+                           bool                                  ignore_whitespace_differences = false) {
     common_chat_msg user_message;
-    user_message.role = "user";
+    user_message.role    = "user";
     user_message.content = "Hello, world!";
 
-    for (const auto & tool_choice : std::vector<common_chat_tool_choice> {COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED}) {
+    common_chat_templates_inputs inputs_tools;
+    inputs_tools.messages = { message_user };
+    inputs_tools.tools    = { special_function_tool };
+
+    common_chat_params params = common_chat_templates_apply(tmpls, inputs_tools);
+
+    for (const auto & tool_choice :
+         std::vector<common_chat_tool_choice>{ COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED }) {
         auto data = init_delta(tmpls, end_tokens, user_message, test_message, tools, tool_choice);
         if (!expected_delta.empty()) {
             if (ignore_whitespace_differences) {
@@ -342,10 +542,14 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
 
         if (expect_grammar_triggered) {
             // TODO @ngxson : refactor common_chat_parse to avoid passing format/reasoning_format every time
-            common_chat_parser_params params;
-            params.format = data.params.format;
-            params.reasoning_format = reasoning_format;
-            const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, params);
+            common_chat_parser_params parser_params;
+            parser_params.format           = data.params.format;
+            parser_params.reasoning_format = reasoning_format;
+            if (!parser_params.parser.empty()) {
+                parser_params.parser = common_peg_arena();
+                parser_params.parser.load(params.parser);
+            }
+            const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, parser_params);
             assert_msg_equals(test_message, msg, ignore_whitespace_differences);
         }
 
@@ -358,43 +562,43 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
                 throw std::runtime_error("Failed to build grammar");
             }
             auto earliest_trigger_pos = std::string::npos;
-            auto constrained = data.delta;
+            auto constrained          = data.delta;
             for (const auto & trigger : data.params.grammar_triggers) {
-                size_t pos = std::string::npos;
+                size_t      pos = std::string::npos;
                 std::smatch match;
                 switch (trigger.type) {
                     case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
-                    {
-                        const auto & word = trigger.value;
-                        pos = constrained.find(word);
-                        break;
-                    }
+                        {
+                            const auto & word = trigger.value;
+                            pos               = constrained.find(word);
+                            break;
+                        }
                     case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
-                    {
-                        const auto & pattern = trigger.value;
-                        if (std::regex_search(constrained, match, std::regex(pattern))) {
-                            pos = match.position(1);
+                        {
+                            const auto & pattern = trigger.value;
+                            if (std::regex_search(constrained, match, std::regex(pattern))) {
+                                pos = match.position(1);
+                            }
+                            break;
                         }
-                        break;
-                    }
                     case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
-                    {
-                        const auto & pattern = trigger.value;
-                        if (std::regex_match(constrained, match, std::regex(pattern))) {
-                            auto mpos = std::string::npos;
-                            for (size_t i = 1; i < match.size(); ++i) {
-                                if (match[i].length() > 0) {
-                                    mpos = match.position(i);
-                                    break;
+                        {
+                            const auto & pattern = trigger.value;
+                            if (std::regex_match(constrained, match, std::regex(pattern))) {
+                                auto mpos = std::string::npos;
+                                for (size_t i = 1; i < match.size(); ++i) {
+                                    if (match[i].length() > 0) {
+                                        mpos = match.position(i);
+                                        break;
+                                    }
                                 }
+                                if (mpos == std::string::npos) {
+                                    mpos = match.position(0);
+                                }
+                                pos = mpos;
                             }
-                            if (mpos == std::string::npos) {
-                                mpos = match.position(0);
-                            }
-                            pos = mpos;
+                            break;
                         }
-                        break;
-                    }
                     default:
                         throw std::runtime_error("Unknown trigger type");
                 }
@@ -407,7 +611,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
             }
             auto grammar_triggered = false;
             if (earliest_trigger_pos != std::string::npos) {
-                constrained = constrained.substr(earliest_trigger_pos);
+                constrained       = constrained.substr(earliest_trigger_pos);
                 grammar_triggered = true;
             }
             if (data.params.grammar_lazy) {
@@ -416,8 +620,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
 
             if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) {
                 throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta +
-                    "\n\nConstrained: " + constrained +
-                    "\n\nGrammar: " + data.params.grammar);
+                                         "\n\nConstrained: " + constrained + "\n\nGrammar: " + data.params.grammar);
             }
         }
     }
@@ -431,24 +634,31 @@ template <typename T>
 static void test_parser_with_streaming(const common_chat_msg & expected, const std::string & raw_message, T parse_msg) {
     constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t {
         auto len = s.size();
-        if (len == 0) return 0;
+        if (len == 0) {
+            return 0;
+        }
         auto i = len;
         for (size_t back = 0; back < 4 && i > 0; ++back) {
             --i;
             unsigned char c = s[i];
             if ((c & 0x80) == 0) {
                 return len;
-            } else if ((c & 0xC0) == 0xC0) {
+            }
+            if ((c & 0xC0) == 0xC0) {
                 size_t expected_len = 0;
-                if ((c & 0xE0) == 0xC0) expected_len = 2;
-                else if ((c & 0xF0) == 0xE0) expected_len = 3;
-                else if ((c & 0xF8) == 0xF0) expected_len = 4;
-                else return i;
-                if (len - i >= expected_len) {
-                    return len;
+                if ((c & 0xE0) == 0xC0) {
+                    expected_len = 2;
+                } else if ((c & 0xF0) == 0xE0) {
+                    expected_len = 3;
+                } else if ((c & 0xF8) == 0xF0) {
+                    expected_len = 4;
                 } else {
                     return i;
                 }
+                if (len - i >= expected_len) {
+                    return len;
+                }
+                return i;
             }
         }
         return len - std::min(len, size_t(3));
@@ -457,13 +667,15 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s
         return s.substr(0, utf8_truncate_safe_len(s));
     };
 
-    auto merged = simple_assist_msg("");
+    auto merged   = simple_assist_msg("");
     auto last_msg = parse_msg("");
     for (size_t i = 1; i <= raw_message.size(); ++i) {
         auto curr_msg = parse_msg(std::string(utf8_truncate_safe_view(std::string_view(raw_message).substr(0, i))));
-        if (curr_msg == simple_assist_msg("")) continue;
-        LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({curr_msg}).dump().c_str());
-        for (auto diff: common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) {
+        if (curr_msg == simple_assist_msg("")) {
+            continue;
+        }
+        LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({ curr_msg }).dump().c_str());
+        for (auto diff : common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) {
             LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat(diff).dump().c_str());
             if (!diff.reasoning_content_delta.empty()) {
                 merged.reasoning_content += diff.reasoning_content_delta;
@@ -473,14 +685,14 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s
             }
             if (diff.tool_call_index != std::string::npos) {
                 if (!diff.tool_call_delta.name.empty()) {
-                    merged.tool_calls.push_back({diff.tool_call_delta.name, "", ""});
+                    merged.tool_calls.push_back({ diff.tool_call_delta.name, "", "" });
                 }
                 if (!diff.tool_call_delta.arguments.empty()) {
                     GGML_ASSERT(!merged.tool_calls.empty());
                     merged.tool_calls.back().arguments += diff.tool_call_delta.arguments;
                 }
             }
-            LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({merged}).dump().c_str());
+            LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({ merged }).dump().c_str());
         }
         assert_msg_equals(curr_msg, merged, true);
         last_msg = curr_msg;
@@ -489,99 +701,92 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s
     assert_msg_equals(expected, merged, true);
 }
 
-const common_chat_msg message_user {
-    "user",
-    "Hey there!",
-    /* .content_parts = */ {},
-    /* .tool_calls = */ {},
-    /* .reasoning_content = */ "",
-    /* .tool_name = */ "",
-    /* .tool_call_id = */ "",
-};
-
-const common_chat_msg message_user_parts {
-    "user",
-    /* .content = */ "",
-    /* .content_parts = */ {
-        { "text", "Hey" },
-        { "text", "there" },
-    },
-    /* .tool_calls = */ {},
-    /* .reasoning_content = */ "",
-    /* .tool_name = */ "",
-    /* .tool_call_id = */ "",
-};
-
-const common_chat_msg message_assist                              = simple_assist_msg("Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_empty                        = simple_assist_msg("");
-const common_chat_msg message_assist_thoughts_unparsed_deepseek   = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_unparsed_md         = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
-const common_chat_msg message_assist_thoughts_unparsed_md_partial = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");
-
-const common_chat_msg message_assist_thoughts_unparsed_r7b       = simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_unparsed_magistral = simple_assist_msg("[THINK]raisonnement[/THINK]Réponse");
-const common_chat_msg message_assist_thoughts                    = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
-const common_chat_msg message_assist_thoughts_unopened_unparsed  = simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_no_content         = simple_assist_msg("", "I'm\nthinking");
-const common_chat_msg message_assist_call                        = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_noopt                  = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_withopt                = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
-const common_chat_msg message_assist_call_content                = simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
-const common_chat_msg message_assist_call_empty_args             = simple_assist_msg("", "", "special_function");
-const common_chat_msg message_assist_call_cutoff_args            = simple_assist_msg("", "", "special_function", "{\"arg");
-const common_chat_msg message_assist_call_thoughts               = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}");
-const common_chat_msg message_assist_call_thoughts_unparsed      = simple_assist_msg("<think>I'm\nthinking</think>\n\n", "", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_thoughts_content       = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_id                     = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789");
-const common_chat_msg message_assist_call_idx                    = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0");
-const common_chat_msg message_assist_thoughts_call_idx           = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0");
-const common_chat_msg message_assist_call_python                 = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}");
-const common_chat_msg message_assist_call_python_lines           = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}");
-const common_chat_msg message_assist_call_python_lines_unclosed  = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
-const common_chat_msg message_assist_call_code_interpreter       = simple_assist_msg("", "", "code_interpreter", "{\"code\":\"print('hey')\"}");
-
 // Use for PEG parser implementations
 struct peg_test_case {
     common_chat_templates_inputs params;
-    std::string input;
-    common_chat_msg expect;
+    std::string                  input;
+    common_chat_msg              expect;
+    bool                         is_partial = false;
 };
 
 struct make_peg_parser {
     common_chat_params params_;
-    common_peg_arena arena_;
-
-    make_peg_parser(common_chat_templates * tmpls, const common_chat_templates_inputs & inputs) {
-        params_ = common_chat_templates_apply(tmpls, inputs);
+    common_peg_arena   arena_;
+    bool               detailed_debug_;
+
+    make_peg_parser(common_chat_templates *              tmpls,
+                    const common_chat_templates_inputs & inputs,
+                    bool                                 detailed_debug = false) {
+        detailed_debug_ = detailed_debug;
+        params_         = common_chat_templates_apply(tmpls, inputs);
         arena_.load(params_.parser);
     }
 
-    common_chat_msg parse(const std::string & msg, bool is_partial) {
+    common_chat_msg parse(const std::string & msg, bool is_partial) const {
         common_chat_parser_params parser_params;
         parser_params.format = params_.format;
+        parser_params.debug = detailed_debug_;
         return common_chat_peg_parse(arena_, msg, is_partial, parser_params);
     }
 };
 
-static void test_peg_parser(common_chat_templates * tmpls, const std::function<void(peg_test_case &)> & init) {
+static void test_peg_parser(common_chat_templates *                      tmpls,
+                            const std::function<void(peg_test_case &)> & init,
+                            bool                                         detailed_debug) {
+    // UTF-8-safe truncation helper (same as in test_parser_with_streaming)
+    constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t {
+        auto len = s.size();
+        if (len == 0) {
+            return 0;
+        }
+        auto i = len;
+        for (size_t back = 0; back < 4 && i > 0; ++back) {
+            --i;
+            unsigned char c = s[i];
+            if ((c & 0x80) == 0) {
+                return len;
+            }
+            if ((c & 0xC0) == 0xC0) {
+                size_t expected_len = 0;
+                if ((c & 0xE0) == 0xC0) {
+                    expected_len = 2;
+                } else if ((c & 0xF0) == 0xE0) {
+                    expected_len = 3;
+                } else if ((c & 0xF8) == 0xF0) {
+                    expected_len = 4;
+                } else {
+                    return i;
+                }
+                if (len - i >= expected_len) {
+                    return len;
+                }
+                return i;
+            }
+        }
+        return len - std::min(len, size_t(3));
+    };
+
     peg_test_case tc;
     init(tc);
     if (tc.params.messages.empty()) {
-        tc.params.messages = {message_user};
+        tc.params.messages = { message_user };
     }
     if (tc.expect.role.empty()) {
         tc.expect.role = "assistant";
     }
 
-    auto parser = make_peg_parser(tmpls, tc.params);
+    auto parser = make_peg_parser(tmpls, tc.params, detailed_debug);
 
     common_chat_msg msg_accum;
     common_chat_msg msg_prev;
     msg_accum.role = msg_prev.role = "assistant";
 
     for (size_t i = 1; i <= tc.input.size(); ++i) {
-        auto is_partial = i < tc.input.size();
-        common_chat_msg msg_current = parser.parse(tc.input.substr(0, i), is_partial);
+        auto            is_partial  = i < tc.input.size() || tc.is_partial;
+        // Use UTF-8 safe truncation to avoid corrupting multi-byte characters
+        size_t          safe_len    = utf8_truncate_safe_len(std::string_view(tc.input).substr(0, i));
+        std::string     prefix      = tc.input.substr(0, safe_len);
+        common_chat_msg msg_current = parser.parse(prefix, is_partial);
 
         for (const auto & diff : common_chat_msg_diff::compute_diffs(msg_prev, msg_current)) {
             if (!diff.reasoning_content_delta.empty()) {
@@ -591,24 +796,147 @@ static void test_peg_parser(common_chat_templates * tmpls, const std::function<v
                 msg_accum.content += diff.content_delta;
             }
             if (diff.tool_call_index != std::string::npos) {
+                // During partial parsing, a new tool call may appear with empty name initially
+                // The name gets filled in as more input is parsed
+                while (msg_accum.tool_calls.size() <= diff.tool_call_index) {
+                    msg_accum.tool_calls.push_back({ "", "", "" });
+                }
+                // Always update name and id from diff (may change during incremental parsing), but only if the delta
+                // actually contains them
                 if (!diff.tool_call_delta.name.empty()) {
-                    msg_accum.tool_calls.push_back({diff.tool_call_delta.name, "", diff.tool_call_delta.id});
+                    msg_accum.tool_calls[diff.tool_call_index].name = diff.tool_call_delta.name;
+                }
+                if (!diff.tool_call_delta.id.empty()) {
+                    msg_accum.tool_calls[diff.tool_call_index].id = diff.tool_call_delta.id;
                 }
                 if (!diff.tool_call_delta.arguments.empty()) {
-                    msg_accum.tool_calls.back().arguments += diff.tool_call_delta.arguments;
+                    msg_accum.tool_calls[diff.tool_call_index].arguments += diff.tool_call_delta.arguments;
                 }
             }
         }
-        assert_msg_equals(msg_current, msg_accum, true);
+        try {
+            assert_msg_equals(msg_current, msg_accum, true);
+        } catch (std::exception & e) {
+            throw std::runtime_error((std::string("Error comparing accumulated message to current: ") + e.what()).c_str());
+        }
+        
         msg_prev = msg_current;
     }
 
-    assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
+    if (!tc.is_partial) {
+        assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
+    }
     assert_msg_equals(tc.expect, msg_accum, true);
 }
 
+// Global template filter for --template flag
+static std::string g_template_filter;
+
+// Fluent builder for PEG parser tests
+class peg_test_builder;
+
+class peg_tester {
+    common_chat_templates_ptr tmpls_;
+    std::string               template_path_;
+    bool                      detailed_debug_;
+    friend class peg_test_builder;
+
+  public:
+    explicit peg_tester(const std::string & template_path, const bool detailed_debug = false) :
+        tmpls_(read_templates(template_path)),
+        template_path_(template_path),
+        detailed_debug_(detailed_debug) {}
+
+    const std::string & template_path() const { return template_path_; }
+
+    peg_test_builder test(const std::string & input);
+};
+
+class peg_test_builder {
+    peg_tester &  tester_;
+    peg_test_case tc_;
+
+  public:
+    peg_test_builder(peg_tester & tester, const std::string & input) : tester_(tester) { tc_.input = input; }
+
+    // Parameter setters
+    peg_test_builder & reasoning_format(common_reasoning_format fmt) {
+        tc_.params.reasoning_format = fmt;
+        return *this;
+    }
+
+    peg_test_builder & tools(std::vector<common_chat_tool> tools) {
+        tc_.params.tools = std::move(tools);
+        return *this;
+    }
+
+    peg_test_builder & enable_thinking(bool val) {
+        tc_.params.enable_thinking = val;
+        return *this;
+    }
+
+    peg_test_builder & parallel_tool_calls(bool val) {
+        tc_.params.parallel_tool_calls = val;
+        return *this;
+    }
+
+    peg_test_builder & json_schema(const std::string & schema) {
+        tc_.params.json_schema = schema;
+        return *this;
+    }
+
+    peg_test_builder & is_partial(bool val) {
+        tc_.is_partial = val;
+        return *this;
+    }
+
+    // Expect setters
+    peg_test_builder & expect(const common_chat_msg & msg) {
+        tc_.expect = msg;
+        return *this;
+    }
+
+    peg_test_builder & expect_content(const std::string & content) {
+        tc_.expect.content = content;
+        return *this;
+    }
+
+    peg_test_builder & expect_reasoning(const std::string & reasoning) {
+        tc_.expect.reasoning_content = reasoning;
+        return *this;
+    }
+
+    peg_test_builder & expect_tool_calls(std::vector<common_chat_tool_call> calls) {
+        tc_.expect.tool_calls = std::move(calls);
+        return *this;
+    }
+
+    // Execute the test
+    void run() {
+        // Check template filter
+        if (!g_template_filter.empty()) {
+            // Case-insensitive substring match
+            std::string template_path_lower = tester_.template_path();
+            std::string filter_lower        = g_template_filter;
+            std::transform(template_path_lower.begin(), template_path_lower.end(), template_path_lower.begin(),
+                           ::tolower);
+            std::transform(filter_lower.begin(), filter_lower.end(), filter_lower.begin(), ::tolower);
+            if (template_path_lower.find(filter_lower) == std::string::npos) {
+                // Skip this test
+                return;
+            }
+        }
+        LOG_INF("\n\x1b[38;5;126m[%s]\x1b[0m\n%s\n\n", tester_.template_path().c_str(), tc_.input.c_str());
+        test_peg_parser(tester_.tmpls_.get(), [this](peg_test_case & t) { t = tc_; }, tester_.detailed_debug_);
+    }
+};
+
+peg_test_builder peg_tester::test(const std::string & input) {
+    return peg_test_builder(*this, input);
+}
+
 static void test_msgs_oaicompat_json_conversion() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
     std::vector<common_chat_msg> msgs{
         message_user,
         message_user_parts,
@@ -619,54 +947,50 @@ static void test_msgs_oaicompat_json_conversion() {
         message_assist_call_id,
         message_assist_call_idx,
         message_assist_call_python,
-        message_assist_call_code_interpreter,
     };
     for (const auto & msg : msgs) {
-        auto oai_json = common_chat_msgs_to_json_oaicompat({msg});
-        auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json);
+        auto oai_json = common_chat_msgs_to_json_oaicompat({ msg });
+        auto msgs2    = common_chat_msgs_parse_oaicompat(oai_json);
         assert_equals((size_t) 1, msgs2.size());
-        auto msg2 = msgs2[0];
+        const auto & msg2 = msgs2[0];
         assert_msg_equals(msg, msg2);
     }
-    assert_equals(
-        std::string(
-            "[\n"
-            "  {\n"
-            "    \"role\": \"user\",\n"
-            "    \"content\": [\n"
-            "      {\n"
-            "        \"type\": \"text\",\n"
-            "        \"text\": \"Hey\"\n"
-            "      },\n"
-            "      {\n"
-            "        \"type\": \"text\",\n"
-            "        \"text\": \"there\"\n"
-            "      }\n"
-            "    ]\n"
-            "  }\n"
-            "]"
-        ),
-        common_chat_msgs_to_json_oaicompat({message_user_parts}).dump(2));
-
-    assert_equals(
-        std::string(
-            "[\n"
-            "  {\n"
-            "    \"role\": \"assistant\",\n"
-            "    \"content\": \"\",\n"
-            "    \"tool_calls\": [\n"
-            "      {\n"
-            "        \"type\": \"function\",\n"
-            "        \"function\": {\n"
-            "          \"name\": \"python\",\n"
-            "          \"arguments\": \"{\\\"code\\\":\\\"print('hey')\\\"}\"\n"
-            "        }\n"
-            "      }\n"
-            "    ]\n"
-            "  }\n"
-            "]"
-        ),
-        common_chat_msgs_to_json_oaicompat({message_assist_call_python}).dump(2));
+    assert_equals(std::string("[\n"
+                              "  {\n"
+                              "    \"role\": \"user\",\n"
+                              "    \"content\": [\n"
+                              "      {\n"
+                              "        \"type\": \"text\",\n"
+                              "        \"text\": \"Hey\"\n"
+                              "      },\n"
+                              "      {\n"
+                              "        \"type\": \"text\",\n"
+                              "        \"text\": \"there\"\n"
+                              "      }\n"
+                              "    ]\n"
+                              "  }\n"
+                              "]"),
+                  common_chat_msgs_to_json_oaicompat({ message_user_parts }).dump(2));
+
+    // Note: content is "" instead of null due to workaround for templates that render null as "None"
+    assert_equals(std::string("[\n"
+                              "  {\n"
+                              "    \"role\": \"assistant\",\n"
+                              "    \"content\": \"\",\n"
+                              "    \"tool_calls\": [\n"
+                              "      {\n"
+                              "        \"type\": \"function\",\n"
+                              "        \"function\": {\n"
+                              "          \"name\": \"python\",\n"
+                              "          \"arguments\": {\n"
+                              "            \"code\": \"print('hey')\"\n"
+                              "          }\n"
+                              "        }\n"
+                              "      }\n"
+                              "    ]\n"
+                              "  }\n"
+                              "]"),
+                  common_chat_msgs_to_json_oaicompat({ message_assist_call_python }).dump(2));
 
     auto res = common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\", \"tool_calls\": []}]"));
     assert_equals<size_t>(1, res.size());
@@ -685,16 +1009,15 @@ static void test_msgs_oaicompat_json_conversion() {
 }
 
 static void test_tools_oaicompat_json_conversion() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
     std::vector<common_chat_tool> tools{
         special_function_tool,
         python_tool,
-        code_interpreter_tool,
     };
 
     for (const auto & tool : tools) {
-        auto oai_json = common_chat_tools_to_json_oaicompat({tool});
-        auto tools2 = common_chat_tools_parse_oaicompat(oai_json);
+        auto oai_json = common_chat_tools_to_json_oaicompat({ tool });
+        auto tools2   = common_chat_tools_parse_oaicompat(oai_json);
         assert_equals((size_t) 1, tools2.size());
         auto tool2 = tools2[0];
         assert_equals(tool.name, tool2.name);
@@ -702,3235 +1025,1052 @@ static void test_tools_oaicompat_json_conversion() {
         assert_equals(json::parse(tool.parameters).dump(2), json::parse(tool2.parameters).dump(2));
     }
 
-    assert_equals(
-        std::string(
-            "[\n"
-            "  {\n"
-            "    \"type\": \"function\",\n"
-            "    \"function\": {\n"
-            "      \"name\": \"special_function\",\n"
-            "      \"description\": \"I'm special\",\n"
-            "      \"parameters\": {\n"
-            "        \"type\": \"object\",\n"
-            "        \"properties\": {\n"
-            "          \"arg1\": {\n"
-            "            \"type\": \"integer\",\n"
-            "            \"description\": \"The arg.\"\n"
-            "          }\n"
-            "        },\n"
-            "        \"required\": [\n"
-            "          \"arg1\"\n"
-            "        ]\n"
-            "      }\n"
-            "    }\n"
-            "  }\n"
-            "]"
-        ),
-        common_chat_tools_to_json_oaicompat({special_function_tool}).dump(2));
+    assert_equals(std::string("[\n"
+                              "  {\n"
+                              "    \"type\": \"function\",\n"
+                              "    \"function\": {\n"
+                              "      \"name\": \"special_function\",\n"
+                              "      \"description\": \"I'm special\",\n"
+                              "      \"parameters\": {\n"
+                              "        \"type\": \"object\",\n"
+                              "        \"properties\": {\n"
+                              "          \"arg1\": {\n"
+                              "            \"type\": \"integer\",\n"
+                              "            \"description\": \"The arg.\"\n"
+                              "          }\n"
+                              "        },\n"
+                              "        \"required\": [\n"
+                              "          \"arg1\"\n"
+                              "        ]\n"
+                              "      }\n"
+                              "    }\n"
+                              "  }\n"
+                              "]"),
+                  common_chat_tools_to_json_oaicompat({ special_function_tool }).dump(2));
+}
+
+static void test_template_output_peg_parsers(bool detailed_debug) {
+    LOG_DBG("%s\n", __func__);
+
+    // JSON schemas
+    const char * invoice_schema = R"({
+        "type": "object",
+        "properties": {
+            "amount": {"type": "number"},
+            "date": {"type": "string"}
+        }
+    })";
 
     {
-        auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func", "description": "A test"}}])"));
-        assert_equals((size_t) 1, tools_no_params.size());
-        assert_equals(std::string("test_func"), tools_no_params[0].name);
-        assert_equals(std::string("A test"), tools_no_params[0].description);
-        assert_equals(std::string("{}"), tools_no_params[0].parameters);
+        // Ministral-3-14B-Reasoning-2512
+        auto tst = peg_tester("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+            .expect_content("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+            .run();
+
+        tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "[THINK]I'm\nthinking[/THINK]"
+               R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})"
+                 R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        tst.test(
+               "[THINK]I need to output the invoice details in JSON[/THINK]"
+               "```json\n"
+               R"({"amount": 123.45, "date": "2025-12-03"})"
+               "\n```")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .json_schema(invoice_schema)
+            .expect_reasoning("I need to output the invoice details in JSON")
+            .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})")
+            .run();
     }
+
     {
-        auto tools_no_desc = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func", "parameters": {"type": "object"}}}])"));
-        assert_equals((size_t) 1, tools_no_desc.size());
-        assert_equals(std::string("test_func"), tools_no_desc[0].name);
-        assert_equals(std::string(""), tools_no_desc[0].description);
+        // NVIDIA Nemotron-3 Nano
+        auto tst = peg_tester("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").enable_thinking(false).expect(message_assist).run();
+
+        tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_NONE)
+            .expect_content("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .run();
+
+        tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "I'm\nthinking\n</think>\n"
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>\n"
+               "<tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "<parameter=arg2>\n2\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Hello, world!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({
+                python_tool
+        })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+            })
+            .run();
+
+        tst.test(
+               "I need to output the invoice details in JSON\n"
+               "</think>\n"
+               R"({"amount": 123.45, "date": "2025-12-03"})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .json_schema(invoice_schema)
+            .expect_reasoning("I need to output the invoice details in JSON")
+            .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})")
+            .run();
     }
+
     {
-        auto tools_minimal = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func"}}])"));
-        assert_equals((size_t) 1, tools_minimal.size());
-        assert_equals(std::string("test_func"), tools_minimal[0].name);
-        assert_equals(std::string(""), tools_minimal[0].description);
-        assert_equals(std::string("{}"), tools_minimal[0].parameters);
+        // CohereForAI Command-R 7B (2024-tool_use)
+        auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug);
+
+        tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>")
+            .expect(message_assist_thoughts_unparsed_r7b)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+               "]<|END_ACTION|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_thoughts_call_idx)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", ")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .is_partial(true)
+            .expect(message_assist_thoughts_partial_call)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|><|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+               "]<|END_ACTION|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_idx)
+            .run();
     }
-}
-
-// for compat; ref: https://github.com/ggml-org/llama.cpp/pull/18961
-struct test_parser_params {
-    common_chat_format       format                = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    common_reasoning_format  reasoning_format      = COMMON_REASONING_FORMAT_NONE;
-    bool                     reasoning_in_content  = false;
-    bool                     thinking_forced_open  = false;
-    bool                     parse_tool_calls      = true;
-};
 
-static common_chat_msg test_chat_parse(const std::string & input, bool is_partial, const test_parser_params & syntax) {
-    common_chat_parser_params params;
-    params.format               = syntax.format;
-    params.reasoning_format     = syntax.reasoning_format;
-    params.reasoning_in_content = syntax.reasoning_in_content;
-    params.thinking_forced_open = syntax.thinking_forced_open;
-    params.parse_tool_calls     = syntax.parse_tool_calls;
-    return common_chat_parse(input, is_partial, params);
-}
+    {
+        // Google Gemma 2 2B - does not support tool calling
+        auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja");
 
-static void test_template_output_parsers() {
-    printf("[%s]\n", __func__);
+        tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run();
 
-    common_chat_templates_inputs inputs_no_tools;
-    inputs_no_tools.messages                = {message_user};
+        tst.test("Line 1\nLine 2\nLine 3").expect(simple_assist_msg("Line 1\nLine 2\nLine 3")).run();
+    }
 
-    common_chat_templates_inputs inputs_tools;
-    inputs_tools.messages                   = {message_user};
-    inputs_tools.tools                      = {special_function_tool};
+    {
+        // Qwen-QwQ-32B (reasoning model)
+        auto tst = peg_tester("models/templates/Qwen-QwQ-32B.jinja");
 
-    common_chat_templates_inputs inputs_tools_builtin;
-    inputs_tools_builtin.messages           = {message_user};
-    inputs_tools_builtin.tools              = {python_tool};
+        // QwQ always has thinking forced open - input starts after the <think>\n in the prompt
+        tst.test("Let me think about this...\n</think>\nThe answer is 42.")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(simple_assist_msg("The answer is 42.", "Let me think about this..."))
+            .run();
 
+        tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run();
+    }
     {
-        // Not supported yet
-        auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja");
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        // NousResearch-Hermes-2-Pro and Hermes-3 (tool calling models)
+        auto tst = peg_tester("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja", detailed_debug);
+
+        tst.test(
+               "<tool_call>\n"
+               "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "Hello, world!\nWhat's up?<tool_call>\n"
+               "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_content)
+            .run();
+
+        // Note: Hermes template doesn't support thinking/reasoning natively
+        // Note: We only support one tool calling format per template, no alternate formats
     }
     {
-        auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja");
-        std::vector<std::string>   end_tokens{ "<|END_OF_TURN_TOKEN|>" };
-
-        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, params.format);
-            assert_equals(false, params.thinking_forced_open);
-        }
+        // Test simple content-only template
+        auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja", detailed_debug);
 
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_COMMAND_R7B}));
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_COMMAND_R7B}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ true,
-                    /* .thinking_forced_open = */ false,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_r7b,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_COMMAND_R7B}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_call_idx,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_ACTION|>[\n"
-                "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
-                "]<|END_ACTION|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_no_content,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_ACTION|>[\n"
-                "    {\"tool_call_id\": \"0\", \"tool_name\": \"special",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        test_templates(tmpls.get(), end_tokens, message_assist_call_idx, tools,
-                      "<|START_THINKING|><|END_THINKING|>"
-                      "<|START_ACTION|>[\n"
-                      "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
-                      "]<|END_ACTION|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      COMMON_REASONING_FORMAT_DEEPSEEK);
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "<|START_RESPONSE|>Hello, world!\n"
-                      "What's up?<|END_RESPONSE|>",
-                      /* expect_grammar_triggered= */ false);
-    }
-    // TODO @ngxson : generic tool calls is too costly to maintain, consider removing it in the future
-    {
-        auto tmpls = read_templates("models/templates/google-gemma-2-2b-it.jinja");
-        std::vector<std::string>   end_tokens{ "<end_of_turn>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GENERIC,
-                      common_chat_templates_apply(
-                          read_templates("models/templates/microsoft-Phi-3.5-mini-instruct.jinja").get(),
-                          inputs_tools)
-                          .format);
-
-        // Generic tool calls doesn't generate / parse content-only messages symmetrically.
-
-        assert_equals(
-            simple_assist_msg("{ \"tool_call\" : { \"name\" : \"t"),
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"t",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GENERIC,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_equals(
-            message_assist_empty,
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"t",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-
-        assert_equals(
-            simple_assist_msg("", "", "puppeteer_screenshot", "{\"name\":\"servethehome_homepage\","),
-            test_chat_parse(
-                R"({"tool_call": {"name": "puppeteer_screenshot", "arguments": {"name": "servethehome_homepage",)",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-
-        assert_equals(
-            message_assist_call_empty_args,
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"special_function\"",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-        assert_equals(
-            message_assist_call_cutoff_args,
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"special_function\", \"arguments\" : { \"arg",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "{\n"
-                "  \"response\": \"Hello, world!\\nWhat's up?\"\n"
-                "}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-#if 0
-        test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
-                      "{\n"
-                      "  \"tool_calls\": [\n"
-                      "    {\n"
-                      "      \"name\": \"special_function\",\n"
-                      "      \"arguments\": {\n"
-                      "        \"arg1\": 1\n"
-                      "      },\n"
-                      "      \"id\": \"123456789\"\n"
-                      "    }\n"
-                      "  ],\n"
-                      "  \"content\": \"\"\n"
-                      "}");
-#endif
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
     }
     {
-        auto tmpls = read_templates("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja");
-        std::vector<std::string>   end_tokens{ "</s>" };
+        // IBM Granite (reasoning and tool calling model)
+        auto tst = peg_tester("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
 
-        assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        tst.test("<think>I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
 
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(
-            tmpls.get(), end_tokens, message_assist_call_id, tools,
-            "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]");
+        // TODO: pending support for WRAPPED_WITH_REASONING
+        // tst.test("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>")
+        //     .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+        //     .expect(message_assist_thoughts)
+        //     .run();
     }
+
     {
-        assert_msg_equals(
-            simple_assist_msg("Réponse", "raisonnement"),
-            test_chat_parse(
-                message_assist_thoughts_unparsed_magistral.content,
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_MAGISTRAL,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
+        // ByteDance-Seed-OSS (reasoning and tool calling model)
+        auto tst = peg_tester("models/templates/ByteDance-Seed-OSS.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        tst.test("<seed:think>I'm thinking about the answer</seed:think>Hello, world!")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(simple_assist_msg("Hello, world!", "I'm thinking about the answer"))
+            .run();
+
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>\n"
+               "<seed:tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "<parameter=arg2>2</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=todo_list>\n"
+               "<parameter=todos>[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({
+                todo_list
+        })
+            .expect_tool_calls({
+                { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+            })
+            .run();
+
+        // single-quote normalization
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=todo_list>\n"
+               "<parameter=todos>[{'item': 'Check stuff', 'selected': false}, {'item': 'Prepare stuff', 'selected': true}]</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({
+                todo_list
+        })
+            .expect_tool_calls({
+                { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+            })
+            .run();
+
+        // tool call with inside quotes
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=edit>\n"
+               "<parameter=filename>\n"
+               "foo.cpp\n"
+               "</parameter>\n"
+               "<parameter=oldString>"
+               "def foo(arg = \"14\"):\n"
+               "    return arg + \"bar\"\n"
+               "\n"
+               "</parameter>\n"
+               "<parameter=newString>"
+               "def foo(arg = \"15\"):\n"
+               "    pass\n"
+               "\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({
+                edit_tool
+        })
+            .expect_tool_calls({
+                { "edit", "{\"filename\": \"foo.cpp\", "
+                    "\"oldString\": \"def foo(arg = \\\"14\\\"):\\n    return arg + \\\"bar\\\"\\n\", "
+                    "\"newString\": \"def foo(arg = \\\"15\\\"):\\n    pass\\n\"}", {}
+                }
+            })
+            .run();
     }
-    {
-        auto tmpls = read_templates("models/templates/Qwen-QwQ-32B.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
 
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-    }
     {
-        auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(
-            COMMON_CHAT_FORMAT_HERMES_2_PRO,
-            common_chat_templates_apply(
-                read_templates("models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja").get(),
-                inputs_tools)
-                .format);
-        assert_equals(
-            COMMON_CHAT_FORMAT_HERMES_2_PRO,
-            common_chat_templates_apply(
-                read_templates("models/templates/Qwen-Qwen2.5-7B-Instruct.jinja").get(),
-                inputs_tools)
-                .format);
-
-        // Test parsing
-        assert_msg_equals(
-            simple_assist_msg("", "", "python", ""),
-            test_chat_parse(
-                "```json\n"
-                "<function_call> { \"name\" : \"python\"",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            simple_assist_msg("Let's call something\n"),
-            test_chat_parse(
-                "Let's call something\n"
-                "<tool_call>{\"name\"",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("Let's call something\n"),
-            test_chat_parse(
-                "Let's call something\n"
-                "<tool_call>{\"name",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                // QwQ-32B's template adds a trailing <think> if add_generation_prompt
-                "I'm\nthinking</think>\n"
-                "<tool_call>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function name=\"special_function\">\n"
-                "{\"arg1\": 1}\n"
-                "</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tool>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tools>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tools>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<response>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</response>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```xml\n"
-                "<response>\n"
-                "    {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</response>\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```xml\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```json\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```json\n"
-                "\n"
-                "                    <function_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}} \n"
-                "                    </function_call> \n"
-                "``` ",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<json>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</json>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<xml>\n"
-                "  {\n"
-                "    \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}\n"
-                "  }\n"
-                "</xml>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<JSON>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</JSON>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\n  \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        // Test multiple tool calls
-        common_chat_msg message_assist_multiple_calls;
-        message_assist_multiple_calls.role = "assistant";
-        message_assist_multiple_calls.content = "";
-        message_assist_multiple_calls.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
-        message_assist_multiple_calls.tool_calls.push_back({"python", "{\"code\":\"print('hello')\"}", ""});
-
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "{\"name\": \"python\", \"arguments\": {\"code\":\"print('hello')\"}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}</function>\n"
-                "<function=python>{\"code\":\"print('hello')\"}</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "This is not a tool call:",
-                "",
-                "special_function",
-                "{\"arg1\": 1}"),
-            test_chat_parse(
-                "This is not a tool call:\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        // assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-        //     test_chat_parse(
-        //         "I'm\nthinking</think>Hello, world!\nWhat's up?",
-        //         COMMON_CHAT_FORMAT_HERMES_2_PRO));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_md,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ true,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_md_partial,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ true,
-                    /* .thinking_forced_open = */ false,
-                }));
-        assert_msg_equals(message_assist_thoughts_unopened_unparsed,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                      "</tool_call>");
-
-        // Test multiple tool calls with template
-        common_chat_msg message_assist_multiple_calls_template;
-        message_assist_multiple_calls_template.role = "assistant";
-        message_assist_multiple_calls_template.content = "";
-        message_assist_multiple_calls_template.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
-        message_assist_multiple_calls_template.tool_calls.push_back({"python", "{\"code\":\"print('test')\"}", ""});
-
-        test_templates(tmpls.get(), end_tokens, message_assist_multiple_calls_template, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                      "</tool_call>\n"
-                      "<tool_call>\n"
-                      "{\"name\": \"python\", \"arguments\": {\"code\":\"print('test')\"}}\n"
-                      "</tool_call>");
-
-        test_templates(tmpls.get(), end_tokens, message_assist_call_python_lines, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"python\", \"arguments\": {\"code\":\"# This is a program:\\nprint('hey')\"}}\n"
-                      "</tool_call>");
-        assert_msg_equals(
-            simple_assist_msg("", /* reasoning_content= */ "<tool_call>nah uhg</tool_call>"),
-            test_chat_parse(
-                "<think><tool_call>nah uhg</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
+        // Qwen3-Coder (tool calling with XML-style format)
+        auto tst = peg_tester("models/templates/Qwen3-Coder.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>\n"
+               "<tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "<parameter=arg2>\n"
+               "2\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        // Test with code content (multiline)
+        tst.test(
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Hello, world!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                python_tool
+        })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+            })
+            .run();
+
+        // Test with HTML tag content
+        tst.test(
+               "<tool_call>\n"
+               "<function=html>\n"
+               "<parameter=markup>\n"
+               "<html>\n"
+               " <head>\n"
+               "  <title>Hello!</title>\n"
+               " </head>\n"
+               "</html>\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                html_tool
+        })
+            .expect_tool_calls({
+                { "html", "{\"markup\": \"<html>\\n <head>\\n  <title>Hello!</title>\\n </head>\\n</html>\"}", {} },
+            })
+            .run();
+
+        // Test with TODO list (array of objects)
+        tst.test(
+               "<tool_call>\n"
+               "<function=todo_list>\n"
+               "<parameter=todos>\n"
+               "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                todo_list
+        })
+            .expect_tool_calls({
+                { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+            })
+            .run();
     }
     {
-        auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-                      common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-                      common_chat_templates_apply(
-                          read_templates("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja").get(),
-                          inputs_tools_builtin)
-                          .format);
-
-        assert_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LLAMA_3_X}));
-
-        // test_templates(tmpls.get(), end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call_code_interpreter, llama_3_1_tools,
-                      "<|python_tag|>code_interpreter.call(code=\"print('hey')\")");
-        test_templates(tmpls.get(), end_tokens, message_assist_call_python, tools,
-                      "<|python_tag|>python.call(code=\"print('hey')\")");
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"XYZCITY\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
+            .run();
     }
-    {
-        auto tmpls = read_templates("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
 
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
-    }
     {
-        auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
-                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-            common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
-                        common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-        for (auto is_partial : { false, true }) {
-            assert_equals(
-                message_assist_call,
-                test_chat_parse(
-                    "<function=special_function>{\"arg1\": 1}</function>",
-                    is_partial,
-                    {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
-        }
-
-        assert_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}<",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<function=special_function>{\"arg1\": 1}</function>");
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls_and_reasoning("get_time", "{\"city\":\"Tokyo\"}", "REASONING"))
+            .run();
     }
+
     {
-        auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.2.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "Hello, world!\nnono\nWhat's up?",
-                "",
-                "special_function",
-                "{\"arg1\": 1}"),
-            test_chat_parse(
-                "all\n"
-                "Hello, world!\n"
-                "nono\n"
-                "What's up?>>>special_function\n"
-                "{\"arg1\": 1}\n",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call_python_lines,
-            test_chat_parse(
-                "python\n"
-                "# This is a program:\n"
-                "print('hey')",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call_python_lines_unclosed,
-            test_chat_parse(
-                "python\n"
-                "# This is a program:\n"
-                "print('hey')",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "special_function\n"
-                "{\"arg1\": 1} \n                    ",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "all\n"
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-
-        test_templates(tmpls.get(), end_tokens, message_assist, {},
-                      "all\n"
-                      "Hello, world!\n"
-                      "What's up?",
-                      /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "special_function\n"
-                      "{\"arg1\": 1}");
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "REASONING</think>CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": "
+               "\"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({
+                get_time_tool, get_weather_tool
+        })
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .parallel_tool_calls(true)
+            .expect(message_with_reasoning_content_and_multiple_tool_calls(
+                "REASONING", "CONTENT",
+                { { "get_time", "{\"city\":\"Paris\"}" }, { "get_weather", "{\"city\":\"Paris\"}" } }))
+            .run();
     }
-    {
-        auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja");
-        std::vector<std::string>   end_tokens{ "<|eot_id|>" };
 
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]");
-    }
     {
-        // Original DeepSeek R1 template. Leaves <｜tool▁calls▁begin｜> and others unclosed. Our logic fixes the prompt.
-        auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
-
-        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, params.format);
-            assert_equals(true, params.thinking_forced_open);
-        }
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "<think>I'm\nthinking"),
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I need to remember the correct syntax. It starts with <｜tool▁calls▁begin｜> and ends with"),
-            test_chat_parse(
-                "I need to remember the correct syntax. It starts with <｜tool▁calls▁begin｜> and ends with",
-                /* is_partial= */ true,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unopened_unparsed,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            // Latest template update (ast of 20250209) adds a trailing <think>\n if add_generation_prompt is true.
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        // test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-        //               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-        //               "```json\n"
-        //               "{\"arg1\": 1}\n"
-        //               // Look what's not here: <｜tool▁calls▁end｜> (also missing the <｜end▁of▁sentence｜>, but that is removed lazily by the test's delta logic)
-        //               "```<｜tool▁call▁end｜>",
-        //               /* expect_grammar_triggered= */ true,
-        //               /* test_grammar_if_triggered= */ false);
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test("REASONING</think>\nCONTENT")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(simple_assist_msg("CONTENT", "REASONING\n"))
+            .run();
     }
+
     {
-        // Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
-        auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1,                   common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1,                   common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-
-        assert_msg_equals(message_assist_call_thoughts_unparsed,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>\n\n"
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<｜tool▁calls｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>\n\n"
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>");
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test("CONTENT").expect(simple_assist_msg("CONTENT", "")).run();
     }
+
+    // GLM-4.6 tests - format: <tool_call>function_name\n<arg_key>...</arg_key>\n<arg_value>...</arg_value>\n</tool_call>
     {
-        auto tmpls = read_templates("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja");
-        std::vector<std::string> end_tokens{ "<|end_of_text|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-        assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(simple_assist_msg("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>"),
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(message_assist_empty,
-            test_chat_parse(
-                "<think",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_empty,
-            test_chat_parse(
-                "<think",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(message_assist_thoughts_no_content,
-            test_chat_parse(
-                "<think>I'm\nthinking",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(
-            message_assist_empty,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_empty_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\"",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_cutoff_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_cutoff_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(
-            message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-    // TODO @ngxson : generic tool call should be removed in the future
-#if 0
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
-                      "{\n"
-                      "  \"tool_calls\": [\n"
-                      "    {\n"
-                      "      \"name\": \"special_function\",\n"
-                      "      \"arguments\": {\n"
-                      "        \"arg1\": 1\n"
-                      "      },\n"
-                      "      \"id\": \"123456789\"\n"
-                      "    }\n"
-                      "  ],\n"
-                      "  \"content\": \"\"\n"
-                      "}",
-                      /* expect_grammar_triggered= */ false
-        );
-#endif
-    }
+        auto tst = peg_tester("models/templates/GLM-4.6.jinja", detailed_debug);
+        tst.test(
+               "<tool_call>special_function\n"
+               "<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // GLM-4.7-Flash tests - format: <tool_call>function_name<arg_key>...</arg_key><arg_value>...</arg_value></tool_call>
+    // Note: Template uses forced-open thinking mode (prompt ends with <think>)
     {
-        auto tmpls = read_templates("models/templates/openai-gpt-oss-120b.jinja");
-        std::vector<std::string> end_tokens{ "<|return|>", "<|call|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        assert_msg_equals(simple_assist_msg("", "I'm\nthink"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthink",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>analysis to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-
-        // Test parse_tool_calls == false
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-
-        // Test reasoning formats
-        assert_msg_equals(
-            simple_assist_msg(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
-                }));
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ true,
-                }));
-
-        // Test tool calling in role header
-        assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                " to=functions.special_function<|channel|>commentary <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                " to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-    }
+        auto tst = peg_tester("models/templates/GLM-4.7-Flash.jinja", detailed_debug);
+
+        // Pure content (no reasoning)
+        tst.test("Hello, world!\nWhat's up?")
+            .enable_thinking(false)
+            .expect(message_assist)
+            .run();
+
+        // Reasoning with content (forced-open mode - input starts after <think>)
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Tool call without reasoning
+        tst.test(
+               "<tool_call>special_function"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "</tool_call>")
+            .enable_thinking(false)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with reasoning (forced-open mode)
+        tst.test(
+               "I'm\nthinking</think>"
+               "<tool_call>special_function"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // String argument starting with '[' - should NOT be treated as JSON array
+        // This tests the fix for Godot scene files and similar content
+        tst.test(
+               "<tool_call>html"
+               "<arg_key>markup</arg_key><arg_value>[gd_scene load_steps=3 format=3]</arg_value>"
+               "</tool_call>")
+            .enable_thinking(false)
+            .tools({ html_tool })
+            .expect_tool_calls({
+                { "html", "{\"markup\": \"[gd_scene load_steps=3 format=3]\"}", {} },
+            })
+            .run();
+
+        // Multiple tool calls
+        // Note: Parallel tool calls streaming test skipped - the KEY_VALUE_TAGS format has
+        // partial parsing edge cases when function names share common prefixes (special_function vs special_function_with_opt)
+        // The grammar and full parsing work correctly, but incremental streaming detection needs more work.
+    }
+
+    // Kimi-K2-Thinking tests - FUNC_PREFIXED_INDEXED format
     {
-        // Seed-OSS format tests
-        auto tmpls = read_templates("models/templates/ByteDance-Seed-OSS.jinja");
-        std::vector<std::string> end_tokens{ "<seed:eos>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-
-        // Test simple reasoning content
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!", "I'm thinking about the answer"),
-            test_chat_parse(
-                "<seed:think>I'm thinking about the answer</seed:think>Hello, world!",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test budget reflection tags
-        common_chat_msg msg_budget_reflect;
-        msg_budget_reflect.role = "assistant";
-        msg_budget_reflect.content = "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>I need to calculate this step by step.";
-        msg_budget_reflect.reasoning_content = "Token usage: 45/1000\nI should continue thinking to find the best solution.";
-        assert_msg_equals(
-            msg_budget_reflect,
-            test_chat_parse(
-                "<seed:think>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:think>"
-                "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>"
-                "I need to calculate this step by step.",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test tool calls with Seed-OSS format
-        common_chat_msg msg_tool_call;
-        msg_tool_call.role = "assistant";
-        msg_tool_call.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
-        assert_msg_equals(
-            msg_tool_call,
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1, 2, 3]</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
-
-        // Test reasoning + tool call combination
-        common_chat_msg msg_reasoning_tool;
-        msg_reasoning_tool.role = "assistant";
-        msg_reasoning_tool.content = "";
-        msg_reasoning_tool.reasoning_content = "I need to calculate the sum of these numbers";
-        msg_reasoning_tool.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
-        assert_msg_equals(
-            msg_reasoning_tool,
-            test_chat_parse(
-                "<seed:think>I need to calculate the sum of these numbers</seed:think>"
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1, 2, 3]</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test deltas: the number of tool calls in partial parses should never decrease
-        std::string tool_msg = "<seed:tool_call>\n"
-            "<function=fun>\n"
-            "<parameter=smth>[1, 2, 3]</parameter>\n"
-            "</function>";
-        std::size_t previousToolCalls = 0;
-        for (std::size_t i = std::string("<seed:tool_call>").length(); i < tool_msg.length() - 1; i++) {
-            auto partial = tool_msg.substr(0, i);
-            auto partial_res = test_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK });
-            if (partial_res.tool_calls.size() < previousToolCalls) {
-                throw std::runtime_error("Tool call size decreased on partial: " + partial + " from " + std::to_string(previousToolCalls) + " to " + std::to_string(partial_res.tool_calls.size()));
-            }
-            previousToolCalls = partial_res.tool_calls.size();
-        }
-
-        // Test multiple parameters in tool call
-        common_chat_msg msg_multi_param;
-        msg_multi_param.role = "assistant";
-        msg_multi_param.tool_calls.push_back({"process_data", "{\"input\": \"test\", \"format\": \"json\"}", ""});
-        assert_msg_equals(
-            msg_multi_param,
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=process_data>\n"
-                "<parameter=input>test</parameter>\n"
-                "<parameter=format>json</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
-
-        // Test partial parsing for incomplete tool call - don't actually add the call until parsing parameters is done
-        assert_msg_equals(
-            simple_assist_msg("", "", "calculate_sum", "{\"numbers\":"),
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1,\n",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
-
-        // Test incomplete reasoning tag
-        assert_msg_equals(
-            simple_assist_msg("", "I was thinking"),
-            test_chat_parse(
-                "<seed:think>I was thinking",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test content without reasoning
-        assert_msg_equals(
-            simple_assist_msg("This is a simple response without reasoning."),
-            test_chat_parse(
-                "This is a simple response without reasoning.",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
+        auto tst = peg_tester("models/templates/Kimi-K2-Thinking.jinja", detailed_debug);
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
     }
-    {
-        auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-Nano-v2.jinja");
-        std::vector<std::string> end_tokens{ "<SPECIAL_12>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
 
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?\n",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                      /* expect_grammar_triggered= */ true
-        );
-    }
+    // Apertus-8B-Instruct tests - FUNC_NAME_AS_KEY format
+    // Format: <|tools_prefix|>[{"function_name": {...arguments...}}]<|tools_suffix|>
     {
-        auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-V3.1.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
-
-        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, params.format);
-            assert_equals(true, params.thinking_forced_open);
-        }
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        // variant: thinking forced open, reasoning_format none
-        assert_msg_equals(
-            simple_assist_msg("REASONING</think>ok", ""),
-            test_chat_parse(
-                "REASONING</think>ok",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: happy path for when it works as the model card says it should
-        assert_msg_equals(
-            simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: simple + thinking open
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: simple + multiple tool calls
-        common_chat_msg message_assist_multiple_calls;
-        message_assist_multiple_calls.role = "assistant";
-        message_assist_multiple_calls.content = "CONTENT";
-        message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""});
-        message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""});
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
-        //          This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
-        //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
-        //          add the reasoning content as regular content and parse the tool calls.
-        assert_msg_equals(
-            simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content + no closing think + partial
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>", "", ""),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ true,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking not forced open + missing reasoning + no tool calls
-        assert_msg_equals(
-            simple_assist_msg("CONTENT", ""),
-            test_chat_parse(
-                "CONTENT",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
+        auto tst = peg_tester("models/templates/Apertus-8B-Instruct.jinja", detailed_debug);
+        tst.test("<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
     }
-    {
-        auto tmpls = read_templates("models/templates/Apertus-8B-Instruct.jinja");
-        std::vector<std::string> end_tokens{ "<|assistant_end|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
 
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
+    // MiniMax-M2 tests - XML invoke format with parameter tags
+    // Format: <minimax:tool_call><invoke name="func"><parameter name="key">value</parameter></invoke></minimax:tool_call>
+    {
+        auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug);
+        tst.test(
+               "<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter "
+               "name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
 
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                      /* expect_grammar_triggered= */ true
-        );
+    // NVIDIA-Nemotron-Nano-v2 tests - <TOOLCALL>...</TOOLCALL> format
+    // Format: <TOOLCALL>[{"name": "func", "arguments": {...}}]</TOOLCALL>
+    {
+        auto tst = peg_tester("models/templates/NVIDIA-Nemotron-Nano-v2.jinja", detailed_debug);
+        tst.test("<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL><SPECIAL_12>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
 
-        // TODO @ngxson : not sure why this fails, but not very important for now
-        // assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));
+    // CohereForAI-c4ai-command-r7b (uses START_RESPONSE/END_RESPONSE, START_THINKING/END_THINKING, START_ACTION/END_ACTION)
+    {
+        auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug);
+        tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run();
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+               "]<|END_ACTION|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_thoughts_call_idx)
+            .run();
+    }
+    // CohereForAI-c4ai-command-r-plus (uses markdown code block format)
+    {
+        auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja", detailed_debug);
+        tst.test("<|CHATBOT_TOKEN|>Hello, world!\nWhat's up?<|END_OF_TURN_TOKEN|>").expect(message_assist).run();
+        // Tool calls: Action: followed by JSON code block
+        tst.test(
+               "Action:\n"
+               "```json\n"
+               "[{\"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}]\n"
+               "```")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // mistralai-Mistral-Nemo-Instruct-2407.jinja
+    {
+        auto tst = peg_tester("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_id)
+            .run();
     }
     {
-        // LFM2 format tests
-        auto tmpls = read_templates("models/templates/llama-cpp-lfm2.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
-
-        auto inputs_tools_forced_json_schema = std::invoke([&]() -> common_chat_templates_inputs {
-            common_chat_templates_inputs inputs;
-            inputs.messages = {
-                std::invoke([&]() -> common_chat_msg {
-                    common_chat_msg msg;
-                    msg.role = "system";
-                    msg.content = "force json schema.\n";
-                    return msg;
-                }),
-                message_user,
-            };
-            inputs.tools = {special_function_tool};
-            return inputs;
-        });
-
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_no_tools);
-            assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
-            assert_equals(false, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-        }
-
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_tools);
-            assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
-            assert_equals(false, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>system
-List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
-<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-            assert_equals(true, params.grammar.empty());
-        }
-
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_tools_forced_json_schema);
-            assert_equals(COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS, params.format);
-            assert_equals(true, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>system
-List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
-<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-            assert_equals(false, params.grammar.empty());
-        }
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test single tool call with JSON format
-        common_chat_msg msg_single_tool_call;
-        msg_single_tool_call.role = "assistant";
-        msg_single_tool_call.tool_calls.push_back({"special_function", "{\"arg1\":1}", ""});
-        assert_msg_equals(
-            msg_single_tool_call,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with string argument
-        common_chat_msg msg_tool_call_string;
-        msg_tool_call_string.role = "assistant";
-        msg_tool_call_string.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_tool_call_string,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with multiple arguments
-        common_chat_msg msg_multi_args;
-        msg_multi_args.role = "assistant";
-        msg_multi_args.tool_calls.push_back({"calculate", "{\"x\":10,\"y\":20,\"operation\":\"add\"}", ""});
-        assert_msg_equals(
-            msg_multi_args,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"calculate\", \"arguments\": {\"x\": 10, \"y\": 20, \"operation\": \"add\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test multiple tool calls in single array
-        common_chat_msg msg_multiple_tools;
-        msg_multiple_tools.role = "assistant";
-        msg_multiple_tools.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        msg_multiple_tools.tool_calls.push_back({"get_time", "{\"timezone\":\"UTC\"}", ""});
-        assert_msg_equals(
-            msg_multiple_tools,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}, {\"name\": \"get_time\", \"arguments\": {\"timezone\": \"UTC\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with content before
-        common_chat_msg msg_content_before_tool;
-        msg_content_before_tool.role = "assistant";
-        msg_content_before_tool.content = "Let me check the weather for you.";
-        msg_content_before_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_content_before_tool,
-            test_chat_parse(
-                "Let me check the weather for you.<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with content after
-        common_chat_msg msg_content_after_tool;
-        msg_content_after_tool.role = "assistant";
-        msg_content_after_tool.content = "Here's the result.";
-        msg_content_after_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_content_after_tool,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>Here's the result.",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with newlines (common in LLM output)
-        common_chat_msg msg_tool_call_newlines;
-        msg_tool_call_newlines.role = "assistant";
-        msg_tool_call_newlines.tool_calls.push_back({"get_current_time", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_tool_call_newlines,
-            test_chat_parse(
-                "<|tool_call_start|>[{\n    \"name\": \"get_current_time\",\n    \"arguments\": {\n        \"location\": \"Paris\"\n    }\n}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Note: LFM2 uses JSON format for tool calls: [{"name": "...", "arguments": {...}}]
-        // Unlike other formats, LFM2 template does not render tool calls in conversation history,
-        // so we don't use test_templates() for tool call generation. Instead, the parsing tests
-        // above verify edge cases and format variations for the tool call output format.
+        auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.1.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("<function=special_function>{\"arg1\": 1}</function>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+    // Functionary v3.2 - recipient-based format: >>>recipient\n{content}
+    {
+        auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.2.jinja", detailed_debug);
+        tst.test(">>>all\nHello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test(">>>special_function\n{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
     }
 
+    // FireFunction
     {
-        auto tmpls = read_templates("models/templates/MiniMax-M2.jinja");
-        std::vector<std::string> end_tokens{ "[e~[" };
-
-        assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "<think>I'm\nthinking</think>\n\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
+        auto tst = peg_tester("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test(" functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
     }
 
+    // DeepSeek R1 Distill Llama 8B - reasoning tests only (forced open thinking)
+    // Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
     {
-        auto tmpls = read_templates("models/templates/GLM-4.6.jinja");
-        std::vector<std::string>   end_tokens{ "<|assistant|>", "<|observation|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>\nHello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }), true);
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}), true);
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }), true);
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}
-            ), true);
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }), true);
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "\n<think>I'm\nthinking</think>\n\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-            test_parser_with_streaming(
-                simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<tool_call>complex_function\n"
-                "<arg_key>name</arg_key>\n"
-                "<arg_value>John Doe</arg_value>\n"
-                "<arg_key>age</arg_key>\n"
-                "<arg_value>30</arg_value>\n"
-                "<arg_key>active</arg_key>\n"
-                "<arg_value>true</arg_value>\n"
-                "<arg_key>score</arg_key>\n"
-                "<arg_value>95.5</arg_value>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
-                "<tool_call>web_search\n"
-                "<arg_key>query</arg_key>\n"
-                "<arg_value>\"From Zero\" Linkin Park album tracklist complete songs</arg_value>\n"
-                "<arg_key>limit</arg_key>\n"
-                "<arg_value>3</arg_value>\n"
-                "<arg_key>type</arg_key>\n"
-                "<arg_value>text</arg_value>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
-
-        // Test interleaved thinking
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\n\nWhat's up?", "I'm\nthinkingThinking2", "special_function", "{\"arg1\": 1}"),
-            "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?", "", "special_function", "{\"arg1\": 1}"),
-            "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "\n<think></think>\nHello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "\n<think></think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?")
+            .enable_thinking(true)  // Forced open
+            .expect(message_assist)
+            .run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+    }
+    // llama-cpp DeepSeek R1 template (always forced-open thinking)
+    {
+        auto tst = peg_tester("models/templates/llama-cpp-deepseek-r1.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+        tst.test(
+               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
+               "```json\n{\"arg1\": 1}```<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({ special_function_tool })
+            .parallel_tool_calls(true)
+            .expect(message_assist_call)
+            .run();
+    }
+    // DeepSeek R1 Distill Qwen 32B - reasoning tests only (forced open thinking)
+    // Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").enable_thinking(true).expect(message_assist).run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+        tst.test(
+               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
+               "```json\n{\"arg1\": 1}```<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+    // Kimi-K2 (moonshotai) - FUNC_PREFIXED_INDEXED format
+    {
+        auto tst = peg_tester("models/templates/moonshotai-Kimi-K2.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+    // Kimi-K2-Instruct - FUNC_PREFIXED_INDEXED format
+    {
+        auto tst = peg_tester("models/templates/Kimi-K2-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // MiMo-VL / Hermes 3 / Qwen 2.5 (Common <tool_call> JSON format)
+    for (const auto & path :
+         { "models/templates/MiMo-VL.jinja", "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
+           "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja" }) {
+        auto tst = peg_tester(path, detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("<tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // Apriel 1.5
+    {
+        auto tst = peg_tester("models/templates/unsloth-Apriel-1.5.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("<tool_calls>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</tool_calls>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
     }
 
+    // Apriel 1.6 Thinker (reasoning-only support)
     {
-        auto tmpls = read_templates("models/templates/Kimi-K2-Thinking.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "<think>I'm\nthinking</think>\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": \"123456\"}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": \"123456\"}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": [1, 2, \"345\", 6]}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": [1, 2, \"345\", 6]}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:0<|tool_call_argument_begin|>"
-                "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg(
-                        "Let me start by examining the relevant files to understand the current implementation.", "",
-                        "read_file",
-                        "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}"),
-                "Let me start by examining the relevant files to understand the current implementation."
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        auto multi_tool_msg = simple_assist_msg("Let me call multiple tools.", "I'm thinking.");
-        multi_tool_msg.tool_calls.push_back({ "read_file", "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}", "" });
-        multi_tool_msg.tool_calls.push_back({ "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}", "" });
-        multi_tool_msg.tool_calls.push_back({ "complex_function", "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}", "" });
-        multi_tool_msg.tool_calls.push_back({ "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" });
-        test_parser_with_streaming(multi_tool_msg,
-                "<think>I'm thinking.</think>Let me call multiple tools."
-                "<|tool_calls_section_begin|>"
-                "<|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>"
-                "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.complex_function:2<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.emoji_function:3<|tool_call_argument_begin|>"
-                "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}"
-                "<|tool_call_end|>"
-                "<|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "I'm thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("Hello", "I'm thinkingI'm still thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>I'm still thinking</think>Hello",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-
-        // Test template rendering
-        common_chat_templates_inputs conversation_with_tools = inputs_tools;
-        conversation_with_tools.messages.push_back(simple_assist_msg("Let's do it", "Think first", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 1",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "complex_function",
-            /* .tool_call_id = */ "",
-        });
-        conversation_with_tools.messages.push_back(simple_assist_msg("Continue", "Think next", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 2",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "web_search",
-            /* .tool_call_id = */ "",
-        });
-        conversation_with_tools.messages.push_back(simple_assist_msg("CC", "Think last", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 3",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "read_file",
-            /* .tool_call_id = */ "",
-        });
-        assert_equals(common_chat_templates_apply(tmpls.get(), conversation_with_tools).prompt, std::string("<|im_system|>tool_declare<|im_middle|>[{\"type\": \"function\", \"function\": {\"name\": \"special_function\", \"description\": \"I'm special\", \"parameters\": {\"type\": \"object\", \"properties\": {\"arg1\": {\"type\": \"integer\", \"description\": \"The arg.\"}}, \"required\": [\"arg1\"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hey there!<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think first</think>Let's do it<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>complex_function<|im_middle|>## Return of functions.complex_function:0\nTool response 1<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think next</think>Continue<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>web_search<|im_middle|>## Return of functions.web_search:1\nTool response 2<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think last</think>CC<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:2<|tool_call_argument_begin|>{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>read_file<|im_middle|>## Return of functions.read_file:2\nTool response 3<|im_end|><|im_assistant|>assistant<|im_middle|>"));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "<think></think>Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-    }
-
-    // Test Qwen3-Coder XML format
+        auto tst = peg_tester("models/templates/Apriel-1.6-15b-Thinker-fixed.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Implicit reasoning start (forced open)
+        tst.test("I'm\nthinking\n[BEGIN FINAL RESPONSE]\nHello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Reasoning + Tool calls
+        tst.test(
+               "I'm\nthinking\n[BEGIN FINAL RESPONSE]\n<tool_calls>[{\"name\": \"special_function\", \"arguments\": "
+               "{\"arg1\": 1}}]</tool_calls>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+    }
+
+    // Mistral Small 3.2 - FUNC_BRACKET_TAG format: [TOOL_CALLS]func_name[CALL_ID]id[ARGS]{...}
     {
-        // Basic XML tool call parsing
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tool_call>\n"
-                "  <function=special_function>\n"
-                "    <parameter=arg1>\n"
-                "      1\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
-
-        // Multiple parameters with different types
-        common_chat_msg expected_multi_param;
-        expected_multi_param.role = "assistant";
-        expected_multi_param.tool_calls = {
-            { "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}", "" }
-        };
-
-        test_parser_with_streaming(expected_multi_param,
-                "<tool_call>\n"
-                "  <function=complex_function>\n"
-                "    <parameter=name>\n"
-                "      John Doe\n"
-                "    </parameter>\n"
-                "    <parameter=age>\n"
-                "      30\n"
-                "    </parameter>\n"
-                "    <parameter=active>\n"
-                "      true\n"
-                "    </parameter>\n"
-                "    <parameter=score>\n"
-                "      95.5\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Special characters and Unicode
-        common_chat_msg expected_special_chars;
-        expected_special_chars.role = "assistant";
-        expected_special_chars.tool_calls = {
-            { "unicode_function", "{\"message\":\"Hello 世界! 🌍 Special chars: @#$%^&*()\"}", "" }
-        };
-
-        test_parser_with_streaming(expected_special_chars,
-                "<tool_call>\n"
-                "  <function=unicode_function>\n"
-                "    <parameter=message>\n"
-                "      Hello 世界! 🌍 Special chars: @#$%^&*()\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Multiline content with newlines and indentation
-        common_chat_msg expected_multiline;
-        expected_multiline.role = "assistant";
-        expected_multiline.tool_calls = {
-            { "code_function", "{\"code\":\"def hello():\\n    print(\\\"Hello, World!\\\")\\n    return True\"}", "" }
-        };
-
-        test_parser_with_streaming(expected_multiline,
-                "<tool_call>\n"
-                "  <function=code_function>\n"
-                "    <parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, World!\")\n"
-                "    return True\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // JSON object as parameter value
-        common_chat_msg expected_json_param;
-        expected_json_param.role = "assistant";
-        expected_json_param.tool_calls = {
-            { "json_function", "{\"config\":{\"host\":\"localhost\",\"port\":8080,\"ssl\":false}}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_json_param,
-                "<tool_call>\n"
-                "  <function=json_function>\n"
-                "    <parameter=config>\n"
-                "      {\"host\": \"localhost\", \"port\": 8080, \"ssl\": false}\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Array as parameter value
-        common_chat_msg expected_array_param;
-        expected_array_param.role = "assistant";
-        expected_array_param.tool_calls = {
-            { "array_function", "{\"items\":[\"apple\",\"banana\",\"cherry\"]}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_array_param,
-                "<tool_call>\n"
-                "  <function=array_function>\n"
-                "    <parameter=items>\n"
-                "      [\"apple\", \"banana\", \"cherry\"]\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Empty parameter
-        common_chat_msg expected_empty_param;
-        expected_empty_param.role = "assistant";
-        expected_empty_param.tool_calls = {
-            { "empty_function", "{\"empty_param\":\"\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_empty_param,
-                "<tool_call>\n"
-                "  <function=empty_function>\n"
-                "    <parameter=empty_param>\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Boolean values (true/false)
-        common_chat_msg expected_boolean;
-        expected_boolean.role = "assistant";
-        expected_boolean.tool_calls = {
-            { "boolean_function", "{\"enabled\":true,\"debug\":false}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_boolean,
-                "<tool_call>\n"
-                "  <function=boolean_function>\n"
-                "    <parameter=enabled>\n"
-                "      true\n"
-                "    </parameter>\n"
-                "    <parameter=debug>\n"
-                "      false\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Null value
-        common_chat_msg expected_null;
-        expected_null.role = "assistant";
-        expected_null.tool_calls = {
-            { "null_function", "{\"optional_param\":null}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_null,
-                "<tool_call>\n"
-                "  <function=null_function>\n"
-                "    <parameter=optional_param>\n"
-                "      null\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Negative numbers and scientific notation
-        common_chat_msg expected_numbers;
-        expected_numbers.role = "assistant";
-        expected_numbers.tool_calls = {
-            { "math_function", "{\"negative\":-42,\"decimal\":-3.14,\"scientific\":1.23e-4}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_numbers,
-                "<tool_call>\n"
-                "  <function=math_function>\n"
-                "    <parameter=negative>\n"
-                "      -42\n"
-                "    </parameter>\n"
-                "    <parameter=decimal>\n"
-                "      -3.14\n"
-                "    </parameter>\n"
-                "    <parameter=scientific>\n"
-                "      1.23e-4\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // XML-like content in parameters (should be escaped)
-        common_chat_msg expected_xml_content;
-        expected_xml_content.role = "assistant";
-        expected_xml_content.tool_calls = {
-            { "xml_function", "{\"xml_content\":\"<root><item>value</item></root>\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_xml_content,
-                "<tool_call>\n"
-                "  <function=xml_function>\n"
-                "    <parameter=xml_content>\n"
-                "      <root><item>value</item></root>\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Quotes and escape characters
-        common_chat_msg expected_quotes;
-        expected_quotes.role = "assistant";
-        expected_quotes.tool_calls = {
-            { "quote_function", "{\"message\":\"She said \\\"Hello!\\\" and left.\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_quotes,
-                "<tool_call>\n"
-                "  <function=quote_function>\n"
-                "    <parameter=message>\n"
-                "      She said \"Hello!\" and left.\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Long parameter value (simplified)
-        std::string long_text = "This is a long text parameter that should test the parser's ability to handle larger amounts of text data.";
-
-        common_chat_msg expected_long_text;
-        expected_long_text.role = "assistant";
-        expected_long_text.tool_calls = {
-            { "long_function", "{\"long_text\":\"" + long_text + "\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_long_text,
-                "<tool_call>\n"
-                "  <function=long_function>\n"
-                "    <parameter=long_text>\n"
-                "      " + long_text + "\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Mixed content with text before and after tool call
-        common_chat_msg expected_mixed_content;
-        expected_mixed_content.role = "assistant";
-        expected_mixed_content.content = "I'll help you search for products. ";
-        expected_mixed_content.tool_calls = {
-            { "search_function", "{\"query\":\"laptops\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_mixed_content,
-                "I'll help you search for products. <tool_call>\n"
-                "  <function=search_function>\n"
-                "    <parameter=query>\n"
-                "      laptops\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Compact format (no extra whitespace)
-        common_chat_msg expected_compact;
-        expected_compact.role = "assistant";
-        expected_compact.tool_calls = {
-            { "compact_function", "{\"param\":\"value\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_compact,
-                "<tool_call><function=compact_function><parameter=param>value</parameter></function></tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Function name with underscores and numbers
-        common_chat_msg expected_complex_name;
-        expected_complex_name.role = "assistant";
-        expected_complex_name.tool_calls = {
-            { "get_user_data_v2", "{\"user_id\":12345}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_complex_name,
-                "<tool_call>\n"
-                "  <function=get_user_data_v2>\n"
-                "    <parameter=user_id>\n"
-                "      12345\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Parameter names with underscores and numbers
-        common_chat_msg expected_complex_params;
-        expected_complex_params.role = "assistant";
-        expected_complex_params.tool_calls = {
-            { "test_function", "{\"param_1\":\"value1\",\"param_2_name\":\"value2\",\"param3\":123}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_complex_params,
-                "<tool_call>\n"
-                "  <function=test_function>\n"
-                "    <parameter=param_1>\n"
-                "      value1\n"
-                "    </parameter>\n"
-                "    <parameter=param_2_name>\n"
-                "      value2\n"
-                "    </parameter>\n"
-                "    <parameter=param3>\n"
-                "      123\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Very deeply nested XML content in parameter
-        common_chat_msg expected_deep_xml;
-        expected_deep_xml.role = "assistant";
-        expected_deep_xml.tool_calls = {
-            { "xml_parser", "{\"xml\":\"<root><level1><level2><level3>deep content</level3></level2></level1></root>\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_deep_xml,
-                "<tool_call>\n"
-                "  <function=xml_parser>\n"
-                "    <parameter=xml>\n"
-                "      <root><level1><level2><level3>deep content</level3></level2></level1></root>\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Parameter with only whitespace
-        common_chat_msg expected_whitespace_param;
-        expected_whitespace_param.role = "assistant";
-        expected_whitespace_param.tool_calls = {
-            { "whitespace_function", "{\"spaces\":\"\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_whitespace_param,
-                "<tool_call>\n"
-                "  <function=whitespace_function>\n"
-                "    <parameter=spaces>\n"
-                "      \n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Parameter with tabs and mixed whitespace
-        common_chat_msg expected_mixed_whitespace;
-        expected_mixed_whitespace.role = "assistant";
-        expected_mixed_whitespace.tool_calls = {
-            { "tab_function", "{\"content\":\"line1\\n\\tindented line\\n    spaces\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_mixed_whitespace,
-                "<tool_call>\n"
-                "  <function=tab_function>\n"
-                "    <parameter=content>\n"
-                "line1\n"
-                "\tindented line\n"
-                "    spaces\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Control characters and special Unicode
-        common_chat_msg expected_control_chars;
-        expected_control_chars.role = "assistant";
-        expected_control_chars.tool_calls = {
-            { "control_function", "{\"text\":\"Line1\\nLine2\\tTabbed\\rCarriage return\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_control_chars,
-                "<tool_call>\n"
-                "  <function=control_function>\n"
-                "    <parameter=text>\n"
-                "Line1\nLine2\tTabbed\rCarriage return\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Emoji and extended Unicode characters
-        common_chat_msg expected_emoji;
-        expected_emoji.role = "assistant";
-        expected_emoji.tool_calls = {
-            { "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_emoji,
-                "<tool_call>\n"
-                "  <function=emoji_function>\n"
-                "    <parameter=message>\n"
-                "      Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Mathematical expressions and formulas
-        common_chat_msg expected_math;
-        expected_math.role = "assistant";
-        expected_math.tool_calls = {
-            { "math_function", "{\"formula\":\"E = mc² and ∫f(x)dx = F(x) + C\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_math,
-                "<tool_call>\n"
-                "  <function=math_function>\n"
-                "    <parameter=formula>\n"
-                "      E = mc² and ∫f(x)dx = F(x) + C\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // SQL injection-like content (should be safely escaped)
-        common_chat_msg expected_sql;
-        expected_sql.role = "assistant";
-        expected_sql.tool_calls = {
-            { "sql_function", "{\"query\":\"SELECT * FROM users WHERE id = 1; DROP TABLE users; --\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_sql,
-                "<tool_call>\n"
-                "  <function=sql_function>\n"
-                "    <parameter=query>\n"
-                "      SELECT * FROM users WHERE id = 1; DROP TABLE users; --\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // HTML/XML injection content
-        common_chat_msg expected_html;
-        expected_html.role = "assistant";
-        expected_html.tool_calls = {
-            { "html_function", "{\"content\":\"<script>alert('xss')</script><img src=x onerror=alert(1)>\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_html,
-                "<tool_call>\n"
-                "  <function=html_function>\n"
-                "    <parameter=content>\n"
-                "      <script>alert('xss')</script><img src=x onerror=alert(1)>\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Binary-like content (base64)
-        common_chat_msg expected_binary;
-        expected_binary.role = "assistant";
-        expected_binary.tool_calls = {
-            { "binary_function", "{\"data\":\"SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\"}", "" }
-        };
-
-        test_parser_with_streaming(
-            expected_binary,
-                "<tool_call>\n"
-                "  <function=binary_function>\n"
-                "    <parameter=data>\n"
-                "      SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
-
-        // Very large numbers (should be parsed as scientific notation)
-        common_chat_msg expected_large_numbers;
-        expected_large_numbers.role = "assistant";
-        expected_large_numbers.tool_calls = {
-            { "number_function", "{\"big_int\":1e+60}", "" }  // Large number becomes scientific notation
-        };
-
-        test_parser_with_streaming(
-            expected_large_numbers,
-                "<tool_call>\n"
-                "  <function=number_function>\n"
-                "    <parameter=big_int>\n"
-                "      999999999999999999999999999999999999999999999999999999999999\n"
-                "    </parameter>\n"
-                "  </function>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
+        auto tst = peg_tester("models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("[TOOL_CALLS]special_function[CALL_ID]123456789[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_id)
+            .run();
+    }
+    // Devstral - FUNC_BRACKET_TAG format (no ID marker): [TOOL_CALLS]func_name[ARGS]{...}
+    {
+        auto tst = peg_tester("models/templates/unsloth-mistral-Devstral-Small-2507.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+        tst.test("Hello, world!\nWhat's up?[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_content)
+            .run();
     }
 
     {
-        // Qwen3-Coder template
-        auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja");
-        common_chat_templates_inputs inputs;
-        inputs.messages = { message_user };
-
-        common_chat_tool qwen_union_tool {
-            /* .name = */ "qwen_union",
-            /* .description = */ "Test tool for union/anyOf handling",
-            /* .parameters = */ R"({
-                "type": "object",
-                "properties": {
-                    "priority": { "type": ["number", "null"] },
-                    "maybe_text": { "anyOf": [ { "type": "string" } ] },
-                    "config": { "anyOf": [ { "type": "object" }, { "type": "null" } ] }
-                },
-                "required": []
-            })",
-        };
-        inputs.tools = { qwen_union_tool };
-
-        auto params = common_chat_templates_apply(tmpls.get(), inputs);
-        assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, params.format);
-        assert_equals(false, params.grammar.empty());
-
-        // Grammar should compile successfully
-        auto grammar = build_grammar(params.grammar);
-        GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types");
+        // Llama 3.1
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run();
     }
-}
-
-static void test_template_output_peg_parsers() {
-    printf("[%s]\n", __func__);
-
-    // JSON schemas
-    const char * invoice_schema = R"({
-        "type": "object",
-        "properties": {
-            "amount": {"type": "number"},
-            "date": {"type": "string"}
-        }
-    })";
 
     {
-        // Ministral-3-14B-Reasoning-2512
-        auto tmpls = read_templates("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning with reasoning_format = none
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-            t.expect.content = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-        });
-
-        // Test basic message and reasoning with reasoning_format = auto
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]"
-                      R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call_thoughts;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})"
-                      R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I need to output the invoice details in JSON[/THINK]"
-                      "```json\n"
-                      R"({"amount": 123.45, "date": "2025-12-03"})"
-                      "\n```";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        // Llama 3.2
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run();
     }
 
     {
-        // NVIDIA Nemotron-3 Nano
-        auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning with reasoning_format = none
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-            t.expect.content = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-        });
-
-        // Test basic message and reasoning with reasoning_format = auto
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-            t.params.enable_thinking = true;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "I'm\nthinking\n</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call_thoughts;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "<function=special_function_with_opt>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "<parameter=arg2>\n"
-                "2\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter and no closing </parameter> tag
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-              "I need to output the invoice details in JSON\n"
-              "</think>\n"
-              R"({"amount": 123.45, "date": "2025-12-03"})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        // Llama 3.3
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ python_tool }).expect(message_assist).run();
     }
 
+    // GPT-OSS format tests
     {
-        // Solar-Open-100B
-        auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|><|begin|>assistant<|content|>Hello, world!\nWhat's up?";
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test basic message and reasoning_effort = low
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>Hello, world!\nWhat's up?";
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.expect = message_assist;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|tool_calls|>"
-                      "<|tool_call:begin|>123456789"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.params.tools = {special_function_tool};
-            t.expect = message_assist_call_id;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.expect = message_assist_thoughts_call_idx;
-        });
-
-        // Test tool call with reasoning and tool_choice = required
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-            t.expect = message_assist_thoughts_call_idx;
-        });
-
-        // Test tool call without reasoning and tool_choice = required
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.expect = message_assist_call_idx;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>"
-                      "<|tool_call:begin|>1"
-                      "<|tool_call:name|>special_function_with_opt"
-                      "<|tool_call:args|>{\"arg1\": 1, \"arg2\": 2}"
-                      "<|tool_call:end|>";
-
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.reasoning_content = "I'm\nthinking";
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        "0",
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        "1",
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I need to output the invoice details in JSON<|end|>"
-                      "<|begin|>assistant<|content|>"
-                      R"({"amount": 123.45, "date": "2025-12-03"})";
-
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
-
-        // Test response format no reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>"
-                      R"({"amount": 123.45, "date": "2025-12-03"})";
-
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.params.json_schema = invoice_schema;
-
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        auto tst = peg_tester("models/templates/openai-gpt-oss-120b.jinja", detailed_debug);
+
+        // Basic content only - final channel
+        tst.test("<|channel|>final<|message|>Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Basic content only - commentary channel
+        tst.test("<|channel|>commentary<|message|>Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Analysis channel (reasoning) with final channel (content)
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's "
+               "up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Analysis channel only (partial) - still works when reasoning format is set
+        tst.test("<|channel|>analysis<|message|>I'm\nthinking")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .is_partial(true)
+            .expect_reasoning("I'm\nthinking")
+            .run();
+
+        // Reasoning format none - reasoning stays in content
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's "
+               "up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_NONE)
+            .expect_content(
+                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?")
+            .run();
+
+        // Tool call with recipient in role header: " to=functions.NAME<|channel|>analysis<|message|>JSON"
+        tst.test(" to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with recipient in channel header: "<|channel|>analysis to=functions.NAME<|message|>JSON"
+        tst.test("<|channel|>analysis to=functions.special_function<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with constraint: " to=functions.NAME<|channel|>analysis <|constrain|>json<|message|>JSON"
+        tst.test(" to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call in commentary channel (channel header variant)
+        tst.test("<|channel|>commentary to=functions.special_function<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with reasoning + content (analysis first, then tool call)
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n"
+               "<|start|>assistant to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Tool calling with extra channel before
+        tst.test(
+                "<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>commentary"
+                " to=functions.special_function <|message|>{\"arg1\": 1}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Reasoning after final channel
+        // Tool calling after final channel
+        tst.test(
+            "<|channel|>final<|message|><|end|>"
+            "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit..."
+        )
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect_reasoning("Thinking about edit...")
+            .expect_content("")
+            .run();
+
+        // Tool calling after final channel
+        tst.test(
+            "<|channel|>final<|message|><|end|>"
+            "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit...<|end|>"
+            "<|start|>assistant<|channel|>commentary to=functions.edit <|constrain|>json"
+            "<|message|>{\"filePath\": \"file.js\", \"oldString\": \"if (part < railCount - 1) {\", \"newString\": \"if (part < 4) {\", \"replaceAll\": false}"
+            )
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({
+                {
+                    /* .name = */ "edit",
+                    /* .description = */ "Edit a file",
+                    /* .parameters = */ R"({
+                        "type": "object",
+                        "properties": {
+                            "oldString": {
+                                "type": "string",
+                                "description": "Old string to replace."
+                            },
+                            "newString": {
+                                "type": "string",
+                                "description": "New replacement string."
+                            },
+                            "replaceAll": {
+                                "type": "boolean",
+                                "description": "Whether to replace all occurences."
+                            }
+                        },
+                        "required": ["oldString", "newString"]
+                    })",
+                }
+            })
+            .expect_reasoning("Thinking about edit...")
+            .expect_tool_calls({
+                { "edit", R"({"filePath": "file.js", "oldString": "if (part < railCount - 1) {", "newString": "if (part < 4) {", "replaceAll": false})", {} }
+            })
+            .run();
+
+        // Parallel tool calls
+        tst.test(
+               " to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}\n"
+               "<|start|>assistant to=functions.special_function_with_opt<|channel|>analysis<|message|>{\"arg1\": 1, "
+               "\"arg2\": 2}")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
     }
 }
 
 static void test_msg_diffs_compute() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
     {
         common_chat_msg msg1;
 
@@ -3940,9 +2080,7 @@ static void test_msg_diffs_compute() {
         common_chat_msg_diff diff;
         diff.content_delta = "Hello, world!";
 
-        assert_equals(
-            {diff},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2));
     }
     {
         common_chat_msg msg1;
@@ -3954,37 +2092,35 @@ static void test_msg_diffs_compute() {
         common_chat_msg_diff diff;
         diff.content_delta = " world!";
 
-        assert_equals(
-            {diff},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2));
     }
     {
         common_chat_msg msg0;
 
         common_chat_msg msg1;
-        msg1.tool_calls = { { "special_function", "{\"ar", /* .id = */ "123" } };
+        msg1.tool_calls = {
+            { "special_function", "{\"ar", /* .id = */ "123" }
+        };
 
         common_chat_msg msg2;
-        msg2.tool_calls = { { "special_function", "{\"arg1\": 1}", /* .id = */ "123" } };
+        msg2.tool_calls = {
+            { "special_function", "{\"arg1\": 1}", /* .id = */ "123" }
+        };
 
         common_chat_msg_diff diff01;
-        diff01.tool_call_index = 0;
-        diff01.tool_call_delta.name = "special_function";
-        diff01.tool_call_delta.id = "123";
+        diff01.tool_call_index           = 0;
+        diff01.tool_call_delta.name      = "special_function";
+        diff01.tool_call_delta.id        = "123";
         diff01.tool_call_delta.arguments = "{\"ar";
 
-        assert_equals(
-            {diff01},
-            common_chat_msg_diff::compute_diffs(msg0, msg1));
+        assert_equals({ diff01 }, common_chat_msg_diff::compute_diffs(msg0, msg1));
 
         common_chat_msg_diff diff12;
-        diff12.tool_call_index = 0;
+        diff12.tool_call_index           = 0;
         // Note: neither id nor name change here.
         diff12.tool_call_delta.arguments = "g1\": 1}";
 
-        assert_equals(
-            {diff12},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff12 }, common_chat_msg_diff::compute_diffs(msg1, msg2));
     }
     {
         common_chat_msg msg0;
@@ -3996,68 +2132,81 @@ static void test_msg_diffs_compute() {
         };
 
         common_chat_msg_diff diff1;
-        diff1.tool_call_index = 0;
-        diff1.tool_call_delta.name = "f1";
-        diff1.tool_call_delta.id = "123";
+        diff1.tool_call_index           = 0;
+        diff1.tool_call_delta.name      = "f1";
+        diff1.tool_call_delta.id        = "123";
         diff1.tool_call_delta.arguments = "{\"arg1\": 1}";
 
         common_chat_msg_diff diff2;
-        diff2.tool_call_index = 1;
-        diff2.tool_call_delta.name = "f2";
-        diff2.tool_call_delta.id = "222";
+        diff2.tool_call_index           = 1;
+        diff2.tool_call_delta.name      = "f2";
+        diff2.tool_call_delta.id        = "222";
         diff2.tool_call_delta.arguments = "{\"arg2\": 2}";
 
-        assert_equals(
-            {diff1, diff2},
-            common_chat_msg_diff::compute_diffs(msg0, msg2));
+        assert_equals({ diff1, diff2 }, common_chat_msg_diff::compute_diffs(msg0, msg2));
     }
 }
 
 int main(int argc, char ** argv) {
-    common_log_set_verbosity_thold(999);
+    bool detailed_debug    = false;
+    bool only_run_filtered = false;
+
+    // Check for --template flag
+    for (int i = 1; i < argc; i++) {
+        std::string arg = argv[i];
+        if (arg == "--template" && i + 1 < argc) {
+            g_template_filter = argv[++i];
+            // Only run PEG parser tests with the filter
+            only_run_filtered = true;
+        }
+        if (arg == "--detailed") {
+            detailed_debug = true;
+            common_log_set_verbosity_thold(999);
+        }
+    }
+
+    if (only_run_filtered) {
+        test_template_output_peg_parsers(detailed_debug);
+        std::cout << "\n[chat] All template tests passed!" << '\n';
+        return 0;
+    }
 
-    // try {
 #ifndef _WIN32
-        if (argc > 1) {
-            common_chat_templates_inputs inputs;
-            common_chat_msg msg;
-            msg.role = "user";
-            msg.content = "Hey";
-            inputs.messages = {msg};
-            inputs.tools = { special_function_tool };
-
-            std::cout << "| Template | Format |\n";
-            std::cout << "|----------|--------|\n";
-
-            for (int i = 1; i < argc; i++) {
-                try {
-                    std::string path = argv[i];
-                    if (path.rfind(".jinja") != path.size() - 6) {
-                        std::cerr << "Skipping non-jinja file: " << path << '\n';
-                        continue;
-                    }
-                    auto tmpls = read_templates(path);
-                    auto parts  = string_split(path, "/");
-                    auto name   = parts[parts.size() - 1];
-                    auto format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
-                    std::cout << "| " << name << " | " << format << " |\n";
-                } catch (const std::exception & e) {
-                    std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
+    if (argc > 1) {
+        common_chat_templates_inputs inputs;
+        common_chat_msg              msg;
+        msg.role        = "user";
+        msg.content     = "Hey";
+        inputs.messages = { msg };
+        inputs.tools    = { special_function_tool };
+
+        std::cout << "| Template | Format |\n";
+        std::cout << "|----------|--------|\n";
+
+        for (int i = 1; i < argc; i++) {
+            try {
+                std::string path = argv[i];
+                if (path.rfind(".jinja") != path.size() - 6) {
+                    std::cerr << "Skipping non-jinja file: " << path << '\n';
+                    continue;
                 }
+                auto         tmpls  = read_templates(path);
+                auto         parts  = string_split(path, "/");
+                const auto & name   = parts[parts.size() - 1];
+                const auto * format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
+                std::cout << "| " << name << " | " << format << " |\n";
+            } catch (const std::exception & e) {
+                std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
             }
-        } else
-#endif
-        {
-            test_msg_diffs_compute();
-            test_msgs_oaicompat_json_conversion();
-            test_tools_oaicompat_json_conversion();
-            test_template_output_parsers();
-            test_template_output_peg_parsers();
-            std::cout << "\n[chat] All tests passed!" << '\n';
         }
-        return 0;
-    // } catch (const std::exception & e) {
-    //     std::cerr << "Error: " << e.what() << '\n';
-    //     return 1;
-    // }
+    } else
+#endif
+    {
+        test_msg_diffs_compute();
+        test_msgs_oaicompat_json_conversion();
+        test_tools_oaicompat_json_conversion();
+        test_template_output_peg_parsers(detailed_debug);
+        std::cout << "\n[chat] All tests passed!" << '\n';
+    }
+    return 0;
 }
diff --git a/tests/test-peg-parser.cpp b/tests/test-peg-parser.cpp
index 220745d0293..7d22d776120 100644
--- a/tests/test-peg-parser.cpp
+++ b/tests/test-peg-parser.cpp
@@ -20,6 +20,7 @@ int main(int argc, char *argv[]) {
     t.test("json", test_json_parser);
     t.test("gbnf", test_gbnf_generation);
     t.test("serialization", test_json_serialization);
+    t.test("python-dict", test_python_dict_parser);
 
     return t.summary();
 }
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 518f8b9ae74..7c63b3aae54 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -26,6 +26,7 @@ else()
         add_subdirectory(server)
     endif()
     add_subdirectory(tokenize)
+    add_subdirectory(parser)
     add_subdirectory(tts)
     add_subdirectory(mtmd)
     if (GGML_RPC)
diff --git a/tools/parser/CMakeLists.txt b/tools/parser/CMakeLists.txt
new file mode 100644
index 00000000000..73157b0a0ef
--- /dev/null
+++ b/tools/parser/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(TARGET llama-debug-template-parser)
+add_executable(${TARGET} debug-template-parser.cpp)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+if(LLAMA_TOOLS_INSTALL)
+    install(TARGETS ${TARGET} RUNTIME)
+endif()
+
+set(TARGET llama-template-analysis)
+add_executable(${TARGET} template-analysis.cpp)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+if(LLAMA_TOOLS_INSTALL)
+    install(TARGETS ${TARGET} RUNTIME)
+endif()
diff --git a/tools/parser/debug-template-parser.cpp b/tools/parser/debug-template-parser.cpp
new file mode 100644
index 00000000000..b8b4f3dfd34
--- /dev/null
+++ b/tools/parser/debug-template-parser.cpp
@@ -0,0 +1,482 @@
+#include "../src/llama-grammar.h"
+#include "chat-auto-parser.h"
+#include "chat-diff-analyzer.h"
+#include "chat.h"
+#include "common.h"
+#include "gguf.h"
+#include "jinja/runtime.h"
+#include "log.h"
+
+#include <fstream>
+#include <numeric>
+#include <sstream>
+#include <string>
+
+#include "nlohmann/json.hpp"
+#include "peg-parser.h"
+
+using json = nlohmann::ordered_json;
+
+enum class output_mode {
+    ANALYSIS,  // Only output analysis results (default)
+    TEMPLATE,  // Only output rendered template
+    BOTH       // Output both
+};
+
+enum class input_message_type {
+    NONE,                    // Don't render any message scenarios (only analysis)
+    CONTENT_ONLY,            // Simple assistant message with content
+    REASONING_CONTENT,       // Message with reasoning_content + content
+    TOOL_CALL_ONLY,          // Message with tool_calls only
+    CONTENT_TOOL_CALL,       // Message with content + tool_calls
+    REASONING_TOOL_CALL,     // Message with reasoning_content + tool_calls
+    CONTENT_FAKE_TOOL_CALL,  // Message with content but no actual tool_calls (for testing)
+    ALL                      // Render all scenarios
+};
+
+struct debug_options {
+    std::string      template_path;
+    bool             with_tools        = true;
+    bool             generation_prompt = true;
+    bool             enable_reasoning  = true;
+    output_mode       mode             = output_mode::BOTH;
+    input_message_type input_message     = input_message_type::NONE;
+};
+
+static std::string read_file(const std::string & path) {
+    std::ifstream fin(path, std::ios::binary);
+    if (!fin.is_open()) {
+        throw std::runtime_error("Could not open file: " + path);
+    }
+    std::ostringstream buf;
+    buf << fin.rdbuf();
+    return buf.str();
+}
+
+static std::string read_gguf_chat_template(const std::string & path) {
+    struct gguf_init_params params = { /*no_alloc =*/true,  // We only need metadata, not tensor data
+                                       /*ctx=*/nullptr };
+
+    struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params);
+    if (ctx == nullptr) {
+        throw std::runtime_error("Could not open GGUF file: " + path);
+    }
+
+    const char * key    = "tokenizer.chat_template";
+    int64_t      key_id = gguf_find_key(ctx, key);
+
+    if (key_id == -1) {
+        gguf_free(ctx);
+        throw std::runtime_error("GGUF file does not contain chat template key: " + std::string(key));
+    }
+
+    const char * template_str = gguf_get_val_str(ctx, key_id);
+    if (template_str == nullptr) {
+        gguf_free(ctx);
+        throw std::runtime_error("GGUF file contains chat template key but value is null");
+    }
+
+    std::string result = template_str;
+    gguf_free(ctx);
+    return result;
+}
+
+static void print_usage(const char * program_name) {
+    LOG_ERR("Usage: %s <template_or_gguf_path> [options]\n", program_name);
+    LOG_ERR("\nOptions:\n");
+    LOG_ERR("  --no-tools              Disable tool definitions\n");
+    LOG_ERR("  --generation-prompt=0|1 Set add_generation_prompt (default: 1)\n");
+    LOG_ERR("  --enable-reasoning=0|1  Enable reasoning parsing (default: 1)\n");
+    LOG_ERR("  --output=MODE           Output mode: analysis, template, both (default: both)\n");
+    LOG_ERR("  --input-message=TYPE    Message type to render:\n");
+    LOG_ERR("                          content_only, reasoning_content, tool_call_only,\n");
+    LOG_ERR("                          content_tool_call, reasoning_tool_call,\n");
+    LOG_ERR("                          content_fake_tool_call, all\n");
+    LOG_ERR("\nExamples:\n");
+    LOG_ERR("  %s template.jinja --input-message=all --generation-prompt=1\n", program_name);
+    LOG_ERR("  %s template.jinja --output=template --input-message=tool_call_only\n", program_name);
+}
+
+static bool parse_bool_option(const std::string & value) {
+    return value == "1" || value == "true" || value == "yes";
+}
+
+static bool parse_options(int argc, char ** argv, debug_options & opts) {
+    if (argc < 2) {
+        print_usage(argv[0]);
+        return false;
+    }
+
+    opts.template_path = argv[1];
+
+    for (int i = 2; i < argc; ++i) {
+        std::string arg = argv[i];
+
+        if (arg == "--no-tools") {
+            opts.with_tools = false;
+        } else if (arg.rfind("--generation-prompt=", 0) == 0) {
+            opts.generation_prompt = parse_bool_option(arg.substr(20));
+        } else if (arg.rfind("--enable-reasoning=", 0) == 0) {
+            opts.enable_reasoning = parse_bool_option(arg.substr(19));
+        } else if (arg.rfind("--output=", 0) == 0) {
+            std::string mode = arg.substr(9);
+            if (mode == "analysis") {
+                opts.mode = output_mode::ANALYSIS;
+            } else if (mode == "template") {
+                opts.mode = output_mode::TEMPLATE;
+            } else if (mode == "both") {
+                opts.mode = output_mode::BOTH;
+            } else {
+                LOG_ERR("Unknown output mode: %s\n", mode.c_str());
+                return false;
+            }
+        } else if (arg.rfind("--input-message=", 0) == 0) {
+            std::string type = arg.substr(16);
+            if (type == "content_only") {
+                opts.input_message = input_message_type::CONTENT_ONLY;
+            } else if (type == "reasoning_content") {
+                opts.input_message = input_message_type::REASONING_CONTENT;
+            } else if (type == "tool_call_only") {
+                opts.input_message = input_message_type::TOOL_CALL_ONLY;
+            } else if (type == "content_tool_call") {
+                opts.input_message = input_message_type::CONTENT_TOOL_CALL;
+            } else if (type == "reasoning_tool_call") {
+                opts.input_message = input_message_type::REASONING_TOOL_CALL;
+            } else if (type == "content_fake_tool_call") {
+                opts.input_message = input_message_type::CONTENT_FAKE_TOOL_CALL;
+            } else if (type == "all") {
+                opts.input_message = input_message_type::ALL;
+            } else {
+                LOG_ERR("Unknown input message type: %s\n", type.c_str());
+                return false;
+            }
+        } else {
+            LOG_ERR("Unknown option: %s\n", arg.c_str());
+            print_usage(argv[0]);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static json build_user_message() {
+    return json{
+        { "role",    "user"                               },
+        { "content", "Hello, please help me with a task." }
+    };
+}
+
+static json build_content_only_message() {
+    return json{
+        { "role",    "assistant"                                   },
+        { "content", "Hello! I'm here to help you with your task." }
+    };
+}
+
+static json build_reasoning_content_message() {
+    return json{
+        { "role",              "assistant"                                                               },
+        { "content",           "Hello! I'm here to help you with your task."                             },
+        { "reasoning_content", "The user is greeting me and asking for help. I should respond politely." }
+    };
+}
+
+static json build_tool_call_only_message() {
+    return json{
+        { "role",       "assistant"      },
+        { "content",    nullptr          },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function", json{ { "name", "test_function_name" },
+                                  { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } },
+              { "id", "123456789" } } }) }
+    };
+}
+
+static json build_content_tool_call_message() {
+    return json{
+        { "role",       "assistant"                                                                              },
+        { "content",    "I'll help you by calling a function."                                                   },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function",
+                json{ { "name", "test_function_name" },
+                      { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+    };
+}
+
+static json build_reasoning_tool_call_message() {
+    return json{
+        { "role",              "assistant"                                                                       },
+        { "content",           nullptr                                                                           },
+        { "reasoning_content", "I need to call a function to help with this task."                               },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function",
+                json{ { "name", "test_function_name" },
+                      { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+    };
+}
+
+static json build_content_fake_tool_call_message() {
+    // This message has content but NO tool_calls field
+    // It's used to test if a template renders tool definitions but not tool calls
+    return json{
+        { "role",    "assistant"                            },
+        { "content", "I'll help you by calling a function." }
+    };
+}
+
+static json build_tools_definition() {
+    json parameters_schema                    = json::object();
+    parameters_schema["type"]                 = "object";
+    parameters_schema["properties"]           = json::object();
+    parameters_schema["properties"]["param1"] = json::object({
+        { "type",        "string"          },
+        { "description", "First parameter" }
+    });
+    parameters_schema["properties"]["param2"] = json::object({
+        { "type",        "string"           },
+        { "description", "Second parameter" }
+    });
+    parameters_schema["required"]             = json::array({ "param1" });
+
+    return json::array({
+        json{ { "type", "function" },
+             { "function", json{ { "name", "test_function_name" },
+                                  { "description", "A test function for debugging" },
+                                  { "parameters", parameters_schema } } } }
+    });
+}
+
+static void render_scenario(const common_chat_template & tmpl,
+                            const std::string &          scenario_name,
+                            const json &                 messages,
+                            const json &                 tools,
+                            bool                         add_generation_prompt,
+                            bool                         enable_thinking) {
+    LOG_ERR("\n=== Scenario: %s ===\n", scenario_name.c_str());
+    LOG_ERR("add_generation_prompt: %s, enable_thinking: %s\n", add_generation_prompt ? "true" : "false",
+            enable_thinking ? "true" : "false");
+
+    // When add_generation_prompt is true, add a trailing user message to trigger the prompt
+    json final_messages = messages;
+    if (add_generation_prompt && !messages.empty() && messages.back().value("role", "") == "assistant") {
+        final_messages.push_back(json{
+            { "role",    "user" },
+            { "content", "Now please continue with another response." }
+        });
+    }
+
+    LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str());
+
+    try {
+        templates_params inputs;
+        inputs.messages                         = final_messages;
+        inputs.add_generation_prompt            = add_generation_prompt;
+        inputs.extra_context["enable_thinking"] = enable_thinking;
+
+        if (!tools.is_null() && tools.is_array() && !tools.empty()) {
+            inputs.tools = tools;
+        }
+
+        std::string output = common_chat_template_direct_apply(tmpl, inputs);
+
+        LOG_ERR("\n--- Rendered Output ---\n");
+        LOG_ERR("%s\n", output.c_str());
+        LOG_ERR("--- End Output (length: %zu) ---\n", output.length());
+    } catch (const std::exception & e) {
+        LOG_ERR("Rendering failed: %s\n", e.what());
+    }
+}
+
+static void render_all_scenarios(const common_chat_template & tmpl,
+                                 const json &                 tools,
+                                 bool                         add_generation_prompt,
+                                 bool                         enable_thinking,
+                                 input_message_type             message_type) {
+    json user_msg = build_user_message();
+
+    auto render_if = [&](input_message_type type, const std::string & name, const json & assistant_msg) {
+        if (message_type == input_message_type::ALL || message_type == type) {
+            json messages = json::array({ user_msg, assistant_msg });
+            render_scenario(tmpl, name, messages, tools, add_generation_prompt, enable_thinking);
+        }
+    };
+
+    render_if(input_message_type::CONTENT_ONLY, "content_only", build_content_only_message());
+    render_if(input_message_type::REASONING_CONTENT, "reasoning_content", build_reasoning_content_message());
+    render_if(input_message_type::TOOL_CALL_ONLY, "tool_call_only", build_tool_call_only_message());
+    render_if(input_message_type::CONTENT_TOOL_CALL, "content_tool_call", build_content_tool_call_message());
+    render_if(input_message_type::REASONING_TOOL_CALL, "reasoning_tool_call", build_reasoning_tool_call_message());
+    render_if(input_message_type::CONTENT_FAKE_TOOL_CALL, "content_fake_tool_call",
+              build_content_fake_tool_call_message());
+
+    // Also render with add_generation_prompt=true to show the prompt ending
+    if (message_type == input_message_type::ALL) {
+        LOG_ERR("\n\n=== Generation Prompt Scenarios (add_generation_prompt=true) ===\n");
+
+        json prompt_messages = json::array({ user_msg });
+        render_scenario(tmpl, "generation_prompt_only", prompt_messages, tools, true, enable_thinking);
+
+        // With enable_thinking toggled
+        render_scenario(tmpl, "generation_prompt_thinking_disabled", prompt_messages, tools, true, false);
+    }
+}
+
+template <typename T>
+static std::string mode_to_str(T mode) {
+    std::ostringstream os;
+    os << mode;
+    return os.str();
+}
+
+int main(int argc, char ** argv) {
+    // Set log level to most verbose to capture all debug output
+    common_log_set_verbosity_thold(99);
+
+    if (std::getenv("LLAMA_DEBUG_JINJA") != nullptr) {
+        jinja::enable_debug(true);
+    }
+
+    debug_options opts;
+    if (!parse_options(argc, argv, opts)) {
+        return 1;
+    }
+
+    std::string template_source;
+    try {
+        // Check if the file is a GGUF file
+        if (opts.template_path.size() >= 5 &&
+            opts.template_path.compare(opts.template_path.size() - 5, 5, ".gguf") == 0) {
+            template_source = read_gguf_chat_template(opts.template_path);
+        } else {
+            template_source = read_file(opts.template_path);
+        }
+    } catch (const std::exception & e) {
+        LOG_ERR("Error reading template: %s\n", e.what());
+        return 1;
+    }
+
+    LOG_ERR("Analyzing template: %s\n", opts.template_path.c_str());
+    LOG_ERR("Options: with_tools=%s, generation_prompt=%s, enable_reasoning=%s\n", opts.with_tools ? "true" : "false",
+            opts.generation_prompt ? "true" : "false", opts.enable_reasoning ? "true" : "false");
+
+    try {
+        common_chat_template chat_template(template_source, "", "");
+
+        // Build tools definition
+        json tools = opts.with_tools ? build_tools_definition() : json();
+
+        // Render template scenarios if requested
+        if (opts.input_message != input_message_type::NONE &&
+            (opts.mode == output_mode::TEMPLATE || opts.mode == output_mode::BOTH)) {
+            LOG_ERR("\n");
+            LOG_ERR("================================================================================\n");
+            LOG_ERR("                         TEMPLATE RENDERING OUTPUT\n");
+            LOG_ERR("================================================================================\n");
+
+            render_all_scenarios(chat_template, tools, opts.generation_prompt, opts.enable_reasoning,
+                                 opts.input_message);
+        }
+
+        // Output analysis if requested
+        if (opts.mode == output_mode::ANALYSIS || opts.mode == output_mode::BOTH) {
+            LOG_ERR("\n");
+            LOG_ERR("================================================================================\n");
+            LOG_ERR("                           TEMPLATE ANALYSIS\n");
+            LOG_ERR("================================================================================\n");
+
+            diff_analysis_result analysis = differential_analyzer::analyze(chat_template);
+
+            // Generate Parser
+            templates_params params;
+            params.messages = json::array();
+            params.reasoning_format =
+                opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE;
+            params.enable_thinking       = opts.enable_reasoning;
+            params.add_generation_prompt = opts.generation_prompt;
+
+            if (opts.with_tools) {
+                params.tools       = tools;
+                params.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
+            } else {
+                params.tools       = json();
+                params.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
+            }
+            params.parallel_tool_calls = false;
+
+            auto parser_data = universal_peg_generator::generate_parser(chat_template, params, analysis);
+
+            LOG_ERR("\n=== Differential Analysis Results ===\n");
+
+            LOG_ERR("\n--- Reasoning & Content Structure ---\n");
+            LOG_ERR("reasoning_mode: %s\n", mode_to_str(analysis.reasoning).c_str());
+            LOG_ERR("reasoning_start: '%s'\n", analysis.markers.reasoning_start.c_str());
+            LOG_ERR("reasoning_end: '%s'\n", analysis.markers.reasoning_end.c_str());
+            LOG_ERR("content_mode: %s\n", mode_to_str(analysis.content).c_str());
+            LOG_ERR("content_start: '%s'\n", analysis.markers.content_start.c_str());
+            LOG_ERR("content_end: '%s'\n", analysis.markers.content_end.c_str());
+
+            LOG_ERR("\n--- Tool Call Structure ---\n");
+            LOG_ERR("tool_mode: %s\n", mode_to_str(analysis.tools).c_str());
+            LOG_ERR("supports_tools: %s\n", analysis.supports_tools ? "true" : "false");
+            LOG_ERR("supports_parallel_calls: %s\n", analysis.supports_parallel_calls ? "true" : "false");
+            LOG_ERR("tool_section_start: '%s'\n", analysis.markers.tool_section_start.c_str());
+            LOG_ERR("tool_section_end: '%s'\n", analysis.markers.tool_section_end.c_str());
+            LOG_ERR("per_call_start: '%s'\n", analysis.markers.per_call_start.c_str());
+            LOG_ERR("per_call_end: '%s'\n", analysis.markers.per_call_end.c_str());
+            LOG_ERR("func_name_prefix: '%s'\n", analysis.markers.func_name_prefix.c_str());
+            LOG_ERR("func_name_suffix: '%s'\n", analysis.markers.func_name_suffix.c_str());
+            LOG_ERR("func_close: '%s'\n", analysis.markers.func_close.c_str());
+            LOG_ERR("arg_name_prefix: '%s'\n", analysis.markers.arg_name_prefix.c_str());
+            LOG_ERR("arg_name_suffix: '%s'\n", analysis.markers.arg_name_suffix.c_str());
+            LOG_ERR("arg_value_prefix: '%s'\n", analysis.markers.arg_value_prefix.c_str());
+            LOG_ERR("arg_value_suffix: '%s'\n", analysis.markers.arg_value_suffix.c_str());
+            LOG_ERR("name_field: '%s'\n", analysis.name_field.c_str());
+            LOG_ERR("args_field: '%s'\n", analysis.args_field.c_str());
+            LOG_ERR("id_field: '%s'\n", analysis.id_field.c_str());
+            LOG_ERR("gen_id_field: '%s'\n", analysis.gen_id_field.c_str());
+            LOG_ERR("parameter_order: '%s'\n", std::accumulate(analysis.parameter_order.begin(), analysis.parameter_order.end(), 
+                std::string(""), [] (const std::string & a, const std::string & b) { return a.empty() ? b : a + ", " + b; }
+                ).c_str());
+
+            LOG_ERR("\n=== Generated Parser ===\n");
+            common_peg_arena arena;
+            arena.load(parser_data.parser);
+            LOG_ERR("%s\n", arena.dump(arena.root()).c_str());
+
+            LOG_ERR("\n=== Generated Grammar ===\n");
+            LOG_ERR("%s\n", parser_data.grammar.c_str());
+
+            LOG_ERR("\n=== Generated Lazy Grammar ===\n");
+            LOG_ERR("%d\n", parser_data.grammar_lazy);
+
+            LOG_ERR("\n=== Generated Grammar Triggers ===\n");
+            for (const common_grammar_trigger & cgt : parser_data.grammar_triggers) {
+                LOG_ERR("Token: %d | Type: %d | Value: %s\n", cgt.token, cgt.type, cgt.value.c_str());
+            }
+
+            LOG_ERR("\n=== Preserved Tokens ===\n");
+            for (const std::string & token : parser_data.preserved_tokens) {
+                LOG_ERR("  '%s'\n", token.c_str());
+            }
+
+            LOG_ERR("\n=== Verifying created grammar ===\n");
+            auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root",
+                                                     parser_data.grammar_lazy, nullptr, 0, nullptr, 0);
+            if (grammar != nullptr) {
+                LOG_ERR("\n=== Grammar successfully created ===\n");
+            }
+        }
+    } catch (const std::exception & e) {
+        LOG_ERR("Analysis failed: %s\n", e.what());
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/tools/parser/template-analysis.cpp b/tools/parser/template-analysis.cpp
new file mode 100644
index 00000000000..0fbcc09390d
--- /dev/null
+++ b/tools/parser/template-analysis.cpp
@@ -0,0 +1,610 @@
+#include "chat-auto-parser.h"
+#include "chat-auto-parser-helpers.h"
+#include "chat.h"
+#include "log.h"
+#include "jinja/caps.h"
+#include "jinja/runtime.h"
+
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+
+#include "nlohmann/json.hpp"
+
+using json = nlohmann::ordered_json;
+
+// ANSI color codes - using 256-color palette for brighter colors (all bold)
+#define ANSI_RESET       "\033[0m"
+#define ANSI_PURPLE      "\033[1m\x1b[38;5;126m"  // Bold bright purple for main headers
+#define ANSI_CYAN        "\033[1m\x1b[38;5;81m"   // Bold bright cyan for section headers
+#define ANSI_BLUE        "\033[1m\x1b[38;5;12m"   // Bold bright blue for labels
+#define ANSI_ORANGE      "\033[1m\x1b[38;5;209m"  // Bold orange for right differences
+#define ANSI_GREEN       "\033[1m\x1b[38;5;83m"   // Bold bright green for left differences
+#define ANSI_GRAY        "\033[1m\x1b[38;5;240m"  // Bold gray (used for "no variables" message)
+#define ANSI_BOLD        "\033[1m"                // Standalone bold
+#define ANSI_PREFIX      "\033[1m\x1b[38;5;176m"  // Bold color for common prefix
+#define ANSI_SUFFIX      "\033[1m\x1b[38;5;61m"   // Bold color for common suffix
+
+// All template paths extracted from tests/test-chat.cpp
+static const std::vector<std::string> ALL_TEMPLATE_PATHS = {
+    "models/templates/Apertus-8B-Instruct.jinja",
+    "models/templates/Apriel-1.6-15b-Thinker-fixed.jinja",
+    "models/templates/ByteDance-Seed-OSS.jinja",
+    "models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja",
+    "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja",
+    "models/templates/GLM-4.6.jinja",
+    "models/templates/GLM-4.7-Flash.jinja",
+    "models/templates/Kimi-K2-Instruct.jinja",
+    "models/templates/Kimi-K2-Thinking.jinja",
+    "models/templates/MiMo-VL.jinja",
+    "models/templates/MiniMax-M2.jinja",
+    "models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja",
+    "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja",
+    "models/templates/NVIDIA-Nemotron-Nano-v2.jinja",
+    "models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja",
+    "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
+    "models/templates/Qwen-QwQ-32B.jinja",
+    "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja",
+    "models/templates/Qwen3-Coder.jinja",
+    "models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja",
+    "models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja",
+    "models/templates/deepseek-ai-DeepSeek-V3.1.jinja",
+    "models/templates/fireworks-ai-llama-3-firefunction-v2.jinja",
+    "models/templates/google-gemma-2-2b-it.jinja",
+    "models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja",
+    "models/templates/llama-cpp-deepseek-r1.jinja",
+    "models/templates/meetkai-functionary-medium-v3.1.jinja",
+    "models/templates/meetkai-functionary-medium-v3.2.jinja",
+    "models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja",
+    "models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja",
+    "models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja",
+    "models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja",
+    "models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja",
+    "models/templates/moonshotai-Kimi-K2.jinja",
+    "models/templates/openai-gpt-oss-120b.jinja",
+    "models/templates/unsloth-Apriel-1.5.jinja",
+    "models/templates/unsloth-mistral-Devstral-Small-2507.jinja",
+};
+
+struct analysis_options {
+    std::vector<std::string> template_paths;
+    bool                     analyze_all = false;
+};
+
+static std::string read_file(const std::string & path) {
+    std::ifstream fin(path, std::ios::binary);
+    if (!fin.is_open()) {
+        throw std::runtime_error("Could not open file: " + path);
+    }
+    std::ostringstream buf;
+    buf << fin.rdbuf();
+    return buf.str();
+}
+
+static void print_usage(const char * program_name) {
+    LOG_ERR("Usage: %s [options]\n", program_name);
+    LOG_ERR("\nOptions:\n");
+    LOG_ERR("  --template <name>       Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1')\n");
+    LOG_ERR("  --template-file <path>  Analyze custom template file\n");
+    LOG_ERR("  --all                   Analyze all templates from test suite\n");
+    LOG_ERR("\nExamples:\n");
+    LOG_ERR("  %s --all\n", program_name);
+    LOG_ERR("  %s --template deepseek\n", program_name);
+    LOG_ERR("  %s --template-file my-template.jinja\n", program_name);
+}
+
+static bool parse_options(int argc, char ** argv, analysis_options & opts) {
+    if (argc < 2) {
+        print_usage(argv[0]);
+        return false;
+    }
+
+    for (int i = 1; i < argc; ++i) {
+        std::string arg = argv[i];
+
+        if (arg == "--all") {
+            opts.analyze_all = true;
+        } else if (arg == "--template") {
+            if (i + 1 >= argc) {
+                LOG_ERR("--template requires an argument\n");
+                return false;
+            }
+            std::string pattern = argv[++i];
+            std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
+
+            // Find matching templates
+            bool found = false;
+            for (const auto & path : ALL_TEMPLATE_PATHS) {
+                std::string path_lower = path;
+                std::transform(path_lower.begin(), path_lower.end(), path_lower.begin(), ::tolower);
+                if (path_lower.find(pattern) != std::string::npos) {
+                    opts.template_paths.push_back(path);
+                    found = true;
+                }
+            }
+
+            if (!found) {
+                LOG_ERR("No templates found matching: %s\n", pattern.c_str());
+                return false;
+            }
+        } else if (arg == "--template-file") {
+            if (i + 1 >= argc) {
+                LOG_ERR("--template-file requires an argument\n");
+                return false;
+            }
+            opts.template_paths.push_back(argv[++i]);
+        } else {
+            LOG_ERR("Unknown option: %s\n", arg.c_str());
+            print_usage(argv[0]);
+            return false;
+        }
+    }
+
+    if (opts.analyze_all) {
+        opts.template_paths = ALL_TEMPLATE_PATHS;
+    }
+
+    if (opts.template_paths.empty()) {
+        LOG_ERR("No templates specified\n");
+        print_usage(argv[0]);
+        return false;
+    }
+
+    return true;
+}
+
+static json build_tools_definition() {
+    json parameters_schema                    = json::object();
+    parameters_schema["type"]                 = "object";
+    parameters_schema["properties"]           = json::object();
+    parameters_schema["properties"]["param1"] = json::object({
+        { "type",        "string"          },
+        { "description", "First parameter" }
+    });
+    parameters_schema["properties"]["param2"] = json::object({
+        { "type",        "string"           },
+        { "description", "Second parameter" }
+    });
+    parameters_schema["required"]             = json::array({ "param1", "param2" });
+
+    return json::array({
+        json{ { "type", "function" },
+             { "function", json{ { "name", "test_function_name" },
+                                  { "description", "A test function for debugging" },
+                                  { "parameters", parameters_schema } } } }
+    });
+}
+
+// Helper to create a tool call with arguments as JSON object
+static json build_tool_call(const std::string & name, const json & args_object, const std::string & id = "call_001") {
+    return json{
+        {"id", id},
+        {"type", "function"},
+        {"function", json{
+            {"name", name},
+            {"arguments", args_object}  // Pass as JSON object, not serialized string
+        }}
+    };
+}
+
+// Helper functions to create repeating message definitions
+static json make_user_msg() {
+    return json{
+        {"role", "user"},
+        {"content", "Hello, please help me."}
+    };
+}
+
+static json make_user_msg2() {
+    return json{
+        {"role", "user"},
+        {"content", "Thank you."}
+    };
+}
+
+static json make_user_msg2_continue() {
+    return json{
+        {"role", "user"},
+        {"content", "Continue."}
+    };
+}
+
+static json make_assistant_no_tool() {
+    return json{
+        {"role", "assistant"},
+        {"content", "Let me help you."}
+    };
+}
+
+static json make_assistant_one_tool() {
+    return json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+}
+
+static json make_assistant_two_tools() {
+    return json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})),
+            build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002")
+        })}
+    };
+}
+
+static json make_assistant_no_reasoning() {
+    return json{
+        {"role", "assistant"},
+        {"content", "I can help you with that."}
+    };
+}
+
+static json make_assistant_with_reasoning() {
+    return json{
+        {"role", "assistant"},
+        {"content", "I can help you with that."},
+        {"reasoning_content", "The user is asking for help. I should respond positively."}
+    };
+}
+
+static json make_assistant_one_tool_with_reasoning() {
+    return json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })},
+        {"reasoning_content", "I need to call the tool first."}
+    };
+}
+
+static void print_diff_split(const std::string & title, const diff_split & diff) {
+    LOG_ERR("\n%s=== %s ===%s\n", ANSI_CYAN, title.c_str(), ANSI_RESET);
+    LOG_ERR("%sCommon Prefix:%s '%s'\n", ANSI_PREFIX, ANSI_RESET, diff.prefix.c_str());
+    LOG_ERR("%sCommon Suffix:%s '%s'\n", ANSI_SUFFIX, ANSI_RESET, diff.suffix.c_str());
+    LOG_ERR("%sLeft (difference):%s '%s'\n", ANSI_GREEN, ANSI_RESET, diff.left.c_str());
+    LOG_ERR("%sRight (difference):%s '%s'\n", ANSI_ORANGE, ANSI_RESET, diff.right.c_str());
+}
+
+static void check_reasoning_variables(const common_chat_template & tmpl) {
+    LOG_ERR("\n%s=== Checking Reasoning Variables ===%s\n", ANSI_CYAN, ANSI_RESET);
+
+    try {
+        // Create a list of candidate reasoning/thinking variable names to probe
+        std::vector<std::string> candidate_vars = {
+            "enable_reasoning",
+            "use_reasoning",
+            "reasoning_enabled",
+            "has_reasoning",
+            "reasoning_mode",
+            "reasoning_format",
+            "reasoning_active",
+            "with_reasoning",
+            "use_thinking",
+            "thinking_enabled",
+            "has_thinking",
+            "thinking_mode",
+            "thinking_format",
+            "thinking_active",
+            "with_thinking",
+            "enable_reason",
+            "reason_enabled",
+            "enable_think",
+            "think_enabled",
+        };
+
+        jinja::context ctx;
+        ctx.is_get_stats = true;
+
+        json messages = json::array({
+            json{
+                {"role", "user"},
+                {"content", "Test message"}
+            },
+            json{
+                {"role", "assistant"},
+                {"content", "Response"},
+                {"reasoning_content", "Some reasoning"}
+            }
+        });
+
+        // Set up base context
+        jinja::global_from_json(ctx, json{
+            {"messages", messages},
+            {"tools", json::array()},
+            {"bos_token", ""},
+            {"eos_token", ""},
+            {"add_generation_prompt", false},
+            {"enable_thinking", true}  // Already passed, so we'll exclude this from results
+        }, true);
+
+        // Add candidate variables as undefined to probe which ones are accessed
+        for (const auto & var_name : candidate_vars) {
+            ctx.set_val(var_name, jinja::mk_val<jinja::value_undefined_t>(var_name));
+        }
+
+        try {
+            jinja::runtime runtime(ctx);
+            runtime.execute(tmpl.prog);
+        } catch (const std::exception & e) {
+            // Execution may fail, that's okay - we just want to see what variables were accessed
+        }
+
+        // Check which candidate variables were accessed (stats.used = true)
+        std::vector<std::string> accessed_vars;
+        for (const auto & var_name : candidate_vars) {
+            auto val = ctx.get_val(var_name);
+            if (!val->is_undefined()) {
+                // Variable was overwritten, skip it
+                continue;
+            }
+            if (val->stats.used) {
+                accessed_vars.push_back(var_name);
+            }
+        }
+
+        if (accessed_vars.empty()) {
+            LOG_ERR("%sNo reasoning/thinking-related variables were queried by the template%s\n", ANSI_GRAY, ANSI_RESET);
+        } else {
+            LOG_ERR("Template queries the following reasoning/thinking-related variables:\n");
+            for (const auto & var : accessed_vars) {
+                LOG_ERR("  %s- %s%s\n", ANSI_ORANGE, var.c_str(), ANSI_RESET);
+            }
+        }
+
+    } catch (const std::exception & e) {
+        LOG_ERR("Error checking reasoning variables: %s\n", e.what());
+    }
+}
+
+static void analyze_template(const std::string & template_path) {
+    LOG_ERR("\n");
+    LOG_ERR("%s", ANSI_PURPLE);
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("                    ANALYZING TEMPLATE: %s\n", template_path.c_str());
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("%s", ANSI_RESET);
+
+    std::string template_source;
+    try {
+        template_source = read_file(template_path);
+    } catch (const std::exception & e) {
+        LOG_ERR("Error reading template: %s\n", e.what());
+        return;
+    }
+
+    try {
+        common_chat_template chat_template(template_source, "", "");
+        json tools = build_tools_definition();
+
+        // ===== CAPABILITIES ANALYSIS =====
+        LOG_ERR("\n%s=== Template Capabilities (from jinja::caps) ===%s\n", ANSI_CYAN, ANSI_RESET);
+        auto caps = chat_template.original_caps();
+        LOG_ERR("%ssupports_tools:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tools ? "true" : "false");
+        LOG_ERR("%ssupports_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tool_calls ? "true" : "false");
+        LOG_ERR("%ssupports_system_role:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_system_role ? "true" : "false");
+        LOG_ERR("%ssupports_parallel_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_parallel_tool_calls ? "true" : "false");
+        LOG_ERR("%srequires_typed_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.requires_typed_content ? "true" : "false");
+
+        // ===== DIFFERENTIAL ANALYSIS =====
+
+        // Test 1: With and without tools (single user message)
+        {
+            json user_msg = make_user_msg();
+
+            templates_params params_no_tools;
+            params_no_tools.messages = json::array({ user_msg });
+            params_no_tools.add_generation_prompt = false;
+            params_no_tools.tools = json::array();
+
+            templates_params params_with_tools = params_no_tools;
+            params_with_tools.tools = tools;
+
+            std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools);
+            std::string output_with_tools = common_chat_template_direct_apply(chat_template, params_with_tools);
+
+            auto diff = calculate_diff_split(output_no_tools, output_with_tools);
+            print_diff_split("Diff: With vs Without Tools (single user message)", diff);
+        }
+
+        // Test 2: With and without add_generation_prompt (single user message)
+        {
+            json user_msg = make_user_msg();
+
+            templates_params params_no_prompt;
+            params_no_prompt.messages = json::array({ user_msg });
+            params_no_prompt.add_generation_prompt = false;
+            params_no_prompt.tools = json::array();
+
+            templates_params params_with_prompt = params_no_prompt;
+            params_with_prompt.add_generation_prompt = true;
+
+            std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt);
+            std::string output_with_prompt = common_chat_template_direct_apply(chat_template, params_with_prompt);
+
+            auto diff = calculate_diff_split(output_no_prompt, output_with_prompt);
+            print_diff_split("Diff: With vs Without add_generation_prompt (single user message)", diff);
+        }
+
+        // Test 3: Assistant with reasoning_content (user, assistant)
+        {
+            json user_msg = make_user_msg();
+
+            templates_params params_no_reasoning;
+            params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() });
+            params_no_reasoning.add_generation_prompt = false;
+            params_no_reasoning.enable_thinking = true;
+
+            templates_params params_with_reasoning = params_no_reasoning;
+            params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() });
+
+            std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
+            std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
+
+            auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
+            print_diff_split("Diff: With vs Without reasoning_content (user, assistant)", diff);
+        }
+
+        // Test 4: Assistant with reasoning_content (user, assistant, user)
+        {
+            json user_msg = make_user_msg();
+            json user_msg2 = make_user_msg2();
+
+            templates_params params_no_reasoning;
+            params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 });
+            params_no_reasoning.add_generation_prompt = false;
+            params_no_reasoning.enable_thinking = true;
+
+            templates_params params_with_reasoning = params_no_reasoning;
+            params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 });
+
+            std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
+            std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
+
+            auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
+            print_diff_split("Diff: With vs Without reasoning_content (user, assistant, user)", diff);
+        }
+
+        // Test 5: Tool call in last assistant message (user, assistant)
+        {
+            json user_msg = make_user_msg();
+
+            templates_params params_no_tool;
+            params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() });
+            params_no_tool.add_generation_prompt = false;
+            params_no_tool.tools = tools;
+
+            templates_params params_with_tool = params_no_tool;
+            params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
+
+            std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
+            std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
+
+            auto diff = calculate_diff_split(output_no_tool, output_with_tool);
+            print_diff_split("Diff: With vs Without tool call (user, assistant)", diff);
+        }
+
+        // Test 6: Tool call in last assistant message (user, assistant, user)
+        {
+            json user_msg = make_user_msg();
+            json user_msg2 = make_user_msg2_continue();
+
+            templates_params params_no_tool;
+            params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 });
+            params_no_tool.add_generation_prompt = false;
+            params_no_tool.tools = tools;
+
+            templates_params params_with_tool = params_no_tool;
+            params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
+
+            std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
+            std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
+
+            auto diff = calculate_diff_split(output_no_tool, output_with_tool);
+            print_diff_split("Diff: With vs Without tool call (user, assistant, user)", diff);
+        }
+
+        // Test 7: One vs two tool calls (user, assistant)
+        {
+            json user_msg = make_user_msg();
+
+            templates_params params_one_tool;
+            params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
+            params_one_tool.add_generation_prompt = false;
+            params_one_tool.tools = tools;
+
+            templates_params params_two_tools = params_one_tool;
+            params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() });
+
+            std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
+            std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
+
+            auto diff = calculate_diff_split(output_one_tool, output_two_tools);
+            print_diff_split("Diff: One vs Two tool calls (user, assistant)", diff);
+        }
+
+        // Test 8: One vs two tool calls (user, assistant, user)
+        {
+            json user_msg = make_user_msg();
+            json user_msg2 = make_user_msg2_continue();
+
+            templates_params params_one_tool;
+            params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
+            params_one_tool.add_generation_prompt = false;
+            params_one_tool.tools = tools;
+
+            templates_params params_two_tools = params_one_tool;
+            params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 });
+
+            std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
+            std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
+
+            auto diff = calculate_diff_split(output_one_tool, output_two_tools);
+            print_diff_split("Diff: One vs Two tool calls (user, assistant, user)", diff);
+        }
+
+        // Test 9: Tool call with vs without reasoning_content (user, assistant)
+        {
+            json user_msg = make_user_msg();
+
+            templates_params params_no_reasoning;
+            params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() });
+            params_no_reasoning.add_generation_prompt = false;
+            params_no_reasoning.tools = tools;
+            params_no_reasoning.enable_thinking = true;
+
+            templates_params params_with_reasoning = params_no_reasoning;
+            params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() });
+
+            std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
+            std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
+
+            auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
+            print_diff_split("Diff: Tool call with vs without reasoning_content (user, assistant)", diff);
+        }
+
+        // Check reasoning variables
+        check_reasoning_variables(chat_template);
+
+    } catch (const std::exception & e) {
+        LOG_ERR("Analysis failed: %s\n", e.what());
+    }
+}
+
+int main(int argc, char ** argv) {
+    // Set log level to capture all output
+    common_log_set_verbosity_thold(99);
+
+    analysis_options opts;
+    if (!parse_options(argc, argv, opts)) {
+        return 1;
+    }
+
+    LOG_ERR("\n");
+    LOG_ERR("%s", ANSI_PURPLE);
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("                      TEMPLATE ANALYSIS TOOL\n");
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("%s", ANSI_RESET);
+    LOG_ERR("Analyzing %s%zu%s template(s)\n", ANSI_CYAN, opts.template_paths.size(), ANSI_RESET);
+
+    for (const auto & path : opts.template_paths) {
+        analyze_template(path);
+    }
+
+    LOG_ERR("\n");
+    LOG_ERR("%s", ANSI_GREEN);
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("                      ANALYSIS COMPLETE\n");
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("%s", ANSI_RESET);
+
+    return 0;
+}
diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 7f9c3c566be..b6f0333a387 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -15,6 +15,7 @@
 #include <cstddef>
 #include <cinttypes>
 #include <memory>
+#include <stdexcept>
 #include <filesystem>
 
 // fix problem with std::min and std::max
@@ -2741,7 +2742,15 @@ struct server_context_impl {
 
                 slot.i_batch = -1;
 
-                common_sampler_accept(slot.smpl.get(), id, true);
+                try {
+                    common_sampler_accept(slot.smpl.get(), id, true);
+                } catch (std::runtime_error & e) {
+                    SLT_ERR(slot, "Error when accepting token for sampler: %s\n", e.what());
+                    send_error(slot, std::string("Error when accepting token for sampler: ") + e.what(), ERROR_TYPE_SERVER);
+                    slot.release();
+                    slot.i_batch = -1;
+                    continue; // continue loop of slots
+                }
 
                 // here we have synchronized the llama_context (due to the sampling above), so we can do time measurement
                 const int64_t t_current = ggml_time_us();
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp
index 2d25db63b74..a2bc514bf06 100644
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -1,12 +1,12 @@
-#include "server-common.h"
 #include "server-task.h"
 
+#include "chat.h"
 #include "common.h"
+#include "json-schema-to-grammar.h"
 #include "llama.h"
-#include "chat.h"
 #include "sampling.h"
 #include "speculative.h"
-#include "json-schema-to-grammar.h"
+#include "server-common.h"
 
 using json = nlohmann::ordered_json;
 
@@ -18,8 +18,8 @@ json task_params::format_logit_bias(const std::vector<llama_logit_bias> & logit_
     json data = json::array();
     for (const auto & lb : logit_bias) {
         data.push_back(json{
-            {"bias", lb.bias},
-            {"token", lb.token},
+            { "bias",  lb.bias  },
+            { "token", lb.token },
         });
     }
     return data;
@@ -34,41 +34,44 @@ json task_params::to_json(bool only_metrics) const {
 
     json lora = json::array();
     for (auto & it : this->lora) {
-        lora.push_back({{"id", it.first}, {"scale", it.second}});
+        lora.push_back({
+            { "id",    it.first  },
+            { "scale", it.second }
+        });
     }
 
     if (only_metrics) {
-        return json {
-            {"seed",                      sampling.seed},
-            {"temperature",               sampling.temp},
-            {"dynatemp_range",            sampling.dynatemp_range},
-            {"dynatemp_exponent",         sampling.dynatemp_exponent},
-            {"top_k",                     sampling.top_k},
-            {"top_p",                     sampling.top_p},
-            {"min_p",                     sampling.min_p},
-            {"top_n_sigma",               sampling.top_n_sigma},
-            {"xtc_probability",           sampling.xtc_probability},
-            {"xtc_threshold",             sampling.xtc_threshold},
-            {"typical_p",                 sampling.typ_p},
-            {"repeat_last_n",             sampling.penalty_last_n},
-            {"repeat_penalty",            sampling.penalty_repeat},
-            {"presence_penalty",          sampling.penalty_present},
-            {"frequency_penalty",         sampling.penalty_freq},
-            {"dry_multiplier",            sampling.dry_multiplier},
-            {"dry_base",                  sampling.dry_base},
-            {"dry_allowed_length",        sampling.dry_allowed_length},
-            {"dry_penalty_last_n",        sampling.dry_penalty_last_n},
-            {"mirostat",                  sampling.mirostat},
-            {"mirostat_tau",              sampling.mirostat_tau},
-            {"mirostat_eta",              sampling.mirostat_eta},
-            {"max_tokens",                n_predict},
-            {"n_predict",                 n_predict}, // TODO: deduplicate?
-            {"n_keep",                    n_keep},
-            {"n_discard",                 n_discard},
-            {"ignore_eos",                sampling.ignore_eos},
-            {"stream",                    stream},
-            {"n_probs",                   sampling.n_probs},
-            {"min_keep",                  sampling.min_keep},
+        return json{
+            { "seed",                 sampling.seed                                                        },
+            { "temperature",          sampling.temp                                                        },
+            { "dynatemp_range",       sampling.dynatemp_range                                              },
+            { "dynatemp_exponent",    sampling.dynatemp_exponent                                           },
+            { "top_k",                sampling.top_k                                                       },
+            { "top_p",                sampling.top_p                                                       },
+            { "min_p",                sampling.min_p                                                       },
+            { "top_n_sigma",          sampling.top_n_sigma                                                 },
+            { "xtc_probability",      sampling.xtc_probability                                             },
+            { "xtc_threshold",        sampling.xtc_threshold                                               },
+            { "typical_p",            sampling.typ_p                                                       },
+            { "repeat_last_n",        sampling.penalty_last_n                                              },
+            { "repeat_penalty",       sampling.penalty_repeat                                              },
+            { "presence_penalty",     sampling.penalty_present                                             },
+            { "frequency_penalty",    sampling.penalty_freq                                                },
+            { "dry_multiplier",       sampling.dry_multiplier                                              },
+            { "dry_base",             sampling.dry_base                                                    },
+            { "dry_allowed_length",   sampling.dry_allowed_length                                          },
+            { "dry_penalty_last_n",   sampling.dry_penalty_last_n                                          },
+            { "mirostat",             sampling.mirostat                                                    },
+            { "mirostat_tau",         sampling.mirostat_tau                                                },
+            { "mirostat_eta",         sampling.mirostat_eta                                                },
+            { "max_tokens",           n_predict                                                            },
+            { "n_predict",            n_predict                                                            }, // TODO: deduplicate?
+            { "n_keep",               n_keep                                                               },
+            { "n_discard",            n_discard                                                            },
+            { "ignore_eos",           sampling.ignore_eos                                                  },
+            { "stream",               stream                                                               },
+            { "n_probs",              sampling.n_probs                                                     },
+            { "min_keep",             sampling.min_keep                                                    },
             {"chat_format",               common_chat_format_name(chat_parser_params.format)},
             {"reasoning_format",          common_reasoning_format_name(chat_parser_params.reasoning_format)},
             {"reasoning_in_content",      chat_parser_params.reasoning_in_content},
@@ -95,44 +98,44 @@ json task_params::to_json(bool only_metrics) const {
         grammar_triggers.push_back(ct.to_json());
     }
 
-    return json {
-        {"seed",                      sampling.seed},
-        {"temperature",               sampling.temp},
-        {"dynatemp_range",            sampling.dynatemp_range},
-        {"dynatemp_exponent",         sampling.dynatemp_exponent},
-        {"top_k",                     sampling.top_k},
-        {"top_p",                     sampling.top_p},
-        {"min_p",                     sampling.min_p},
-        {"top_n_sigma",               sampling.top_n_sigma},
-        {"xtc_probability",           sampling.xtc_probability},
-        {"xtc_threshold",             sampling.xtc_threshold},
-        {"typical_p",                 sampling.typ_p},
-        {"repeat_last_n",             sampling.penalty_last_n},
-        {"repeat_penalty",            sampling.penalty_repeat},
-        {"presence_penalty",          sampling.penalty_present},
-        {"frequency_penalty",         sampling.penalty_freq},
-        {"dry_multiplier",            sampling.dry_multiplier},
-        {"dry_base",                  sampling.dry_base},
-        {"dry_allowed_length",        sampling.dry_allowed_length},
-        {"dry_penalty_last_n",        sampling.dry_penalty_last_n},
-        {"dry_sequence_breakers",     sampling.dry_sequence_breakers},
-        {"mirostat",                  sampling.mirostat},
-        {"mirostat_tau",              sampling.mirostat_tau},
-        {"mirostat_eta",              sampling.mirostat_eta},
-        {"stop",                      antiprompt},
-        {"max_tokens",                n_predict},
-        {"n_predict",                 n_predict}, // TODO: deduplicate?
-        {"n_keep",                    n_keep},
-        {"n_discard",                 n_discard},
-        {"ignore_eos",                sampling.ignore_eos},
-        {"stream",                    stream},
-        {"logit_bias",                format_logit_bias(sampling.logit_bias)},
-        {"n_probs",                   sampling.n_probs},
-        {"min_keep",                  sampling.min_keep},
-        {"grammar",                   sampling.grammar},
-        {"grammar_lazy",              sampling.grammar_lazy},
-        {"grammar_triggers",          grammar_triggers},
-        {"preserved_tokens",          sampling.preserved_tokens},
+    return json{
+        { "seed",                  sampling.seed                                                        },
+        { "temperature",           sampling.temp                                                        },
+        { "dynatemp_range",        sampling.dynatemp_range                                              },
+        { "dynatemp_exponent",     sampling.dynatemp_exponent                                           },
+        { "top_k",                 sampling.top_k                                                       },
+        { "top_p",                 sampling.top_p                                                       },
+        { "min_p",                 sampling.min_p                                                       },
+        { "top_n_sigma",           sampling.top_n_sigma                                                 },
+        { "xtc_probability",       sampling.xtc_probability                                             },
+        { "xtc_threshold",         sampling.xtc_threshold                                               },
+        { "typical_p",             sampling.typ_p                                                       },
+        { "repeat_last_n",         sampling.penalty_last_n                                              },
+        { "repeat_penalty",        sampling.penalty_repeat                                              },
+        { "presence_penalty",      sampling.penalty_present                                             },
+        { "frequency_penalty",     sampling.penalty_freq                                                },
+        { "dry_multiplier",        sampling.dry_multiplier                                              },
+        { "dry_base",              sampling.dry_base                                                    },
+        { "dry_allowed_length",    sampling.dry_allowed_length                                          },
+        { "dry_penalty_last_n",    sampling.dry_penalty_last_n                                          },
+        { "dry_sequence_breakers", sampling.dry_sequence_breakers                                       },
+        { "mirostat",              sampling.mirostat                                                    },
+        { "mirostat_tau",          sampling.mirostat_tau                                                },
+        { "mirostat_eta",          sampling.mirostat_eta                                                },
+        { "stop",                  antiprompt                                                           },
+        { "max_tokens",            n_predict                                                            },
+        { "n_predict",             n_predict                                                            }, // TODO: deduplicate?
+        { "n_keep",                n_keep                                                               },
+        { "n_discard",             n_discard                                                            },
+        { "ignore_eos",            sampling.ignore_eos                                                  },
+        { "stream",                stream                                                               },
+        { "logit_bias",            format_logit_bias(sampling.logit_bias)                               },
+        { "n_probs",               sampling.n_probs                                                     },
+        { "min_keep",              sampling.min_keep                                                    },
+        { "grammar",               sampling.grammar                                                     },
+        { "grammar_lazy",          sampling.grammar_lazy                                                },
+        { "grammar_triggers",      grammar_triggers                                                     },
+        { "preserved_tokens",      sampling.preserved_tokens                                            },
         {"chat_format",               common_chat_format_name(chat_parser_params.format)},
         {"reasoning_format",          common_reasoning_format_name(chat_parser_params.reasoning_format)},
         {"reasoning_in_content",      chat_parser_params.reasoning_in_content},
@@ -156,21 +159,75 @@ json task_params::to_json(bool only_metrics) const {
 //
 // task_result_state
 //
-common_chat_msg task_result_state::update_chat_msg(
-        const std::string & text_added,
-        bool is_partial,
-        std::vector<common_chat_msg_diff> & diffs) {
+common_chat_msg task_result_state::update_chat_msg(const std::string &                 text_added,
+                                                   bool                                is_partial,
+                                                   std::vector<common_chat_msg_diff> & diffs,
+                                                   bool                                filter_tool_calls) {
     generated_text += text_added;
     auto msg_prv_copy = chat_msg;
     SRV_DBG("Parsing chat message: %s\n", generated_text.c_str());
-    auto new_msg = common_chat_parse(
-        generated_text,
-        is_partial,
-        chat_parser_params);
+    auto new_msg = common_chat_parse(generated_text, is_partial, chat_parser_params);
     if (!new_msg.empty()) {
         new_msg.set_tool_call_ids(generated_tool_call_ids, gen_tool_call_id);
-        chat_msg = new_msg;
-        diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, new_msg.empty() ? msg_prv_copy : new_msg);
+        chat_msg       = new_msg;
+        auto all_diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, chat_msg);
+
+        if (!filter_tool_calls) {
+            diffs = std::move(all_diffs);
+        } else {
+            for (auto & d : all_diffs) {
+                // If this is a new type of delta, flush all currently pending tool call names
+                for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
+                    if (sent_tool_call_names.count(i) || chat_msg.tool_calls[i].name.empty()) {
+                        continue;
+                    }
+                    if (d.tool_call_index != i || !d.tool_call_delta.arguments.empty()) {
+                        common_chat_msg_diff header;
+                        header.tool_call_index      = i;
+                        header.tool_call_delta.id   = chat_msg.tool_calls[i].id;
+                        header.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                        diffs.push_back(std::move(header));
+                        sent_tool_call_names.insert(i);
+                    }
+                }
+
+                if (d.tool_call_index == std::string::npos) {
+                    diffs.push_back(std::move(d));
+                } else {
+                    size_t i = d.tool_call_index;
+                    if (sent_tool_call_names.count(i)) {
+                        if (!d.tool_call_delta.arguments.empty()) {
+                            d.tool_call_delta.name = "";
+                            d.tool_call_delta.id   = "";
+                            diffs.push_back(std::move(d));
+                        }
+                    } else {
+                        // Not sent yet.
+                        if (!d.tool_call_delta.arguments.empty() || !is_partial) {
+                            d.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                            d.tool_call_delta.id   = chat_msg.tool_calls[i].id;
+                            diffs.push_back(std::move(d));
+                            sent_tool_call_names.insert(i);
+                        } else {
+                            // Suppress
+                        }
+                    }
+                }
+            }
+            // Final check at EOF
+            if (!is_partial) {
+                for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
+                    if (!sent_tool_call_names.count(i) && !chat_msg.tool_calls[i].name.empty()) {
+                        common_chat_msg_diff header;
+                        header.tool_call_index      = i;
+                        header.tool_call_delta.id   = chat_msg.tool_calls[i].id;
+                        header.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                        diffs.push_back(std::move(header));
+                        sent_tool_call_names.insert(i);
+                    }
+                }
+            }
+        }
     }
     return chat_msg;
 }
@@ -179,11 +236,10 @@ common_chat_msg task_result_state::update_chat_msg(
 // server_task
 //
 
-task_params server_task::params_from_json_cmpl(
-        const llama_vocab * vocab,
-        const common_params & params_base,
-        const int n_ctx_slot,
-        const json & data) {
+task_params server_task::params_from_json_cmpl(const llama_vocab *   vocab,
+                                               const common_params & params_base,
+                                               const int             n_ctx_slot,
+                                               const json &          data) {
     task_params params;
 
     // Sampling parameter defaults are loaded from the global server context (but individual requests can still them)
@@ -213,8 +269,8 @@ task_params server_task::params_from_json_cmpl(
     params.n_cmpl           = json_value(data,       "n_cmpl",             json_value(data, "n", 1));
     params.n_cache_reuse    = json_value(data,       "n_cache_reuse",      defaults.n_cache_reuse);
     //params.t_max_prompt_ms  = json_value(data,       "t_max_prompt_ms",    defaults.t_max_prompt_ms); // TODO: implement
-    params.t_max_predict_ms = json_value(data,       "t_max_predict_ms",   defaults.t_max_predict_ms);
-    params.response_fields  = json_value(data,       "response_fields",    std::vector<std::string>());
+    params.t_max_predict_ms = json_value(data, "t_max_predict_ms", defaults.t_max_predict_ms);
+    params.response_fields  = json_value(data, "response_fields", std::vector<std::string>());
 
     params.sampling.top_k              = json_value(data, "top_k",               defaults.sampling.top_k);
     params.sampling.top_p              = json_value(data, "top_p",               defaults.sampling.top_p);
@@ -266,7 +322,7 @@ task_params server_task::params_from_json_cmpl(
     params.speculative.ngram_min_hits   = std::max(std::min(1, (int) params.speculative.ngram_min_hits),   1024);
 
     // Use OpenAI API logprobs only if n_probs wasn't provided
-    if (data.contains("logprobs") && params.sampling.n_probs == defaults.sampling.n_probs){
+    if (data.contains("logprobs") && params.sampling.n_probs == defaults.sampling.n_probs) {
         params.sampling.n_probs = json_value(data, "logprobs", defaults.sampling.n_probs);
     }
 
@@ -309,7 +365,8 @@ task_params server_task::params_from_json_cmpl(
         // Ref: https://github.com/oobabooga/text-generation-webui/blob/d1af7a41ade7bd3c3a463bfa640725edb818ebaf/extensions/openai/typing.py#L39
 
         if (data.contains("dry_sequence_breakers")) {
-            params.sampling.dry_sequence_breakers = json_value(data, "dry_sequence_breakers", std::vector<std::string>());
+            params.sampling.dry_sequence_breakers =
+                json_value(data, "dry_sequence_breakers", std::vector<std::string>());
             if (params.sampling.dry_sequence_breakers.empty()) {
                 throw std::runtime_error("Error: dry_sequence_breakers must be a non-empty array of strings");
             }
@@ -319,15 +376,15 @@ task_params server_task::params_from_json_cmpl(
     // process "json_schema" and "grammar"
     if (data.contains("json_schema") && !data.contains("grammar")) {
         try {
-            auto schema                  = json_value(data, "json_schema", json::object());
+            auto schema = json_value(data, "json_schema", json::object());
             SRV_DBG("JSON schema: %s\n", schema.dump(2).c_str());
-            params.sampling.grammar      = json_schema_to_grammar(schema);
+            params.sampling.grammar = json_schema_to_grammar(schema);
             SRV_DBG("Converted grammar: %s\n", params.sampling.grammar.c_str());
         } catch (const std::exception & e) {
             throw std::runtime_error(std::string("\"json_schema\": ") + e.what());
         }
     } else {
-        params.sampling.grammar      = json_value(data, "grammar", defaults.sampling.grammar);
+        params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar);
         SRV_DBG("Grammar: %s\n", params.sampling.grammar.c_str());
         params.sampling.grammar_lazy = json_value(data, "grammar_lazy", defaults.sampling.grammar_lazy);
         SRV_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false");
@@ -346,9 +403,10 @@ task_params server_task::params_from_json_cmpl(
             reasoning_format = common_reasoning_format_from_name(data.at("reasoning_format").get<std::string>());
         }
         params.chat_parser_params.reasoning_format = reasoning_format;
-        params.chat_parser_params.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
+        params.chat_parser_params.reasoning_in_content =
+            params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
         params.chat_parser_params.thinking_forced_open = json_value(data, "thinking_forced_open", false);
-        params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false);
+        params.chat_parser_params.parse_tool_calls     = json_value(data, "parse_tool_calls", false);
         if (data.contains("chat_parser")) {
             params.chat_parser_params.parser.load(data.at("chat_parser").get<std::string>());
         }
@@ -358,7 +416,8 @@ task_params server_task::params_from_json_cmpl(
         const auto preserved_tokens = data.find("preserved_tokens");
         if (preserved_tokens != data.end()) {
             for (const auto & t : *preserved_tokens) {
-                auto ids = common_tokenize(vocab, t.get<std::string>(), /* add_special= */ false, /* parse_special= */ true);
+                auto ids =
+                    common_tokenize(vocab, t.get<std::string>(), /* add_special= */ false, /* parse_special= */ true);
                 if (ids.size() == 1) {
                     SRV_DBG("Preserved token: %d\n", ids[0]);
                     params.sampling.preserved_tokens.insert(ids[0]);
@@ -377,18 +436,20 @@ task_params server_task::params_from_json_cmpl(
                     auto ids = common_tokenize(vocab, word, /* add_special= */ false, /* parse_special= */ true);
                     if (ids.size() == 1) {
                         auto token = ids[0];
-                        if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) {
-                            throw std::runtime_error("Grammar trigger word should be marked as preserved token: " + word);
+                        if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(),
+                                      (llama_token) token) == params.sampling.preserved_tokens.end()) {
+                            throw std::runtime_error("Grammar trigger word should be marked as preserved token: " +
+                                                     word);
                         }
                         SRV_DBG("Grammar trigger token: %d (`%s`)\n", token, word.c_str());
                         common_grammar_trigger trigger;
-                        trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
+                        trigger.type  = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
                         trigger.value = word;
                         trigger.token = token;
                         params.sampling.grammar_triggers.push_back(std::move(trigger));
                     } else {
                         SRV_DBG("Grammar trigger word: `%s`\n", word.c_str());
-                        params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
+                        params.sampling.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word });
                     }
                 } else {
                     if (ct.value.type == COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN) {
@@ -428,12 +489,12 @@ task_params server_task::params_from_json_cmpl(
                     if (el[0].is_number_integer()) {
                         llama_token tok = el[0].get<llama_token>();
                         if (tok >= 0 && tok < n_vocab) {
-                            params.sampling.logit_bias.push_back({tok, bias});
+                            params.sampling.logit_bias.push_back({ tok, bias });
                         }
                     } else if (el[0].is_string()) {
                         auto toks = common_tokenize(vocab, el[0].get<std::string>(), false);
                         for (auto tok : toks) {
-                            params.sampling.logit_bias.push_back({tok, bias});
+                            params.sampling.logit_bias.push_back({ tok, bias });
                         }
                     }
                 }
@@ -441,8 +502,8 @@ task_params server_task::params_from_json_cmpl(
         } else if (logit_bias != data.end() && logit_bias->is_object()) {
             const int n_vocab = llama_vocab_n_tokens(vocab);
             for (const auto & el : logit_bias->items()) {
-                float bias;
-                const auto & key = el.key();
+                float        bias;
+                const auto & key   = el.key();
                 const auto & value = el.value();
                 if (value.is_number()) {
                     bias = value.get<float>();
@@ -452,16 +513,16 @@ task_params server_task::params_from_json_cmpl(
                     continue;
                 }
 
-                char *end;
+                char *      end;
                 llama_token tok = strtol(key.c_str(), &end, 10);
                 if (*end == 0) {
                     if (tok >= 0 && tok < n_vocab) {
-                        params.sampling.logit_bias.push_back({tok, bias});
+                        params.sampling.logit_bias.push_back({ tok, bias });
                     }
                 } else {
                     auto toks = common_tokenize(vocab, key, false);
                     for (auto tok : toks) {
-                        params.sampling.logit_bias.push_back({tok, bias});
+                        params.sampling.logit_bias.push_back({ tok, bias });
                     }
                 }
             }
@@ -469,9 +530,9 @@ task_params server_task::params_from_json_cmpl(
 
         params.sampling.ignore_eos = json_value(data, "ignore_eos", params_base.sampling.ignore_eos);
         if (params.sampling.ignore_eos) {
-            params.sampling.logit_bias.insert(
-                    params.sampling.logit_bias.end(),
-                    defaults.sampling.logit_bias_eog.begin(), defaults.sampling.logit_bias_eog.end());
+            params.sampling.logit_bias.insert(params.sampling.logit_bias.end(),
+                                              defaults.sampling.logit_bias_eog.begin(),
+                                              defaults.sampling.logit_bias_eog.end());
         }
     }
 
@@ -497,7 +558,7 @@ task_params server_task::params_from_json_cmpl(
         if (samplers != data.end()) {
             if (samplers->is_array()) {
                 params.sampling.samplers = common_sampler_types_from_names(*samplers, false);
-            } else if (samplers->is_string()){
+            } else if (samplers->is_string()) {
                 params.sampling.samplers = common_sampler_types_from_chars(samplers->get<std::string>());
             }
         } else {
@@ -518,21 +579,21 @@ task_params server_task::params_from_json_cmpl(
 
 json result_timings::to_json() const {
     json base = {
-        {"cache_n",                cache_n},
+        { "cache_n",                cache_n                },
 
-        {"prompt_n",               prompt_n},
-        {"prompt_ms",              prompt_ms},
-        {"prompt_per_token_ms",    prompt_per_token_ms},
-        {"prompt_per_second",      prompt_per_second},
+        { "prompt_n",               prompt_n               },
+        { "prompt_ms",              prompt_ms              },
+        { "prompt_per_token_ms",    prompt_per_token_ms    },
+        { "prompt_per_second",      prompt_per_second      },
 
-        {"predicted_n",            predicted_n},
-        {"predicted_ms",           predicted_ms},
-        {"predicted_per_token_ms", predicted_per_token_ms},
-        {"predicted_per_second",   predicted_per_second},
+        { "predicted_n",            predicted_n            },
+        { "predicted_ms",           predicted_ms           },
+        { "predicted_per_token_ms", predicted_per_token_ms },
+        { "predicted_per_second",   predicted_per_second   },
     };
 
     if (draft_n > 0) {
-        base["draft_n"] = draft_n;
+        base["draft_n"]          = draft_n;
         base["draft_n_accepted"] = draft_n_accepted;
     }
 
@@ -543,20 +604,24 @@ json result_timings::to_json() const {
 // result_prompt_progress
 //
 json result_prompt_progress::to_json() const {
-    return json {
-        {"total",     total},
-        {"cache",     cache},
-        {"processed", processed},
-        {"time_ms",   time_ms},
+    return json{
+        { "total",     total     },
+        { "cache",     cache     },
+        { "processed", processed },
+        { "time_ms",   time_ms   },
     };
 }
 
 static inline std::string stop_type_to_str(stop_type type) {
     switch (type) {
-        case STOP_TYPE_EOS:   return "eos";
-        case STOP_TYPE_WORD:  return "word";
-        case STOP_TYPE_LIMIT: return "limit";
-        default:              return "none";
+        case STOP_TYPE_EOS:
+            return "eos";
+        case STOP_TYPE_WORD:
+            return "word";
+        case STOP_TYPE_LIMIT:
+            return "limit";
+        default:
+            return "none";
     }
 }
 
@@ -569,36 +634,28 @@ json completion_token_output::to_json(bool post_sampling_probs) const {
     for (const auto & p : probs) {
         std::string txt(p.txt);
         txt.resize(validate_utf8(txt));
-        probs_for_token.push_back(json {
-            {"id",      p.tok},
-            {"token",   txt},
-            {"bytes",   str_to_bytes(p.txt)},
-            {
-                post_sampling_probs ? "prob" : "logprob",
-                post_sampling_probs ? p.prob : logarithm(p.prob)
-            },
+        probs_for_token.push_back(json{
+            { "id",                                     p.tok                                            },
+            { "token",                                  txt                                              },
+            { "bytes",                                  str_to_bytes(p.txt)                              },
+            { post_sampling_probs ? "prob" : "logprob", post_sampling_probs ? p.prob : logarithm(p.prob) },
         });
     }
     return probs_for_token;
 }
 
-json completion_token_output::probs_vector_to_json(const std::vector<completion_token_output> & probs, bool post_sampling_probs) {
+json completion_token_output::probs_vector_to_json(const std::vector<completion_token_output> & probs,
+                                                   bool                                         post_sampling_probs) {
     json out = json::array();
     for (const auto & p : probs) {
         std::string txt(p.text_to_send);
         txt.resize(validate_utf8(txt));
-        out.push_back(json {
-            {"id",           p.tok},
-            {"token",        txt},
-            {"bytes",        str_to_bytes(p.text_to_send)},
-            {
-                post_sampling_probs ? "prob" : "logprob",
-                post_sampling_probs ? p.prob : logarithm(p.prob)
-            },
-            {
-                post_sampling_probs ? "top_probs" : "top_logprobs",
-                p.to_json(post_sampling_probs)
-            },
+        out.push_back(json{
+            { "id",                                               p.tok                                            },
+            { "token",                                            txt                                              },
+            { "bytes",                                            str_to_bytes(p.text_to_send)                     },
+            { post_sampling_probs ? "prob" : "logprob",           post_sampling_probs ? p.prob : logarithm(p.prob) },
+            { post_sampling_probs ? "top_probs" : "top_logprobs", p.to_json(post_sampling_probs)                   },
         });
     }
     return out;
@@ -639,61 +696,58 @@ json server_task_result_cmpl_final::to_json() {
 }
 
 json server_task_result_cmpl_final::to_json_non_oaicompat() {
-    json res = json {
-        {"index",               index},
-        {"content",             content},
-        {"tokens",              tokens},
-        {"id_slot",             id_slot},
-        {"stop",                true},
-        {"model",               oaicompat_model},
-        {"tokens_predicted",    n_decoded},
-        {"tokens_evaluated",    n_prompt_tokens},
-        {"generation_settings", generation_params.to_json()},
-        {"prompt",              prompt},
-        {"has_new_line",        has_new_line},
-        {"truncated",           truncated},
-        {"stop_type",           stop_type_to_str(stop)},
-        {"stopping_word",       stopping_word},
-        {"tokens_cached",       n_tokens_cached},
-        {"timings",             timings.to_json()},
+    json res = json{
+        { "index",               index                       },
+        { "content",             content                     },
+        { "tokens",              tokens                      },
+        { "id_slot",             id_slot                     },
+        { "stop",                true                        },
+        { "model",               oaicompat_model             },
+        { "tokens_predicted",    n_decoded                   },
+        { "tokens_evaluated",    n_prompt_tokens             },
+        { "generation_settings", generation_params.to_json() },
+        { "prompt",              prompt                      },
+        { "has_new_line",        has_new_line                },
+        { "truncated",           truncated                   },
+        { "stop_type",           stop_type_to_str(stop)      },
+        { "stopping_word",       stopping_word               },
+        { "tokens_cached",       n_tokens_cached             },
+        { "timings",             timings.to_json()           },
     };
     if (!stream && !probs_output.empty()) {
-        res["completion_probabilities"] = completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs);
+        res["completion_probabilities"] =
+            completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs);
     }
     return response_fields.empty() ? res : json_get_nested_values(response_fields, res);
 }
 
 json server_task_result_cmpl_final::to_json_oaicompat() {
-    std::time_t t = std::time(0);
-    json logprobs = json(nullptr); // OAI default to null
+    std::time_t t        = std::time(0);
+    json        logprobs = json(nullptr);  // OAI default to null
     if (!stream && probs_output.size() > 0) {
         logprobs = json{
-            {"content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs)},
+            { "content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs) },
         };
     }
     json finish_reason = "length";
     if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
         finish_reason = "stop";
     }
-    json res = json {
-        {"choices",            json::array({
-            json{
-                {"text",          content},
-                {"index",         index},
-                {"logprobs",      logprobs},
-                {"finish_reason", finish_reason},
-            }
-        })},
-        {"created",            t},
-        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
-        {"object",             "text_completion"},
-        {"usage", json {
-            {"completion_tokens", n_decoded},
-            {"prompt_tokens",     n_prompt_tokens},
-            {"total_tokens",      n_decoded + n_prompt_tokens}
-        }},
-        {"id", oaicompat_cmpl_id}
+    json res = json{
+        { "choices",            json::array({ json{
+                         { "text", content },
+                         { "index", index },
+                         { "logprobs", logprobs },
+                         { "finish_reason", finish_reason },
+                     } })                                                },
+        { "created",            t                                                            },
+        { "model",              oaicompat_model                                              },
+        { "system_fingerprint", build_info                                                   },
+        { "object",             "text_completion"                                            },
+        { "usage",              json{ { "completion_tokens", n_decoded },
+                         { "prompt_tokens", n_prompt_tokens },
+                         { "total_tokens", n_decoded + n_prompt_tokens } } },
+        { "id",                 oaicompat_cmpl_id                                            }
     };
 
     // extra fields for debugging purposes
@@ -701,19 +755,19 @@ json server_task_result_cmpl_final::to_json_oaicompat() {
         res["__verbose"] = to_json_non_oaicompat();
     }
     if (timings.prompt_n >= 0) {
-        res.push_back({"timings", timings.to_json()});
+        res.push_back({ "timings", timings.to_json() });
     }
 
     return res;
 }
 
 json server_task_result_cmpl_final::to_json_oaicompat_chat() {
-    std::string finish_reason = "length";
+    std::string     finish_reason = "length";
     common_chat_msg msg;
     if (!oaicompat_msg.empty()) {
         msg = oaicompat_msg;
     } else {
-        msg.role = "assistant";
+        msg.role    = "assistant";
         msg.content = content;
     }
     if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
@@ -728,24 +782,22 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() {
 
     if (!stream && probs_output.size() > 0) {
         choice["logprobs"] = json{
-            {"content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs)},
+            { "content", completion_token_output::probs_vector_to_json(probs_output, post_sampling_probs) },
         };
     }
 
     std::time_t t = std::time(0);
 
-    json res = json {
-        {"choices",            json::array({choice})},
-        {"created",            t},
-        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
-        {"object",             "chat.completion"},
-        {"usage", json {
-            {"completion_tokens", n_decoded},
-            {"prompt_tokens",     n_prompt_tokens},
-            {"total_tokens",      n_decoded + n_prompt_tokens}
-        }},
-        {"id", oaicompat_cmpl_id}
+    json res = json{
+        { "choices",            json::array({ choice })                                      },
+        { "created",            t                                                            },
+        { "model",              oaicompat_model                                              },
+        { "system_fingerprint", build_info                                                   },
+        { "object",             "chat.completion"                                            },
+        { "usage",              json{ { "completion_tokens", n_decoded },
+                         { "prompt_tokens", n_prompt_tokens },
+                         { "total_tokens", n_decoded + n_prompt_tokens } } },
+        { "id",                 oaicompat_cmpl_id                                            }
     };
 
     // extra fields for debugging purposes
@@ -753,14 +805,14 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() {
         res["__verbose"] = to_json_non_oaicompat();
     }
     if (timings.prompt_n >= 0) {
-        res.push_back({"timings", timings.to_json()});
+        res.push_back({ "timings", timings.to_json() });
     }
 
     return res;
 }
 
 json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
-    std::time_t t = std::time(0);
+    std::time_t t             = std::time(0);
     std::string finish_reason = "length";
     if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
         finish_reason = oaicompat_msg.tool_calls.empty() ? "stop" : "tool_calls";
@@ -785,40 +837,41 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
     }
 
     deltas.push_back({
-        {"choices", json::array({
-            json {
-                {"finish_reason", finish_reason},
-                {"index", 0},
-                {"delta", json::object()},
-            },
-        })},
-        {"created",            t},
-        {"id",                 oaicompat_cmpl_id},
-        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
-        {"object",             "chat.completion.chunk"},
+        { "choices",            json::array({
+                         json{
+                             { "finish_reason", finish_reason },
+                             { "index", 0 },
+                             { "delta", json::object() },
+                         },
+                     })             },
+        { "created",            t                       },
+        { "id",                 oaicompat_cmpl_id       },
+        { "model",              oaicompat_model         },
+        { "system_fingerprint", build_info              },
+        { "object",             "chat.completion.chunk" },
     });
 
     if (include_usage) {
         // OpenAI API spec for chat.completion.chunks specifies an empty `choices` array for the last chunk when including usage
         // https://platform.openai.com/docs/api-reference/chat_streaming/streaming#chat_streaming/streaming-choices
         deltas.push_back({
-            {"choices", json::array()},
-            {"created",            t},
-            {"id",                 oaicompat_cmpl_id},
-            {"model",              oaicompat_model},
-            {"system_fingerprint", build_info},
-            {"object",             "chat.completion.chunk"},
-            {"usage", json {
-                {"completion_tokens", n_decoded},
-                {"prompt_tokens",     n_prompt_tokens},
-                {"total_tokens",      n_decoded + n_prompt_tokens},
-            }},
+            { "choices",            json::array()           },
+            { "created",            t                       },
+            { "id",                 oaicompat_cmpl_id       },
+            { "model",              oaicompat_model         },
+            { "system_fingerprint", build_info              },
+            { "object",             "chat.completion.chunk" },
+            { "usage",
+             json{
+                  { "completion_tokens", n_decoded },
+                  { "prompt_tokens", n_prompt_tokens },
+                  { "total_tokens", n_decoded + n_prompt_tokens },
+              }                                             },
         });
     }
 
     if (timings.prompt_n >= 0) {
-        deltas.back().push_back({"timings", timings.to_json()});
+        deltas.back().push_back({ "timings", timings.to_json() });
     }
 
     // extra fields for debugging purposes
@@ -1021,7 +1074,7 @@ json server_task_result_cmpl_final::to_json_anthropic() {
     if (!oaicompat_msg.empty()) {
         msg = oaicompat_msg;
     } else {
-        msg.role = "assistant";
+        msg.role    = "assistant";
         msg.content = content;
     }
 
@@ -1036,16 +1089,16 @@ json server_task_result_cmpl_final::to_json_anthropic() {
 
     if (!msg.content.empty()) {
         content_blocks.push_back({
-            {"type", "text"},
-            {"text", msg.content}
+            { "type", "text"      },
+            { "text", msg.content }
         });
     }
 
     for (const auto & tool_call : msg.tool_calls) {
         json tool_use_block = {
-            {"type", "tool_use"},
-            {"id", tool_call.id},
-            {"name", tool_call.name}
+            { "type", "tool_use"     },
+            { "id",   tool_call.id   },
+            { "name", tool_call.name }
         };
 
         try {
@@ -1058,17 +1111,14 @@ json server_task_result_cmpl_final::to_json_anthropic() {
     }
 
     json res = {
-        {"id", oaicompat_cmpl_id},
-        {"type", "message"},
-        {"role", "assistant"},
-        {"content", content_blocks},
-        {"model", oaicompat_model},
-        {"stop_reason", stop_reason},
-        {"stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)},
-        {"usage", {
-            {"input_tokens", n_prompt_tokens},
-            {"output_tokens", n_decoded}
-        }}
+        { "id",            oaicompat_cmpl_id                                                       },
+        { "type",          "message"                                                               },
+        { "role",          "assistant"                                                             },
+        { "content",       content_blocks                                                          },
+        { "model",         oaicompat_model                                                         },
+        { "stop_reason",   stop_reason                                                             },
+        { "stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)                   },
+        { "usage",         { { "input_tokens", n_prompt_tokens }, { "output_tokens", n_decoded } } }
     };
 
     return res;
@@ -1163,31 +1213,27 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() {
                 const auto & full_tool_call = oaicompat_msg.tool_calls[diff.tool_call_index];
 
                 events.push_back({
-                    {"event", "content_block_start"},
-                    {"data", {
-                        {"type", "content_block_start"},
-                        {"index", content_block_index},
-                        {"content_block", {
-                            {"type", "tool_use"},
-                            {"id", full_tool_call.id},
-                            {"name", full_tool_call.name}
-                        }}
-                    }}
+                    { "event", "content_block_start"              },
+                    { "data",
+                     { { "type", "content_block_start" },
+                        { "index", content_block_index },
+                        { "content_block",
+                          { { "type", "tool_use" },
+                            { "id", full_tool_call.id },
+                            { "name", full_tool_call.name } } } } }
                 });
                 tool_calls_started.insert(diff.tool_call_index);
             }
 
             if (!diff.tool_call_delta.arguments.empty()) {
                 events.push_back({
-                    {"event", "content_block_delta"},
-                    {"data", {
-                        {"type", "content_block_delta"},
-                        {"index", content_block_index},
-                        {"delta", {
-                            {"type", "input_json_delta"},
-                            {"partial_json", diff.tool_call_delta.arguments}
-                        }}
-                    }}
+                    { "event", "content_block_delta"                                 },
+                    { "data",
+                     { { "type", "content_block_delta" },
+                        { "index", content_block_index },
+                        { "delta",
+                          { { "type", "input_json_delta" },
+                            { "partial_json", diff.tool_call_delta.arguments } } } } }
                 });
             }
         }
@@ -1230,33 +1276,24 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() {
     for (size_t i = 0; i < num_tool_calls; i++) {
         size_t content_block_index = (has_thinking ? 1 : 0) + (has_text ? 1 : 0) + i;
         events.push_back({
-            {"event", "content_block_stop"},
-            {"data", {
-                {"type", "content_block_stop"},
-                {"index", content_block_index}
-            }}
+            { "event", "content_block_stop"                                                   },
+            { "data",  { { "type", "content_block_stop" }, { "index", content_block_index } } }
         });
     }
 
     events.push_back({
-        {"event", "message_delta"},
-        {"data", {
-            {"type", "message_delta"},
-            {"delta", {
-                {"stop_reason", stop_reason},
-                {"stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)}
-            }},
-            {"usage", {
-                {"output_tokens", n_decoded}
-            }}
-        }}
+        { "event", "message_delta"                            },
+        { "data",
+         { { "type", "message_delta" },
+            { "delta",
+              { { "stop_reason", stop_reason },
+                { "stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word) } } },
+            { "usage", { { "output_tokens", n_decoded } } } } }
     });
 
     events.push_back({
-        {"event", "message_stop"},
-        {"data", {
-            {"type", "message_stop"}
-        }}
+        { "event", "message_stop"                 },
+        { "data",  { { "type", "message_stop" } } }
     });
 
     return events;
@@ -1315,50 +1352,49 @@ json server_task_result_cmpl_partial::to_json() {
 
 json server_task_result_cmpl_partial::to_json_non_oaicompat() {
     // non-OAI-compat JSON
-    json res = json {
-        {"index",            index},
-        {"content",          content},
-        {"tokens",           tokens},
-        {"stop",             false},
-        {"id_slot",          id_slot},
-        {"tokens_predicted", n_decoded},
-        {"tokens_evaluated", n_prompt_tokens},
+    json res = json{
+        { "index",            index           },
+        { "content",          content         },
+        { "tokens",           tokens          },
+        { "stop",             false           },
+        { "id_slot",          id_slot         },
+        { "tokens_predicted", n_decoded       },
+        { "tokens_evaluated", n_prompt_tokens },
     };
     // populate the timings object when needed (usually for the last response or with timings_per_token enabled)
     if (timings.prompt_n > 0) {
-        res.push_back({"timings", timings.to_json()});
+        res.push_back({ "timings", timings.to_json() });
     }
     if (is_progress) {
-        res.push_back({"prompt_progress", progress.to_json()});
+        res.push_back({ "prompt_progress", progress.to_json() });
     }
     if (!prob_output.probs.empty()) {
-        res["completion_probabilities"] = completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs);
+        res["completion_probabilities"] =
+            completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs);
     }
     return res;
 }
 
 json server_task_result_cmpl_partial::to_json_oaicompat() {
-    std::time_t t = std::time(0);
-    json logprobs = json(nullptr); // OAI default to null
+    std::time_t t        = std::time(0);
+    json        logprobs = json(nullptr);  // OAI default to null
     if (prob_output.probs.size() > 0) {
         logprobs = json{
-            {"content", completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs)},
+            { "content", completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs) },
         };
     }
-    json res = json {
-        {"choices",            json::array({
-            json{
-                {"text",          content},
-                {"index",         index},
-                {"logprobs",      logprobs},
-                {"finish_reason", nullptr},
-            }
-        })},
-        {"created",            t},
-        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
-        {"object",             "text_completion"},
-        {"id",                 oaicompat_cmpl_id}
+    json res = json{
+        { "choices",            json::array({ json{
+                         { "text", content },
+                         { "index", index },
+                         { "logprobs", logprobs },
+                         { "finish_reason", nullptr },
+                     } })     },
+        { "created",            t                 },
+        { "model",              oaicompat_model   },
+        { "system_fingerprint", build_info        },
+        { "object",             "text_completion" },
+        { "id",                 oaicompat_cmpl_id }
     };
 
     // extra fields for debugging purposes
@@ -1366,42 +1402,42 @@ json server_task_result_cmpl_partial::to_json_oaicompat() {
         res["__verbose"] = to_json_non_oaicompat();
     }
     if (timings.prompt_n >= 0) {
-        res.push_back({"timings", timings.to_json()});
+        res.push_back({ "timings", timings.to_json() });
     }
     if (is_progress) {
-        res.push_back({"prompt_progress", progress.to_json()});
+        res.push_back({ "prompt_progress", progress.to_json() });
     }
 
     return res;
 }
 
 json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
-    bool first = n_decoded == 1;
-    std::time_t t = std::time(0);
-    json choices;
+    bool        first = n_decoded == 1;
+    std::time_t t     = std::time(0);
+    json        choices;
 
     std::vector<json> deltas;
-    auto add_delta = [&](const json & delta) {
+    auto              add_delta = [&](const json & delta) {
         deltas.push_back({
-            {"choices", json::array({
-                json {
-                    {"finish_reason", nullptr},
-                    {"index", index},
-                    {"delta", delta},
-                },
-            })},
-            {"created", t},
-            {"id", oaicompat_cmpl_id},
-            {"model", oaicompat_model},
-            {"system_fingerprint", build_info},
-            {"object", "chat.completion.chunk"},
+            { "choices",            json::array({
+                             json{
+                                              { "finish_reason", nullptr },
+                                              { "index", index },
+                                              { "delta", delta },
+                             },
+                         })         },
+            { "created",            t                       },
+            { "id",                 oaicompat_cmpl_id       },
+            { "model",              oaicompat_model         },
+            { "system_fingerprint", build_info              },
+            { "object",             "chat.completion.chunk" },
         });
     };
     // We have to send an initial update to conform to openai behavior
     if (first || is_progress) {
         add_delta({
-            {"role", "assistant"},
-            {"content", nullptr},
+            { "role",    "assistant" },
+            { "content", nullptr     },
         });
     }
 
@@ -1414,16 +1450,16 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
         GGML_ASSERT(last_json.at("choices").size() >= 1);
 
         if (prob_output.probs.size() > 0) {
-            last_json.at("choices").at(0)["logprobs"] = json {
-                {"content", completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs)},
+            last_json.at("choices").at(0)["logprobs"] = json{
+                { "content", completion_token_output::probs_vector_to_json({ prob_output }, post_sampling_probs) },
             };
         }
 
         if (timings.prompt_n >= 0) {
-            last_json.push_back({"timings", timings.to_json()});
+            last_json.push_back({ "timings", timings.to_json() });
         }
         if (is_progress) {
-            last_json.push_back({"prompt_progress", progress.to_json()});
+            last_json.push_back({ "prompt_progress", progress.to_json() });
         }
     }
 
@@ -1564,23 +1600,18 @@ json server_task_result_cmpl_partial::to_json_anthropic() {
 
     if (first) {
         events.push_back({
-            {"event", "message_start"},
-            {"data", {
-                {"type", "message_start"},
-                {"message", {
-                    {"id", oaicompat_cmpl_id},
-                    {"type", "message"},
-                    {"role", "assistant"},
-                    {"content", json::array()},
-                    {"model", oaicompat_model},
-                    {"stop_reason", nullptr},
-                    {"stop_sequence", nullptr},
-                    {"usage", {
-                        {"input_tokens", n_prompt_tokens},
-                        {"output_tokens", 0}
-                    }}
-                }}
-            }}
+            { "event", "message_start"                                                                 },
+            { "data",
+             { { "type", "message_start" },
+                { "message",
+                  { { "id", oaicompat_cmpl_id },
+                    { "type", "message" },
+                    { "role", "assistant" },
+                    { "content", json::array() },
+                    { "model", oaicompat_model },
+                    { "stop_reason", nullptr },
+                    { "stop_sequence", nullptr },
+                    { "usage", { { "input_tokens", n_prompt_tokens }, { "output_tokens", 0 } } } } } } }
         });
     }
 
@@ -1662,30 +1693,26 @@ json server_task_result_cmpl_partial::to_json_anthropic() {
 
             if (!diff.tool_call_delta.name.empty()) {
                 events.push_back({
-                    {"event", "content_block_start"},
-                    {"data", {
-                        {"type", "content_block_start"},
-                        {"index", content_block_index},
-                        {"content_block", {
-                            {"type", "tool_use"},
-                            {"id", diff.tool_call_delta.id},
-                            {"name", diff.tool_call_delta.name}
-                        }}
-                    }}
+                    { "event", "content_block_start"                    },
+                    { "data",
+                     { { "type", "content_block_start" },
+                        { "index", content_block_index },
+                        { "content_block",
+                          { { "type", "tool_use" },
+                            { "id", diff.tool_call_delta.id },
+                            { "name", diff.tool_call_delta.name } } } } }
                 });
             }
 
             if (!diff.tool_call_delta.arguments.empty()) {
                 events.push_back({
-                    {"event", "content_block_delta"},
-                    {"data", {
-                        {"type", "content_block_delta"},
-                        {"index", content_block_index},
-                        {"delta", {
-                            {"type", "input_json_delta"},
-                            {"partial_json", diff.tool_call_delta.arguments}
-                        }}
-                    }}
+                    { "event", "content_block_delta"                                 },
+                    { "data",
+                     { { "type", "content_block_delta" },
+                        { "index", content_block_index },
+                        { "delta",
+                          { { "type", "input_json_delta" },
+                            { "partial_json", diff.tool_call_delta.arguments } } } } }
                 });
             }
         }
@@ -1745,28 +1772,28 @@ json server_task_result_error::to_json() {
 // server_task_result_metrics
 //
 json server_task_result_metrics::to_json() {
-    return json {
-        { "idle",                            n_idle_slots },
-        { "processing",                      n_processing_slots },
-        { "deferred",                        n_tasks_deferred },
-        { "t_start",                         t_start },
+    return json{
+        { "idle",                            n_idle_slots                    },
+        { "processing",                      n_processing_slots              },
+        { "deferred",                        n_tasks_deferred                },
+        { "t_start",                         t_start                         },
 
         { "n_prompt_tokens_processed_total", n_prompt_tokens_processed_total },
-        { "t_tokens_generation_total",       t_tokens_generation_total },
-        { "n_tokens_predicted_total",        n_tokens_predicted_total },
-        { "t_prompt_processing_total",       t_prompt_processing_total },
+        { "t_tokens_generation_total",       t_tokens_generation_total       },
+        { "n_tokens_predicted_total",        n_tokens_predicted_total        },
+        { "t_prompt_processing_total",       t_prompt_processing_total       },
 
-        { "n_tokens_max",                    n_tokens_max },
+        { "n_tokens_max",                    n_tokens_max                    },
 
-        { "n_prompt_tokens_processed",       n_prompt_tokens_processed },
-        { "t_prompt_processing",             t_prompt_processing },
-        { "n_tokens_predicted",              n_tokens_predicted },
-        { "t_tokens_generation",             t_tokens_generation },
+        { "n_prompt_tokens_processed",       n_prompt_tokens_processed       },
+        { "t_prompt_processing",             t_prompt_processing             },
+        { "n_tokens_predicted",              n_tokens_predicted              },
+        { "t_tokens_generation",             t_tokens_generation             },
 
-        { "n_decode_total",                  n_decode_total },
-        { "n_busy_slots_total",              n_busy_slots_total },
+        { "n_decode_total",                  n_decode_total                  },
+        { "n_busy_slots_total",              n_busy_slots_total              },
 
-        { "slots",                           slots_data },
+        { "slots",                           slots_data                      },
     };
 }
 
@@ -1775,25 +1802,21 @@ json server_task_result_metrics::to_json() {
 //
 json server_task_result_slot_save_load::to_json() {
     if (is_save) {
-        return json {
-            { "id_slot",   id_slot },
-            { "filename",  filename },
-            { "n_saved",   n_tokens },
-            { "n_written", n_bytes },
-            { "timings", {
-                { "save_ms", t_ms }
-            }},
+        return json{
+            { "id_slot",   id_slot                 },
+            { "filename",  filename                },
+            { "n_saved",   n_tokens                },
+            { "n_written", n_bytes                 },
+            { "timings",   { { "save_ms", t_ms } } },
         };
     }
 
-    return json {
-        { "id_slot",    id_slot },
-        { "filename",   filename },
-        { "n_restored", n_tokens },
-        { "n_read",     n_bytes },
-        { "timings", {
-            { "restore_ms", t_ms }
-        }},
+    return json{
+        { "id_slot",    id_slot                    },
+        { "filename",   filename                   },
+        { "n_restored", n_tokens                   },
+        { "n_read",     n_bytes                    },
+        { "timings",    { { "restore_ms", t_ms } } },
     };
 }
 
@@ -1801,8 +1824,8 @@ json server_task_result_slot_save_load::to_json() {
 // server_task_result_slot_erase
 //
 json server_task_result_slot_erase::to_json() {
-    return json {
-        { "id_slot",  id_slot },
+    return json{
+        { "id_slot",  id_slot  },
         { "n_erased", n_erased },
     };
 }
@@ -1814,13 +1837,13 @@ json server_task_result_slot_erase::to_json() {
 json server_task_result_get_lora::to_json() {
     json result = json::array();
     for (size_t i = 0; i < loras.size(); ++i) {
-        auto & lora = loras[i];
-        json entry = {
-            {"id",            i},
-            {"path",          lora.info.path},
-            {"scale",         lora.info.scale},
-            {"task_name",     lora.info.task_name},
-            {"prompt_prefix", lora.info.prompt_prefix},
+        auto & lora  = loras[i];
+        json   entry = {
+            { "id",            i                       },
+            { "path",          lora.info.path          },
+            { "scale",         lora.info.scale         },
+            { "task_name",     lora.info.task_name     },
+            { "prompt_prefix", lora.info.prompt_prefix },
         };
         if (!lora.alora_invocation_tokens.empty()) {
             entry["alora_invocation_string"] = lora.alora_invocation_string;
@@ -1836,7 +1859,9 @@ json server_task_result_get_lora::to_json() {
 //
 
 json server_task_result_apply_lora::to_json() {
-    return json {{ "success", true }};
+    return json{
+        { "success", true }
+    };
 }
 
 //
@@ -1894,7 +1919,7 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t
     } catch (const std::bad_alloc & e) {
         SRV_ERR("failed to allocate memory for prompt cache state: %s\n", e.what());
 
-        limit_size = std::max<size_t>(1, 0.4*size());
+        limit_size = std::max<size_t>(1, 0.4 * size());
 
         SRV_WRN(" - cache size limit reduced to %.3f MiB\n", limit_size / (1024.0 * 1024.0));
 
@@ -1905,16 +1930,19 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t
 
     // TODO: for some reason we can't copy server_tokens, so we have to do this workaround
     auto & cur = states.emplace_back();
-    cur = {
-        /*.tokens      =*/ server_tokens(prompt.tokens.get_text_tokens(), false),
-        /*.data        =*/ std::move(state_data),
-        /*.checkpoints =*/ prompt.checkpoints,
+    cur        = {
+        /*.tokens      =*/server_tokens(prompt.tokens.get_text_tokens(), false),
+        /*.data        =*/std::move(state_data),
+        /*.checkpoints =*/prompt.checkpoints,
     };
 
     return &cur;
 }
 
-bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tokens_new, llama_context * ctx, int32_t id_slot) {
+bool server_prompt_cache::load(server_prompt &       prompt,
+                               const server_tokens & tokens_new,
+                               llama_context *       ctx,
+                               int32_t               id_slot) {
     const int lcp_best = prompt.tokens.get_common_prefix(tokens_new);
 
     float f_keep_best = float(lcp_best) / prompt.tokens.size();
@@ -1948,7 +1976,7 @@ bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tok
         SRV_WRN(" - found better prompt with f_keep = %.3f, sim = %.3f\n", f_keep_best, sim_best);
 
         const size_t size = it_best->data.size();
-        const size_t n = llama_state_seq_set_data_ext(ctx, it_best->data.data(), size, id_slot, 0);
+        const size_t n    = llama_state_seq_set_data_ext(ctx, it_best->data.data(), size, id_slot, 0);
         if (n != size) {
             SRV_WRN("failed to restore state with size %zu\n", size);
 
@@ -1974,7 +2002,8 @@ void server_prompt_cache::update() {
                 break;
             }
 
-            SRV_WRN(" - cache size limit reached, removing oldest entry (size = %.3f MiB)\n", states.front().size() / (1024.0 * 1024.0));
+            SRV_WRN(" - cache size limit reached, removing oldest entry (size = %.3f MiB)\n",
+                    states.front().size() / (1024.0 * 1024.0));
 
             states.pop_front();
         }
@@ -1984,7 +2013,8 @@ void server_prompt_cache::update() {
     const float size_per_token = std::max<float>(1.0f, float(size()) / (std::max<size_t>(1, n_tokens())));
 
     // dynamically increase the token limit if it can fit in the memory limit
-    const size_t limit_tokens_cur = limit_size > 0 ? std::max<size_t>(limit_tokens, limit_size/size_per_token) : limit_tokens;
+    const size_t limit_tokens_cur =
+        limit_size > 0 ? std::max<size_t>(limit_tokens, limit_size / size_per_token) : limit_tokens;
 
     if (limit_tokens > 0) {
         while (states.size() > 1 && n_tokens() > limit_tokens_cur) {
@@ -1999,11 +2029,11 @@ void server_prompt_cache::update() {
         }
     }
 
-    SRV_WRN(" - cache state: %zu prompts, %.3f MiB (limits: %.3f MiB, %zu tokens, %zu est)\n",
-            states.size(), size() / (1024.0 * 1024.0), limit_size / (1024.0 * 1024.0), limit_tokens, limit_tokens_cur);
+    SRV_WRN(" - cache state: %zu prompts, %.3f MiB (limits: %.3f MiB, %zu tokens, %zu est)\n", states.size(),
+            size() / (1024.0 * 1024.0), limit_size / (1024.0 * 1024.0), limit_tokens, limit_tokens_cur);
 
     for (const auto & state : states) {
-        SRV_WRN("   - prompt %p: %7d tokens, checkpoints: %2zu, %9.3f MiB\n",
-                (const void *)&state, state.n_tokens(), state.checkpoints.size(), state.size() / (1024.0 * 1024.0));
+        SRV_WRN("   - prompt %p: %7d tokens, checkpoints: %2zu, %9.3f MiB\n", (const void *) &state, state.n_tokens(),
+                state.checkpoints.size(), state.size() / (1024.0 * 1024.0));
     }
 }
diff --git a/tools/server/server-task.h b/tools/server/server-task.h
index a69e8f1a3d2..7ccaf3c31bf 100644
--- a/tools/server/server-task.h
+++ b/tools/server/server-task.h
@@ -3,10 +3,10 @@
 #include "common.h"
 #include "llama.h"
 
-#include <string>
-#include <unordered_set>
 #include <list>
 #include <map>
+#include <string>
+#include <unordered_set>
 
 // TODO: prevent including the whole server-common.h as we only use server_tokens
 #include "server-common.h"
@@ -30,7 +30,7 @@ enum server_task_type {
 
 // TODO: change this to more generic "response_format" to replace the "format_response_*" in server-common
 enum task_response_type {
-    TASK_RESPONSE_TYPE_NONE, // llama.cpp native format
+    TASK_RESPONSE_TYPE_NONE,  // llama.cpp native format
     TASK_RESPONSE_TYPE_OAI_CHAT,
     TASK_RESPONSE_TYPE_OAI_CMPL,
     TASK_RESPONSE_TYPE_OAI_RESP,
@@ -48,22 +48,23 @@ enum stop_type {
 struct task_params {
     bool stream          = true;
     bool include_usage   = false;
-    bool cache_prompt    = true; // remember the prompt to avoid reprocessing all prompt
+    bool cache_prompt    = true;  // remember the prompt to avoid reprocessing all prompt
     bool return_tokens   = false;
     bool return_progress = false;
 
-    int32_t n_keep    =  0; // number of tokens to keep from initial prompt
-    int32_t n_discard =  0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
-    int32_t n_predict = -1; // new tokens to predict
-    int32_t n_indent  =  0; // minimum line indentation for the generated text in number of whitespace characters
-    int32_t n_cmpl    =  1; // number of completions to generate from this prompt
+    int32_t n_keep = 0;  // number of tokens to keep from initial prompt
+    int32_t n_discard =
+        0;  // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
+    int32_t n_predict = -1;     // new tokens to predict
+    int32_t n_indent  = 0;      // minimum line indentation for the generated text in number of whitespace characters
+    int32_t n_cmpl    = 1;      // number of completions to generate from this prompt
 
-    int32_t n_cache_reuse = 0; // min chunk size to attempt reusing from the cache via KV shifting (0 = disabled)
+    int32_t n_cache_reuse = 0;  // min chunk size to attempt reusing from the cache via KV shifting (0 = disabled)
 
-    int64_t t_max_prompt_ms  = -1; // TODO: implement
-    int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
+    int64_t t_max_prompt_ms  = -1;  // TODO: implement
+    int64_t t_max_predict_ms = -1;  // if positive, limit the generation phase to this time limit
 
-    std::map<int, float> lora; // mapping adapter ID -> scale
+    std::map<int, float> lora;      // mapping adapter ID -> scale
 
     std::vector<std::string> antiprompt;
     std::vector<std::string> response_fields;
@@ -71,7 +72,7 @@ struct task_params {
     bool timings_per_token   = false;
     bool post_sampling_probs = false;
 
-    struct common_params_sampling sampling;
+    struct common_params_sampling    sampling;
     struct common_params_speculative speculative;
 
     // response formatting
@@ -84,7 +85,7 @@ struct task_params {
     common_chat_parser_params chat_parser_params;
 
     // Embeddings
-    int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm)
+    int32_t embd_normalize = 2;  // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm)
 
     json format_logit_bias(const std::vector<llama_logit_bias> & logit_bias) const;
     json to_json(bool only_metrics = false) const;
@@ -95,9 +96,10 @@ struct task_result_state {
     // tracking diffs for partial tool calls
     std::vector<common_chat_msg_diff> diffs;
     common_chat_parser_params chat_parser_params;
-    common_chat_msg chat_msg;
-    std::string generated_text; // append new chunks of generated text here
-    std::vector<std::string> generated_tool_call_ids;
+    common_chat_msg                   chat_msg;
+    std::string                       generated_text;  // append new chunks of generated text here
+    std::vector<std::string>          generated_tool_call_ids;
+    std::unordered_set<size_t>        sent_tool_call_names;
 
     // for OpenAI Responses and Anthropic streaming API:
     // track output item / content block state across chunks
@@ -117,17 +119,17 @@ struct task_result_state {
         , oai_resp_message_id("msg_" + random_string()) {}
 
     // parse partial tool calls and update the internal state
-    common_chat_msg update_chat_msg(
-        const std::string & text_added,
-        bool is_partial,
-        std::vector<common_chat_msg_diff> & diffs);
+    common_chat_msg update_chat_msg(const std::string &                 text_added,
+                                    bool                                is_partial,
+                                    std::vector<common_chat_msg_diff> & diffs,
+                                    bool                                filter_tool_calls = false);
 };
 
 struct server_task {
-    int id = -1; // to be filled by server_queue
+    int id = -1;  // to be filled by server_queue
 
     // TODO @ngxson : remove this field and implement a mapping task_id -> idx in the response_reader
-    size_t index = 0; // used when there are multiple prompts (batch request)
+    size_t index = 0;  // used when there are multiple prompts (batch request)
 
     // used by SERVER_TASK_TYPE_CANCEL
     int id_target = -1;
@@ -157,13 +159,14 @@ struct server_task {
         std::string filename;
         std::string filepath;
     };
+
     slot_action slot_action;
 
     // used by SERVER_TASK_TYPE_METRICS
     bool metrics_reset_bucket = false;
 
     // used by SERVER_TASK_TYPE_SET_LORA
-    std::map<int, float> set_lora; // mapping adapter ID -> scale
+    std::map<int, float> set_lora;  // mapping adapter ID -> scale
 
     server_task() = default;
 
@@ -203,11 +206,10 @@ struct server_task {
         }
     }
 
-    static task_params params_from_json_cmpl(
-        const llama_vocab * vocab,
-        const common_params & params_base,
-        const int n_ctx_slot,
-        const json & data);
+    static task_params params_from_json_cmpl(const llama_vocab *   vocab,
+                                             const common_params & params_base,
+                                             const int             n_ctx_slot,
+                                             const json &          data);
 
     // utility function
     static std::unordered_set<int> get_list_id(const std::vector<server_task> & tasks) {
@@ -259,50 +261,53 @@ struct result_timings {
     int32_t cache_n = -1;
 
     int32_t prompt_n = -1;
-    double prompt_ms;
-    double prompt_per_token_ms;
-    double prompt_per_second;
+    double  prompt_ms;
+    double  prompt_per_token_ms;
+    double  prompt_per_second;
 
     int32_t predicted_n = -1;
-    double predicted_ms;
-    double predicted_per_token_ms;
-    double predicted_per_second;
+    double  predicted_ms;
+    double  predicted_per_token_ms;
+    double  predicted_per_second;
 
     // Optional speculative metrics - only included when > 0
-    int32_t draft_n = 0;
+    int32_t draft_n          = 0;
     int32_t draft_n_accepted = 0;
 
     json to_json() const;
 };
 
 struct result_prompt_progress {
-    int32_t total = 0;
-    int32_t cache = 0;
+    int32_t total     = 0;
+    int32_t cache     = 0;
     int32_t processed = 0;
-    int64_t time_ms = 0;
+    int64_t time_ms   = 0;
 
     json to_json() const;
 };
 
 struct server_task_result {
-    int id           = -1;
-    int id_slot      = -1;
+    int id      = -1;
+    int id_slot = -1;
 
     // TODO @ngxson : remove this field and implement a mapping task_id -> idx in the response_reader
-    size_t index = 0; // to be used for batched tasks
+    size_t index = 0;  // to be used for batched tasks
 
     virtual bool is_error() {
         // only used by server_task_result_error
         return false;
     }
+
     virtual bool is_stop() {
         // only used by server_task_result_cmpl_*
         return true;
     }
+
     virtual void update(task_result_state &) {
         // only used by server_task_result_cmpl_*
     }
-    virtual json to_json() = 0;
+
+    virtual json to_json()        = 0;
     virtual ~server_task_result() = default;
 };
 
@@ -311,13 +316,15 @@ using server_task_result_ptr = std::unique_ptr<server_task_result>;
 
 struct completion_token_output {
     llama_token tok;
-    float prob;
+    float       prob;
     std::string text_to_send;
+
     struct prob_info {
         llama_token tok;
         std::string txt;
-        float prob;
+        float       prob;
     };
+
     std::vector<prob_info> probs;
 
     json to_json(bool post_sampling_probs) const;
@@ -327,29 +334,28 @@ struct completion_token_output {
     static float logarithm(float x);
 
     static std::vector<unsigned char> str_to_bytes(const std::string & str);
-
 };
 
 struct server_task_result_cmpl_final : server_task_result {
-    std::string content;
+    std::string  content;
     llama_tokens tokens;
 
-    bool stream;
-    bool include_usage;
+    bool           stream;
+    bool           include_usage;
     result_timings timings;
-    std::string prompt;
+    std::string    prompt;
 
-    bool truncated;
-    int32_t n_decoded;
-    int32_t n_prompt_tokens;
-    int32_t n_tokens_cached;
-    bool has_new_line;
+    bool        truncated;
+    int32_t     n_decoded;
+    int32_t     n_prompt_tokens;
+    int32_t     n_tokens_cached;
+    bool        has_new_line;
     std::string stopping_word;
-    stop_type stop = STOP_TYPE_NONE;
+    stop_type   stop = STOP_TYPE_NONE;
 
-    bool post_sampling_probs;
+    bool                                 post_sampling_probs;
     std::vector<completion_token_output> probs_output;
-    std::vector<std::string>  response_fields;
+    std::vector<std::string>             response_fields;
 
     task_params generation_params;
 
@@ -358,7 +364,7 @@ struct server_task_result_cmpl_final : server_task_result {
     task_response_type res_type = TASK_RESPONSE_TYPE_NONE;
     std::string        oaicompat_model;
     std::string        oaicompat_cmpl_id;
-    common_chat_msg    oaicompat_msg; // to be populated by update()
+    common_chat_msg    oaicompat_msg;                       // to be populated by update()
 
     std::vector<common_chat_msg_diff> oaicompat_msg_diffs; // to be populated by update()
     bool is_updated = false;
@@ -369,7 +375,7 @@ struct server_task_result_cmpl_final : server_task_result {
     std::string oai_resp_message_id;
 
     virtual bool is_stop() override {
-        return true; // in stream mode, final responses are considered stop
+        return true;  // in stream mode, final responses are considered stop
     }
 
     virtual json to_json() override;
@@ -407,11 +413,11 @@ struct server_task_result_cmpl_partial : server_task_result {
     int32_t n_decoded;
     int32_t n_prompt_tokens;
 
-    bool post_sampling_probs;
-    bool is_progress = false;
+    bool                    post_sampling_probs;
+    bool                    is_progress = false;
     completion_token_output prob_output;
-    result_timings timings;
-    result_prompt_progress progress;
+    result_timings          timings;
+    result_prompt_progress  progress;
 
     // response formatting
     bool               verbose  = false;
@@ -435,7 +441,7 @@ struct server_task_result_cmpl_partial : server_task_result {
     bool anthropic_has_reasoning = false;
 
     virtual bool is_stop() override {
-        return false; // in stream mode, partial responses are not considered stop
+        return false;  // in stream mode, partial responses are not considered stop
     }
 
     virtual void update(task_result_state & state) override;
@@ -477,24 +483,22 @@ struct server_task_result_rerank : server_task_result {
 };
 
 struct server_task_result_error : server_task_result {
-    error_type err_type = ERROR_TYPE_SERVER;
+    error_type  err_type = ERROR_TYPE_SERVER;
     std::string err_msg;
 
     // for ERROR_TYPE_EXCEED_CONTEXT_SIZE
     int32_t n_prompt_tokens = 0;
     int32_t n_ctx           = 0;
 
-    virtual bool is_error() override {
-        return true;
-    }
+    virtual bool is_error() override { return true; }
 
     virtual json to_json() override;
 };
 
 struct server_task_result_metrics : server_task_result {
-    int n_idle_slots;
-    int n_processing_slots;
-    int n_tasks_deferred;
+    int     n_idle_slots;
+    int     n_processing_slots;
+    int     n_tasks_deferred;
     int64_t t_start;
 
     // TODO: somehow reuse server_metrics in the future, instead of duplicating the fields
@@ -523,7 +527,7 @@ struct server_task_result_metrics : server_task_result {
 
 struct server_task_result_slot_save_load : server_task_result {
     std::string filename;
-    bool is_save; // true = save, false = load
+    bool        is_save;  // true = save, false = load
 
     size_t n_tokens;
     size_t n_bytes;
@@ -541,9 +545,10 @@ struct server_task_result_slot_erase : server_task_result {
 struct server_task_result_get_lora : server_task_result {
     struct lora {
         common_adapter_lora_info info;
-        std::string  alora_invocation_string;
-        llama_tokens alora_invocation_tokens;
+        std::string              alora_invocation_string;
+        llama_tokens             alora_invocation_tokens;
     };
+
     std::vector<lora> loras;
 
     virtual json to_json() override;
@@ -559,9 +564,7 @@ struct server_prompt_checkpoint {
 
     std::vector<uint8_t> data;
 
-    size_t size() const {
-        return data.size();
-    }
+    size_t size() const { return data.size(); }
 };
 
 struct server_prompt {
@@ -581,22 +584,14 @@ struct server_prompt {
         return res;
     }
 
-    int n_tokens() const {
-        return tokens.size();
-    }
+    int n_tokens() const { return tokens.size(); }
 
-    server_prompt clone() const {
-        return server_prompt {
-            tokens.clone(),
-            data,
-            checkpoints
-        };
-    }
+    server_prompt clone() const { return server_prompt{ tokens.clone(), data, checkpoints }; }
 };
 
 struct server_prompt_cache {
     server_prompt_cache(int32_t limit_size_mib, size_t limit_tokens) {
-        this->limit_size   = 1024ull*1024ull*(limit_size_mib < 0 ? 0 : limit_size_mib);
+        this->limit_size   = 1024ull * 1024ull * (limit_size_mib < 0 ? 0 : limit_size_mib);
         this->limit_tokens = limit_tokens;
     }