40 changes: 32 additions & 8 deletions common/chat-parser-xml-toolcall.cpp
@@ -667,18 +667,42 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
         return l;
     };
     constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
-        auto best_match = content.size();
-        for (auto pattern: list) {
-            if (pattern.size() == 0) continue;
+        // Trim partial suffixes that look like an incomplete special marker (e.g. "<|tool_call_end|>").
+        //
+        // Some tool syntaxes include a normal JSON delimiter *before* a special token, e.g. "}<|tool_call_end|>".
+        // In that case we must avoid trimming the valid JSON '}' when only the beginning of the pattern matches.
+        auto best_erase_from = content.size();
+
+        for (auto pattern : list) {
+            if (pattern.empty()) {
+                continue;
+            }
+
+            // If the pattern contains a '<', treat everything before it as a "normal prefix" and only trim if the
+            // model actually started emitting the special token (i.e. matched beyond the prefix).
+            const auto special_pos = pattern.find('<');
+
             for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
-                auto match_len = content.size() - match_idx;
-                if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) {
-                    best_match = match_idx;
+                const auto match_len = content.size() - match_idx;
+                if (content.compare(match_idx, match_len, pattern.data(), match_len) != 0) {
+                    continue;
+                }
+
+                if (special_pos != std::string_view::npos && special_pos > 0) {
+                    // Only matched the normal prefix (e.g. "}") - do not trim.
+                    if (match_len <= special_pos) {
+                        continue;
+                    }
+                    // Trim from the start of the special token, preserving the normal prefix.
+                    best_erase_from = std::min(best_erase_from, match_idx + special_pos);
+                } else {
+                    best_erase_from = std::min(best_erase_from, match_idx);
+                }
                 }
             }
         }
-        if (content.size() > best_match) {
-            content.erase(best_match);
+
+        if (content.size() > best_erase_from) {
+            content.erase(best_erase_from);
         }
     };
     const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
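
For illustration only, a self-contained sketch of the trimming rule described in the comments above (a hypothetical reimplementation of the trim_suffix lambda for demonstration, not code from this PR). It shows that a partial special marker following a valid JSON '}' is trimmed back to the '}', while a bare '}' that merely matches the normal prefix is left untouched:

// Standalone sketch (assumption: mirrors the trim_suffix lambda above, outside its real context).
#include <algorithm>
#include <cassert>
#include <initializer_list>
#include <string>
#include <string_view>

static void trim_suffix_sketch(std::string & content, std::initializer_list<std::string_view> list) {
    auto best_erase_from = content.size();
    for (auto pattern : list) {
        if (pattern.empty()) {
            continue;
        }
        // Everything before the first '<' is a "normal prefix" that must never be trimmed on its own.
        const auto special_pos = pattern.find('<');
        for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
            const auto match_len = content.size() - match_idx;
            if (content.compare(match_idx, match_len, pattern.data(), match_len) != 0) {
                continue;
            }
            if (special_pos != std::string_view::npos && special_pos > 0) {
                if (match_len <= special_pos) {
                    continue; // only the normal prefix (e.g. "}") matched - keep it
                }
                best_erase_from = std::min(best_erase_from, match_idx + special_pos);
            } else {
                best_erase_from = std::min(best_erase_from, match_idx);
            }
        }
    }
    if (content.size() > best_erase_from) {
        content.erase(best_erase_from);
    }
}

int main() {
    std::string a = "{\"x\":1}<|tool_call_en";       // incomplete special marker after valid JSON
    trim_suffix_sketch(a, {"}<|tool_call_end|>"});
    assert(a == "{\"x\":1}");                         // partial marker trimmed, '}' preserved

    std::string b = "{\"x\":1}";                      // only the normal prefix matches
    trim_suffix_sketch(b, {"}<|tool_call_end|>"});
    assert(b == "{\"x\":1}");                         // nothing trimmed

    std::string c = "hello <|tool_call";              // pattern without a normal prefix
    trim_suffix_sketch(c, {"<|tool_call_end|>"});
    assert(c == "hello ");                            // partial marker trimmed entirely
    return 0;
}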
20 changes: 19 additions & 1 deletion common/chat.cpp
@@ -1881,11 +1881,29 @@ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_c
 
 static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
     common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+    const bool has_tools = params.tools.is_array() && !params.tools.empty();
+    const bool has_schema = params.json_schema.is_object();
+
+    data.grammar_lazy = has_tools && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
 
     data.prompt = apply(tmpl, params);
     data.format = COMMON_CHAT_FORMAT_KIMI_K2;
 
+
+    if (has_tools && has_schema) {
+        throw std::runtime_error("Kimi K2: cannot combine \"tools\" with \"json_schema\"/response_format; remove tools or remove response_format");
+    }
+
+    if (!has_tools && has_schema) {
+        if (!params.grammar.empty()) {
+            throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
+        }
+        // Mirror the generic "content-only" schema enforcement behavior
+        data.grammar = json_schema_to_grammar(params.json_schema);
+    } else {
+        data.grammar = params.grammar;
+    }
+
     data.preserved_tokens = {
         "<think>",
         "</think>",
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
@@ -188,6 +188,7 @@ llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
 llama_build_and_test(test-chat-template.cpp)
 llama_build_and_test(test-jinja.cpp)
 llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
+llama_build_and_test(test-kimi-response-format.cpp)
 llama_build_and_test(test-json-partial.cpp)
 llama_build_and_test(test-log.cpp)
 llama_build_and_test(
121 changes: 121 additions & 0 deletions tests/test-kimi-response-format.cpp
@@ -0,0 +1,121 @@
#include <cassert>
#include <stdexcept>
#include <string>
#include <vector>

#include "chat.h"

// Regression test:
// - llama-server /chat/completions parses `response_format` into a JSON schema and passes it into
// common_chat_templates_apply() as inputs.json_schema.
// - For templates detected as "Kimi K2", llama.cpp selected a Kimi-specific handler that did not
// apply json_schema-to-grammar conversion, so schema enforcement was silently dropped.
//
// This test asserts that for the Kimi K2 chat template, providing a json_schema results in a
// non-empty grammar being returned by common_chat_templates_apply() (hard enforcement expected).

static const char * KIMI_K2_TEMPLATE = R"JINJA({%- if tools -%}
<|im_system|>tool_declare<|im_middle|>
# Tools
{{ tools | tojson }}<|im_end|>
{%- endif -%}
{%- for message in messages -%}
{%- if loop.first and messages[0]['role'] != 'system' -%}
<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
{%- endif -%}

{%- set role_name = message.get('name') or message['role'] -%}
{%- if message['role'] == 'user' -%}
<|im_user|>{{role_name}}<|im_middle|>
{%- elif message['role'] == 'assistant' -%}
<|im_assistant|>{{role_name}}<|im_middle|>
{%- else -%}
<|im_system|>{{role_name}}<|im_middle|>
{% endif %}

{%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
{%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
<|tool_calls_section_begin|>
{%- for tool_call in message['tool_calls'] -%}
{%- set formatted_id = tool_call['id'] -%}
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
{%- endfor -%}
<|tool_calls_section_end|>
{%- elif message['role'] == 'tool' -%}
## Return of {{ message.tool_call_id }}
{{ message['content'] }}
{%- elif message['content'] is string -%}
{{ message['content'] }}
{%- elif message['content'] is not none -%}
{% for content in message['content'] -%}
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
{% else -%}
{{ content['text'] }}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
<|im_end|>
{%- endfor -%}
{%- if add_generation_prompt -%}
<|im_assistant|>assistant<|im_middle|>
{%- endif -%})JINJA";

int main() {
auto tmpls = common_chat_templates_init(/* model= */ nullptr, KIMI_K2_TEMPLATE);

common_chat_templates_inputs inputs;
inputs.use_jinja = true;
inputs.add_generation_prompt = true;

// No tools
inputs.tools = {};
inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;

inputs.json_schema = R"JSON({
"type": "object",
"properties": { "ok": { "type": "boolean" } },
"required": ["ok"],
"additionalProperties": false
})JSON";

inputs.messages = {
common_chat_msg{"system", "Return ONLY JSON with key ok.", {}, {}, "", "", ""},
common_chat_msg{"user", "ok", {}, {}, "", "", ""},
};

const auto out = common_chat_templates_apply(tmpls.get(), inputs);

// Confirm the Kimi K2 handler was actually selected (not a generic fallback).
assert(out.format == COMMON_CHAT_FORMAT_KIMI_K2);
assert(!out.grammar.empty());

// tools + json_schema is explicitly unsupported for Kimi K2 (ambiguous composition).
// Ensure we fail loudly rather than silently dropping schema enforcement.
inputs.tools = {
common_chat_tool{
/* .name = */ "noop",
/* .description = */ "No-op tool",
/* .parameters = */ R"JSON({
"type": "object",
"properties": { "x": { "type": "string" } },
"required": ["x"],
"additionalProperties": false
})JSON",
},
};
inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;

bool threw = false;
try {
(void) common_chat_templates_apply(tmpls.get(), inputs);
} catch (const std::exception &) {
threw = true;
}
// Avoid relying on assert() in Release builds (may be compiled out).
if (!threw) {
return 2;
}
return 0;
}
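
As a usage note, here is a minimal sketch of how a caller would pick up the enforced grammar that this test asserts is non-empty. It assumes the common_params_sampling::grammar / grammar_lazy plumbing used elsewhere in llama.cpp and is not the actual server request path:

// Hedged sketch: feed the grammar returned for a json_schema request into sampling.
// Assumes common_params_sampling (common.h) as used elsewhere in llama.cpp; illustration only.
#include "chat.h"
#include "common.h"

static void apply_chat_grammar(common_params_sampling & sparams,
                               const common_chat_templates * tmpls,
                               const common_chat_templates_inputs & inputs) {
    const auto out = common_chat_templates_apply(tmpls, inputs);
    if (!out.grammar.empty()) {
        // With the Kimi K2 fix above, a response_format/json_schema request reaches this branch.
        sparams.grammar      = out.grammar;
        sparams.grammar_lazy = out.grammar_lazy;
    }
}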