40 changes: 32 additions & 8 deletions common/chat-parser-xml-toolcall.cpp
@@ -667,18 +667,42 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
         return l;
     };
     constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
-        auto best_match = content.size();
-        for (auto pattern: list) {
-            if (pattern.size() == 0) continue;
+        // Trim partial suffixes that look like an incomplete special marker (e.g. "<|tool_call_end|>").
+        //
+        // Some tool syntaxes include a normal JSON delimiter *before* a special token, e.g. "}<|tool_call_end|>".
+        // In that case we must avoid trimming the valid JSON '}' when only the beginning of the pattern matches.
+        auto best_erase_from = content.size();
+
+        for (auto pattern : list) {
+            if (pattern.empty()) {
+                continue;
+            }
+
+            // If the pattern contains a '<', treat everything before it as a "normal prefix" and only trim if the
+            // model actually started emitting the special token (i.e. matched beyond the prefix).
+            const auto special_pos = pattern.find('<');
+
             for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
-                auto match_len = content.size() - match_idx;
-                if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) {
-                    best_match = match_idx;
+                const auto match_len = content.size() - match_idx;
+                if (content.compare(match_idx, match_len, pattern.data(), match_len) != 0) {
+                    continue;
+                }
+
+                if (special_pos != std::string_view::npos && special_pos > 0) {
+                    // Only matched the normal prefix (e.g. "}") - do not trim.
+                    if (match_len <= special_pos) {
+                        continue;
+                    }
+                    // Trim from the start of the special token, preserving the normal prefix.
+                    best_erase_from = std::min(best_erase_from, match_idx + special_pos);
+                } else {
+                    best_erase_from = std::min(best_erase_from, match_idx);
+                }
                 }
             }
         }
-        if (content.size() > best_match) {
-            content.erase(best_match);
+
+        if (content.size() > best_erase_from) {
+            content.erase(best_erase_from);
         }
     };
     const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
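
For illustration only, a self-contained sketch of the trimming rule described in the comments above (a hypothetical reimplementation of the trim_suffix lambda for demonstration, not code from this PR). It shows that a partial special marker following a valid JSON '}' is trimmed back to the '}', while a bare '}' that merely matches the normal prefix is left untouched:

// Standalone sketch (assumption: mirrors the trim_suffix lambda above, outside its real context).
#include <algorithm>
#include <cassert>
#include <initializer_list>
#include <string>
#include <string_view>

static void trim_suffix_sketch(std::string & content, std::initializer_list<std::string_view> list) {
    auto best_erase_from = content.size();
    for (auto pattern : list) {
        if (pattern.empty()) {
            continue;
        }
        // Everything before the first '<' is a "normal prefix" that must never be trimmed on its own.
        const auto special_pos = pattern.find('<');
        for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
            const auto match_len = content.size() - match_idx;
            if (content.compare(match_idx, match_len, pattern.data(), match_len) != 0) {
                continue;
            }
            if (special_pos != std::string_view::npos && special_pos > 0) {
                if (match_len <= special_pos) {
                    continue; // only the normal prefix (e.g. "}") matched - keep it
                }
                best_erase_from = std::min(best_erase_from, match_idx + special_pos);
            } else {
                best_erase_from = std::min(best_erase_from, match_idx);
            }
        }
    }
    if (content.size() > best_erase_from) {
        content.erase(best_erase_from);
    }
}

int main() {
    std::string a = "{\"x\":1}<|tool_call_en";       // incomplete special marker after valid JSON
    trim_suffix_sketch(a, {"}<|tool_call_end|>"});
    assert(a == "{\"x\":1}");                         // partial marker trimmed, '}' preserved

    std::string b = "{\"x\":1}";                      // only the normal prefix matches
    trim_suffix_sketch(b, {"}<|tool_call_end|>"});
    assert(b == "{\"x\":1}");                         // nothing trimmed

    std::string c = "hello <|tool_call";              // pattern without a normal prefix
    trim_suffix_sketch(c, {"<|tool_call_end|>"});
    assert(c == "hello ");                            // partial marker trimmed entirely
    return 0;
}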
20 changes: 19 additions & 1 deletion common/chat.cpp
@@ -1881,11 +1881,29 @@ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_c
 
 static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
     common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+    const bool has_tools = params.tools.is_array() && !params.tools.empty();
+    const bool has_schema = params.json_schema.is_object();
+
+    data.grammar_lazy = has_tools && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
 
     data.prompt = apply(tmpl, params);
     data.format = COMMON_CHAT_FORMAT_KIMI_K2;
 
+
+    if (has_tools && has_schema) {
+        throw std::runtime_error("Kimi K2: cannot combine \"tools\" with \"json_schema\"/response_format; remove tools or remove response_format");
+    }
+
+    if (!has_tools && has_schema) {
+        if (!params.grammar.empty()) {
+            throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
+        }
+        // Mirror the generic "content-only" schema enforcement behavior
+        data.grammar = json_schema_to_grammar(params.json_schema);
+    } else {
+        data.grammar = params.grammar;
+    }
+
     data.preserved_tokens = {
         "<think>",
         "</think>",
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
@@ -188,6 +188,7 @@ llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
 llama_build_and_test(test-chat-template.cpp)
 llama_build_and_test(test-jinja.cpp)
 llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
+llama_build_and_test(test-kimi-response-format.cpp)
 llama_build_and_test(test-json-partial.cpp)
 llama_build_and_test(test-log.cpp)
 llama_build_and_test(
121 changes: 121 additions & 0 deletions tests/test-kimi-response-format.cpp
@@ -0,0 +1,121 @@
#include <cassert>
#include <stdexcept>
#include <string>
#include <vector>

#include "chat.h"

// Regression test:
// - llama-server /chat/completions parses `response_format` into a JSON schema and passes it into
// common_chat_templates_apply() as inputs.json_schema.
// - For templates detected as "Kimi K2", llama.cpp selected a Kimi-specific handler that did not
// apply json_schema-to-grammar conversion, so schema enforcement was silently dropped.
//
// This test asserts that for the Kimi K2 chat template, providing a json_schema results in a
// non-empty grammar being returned by common_chat_templates_apply() (hard enforcement expected).

static const char * KIMI_K2_TEMPLATE = R"JINJA({%- if tools -%}
<|im_system|>tool_declare<|im_middle|>
# Tools
{{ tools | tojson }}<|im_end|>
{%- endif -%}
{%- for message in messages -%}
{%- if loop.first and messages[0]['role'] != 'system' -%}
<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
{%- endif -%}

{%- set role_name = message.get('name') or message['role'] -%}
{%- if message['role'] == 'user' -%}
<|im_user|>{{role_name}}<|im_middle|>
{%- elif message['role'] == 'assistant' -%}
<|im_assistant|>{{role_name}}<|im_middle|>
{%- else -%}
<|im_system|>{{role_name}}<|im_middle|>
{% endif %}

{%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
{%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
<|tool_calls_section_begin|>
{%- for tool_call in message['tool_calls'] -%}
{%- set formatted_id = tool_call['id'] -%}
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
{%- endfor -%}
<|tool_calls_section_end|>
{%- elif message['role'] == 'tool' -%}
## Return of {{ message.tool_call_id }}
{{ message['content'] }}
{%- elif message['content'] is string -%}
{{ message['content'] }}
{%- elif message['content'] is not none -%}
{% for content in message['content'] -%}
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
{% else -%}
{{ content['text'] }}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
<|im_end|>
{%- endfor -%}
{%- if add_generation_prompt -%}
<|im_assistant|>assistant<|im_middle|>
{%- endif -%})JINJA";

int main() {
auto tmpls = common_chat_templates_init(/* model= */ nullptr, KIMI_K2_TEMPLATE);

common_chat_templates_inputs inputs;
inputs.use_jinja = true;
inputs.add_generation_prompt = true;

// No tools
inputs.tools = {};
inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;

inputs.json_schema = R"JSON({
"type": "object",
"properties": { "ok": { "type": "boolean" } },
"required": ["ok"],
"additionalProperties": false
})JSON";

inputs.messages = {
common_chat_msg{"system", "Return ONLY JSON with key ok.", {}, {}, "", "", ""},
common_chat_msg{"user", "ok", {}, {}, "", "", ""},
};

const auto out = common_chat_templates_apply(tmpls.get(), inputs);

// Confirm the Kimi K2 handler was actually selected (not a generic fallback).
assert(out.format == COMMON_CHAT_FORMAT_KIMI_K2);
assert(!out.grammar.empty());

// tools + json_schema is explicitly unsupported for Kimi K2 (ambiguous composition).
// Ensure we fail loudly rather than silently dropping schema enforcement.
inputs.tools = {
common_chat_tool{
/* .name = */ "noop",
/* .description = */ "No-op tool",
/* .parameters = */ R"JSON({
"type": "object",
"properties": { "x": { "type": "string" } },
"required": ["x"],
"additionalProperties": false
})JSON",
},
};
inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;

bool threw = false;
try {
(void) common_chat_templates_apply(tmpls.get(), inputs);
} catch (const std::exception &) {
threw = true;
}
// Avoid relying on assert() in Release builds (may be compiled out).
if (!threw) {
return 2;
}
return 0;
}
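
As a usage note, here is a minimal sketch of how a caller would pick up the enforced grammar that this test asserts is non-empty. It assumes the common_params_sampling::grammar / grammar_lazy plumbing used elsewhere in llama.cpp and is not the actual server request path:

// Hedged sketch: feed the grammar returned for a json_schema request into sampling.
// Assumes common_params_sampling (common.h) as used elsewhere in llama.cpp; illustration only.
#include "chat.h"
#include "common.h"

static void apply_chat_grammar(common_params_sampling & sparams,
                               const common_chat_templates * tmpls,
                               const common_chat_templates_inputs & inputs) {
    const auto out = common_chat_templates_apply(tmpls, inputs);
    if (!out.grammar.empty()) {
        // With the Kimi K2 fix above, a response_format/json_schema request reaches this branch.
        sparams.grammar      = out.grammar;
        sparams.grammar_lazy = out.grammar_lazy;
    }
}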