diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index ee123a7..a36e446 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -1355,8 +1355,13 @@ class BinaryOpExpr : public Expression { case Op::Gt: return l > r; case Op::Le: return l <= r; case Op::Ge: return l >= r; - case Op::In: return (r.is_array() || r.is_object()) && r.contains(l); - case Op::NotIn: return !(r.is_array() && r.contains(l)); + case Op::In: return (((r.is_array() || r.is_object()) && r.contains(l)) || + (l.is_string() && r.is_string() && + r.to_str().find(l.to_str()) != std::string::npos)); + case Op::NotIn: + return !(((r.is_array() || r.is_object()) && r.contains(l)) || + (l.is_string() && r.is_string() && + r.to_str().find(l.to_str()) != std::string::npos)); default: break; } throw std::runtime_error("Unknown binary operator"); @@ -1552,6 +1557,19 @@ class MethodCallExpr : public Expression { else res[i] = std::tolower(res[i]); } return res; + } else if (method->get_name() == "replace") { + vargs.expectArgs("replace method", {2, 3}, {0, 0}); + auto before = vargs.args[0].get(); + auto after = vargs.args[1].get(); + auto count = vargs.args.size() == 3 ? vargs.args[2].get() + : str.length(); + size_t start_pos = 0; + while ((start_pos = str.find(before, start_pos)) != std::string::npos && + count-- > 0) { + str.replace(start_pos, before.length(), after); + start_pos += after.length(); + } + return str; } } throw std::runtime_error("Unknown method: " + method->get_name()); @@ -2128,7 +2146,7 @@ class Parser { } } - if ((has_first_colon || has_second_colon) && (start || end || step)) { + if ((has_first_colon || has_second_colon)) { index = std::make_shared(slice_loc, std::move(start), std::move(end), std::move(step)); } else { index = std::move(start); diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py index 6e65099..6950ffe 100644 --- a/scripts/fetch_templates_and_goldens.py +++ b/scripts/fetch_templates_and_goldens.py @@ -427,7 +427,15 @@ async def process_model(output_folder: str, model_id: str, contexts: list[Contex except json.JSONDecodeError: config = json.loads(re.sub(r'\}([\n\s]*\}[\n\s]*\],[\n\s]*"clean_up_tokenization_spaces")', r'\1', config_str)) - assert 'chat_template' in config, 'No "chat_template" entry in tokenizer_config.json!' + if 'chat_template' not in config: + try: + chat_template = await async_hf_download(model_id, "chat_template.jinja") + config.update({'chat_template': chat_template}) + except Exception as e: + logger.error(f"Failed to fetch chat_template.jinja for model {model_id}: {e}") + raise e + + assert 'chat_template' in config, 'No "chat_template" entry in tokenizer_config.json or no chat_template.jinja file found!' chat_template = config['chat_template'] if isinstance(chat_template, str): await handle_chat_template(output_folder, model_id, None, chat_template, contexts) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 09323b3..c624d5a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -148,6 +148,7 @@ set(MODEL_IDS HuggingFaceTB/SmolLM2-1.7B-Instruct HuggingFaceTB/SmolLM2-135M-Instruct HuggingFaceTB/SmolLM2-360M-Instruct + HuggingFaceTB/SmolLM3-3B huihui-ai/DeepSeek-R1-Distill-Llama-70B-abliterated huihui-ai/DeepSeek-R1-Distill-Llama-8B-abliterated huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2 diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp index a628aa2..1051b81 100644 --- a/tests/test-syntax.cpp +++ b/tests/test-syntax.cpp @@ -84,6 +84,12 @@ TEST(SyntaxTest, SimpleCases) { EXPECT_EQ( "Ok", render("{{ 'ok'.capitalize() }}", {}, {})); + EXPECT_EQ("aouiXYZaouiXYZaoui", + render("{{ 'abcXYZabcXYZabc'.replace('bc', 'oui') }}", {}, {})); + EXPECT_EQ("okXYZokXYZabc", + render("{{ 'abcXYZabcXYZabc'.replace('abc', 'ok', 2) }}", {}, {})); + EXPECT_EQ("abcXYZabcXYZabc", + render("{{ 'abcXYZabcXYZabc'.replace('def', 'ok') }}", {}, {})); EXPECT_EQ( "ok", @@ -199,6 +205,10 @@ TEST(SyntaxTest, SimpleCases) { EXPECT_EQ( "True,False", render(R"({{ 'a' in ["a"] }},{{ 'a' in [] }})", {}, {})); + EXPECT_EQ("True,False", + render(R"({{ 'a' in 'abc' }},{{ 'd' in 'abc' }})", {}, {})); + EXPECT_EQ("False,True", + render(R"({{ 'a' not in 'abc' }},{{ 'd' not in 'abc' }})", {}, {})); EXPECT_EQ( R"([{'a': 1}])", render(R"({{ [{"a": 1}, {"a": 2}, {}] | selectattr("a", "equalto", 1) | list }})", {}, {})); @@ -481,8 +491,8 @@ TEST(SyntaxTest, SimpleCases) { "[1, 2, 3][0, 1][1, 2]", render("{% set x = [0, 1, 2, 3] %}{{ x[1:] }}{{ x[:2] }}{{ x[1:3] }}", {}, {})); EXPECT_EQ( - "123;01;12", - render("{% set x = '0123' %}{{ x[1:] }};{{ x[:2] }};{{ x[1:3] }}", {}, {})); + "123;01;12;0123;0123", + render("{% set x = '0123' %}{{ x[1:] }};{{ x[:2] }};{{ x[1:3] }};{{ x[:] }};{{ x[::] }}", {}, {})); EXPECT_EQ( "[3, 2, 1, 0][3, 2, 1][2, 1, 0][2, 1][0, 2][3, 1][2, 0]", render("{% set x = [0, 1, 2, 3] %}{{ x[::-1] }}{{ x[:0:-1] }}{{ x[2::-1] }}{{ x[2:0:-1] }}{{ x[::2] }}{{ x[::-2] }}{{ x[-2::-2] }}", {}, {}));