 namespace ovms {
 
 static const std::string CHAT_TEMPLATE_WARNING_MESSAGE = "Warning: Chat template has not been loaded properly. Servable will not respond to /chat/completions endpoint.";
-static const std::string DEFAULT_CHAT_TEMPLATE = R"({% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] }}{% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %})";
 
 void GenAiServableInitializer::loadChatTemplate(std::shared_ptr<GenAiServableProperties> properties, const std::string& chatTemplateDirectory) {
 #if (PYTHON_DISABLE == 0)
     ExtraGenerationInfo extraGenInfo = readExtraGenerationInfo(properties, chatTemplateDirectory);
     loadPyTemplateProcessor(properties, extraGenInfo);
 #else
-    loadDefaultTemplateProcessorIfNeeded(properties);
+    // In non-Python builds, GenAI handles chat template loading; only warn when no template is available.
+    if (properties->tokenizer.get_chat_template().empty()) {
+        SPDLOG_LOGGER_DEBUG(modelmanager_logger, CHAT_TEMPLATE_WARNING_MESSAGE);
+    }
 #endif
 }
 
 #if (PYTHON_DISABLE == 0)
@@ -123,29 +125,37 @@ ExtraGenerationInfo GenAiServableInitializer::readExtraGenerationInfo(std::share
 }
 
 void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServableProperties> properties, const ExtraGenerationInfo& extraGenInfo) {
-    // GGUF models specific validation
-    if (extraGenInfo.isGgufModel) {
-        bool errorFound = false;
-        if (extraGenInfo.eosTokenFromTokenizer.empty()) {
-            SPDLOG_ERROR("Tokenizer eos token not found in tokenizer nor in vocabulary but required for GGUF models.");
-            errorFound = true;
-        }
-        if (extraGenInfo.bosTokenFromTokenizer.empty()) {
-            SPDLOG_ERROR("Tokenizer bos token not found in tokenizer nor in vocabulary but required for GGUF models.");
-            errorFound = true;
-        }
-        if (extraGenInfo.chatTemplateFromTokenizer.empty()) {
-            SPDLOG_ERROR("Tokenizer chat template not found in tokenizer but required for GGUF models.");
-            errorFound = true;
-        }
-        if (errorFound)
-            return;
+    // At this point the tokenizer must be initialized, since we need its methods to prepare the chat template processor.
+    if (properties->tokenizer == ov::genai::Tokenizer()) {
+        SPDLOG_LOGGER_ERROR(modelmanager_logger, "Tokenizer is not initialized. Cannot load chat template processor.");
+        return;
+    }
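+    // Fetch the chat template and special tokens that the GenAI tokenizer read from the model files.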
+    std::string chatTemplate = properties->tokenizer.get_original_chat_template();
+    std::string bosToken = properties->tokenizer.get_bos_token();
+    std::string eosToken = properties->tokenizer.get_eos_token();
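+    // All three pieces are required to serve /chat/completions, so bail out early when any is missing.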
+    if (bosToken.empty()) {
+        SPDLOG_ERROR("BOS token was not found in model files.");
+        return;
     }
+    if (eosToken.empty()) {
+        SPDLOG_ERROR("EOS token was not found in model files.");
+        return;
+    }
+    if (chatTemplate.empty()) {
+        SPDLOG_ERROR("Chat template was not found in model files.");
+        return;
+    }
+
+    properties->templateProcessor.bosToken = bosToken;
+    properties->templateProcessor.eosToken = eosToken;
+
+    SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Loading Python Jinja template processor with chat template from tokenizer. Bos token: {}, Eos token: {}, chat template:\n{}",
+        bosToken, eosToken, chatTemplate);
+
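+    // Hold the GIL while the embedded interpreter compiles the Jinja templates below.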
     py::gil_scoped_acquire acquire;
     try {
-        auto locals = py::dict("tokenizer_template"_a = extraGenInfo.chatTemplateFromTokenizer,
-            "templates_directory"_a = extraGenInfo.chatTemplateDirectory,
-            "is_gguf_model"_a = extraGenInfo.isGgufModel);
+        auto locals = py::dict("chat_template"_a = chatTemplate,
+            "templates_directory"_a = extraGenInfo.chatTemplateDirectory);
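+        // Entries of this dict become variables visible to the Python snippet executed below.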
         py::exec(R"(
             # Following the logic from:
             # https://github.com/huggingface/transformers/blob/25245ec26dc29bcf6102e1b4ddd0dfd02e720cf5/src/transformers/tokenization_utils_base.py#L1837
@@ -214,71 +224,51 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ
                     self._rendered_blocks = None
                     self._generation_indices = None
 
-
-            # Default chat template accepts only single message and outputs only it's 'content'
-            # effectively turning it into a regular prompt.
-            default_chat_template = "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] }}{% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}"
-
-            bos_token = ""
-            eos_token = ""
-            chat_template = default_chat_template
+
+            # Optional dedicated tool chat template (might not be present)
             tool_chat_template = None
 
+            # Variables that must be set by the end of this script
             template = None
             tool_template = None
 
-            # Try to read template from template.jinja file
-            jinja_file = Path(templates_directory + "/chat_template.jinja")
-            jinja_file_legacy = Path(templates_directory + "/template.jinja")
+            # Load Jinja2 environment
             template_loader = jinja2.FileSystemLoader(searchpath=templates_directory)
             jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True, extensions=[AssistantTracker, jinja2.ext.loopcontrols], loader=template_loader)
             jinja_env.policies["json.dumps_kwargs"]["ensure_ascii"] = False
             jinja_env.globals["raise_exception"] = raise_exception
             jinja_env.globals["strftime_now"] = strftime_now
             jinja_env.filters["from_json"] = json.loads
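+            # These globals and filters mirror the helpers that HF-style chat templates expect, per the transformers logic referenced above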
-            if jinja_file.is_file():
-                template = jinja_env.get_template("chat_template.jinja")
-            elif jinja_file_legacy.is_file():
-                template = jinja_env.get_template("template.jinja")
 
-            # Try to read data from tokenizer_config.json
+            # Try to read tokenizer_config.json to get an additional tool chat template if present
             tokenizer_config_file = Path(templates_directory + "/tokenizer_config.json")
             if tokenizer_config_file.is_file():
                 f = open(templates_directory + "/tokenizer_config.json", "r", encoding="utf-8")
                 data = json.load(f)
-                bos_token = data.get("bos_token", "")
-                bos_token = "" if bos_token is None else bos_token # Null token conversion to empty string.
-                eos_token = data.get("eos_token", "")
-                eos_token = "" if eos_token is None else eos_token # Null token conversion to empty string.
-
-                chat_template = data.get("chat_template", default_chat_template)
-                if isinstance(chat_template, list):
-                    for template_entry in chat_template:
+
+                chat_template_from_tokenizer_config = data.get("chat_template", None)
+                if isinstance(chat_template_from_tokenizer_config, list):
+                    for template_entry in chat_template_from_tokenizer_config:
                         if isinstance(template_entry, dict):
-                            if template_entry.get("name") == "default":
-                                chat_template = template_entry.get("template")
-                            elif template_entry.get("name") == "tool_use":
+                            if template_entry.get("name") == "tool_use":
                                 tool_chat_template = template_entry.get("template")
-            if template is None:
-                if is_gguf_model and (chat_template == default_chat_template):
-                    # GGUF model directory might not contain files with chat template and in that case we use template read from the tokenizer
-                    template = jinja_env.from_string(tokenizer_template)
-                else:
-                    template = jinja_env.from_string(chat_template)
+
+            # Try to read the tool_use.jinja template file from the additional_chat_templates directory if it exists
+            additional_templates_dir = Path(templates_directory + "/additional_chat_templates")
+            tool_use_template_file = additional_templates_dir / "tool_use.jinja"
+            if tool_use_template_file.is_file():
+                with open(tool_use_template_file, "r", encoding="utf-8") as f:
+                    tool_chat_template = f.read()
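+            # A tool template file on disk takes precedence over the entry from tokenizer_config.json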
+
+            # Load templates from strings
+            template = jinja_env.from_string(chat_template)
             if tool_chat_template is not None:
                 tool_template = jinja_env.from_string(tool_chat_template)
             else:
                 tool_template = template
         )",
             py::globals(), locals);
 
-        if (extraGenInfo.isGgufModel) {
-            properties->templateProcessor.bosToken = extraGenInfo.bosTokenFromTokenizer;
-            properties->templateProcessor.eosToken = extraGenInfo.eosTokenFromTokenizer;
-        } else {
-            properties->templateProcessor.bosToken = locals["bos_token"].cast<std::string>();
-            properties->templateProcessor.eosToken = locals["eos_token"].cast<std::string>();
-        }
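+        // Store the compiled templates as wrapped Python objects on the servable properties for later rendering.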
         properties->templateProcessor.chatTemplate = std::make_unique<PyObjectWrapper<py::object>>(locals["template"]);
         properties->templateProcessor.toolTemplate = std::make_unique<PyObjectWrapper<py::object>>(locals["tool_template"]);
     } catch (const pybind11::error_already_set& e) {
@@ -298,15 +288,6 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ
         SPDLOG_DEBUG("Chat template loading failed with an unexpected error");
     }
 }
-
-#else
-void GenAiServableInitializer::loadDefaultTemplateProcessorIfNeeded(std::shared_ptr<GenAiServableProperties> properties) {
-    const std::string modelChatTemplate = properties->tokenizer.get_chat_template();
-    if (modelChatTemplate.empty()) {
-        SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Could not load model chat template. Using default template.");
-        properties->tokenizer.set_chat_template(DEFAULT_CHAT_TEMPLATE);
-    }
-}
 #endif
 
 Status parseModelsPath(std::string& outPath, std::string modelsPath, std::string graphPath) {