Skip to content
This repository was archived by the owner on Mar 25, 2026. It is now read-only.

Commit d408d3c

Browse files
Fix chat template (#3)
* Cache chat template in HATTokenizer
* Apply HAT chat template correctly
1 parent 899d83f commit d408d3c

File tree

3 files changed

+30
-1
lines changed

3 files changed

+30
-1
lines changed

vllm/entrypoints/chat_utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,5 +1346,22 @@ def apply_mistral_chat_template(
13461346
"template")
13471347
raise ValueError(str(e)) from e
13481348

1349+
def apply_hat_chat_template(
    tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
    conversation: list[ConversationMessage],
    chat_template: Optional[str],
    tools: Optional[list[dict[str, Any]]],
    tokenize: bool = False,
    **kwargs: Any,
) -> str:
    """Render a conversation through a HAT tokenizer's chat template.

    The template is first resolved via ``tokenizer.get_chat_template`` (which
    falls back to the tokenizer's own template when ``chat_template`` is
    ``None``), then applied to the conversation.

    Args:
        tokenizer: Tokenizer providing ``get_chat_template`` and
            ``apply_chat_template``.
        conversation: Messages to render.
        chat_template: Explicit template string, or ``None`` to use the
            tokenizer's default.
        tools: Optional tool schemas forwarded to template resolution and
            rendering.
        tokenize: Whether the tokenizer should tokenize the rendered output.
        **kwargs: Extra keyword arguments forwarded to
            ``tokenizer.apply_chat_template``.

    Returns:
        The rendered prompt.
    """
    # Resolve the effective template up front so resolution errors surface
    # before any rendering work happens.
    resolved_template = tokenizer.get_chat_template(chat_template, tools=tools)
    rendered = tokenizer.apply_chat_template(
        conversation=conversation,
        chat_template=resolved_template,
        tools=tools,
        tokenize=tokenize,
        **kwargs,
    )
    return rendered
1365+
13491366
def random_tool_call_id() -> str:
    """Return a fresh, unique identifier for a chat-completion tool call."""
    return "chatcmpl-tool-{}".format(random_uuid())

vllm/entrypoints/openai/serving_engine.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
ConversationMessage,
3434
apply_hf_chat_template,
3535
apply_mistral_chat_template,
36+
apply_hat_chat_template,
3637
parse_chat_messages_futures,
3738
resolve_chat_template_content_format)
3839
from vllm.entrypoints.logger import RequestLogger
@@ -76,6 +77,7 @@
7677
from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
7778
from vllm.utils import (AsyncMicrobatchTokenizer, is_list_of,
7879
merge_async_iterators, random_uuid)
80+
from vllm.v1.hat.hat_tokenizer import HATTokenizer
7981

8082
logger = init_logger(__name__)
8183

@@ -882,6 +884,12 @@ async def _preprocess_chat(
882884
messages=messages,
883885
**_chat_template_kwargs,
884886
)
887+
elif isinstance(tokenizer, HATTokenizer):
888+
request_prompt = apply_hat_chat_template(
889+
tokenizer,
890+
conversation=conversation,
891+
**_chat_template_kwargs,
892+
)
885893
else:
886894
request_prompt = apply_hf_chat_template(
887895
tokenizer=tokenizer,

vllm/v1/hat/hat_tokenizer.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ def __init__(self, special_token_dict: Dict[str, int]):
2424
self.name_or_path = "HAT"
2525
self.jinja2_env = ImmutableSandboxedEnvironment()
2626
self.special_tokens_map = None
27+
# Cache for compiled Jinja2 templates to avoid recompiling on every request
28+
self._template_cache: Dict[str, Any] = {}
2729

2830
@property
2931
def all_special_tokens_extended(self) -> List[str]:
@@ -141,7 +143,9 @@ def apply_chat_template(self,
141143
tokenize: bool,
142144
tools: Optional[List[Dict[str, Any]]] = None,
143145
**kwargs) -> str:
144-
compiled_template = self.jinja2_env.from_string(chat_template)
146+
if chat_template not in self._template_cache:
147+
self._template_cache[chat_template] = self.jinja2_env.from_string(chat_template)
148+
compiled_template = self._template_cache[chat_template]
145149
rendered = compiled_template.render(messages=conversation,
146150
add_generation_prompt=True)
147151
return rendered

0 commit comments

Comments
 (0)