From 4944e86fc831dc9a1e41243a8ba5905d72c67822 Mon Sep 17 00:00:00 2001 From: Guan Luo <41310872+GuanLuo@users.noreply.github.com> Date: Fri, 31 Oct 2025 02:08:56 +0800 Subject: [PATCH 1/6] test: update test case Signed-off-by: Guan Luo <41310872+GuanLuo@users.noreply.github.com> --- lib/llm/tests/test_jail.rs | 121 +++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs index 4f033f1b30..702f119c97 100644 --- a/lib/llm/tests/test_jail.rs +++ b/lib/llm/tests/test_jail.rs @@ -1792,6 +1792,127 @@ mod tests { ); } + #[tokio::test] + async fn test_deepseek_v3_1() { + // Harmony format with analysis text and a tool call encoded in special tags + let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Berlin", "units": "metric"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather_forecast<|tool▁sep|>{"location": "Berlin", "days": 7, "units": "imperial"}<|tool▁call▁end|><|tool▁call▁begin|>get_air_quality<|tool▁sep|>{"location": "Berlin", "radius": 50}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"#; + + let chunks = vec![create_mock_response_chunk(text.to_string(), 0)]; + + let input_stream = stream::iter(chunks); + + let jail = JailedStream::builder() + .tool_call_parser("deepseek_v3_1") + .build(); + let jailed_stream = jail.apply(input_stream); + let results: Vec<_> = jailed_stream.collect().await; + + // Should have at least one output containing both analysis text and parsed tool call + assert!(!results.is_empty()); + + for result in results.iter() { + println!("Result chunk: {:?}", result); + } + + // Verify a tool call was parsed with expected name and args + let tool_call_idx = results + .iter() + .position(test_utils::has_tool_call) + .expect("Should have a tool call result"); + test_utils::assert_tool_call( + &results[tool_call_idx], + "get_current_weather", + json!({"location": "Berlin", "units": "metric"}), + ); + for result in results { + let Some(data) = result.data else { continue; }; + for choice in data.choices { + if let Some(content) = choice.delta.content { + assert!( + !content.contains("<|tool▁calls▁end|>"), + "Should not contain deepseek special tokens in content" + ); + } + } + } + } + + #[tokio::test] + async fn test_deepseek_v3_1_chunk() { + // Harmony format with analysis text and a tool call encoded in special tags + let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Berlin", "units": "metric"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather_forecast<|tool▁sep|>{"location": "Berlin", "days": 7, "units": "imperial"}<|tool▁call▁end|><|tool▁call▁begin|>get_air_quality<|tool▁sep|>{"location": "Berlin", "radius": 50}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"#; + + // Split text into words, treating angle-bracketed tokens as one word + let mut words = Vec::new(); + let mut i = 0; + let chars: Vec = text.chars().collect(); + while i < chars.len() { + if chars[i] == '<' { + // Find the next '>' + if let Some(end) = chars[i..].iter().position(|&c| c == '>') { + let word: String = chars[i..=i+end].iter().collect(); + words.push(word); + i += end + 1; + } else { + // Malformed, just push the rest + words.push(chars[i..].iter().collect()); + break; + } + } else if chars[i].is_whitespace() { + i += 1; + } else { + // Collect until next whitespace or '<' + let start = i; + while i < chars.len() && !chars[i].is_whitespace() && chars[i] != '<' { + i += 1; + } + words.push(chars[start..i].iter().collect()); + } + } + + let chunks = words + .into_iter() + .map(|word| create_mock_response_chunk(word, 0)) + .collect::>(); + + let input_stream = stream::iter(chunks); + + let jail = JailedStream::builder() + .tool_call_parser("deepseek_v3_1") + .build(); + let jailed_stream = jail.apply(input_stream); + let results: Vec<_> = jailed_stream.collect().await; + + // Should have at least one output containing both analysis text and parsed tool call + assert!(!results.is_empty()); + + for result in results.iter() { + println!("Result chunk: {:?}", result); + } + + // Verify a tool call was parsed with expected name and args + let tool_call_idx = results + .iter() + .position(test_utils::has_tool_call) + .expect("Should have a tool call result"); + test_utils::assert_tool_call( + &results[tool_call_idx], + "get_current_weather", + json!({"location": "Berlin", "units": "metric"}), + ); + for result in results { + let Some(data) = result.data else { continue; }; + for choice in data.choices { + if let Some(content) = choice.delta.content { + assert!( + !content.contains("<|tool▁calls▁end|>"), + "Should not contain deepseek special tokens in content" + ); + } + } + } + } + #[tokio::test] async fn test_jailed_stream_mistral_false_positive_curly() { // Curly brace in normal text should not trigger tool call detection for mistral From fe48e86c862addd47a20a85c2c64abac91192849 Mon Sep 17 00:00:00 2001 From: Guan Luo <41310872+GuanLuo@users.noreply.github.com> Date: Fri, 31 Oct 2025 02:18:08 +0800 Subject: [PATCH 2/6] fix: fix slipping tool call parsing for DeepSeek v3.1 Signed-off-by: Guan Luo <41310872+GuanLuo@users.noreply.github.com> --- lib/parsers/src/tool_calling/config.rs | 10 +++++-- .../src/tool_calling/json/deepseek_parser.rs | 26 ++++++++++++++++--- lib/parsers/src/tool_calling/parsers.rs | 6 ++--- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/lib/parsers/src/tool_calling/config.rs b/lib/parsers/src/tool_calling/config.rs index 57c3acb5e5..6892440301 100644 --- a/lib/parsers/src/tool_calling/config.rs +++ b/lib/parsers/src/tool_calling/config.rs @@ -153,16 +153,22 @@ impl ToolCallConfig { } pub fn deepseek_v3_1() -> Self { + // The whole tool calls block is wrapped between + // <|tool▁calls▁begin|> ... <|tool▁calls▁end|> + // regardless of number of tool calls. For external use of this + // config, we want them to only be operating on the whole block, + // so the tool parser can properly consume all tool call tokens. + // https://huggingface.co/deepseek-ai/DeepSeek-V3.1#toolcall Self { format: ToolCallParserType::Json, json: JsonParserConfig { tool_call_start_tokens: vec![ "<|tool▁calls▁begin|>".to_string(), - "<|tool▁call▁begin|>".to_string(), + // "<|tool▁call▁begin|>".to_string(), ], tool_call_end_tokens: vec![ "<|tool▁calls▁end|>".to_string(), - "<|tool▁call▁end|>".to_string(), + // "<|tool▁call▁end|>".to_string(), ], tool_call_separator_tokens: vec!["<|tool▁sep|>".to_string()], parser_type: JsonParserType::DeepseekV31, diff --git a/lib/parsers/src/tool_calling/json/deepseek_parser.rs b/lib/parsers/src/tool_calling/json/deepseek_parser.rs index 1d15ba86fc..34fdcf6942 100644 --- a/lib/parsers/src/tool_calling/json/deepseek_parser.rs +++ b/lib/parsers/src/tool_calling/json/deepseek_parser.rs @@ -126,8 +126,25 @@ pub fn parse_tool_calls_deepseek_v3_1( return Ok((vec![], Some(String::new()))); } - let tool_call_start_tokens = &config.tool_call_start_tokens; - let tool_call_end_tokens = &config.tool_call_end_tokens; + // For DeepSeek_v3_1, we consider the complete tool call block to be + // <|tool▁calls▁begin|>...<|tool▁calls▁end|>, even though the + // individual calls are parsed by <|tool▁call▁begin|>...<|tool▁call▁end|>. + // This is because if we may all call(s) tokens, we are not properly grouping + // the tool calls and results in group: + // 1. <|tool▁calls▁begin|><|tool▁call▁begin|>...<|tool▁call▁end|> + // 2. <|tool▁calls▁end|> + let has_end_token = config + .tool_call_end_tokens + .iter() + .any(|token| !token.is_empty() && trimmed.contains(token)); + if !has_end_token { + return Ok((vec![], Some(trimmed.to_string()))); + } + + let mut tool_call_start_tokens = config.tool_call_start_tokens.clone(); + tool_call_start_tokens.extend(vec!["<|tool▁call▁begin|>".to_string()]); + let mut tool_call_end_tokens = config.tool_call_end_tokens.clone(); + tool_call_end_tokens.extend(vec!["<|tool▁call▁end|>".to_string()]); let separator_tokens = &config.tool_call_separator_tokens; // Early exit if no tokens configured @@ -166,7 +183,7 @@ pub fn parse_tool_calls_deepseek_v3_1( }; // Extract individual tool call blocks - let blocks = extract_tool_call_blocks(trimmed, tool_call_start_tokens, tool_call_end_tokens); + let blocks = extract_tool_call_blocks(trimmed, &tool_call_start_tokens, &tool_call_end_tokens); if blocks.is_empty() { // Found start token but no valid blocks @@ -255,6 +272,9 @@ mod tests { let (name, args) = extract_name_and_args(result[1].clone()); assert_eq!(name, "get_current_weather"); assert_eq!(args["location"], "Paris"); + assert!(content.is_some()); + print!("**** {}", content.clone().unwrap()); + assert!(!content.unwrap().contains("<|tool▁calls▁end|>")); } #[test] diff --git a/lib/parsers/src/tool_calling/parsers.rs b/lib/parsers/src/tool_calling/parsers.rs index d539917321..1c947bf9ff 100644 --- a/lib/parsers/src/tool_calling/parsers.rs +++ b/lib/parsers/src/tool_calling/parsers.rs @@ -2413,15 +2413,15 @@ mod detect_parser_tests { } #[test] - fn test_e2e_detect_tool_call_start_deepseek_v3_1() { + fn test_e2e_detect_incomplete_tool_call_start_deepseek_v3_1() { let text = r#"<|tool▁call▁begin|>get_current_weather{"location": "Tokyo"}<|tool▁call▁end|>"#; let result = detect_tool_call_start(text, Some("deepseek_v3_1")).unwrap(); - assert!(result); + assert!(!result); } #[test] - fn test_e2e_detect_tool_call_multiple_start_deepseek_v3_1() { + fn test_e2e_detect_tool_call_start_deepseek_v3_1() { let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather{"location": "Tokyo"}<|tool▁call▁end|>"#; let result = detect_tool_call_start(text, Some("deepseek_v3_1")).unwrap(); assert!(result); From c56dded874ced3e1de7e3d34c7d4ea6145aaac2f Mon Sep 17 00:00:00 2001 From: Guan Luo <41310872+GuanLuo@users.noreply.github.com> Date: Fri, 31 Oct 2025 02:21:32 +0800 Subject: [PATCH 3/6] style: format Signed-off-by: Guan Luo <41310872+GuanLuo@users.noreply.github.com> --- lib/llm/tests/test_jail.rs | 42 +++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs index 702f119c97..e5d97fabf7 100644 --- a/lib/llm/tests/test_jail.rs +++ b/lib/llm/tests/test_jail.rs @@ -1825,7 +1825,9 @@ mod tests { json!({"location": "Berlin", "units": "metric"}), ); for result in results { - let Some(data) = result.data else { continue; }; + let Some(data) = result.data else { + continue; + }; for choice in data.choices { if let Some(content) = choice.delta.content { assert!( @@ -1848,25 +1850,25 @@ mod tests { let chars: Vec = text.chars().collect(); while i < chars.len() { if chars[i] == '<' { - // Find the next '>' - if let Some(end) = chars[i..].iter().position(|&c| c == '>') { - let word: String = chars[i..=i+end].iter().collect(); - words.push(word); - i += end + 1; - } else { - // Malformed, just push the rest - words.push(chars[i..].iter().collect()); - break; - } + // Find the next '>' + if let Some(end) = chars[i..].iter().position(|&c| c == '>') { + let word: String = chars[i..=i + end].iter().collect(); + words.push(word); + i += end + 1; + } else { + // Malformed, just push the rest + words.push(chars[i..].iter().collect()); + break; + } } else if chars[i].is_whitespace() { - i += 1; - } else { - // Collect until next whitespace or '<' - let start = i; - while i < chars.len() && !chars[i].is_whitespace() && chars[i] != '<' { i += 1; - } - words.push(chars[start..i].iter().collect()); + } else { + // Collect until next whitespace or '<' + let start = i; + while i < chars.len() && !chars[i].is_whitespace() && chars[i] != '<' { + i += 1; + } + words.push(chars[start..i].iter().collect()); } } @@ -1901,7 +1903,9 @@ mod tests { json!({"location": "Berlin", "units": "metric"}), ); for result in results { - let Some(data) = result.data else { continue; }; + let Some(data) = result.data else { + continue; + }; for choice in data.choices { if let Some(content) = choice.delta.content { assert!( From 4424705b43e66192d771fa276e66df0ccc061154 Mon Sep 17 00:00:00 2001 From: Guan Luo <41310872+GuanLuo@users.noreply.github.com> Date: Fri, 31 Oct 2025 02:30:59 +0800 Subject: [PATCH 4/6] chore: address comment Signed-off-by: Guan Luo <41310872+GuanLuo@users.noreply.github.com> --- lib/llm/tests/test_jail.rs | 8 -------- .../src/tool_calling/json/deepseek_parser.rs | 16 ++++++++-------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs index e5d97fabf7..321585152b 100644 --- a/lib/llm/tests/test_jail.rs +++ b/lib/llm/tests/test_jail.rs @@ -1810,10 +1810,6 @@ mod tests { // Should have at least one output containing both analysis text and parsed tool call assert!(!results.is_empty()); - for result in results.iter() { - println!("Result chunk: {:?}", result); - } - // Verify a tool call was parsed with expected name and args let tool_call_idx = results .iter() @@ -1888,10 +1884,6 @@ mod tests { // Should have at least one output containing both analysis text and parsed tool call assert!(!results.is_empty()); - for result in results.iter() { - println!("Result chunk: {:?}", result); - } - // Verify a tool call was parsed with expected name and args let tool_call_idx = results .iter() diff --git a/lib/parsers/src/tool_calling/json/deepseek_parser.rs b/lib/parsers/src/tool_calling/json/deepseek_parser.rs index 34fdcf6942..bcc316aabd 100644 --- a/lib/parsers/src/tool_calling/json/deepseek_parser.rs +++ b/lib/parsers/src/tool_calling/json/deepseek_parser.rs @@ -126,13 +126,16 @@ pub fn parse_tool_calls_deepseek_v3_1( return Ok((vec![], Some(String::new()))); } - // For DeepSeek_v3_1, we consider the complete tool call block to be - // <|tool▁calls▁begin|>...<|tool▁calls▁end|>, even though the - // individual calls are parsed by <|tool▁call▁begin|>...<|tool▁call▁end|>. - // This is because if we may all call(s) tokens, we are not properly grouping - // the tool calls and results in group: + // For DeepSeek_v3_1, we consider the tool call block to be + // <|tool▁calls▁begin|>...<|tool▁calls▁end|> and only start parsing + // if seeing <|tool▁calls▁begin|>, even though the individual calls are + // parsed by <|tool▁call▁begin|>...<|tool▁call▁end|>. + // This is because if we start parsing by considering all call(s) tokens, + // we are not properly grouping the tool calls and results in groups: // 1. <|tool▁calls▁begin|><|tool▁call▁begin|>...<|tool▁call▁end|> // 2. <|tool▁calls▁end|> + // where 2. will not be recognized as part of the tool call block due + // to missing start token and will not be consumed. let has_end_token = config .tool_call_end_tokens .iter() @@ -272,9 +275,6 @@ mod tests { let (name, args) = extract_name_and_args(result[1].clone()); assert_eq!(name, "get_current_weather"); assert_eq!(args["location"], "Paris"); - assert!(content.is_some()); - print!("**** {}", content.clone().unwrap()); - assert!(!content.unwrap().contains("<|tool▁calls▁end|>")); } #[test] From b9bb7e01bff6c580d68d14c9de93dabbef759a10 Mon Sep 17 00:00:00 2001 From: Guan Luo <41310872+GuanLuo@users.noreply.github.com> Date: Fri, 31 Oct 2025 02:39:01 +0800 Subject: [PATCH 5/6] chore: update comment Signed-off-by: Guan Luo <41310872+GuanLuo@users.noreply.github.com> --- lib/llm/tests/test_jail.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs index 321585152b..c45207e569 100644 --- a/lib/llm/tests/test_jail.rs +++ b/lib/llm/tests/test_jail.rs @@ -1794,7 +1794,7 @@ mod tests { #[tokio::test] async fn test_deepseek_v3_1() { - // Harmony format with analysis text and a tool call encoded in special tags + // DeepSeek v3.1 format with two tool calls encoded in special tags let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Berlin", "units": "metric"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather_forecast<|tool▁sep|>{"location": "Berlin", "days": 7, "units": "imperial"}<|tool▁call▁end|><|tool▁call▁begin|>get_air_quality<|tool▁sep|>{"location": "Berlin", "radius": 50}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"#; let chunks = vec![create_mock_response_chunk(text.to_string(), 0)]; @@ -1837,7 +1837,7 @@ mod tests { #[tokio::test] async fn test_deepseek_v3_1_chunk() { - // Harmony format with analysis text and a tool call encoded in special tags + // DeepSeek v3.1 format with two tool calls encoded in special tags let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>get_current_weather<|tool▁sep|>{"location": "Berlin", "units": "metric"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather_forecast<|tool▁sep|>{"location": "Berlin", "days": 7, "units": "imperial"}<|tool▁call▁end|><|tool▁call▁begin|>get_air_quality<|tool▁sep|>{"location": "Berlin", "radius": 50}<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"#; // Split text into words, treating angle-bracketed tokens as one word From 05f4472d9ed935aa8aa7f48198f348e023291853 Mon Sep 17 00:00:00 2001 From: Guan Luo <41310872+GuanLuo@users.noreply.github.com> Date: Mon, 3 Nov 2025 18:06:40 +0800 Subject: [PATCH 6/6] fix: update test for new behavior Signed-off-by: Guan Luo <41310872+GuanLuo@users.noreply.github.com> --- lib/parsers/src/tool_calling/json/deepseek_parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/parsers/src/tool_calling/json/deepseek_parser.rs b/lib/parsers/src/tool_calling/json/deepseek_parser.rs index bcc316aabd..abb1fb9be7 100644 --- a/lib/parsers/src/tool_calling/json/deepseek_parser.rs +++ b/lib/parsers/src/tool_calling/json/deepseek_parser.rs @@ -418,7 +418,7 @@ mod detect_parser_tests { let text = r#"<|tool▁call▁begin|>get_current_weather宽带}"#; let config = ToolCallConfig::deepseek_v3_1().json; let result = detect_tool_call_start_deepseek_v3_1(text, &config); - assert!(result); + assert!(!result); } #[test]