Skip to content

Commit cee8d5f

Browse files
committed
fix: consolidated error handling improvements
This PR consolidates the following error handling fixes: - #48: Handle semaphore and init failures gracefully in async_utils - #54: Improve error handling in session storage operations (includes TOCTOU race fixes) - #55: Add validation for threshold, ratio, and token count fields - #56: Replace unwrap with proper error handling for client access - #57: Use unwrap_or_default for SystemTime operations - #61: Handle invalid request-id header values gracefully - #65: Improve error handling for timestamp and JSON operations in streaming Key changes: - Added graceful handling for semaphore and init failures - Bound ToolResponseStore size and cleanup consumed entries - Eliminated TOCTOU races in MCP server and plugin registry - Replaced unwrap() with proper error handling throughout - Added validation for config fields - Improved error propagation in middleware
1 parent c398212 commit cee8d5f

File tree

19 files changed

+813
-167
lines changed

19 files changed

+813
-167
lines changed

src/cortex-agents/src/mention.rs

Lines changed: 142 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,46 @@
1717
use regex::Regex;
1818
use std::sync::LazyLock;
1919

20+
/// Safely get the string slice up to the given byte position.
21+
///
22+
/// Returns the slice `&text[..pos]` if `pos` is at a valid UTF-8 character boundary.
23+
/// If `pos` is inside a multi-byte character, finds the nearest valid boundary
24+
/// by searching backwards.
25+
fn safe_slice_up_to(text: &str, pos: usize) -> &str {
26+
if pos >= text.len() {
27+
return text;
28+
}
29+
if text.is_char_boundary(pos) {
30+
return &text[..pos];
31+
}
32+
// Find the nearest valid boundary by searching backwards
33+
let mut valid_pos = pos;
34+
while valid_pos > 0 && !text.is_char_boundary(valid_pos) {
35+
valid_pos -= 1;
36+
}
37+
&text[..valid_pos]
38+
}
39+
40+
/// Safely get the string slice from the given byte position to the end.
41+
///
42+
/// Returns the slice `&text[pos..]` if `pos` is at a valid UTF-8 character boundary.
43+
/// If `pos` is inside a multi-byte character, finds the nearest valid boundary
44+
/// by searching forwards.
45+
fn safe_slice_from(text: &str, pos: usize) -> &str {
46+
if pos >= text.len() {
47+
return "";
48+
}
49+
if text.is_char_boundary(pos) {
50+
return &text[pos..];
51+
}
52+
// Find the nearest valid boundary by searching forwards
53+
let mut valid_pos = pos;
54+
while valid_pos < text.len() && !text.is_char_boundary(valid_pos) {
55+
valid_pos += 1;
56+
}
57+
&text[valid_pos..]
58+
}
59+
2060
/// A parsed agent mention from user input.
2161
#[derive(Debug, Clone, PartialEq, Eq)]
2262
pub struct AgentMention {
@@ -108,10 +148,10 @@ pub fn extract_mention_and_text(
108148
) -> Option<(AgentMention, String)> {
109149
let mention = find_first_valid_mention(text, valid_agents)?;
110150

111-
// Remove the mention from text
151+
// Remove the mention from text, using safe slicing for UTF-8 boundaries
112152
let mut remaining = String::with_capacity(text.len());
113-
remaining.push_str(&text[..mention.start]);
114-
remaining.push_str(&text[mention.end..]);
153+
remaining.push_str(safe_slice_up_to(text, mention.start));
154+
remaining.push_str(safe_slice_from(text, mention.end));
115155

116156
// Trim and normalize whitespace
117157
let remaining = remaining.trim().to_string();
@@ -123,7 +163,8 @@ pub fn extract_mention_and_text(
123163
pub fn starts_with_mention(text: &str, valid_agents: &[&str]) -> bool {
124164
let text = text.trim();
125165
if let Some(mention) = find_first_valid_mention(text, valid_agents) {
126-
mention.start == 0 || text[..mention.start].trim().is_empty()
166+
// Use safe slicing to handle UTF-8 boundaries
167+
mention.start == 0 || safe_slice_up_to(text, mention.start).trim().is_empty()
127168
} else {
128169
false
129170
}
@@ -196,8 +237,8 @@ pub fn parse_message_for_agent(text: &str, valid_agents: &[&str]) -> ParsedAgent
196237

197238
// Check if message starts with @agent
198239
if let Some((mention, remaining)) = extract_mention_and_text(text, valid_agents) {
199-
// Only trigger if mention is at the start
200-
if mention.start == 0 || text[..mention.start].trim().is_empty() {
240+
// Only trigger if mention is at the start, using safe slicing for UTF-8 boundaries
241+
if mention.start == 0 || safe_slice_up_to(text, mention.start).trim().is_empty() {
201242
return ParsedAgentMessage::for_agent(mention.agent_name, remaining, text.to_string());
202243
}
203244
}
@@ -318,4 +359,99 @@ mod tests {
318359
assert_eq!(mentions[0].agent_name, "my-agent");
319360
assert_eq!(mentions[1].agent_name, "my_agent");
320361
}
362+
363+
// UTF-8 boundary safety tests
364+
#[test]
365+
fn test_safe_slice_up_to_ascii() {
366+
let text = "hello world";
367+
assert_eq!(safe_slice_up_to(text, 5), "hello");
368+
assert_eq!(safe_slice_up_to(text, 0), "");
369+
assert_eq!(safe_slice_up_to(text, 100), "hello world");
370+
}
371+
372+
#[test]
373+
fn test_safe_slice_up_to_multibyte() {
374+
// "こんにちは" - each character is 3 bytes
375+
let text = "こんにちは";
376+
assert_eq!(safe_slice_up_to(text, 3), "こ"); // Valid boundary
377+
assert_eq!(safe_slice_up_to(text, 6), "こん"); // Valid boundary
378+
// Position 4 is inside the second character, should return "こ"
379+
assert_eq!(safe_slice_up_to(text, 4), "こ");
380+
assert_eq!(safe_slice_up_to(text, 5), "こ");
381+
}
382+
383+
#[test]
384+
fn test_safe_slice_from_multibyte() {
385+
let text = "こんにちは";
386+
assert_eq!(safe_slice_from(text, 3), "んにちは"); // Valid boundary
387+
// Position 4 is inside second character, should skip to position 6
388+
assert_eq!(safe_slice_from(text, 4), "にちは");
389+
assert_eq!(safe_slice_from(text, 5), "にちは");
390+
}
391+
392+
#[test]
393+
fn test_extract_mention_with_multibyte_prefix() {
394+
let valid = vec!["general"];
395+
396+
// Multi-byte characters before mention
397+
let result = extract_mention_and_text("日本語 @general search files", &valid);
398+
assert!(result.is_some());
399+
let (mention, remaining) = result.unwrap();
400+
assert_eq!(mention.agent_name, "general");
401+
// The prefix should be preserved without panicking
402+
assert!(remaining.contains("search files"));
403+
}
404+
405+
#[test]
406+
fn test_starts_with_mention_multibyte() {
407+
let valid = vec!["general"];
408+
409+
// Whitespace with multi-byte characters should not cause panic
410+
assert!(starts_with_mention(" @general task", &valid));
411+
412+
// Multi-byte characters before mention - should return false, not panic
413+
assert!(!starts_with_mention("日本語 @general task", &valid));
414+
}
415+
416+
#[test]
417+
fn test_parse_message_for_agent_multibyte() {
418+
let valid = vec!["general"];
419+
420+
// Multi-byte prefix - should not panic
421+
let parsed = parse_message_for_agent("日本語 @general find files", &valid);
422+
// Since mention is not at the start, should not invoke task
423+
assert!(!parsed.should_invoke_task);
424+
425+
// Multi-byte in the prompt (after mention)
426+
let parsed = parse_message_for_agent("@general 日本語を検索", &valid);
427+
assert!(parsed.should_invoke_task);
428+
assert_eq!(parsed.agent, Some("general".to_string()));
429+
assert_eq!(parsed.prompt, "日本語を検索");
430+
}
431+
432+
#[test]
433+
fn test_extract_mention_with_emoji() {
434+
let valid = vec!["general"];
435+
436+
// Emojis are 4 bytes each
437+
let result = extract_mention_and_text("🎉 @general celebrate", &valid);
438+
assert!(result.is_some());
439+
let (mention, remaining) = result.unwrap();
440+
assert_eq!(mention.agent_name, "general");
441+
assert!(remaining.contains("celebrate"));
442+
}
443+
444+
#[test]
445+
fn test_mixed_multibyte_and_ascii() {
446+
let valid = vec!["general"];
447+
448+
// Mix of ASCII, CJK, and emoji
449+
let text = "Hello 世界 🌍 @general search for 日本語";
450+
let result = extract_mention_and_text(text, &valid);
451+
assert!(result.is_some());
452+
let (mention, remaining) = result.unwrap();
453+
assert_eq!(mention.agent_name, "general");
454+
// Should not panic and produce valid output
455+
assert!(!remaining.is_empty());
456+
}
321457
}

src/cortex-app-server/src/auth.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ impl Claims {
4545
pub fn new(user_id: impl Into<String>, expiry_seconds: u64) -> Self {
4646
let now = SystemTime::now()
4747
.duration_since(UNIX_EPOCH)
48-
.unwrap()
48+
.unwrap_or_default()
4949
.as_secs();
5050

5151
Self {
@@ -75,7 +75,7 @@ impl Claims {
7575
pub fn is_expired(&self) -> bool {
7676
let now = SystemTime::now()
7777
.duration_since(UNIX_EPOCH)
78-
.unwrap()
78+
.unwrap_or_default()
7979
.as_secs();
8080
self.exp < now
8181
}
@@ -187,7 +187,7 @@ impl AuthService {
187187
pub async fn cleanup_revoked_tokens(&self) {
188188
let now = SystemTime::now()
189189
.duration_since(UNIX_EPOCH)
190-
.unwrap()
190+
.unwrap_or_default()
191191
.as_secs();
192192

193193
let mut revoked = self.revoked_tokens.write().await;

src/cortex-app-server/src/config.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,18 @@ pub struct ServerConfig {
4949
pub max_body_size: usize,
5050

5151
/// Request timeout in seconds (applies to full request lifecycle).
52+
///
53+
/// See `cortex_common::http_client` module documentation for the complete
54+
/// timeout hierarchy across Cortex services.
5255
#[serde(default = "default_request_timeout")]
5356
pub request_timeout: u64,
5457

5558
/// Read timeout for individual chunks in seconds.
5659
/// Applies to chunked transfer encoding to prevent indefinite hangs
5760
/// when clients disconnect without sending the terminal chunk.
61+
///
62+
/// See `cortex_common::http_client` module documentation for the complete
63+
/// timeout hierarchy across Cortex services.
5864
#[serde(default = "default_read_timeout")]
5965
pub read_timeout: u64,
6066

@@ -71,12 +77,16 @@ pub struct ServerConfig {
7177
pub cors_origins: Vec<String>,
7278

7379
/// Graceful shutdown timeout in seconds.
80+
///
81+
/// See `cortex_common::http_client` module documentation for the complete
82+
/// timeout hierarchy across Cortex services.
7483
#[serde(default = "default_shutdown_timeout")]
7584
pub shutdown_timeout: u64,
7685
}
7786

7887
fn default_shutdown_timeout() -> u64 {
7988
30 // 30 seconds for graceful shutdown
89+
// See cortex_common::http_client for timeout hierarchy documentation
8090
}
8191

8292
fn default_listen_addr() -> String {

src/cortex-app-server/src/middleware.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ pub async fn request_id_middleware(mut request: Request, next: Next) -> Response
4040
let mut response = next.run(request).await;
4141
response.headers_mut().insert(
4242
REQUEST_ID_HEADER,
43-
HeaderValue::from_str(&request_id).unwrap(),
43+
HeaderValue::from_str(&request_id)
44+
.unwrap_or_else(|_| HeaderValue::from_static("invalid-request-id")),
4445
);
4546

4647
response

src/cortex-app-server/src/storage.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ pub struct StoredToolCall {
4747

4848
/// Session storage manager.
4949
pub struct SessionStorage {
50-
#[allow(dead_code)]
51-
base_dir: PathBuf,
5250
sessions_dir: PathBuf,
5351
history_dir: PathBuf,
5452
}
@@ -66,7 +64,6 @@ impl SessionStorage {
6664
info!("Session storage initialized at {:?}", base_dir);
6765

6866
Ok(Self {
69-
base_dir,
7067
sessions_dir,
7168
history_dir,
7269
})

src/cortex-apply-patch/src/hunk.rs

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -250,9 +250,6 @@ pub struct SearchReplace {
250250
pub search: String,
251251
/// The text to replace with.
252252
pub replace: String,
253-
/// Replace all occurrences (true) or just the first (false).
254-
#[allow(dead_code)]
255-
pub replace_all: bool,
256253
}
257254

258255
impl SearchReplace {
@@ -266,16 +263,8 @@ impl SearchReplace {
266263
path: path.into(),
267264
search: search.into(),
268265
replace: replace.into(),
269-
replace_all: false,
270266
}
271267
}
272-
273-
/// Set whether to replace all occurrences.
274-
#[allow(dead_code)]
275-
pub fn with_replace_all(mut self, replace_all: bool) -> Self {
276-
self.replace_all = replace_all;
277-
self
278-
}
279268
}
280269

281270
#[cfg(test)]

src/cortex-common/src/http_client.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,54 @@
99
//!
1010
//! DNS caching is configured with reasonable TTL to allow failover and load
1111
//! balancer updates (#2177).
12+
//!
13+
//! # Timeout Configuration Guide
14+
//!
15+
//! This section documents the timeout hierarchy across the Cortex codebase. Use this
16+
//! as a reference when configuring timeouts for new features or debugging timeout issues.
17+
//!
18+
//! ## Timeout Hierarchy
19+
//!
20+
//! | Use Case | Timeout | Constant/Location | Rationale |
21+
//! |-----------------------------|---------|--------------------------------------------|-----------------------------------------|
22+
//! | Health checks | 5s | `HEALTH_CHECK_TIMEOUT` (this module) | Quick validation of service status |
23+
//! | Standard HTTP requests | 30s | `DEFAULT_TIMEOUT` (this module) | Normal API calls with reasonable margin |
24+
//! | Per-chunk read (streaming) | 30s | `read_timeout` (cortex-app-server/config) | Individual chunk timeout during stream |
25+
//! | Pool idle timeout | 60s | `POOL_IDLE_TIMEOUT` (this module) | DNS re-resolution for failover |
26+
//! | LLM Request (non-streaming) | 120s | `DEFAULT_REQUEST_TIMEOUT_SECS` (cortex-exec/runner) | Model inference takes time |
27+
//! | LLM Streaming total | 300s | `STREAMING_TIMEOUT` (this module) | Long-running streaming responses |
28+
//! | Server request lifecycle | 300s | `request_timeout` (cortex-app-server/config) | Full HTTP request/response cycle |
29+
//! | Entire exec session | 600s | `DEFAULT_TIMEOUT_SECS` (cortex-exec/runner) | Multi-turn conversation limit |
30+
//! | Graceful shutdown | 30s | `shutdown_timeout` (cortex-app-server/config) | Time for cleanup on shutdown |
31+
//!
32+
//! ## Module-Specific Timeouts
33+
//!
34+
//! ### cortex-common (this module)
35+
//! - `DEFAULT_TIMEOUT` (30s): Use for standard API calls.
36+
//! - `STREAMING_TIMEOUT` (300s): Use for LLM streaming endpoints.
37+
//! - `HEALTH_CHECK_TIMEOUT` (5s): Use for health/readiness checks.
38+
//! - `POOL_IDLE_TIMEOUT` (60s): Connection pool cleanup for DNS freshness.
39+
//!
40+
//! ### cortex-exec (runner.rs)
41+
//! - `DEFAULT_TIMEOUT_SECS` (600s): Maximum duration for entire exec session.
42+
//! - `DEFAULT_REQUEST_TIMEOUT_SECS` (120s): Single LLM request timeout.
43+
//!
44+
//! ### cortex-app-server (config.rs)
45+
//! - `request_timeout` (300s): Full request lifecycle timeout.
46+
//! - `read_timeout` (30s): Per-chunk timeout for streaming reads.
47+
//! - `shutdown_timeout` (30s): Graceful shutdown duration.
48+
//!
49+
//! ### cortex-engine (api_client.rs)
50+
//! - Re-exports constants from this module for consistency.
51+
//!
52+
//! ## Recommendations
53+
//!
54+
//! When adding new timeout configurations:
55+
//! 1. Use constants from this module when possible for consistency.
56+
//! 2. Document any new timeout constants with their rationale.
57+
//! 3. Consider the timeout hierarchy - inner timeouts should be shorter than outer ones.
58+
//! 4. For LLM operations, use longer timeouts (120s-300s) to accommodate model inference.
59+
//! 5. For health checks and quick validations, use short timeouts (5s-10s).
1260
1361
use reqwest::Client;
1462
use std::time::Duration;

0 commit comments

Comments
 (0)