Skip to content

Commit b963993

Browse files
factorydroid and echobt
authored and committed
fix(agents): handle UTF-16 encoded agent config files
Fixes bounty issue #1469. Agent configuration files encoded as UTF-16 (common on Windows) would fail with an unhelpful 'stream did not contain valid UTF-8' error when using std::fs::read_to_string(). This change uses the existing text_encoding utilities to detect and decode various file encodings, including UTF-16LE and UTF-16BE, providing clearer error messages when encoding issues occur.
1 parent 82a1a4b commit b963993

File tree

2 files changed

+49
-9
lines changed

2 files changed

+49
-9
lines changed

cortex-cli/src/agent_cmd.rs

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,32 @@
77
88
use anyhow::{Context, Result, bail};
99
use clap::Parser;
10+
use cortex_engine::text_encoding::{Encoding, decode};
1011
use serde::{Deserialize, Serialize};
1112
use std::collections::HashMap;
1213
use std::io::{self, BufRead, Write};
1314
use std::path::{Path, PathBuf};
1415

16+
/// Read a file's contents, automatically detecting and handling various text encodings.
17+
///
18+
/// This function reads raw bytes from the file, detects the encoding (including UTF-16),
19+
/// and decodes it to a UTF-8 string. This is more robust than `std::fs::read_to_string`
20+
/// which only handles UTF-8.
21+
fn read_file_with_encoding(path: &Path) -> Result<String> {
22+
let bytes =
23+
std::fs::read(path).with_context(|| format!("Failed to read {}", path.display()))?;
24+
let encoding = Encoding::detect(&bytes);
25+
decode(&bytes, encoding)
26+
.map(|cow| cow.into_owned())
27+
.with_context(|| {
28+
format!(
29+
"Failed to decode file '{}' with encoding {}",
30+
path.display(),
31+
encoding.name()
32+
)
33+
})
34+
}
35+
1536
/// Agent management CLI.
1637
#[derive(Debug, Parser)]
1738
pub struct AgentCli {
@@ -613,8 +634,7 @@ fn load_agents_from_dir(dir: &Path, source: AgentSource) -> Result<Vec<AgentInfo
613634

614635
/// Load an agent from a markdown file with YAML frontmatter.
615636
fn load_agent_from_md(path: &Path, source: AgentSource) -> Result<AgentInfo> {
616-
let content = std::fs::read_to_string(path)
617-
.with_context(|| format!("Failed to read {}", path.display()))?;
637+
let content = read_file_with_encoding(path)?;
618638

619639
let (frontmatter, body) = parse_frontmatter(&content)?;
620640

@@ -643,8 +663,7 @@ fn load_agent_from_md(path: &Path, source: AgentSource) -> Result<AgentInfo> {
643663

644664
/// Load an agent from a JSON file.
645665
fn load_agent_from_json(path: &Path, source: AgentSource) -> Result<AgentInfo> {
646-
let content = std::fs::read_to_string(path)
647-
.with_context(|| format!("Failed to read {}", path.display()))?;
666+
let content = read_file_with_encoding(path)?;
648667

649668
let frontmatter: AgentFrontmatter = serde_json::from_str(&content)
650669
.with_context(|| format!("Failed to parse {}", path.display()))?;
@@ -653,7 +672,7 @@ fn load_agent_from_json(path: &Path, source: AgentSource) -> Result<AgentInfo> {
653672
let prompt = if let Some(parent) = path.parent() {
654673
let prompt_file = parent.join("prompt.md");
655674
if prompt_file.exists() {
656-
Some(std::fs::read_to_string(&prompt_file)?)
675+
Some(read_file_with_encoding(&prompt_file)?)
657676
} else {
658677
None
659678
}

cortex-engine/src/agents.rs

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,27 @@ use serde::{Deserialize, Serialize};
2525
use tokio::sync::RwLock;
2626

2727
use crate::error::{CortexError, Result};
28+
use crate::text_encoding::{Encoding, decode};
29+
30+
/// Read a file's contents, automatically detecting and handling various text encodings.
31+
///
32+
/// This function reads raw bytes from the file, detects the encoding (including UTF-16),
33+
/// and decodes it to a UTF-8 string. This is more robust than `std::fs::read_to_string`
34+
/// which only handles UTF-8.
35+
fn read_file_with_encoding(path: &Path) -> Result<String> {
36+
let bytes = std::fs::read(path)?;
37+
let encoding = Encoding::detect(&bytes);
38+
decode(&bytes, encoding)
39+
.map(|cow| cow.into_owned())
40+
.map_err(|e| {
41+
CortexError::InvalidInput(format!(
42+
"Failed to decode file '{}' with encoding {}: {}",
43+
path.display(),
44+
encoding.name(),
45+
e
46+
))
47+
})
48+
}
2849

2950
// ============================================================================
3051
// OS-Specific Agents Directory
@@ -461,15 +482,15 @@ impl AgentRegistry {
461482
agent_md: &Path,
462483
source: AgentSource,
463484
) -> Result<Agent> {
464-
let content = std::fs::read_to_string(agent_md)?;
485+
let content = read_file_with_encoding(agent_md)?;
465486
let (metadata, prompt) = parse_agent_md(&content)?;
466487

467488
metadata.validate()?;
468489

469490
// Resolve system prompt
470491
let system_prompt = if let Some(ref prompt_file) = metadata.prompt_file {
471492
let prompt_path = agent_dir.join(prompt_file);
472-
std::fs::read_to_string(&prompt_path)?
493+
read_file_with_encoding(&prompt_path)?
473494
} else if let Some(ref prompt) = metadata.system_prompt {
474495
prompt.clone()
475496
} else {
@@ -491,15 +512,15 @@ impl AgentRegistry {
491512
agent_json: &Path,
492513
source: AgentSource,
493514
) -> Result<Agent> {
494-
let content = std::fs::read_to_string(agent_json)?;
515+
let content = read_file_with_encoding(agent_json)?;
495516
let metadata: AgentMetadata = serde_json::from_str(&content)?;
496517

497518
metadata.validate()?;
498519

499520
// Resolve system prompt
500521
let system_prompt = if let Some(ref prompt_file) = metadata.prompt_file {
501522
let prompt_path = agent_dir.join(prompt_file);
502-
std::fs::read_to_string(&prompt_path)?
523+
read_file_with_encoding(&prompt_path)?
503524
} else {
504525
metadata.system_prompt.clone().unwrap_or_default()
505526
};

0 commit comments

Comments
 (0)